Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions dist/core/rules/index.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/core/rules/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export { default as idUnique } from './id-unique'
export { default as inlineScriptDisabled } from './inline-script-disabled'
export { default as inlineStyleDisabled } from './inline-style-disabled'
export { default as inputRequiresLabel } from './input-requires-label'
export { default as linkRelCanonicalRequire } from './link-rel-canonical-require'
export { default as mainRequire } from './main-require'
export { default as metaCharsetRequire } from './meta-charset-require'
export { default as metaDescriptionRequire } from './meta-description-require'
Expand Down
51 changes: 51 additions & 0 deletions src/core/rules/link-rel-canonical-require.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { Block, Listener } from '../htmlparser'
import { Rule } from '../types'

/**
 * Rule `link-rel-canonical-require`.
 *
 * When the document contains a `<head>` start tag, reports an error unless a
 * `<link>` start tag whose `rel` equals "canonical" (case-insensitive) with a
 * non-blank `href` was also seen.
 *
 * NOTE(review): the listener does not track whether the `<link>` is nested
 * inside `<head>`; a canonical link anywhere in the document satisfies the
 * check. When several canonical links exist, the `href` of the last one is
 * the one validated. Documents without any `<head>` tag are never reported.
 */
export default {
  id: 'link-rel-canonical-require',
  description:
    '<link rel="canonical"> with non-blank href must be present in <head> tag.',
  init(parser, reporter) {
    // Most recent <head> start tag; errors are anchored to its position.
    let headBlock: Block | null = null
    // Whether any <link rel="canonical"> start tag has been encountered.
    let canonicalFound = false
    // href of the most recent canonical link ('' when absent or empty).
    let canonicalHref = ''

    // Collects state from every start tag as the document is parsed.
    const handleTagStart: Listener = (event) => {
      switch (event.tagName.toLowerCase()) {
        case 'head':
          headBlock = event
          break
        case 'link': {
          // presumably a name -> value lookup of the tag's attributes
          const attrs = parser.getMapAttrs(event.attrs)
          const rel = attrs['rel']
          if (rel && rel.toLowerCase() === 'canonical') {
            canonicalFound = true
            canonicalHref = attrs['href'] || ''
          }
          break
        }
      }
    }

    parser.addListener('tagstart', handleTagStart)

    // Evaluate the collected state once the whole document has been parsed.
    parser.addListener('end', () => {
      // No <head> seen: nothing to anchor a report to, so stay silent.
      if (!headBlock) {
        return
      }

      let message: string
      if (!canonicalFound) {
        message = '<link rel="canonical"> must be present in <head> tag.'
      } else if (canonicalHref.trim() === '') {
        message = '<link rel="canonical"> href attribute must not be empty.'
      } else {
        // A canonical link with a non-blank href exists.
        return
      }

      // Both failure modes are reported at the <head> tag's location.
      reporter.error(message, headBlock.line, headBlock.col, this, headBlock.raw)
    })
  },
} as Rule
1 change: 1 addition & 0 deletions src/core/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ export interface Ruleset {
'inline-script-disabled'?: boolean
'inline-style-disabled'?: boolean
'input-requires-label'?: boolean
'link-rel-canonical-require'?: boolean
'main-require'?: boolean
'script-disabled'?: boolean
'space-tab-mixed-disabled'?:
Expand Down
103 changes: 103 additions & 0 deletions test/rules/link-rel-canonical-require.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
const { HTMLHint } = require('../../dist/htmlhint.js')
const ruleId = 'link-rel-canonical-require'

// Messages the rule is expected to emit.
const MISSING_MESSAGE = '<link rel="canonical"> must be present in <head> tag.'
const EMPTY_HREF_MESSAGE =
  '<link rel="canonical"> href attribute must not be empty.'

// Wraps the given <head> content in the minimal document shared by all cases.
const page = (headContent) =>
  `<!DOCTYPE html><html><head>${headContent}</head><body></body></html>`

// Runs HTMLHint with only the rule under test enabled.
const lint = (headContent) =>
  HTMLHint.verify(page(headContent), { [ruleId]: true })

// Each entry: [test title, <head> content, expected message or null when the
// document must produce no messages at all].
const cases = [
  [
    'should not report an error when a valid canonical link is present',
    '<link rel="canonical" href="https://example.com/dresses/green-dresses">',
    null,
  ],
  [
    'should not report an error when canonical link has uppercase rel attribute',
    '<link rel="CANONICAL" href="https://example.com/dresses/green-dresses">',
    null,
  ],
  [
    'should not report an error when canonical link has mixed case rel attribute',
    '<link rel="Canonical" href="https://example.com/dresses/green-dresses">',
    null,
  ],
  ['should report an error when canonical link is missing', '', MISSING_MESSAGE],
  [
    'should report an error when canonical link href is blank',
    '<link rel="canonical" href="">',
    EMPTY_HREF_MESSAGE,
  ],
  [
    'should report an error when canonical link href is only whitespace',
    '<link rel="canonical" href=" ">',
    EMPTY_HREF_MESSAGE,
  ],
  [
    'should report an error when canonical link href is missing',
    '<link rel="canonical">',
    EMPTY_HREF_MESSAGE,
  ],
  [
    'should not report an error for other link tags',
    '<link rel="stylesheet" href="style.css"><link rel="canonical" href="https://example.com/dresses/green-dresses">',
    null,
  ],
  [
    'should report an error when only other link tags are present',
    '<link rel="stylesheet" href="style.css"><link rel="icon" href="favicon.ico">',
    MISSING_MESSAGE,
  ],
  [
    'should not report an error when canonical link is present with other meta tags',
    '<meta charset="utf-8"><meta name="description" content="A description"><link rel="canonical" href="https://example.com/dresses/green-dresses">',
    null,
  ],
  [
    'should not report an error when canonical link has relative URL',
    '<link rel="canonical" href="/dresses/green-dresses">',
    null,
  ],
  [
    'should not report an error when canonical link has query parameters',
    '<link rel="canonical" href="https://example.com/dresses/green-dresses?color=green&size=m">',
    null,
  ],
  [
    'should not report an error when canonical link has fragment',
    '<link rel="canonical" href="https://example.com/dresses/green-dresses#section1">',
    null,
  ],
  [
    'should not report an error when canonical link is self-referencing',
    '<link rel="canonical" href="https://example.com/current-page">',
    null,
  ],
]

describe('Rule: link-rel-canonical-require', () => {
  for (const [title, headContent, expectedMessage] of cases) {
    it(title, () => {
      const messages = lint(headContent)

      if (expectedMessage === null) {
        expect(messages.length).toBe(0)
        return
      }

      expect(messages.length).toBe(1)
      expect(messages[0].message).toBe(expectedMessage)
    })
  }
})
1 change: 1 addition & 0 deletions website/src/content/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ An example configuration file (with all rules disabled):
"inline-script-disabled": false,
"inline-style-disabled": false,
"input-requires-label": false,
"link-rel-canonical-require": false,
"main-require": false,
"meta-charset-require": false,
"meta-description-require": false,
Expand Down
1 change: 1 addition & 0 deletions website/src/content/docs/rules/index.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ description: A complete list of all the rules for HTMLHint
- [`meta-charset-require`](meta-charset-require/): `<meta charset="">` must be present in `<head>` tag.
- [`meta-description-require`](meta-description-require/): `<meta name="description">` with non-blank content must be present in `<head>` tag.
- [`meta-viewport-require`](meta-viewport-require/): `<meta name="viewport">` with non-blank content must be present in `<head>` tag.
- [`link-rel-canonical-require`](link-rel-canonical-require/): `<link rel="canonical">` with non-blank href must be present in `<head>` tag.
- [`script-disabled`](script-disabled/): `<script>` tags cannot be used.
- [`style-disabled`](style-disabled/): `<style>` tags cannot be used.
- [`title-require`](title-require/): `<title>` must be present in `<head>` tag.
Expand Down
61 changes: 61 additions & 0 deletions website/src/content/docs/rules/link-rel-canonical-require.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
---
id: link-rel-canonical-require
title: link-rel-canonical-require
description: Ensures every HTML document includes a canonical link tag within the head element for better SEO and duplicate content management.
sidebar:
badge: New
hidden: true
pagefind: false
---

import { Badge } from '@astrojs/starlight/components';

A `<link rel="canonical">` element with a non-blank `href` attribute must be present in the `<head>` tag.

Level: <Badge text="Error" variant="danger" />

## Config value

- `true`: enable rule
- `false`: disable rule

## The following patterns are **not** considered rule violations

```html
<!-- Valid canonical link with absolute URL -->
<html><head><link rel="canonical" href="https://example.com/dresses/green-dresses"></head></html>

<!-- Valid canonical link with relative URL -->
<html><head><link rel="canonical" href="/dresses/green-dresses"></head></html>

<!-- Valid canonical link with query parameters -->
<html><head><link rel="canonical" href="https://example.com/dresses/green-dresses?color=green&size=m"></head></html>

<!-- Valid canonical link with fragment -->
<html><head><link rel="canonical" href="https://example.com/dresses/green-dresses#section1"></head></html>
```

## The following patterns are considered rule violations

```html
<!-- Missing canonical link -->
<html><head></head></html>

<!-- Empty href attribute -->
<html><head><link rel="canonical" href=""></head></html>

<!-- Whitespace-only href attribute -->
<html><head><link rel="canonical" href=" "></head></html>

<!-- Missing href attribute -->
<html><head><link rel="canonical"></head></html>
```

## Why this rule is important

While it's generally not critical to specify a canonical preference for your URLs, there are several reasons why you would want to explicitly tell search engines about a canonical page in a set of duplicate or similar pages:

- **Specify preferred URL**: Tell search engines which URL you want people to see in search results
- **Consolidate signals**: Help search engines consolidate signals from similar pages into a single, preferred URL
- **Simplify tracking**: Get consolidated metrics for specific content across multiple URLs
- **Optimize crawling**: Prevent search engines from wasting time crawling duplicate content
Loading