Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add glob support and make "noreferrer" configurable #14

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,29 @@ $ npm i hexo-filter-nofollow --save
nofollow:
enable: true
field: site
elements:
- 'a'
- 'img'
exclude:
- 'exclude1.com'
- 'exclude2.com'
- '*.exclude1.com'
- 'exclude2.com/path/*'
rel:
- 'external'
- 'noreferrer'
- 'nofollow'
- 'noopener'
referrerpolicy: 'no-referrer'
```

- **enable** - Enable the plugin. Default value is `true`.
- **field** - The scope the plugin processes; can be 'site' or 'post'. Default value is `site`.
- 'post' - Only add nofollow attribute to external links in your post content
- 'site' - Add nofollow attribute to external links of whole sites
- **exclude** - Exclude hostname. Specify subdomain when applicable, including `www`.
- 'exclude1.com' does not apply to `www.exclude1.com` nor `en.exclude1.com`.
- **elements** - The tags to process. Currently only `<a>` and `<img>` are supported.
- **include** - Include hostname. You can use `*` or `?` glob wildcards. If include is configured, other external links will not be processed.
- **exclude** - Exclude hostname. You can use `*` or `?` glob wildcards.
- `exclude1.com` does not apply to `www.exclude1.com` nor `en.exclude1.com`.
- `*.exclude1.com` applies to `www.exclude1.com` and `en.exclude1.com`.
- **minimatch** - Glob wildcards are implemented with [minimatch](https://github.com/isaacs/minimatch); use this field to pass options to it.
- **rel** - Configurable rel attribute value.
- **referrerpolicy** - Configurable referrerpolicy attribute value.
6 changes: 5 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@
hexo.config.nofollow = Object.assign({
enable: true,
field: 'site',
exclude: []
elements: ['a'],
include: [],
exclude: [],
rel: ['noopener', 'external', 'nofollow', 'noreferrer'],
referrerpolicy: 'no-referrer'
}, hexo.config.nofollow);

const config = hexo.config.nofollow;
Expand Down
118 changes: 90 additions & 28 deletions lib/filter.js
Original file line number Diff line number Diff line change
@@ -1,56 +1,116 @@
'use strict';

const { parse } = require('url');
const { Minimatch } = require('minimatch');

/**
* Check whether the url is an external link.
*
* NOTE(review): this span comes from a diff view, so removed and added lines
* appear side by side; the description below follows the glob-based lines.
*
* Decision order visible in the code: a url without protocol or hostname (or an
* unknown site host) is not external; a url matching any exclude glob is not
* external; a url matching any include glob is external; when include globs are
* configured but none matched the url is not external; otherwise the url is
* external when its hostname differs from the site hostname.
*
* @param {string} url value of the link attribute to test
* @param {object} config object providing `url` and `nofollow.includeGlobs` / `nofollow.excludeGlobs`
* @returns {boolean} whether the url is treated as an external link
*/
function isExternal(url, config) {
const exclude = config.nofollow.exclude;
const { includeGlobs, excludeGlobs } = config.nofollow;
const data = parse(url);
const host = data.hostname;
const { hostname, path } = data;
// Fall back to the raw config.url when no hostname can be parsed out of it
const sitehost = parse(config.url).hostname || config.url;

if (!data.protocol || !sitehost) return false;
if (!data.protocol || !hostname || !sitehost) return false;

if (exclude && exclude.length) {
for (const i of exclude) {
if (host === i) return false;
// Globs are matched against the hostname concatenated with the path
const target = hostname + path;

// Exclude globs are checked first, so they win over include globs
if (excludeGlobs && excludeGlobs.length) {
for (const glob of excludeGlobs) {
if (glob.match(target)) return false;
}
}

if (host !== sitehost) return true;
if (includeGlobs && includeGlobs.length) {
for (const glob of includeGlobs) {
if (glob.match(target)) return true;
}
}

return false;
if (includeGlobs && includeGlobs.length) {
// If include is configured, other links will not be treated as external links
return false;
}
// No include globs configured: compare against the site's own hostname
return hostname != sitehost;
}

module.exports = function(data) {
const hexo = this;
const config = hexo.config;
/**
 * Add an attribute to a tag, merging with any value the tag already carries.
 *
 * An existing `attrKey="..."` or `attrKey='...'` attribute is removed from the tag and
 * its whitespace-separated tokens are appended after the configured ones. Duplicates
 * are dropped and the merged attribute is inserted right after the URL attribute.
 *
 * @param {string} tagStr raw tag string, e.g. `<a href="https://hexo.io/">`
 * @param {string} urlAttrStr the URL attribute exactly as it appears in `tagStr`
 * @param {string} attrKey name of the attribute to add
 * @param {string | string[]} attrValue value(s) to add; the tag is returned unchanged when empty
 * @returns {string} the new tag string
 */
function addAttr(tagStr, urlAttrStr, attrKey, attrValue) {
  // Normalize locally so a missing/empty config value cannot crash the spread below
  let configured = [];
  if (attrValue) {
    configured = Array.isArray(attrValue) ? attrValue : [attrValue];
  }
  const values = configured.filter(Boolean);
  if (values.length === 0) return tagStr;

  // attrKey is interpolated into a RegExp, so neutralize any metacharacters
  const escapedKey = attrKey.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  // Accept both quote styles; the back-reference keeps opening/closing quotes paired
  const attrRegex = new RegExp(`\\s${escapedKey}=(["'])(.*?)\\1`, 'gi');

  const stripped = tagStr.replace(attrRegex, (attrStr, quote, existing) => {
    values.push(...existing.split(/\s+/).filter(Boolean));
    return '';
  });

  // De-duplicate while keeping first-seen order (configured values first)
  const merged = [...new Set(values)].join(' ');

  // Function replacer: a `$&`, `$$`, ... sequence inside the URL must be inserted literally
  return stripped.replace(urlAttrStr, () => `${urlAttrStr} ${attrKey}="${merged}"`);
}

const exclude = config.nofollow.exclude;
if (exclude && !Array.isArray(exclude)) {
config.nofollow.exclude = [exclude];
/**
 * Normalize a config value to an array.
 *
 * @param {*} data a single value, an array, or nothing
 * @returns {Array} `data` itself when it is already an array, `[data]` for a single
 * truthy value, and an empty array for a missing/empty value so callers can safely
 * iterate, map or spread the result
 */
function toArray(data) {
  if (Array.isArray(data)) return data;
  return data ? [data] : [];
}

/**
 * Turn a host-only pattern into one that also covers every path under that host.
 *
 * @param {string} sitePattern glob pattern from the include/exclude config
 * @returns {string} `sitePattern` followed by `/**` when it is non-empty and has
 * no `/` of its own; otherwise `sitePattern` unchanged
 */
function addSitePath(sitePattern) {
  const hostOnly = Boolean(sitePattern) && !sitePattern.includes('/');
  return hostOnly ? `${sitePattern}/**` : sitePattern;
}

const filterExternal = data => {
return data.replace(/<a.*?(href=['"](.*?)['"]).*?>/gi, (str, hrefStr, href) => {
if (!isExternal(href, config)) return str;
module.exports = function nofollow(data) {
const hexo = this;
const config = hexo.config;

let noFollow = ['noopener', 'external', 'nofollow', 'noreferrer'];
const { elements, include, exclude, minimatch } = config.nofollow;
config.nofollow.elements = toArray(elements);
config.nofollow.include = toArray(include);
config.nofollow.exclude = toArray(exclude);

if (/rel=/gi.test(str)) {
str = str.replace(/\srel="(.*?)"/gi, (relStr, rel) => {
rel = rel.split(' ');
noFollow.push(...rel);
// De-duplicate
noFollow = [...new Set(noFollow)];
config.nofollow.includeGlobs = config.nofollow.include.map(pattern => new Minimatch(addSitePath(pattern), minimatch));
config.nofollow.excludeGlobs = config.nofollow.exclude.map(pattern => new Minimatch(addSitePath(pattern), minimatch));

return '';
});
}
// Add the configured `referrerpolicy` and `rel` attributes to every <a> tag whose
// href is an external link; all other tags are returned untouched.
const filterATagHrefExternal = data => {
  // `\s` after the tag name keeps <abbr>, <area>, <article>, ... from being
  // mistaken for an anchor tag.
  return data.replace(/<a\s.*?(href=['"](.*?)['"]).*?>/gi, (aTagRaw, hrefAttrRaw, href) => {
    if (!isExternal(href, config)) return aTagRaw;
    // referrerpolicy goes in first so that `rel` ends up directly after the href
    aTagRaw = addAttr(aTagRaw, hrefAttrRaw, 'referrerpolicy', config.nofollow.referrerpolicy);
    return addAttr(aTagRaw, hrefAttrRaw, 'rel', config.nofollow.rel);
  });
};

return str.replace(hrefStr, `${hrefStr} rel="${noFollow.join(' ')}"`);
// Add the configured `referrerpolicy` and `rel` attributes to every <img> tag whose
// src is an external link; all other tags are returned untouched.
const filterImgTagSrcExternal = data => {
  // `\s` after the tag name ensures only real <img> tags are matched, not any
  // tag whose name merely starts with "img".
  return data.replace(/<img\s.*?(src=['"](.*?)['"]).*?>/gi, (imgTagRaw, srcAttrRaw, src) => {
    if (!isExternal(src, config)) return imgTagRaw;
    // referrerpolicy goes in first so that `rel` ends up directly after the src
    imgTagRaw = addAttr(imgTagRaw, srcAttrRaw, 'referrerpolicy', config.nofollow.referrerpolicy);
    return addAttr(imgTagRaw, srcAttrRaw, 'rel', config.nofollow.rel);
  });
};

// Run the tag filters selected in `config.nofollow.elements` over the markup:
// anchors first, then images.
const filterExternal = data => {
  const withAnchors = config.nofollow.elements.includes('a')
    ? filterATagHrefExternal(data)
    : data;
  return config.nofollow.elements.includes('img')
    ? filterImgTagSrcExternal(withAnchors)
    : withAnchors;
};

if (config.nofollow.field === 'post') {
data.content = filterExternal(data.content);
} else {
Expand All @@ -59,3 +119,5 @@ module.exports = function(data) {

return data;
};


3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,8 @@
"eslint-config-hexo": "^4.1.0",
"hexo": "hexojs/hexo",
"mocha": "^8.0.1"
},
"dependencies": {
"minimatch": "^3.0.4"
}
}
115 changes: 98 additions & 17 deletions test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,49 +9,71 @@ describe('hexo-filter-nofollow', () => {
const nofollowFilter = require('../lib/filter').bind(hexo);

hexo.config.url = 'https://example.com';
hexo.config.nofollow = {};
hexo.config.nofollow = { include: [], exclude: [], elements: ['a', 'img'], rel: ['noopener', 'external', 'nofollow', 'noreferrer'], referrerpolicy: 'no-referrer' };

describe('Default', () => {
const content = [
'# External link test',
'1. External link',
'<a href="https://hexo.io/">Hexo</a>',
'<img src="https://hexo.io/">Hexo</img>',
'2. External link with existed "rel" Attribute',
'<a rel="license" href="https://github.com/hexojs/hexo-filter-nofollow/blob/master/LICENSE">Hexo</a>',
'<a href="https://github.com/hexojs/hexo-filter-nofollow/blob/master/LICENSE" rel="license">Hexo</a>',
'<a rel="license" href="https://hexo.io">Hexo</a>',
'<a href="https://hexo.io" rel="license">Hexo</a>',
'<img src="https://hexo.io" rel="license">Hexo</img>',
'<img rel="license" src="https://hexo.io" rel="license">Hexo</img>',
'3. External link with existing "rel=noopener", "rel=external" or "rel=noreferrer"',
'<a rel="noopener" href="https://hexo.io/">Hexo</a>',
'<a href="https://hexo.io/" rel="noreferrer">Hexo</a>',
'<a rel="noopener noreferrer" href="https://hexo.io/">Hexo</a>',
'<a href="https://hexo.io/" rel="external noreferrer">Hexo</a>',
'<img rel="noopener" src="https://hexo.io/">Hexo</img>',
'<img src="https://hexo.io/" rel="noreferrer">Hexo</img>',
'<img rel="noopener noreferrer" src="https://hexo.io/">Hexo</img>',
'<img src="https://hexo.io/" rel="external noreferrer">Hexo</img>',
'4. External link with Other Attributes',
'<a class="img" href="https://hexo.io/">Hexo</a>',
'<a href="https://hexo.io/" class="img">Hexo</a>',
'<img class="img" src="https://hexo.io/">Hexo</img>',
'<img src="https://hexo.io/" class="img">Hexo</img>',
'5. Internal link',
'<a href="/archives/foo.html">Link</a>',
'<img src="/archives/foo.html">Link</img>',
'6. Ignore links don\'t have "href" attribute',
'<a>Anchor</a>'
'<a>Anchor</a>',
'<img>Anchor</img>'
].join('\n');

const expected = [
'# External link test',
'1. External link',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'<img src="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</img>',
'2. External link with existed "rel" Attribute',
'<a href="https://github.com/hexojs/hexo-filter-nofollow/blob/master/LICENSE" rel="noopener external nofollow noreferrer license">Hexo</a>',
'<a href="https://github.com/hexojs/hexo-filter-nofollow/blob/master/LICENSE" rel="noopener external nofollow noreferrer license">Hexo</a>',
'<a href="https://hexo.io" rel="noopener external nofollow noreferrer license" referrerpolicy="no-referrer">Hexo</a>',
'<a href="https://hexo.io" rel="noopener external nofollow noreferrer license" referrerpolicy="no-referrer">Hexo</a>',
'<img src="https://hexo.io" rel="noopener external nofollow noreferrer license" referrerpolicy="no-referrer">Hexo</img>',
'<img src="https://hexo.io" rel="noopener external nofollow noreferrer license" referrerpolicy="no-referrer">Hexo</img>',
'3. External link with existing "rel=noopener", "rel=external" or "rel=noreferrer"',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'<img src="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</img>',
'<img src="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</img>',
'<img src="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</img>',
'<img src="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</img>',
'4. External link with Other Attributes',
'<a class="img" href="https://hexo.io/" rel="noopener external nofollow noreferrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" class="img">Hexo</a>',
'<a class="img" href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer" class="img">Hexo</a>',
'<img class="img" src="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</img>',
'<img src="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer" class="img">Hexo</img>',
'5. Internal link',
'<a href="/archives/foo.html">Link</a>',
'<img src="/archives/foo.html">Link</img>',
'6. Ignore links don\'t have "href" attribute',
'<a>Anchor</a>'
'<a>Anchor</a>',
'<img>Anchor</img>'
].join('\n');

it('Default to field = "site"', () => {
Expand All @@ -72,6 +94,64 @@ describe('hexo-filter-nofollow', () => {
});
});

describe('Include & Pattern', () => {
const content = [
'# Include & Pattern link test',
'1. External link',
'<a href="https://hexo.io/">Hexo</a>',
'2. External links whose hostname is not match glob pattern',
'<a href="https://example.com">Example Domain</a>',
'3. External links whose hostname is included in glob pattern',
'<a href="https://demo.example.org">Example Domain</a>',
'<a href="https://test.example.org">Example Domain</a>',
'4. External links whose hostname is included in glob pattern with path',
'<a href="https://path.example.org/path/to">Example Domain</a>',
'<a href="https://path.example.org/path">Example Domain</a>'
].join('\n');

it('String', () => {
hexo.config.nofollow.include = ['hexo.io', '*.example.org'];

const result = nofollowFilter(content);

result.should.eql([
'# Include & Pattern link test',
'1. External link',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'2. External links whose hostname is not match glob pattern',
'<a href="https://example.com">Example Domain</a>',
'3. External links whose hostname is included in glob pattern',
'<a href="https://demo.example.org" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Example Domain</a>',
'<a href="https://test.example.org" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Example Domain</a>',
'4. External links whose hostname is included in glob pattern with path',
'<a href="https://path.example.org/path/to" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Example Domain</a>',
'<a href="https://path.example.org/path" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Example Domain</a>'
].join('\n'));
});

it('Array', () => {
hexo.config.nofollow.include = 'hexo.io';
hexo.config.nofollow.exclude = ['example.org', '*.example.org', 'path.example.org/**'];

const result = nofollowFilter(content);

result.should.eql([
'# Include & Pattern link test',
'1. External link',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'2. External links whose hostname is not match glob pattern',
'<a href="https://example.com">Example Domain</a>',
'3. External links whose hostname is included in glob pattern',
'<a href="https://demo.example.org">Example Domain</a>',
'<a href="https://test.example.org">Example Domain</a>',
'4. External links whose hostname is included in glob pattern with path',
'<a href="https://path.example.org/path/to">Example Domain</a>',
'<a href="https://path.example.org/path">Example Domain</a>'
].join('\n'));
});
});


describe('Exclude', () => {
const content = [
'# Exclude link test',
Expand All @@ -85,19 +165,20 @@ describe('hexo-filter-nofollow', () => {
].join('\n');

it('String', () => {
hexo.config.nofollow.include = [];
hexo.config.nofollow.exclude = 'example.org';

const result = nofollowFilter(content);

result.should.eql([
'# Exclude link test',
'1. External link',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'2. Ignore links whose hostname is same as config',
'<a href="https://example.com">Example Domain</a>',
'3. Ignore links whose hostname is included in exclude',
'<a href="https://example.org">Example Domain</a>',
'<a href="https://test.example.org" rel="noopener external nofollow noreferrer">Example Domain</a>'
'<a href="https://test.example.org" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Example Domain</a>'
].join('\n'));
});

Expand All @@ -109,7 +190,7 @@ describe('hexo-filter-nofollow', () => {
result.should.eql([
'# Exclude link test',
'1. External link',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer">Hexo</a>',
'<a href="https://hexo.io/" rel="noopener external nofollow noreferrer" referrerpolicy="no-referrer">Hexo</a>',
'2. Ignore links whose hostname is same as config',
'<a href="https://example.com">Example Domain</a>',
'3. Ignore links whose hostname is included in exclude',
Expand Down