Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Url encode directional change characters #391

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/match/url-match.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { AbstractMatch, AbstractMatchConfig } from './abstract-match';
import { httpSchemePrefixRe } from '../parser/uri-utils';
import { directionalCharRe, httpSchemePrefixRe } from '../parser/uri-utils';
import type { StripPrefixConfigObj } from '../autolinker';

/**
Expand Down Expand Up @@ -162,7 +162,7 @@ export class UrlMatch extends AbstractMatch {
public getAnchorHref(): string {
let url = this.getUrl();

return url.replace(/&/g, '&'); // any &'s in the URL should be converted back to '&' if they were displayed as & in the source html
return url.replace(directionalCharRe, encodeURIComponent).replace(/&/g, '&'); // any &'s in the URL should be converted back to '&' if they were displayed as & in the source html
}

/**
Expand All @@ -189,7 +189,7 @@ export class UrlMatch extends AbstractMatch {
if (this.decodePercentEncoding) {
anchorText = removePercentEncoding(anchorText);
}
return anchorText;
return anchorText.replace(directionalCharRe, encodeURIComponent);
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/parser/parse-matches.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { UrlMatch, UrlMatchType } from '../match/url-match';
import { Match } from '../match/match';
import { remove, assertNever } from '../utils';
import {
isDirectionalChar,
httpSchemeRe,
isDomainLabelChar,
isDomainLabelStartChar,
Expand Down Expand Up @@ -411,7 +412,7 @@ export function parseMatches(text: string, args: ParseMatchesArgs): Match[] {
} else if (isUrlSuffixStartChar(char)) {
// '/', '?', or '#'
stateMachine.state = State.Path;
} else if (isDomainLabelChar(char)) {
} else if (isDomainLabelChar(char) || isDirectionalChar(char)) {
// Stay in the DomainLabelChar state
} else {
// Anything else, end the domain name
Expand Down
10 changes: 10 additions & 0 deletions src/parser/uri-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ export const schemeUrlRe = /^[A-Za-z][-.+A-Za-z0-9]*:(\/\/)?([^:/]*)/;
// See https://www.rfc-editor.org/rfc/rfc3986#appendix-A for terminology
export const tldUrlHostRe = /^(?:\/\/)?([^/#?:]+)/; // optionally prefixed with protocol-relative '//' chars

export const directionalCharRe = /[\u202a-\u202e\u200e-\u200f]/;

/**
* Determines if the given character may start a scheme (ex: 'http').
*/
Expand Down Expand Up @@ -110,6 +112,14 @@ export function isDomainLabelChar(char: string): boolean {
return char === '_' || isDomainLabelStartChar(char);
}

/**
* Detects directional change character
* https://github.com/gregjacobs/Autolinker.js/issues/377
*/
export function isDirectionalChar(char: string): boolean {
return directionalCharRe.test(char);
}

/**
* Determines if the character is a path character ("pchar") as defined by
* https://tools.ietf.org/html/rfc3986#appendix-A
Expand Down
32 changes: 32 additions & 0 deletions tests/autolinker-url.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1032,6 +1032,38 @@ describe('Autolinker Url Matching >', () => {
});
});

describe('unicode exploits', () => {
it('text with directional change characters should not be linked', () => {
expect(autolinker.link('foo.combar.com')).toBe(
'<a href="http://foo.combar.com">foo.combar.com</a>'
);
expect(autolinker.link('foo.com\u202Ebar.com')).toBe(
'<a href="http://foo.com%E2%80%AEbar.com">foo.com%E2%80%AEbar.com</a>'
);
expect(autolinker.link('foo.com\u202abar.com')).toBe(
'<a href="http://foo.com%E2%80%AAbar.com">foo.com%E2%80%AAbar.com</a>'
);
expect(autolinker.link('foo.com\u202bbar.com')).toBe(
'<a href="http://foo.com%E2%80%ABbar.com">foo.com%E2%80%ABbar.com</a>'
);
expect(autolinker.link('foo.com\u202cbar.com')).toBe(
'<a href="http://foo.com%E2%80%ACbar.com">foo.com%E2%80%ACbar.com</a>'
);
expect(autolinker.link('foo.com\u202dbar.com')).toBe(
'<a href="http://foo.com%E2%80%ADbar.com">foo.com%E2%80%ADbar.com</a>'
);
expect(autolinker.link('foo.com\u202ebar.com')).toBe(
'<a href="http://foo.com%E2%80%AEbar.com">foo.com%E2%80%AEbar.com</a>'
);
expect(autolinker.link('foo.com/?query\u202etest')).toBe(
'<a href="http://foo.com/?query">foo.com/?query</a>\u202Etest'
);
expect(autolinker.link('foo.com/#query\u202etest')).toBe(
'<a href="http://foo.com/#query">foo.com/#query</a>\u202Etest'
);
});
});

function generateCombinationTests({
schemes,
hosts,
Expand Down