From 393a8a4308cc2cb0745690a938f62b895e46f36f Mon Sep 17 00:00:00 2001 From: lionel-rowe Date: Sat, 11 May 2024 19:32:54 +0800 Subject: [PATCH 1/2] Fix issue with catastrophic backtracking in anchorme.validate.url --- src/regex.ts | 9 ++++++--- test/b_integration/issues.test.ts | 32 +++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/regex.ts b/src/regex.ts index 17e86fe..4a477eb 100644 --- a/src/regex.ts +++ b/src/regex.ts @@ -2,8 +2,11 @@ import { nonLatinAlphabetRanges } from "./dictionary"; const emailAddress = "([\\w!#$%&'*+=?^`{|}~-]+(?:\\.[\\w!#$%&'*+=?^`{|}~-]+)*)"; const domain = `(?:(?:(?:[a-z\\d]|[a-z\\d][\\w\\-]*[a-z\\d]))\\.)+(xn--[a-z\\d]{2,}|[a-z]{2,})(?=[^.]|\\b)`; -const allowedInPath = `\\w\\-.~\\!$&*+,;=:@%'"\\[\\]()?#`; -const path = `((?:\/|\\?)(?:([${allowedInPath}${nonLatinAlphabetRanges}\\/](?:[\\w\\-~+=#&\\/${nonLatinAlphabetRanges}]|\\b)+)*)+)`; + +const allowedInPath = `\\w\\-~+=#&\\/`; +const allowedAtStartOfPath = `.\\!$*,;:@%'"\\[\\]()?`; +const path = `([/?]([${allowedAtStartOfPath}]?(?:[${allowedInPath}${nonLatinAlphabetRanges}]))*)`; + const ipv4 = `((?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?))`; const ipv6 = `\\[(?:(?:[a-f\\d:]+:+)+[a-f\\d]+)\\]`; const port = `(:(\\d{1,5}))?`; @@ -107,4 +110,4 @@ for (let i = 0; i < testers.length; i++) { console.log(JSON.stringify(iidxes)); */ -export { iidxes }; \ No newline at end of file +export { iidxes }; diff --git a/test/b_integration/issues.test.ts b/test/b_integration/issues.test.ts index a28fd8a..17f61da 100644 --- a/test/b_integration/issues.test.ts +++ b/test/b_integration/issues.test.ts @@ -68,4 +68,36 @@ describe("Issues", () => { `What's the best way to clean your smartphone? 📱🚿https://t.co/cxjsA6j60J` ); }); + + describe("Catastrophic backtracking - https://github.com/alexcorvi/anchorme.js/issues/115 and https://github.com/alexcorvi/anchorme.js/issues/82", () => { + const MAX_MILLISECONDS_PER_VALIDATION = 10; + + const examplesFromIssues = [ + 'https://respond.vitally.io/work/team/users/6e92f9e7-2204-478c-9a7f-965bdd54dd0e@', + 'https://pages.getpostman.com/rs/067-UMD-991/images/ban-api-builder (1).jpg', + 'https://en.wikipedia.org/wiki/Robert_Cranston_(Scottish_politician)', + 'https://en.wikipedia.org/wiki/Robert_Cranston(abcdefg)', + 'https://en.wikipedia.org/wiki/Robert_Cranston(a)', + ]; + + for (const example of examplesFromIssues) { + it(example, () => { + const start = Date.now(); + anchorme.validate.url(example); + expect(Date.now() - start).toBeLessThan(MAX_MILLISECONDS_PER_VALIDATION); + }); + } + + it('very long path "aaaaaa..."', () => { + const start = Date.now(); + anchorme.validate.url(`https://example.com/${'a'.repeat(1000)}@`); + expect(Date.now() - start).toBeLessThan(MAX_MILLISECONDS_PER_VALIDATION); + }); + + it('very long path "@a@a@a@a@a@a..."', () => { + const start = Date.now(); + anchorme.validate.url(`https://example.com/${'@a'.repeat(1000)}@`); + expect(Date.now() - start).toBeLessThan(MAX_MILLISECONDS_PER_VALIDATION); + }); + }); }); From aa94f604f342ab595462d7bd22f860b0384f929a Mon Sep 17 00:00:00 2001 From: lionel-rowe Date: Sat, 11 May 2024 23:37:49 +0800 Subject: [PATCH 2/2] Add logic test --- src/regex.ts | 6 +++--- test/b_integration/issues.test.ts | 33 +++++++++++++++++++++---------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/regex.ts b/src/regex.ts index 4a477eb..f754017 100644 --- a/src/regex.ts +++ b/src/regex.ts @@ -3,9 +3,9 @@ import { nonLatinAlphabetRanges } from "./dictionary"; const emailAddress = "([\\w!#$%&'*+=?^`{|}~-]+(?:\\.[\\w!#$%&'*+=?^`{|}~-]+)*)"; const domain = `(?:(?:(?:[a-z\\d]|[a-z\\d][\\w\\-]*[a-z\\d]))\\.)+(xn--[a-z\\d]{2,}|[a-z]{2,})(?=[^.]|\\b)`; -const allowedInPath = `\\w\\-~+=#&\\/`; -const allowedAtStartOfPath = `.\\!$*,;:@%'"\\[\\]()?`; -const path = `([/?]([${allowedAtStartOfPath}]?(?:[${allowedInPath}${nonLatinAlphabetRanges}]))*)`; +export const _allowedInPath = `\\w\\-~+=#&\\/${nonLatinAlphabetRanges}`; +export const _allowedAtStartOfPath = `.\\!$*,;:@%'"\\[\\]()?`; +const path = `([/?]([${_allowedAtStartOfPath}]?[${_allowedInPath}])*)`; const ipv4 = `((?:(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(?:25[0-5]|2[0-4]\\d|[01]?\\d\\d?))`; const ipv6 = `\\[(?:(?:[a-f\\d:]+:+)+[a-f\\d]+)\\]`; diff --git a/test/b_integration/issues.test.ts b/test/b_integration/issues.test.ts index 17f61da..2bdb194 100644 --- a/test/b_integration/issues.test.ts +++ b/test/b_integration/issues.test.ts @@ -1,4 +1,6 @@ +import { nonLatinAlphabetRanges } from "../../dist/node/dictionary"; import anchorme from "../../dist/node/index"; +import { _allowedInPath, _allowedAtStartOfPath } from "../../dist/node/regex"; import * as expect from "expect"; describe("Issues", () => { /** @@ -70,14 +72,25 @@ describe("Issues", () => { }); describe("Catastrophic backtracking - https://github.com/alexcorvi/anchorme.js/issues/115 and https://github.com/alexcorvi/anchorme.js/issues/82", () => { - const MAX_MILLISECONDS_PER_VALIDATION = 10; + it("check logic for backtrack-vulnerable sequence", () => { + const charsToTest = Array.from({ length: 1000 }, (_, i) => String.fromCodePoint(i)).join(""); + const matchedByAllowedInPath = new Set(charsToTest.match(new RegExp(`[${_allowedInPath}]`, "g"))); + const matchedByAllowedAtStartOfPath = new Set(charsToTest.match(new RegExp(`[${_allowedAtStartOfPath}]`, "g"))); + + // no overlap between chars matched by `_allowedInPath` and `_allowedAtStartOfPath` + expect(new Set(Array.from(matchedByAllowedInPath).concat(Array.from(matchedByAllowedAtStartOfPath))).size) + .toBe(matchedByAllowedInPath.size + matchedByAllowedAtStartOfPath.size); + }); + + // should be significantly less than this but we allow some leeway to avoid flaky tests + const MAX_MILLISECONDS_PER_VALIDATION = 50; const examplesFromIssues = [ - 'https://respond.vitally.io/work/team/users/6e92f9e7-2204-478c-9a7f-965bdd54dd0e@', - 'https://pages.getpostman.com/rs/067-UMD-991/images/ban-api-builder (1).jpg', - 'https://en.wikipedia.org/wiki/Robert_Cranston_(Scottish_politician)', - 'https://en.wikipedia.org/wiki/Robert_Cranston(abcdefg)', - 'https://en.wikipedia.org/wiki/Robert_Cranston(a)', + "https://respond.vitally.io/work/team/users/6e92f9e7-2204-478c-9a7f-965bdd54dd0e@", + "https://pages.getpostman.com/rs/067-UMD-991/images/ban-api-builder (1).jpg", + "https://en.wikipedia.org/wiki/Robert_Cranston_(Scottish_politician)", + "https://en.wikipedia.org/wiki/Robert_Cranston(abcdefg)", + "https://en.wikipedia.org/wiki/Robert_Cranston(a)", ]; for (const example of examplesFromIssues) { @@ -88,15 +101,15 @@ describe("Issues", () => { }); } - it('very long path "aaaaaa..."', () => { + it("very long path `aaaaaa...`", () => { const start = Date.now(); - anchorme.validate.url(`https://example.com/${'a'.repeat(1000)}@`); + anchorme.validate.url(`https://example.com/${"a".repeat(1000)}@`); expect(Date.now() - start).toBeLessThan(MAX_MILLISECONDS_PER_VALIDATION); }); - it('very long path "@a@a@a@a@a@a..."', () => { + it("very long path `@a@a@a@a@a@a...`", () => { const start = Date.now(); - anchorme.validate.url(`https://example.com/${'@a'.repeat(1000)}@`); + anchorme.validate.url(`https://example.com/${"@a".repeat(1000)}@`); expect(Date.now() - start).toBeLessThan(MAX_MILLISECONDS_PER_VALIDATION); }); });