From fb0a62650c39bc6b9c9365657ce4f912d50523fc Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Wed, 2 Dec 2020 15:23:16 -0500 Subject: [PATCH] enh(ci): Add tests for polynomial regex issues --- test/regex/index.js | 198 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 196 insertions(+), 2 deletions(-) diff --git a/test/regex/index.js b/test/regex/index.js index e261a45f6a..426568ba4f 100644 --- a/test/regex/index.js +++ b/test/regex/index.js @@ -15,6 +15,13 @@ hljs.debugMode(); */ const expBacktrackingCache = {}; +/** + * A map for a regex pattern to whether or not it it vulnerable to polynomial backtracking. + * + * @type {Record} + */ +const polyBacktrackingCache = {}; + function retrieveRules(language, { name }) { // first we need to get the language compiled so we have // access to the raw regex @@ -56,7 +63,7 @@ function testLanguage(languageName) { // }); // }); - it(`has ${rules.length} regex matchers`, () => {} ); + it(`have ${rules.length} regex matchers`, () => {} ); it('should not use octal escapes', function() { forEachPattern(rules, ({ ast, rulePath, reportError }) => { @@ -194,7 +201,194 @@ function testLanguage(languageName) { expBacktrackingCache[patternStr] = false; }); }); + it('should not cause polynomial backtracking', function () { + forEachPattern(rules, ({ pattern, ast, rulePath, reportError }) => { + const patternStr = String(pattern); + if (polyBacktrackingCache[patternStr] === false) { + // we know that the pattern won't cause poly backtracking because we checked before + return; + } + + const EMPTY = ast.flags.unicode ? CharSet.empty(0x10FFFF) : CharSet.empty(0xFFFF); + + /** + * @param {Node} node + * @returns {CharSet} + */ + function toCharSet(node) { + switch (node.type) { + case "Alternative": { + if (node.elements.length === 1) { + return toCharSet(node.elements[0]); + } + return EMPTY; + } + case "CapturingGroup": + case "Group": { + let total = EMPTY; + for (const item of node.alternatives) { + total = total.union(toCharSet(item)); + } + return total; + } + case "Character": + return JS.createCharSet([node.value], ast.flags); + case "CharacterClass": { + const value = JS.createCharSet(node.elements.map(x => { + if (x.type === "CharacterSet") { + return x; + } else if (x.type === "Character") { + return x.value; + } else { + return { min: x.min.value, max: x.max.value }; + } + }), ast.flags); + if (node.negate) { + return value.negate(); + } else { + return value; + } + } + case "CharacterSet": + return JS.createCharSet([node], ast.flags); + + default: + return EMPTY; + } + } + + /** + * @param {Element} from + * @returns {Element | null} + */ + function getAfter(from) { + const parent = from.parent; + if (parent.type === "Quantifier") { + return getAfter(parent); + } else if (parent.type === "Alternative") { + const index = parent.elements.indexOf(from); + const after = parent.elements[index + 1]; + if (after) { + return after; + } else { + const grandParent = parent.parent; + if (grandParent.type === "Pattern") { + return null; + } else { + return getAfter(grandParent); + } + } + } else { + throw Error("Unreachable"); + } + } + + visitRegExpAST(ast.pattern, { + onQuantifierLeave(node) { + if (node.max !== Infinity) { + return; + } + const char = toCharSet(node.element); + tryReachUntil(getAfter(node), char, null); + + /** + * @param {Quantifier} quantifier + * @param {CharSet} char + */ + function assertNoPoly(quantifier, char) { + if (quantifier.max === Infinity) { + const qChar = toCharSet(quantifier.element); + if (qChar && !qChar.isDisjointWith(char)) { + const intersection = qChar.intersect(char); + const literal = JS.toLiteral({ + type: "Concatenation", + elements: [ + { type: "CharacterClass", characters: intersection } + ] + }) + const lang = `/${literal.source}/${literal.flags}`; + + const rangeStr = patternStr.substring(node.start + 1, quantifier.end + 1); + const rangeHighlight = `^${"~".repeat(node.end - node.start - 1)}${" ".repeat(quantifier.start - node.end)}^${"~".repeat(quantifier.end - quantifier.start - 1)}`; + + reportError(`${rulePath}: Polynomial backtracking. By repeating any character that matches ${lang}, an attack string can be created.\n\n ${rangeStr}\n ${rangeHighlight}\n\nFull pattern:\n${patternStr}\n${" ".repeat(node.start + 1)}${rangeHighlight}`); + } + } + } + /** + * @param {Element | null | undefined} element + * @param {CharSet} char + * @param {Element | null | undefined} until + * @returns {CharSet} + */ + function tryReachUntil(element, char, until) { + if (!element || element == until || char.isEmpty) { + return char; + } + + const after = getAfter(element); + + if (element.type === "Quantifier") { + assertNoPoly(element, char); + } + + return tryReachUntil(after, goInto(element, after, char), until); + } + + /** + * @param {Element} element + * @param {Element} after + * @param {CharSet} char + * @returns {CharSet} + */ + function goInto(element, after, char) { + switch (element.type) { + case "Assertion": { + if (element.kind === "lookahead" || element.kind === "lookbehind") { + for (const alt of element.alternatives) { + if (alt.elements.length > 0) { + tryReachUntil(alt.elements[0], char, after); + } + } + } + return EMPTY; + } + case "Group": + case "CapturingGroup": { + let total = EMPTY; + for (const alt of element.alternatives) { + if (alt.elements.length > 0) { + total = total.union(tryReachUntil(alt.elements[0], char, after)); + } else { + total = char; + } + } + return total; + } + case "Character": + case "CharacterClass": + case "CharacterSet": { + return char.intersect(toCharSet(element)); + } + case "Quantifier": { + if (element.min === 0) { + goInto(element.element, after, char); + return char; + } else { + return goInto(element.element, after, char); + } + } + default: + return EMPTY; + } + } + }, + }); + + polyBacktrackingCache[patternStr] = false; + }); + }); }); } @@ -209,5 +403,5 @@ for (const language of languages) { } describe("COMBINED: All grammars", () => { - it(`has ${count} total regex`, () => {}); + it(`have ${count} total regex`, () => {}); });