From 82d7b3c630d0b305f90f7f48d5f6eaeaa5d80f4c Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Sun, 4 Feb 2024 14:18:33 +0100 Subject: [PATCH 01/19] feat: report granular errors on arbitrary literals --- lib/rules/no-misleading-character-class.js | 98 ++++---- package.json | 1 + .../rules/no-misleading-character-class.js | 218 +++++++++++++----- 3 files changed, 217 insertions(+), 100 deletions(-) diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index 8d818665790..bd57e0b915f 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -8,6 +8,7 @@ const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp"); const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode"); const astUtils = require("./utils/ast-utils.js"); const { isValidWithUnicodeFlag } = require("./utils/regular-expressions"); +const { parseStringLiteral, parseTemplateToken } = require("char-source"); //------------------------------------------------------------------------------ // Helpers @@ -227,59 +228,61 @@ module.exports = { const parser = new RegExpParser(); /** - * Generates a granular loc for context.report, if directly calculable. - * @param {Character[]} chars Individual characters being reported on. + * Creates a function used to generate locs for a specified node. * @param {Node} node Parent string node to report within. - * @returns {Object | null} Granular loc for context.report, if directly calculable. - * @see https://github.com/eslint/eslint/pull/17515 + * @returns {(matches: Character[][]) => Location[]} A function to generate locs for the node. */ - function generateReportLocation(chars, node) { + function createReportLocationGenerator(node) { - // Limit to to literals and expression-less templates with raw values === their value. 
- switch (node.type) { - case "TemplateLiteral": - if (node.expressions.length || sourceCode.getText(node).slice(1, -1) !== node.quasis[0].value.cooked) { - return null; - } - break; - - case "Literal": - if (typeof node.value === "string" && node.value !== node.raw.slice(1, -1)) { - return null; - } - break; - - default: - return null; + // Only literals and expression-less templates generate granular errors. + if (!(node.type === "TemplateLiteral" && !node.expressions.length || node.type === "Literal")) { + return matches => (matches.length ? [node.loc] : []); } - return { - start: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[0].start), - end: sourceCode.getLocFromIndex(node.range[0] + 1 + chars.at(-1).end) - }; - } - - /** - * Finds the report loc(s) for a range of matches. - * @param {Character[][]} matches Characters that should trigger a report. - * @param {Node} node The node to report. - * @returns {Object | null} Node loc(s) for context.report. - */ - function getNodeReportLocations(matches, node) { - const locs = []; - - for (const chars of matches) { - const loc = generateReportLocation(chars, node); - - // If a report can't match to a range, don't report any others - if (!loc) { - return [node.loc]; - } + let charInfos = null; + + /** + * Generates a granular loc for context.report. + * @param {Character[][]} matches Lists of individual characters being reported on. + * @returns {Location[]} Granular locs for context.report. 
+ * @see https://github.com/eslint/eslint/pull/17515 + */ + function generateReportLocation(matches) { + return matches.map(chars => { + const firstIndex = chars[0].start; + const lastIndex = chars.at(-1).end - 1; + let start; + let end; + + if (node.type === "TemplateLiteral") { + const source = sourceCode.getText(node); + const offset = node.range[0]; + + charInfos ??= parseTemplateToken(source); + start = offset + charInfos[firstIndex].start; + end = offset + charInfos[lastIndex].end; + } else if (typeof node.value === "string") { // String Literal + const source = node.raw; + const offset = node.range[0]; + + charInfos ??= parseStringLiteral(source); + start = offset + charInfos[firstIndex].start; + end = offset + charInfos[lastIndex].end; + } else { // RegExp Literal + const offset = node.range[0] + 1; // Add 1 to skip the leading slash. + + start = offset + firstIndex; + end = offset + lastIndex + 1; + } - locs.push(loc); + return { + start: sourceCode.getLocFromIndex(start), + end: sourceCode.getLocFromIndex(end) + }; + }); } - return locs; + return generateReportLocation; } /** @@ -320,12 +323,13 @@ module.exports = { } else { foundKindMatches.set(kind, [...findCharacterSequences[kind](chars)]); } - } } } }); + const generateReportLocation = createReportLocationGenerator(node); + for (const [kind, matches] of foundKindMatches) { let suggest; @@ -336,7 +340,7 @@ module.exports = { }]; } - const locs = getNodeReportLocations(matches, node); + const locs = generateReportLocation(matches); for (const loc of locs) { context.report({ diff --git a/package.json b/package.json index 10c8693e248..eb53010f33d 100644 --- a/package.json +++ b/package.json @@ -72,6 +72,7 @@ "@nodelib/fs.walk": "^1.2.8", "ajv": "^6.12.4", "chalk": "^4.0.0", + "char-source": "github:origin-1/char-source", "cross-spawn": "^7.0.2", "debug": "^4.3.2", "escape-string-regexp": "^4.0.0", diff --git a/tests/lib/rules/no-misleading-character-class.js 
b/tests/lib/rules/no-misleading-character-class.js index 6ad54d42d4a..cf80c20bbf6 100644 --- a/tests/lib/rules/no-misleading-character-class.js +++ b/tests/lib/rules/no-misleading-character-class.js @@ -625,10 +625,10 @@ ruleTester.run("no-misleading-character-class", rule, { { code: "var r = new RegExp(`\r\n[❇️]`)", errors: [{ - line: 1, - column: 20, + line: 2, + column: 2, endLine: 2, - endColumn: 6, + endColumn: 4, messageId: "combiningClass", suggestions: null }] @@ -654,8 +654,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = RegExp("[\\uD83D\\uDC4D]", "")`, errors: [{ - column: 16, - endColumn: 34, + column: 18, + endColumn: 32, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("[\\uD83D\\uDC4D]", "u")` }] }] @@ -663,8 +663,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = RegExp("before[\\uD83D\\uDC4D]after", "")`, errors: [{ - column: 16, - endColumn: 45, + column: 24, + endColumn: 38, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("before[\\uD83D\\uDC4D]after", "u")` }] }] @@ -672,8 +672,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = RegExp("[before\\uD83D\\uDC4Dafter]", "")`, errors: [{ - column: 16, - endColumn: 45, + column: 24, + endColumn: 38, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("[before\\uD83D\\uDC4Dafter]", "u")` }] }] @@ -681,8 +681,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = RegExp("\t\t\tπŸ‘[πŸ‘]")`, errors: [{ - column: 16, - endColumn: 30, + column: 26, + endColumn: 28, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("\t\t\tπŸ‘[πŸ‘]", "u")` }] }] @@ -690,8 +690,8 @@ 
ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("\u1234[\\uD83D\\uDC4D]")`, errors: [{ - column: 20, - endColumn: 44, + column: 28, + endColumn: 42, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = new RegExp("\u1234[\\uD83D\\uDC4D]", "u")` }] }] @@ -699,8 +699,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("\\u1234\\u5678πŸ‘Ž[πŸ‘]")`, errors: [{ - column: 20, - endColumn: 42, + column: 38, + endColumn: 40, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = new RegExp("\\u1234\\u5678πŸ‘Ž[πŸ‘]", "u")` }] }] @@ -708,8 +708,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("\\u1234\\u5678πŸ‘[πŸ‘]")`, errors: [{ - column: 20, - endColumn: 42, + column: 38, + endColumn: 40, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = new RegExp("\\u1234\\u5678πŸ‘[πŸ‘]", "u")` }] }] @@ -737,8 +737,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[πŸ‘]\\a", "")`, errors: [{ - column: 20, - endColumn: 29, + column: 22, + endColumn: 24, messageId: "surrogatePairWithoutUFlag", suggestions: null // pattern would be invalid with the 'u' flag }] @@ -784,8 +784,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u0041\\u0301]", "")`, errors: [{ - column: 20, - endColumn: 38, + column: 22, + endColumn: 36, messageId: "combiningClass", suggestions: null }] @@ -793,8 +793,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u0041\\u0301]", "u")`, errors: [{ - column: 20, - endColumn: 38, + column: 22, + endColumn: 36, messageId: "combiningClass", suggestions: null }] @@ -802,8 +802,8 @@ 
ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u{41}\\u{301}]", "u")`, errors: [{ - column: 20, - endColumn: 39, + column: 22, + endColumn: 37, messageId: "combiningClass", suggestions: null }] @@ -829,8 +829,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`new RegExp("[ \\ufe0f]", "")`, errors: [{ - column: 12, - endColumn: 24, + column: 14, + endColumn: 22, messageId: "combiningClass", suggestions: null }] @@ -838,8 +838,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`new RegExp("[ \\ufe0f]", "u")`, errors: [{ - column: 12, - endColumn: 24, + column: 14, + endColumn: 22, messageId: "combiningClass", suggestions: null }] @@ -848,8 +848,14 @@ ruleTester.run("no-misleading-character-class", rule, { code: String.raw`new RegExp("[ \\ufe0f][ \\ufe0f]")`, errors: [ { - column: 12, - endColumn: 34, + column: 14, + endColumn: 22, + messageId: "combiningClass", + suggestions: null + }, + { + column: 24, + endColumn: 32, messageId: "combiningClass", suggestions: null } @@ -858,8 +864,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u2747\\uFE0F]", "")`, errors: [{ - column: 20, - endColumn: 38, + column: 22, + endColumn: 36, messageId: "combiningClass", suggestions: null }] @@ -867,8 +873,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u2747\\uFE0F]", "u")`, errors: [{ - column: 20, - endColumn: 38, + column: 22, + endColumn: 36, messageId: "combiningClass", suggestions: null }] @@ -876,8 +882,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u{2747}\\u{FE0F}]", "u")`, errors: [{ - column: 20, - endColumn: 42, + column: 22, + endColumn: 40, messageId: "combiningClass", suggestions: null }] @@ -911,8 +917,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new 
RegExp("[\\uD83D\\uDC76\\uD83C\\uDFFB]", "u")`, errors: [{ - column: 20, - endColumn: 52, + column: 22, + endColumn: 50, messageId: "emojiModifier", suggestions: null }] @@ -920,8 +926,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u{1F476}\\u{1F3FB}]", "u")`, errors: [{ - column: 20, - endColumn: 44, + column: 22, + endColumn: 42, messageId: "emojiModifier", suggestions: null }] @@ -938,8 +944,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: "var r = RegExp(`\\t\\t\\tπŸ‘[πŸ‘]`)", errors: [{ - column: 16, - endColumn: 30, + column: 26, + endColumn: 28, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = RegExp(`\\t\\t\\tπŸ‘[πŸ‘]`, \"u\")" }] }] @@ -1111,8 +1117,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\uD83C\\uDDEF\\uD83C\\uDDF5]", "u")`, errors: [{ - column: 20, - endColumn: 52, + column: 22, + endColumn: 50, messageId: "regionalIndicatorSymbol", suggestions: null }] @@ -1120,8 +1126,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u{1F1EF}\\u{1F1F5}]", "u")`, errors: [{ - column: 20, - endColumn: 44, + column: 22, + endColumn: 42, messageId: "regionalIndicatorSymbol", suggestions: null }] @@ -1238,8 +1244,8 @@ ruleTester.run("no-misleading-character-class", rule, { code: String.raw`var r = new RegExp("[\\uD83D\\uDC68\\u200D\\uD83D\\uDC69\\u200D\\uD83D\\uDC66]", "u")`, errors: [ { - column: 20, - endColumn: 80, + column: 22, + endColumn: 78, messageId: "zwj", suggestions: null } @@ -1249,8 +1255,8 @@ ruleTester.run("no-misleading-character-class", rule, { code: String.raw`var r = new RegExp("[\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F466}]", "u")`, errors: [ { - column: 20, - endColumn: 72, + column: 22, + endColumn: 70, messageId: "zwj", suggestions: null } @@ -1299,8 +1305,8 @@ 
ruleTester.run("no-misleading-character-class", rule, { languageOptions: { ecmaVersion: 2020 }, errors: [ { - column: 31, - endColumn: 83, + column: 33, + endColumn: 81, messageId: "zwj", suggestions: null } @@ -1334,9 +1340,115 @@ ruleTester.run("no-misleading-character-class", rule, { suggestions: null }] }, + { + code: String.raw` + + // "[" and "]" escaped as "\x5B" and "\u005D" + new RegExp("\x5B \\ufe0f\u005D") + `, + errors: [{ + column: 29, + endColumn: 37, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // backslash escaped as "\u{5c}" + new RegExp("[ \u{5c}ufe0f]") + + `, + errors: [{ + column: 26, + endColumn: 38, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // "0" escaped as "\60" + new RegExp("[ \\ufe\60f]") + + `, + languageOptions: { sourceType: "script" }, + errors: [{ + column: 26, + endColumn: 36, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // "e" escaped as "\e" + new RegExp("[ \\uf\e0f]") + + `, + errors: [{ + column: 26, + endColumn: 35, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // line continuation: backslash + + + new RegExp('[ \\ufe0f]') + + `.replace("", "\\\r\n"), + errors: [{ + line: 4, + column: 26, + endLine: 5, + endColumn: 5, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // just a backslash escaped as "\\" + new RegExp([.\\u200D.]) + + `.replaceAll("", "`"), + errors: [{ + column: 26, + endColumn: 35, + messageId: "zwj", + suggestions: null + }] + }, + + /* eslint-disable lines-around-comment -- see https://github.com/eslint/eslint/issues/18081 */ + + { + code: String.raw` + + // "u" escaped as "\x75" + new RegExp([.\\\x75200D.]) + + `.replaceAll("", "`"), + errors: [{ + column: 26, + endColumn: 38, + messageId: "zwj", + suggestions: null + }] + }, // ES2024 + + /* eslint-enable lines-around-comment -- re-enable rule 
*/ + { code: "var r = /[[πŸ‘ΆπŸ»]]/v", languageOptions: { ecmaVersion: 2024 }, From 1e8525fe441b25f457987912645043ad06a79635 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Sun, 4 Feb 2024 15:23:29 +0100 Subject: [PATCH 02/19] use npm dependency --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index eb53010f33d..206e2cdb782 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,7 @@ "@nodelib/fs.walk": "^1.2.8", "ajv": "^6.12.4", "chalk": "^4.0.0", - "char-source": "github:origin-1/char-source", + "char-source": "^0.0.0", "cross-spawn": "^7.0.2", "debug": "^4.3.2", "escape-string-regexp": "^4.0.0", From 45006ecf81ad9105dba90a9bf80805b763e9e7b7 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Sun, 4 Feb 2024 18:40:29 +0100 Subject: [PATCH 03/19] test with unescaped CRLF --- .../rules/no-misleading-character-class.js | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/tests/lib/rules/no-misleading-character-class.js b/tests/lib/rules/no-misleading-character-class.js index cf80c20bbf6..1af342e875b 100644 --- a/tests/lib/rules/no-misleading-character-class.js +++ b/tests/lib/rules/no-misleading-character-class.js @@ -1427,9 +1427,6 @@ ruleTester.run("no-misleading-character-class", rule, { suggestions: null }] }, - - /* eslint-disable lines-around-comment -- see https://github.com/eslint/eslint/issues/18081 */ - { code: String.raw` @@ -1445,6 +1442,25 @@ ruleTester.run("no-misleading-character-class", rule, { }] }, + /* eslint-disable lines-around-comment -- see https://github.com/eslint/eslint/issues/18081 */ + + { + code: String.raw` + + // unescaped counts as a single character + new RegExp([\\u200D.]) + + `.replaceAll("", "`").replace("", "\n"), + errors: [{ + line: 4, + column: 26, + endLine: 5, + endColumn: 9, + messageId: "zwj", + suggestions: null + }] + }, + // ES2024 /* eslint-enable lines-around-comment -- re-enable rule */ From 
836fe39294ea400400d20d7d6d83aa2501aebdf3 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Wed, 7 Feb 2024 09:39:33 +0100 Subject: [PATCH 04/19] inline `createReportLocationGenerator` --- lib/rules/no-misleading-character-class.js | 98 +++++++++------------- 1 file changed, 41 insertions(+), 57 deletions(-) diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index bd57e0b915f..e8406329c8e 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -228,23 +228,55 @@ module.exports = { const parser = new RegExpParser(); /** - * Creates a function used to generate locs for a specified node. - * @param {Node} node Parent string node to report within. - * @returns {(matches: Character[][]) => Location[]} A function to generate locs for the node. + * Verify a given regular expression. + * @param {Node} node The node to report. + * @param {string} pattern The regular expression pattern to verify. + * @param {string} flags The flags of the regular expression. + * @param {Function} unicodeFixer Fixer for missing "u" flag. + * @returns {void} */ - function createReportLocationGenerator(node) { + function verify(node, pattern, flags, unicodeFixer) { + let patternNode; - // Only literals and expression-less templates generate granular errors. - if (!(node.type === "TemplateLiteral" && !node.expressions.length || node.type === "Literal")) { - return matches => (matches.length ? 
[node.loc] : []); + try { + patternNode = parser.parsePattern( + pattern, + 0, + pattern.length, + { + unicode: flags.includes("u"), + unicodeSets: flags.includes("v") + } + ); + } catch { + + // Ignore regular expressions with syntax errors + return; } + const foundKindMatches = new Map(); + + visitRegExpAST(patternNode, { + onCharacterClassEnter(ccNode) { + for (const chars of iterateCharacterSequence(ccNode.elements)) { + for (const kind of kinds) { + if (foundKindMatches.has(kind)) { + foundKindMatches.get(kind).push(...findCharacterSequences[kind](chars)); + } else { + foundKindMatches.set(kind, [...findCharacterSequences[kind](chars)]); + } + } + } + } + }); + let charInfos = null; /** - * Generates a granular loc for context.report. + * Generates a loc for context.report. + * Only literals and expression-less templates generate granular errors. * @param {Character[][]} matches Lists of individual characters being reported on. - * @returns {Location[]} Granular locs for context.report. + * @returns {Location[]} locs for context.report. * @see https://github.com/eslint/eslint/pull/17515 */ function generateReportLocation(matches) { @@ -282,54 +314,6 @@ module.exports = { }); } - return generateReportLocation; - } - - /** - * Verify a given regular expression. - * @param {Node} node The node to report. - * @param {string} pattern The regular expression pattern to verify. - * @param {string} flags The flags of the regular expression. - * @param {Function} unicodeFixer Fixer for missing "u" flag. 
- * @returns {void} - */ - function verify(node, pattern, flags, unicodeFixer) { - let patternNode; - - try { - patternNode = parser.parsePattern( - pattern, - 0, - pattern.length, - { - unicode: flags.includes("u"), - unicodeSets: flags.includes("v") - } - ); - } catch { - - // Ignore regular expressions with syntax errors - return; - } - - const foundKindMatches = new Map(); - - visitRegExpAST(patternNode, { - onCharacterClassEnter(ccNode) { - for (const chars of iterateCharacterSequence(ccNode.elements)) { - for (const kind of kinds) { - if (foundKindMatches.has(kind)) { - foundKindMatches.get(kind).push(...findCharacterSequences[kind](chars)); - } else { - foundKindMatches.set(kind, [...findCharacterSequences[kind](chars)]); - } - } - } - } - }); - - const generateReportLocation = createReportLocationGenerator(node); - for (const [kind, matches] of foundKindMatches) { let suggest; From d8fd76983034fc52d0872ab9eb32bdba6779ffc5 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Wed, 7 Feb 2024 12:10:31 +0100 Subject: [PATCH 05/19] unit test for templates with expressions --- tests/lib/rules/no-misleading-character-class.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/lib/rules/no-misleading-character-class.js b/tests/lib/rules/no-misleading-character-class.js index 1af342e875b..d5c6e9346ee 100644 --- a/tests/lib/rules/no-misleading-character-class.js +++ b/tests/lib/rules/no-misleading-character-class.js @@ -72,6 +72,9 @@ ruleTester.run("no-misleading-character-class", rule, { { code: "var r = new globalThis.RegExp('[Á] [ ');", languageOptions: { ecmaVersion: 2020 } }, { code: "var r = globalThis.RegExp('{ [Á]', 'u');", languageOptions: { ecmaVersion: 2020 } }, + // don't report on templates with expressions + "var r = RegExp(`${x}[πŸ‘]`)", + // ES2024 { code: "var r = /[πŸ‘]/v", languageOptions: { ecmaVersion: 2024 } }, { code: String.raw`var r = /^[\q{πŸ‘ΆπŸ»}]$/v`, languageOptions: { ecmaVersion: 2024 } }, From 
d351f21433f3414fe104e622aba1ee42e1769782 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Wed, 7 Feb 2024 12:16:46 +0100 Subject: [PATCH 06/19] restore old name `getNodeReportLocations` --- lib/rules/no-misleading-character-class.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index e8406329c8e..35fb31ba5d4 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -279,7 +279,7 @@ module.exports = { * @returns {Location[]} locs for context.report. * @see https://github.com/eslint/eslint/pull/17515 */ - function generateReportLocation(matches) { + function getNodeReportLocations(matches) { return matches.map(chars => { const firstIndex = chars[0].start; const lastIndex = chars.at(-1).end - 1; @@ -324,7 +324,7 @@ module.exports = { }]; } - const locs = generateReportLocation(matches); + const locs = getNodeReportLocations(matches); for (const loc of locs) { context.report({ From b2cb30e67ce5a46b779b9ef29d302d08a5433cb0 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Wed, 7 Feb 2024 12:22:51 +0100 Subject: [PATCH 07/19] update JSDoc --- lib/rules/no-misleading-character-class.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index 35fb31ba5d4..29e9754f6d3 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -273,7 +273,7 @@ module.exports = { let charInfos = null; /** - * Generates a loc for context.report. + * Finds the report loc(s) for a range of matches. * Only literals and expression-less templates generate granular errors. * @param {Character[][]} matches Lists of individual characters being reported on. * @returns {Location[]} locs for context.report. 
From 90bee47d4c8838565fcc995f9aedd2e1255d1e56 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Wed, 7 Feb 2024 13:00:32 +0100 Subject: [PATCH 08/19] =?UTF-8?q?`charInfos`=20=E2=86=92=20`codeUnits`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/rules/no-misleading-character-class.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index 29e9754f6d3..a332dc04e89 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -270,7 +270,7 @@ module.exports = { } }); - let charInfos = null; + let codeUnits = null; /** * Finds the report loc(s) for a range of matches. @@ -290,16 +290,16 @@ module.exports = { const source = sourceCode.getText(node); const offset = node.range[0]; - charInfos ??= parseTemplateToken(source); - start = offset + charInfos[firstIndex].start; - end = offset + charInfos[lastIndex].end; + codeUnits ??= parseTemplateToken(source); + start = offset + codeUnits[firstIndex].start; + end = offset + codeUnits[lastIndex].end; } else if (typeof node.value === "string") { // String Literal const source = node.raw; const offset = node.range[0]; - charInfos ??= parseStringLiteral(source); - start = offset + charInfos[firstIndex].start; - end = offset + charInfos[lastIndex].end; + codeUnits ??= parseStringLiteral(source); + start = offset + codeUnits[firstIndex].start; + end = offset + codeUnits[lastIndex].end; } else { // RegExp Literal const offset = node.range[0] + 1; // Add 1 to skip the leading slash. 
From 53e262a771553b4beeebe60fffed8d16f4fdaaf7 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Thu, 8 Feb 2024 09:28:59 +0100 Subject: [PATCH 09/19] extract char-source to a utility module --- lib/rules/no-misleading-character-class.js | 2 +- lib/rules/utils/char-source.js | 226 ++++++++++++++++++ package.json | 1 - tests/lib/rules/utils/char-source.js | 256 +++++++++++++++++++++ 4 files changed, 483 insertions(+), 2 deletions(-) create mode 100644 lib/rules/utils/char-source.js create mode 100644 tests/lib/rules/utils/char-source.js diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index a332dc04e89..78f8c1f0dcb 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -8,7 +8,7 @@ const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp"); const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode"); const astUtils = require("./utils/ast-utils.js"); const { isValidWithUnicodeFlag } = require("./utils/regular-expressions"); -const { parseStringLiteral, parseTemplateToken } = require("char-source"); +const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source"); //------------------------------------------------------------------------------ // Helpers diff --git a/lib/rules/utils/char-source.js b/lib/rules/utils/char-source.js new file mode 100644 index 00000000000..b528db3ce81 --- /dev/null +++ b/lib/rules/utils/char-source.js @@ -0,0 +1,226 @@ +/** + * @fileoverview Utility functions to locate the source text of each code unit in the value of a string literal or template token. + * @author Francesco Trotta + */ + +"use strict"; + +/** + * Represents a code unit produced by the evaluation of a JavaScript common token like a string + * literal or template token. 
+ */ +class CodeUnit { + constructor(start, source) { + this.start = start; + this.source = source; + } + + get end() { + return this.start + this.length; + } + + get length() { + return this.source.length; + } +} + +/** + * An object used to keep track of the position in a source text where the next characters will be read. + */ +class SourceReader { + constructor(source) { + this.source = source; + this.pos = 0; + } +} + +const SIMPLE_ESCAPE_SEQUENCES = +{ __proto__: null, b: "\b", f: "\f", n: "\n", r: "\r", t: "\t", v: "\v" }; + +/** + * Reads a hex escape sequence. + * @param {SourceReader} reader The reader should be positioned on the first hexadecimal digit. + * @param {number} length The number of hexadecimal digits. + * @returns {string} A code unit. + */ +function readHexSequence(reader, length) { + const { source, pos } = reader; + const str = source.slice(pos, pos + length); + const charCode = parseInt(str, 16); + + reader.pos = pos + length; + return String.fromCharCode(charCode); +} + +/** + * Reads a Unicode escape sequence. + * @param {SourceReader} reader The reader should be positioned after the "u". + * @returns {string} A code unit. + */ +function readUnicodeSequence(reader) { + const { source, pos } = reader; + const regExp = /\{(?[\dA-Fa-f]+)\}/uy; + + regExp.lastIndex = pos; + const match = regExp.exec(source); + + if (match) { + const codePoint = parseInt(match.groups.hexDigits, 16); + + reader.pos = regExp.lastIndex; + return String.fromCodePoint(codePoint); + } + return readHexSequence(reader, 4); +} + +/** + * Reads an octal escape sequence. + * @param {SourceReader} reader The reader should be positioned after the first octal digit. + * @param {number} maxLength The maximum number of octal digits. + * @returns {string} A code unit. 
+ */ +function readOctalSequence(reader, maxLength) { + const posAfterBackslash = reader.pos - 1; + const [octalStr] = reader.source.slice(posAfterBackslash, posAfterBackslash + maxLength).match(/^[0-7]+/u); + + reader.pos = posAfterBackslash + octalStr.length; + const octal = parseInt(octalStr, 8); + + return String.fromCharCode(octal); +} + +/** + * Reads an escape sequence or line continuation. + * @param {SourceReader} reader The reader should be positioned after the backslash. + * @returns {string} A string of zero, one or two code units. + */ +function readEscapeSequenceOrLineContinuation(reader) { + const { source, pos } = reader; + const char = source[pos]; + + reader.pos = pos + 1; + const unitChar = SIMPLE_ESCAPE_SEQUENCES[char]; + + if (unitChar) { + return unitChar; + } + switch (char) { + case "x": + return readHexSequence(reader, 2); + case "u": + return readUnicodeSequence(reader); + case "\r": + if (source[pos + 1] === "\n") { + reader.pos = pos + 2; + } + + // fallthrough + case "\n": + case "\u2028": + case "\u2029": + return ""; + case "0": + case "1": + case "2": + case "3": + return readOctalSequence(reader, 3); + case "4": + case "5": + case "6": + case "7": + return readOctalSequence(reader, 2); + default: + return char; + } +} + +/** + * Reads an escape sequence or line continuation and generates the respective `CodeUnit` elements. + * @param {SourceReader} reader The reader should be positioned on the backslash. + * @returns {Generator} Zero, one or two `CodeUnit` elements. + */ +function *mapEscapeSequenceOrLineContinuation(reader) { + const start = reader.pos++; + const str = readEscapeSequenceOrLineContinuation(reader); + const end = reader.pos; + const source = reader.source.slice(start, end); + + switch (str.length) { + case 0: + break; + case 1: + yield new CodeUnit(start, source); + break; + default: + yield new CodeUnit(start, source); + yield new CodeUnit(start, source); + break; + } +} + +/** + * Parses a string literal. 
+ * @param {string} source The string literal to parse, including the delimiting quotes. + * @returns {CodeUnit[]} A list of code units produced by the string literal. + */ +function parseStringLiteral(source) { + const reader = new SourceReader(source); + const quote = source[0]; + + reader.pos = 1; + const codeUnits = []; + + for (;;) { + const { pos } = reader; + const char = source[pos]; + + if (char === quote) { + break; + } + if (char === "\\") { + codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader)); + } else { + reader.pos = pos + 1; + codeUnits.push(new CodeUnit(pos, char)); + } + } + return codeUnits; +} + +/** + * Parses a template token. + * @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`. + * @returns {CodeUnit[]} A list of code units produced by the template token. + */ +function parseTemplateToken(source) { + const reader = new SourceReader(source); + + reader.pos = 1; + const codeUnits = []; + + for (;;) { + const { pos } = reader; + const char = source[pos]; + + if (char === "`" || char === "$" && source[pos + 1] === "{") { + break; + } + if (char === "\\") { + codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader)); + } else { + let unitSource; + + if (char === "\r" && source[pos + 1] === "\n") { + unitSource = "\r\n"; + reader.pos = pos + 2; + } else { + unitSource = char; + reader.pos = pos + 1; + } + codeUnits.push(new CodeUnit(pos, unitSource)); + } + } + return codeUnits; +} + +module.exports = { parseStringLiteral, parseTemplateToken }; diff --git a/package.json b/package.json index 206e2cdb782..10c8693e248 100644 --- a/package.json +++ b/package.json @@ -72,7 +72,6 @@ "@nodelib/fs.walk": "^1.2.8", "ajv": "^6.12.4", "chalk": "^4.0.0", - "char-source": "^0.0.0", "cross-spawn": "^7.0.2", "debug": "^4.3.2", "escape-string-regexp": "^4.0.0", diff --git a/tests/lib/rules/utils/char-source.js b/tests/lib/rules/utils/char-source.js new file mode 100644 index 
00000000000..2f37d9f3c0f --- /dev/null +++ b/tests/lib/rules/utils/char-source.js @@ -0,0 +1,256 @@ +"use strict"; + +const assertStrict = require("node:assert/strict"); +const { parseStringLiteral, parseTemplateToken } = require("../../../../lib/rules/utils/char-source"); + +describe( + "parseStringLiteral", + () => { + const TESTS = [ + { + description: "works with an empty string", + source: '""', + expectedCodeUnits: [] + }, + { + description: "works with surrogate pairs", + source: '"a𝄞z"', + expectedCodeUnits: [ + { start: 1, source: "a" }, + { start: 2, source: "\ud834" }, + { start: 3, source: "\udd1e" }, + { start: 4, source: "z" } + ] + }, + { + description: "works with escape sequences for single characters", + source: '"a\\x40\\u231Bz"', + expectedCodeUnits: [ + { start: 1, source: "a" }, + { start: 2, source: "\\x40" }, + { start: 6, source: "\\u231B" }, + { start: 12, source: "z" } + ] + }, + { + description: "works with escape sequences for code points", + source: '"a\\u{ffff}\\u{10000}\\u{10ffff}z"', + expectedCodeUnits: [ + { start: 1, source: "a" }, + { start: 2, source: "\\u{ffff}" }, + { start: 10, source: "\\u{10000}" }, + { start: 10, source: "\\u{10000}" }, + { start: 19, source: "\\u{10ffff}" }, + { start: 19, source: "\\u{10ffff}" }, + { start: 29, source: "z" } + ] + }, + { + description: "works with line continuations", + source: '"a\\\n\\\r\n\\\u2028\\\u2029z"', + expectedCodeUnits: [ + { start: 1, source: "a" }, + { start: 11, source: "z" } + ] + }, + { + description: "works with simple escape sequences", + source: '"\\"\\0\\b\\f\\n\\r\\t\\v"', + expectedCodeUnits: ['\\"', "\\0", "\\b", "\\f", "\\n", "\\r", "\\t", "\\v"] + .map((source, index) => ({ source, start: 1 + index * 2 })) + }, + { + description: "works with a character outside of a line continuation", + source: '"a\u2028z"', + expectedCodeUnits: [ + { start: 1, source: "a" }, + { start: 2, source: "\u2028" }, + { start: 3, source: "z" } + ] + }, + { + description: "works 
with a character outside of a line continuation", + source: '"a\u2029z"', + expectedCodeUnits: [ + { start: 1, source: "a" }, + { start: 2, source: "\u2029" }, + { start: 3, source: "z" } + ] + }, + { + description: "works with octal escape sequences", + source: '"\\0123\\456"', + expectedCodeUnits: [ + { source: "\\012", start: 1 }, + { source: "3", start: 5 }, + { source: "\\45", start: 6 }, + { source: "6", start: 9 } + ] + }, + { + description: "works with an escaped 7", + source: '"\\7"', + expectedCodeUnits: [{ source: "\\7", start: 1 }] + }, + { + description: "works with an escaped 8", + source: '"\\8"', + expectedCodeUnits: [{ source: "\\8", start: 1 }] + }, + { + description: "works with an escaped 9", + source: '"\\9"', + expectedCodeUnits: [{ source: "\\9", start: 1 }] + }, + { + description: 'works with the escaped sequence "00"', + source: '"\\00"', + expectedCodeUnits: [{ source: "\\00", start: 1 }] + }, + { + description: "works with an escaped 0 followed by 8", + source: '"\\08"', + expectedCodeUnits: [ + { source: "\\0", start: 1 }, + { source: "8", start: 3 } + ] + }, + { + description: "works with an escaped 0 followed by 9", + source: '"\\09"', + expectedCodeUnits: [ + { source: "\\0", start: 1 }, + { source: "9", start: 3 } + ] + } + ]; + + for (const { description, source, expectedCodeUnits, only } of TESTS) { + (only ? 
it.only : it)( + description, + () => { + const codeUnits = parseStringLiteral(source); + const expectedCharCount = expectedCodeUnits.length; + + assertStrict.equal(codeUnits.length, expectedCharCount); + for (let index = 0; index < expectedCharCount; ++index) { + const codeUnit = codeUnits[index]; + const expectedUnit = expectedCodeUnits[index]; + const message = `Expected values to be strictly equal at index ${index}`; + + assertStrict.equal(codeUnit.start, expectedUnit.start, message); + assertStrict.equal(codeUnit.source, expectedUnit.source, message); + } + } + ); + } + } +); + +describe( + "parseTemplateToken", + () => { + const TESTS = + [ + { + description: "works with an empty template", + source: "``", + expectedCodeUnits: [] + }, + { + description: "works with surrogate pairs", + source: "`A𝄞Z`", + expectedCodeUnits: [ + { start: 1, source: "A" }, + { start: 2, source: "\ud834" }, + { start: 3, source: "\udd1e" }, + { start: 4, source: "Z" } + ] + }, + { + description: "works with escape sequences for single characters", + source: "`A\\x40\\u231BZ${", + expectedCodeUnits: [ + { start: 1, source: "A" }, + { start: 2, source: "\\x40" }, + { start: 6, source: "\\u231B" }, + { start: 12, source: "Z" } + ] + }, + { + description: "works with escape sequences for code points", + source: "}A\\u{FFFF}\\u{10000}\\u{10FFFF}Z${", + expectedCodeUnits: [ + { start: 1, source: "A" }, + { start: 2, source: "\\u{FFFF}" }, + { start: 10, source: "\\u{10000}" }, + { start: 10, source: "\\u{10000}" }, + { start: 19, source: "\\u{10FFFF}" }, + { start: 19, source: "\\u{10FFFF}" }, + { start: 29, source: "Z" } + ] + }, + { + description: "works with line continuations", + source: "}A\\\n\\\r\n\\\u2028\\\u2029Z`", + expectedCodeUnits: [ + { start: 1, source: "A" }, + { start: 11, source: "Z" } + ] + }, + { + description: "works with simple escape sequences", + source: "`\\0\\`\\b\\f\\n\\r\\t\\v`", + expectedCodeUnits: ["\\0", "\\`", "\\b", "\\f", "\\n", "\\r", "\\t", 
"\\v"] + .map((source, index) => ({ source, start: 1 + index * 2 })) + }, + { + description: "works with a character outside of a line continuation", + source: "`a\u2028z`", + expectedCodeUnits: [ + { start: 1, source: "a" }, + { start: 2, source: "\u2028" }, + { start: 3, source: "z" } + ] + }, + { + description: "works with a character outside of a line continuation", + source: "`a\u2029z`", + expectedCodeUnits: [ + { start: 1, source: "a" }, + { start: 2, source: "\u2029" }, + { start: 3, source: "z" } + ] + }, + { + description: "works with unescaped sequences", + source: "`A\r\nZ`", + expectedCodeUnits: [ + { start: 1, source: "A" }, + { start: 2, source: "\r\n" }, + { start: 4, source: "Z" } + ] + } + ]; + + for (const { description, source, expectedCodeUnits, only } of TESTS) { + (only ? it.only : it)( + description, + () => { + const codeUnits = parseTemplateToken(source); + const expectedCharCount = expectedCodeUnits.length; + + assertStrict.equal(codeUnits.length, expectedCharCount); + for (let index = 0; index < expectedCharCount; ++index) { + const codeUnit = codeUnits[index]; + const expectedUnit = expectedCodeUnits[index]; + const message = `Expected values to be strictly equal at index ${index}`; + + assertStrict.equal(codeUnit.start, expectedUnit.start, message); + assertStrict.equal(codeUnit.source, expectedUnit.source, message); + } + } + ); + } + } +); From 448d9a9e40e8f001b59352a730098658f7d55081 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Sat, 10 Feb 2024 00:08:44 +0100 Subject: [PATCH 10/19] add `read` method to `SourceReader` --- lib/rules/utils/char-source.js | 55 +++++++++++++++++----------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/lib/rules/utils/char-source.js b/lib/rules/utils/char-source.js index b528db3ce81..bb211e2fb6e 100644 --- a/lib/rules/utils/char-source.js +++ b/lib/rules/utils/char-source.js @@ -32,6 +32,12 @@ class SourceReader { this.source = source; this.pos = 0; } + + read(offset = 0, 
length = 1) { + const start = offset + this.pos; + + return this.source.slice(start, start + length); + } } const SIMPLE_ESCAPE_SEQUENCES = @@ -44,11 +50,10 @@ const SIMPLE_ESCAPE_SEQUENCES = * @returns {string} A code unit. */ function readHexSequence(reader, length) { - const { source, pos } = reader; - const str = source.slice(pos, pos + length); + const str = reader.read(0, length); const charCode = parseInt(str, 16); - reader.pos = pos + length; + reader.pos += length; return String.fromCharCode(charCode); } @@ -58,11 +63,10 @@ function readHexSequence(reader, length) { * @returns {string} A code unit. */ function readUnicodeSequence(reader) { - const { source, pos } = reader; const regExp = /\{(?<hexDigits>[\dA-Fa-f]+)\}/uy; - regExp.lastIndex = pos; - const match = regExp.exec(source); + regExp.lastIndex = reader.pos; + const match = regExp.exec(reader.source); if (match) { const codePoint = parseInt(match.groups.hexDigits, 16); @@ -80,10 +84,9 @@ function readUnicodeSequence(reader) { * @returns {string} A code unit. */ function readOctalSequence(reader, maxLength) { - const posAfterBackslash = reader.pos - 1; - const [octalStr] = reader.source.slice(posAfterBackslash, posAfterBackslash + maxLength).match(/^[0-7]+/u); + const [octalStr] = reader.read(-1, maxLength).match(/^[0-7]+/u); - reader.pos = posAfterBackslash + octalStr.length; + reader.pos += octalStr.length - 1; const octal = parseInt(octalStr, 8); return String.fromCharCode(octal); @@ -91,14 +94,13 @@ function readOctalSequence(reader, maxLength) { /** * Reads an escape sequence or line continuation. - * @param {SourceReader} reader The reader should be positioned after the backslash. + * @param {SourceReader} reader The reader should be positioned on the backslash. * @returns {string} A string of zero, one or two code units. 
*/ function readEscapeSequenceOrLineContinuation(reader) { - const { source, pos } = reader; - const char = source[pos]; + const char = reader.read(1); - reader.pos = pos + 1; + reader.pos += 2; const unitChar = SIMPLE_ESCAPE_SEQUENCES[char]; if (unitChar) { @@ -110,8 +112,8 @@ function readEscapeSequenceOrLineContinuation(reader) { case "u": return readUnicodeSequence(reader); case "\r": - if (source[pos + 1] === "\n") { - reader.pos = pos + 2; + if (reader.read() === "\n") { + reader.pos += 1; } // fallthrough @@ -140,7 +142,7 @@ function readEscapeSequenceOrLineContinuation(reader) { * @returns {Generator} Zero, one or two `CodeUnit` elements. */ function *mapEscapeSequenceOrLineContinuation(reader) { - const start = reader.pos++; + const start = reader.pos; const str = readEscapeSequenceOrLineContinuation(reader); const end = reader.pos; const source = reader.source.slice(start, end); @@ -165,14 +167,13 @@ function *mapEscapeSequenceOrLineContinuation(reader) { */ function parseStringLiteral(source) { const reader = new SourceReader(source); - const quote = source[0]; + const quote = reader.read(); reader.pos = 1; const codeUnits = []; for (;;) { - const { pos } = reader; - const char = source[pos]; + const char = reader.read(); if (char === quote) { break; @@ -180,8 +181,8 @@ function parseStringLiteral(source) { if (char === "\\") { codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader)); } else { - reader.pos = pos + 1; - codeUnits.push(new CodeUnit(pos, char)); + codeUnits.push(new CodeUnit(reader.pos, char)); + reader.pos += 1; } } return codeUnits; @@ -199,10 +200,9 @@ function parseTemplateToken(source) { const codeUnits = []; for (;;) { - const { pos } = reader; - const char = source[pos]; + const char = reader.read(); - if (char === "`" || char === "$" && source[pos + 1] === "{") { + if (char === "`" || char === "$" && reader.read(1) === "{") { break; } if (char === "\\") { @@ -210,14 +210,13 @@ function parseTemplateToken(source) { } else { let 
unitSource; - if (char === "\r" && source[pos + 1] === "\n") { + if (char === "\r" && reader.read(1) === "\n") { unitSource = "\r\n"; - reader.pos = pos + 2; } else { unitSource = char; - reader.pos = pos + 1; } - codeUnits.push(new CodeUnit(pos, unitSource)); + codeUnits.push(new CodeUnit(reader.pos, unitSource)); + reader.pos += unitSource.length; } } return codeUnits; From b1cf05fc910e1b4c4986a652fa99c2a325be867f Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Thu, 15 Feb 2024 07:28:56 +0100 Subject: [PATCH 11/19] add `advance` method and JSDoc --- lib/rules/utils/char-source.js | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/lib/rules/utils/char-source.js b/lib/rules/utils/char-source.js index bb211e2fb6e..4a31cfc2f8f 100644 --- a/lib/rules/utils/char-source.js +++ b/lib/rules/utils/char-source.js @@ -33,6 +33,21 @@ class SourceReader { this.pos = 0; } + /** + * Advances the reading position of the specified number of characters. + * @param {number} length Number of characters to advance. + * @returns {void} + */ + advance(length) { + this.pos += length; + } + + /** + * Reads characters from the source. + * @param {number} [offset=0] The offset where reading starts, relative to the current position. + * @param {number} [length=1] Number of characters to read. + * @returns {string} A substring of source characters. 
+ */ read(offset = 0, length = 1) { const start = offset + this.pos; @@ -53,7 +68,7 @@ function readHexSequence(reader, length) { const str = reader.read(0, length); const charCode = parseInt(str, 16); - reader.pos += length; + reader.advance(length); return String.fromCharCode(charCode); } @@ -86,7 +101,7 @@ function readUnicodeSequence(reader) { function readOctalSequence(reader, maxLength) { const [octalStr] = reader.read(-1, maxLength).match(/^[0-7]+/u); - reader.pos += octalStr.length - 1; + reader.advance(octalStr.length - 1); const octal = parseInt(octalStr, 8); return String.fromCharCode(octal); @@ -100,7 +115,7 @@ function readOctalSequence(reader, maxLength) { function readEscapeSequenceOrLineContinuation(reader) { const char = reader.read(1); - reader.pos += 2; + reader.advance(2); const unitChar = SIMPLE_ESCAPE_SEQUENCES[char]; if (unitChar) { @@ -113,7 +128,7 @@ function readEscapeSequenceOrLineContinuation(reader) { return readUnicodeSequence(reader); case "\r": if (reader.read() === "\n") { - reader.pos += 1; + reader.advance(1); } // fallthrough @@ -169,7 +184,7 @@ function parseStringLiteral(source) { const reader = new SourceReader(source); const quote = reader.read(); - reader.pos = 1; + reader.advance(1); const codeUnits = []; for (;;) { @@ -182,7 +197,7 @@ function parseStringLiteral(source) { codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader)); } else { codeUnits.push(new CodeUnit(reader.pos, char)); - reader.pos += 1; + reader.advance(1); } } return codeUnits; @@ -196,7 +211,7 @@ function parseStringLiteral(source) { function parseTemplateToken(source) { const reader = new SourceReader(source); - reader.pos = 1; + reader.advance(1); const codeUnits = []; for (;;) { @@ -216,7 +231,7 @@ function parseTemplateToken(source) { unitSource = char; } codeUnits.push(new CodeUnit(reader.pos, unitSource)); - reader.pos += unitSource.length; + reader.advance(unitSource.length); } } return codeUnits; From 
7bb7e55f232840cfe564f11ce44a3f79a137b260 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Sun, 18 Feb 2024 21:25:35 +0100 Subject: [PATCH 12/19] fix logic --- lib/rules/no-misleading-character-class.js | 18 +++-- .../rules/no-misleading-character-class.js | 71 ++++++++++++------- 2 files changed, 59 insertions(+), 30 deletions(-) diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index 78f8c1f0dcb..8d548f4a334 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -280,6 +280,9 @@ module.exports = { * @see https://github.com/eslint/eslint/pull/17515 */ function getNodeReportLocations(matches) { + if (!astUtils.isStaticTemplateLiteral(node) && node.type !== "Literal") { + return matches.length ? [node.loc] : []; + } return matches.map(chars => { const firstIndex = chars[0].start; const lastIndex = chars.at(-1).end - 1; @@ -359,12 +362,19 @@ module.exports = { for (const { node: refNode } of tracker.iterateGlobalReferences({ RegExp: { [CALL]: true, [CONSTRUCT]: true } })) { + let pattern, flags; const [patternNode, flagsNode] = refNode.arguments; - const pattern = getStringIfConstant(patternNode, scope); - const flags = getStringIfConstant(flagsNode, scope); - if (typeof pattern === "string") { - verify(patternNode, pattern, flags || "", fixer => { + if (patternNode.type === "Literal" && patternNode.regex) { + pattern = patternNode.regex.pattern; + flags = flagsNode ? getStringIfConstant(flagsNode, scope) : patternNode.regex.flags; + } else { + pattern = getStringIfConstant(patternNode, scope); + flags = flagsNode ? 
getStringIfConstant(flagsNode, scope) : ""; + } + + if (typeof pattern === "string" && typeof flags === "string") { + verify(patternNode, pattern, flags, fixer => { if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) { return null; diff --git a/tests/lib/rules/no-misleading-character-class.js b/tests/lib/rules/no-misleading-character-class.js index d5c6e9346ee..7e3d962e882 100644 --- a/tests/lib/rules/no-misleading-character-class.js +++ b/tests/lib/rules/no-misleading-character-class.js @@ -40,6 +40,7 @@ ruleTester.run("no-misleading-character-class", rule, { "var r = /πŸ‡―πŸ‡΅/", "var r = /[JP]/", "var r = /πŸ‘¨β€πŸ‘©β€πŸ‘¦/", + "var r = RegExp(/[πŸ‘]/u)", // Ignore solo lead/tail surrogate. "var r = /[\\uD83D]/", @@ -75,6 +76,10 @@ ruleTester.run("no-misleading-character-class", rule, { // don't report on templates with expressions "var r = RegExp(`${x}[πŸ‘]`)", + // don't report on unknown flags + "var r = new RegExp('[πŸ‡―πŸ‡΅]', `${foo}`)", + String.raw`var r = new RegExp("[πŸ‘]", flags)`, + // ES2024 { code: "var r = /[πŸ‘]/v", languageOptions: { ecmaVersion: 2024 } }, { code: String.raw`var r = /^[\q{πŸ‘ΆπŸ»}]$/v`, languageOptions: { ecmaVersion: 2024 } }, @@ -636,15 +641,6 @@ ruleTester.run("no-misleading-character-class", rule, { suggestions: null }] }, - { - code: String.raw`var r = new RegExp("[πŸ‘]", flags)`, - errors: [{ - column: 22, - endColumn: 24, - messageId: "surrogatePairWithoutUFlag", - suggestions: null - }] - }, { code: String.raw`const flags = ""; var r = new RegExp("[πŸ‘]", flags)`, errors: [{ @@ -1004,23 +1000,6 @@ ruleTester.run("no-misleading-character-class", rule, { } ] }, - { - code: "var r = new RegExp('[πŸ‡―πŸ‡΅]', `${foo}`)", - errors: [ - { - column: 22, - endColumn: 24, - messageId: "surrogatePairWithoutUFlag", - suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = new RegExp('[πŸ‡―πŸ‡΅]', `${foo}u`)" }] - }, - { - column: 24, - endColumn: 26, - messageId: "surrogatePairWithoutUFlag", 
- suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = new RegExp('[πŸ‡―πŸ‡΅]', `${foo}u`)" }] - } - ] - }, { code: String.raw`var r = new RegExp("[πŸ‡―πŸ‡΅]")`, errors: [ @@ -1343,6 +1322,46 @@ ruleTester.run("no-misleading-character-class", rule, { suggestions: null }] }, + + // no granular reports on templates with expressions + { + code: 'new RegExp(`${"[πŸ‘πŸ‡―πŸ‡΅]"}[😊]`);', + errors: [{ + column: 12, + endColumn: 31, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ + messageId: "suggestUnicodeFlag", + output: 'new RegExp(`${"[πŸ‘πŸ‡―πŸ‡΅]"}[😊]`, "u");' + }] + }] + }, + + // no granular reports on identifiers + { + code: 'const pattern = "[πŸ‘]"; new RegExp(pattern);', + errors: [{ + column: 36, + endColumn: 43, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ + messageId: "suggestUnicodeFlag", + output: 'const pattern = "[πŸ‘]"; new RegExp(pattern, "u");' + }] + }] + }, + + // second argument in RegExp should override flags in regexp literal + { + code: "RegExp(/[aπŸ‘z]/u, '');", + errors: [{ + column: 11, + endColumn: 13, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[aπŸ‘z]/u, 'u');" }] + }] + }, + { code: String.raw` From 072f256d21e21551f896621312578332535fbef3 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Sun, 18 Feb 2024 21:33:01 +0100 Subject: [PATCH 13/19] =?UTF-8?q?`SourceReader`=20=E2=86=92=20`TextReader`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/rules/utils/char-source.js | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/rules/utils/char-source.js b/lib/rules/utils/char-source.js index 4a31cfc2f8f..70738625b94 100644 --- a/lib/rules/utils/char-source.js +++ b/lib/rules/utils/char-source.js @@ -27,7 +27,7 @@ class CodeUnit { /** * An object used to keep track of the position in a source text where the next characters will be read. 
*/ -class SourceReader { +class TextReader { constructor(source) { this.source = source; this.pos = 0; @@ -60,7 +60,7 @@ const SIMPLE_ESCAPE_SEQUENCES = /** * Reads a hex escape sequence. - * @param {SourceReader} reader The reader should be positioned on the first hexadecimal digit. + * @param {TextReader} reader The reader should be positioned on the first hexadecimal digit. * @param {number} length The number of hexadecimal digits. * @returns {string} A code unit. */ @@ -74,7 +74,7 @@ function readHexSequence(reader, length) { /** * Reads a Unicode escape sequence. - * @param {SourceReader} reader The reader should be positioned after the "u". + * @param {TextReader} reader The reader should be positioned after the "u". * @returns {string} A code unit. */ function readUnicodeSequence(reader) { @@ -94,7 +94,7 @@ function readUnicodeSequence(reader) { /** * Reads an octal escape sequence. - * @param {SourceReader} reader The reader should be positioned after the first octal digit. + * @param {TextReader} reader The reader should be positioned after the first octal digit. * @param {number} maxLength The maximum number of octal digits. * @returns {string} A code unit. */ @@ -109,7 +109,7 @@ function readOctalSequence(reader, maxLength) { /** * Reads an escape sequence or line continuation. - * @param {SourceReader} reader The reader should be positioned on the backslash. + * @param {TextReader} reader The reader should be positioned on the backslash. * @returns {string} A string of zero, one or two code units. */ function readEscapeSequenceOrLineContinuation(reader) { @@ -153,7 +153,7 @@ function readEscapeSequenceOrLineContinuation(reader) { /** * Reads an escape sequence or line continuation and generates the respective `CodeUnit` elements. - * @param {SourceReader} reader The reader should be positioned on the backslash. + * @param {TextReader} reader The reader should be positioned on the backslash. * @returns {Generator} Zero, one or two `CodeUnit` elements. 
*/ function *mapEscapeSequenceOrLineContinuation(reader) { @@ -181,7 +181,7 @@ function *mapEscapeSequenceOrLineContinuation(reader) { * @returns {CodeUnit[]} A list of code units produced by the string literal. */ function parseStringLiteral(source) { - const reader = new SourceReader(source); + const reader = new TextReader(source); const quote = reader.read(); reader.advance(1); @@ -209,7 +209,7 @@ function parseStringLiteral(source) { * @returns {CodeUnit[]} A list of code units produced by the template token. */ function parseTemplateToken(source) { - const reader = new SourceReader(source); + const reader = new TextReader(source); reader.advance(1); const codeUnits = []; From 6792594b0423363b2d423faa35c2ce45d444156a Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Sun, 25 Feb 2024 12:18:38 +0100 Subject: [PATCH 14/19] handle `RegExp` calls with regex patterns --- lib/rules/no-misleading-character-class.js | 37 +++++++-- .../rules/no-misleading-character-class.js | 81 ++++++++++++++++++- 2 files changed, 109 insertions(+), 9 deletions(-) diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index 8d548f4a334..2c525a6d960 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -3,7 +3,14 @@ */ "use strict"; -const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils"); +const { isRegExp } = require("node:util/types"); +const { + CALL, + CONSTRUCT, + ReferenceTracker, + getStaticValue, + getStringIfConstant +} = require("@eslint-community/eslint-utils"); const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp"); const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode"); const astUtils = require("./utils/ast-utils.js"); @@ -226,6 +233,7 @@ module.exports = { create(context) { const sourceCode = context.sourceCode; const parser = new 
RegExpParser(); + const checkedPatternNodes = new Set(); /** * Verify a given regular expression. @@ -342,6 +350,9 @@ module.exports = { return { "Literal[regex]"(node) { + if (checkedPatternNodes.has(node)) { + return; + } verify(node, node.regex.pattern, node.regex.flags, fixer => { if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) { return null; @@ -364,16 +375,28 @@ module.exports = { })) { let pattern, flags; const [patternNode, flagsNode] = refNode.arguments; + const evaluatedPattern = getStaticValue(patternNode, scope); - if (patternNode.type === "Literal" && patternNode.regex) { - pattern = patternNode.regex.pattern; - flags = flagsNode ? getStringIfConstant(flagsNode, scope) : patternNode.regex.flags; + if (!evaluatedPattern) { + continue; + } + if (flagsNode) { + if (isRegExp(evaluatedPattern.value)) { + pattern = evaluatedPattern.value.source; + checkedPatternNodes.add(patternNode); + } else { + pattern = String(evaluatedPattern.value); + } + flags = getStringIfConstant(flagsNode, scope); } else { - pattern = getStringIfConstant(patternNode, scope); - flags = flagsNode ? 
getStringIfConstant(flagsNode, scope) : ""; + if (isRegExp(evaluatedPattern.value)) { + continue; + } + pattern = String(evaluatedPattern.value); + flags = ""; } - if (typeof pattern === "string" && typeof flags === "string") { + if (typeof flags === "string") { verify(patternNode, pattern, flags, fixer => { if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) { diff --git a/tests/lib/rules/no-misleading-character-class.js b/tests/lib/rules/no-misleading-character-class.js index 7e3d962e882..b6f71ff5374 100644 --- a/tests/lib/rules/no-misleading-character-class.js +++ b/tests/lib/rules/no-misleading-character-class.js @@ -41,6 +41,11 @@ ruleTester.run("no-misleading-character-class", rule, { "var r = /[JP]/", "var r = /πŸ‘¨β€πŸ‘©β€πŸ‘¦/", "var r = RegExp(/[πŸ‘]/u)", + "const regex = /[πŸ‘]/u; new RegExp(regex);", + { + code: "new RegExp('[πŸ‘]')", + languageOptions: { globals: { RegExp: "off" } } + }, // Ignore solo lead/tail surrogate. "var r = /[\\uD83D]/", @@ -80,6 +85,9 @@ ruleTester.run("no-misleading-character-class", rule, { "var r = new RegExp('[πŸ‡―πŸ‡΅]', `${foo}`)", String.raw`var r = new RegExp("[πŸ‘]", flags)`, + // don't report on spread arguments + "const args = ['[πŸ‘]', 'i']; new RegExp(...args);", + // ES2024 { code: "var r = /[πŸ‘]/v", languageOptions: { ecmaVersion: 2024 } }, { code: String.raw`var r = /^[\q{πŸ‘ΆπŸ»}]$/v`, languageOptions: { ecmaVersion: 2024 } }, @@ -1351,14 +1359,83 @@ ruleTester.run("no-misleading-character-class", rule, { }] }, - // second argument in RegExp should override flags in regexp literal + // second argument in RegExp should override flags in regex literal { code: "RegExp(/[aπŸ‘z]/u, '');", errors: [{ column: 11, endColumn: 13, messageId: "surrogatePairWithoutUFlag", - suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[aπŸ‘z]/u, 'u');" }] + suggestions: [{ + messageId: "suggestUnicodeFlag", + output: "RegExp(/[aπŸ‘z]/u, 'u');" + }] + }] + }, + { + code: "const 
pattern = /[πŸ‘]/u; RegExp(pattern, '');", + errors: [{ + column: 33, + endColumn: 40, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ + messageId: "suggestUnicodeFlag", + output: "const pattern = /[πŸ‘]/u; RegExp(pattern, 'u');" + }] + }] + }, + { + code: "const pattern = /[πŸ‘]/g; RegExp(pattern, 'i');", + errors: [{ + column: 19, + endColumn: 21, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ + messageId: "suggestUnicodeFlag", + output: "const pattern = /[πŸ‘]/gu; RegExp(pattern, 'i');" + }] + }, { + column: 33, + endColumn: 40, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ + messageId: "suggestUnicodeFlag", + output: "const pattern = /[πŸ‘]/g; RegExp(pattern, 'iu');" + }] + }] + }, + + // report only on regex literal if no flags are supplied + { + code: "RegExp(/[πŸ‘]/)", + errors: [{ + column: 10, + endColumn: 12, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[πŸ‘]/u)" }] + }] + }, + + // report only on RegExp call if a regex literal and flags are supplied + { + code: "RegExp(/[πŸ‘]/, 'i');", + errors: [{ + column: 10, + endColumn: 12, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[πŸ‘]/, 'iu');" }] + }] + }, + + // ignore RegExp if not built-in + { + code: "RegExp(/[πŸ‘]/, 'g');", + languageOptions: { globals: { RegExp: "off" } }, + errors: [{ + column: 10, + endColumn: 12, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[πŸ‘]/u, 'g');" }] }] }, From ea6f1832643789e50caf56c39969dafe3d5a59f3 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Sun, 25 Feb 2024 12:36:30 +0100 Subject: [PATCH 15/19] fix for browser test --- lib/rules/no-misleading-character-class.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/rules/no-misleading-character-class.js 
b/lib/rules/no-misleading-character-class.js index 2c525a6d960..21cadb5867a 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -3,7 +3,6 @@ */ "use strict"; -const { isRegExp } = require("node:util/types"); const { CALL, CONSTRUCT, @@ -201,6 +200,15 @@ const findCharacterSequences = { const kinds = Object.keys(findCharacterSequences); +/** + * Determines if a specified value is a regular expression object. + * @param {any} value The value to check. + * @returns {boolean} `true` if the value is a regular expression object, otherwise `false`. + */ +function isRegExp(value) { + return value instanceof RegExp; +} + //------------------------------------------------------------------------------ // Rule Definition //------------------------------------------------------------------------------ From ed8d1cd606abc86644a2f6948af23439bd092c82 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Tue, 27 Feb 2024 07:17:00 +0100 Subject: [PATCH 16/19] fix for Node.js 18 --- lib/linter/linter.js | 7 +++++ .../rules/no-misleading-character-class.js | 29 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/lib/linter/linter.js b/lib/linter/linter.js index 2ece33c8284..fa0bb83d3e2 100644 --- a/lib/linter/linter.js +++ b/lib/linter/linter.js @@ -946,6 +946,13 @@ function runRules(sourceCode, configuredRules, ruleMapper, parserName, languageO Traverser.traverse(sourceCode.ast, { enter(node, parent) { node.parent = parent; + if (node.type === "Literal" && node.regex) { + node.value ??= { + __proto__: RegExp.prototype, + source: node.regex.pattern, + flags: [...node.regex.flags].sort().join("") + }; + } nodeQueue.push({ isEntering: true, node }); }, leave(node) { diff --git a/tests/lib/rules/no-misleading-character-class.js b/tests/lib/rules/no-misleading-character-class.js index b6f71ff5374..94466494bb4 100644 --- a/tests/lib/rules/no-misleading-character-class.js +++ 
b/tests/lib/rules/no-misleading-character-class.js @@ -1585,6 +1585,35 @@ ruleTester.run("no-misleading-character-class", rule, { messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = /[πŸ‘]/u" }] }] + }, + { + code: "new RegExp(/^[πŸ‘]$/v, '')", + languageOptions: { + ecmaVersion: 2024 + }, + errors: [{ + column: 15, + endColumn: 17, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ messageId: "suggestUnicodeFlag", output: "new RegExp(/^[πŸ‘]$/v, 'u')" }] + }] + }, + { + code: "var r = /[πŸ‘ΆπŸ»]/v; RegExp(r, 'v');", + languageOptions: { + ecmaVersion: 2024 + }, + errors: [{ + column: 11, + endColumn: 15, + messageId: "emojiModifier", + suggestions: null + }, { + column: 27, + endColumn: 28, + messageId: "emojiModifier", + suggestions: null + }] } ] From 7eaf8b45e4dfe9c0e80d8e3309ad43ba1a57cac0 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Thu, 29 Feb 2024 16:28:24 +0100 Subject: [PATCH 17/19] limit applicability of `getStaticValue` for Node.js 18 compatibility --- lib/linter/linter.js | 7 - lib/rules/no-misleading-character-class.js | 31 +++-- .../rules/no-misleading-character-class.js | 126 +++++++++--------- 3 files changed, 84 insertions(+), 80 deletions(-) diff --git a/lib/linter/linter.js b/lib/linter/linter.js index fa0bb83d3e2..2ece33c8284 100644 --- a/lib/linter/linter.js +++ b/lib/linter/linter.js @@ -946,13 +946,6 @@ function runRules(sourceCode, configuredRules, ruleMapper, parserName, languageO Traverser.traverse(sourceCode.ast, { enter(node, parent) { node.parent = parent; - if (node.type === "Literal" && node.regex) { - node.value ??= { - __proto__: RegExp.prototype, - source: node.regex.pattern, - flags: [...node.regex.flags].sort().join("") - }; - } nodeQueue.push({ isEntering: true, node }); }, leave(node) { diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index 21cadb5867a..c0e87aeefcc 100644 --- 
a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -201,12 +201,25 @@ const findCharacterSequences = { const kinds = Object.keys(findCharacterSequences); /** - * Determines if a specified value is a regular expression object. - * @param {any} value The value to check. - * @returns {boolean} `true` if the value is a regular expression object, otherwise `false`. + * Gets the value of the given node if it's a static value other than a regular expression object, + * or the node's `regex` property. + * The purpose of this method is to provide a replacement for `getStaticValue` on Node.js 18, where `getStaticValue` returns `null` if a regular expression literal contains the `v` flag. + * A limitation of this method is that it can only detect a regular expression if the specified node is itself a regular expression literal node. + * @param {ASTNode} node The node to be inspected. + * @param {Scope} [initialScope] Optional scope to start finding variables. If this scope was given, this tries to resolve identifier references which are in the given node as much as possible. + * @returns {{ value: any } | { value: undefined, optional?: true } | { regex: { pattern: string, flags: string } } | null} The static value of the node, or `null`. 
 */ -function isRegExp(value) { - return value instanceof RegExp; +function getStaticValueOrRegex(node, initialScope) { + if (node.type === "Literal" && node.regex) { + return { regex: node.regex }; + } + + const staticValue = getStaticValue(node, initialScope); + + if (staticValue?.value instanceof RegExp) { + return null; + } + return staticValue; } //------------------------------------------------------------------------------ @@ -383,21 +396,21 @@ module.exports = { })) { let pattern, flags; const [patternNode, flagsNode] = refNode.arguments; - const evaluatedPattern = getStaticValue(patternNode, scope); + const evaluatedPattern = getStaticValueOrRegex(patternNode, scope); if (!evaluatedPattern) { continue; } if (flagsNode) { - if (isRegExp(evaluatedPattern.value)) { - pattern = evaluatedPattern.value.source; + if (evaluatedPattern.regex) { + pattern = evaluatedPattern.regex.pattern; checkedPatternNodes.add(patternNode); } else { pattern = String(evaluatedPattern.value); } flags = getStringIfConstant(flagsNode, scope); } else { - if (isRegExp(evaluatedPattern.value)) { + if (evaluatedPattern.regex) { continue; } pattern = String(evaluatedPattern.value); diff --git a/tests/lib/rules/no-misleading-character-class.js b/tests/lib/rules/no-misleading-character-class.js index 94466494bb4..724dc5fd194 100644 --- a/tests/lib/rules/no-misleading-character-class.js +++ b/tests/lib/rules/no-misleading-character-class.js @@ -1372,38 +1372,43 @@ ruleTester.run("no-misleading-character-class", rule, { }] }] }, - { - code: "const pattern = /[👍]/u; RegExp(pattern, '');", - errors: [{ - column: 33, - endColumn: 40, - messageId: "surrogatePairWithoutUFlag", - suggestions: [{ - messageId: "suggestUnicodeFlag", - output: "const pattern = /[👍]/u; RegExp(pattern, 'u');" - }] - }] - }, - { - code: "const pattern = /[👍]/g; RegExp(pattern, 'i');", - errors: [{ - column: 19, - endColumn: 21, - messageId: "surrogatePairWithoutUFlag", - suggestions: [{ - messageId:
"suggestUnicodeFlag", - output: "const pattern = /[πŸ‘]/gu; RegExp(pattern, 'i');" - }] - }, { - column: 33, - endColumn: 40, - messageId: "surrogatePairWithoutUFlag", - suggestions: [{ - messageId: "suggestUnicodeFlag", - output: "const pattern = /[πŸ‘]/g; RegExp(pattern, 'iu');" - }] - }] - }, + + /* + * These test cases have been disabled because of a limitation in Node.js 18, see https://github.com/eslint/eslint/pull/18082#discussion_r1506142421. + * + * { + * code: "const pattern = /[πŸ‘]/u; RegExp(pattern, '');", + * errors: [{ + * column: 33, + * endColumn: 40, + * messageId: "surrogatePairWithoutUFlag", + * suggestions: [{ + * messageId: "suggestUnicodeFlag", + * output: "const pattern = /[πŸ‘]/u; RegExp(pattern, 'u');" + * }] + * }] + * }, + * { + * code: "const pattern = /[πŸ‘]/g; RegExp(pattern, 'i');", + * errors: [{ + * column: 19, + * endColumn: 21, + * messageId: "surrogatePairWithoutUFlag", + * suggestions: [{ + * messageId: "suggestUnicodeFlag", + * output: "const pattern = /[πŸ‘]/gu; RegExp(pattern, 'i');" + * }] + * }, { + * column: 33, + * endColumn: 40, + * messageId: "surrogatePairWithoutUFlag", + * suggestions: [{ + * messageId: "suggestUnicodeFlag", + * output: "const pattern = /[πŸ‘]/g; RegExp(pattern, 'iu');" + * }] + * }] + * }, + */ // report only on regex literal if no flags are supplied { @@ -1541,7 +1546,7 @@ ruleTester.run("no-misleading-character-class", rule, { }] }, - /* eslint-disable lines-around-comment -- see https://github.com/eslint/eslint/issues/18081 */ + /* eslint-disable lines-around-comment, internal-rules/multiline-comment-style -- see https://github.com/eslint/eslint/issues/18081 */ { code: String.raw` @@ -1562,8 +1567,6 @@ ruleTester.run("no-misleading-character-class", rule, { // ES2024 - /* eslint-enable lines-around-comment -- re-enable rule */ - { code: "var r = /[[πŸ‘ΆπŸ»]]/v", languageOptions: { ecmaVersion: 2024 }, @@ -1574,18 +1577,6 @@ ruleTester.run("no-misleading-character-class", rule, { 
suggestions: null }] }, - { - code: "var r = /[👍]/", - languageOptions: { - ecmaVersion: 2015 - }, - errors: [{ - column: 11, - endColumn: 13, - messageId: "surrogatePairWithoutUFlag", - suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = /[👍]/u" }] - }] - }, { code: "new RegExp(/^[👍]$/v, '')", languageOptions: { @@ -1597,24 +1588,31 @@ ruleTester.run("no-misleading-character-class", rule, { messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: "new RegExp(/^[👍]$/v, 'u')" }] }] - }, - { - code: "var r = /[👶🏻]/v; RegExp(r, 'v');", - languageOptions: { - ecmaVersion: 2024 - }, - errors: [{ - column: 11, - endColumn: 15, - messageId: "emojiModifier", - suggestions: null - }, { - column: 27, - endColumn: 28, - messageId: "emojiModifier", - suggestions: null - }] } + /* + * This test case has been disabled because of a limitation in Node.js 18, see https://github.com/eslint/eslint/pull/18082#discussion_r1506142421. + * + * { + * code: "var r = /[👶🏻]/v; RegExp(r, 'v');", + * languageOptions: { + * ecmaVersion: 2024 + * }, + * errors: [{ + * column: 11, + * endColumn: 15, + * messageId: "emojiModifier", + * suggestions: null + * }, { + * column: 27, + * endColumn: 28, + * messageId: "emojiModifier", + * suggestions: null + * }] + * } + */ + + /* eslint-enable lines-around-comment, internal-rules/multiline-comment-style -- re-enable rule */ + ] }); From ebe11f891e218fe18e456762e68107bfbeea7f4b Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Sun, 3 Mar 2024 12:17:53 +0100 Subject: [PATCH 18/19] fix for `RegExp()` without arguments --- lib/rules/no-misleading-character-class.js | 5 ++++- tests/lib/rules/no-misleading-character-class.js | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index c0e87aeefcc..4016c1b3573 100644 --- a/lib/rules/no-misleading-character-class.js +++
b/lib/rules/no-misleading-character-class.js @@ -205,11 +205,14 @@ const kinds = Object.keys(findCharacterSequences); * or the node's `regex` property. * The purpose of this method is to provide a replacement for `getStaticValue` on Node.js 18, where `getStaticValue` returns `null` if a regular expression literal contains the `v` flag. * A limitation of this method is that it can only detect a regular expression if the specified node is itself a regular expression literal node. - * @param {ASTNode} node The node to be inspected. + * @param {ASTNode|null|undefined} node The node to be inspected. * @param {Scope} [initialScope] Optional scope to start finding variables. If this scope was given, this tries to resolve identifier references which are in the given node as much as possible. * @returns {{ value: any } | { value: undefined, optional?: true } | { regex: { pattern: string, flags: string } } | null} The static value of the node, or `null`. */ function getStaticValueOrRegex(node, initialScope) { + if (!node) { + return null; + } if (node.type === "Literal" && node.regex) { return { regex: node.regex }; } diff --git a/tests/lib/rules/no-misleading-character-class.js b/tests/lib/rules/no-misleading-character-class.js index 724dc5fd194..6a276ae12c2 100644 --- a/tests/lib/rules/no-misleading-character-class.js +++ b/tests/lib/rules/no-misleading-character-class.js @@ -40,6 +40,7 @@ ruleTester.run("no-misleading-character-class", rule, { "var r = /🇯🇵/", "var r = /[JP]/", "var r = /👨‍👩‍👦/", + "new RegExp()", "var r = RegExp(/[👍]/u)", "const regex = /[👍]/u; new RegExp(regex);", { From 64813fbc71b5669e5d0cbe33bf64fcd78c069b47 Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Mon, 4 Mar 2024 07:12:13 +0100 Subject: [PATCH 19/19] update JSDoc for `getStaticValueOrRegex` --- lib/rules/no-misleading-character-class.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/rules/no-misleading-character-class.js
b/lib/rules/no-misleading-character-class.js index 4016c1b3573..fa50e226f97 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -203,11 +203,13 @@ const kinds = Object.keys(findCharacterSequences); /** * Gets the value of the given node if it's a static value other than a regular expression object, * or the node's `regex` property. - * The purpose of this method is to provide a replacement for `getStaticValue` on Node.js 18, where `getStaticValue` returns `null` if a regular expression literal contains the `v` flag. + * The purpose of this method is to provide a replacement for `getStaticValue` in environments where certain regular expressions cannot be evaluated. + * A known example is Node.js 18 which does not support the `v` flag. + * Calling `getStaticValue` on a regular expression node with the `v` flag on Node.js 18 always returns `null`. * A limitation of this method is that it can only detect a regular expression if the specified node is itself a regular expression literal node. - * @param {ASTNode|null|undefined} node The node to be inspected. - * @param {Scope} [initialScope] Optional scope to start finding variables. If this scope was given, this tries to resolve identifier references which are in the given node as much as possible. - * @returns {{ value: any } | { value: undefined, optional?: true } | { regex: { pattern: string, flags: string } } | null} The static value of the node, or `null`. + * @param {ASTNode | undefined} node The node to be inspected. + * @param {Scope} initialScope Scope to start finding variables. This function tries to resolve identifier references which are in the given scope. + * @returns {{ value: any } | { regex: { pattern: string, flags: string } } | null} The static value of the node, or `null`. */ function getStaticValueOrRegex(node, initialScope) { if (!node) {