From a451b32b33535a57b4b7e24291f30760f65460ba Mon Sep 17 00:00:00 2001 From: Francesco Trotta Date: Tue, 5 Mar 2024 13:05:04 +0100 Subject: [PATCH] feat: make `no-misleading-character-class` report more granular errors (#18082) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: report granular errors on arbitrary literals * use npm dependency * test with unescaped CRLF * inline `createReportLocationGenerator` * unit test for templates with expressions * restore old name `getNodeReportLocations` * update JSDoc * `charInfos` → `codeUnits` * extract char-source to a utility module * add `read` method to `SourceReader` * add `advance` method and JSDoc * fix logic * `SourceReader` → `TextReader` * handle `RegExp` calls with regex patterns * fix for browser test * fix for Node.js 18 * limit applicability of `getStaticValue` for Node.js 18 compatibility * fix for `RegExp()` without arguments * update JSDoc for `getStaticValueOrRegex` --- lib/rules/no-misleading-character-class.js | 173 ++++--- lib/rules/utils/char-source.js | 240 ++++++++++ .../rules/no-misleading-character-class.js | 423 ++++++++++++++---- tests/lib/rules/utils/char-source.js | 256 +++++++++++ 4 files changed, 945 insertions(+), 147 deletions(-) create mode 100644 lib/rules/utils/char-source.js create mode 100644 tests/lib/rules/utils/char-source.js diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js index 8d818665790..fa50e226f97 100644 --- a/lib/rules/no-misleading-character-class.js +++ b/lib/rules/no-misleading-character-class.js @@ -3,11 +3,18 @@ */ "use strict"; -const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils"); +const { + CALL, + CONSTRUCT, + ReferenceTracker, + getStaticValue, + getStringIfConstant +} = require("@eslint-community/eslint-utils"); const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp"); const {
isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode"); const astUtils = require("./utils/ast-utils.js"); const { isValidWithUnicodeFlag } = require("./utils/regular-expressions"); +const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source"); //------------------------------------------------------------------------------ // Helpers @@ -193,6 +200,33 @@ const findCharacterSequences = { const kinds = Object.keys(findCharacterSequences); +/** + * Gets the value of the given node if it's a static value other than a regular expression object, + * or the node's `regex` property. + * The purpose of this method is to provide a replacement for `getStaticValue` in environments where certain regular expressions cannot be evaluated. + * A known example is Node.js 18 which does not support the `v` flag. + * Calling `getStaticValue` on a regular expression node with the `v` flag on Node.js 18 always returns `null`. + * A limitation of this method is that it can only detect a regular expression if the specified node is itself a regular expression literal node. + * @param {ASTNode | undefined} node The node to be inspected. + * @param {Scope} initialScope Scope to start finding variables. This function tries to resolve identifier references which are in the given scope. + * @returns {{ value: any } | { regex: { pattern: string, flags: string } } | null} The static value of the node, or `null`. 
+ */ +function getStaticValueOrRegex(node, initialScope) { + if (!node) { + return null; + } + if (node.type === "Literal" && node.regex) { + return { regex: node.regex }; + } + + const staticValue = getStaticValue(node, initialScope); + + if (staticValue?.value instanceof RegExp) { + return null; + } + return staticValue; +} + //------------------------------------------------------------------------------ // Rule Definition //------------------------------------------------------------------------------ @@ -225,62 +259,7 @@ module.exports = { create(context) { const sourceCode = context.sourceCode; const parser = new RegExpParser(); - - /** - * Generates a granular loc for context.report, if directly calculable. - * @param {Character[]} chars Individual characters being reported on. - * @param {Node} node Parent string node to report within. - * @returns {Object | null} Granular loc for context.report, if directly calculable. - * @see https://github.com/eslint/eslint/pull/17515 - */ - function generateReportLocation(chars, node) { - - // Limit to to literals and expression-less templates with raw values === their value. - switch (node.type) { - case "TemplateLiteral": - if (node.expressions.length || sourceCode.getText(node).slice(1, -1) !== node.quasis[0].value.cooked) { - return null; - } - break; - - case "Literal": - if (typeof node.value === "string" && node.value !== node.raw.slice(1, -1)) { - return null; - } - break; - - default: - return null; - } - - return { - start: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[0].start), - end: sourceCode.getLocFromIndex(node.range[0] + 1 + chars.at(-1).end) - }; - } - - /** - * Finds the report loc(s) for a range of matches. - * @param {Character[][]} matches Characters that should trigger a report. - * @param {Node} node The node to report. - * @returns {Object | null} Node loc(s) for context.report. 
- */ - function getNodeReportLocations(matches, node) { - const locs = []; - - for (const chars of matches) { - const loc = generateReportLocation(chars, node); - - // If a report can't match to a range, don't report any others - if (!loc) { - return [node.loc]; - } - - locs.push(loc); - } - - return locs; - } + const checkedPatternNodes = new Set(); /** * Verify a given regular expression. @@ -320,12 +299,58 @@ module.exports = { } else { foundKindMatches.set(kind, [...findCharacterSequences[kind](chars)]); } - } } } }); + let codeUnits = null; + + /** + * Finds the report loc(s) for a range of matches. + * Only literals and expression-less templates generate granular errors. + * @param {Character[][]} matches Lists of individual characters being reported on. + * @returns {Location[]} locs for context.report. + * @see https://github.com/eslint/eslint/pull/17515 + */ + function getNodeReportLocations(matches) { + if (!astUtils.isStaticTemplateLiteral(node) && node.type !== "Literal") { + return matches.length ? [node.loc] : []; + } + return matches.map(chars => { + const firstIndex = chars[0].start; + const lastIndex = chars.at(-1).end - 1; + let start; + let end; + + if (node.type === "TemplateLiteral") { + const source = sourceCode.getText(node); + const offset = node.range[0]; + + codeUnits ??= parseTemplateToken(source); + start = offset + codeUnits[firstIndex].start; + end = offset + codeUnits[lastIndex].end; + } else if (typeof node.value === "string") { // String Literal + const source = node.raw; + const offset = node.range[0]; + + codeUnits ??= parseStringLiteral(source); + start = offset + codeUnits[firstIndex].start; + end = offset + codeUnits[lastIndex].end; + } else { // RegExp Literal + const offset = node.range[0] + 1; // Add 1 to skip the leading slash. 
+ + start = offset + firstIndex; + end = offset + lastIndex + 1; + } + + return { + start: sourceCode.getLocFromIndex(start), + end: sourceCode.getLocFromIndex(end) + }; + }); + } + for (const [kind, matches] of foundKindMatches) { let suggest; @@ -336,7 +361,7 @@ module.exports = { }]; } - const locs = getNodeReportLocations(matches, node); + const locs = getNodeReportLocations(matches); for (const loc of locs) { context.report({ @@ -351,6 +376,9 @@ module.exports = { return { "Literal[regex]"(node) { + if (checkedPatternNodes.has(node)) { + return; + } verify(node, node.regex.pattern, node.regex.flags, fixer => { if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) { return null; @@ -371,12 +399,31 @@ module.exports = { for (const { node: refNode } of tracker.iterateGlobalReferences({ RegExp: { [CALL]: true, [CONSTRUCT]: true } })) { + let pattern, flags; const [patternNode, flagsNode] = refNode.arguments; - const pattern = getStringIfConstant(patternNode, scope); - const flags = getStringIfConstant(flagsNode, scope); + const evaluatedPattern = getStaticValueOrRegex(patternNode, scope); + + if (!evaluatedPattern) { + continue; + } + if (flagsNode) { + if (evaluatedPattern.regex) { + pattern = evaluatedPattern.regex.pattern; + checkedPatternNodes.add(patternNode); + } else { + pattern = String(evaluatedPattern.value); + } + flags = getStringIfConstant(flagsNode, scope); + } else { + if (evaluatedPattern.regex) { + continue; + } + pattern = String(evaluatedPattern.value); + flags = ""; + } - if (typeof pattern === "string") { - verify(patternNode, pattern, flags || "", fixer => { + if (typeof flags === "string") { + verify(patternNode, pattern, flags, fixer => { if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) { return null; diff --git a/lib/rules/utils/char-source.js b/lib/rules/utils/char-source.js new file mode 100644 index 00000000000..70738625b94 --- /dev/null +++ b/lib/rules/utils/char-source.js @@ 
-0,0 +1,240 @@ +/** + * @fileoverview Utility functions to locate the source text of each code unit in the value of a string literal or template token. + * @author Francesco Trotta + */ + +"use strict"; + +/** + * Represents a code unit produced by the evaluation of a JavaScript common token like a string + * literal or template token. + */ +class CodeUnit { + constructor(start, source) { + this.start = start; + this.source = source; + } + + get end() { + return this.start + this.length; + } + + get length() { + return this.source.length; + } +} + +/** + * An object used to keep track of the position in a source text where the next characters will be read. + */ +class TextReader { + constructor(source) { + this.source = source; + this.pos = 0; + } + + /** + * Advances the reading position of the specified number of characters. + * @param {number} length Number of characters to advance. + * @returns {void} + */ + advance(length) { + this.pos += length; + } + + /** + * Reads characters from the source. + * @param {number} [offset=0] The offset where reading starts, relative to the current position. + * @param {number} [length=1] Number of characters to read. + * @returns {string} A substring of source characters. + */ + read(offset = 0, length = 1) { + const start = offset + this.pos; + + return this.source.slice(start, start + length); + } +} + +const SIMPLE_ESCAPE_SEQUENCES = +{ __proto__: null, b: "\b", f: "\f", n: "\n", r: "\r", t: "\t", v: "\v" }; + +/** + * Reads a hex escape sequence. + * @param {TextReader} reader The reader should be positioned on the first hexadecimal digit. + * @param {number} length The number of hexadecimal digits. + * @returns {string} A code unit. + */ +function readHexSequence(reader, length) { + const str = reader.read(0, length); + const charCode = parseInt(str, 16); + + reader.advance(length); + return String.fromCharCode(charCode); +} + +/** + * Reads a Unicode escape sequence. 
+ * @param {TextReader} reader The reader should be positioned after the "u". + * @returns {string} A code unit. + */ +function readUnicodeSequence(reader) { + const regExp = /\{(?<hexDigits>[\dA-Fa-f]+)\}/uy; + + regExp.lastIndex = reader.pos; + const match = regExp.exec(reader.source); + + if (match) { + const codePoint = parseInt(match.groups.hexDigits, 16); + + reader.pos = regExp.lastIndex; + return String.fromCodePoint(codePoint); + } + return readHexSequence(reader, 4); +} + +/** + * Reads an octal escape sequence. + * @param {TextReader} reader The reader should be positioned after the first octal digit. + * @param {number} maxLength The maximum number of octal digits. + * @returns {string} A code unit. + */ +function readOctalSequence(reader, maxLength) { + const [octalStr] = reader.read(-1, maxLength).match(/^[0-7]+/u); + + reader.advance(octalStr.length - 1); + const octal = parseInt(octalStr, 8); + + return String.fromCharCode(octal); +} + +/** + * Reads an escape sequence or line continuation. + * @param {TextReader} reader The reader should be positioned on the backslash. + * @returns {string} A string of zero, one or two code units. + */ +function readEscapeSequenceOrLineContinuation(reader) { + const char = reader.read(1); + + reader.advance(2); + const unitChar = SIMPLE_ESCAPE_SEQUENCES[char]; + + if (unitChar) { + return unitChar; + } + switch (char) { + case "x": + return readHexSequence(reader, 2); + case "u": + return readUnicodeSequence(reader); + case "\r": + if (reader.read() === "\n") { + reader.advance(1); + } + + // fallthrough + case "\n": + case "\u2028": + case "\u2029": + return ""; + case "0": + case "1": + case "2": + case "3": + return readOctalSequence(reader, 3); + case "4": + case "5": + case "6": + case "7": + return readOctalSequence(reader, 2); + default: + return char; + } +} + +/** + * Reads an escape sequence or line continuation and generates the respective `CodeUnit` elements.
+ * @param {TextReader} reader The reader should be positioned on the backslash. + * @returns {Generator<CodeUnit>} Zero, one or two `CodeUnit` elements. + */ +function *mapEscapeSequenceOrLineContinuation(reader) { + const start = reader.pos; + const str = readEscapeSequenceOrLineContinuation(reader); + const end = reader.pos; + const source = reader.source.slice(start, end); + + switch (str.length) { + case 0: + break; + case 1: + yield new CodeUnit(start, source); + break; + default: + yield new CodeUnit(start, source); + yield new CodeUnit(start, source); + break; + } +} + +/** + * Parses a string literal. + * @param {string} source The string literal to parse, including the delimiting quotes. + * @returns {CodeUnit[]} A list of code units produced by the string literal. + */ +function parseStringLiteral(source) { + const reader = new TextReader(source); + const quote = reader.read(); + + reader.advance(1); + const codeUnits = []; + + for (;;) { + const char = reader.read(); + + if (char === quote) { + break; + } + if (char === "\\") { + codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader)); + } else { + codeUnits.push(new CodeUnit(reader.pos, char)); + reader.advance(1); + } + } + return codeUnits; +} + +/** + * Parses a template token. + * @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`. + * @returns {CodeUnit[]} A list of code units produced by the template token.
+ */ +function parseTemplateToken(source) { + const reader = new TextReader(source); + + reader.advance(1); + const codeUnits = []; + + for (;;) { + const char = reader.read(); + + if (char === "`" || char === "$" && reader.read(1) === "{") { + break; + } + if (char === "\\") { + codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader)); + } else { + let unitSource; + + if (char === "\r" && reader.read(1) === "\n") { + unitSource = "\r\n"; + } else { + unitSource = char; + } + codeUnits.push(new CodeUnit(reader.pos, unitSource)); + reader.advance(unitSource.length); + } + } + return codeUnits; +} + +module.exports = { parseStringLiteral, parseTemplateToken }; diff --git a/tests/lib/rules/no-misleading-character-class.js b/tests/lib/rules/no-misleading-character-class.js index 6ad54d42d4a..6a276ae12c2 100644 --- a/tests/lib/rules/no-misleading-character-class.js +++ b/tests/lib/rules/no-misleading-character-class.js @@ -40,6 +40,13 @@ ruleTester.run("no-misleading-character-class", rule, { "var r = /πŸ‡―πŸ‡΅/", "var r = /[JP]/", "var r = /πŸ‘¨β€πŸ‘©β€πŸ‘¦/", + "new RegExp()", + "var r = RegExp(/[πŸ‘]/u)", + "const regex = /[πŸ‘]/u; new RegExp(regex);", + { + code: "new RegExp('[πŸ‘]')", + languageOptions: { globals: { RegExp: "off" } } + }, // Ignore solo lead/tail surrogate. 
"var r = /[\\uD83D]/", @@ -72,6 +79,16 @@ ruleTester.run("no-misleading-character-class", rule, { { code: "var r = new globalThis.RegExp('[Á] [ ');", languageOptions: { ecmaVersion: 2020 } }, { code: "var r = globalThis.RegExp('{ [Á]', 'u');", languageOptions: { ecmaVersion: 2020 } }, + // don't report on templates with expressions + "var r = RegExp(`${x}[πŸ‘]`)", + + // don't report on unknown flags + "var r = new RegExp('[πŸ‡―πŸ‡΅]', `${foo}`)", + String.raw`var r = new RegExp("[πŸ‘]", flags)`, + + // don't report on spread arguments + "const args = ['[πŸ‘]', 'i']; new RegExp(...args);", + // ES2024 { code: "var r = /[πŸ‘]/v", languageOptions: { ecmaVersion: 2024 } }, { code: String.raw`var r = /^[\q{πŸ‘ΆπŸ»}]$/v`, languageOptions: { ecmaVersion: 2024 } }, @@ -625,23 +642,14 @@ ruleTester.run("no-misleading-character-class", rule, { { code: "var r = new RegExp(`\r\n[❇️]`)", errors: [{ - line: 1, - column: 20, + line: 2, + column: 2, endLine: 2, - endColumn: 6, + endColumn: 4, messageId: "combiningClass", suggestions: null }] }, - { - code: String.raw`var r = new RegExp("[πŸ‘]", flags)`, - errors: [{ - column: 22, - endColumn: 24, - messageId: "surrogatePairWithoutUFlag", - suggestions: null - }] - }, { code: String.raw`const flags = ""; var r = new RegExp("[πŸ‘]", flags)`, errors: [{ @@ -654,8 +662,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = RegExp("[\\uD83D\\uDC4D]", "")`, errors: [{ - column: 16, - endColumn: 34, + column: 18, + endColumn: 32, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("[\\uD83D\\uDC4D]", "u")` }] }] @@ -663,8 +671,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = RegExp("before[\\uD83D\\uDC4D]after", "")`, errors: [{ - column: 16, - endColumn: 45, + column: 24, + endColumn: 38, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: 
String.raw`var r = RegExp("before[\\uD83D\\uDC4D]after", "u")` }] }] @@ -672,8 +680,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = RegExp("[before\\uD83D\\uDC4Dafter]", "")`, errors: [{ - column: 16, - endColumn: 45, + column: 24, + endColumn: 38, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("[before\\uD83D\\uDC4Dafter]", "u")` }] }] @@ -681,8 +689,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = RegExp("\t\t\tπŸ‘[πŸ‘]")`, errors: [{ - column: 16, - endColumn: 30, + column: 26, + endColumn: 28, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("\t\t\tπŸ‘[πŸ‘]", "u")` }] }] @@ -690,8 +698,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("\u1234[\\uD83D\\uDC4D]")`, errors: [{ - column: 20, - endColumn: 44, + column: 28, + endColumn: 42, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = new RegExp("\u1234[\\uD83D\\uDC4D]", "u")` }] }] @@ -699,8 +707,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("\\u1234\\u5678πŸ‘Ž[πŸ‘]")`, errors: [{ - column: 20, - endColumn: 42, + column: 38, + endColumn: 40, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = new RegExp("\\u1234\\u5678πŸ‘Ž[πŸ‘]", "u")` }] }] @@ -708,8 +716,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("\\u1234\\u5678πŸ‘[πŸ‘]")`, errors: [{ - column: 20, - endColumn: 42, + column: 38, + endColumn: 40, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = new RegExp("\\u1234\\u5678πŸ‘[πŸ‘]", "u")` }] }] @@ -737,8 +745,8 @@ 
ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[πŸ‘]\\a", "")`, errors: [{ - column: 20, - endColumn: 29, + column: 22, + endColumn: 24, messageId: "surrogatePairWithoutUFlag", suggestions: null // pattern would be invalid with the 'u' flag }] @@ -784,8 +792,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u0041\\u0301]", "")`, errors: [{ - column: 20, - endColumn: 38, + column: 22, + endColumn: 36, messageId: "combiningClass", suggestions: null }] @@ -793,8 +801,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u0041\\u0301]", "u")`, errors: [{ - column: 20, - endColumn: 38, + column: 22, + endColumn: 36, messageId: "combiningClass", suggestions: null }] @@ -802,8 +810,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u{41}\\u{301}]", "u")`, errors: [{ - column: 20, - endColumn: 39, + column: 22, + endColumn: 37, messageId: "combiningClass", suggestions: null }] @@ -829,8 +837,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`new RegExp("[ \\ufe0f]", "")`, errors: [{ - column: 12, - endColumn: 24, + column: 14, + endColumn: 22, messageId: "combiningClass", suggestions: null }] @@ -838,8 +846,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`new RegExp("[ \\ufe0f]", "u")`, errors: [{ - column: 12, - endColumn: 24, + column: 14, + endColumn: 22, messageId: "combiningClass", suggestions: null }] @@ -848,8 +856,14 @@ ruleTester.run("no-misleading-character-class", rule, { code: String.raw`new RegExp("[ \\ufe0f][ \\ufe0f]")`, errors: [ { - column: 12, - endColumn: 34, + column: 14, + endColumn: 22, + messageId: "combiningClass", + suggestions: null + }, + { + column: 24, + endColumn: 32, messageId: "combiningClass", suggestions: null } @@ -858,8 +872,8 @@ ruleTester.run("no-misleading-character-class", 
rule, { { code: String.raw`var r = new RegExp("[\\u2747\\uFE0F]", "")`, errors: [{ - column: 20, - endColumn: 38, + column: 22, + endColumn: 36, messageId: "combiningClass", suggestions: null }] @@ -867,8 +881,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u2747\\uFE0F]", "u")`, errors: [{ - column: 20, - endColumn: 38, + column: 22, + endColumn: 36, messageId: "combiningClass", suggestions: null }] @@ -876,8 +890,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u{2747}\\u{FE0F}]", "u")`, errors: [{ - column: 20, - endColumn: 42, + column: 22, + endColumn: 40, messageId: "combiningClass", suggestions: null }] @@ -911,8 +925,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\uD83D\\uDC76\\uD83C\\uDFFB]", "u")`, errors: [{ - column: 20, - endColumn: 52, + column: 22, + endColumn: 50, messageId: "emojiModifier", suggestions: null }] @@ -920,8 +934,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u{1F476}\\u{1F3FB}]", "u")`, errors: [{ - column: 20, - endColumn: 44, + column: 22, + endColumn: 42, messageId: "emojiModifier", suggestions: null }] @@ -938,8 +952,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: "var r = RegExp(`\\t\\t\\tπŸ‘[πŸ‘]`)", errors: [{ - column: 16, - endColumn: 30, + column: 26, + endColumn: 28, messageId: "surrogatePairWithoutUFlag", suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = RegExp(`\\t\\t\\tπŸ‘[πŸ‘]`, \"u\")" }] }] @@ -995,23 +1009,6 @@ ruleTester.run("no-misleading-character-class", rule, { } ] }, - { - code: "var r = new RegExp('[πŸ‡―πŸ‡΅]', `${foo}`)", - errors: [ - { - column: 22, - endColumn: 24, - messageId: "surrogatePairWithoutUFlag", - suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = new RegExp('[πŸ‡―πŸ‡΅]', `${foo}u`)" }] - }, - { - column: 24, - endColumn: 
26, - messageId: "surrogatePairWithoutUFlag", - suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = new RegExp('[πŸ‡―πŸ‡΅]', `${foo}u`)" }] - } - ] - }, { code: String.raw`var r = new RegExp("[πŸ‡―πŸ‡΅]")`, errors: [ @@ -1111,8 +1108,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\uD83C\\uDDEF\\uD83C\\uDDF5]", "u")`, errors: [{ - column: 20, - endColumn: 52, + column: 22, + endColumn: 50, messageId: "regionalIndicatorSymbol", suggestions: null }] @@ -1120,8 +1117,8 @@ ruleTester.run("no-misleading-character-class", rule, { { code: String.raw`var r = new RegExp("[\\u{1F1EF}\\u{1F1F5}]", "u")`, errors: [{ - column: 20, - endColumn: 44, + column: 22, + endColumn: 42, messageId: "regionalIndicatorSymbol", suggestions: null }] @@ -1238,8 +1235,8 @@ ruleTester.run("no-misleading-character-class", rule, { code: String.raw`var r = new RegExp("[\\uD83D\\uDC68\\u200D\\uD83D\\uDC69\\u200D\\uD83D\\uDC66]", "u")`, errors: [ { - column: 20, - endColumn: 80, + column: 22, + endColumn: 78, messageId: "zwj", suggestions: null } @@ -1249,8 +1246,8 @@ ruleTester.run("no-misleading-character-class", rule, { code: String.raw`var r = new RegExp("[\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F466}]", "u")`, errors: [ { - column: 20, - endColumn: 72, + column: 22, + endColumn: 70, messageId: "zwj", suggestions: null } @@ -1299,8 +1296,8 @@ ruleTester.run("no-misleading-character-class", rule, { languageOptions: { ecmaVersion: 2020 }, errors: [ { - column: 31, - endColumn: 83, + column: 33, + endColumn: 81, messageId: "zwj", suggestions: null } @@ -1335,8 +1332,242 @@ ruleTester.run("no-misleading-character-class", rule, { }] }, + // no granular reports on templates with expressions + { + code: 'new RegExp(`${"[πŸ‘πŸ‡―πŸ‡΅]"}[😊]`);', + errors: [{ + column: 12, + endColumn: 31, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ + messageId: "suggestUnicodeFlag", + output: 'new RegExp(`${"[πŸ‘πŸ‡―πŸ‡΅]"}[😊]`, 
"u");' + }] + }] + }, + + // no granular reports on identifiers + { + code: 'const pattern = "[πŸ‘]"; new RegExp(pattern);', + errors: [{ + column: 36, + endColumn: 43, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ + messageId: "suggestUnicodeFlag", + output: 'const pattern = "[πŸ‘]"; new RegExp(pattern, "u");' + }] + }] + }, + + // second argument in RegExp should override flags in regex literal + { + code: "RegExp(/[aπŸ‘z]/u, '');", + errors: [{ + column: 11, + endColumn: 13, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ + messageId: "suggestUnicodeFlag", + output: "RegExp(/[aπŸ‘z]/u, 'u');" + }] + }] + }, + + /* + * These test cases have been disabled because of a limitation in Node.js 18, see https://github.com/eslint/eslint/pull/18082#discussion_r1506142421. + * + * { + * code: "const pattern = /[πŸ‘]/u; RegExp(pattern, '');", + * errors: [{ + * column: 33, + * endColumn: 40, + * messageId: "surrogatePairWithoutUFlag", + * suggestions: [{ + * messageId: "suggestUnicodeFlag", + * output: "const pattern = /[πŸ‘]/u; RegExp(pattern, 'u');" + * }] + * }] + * }, + * { + * code: "const pattern = /[πŸ‘]/g; RegExp(pattern, 'i');", + * errors: [{ + * column: 19, + * endColumn: 21, + * messageId: "surrogatePairWithoutUFlag", + * suggestions: [{ + * messageId: "suggestUnicodeFlag", + * output: "const pattern = /[πŸ‘]/gu; RegExp(pattern, 'i');" + * }] + * }, { + * column: 33, + * endColumn: 40, + * messageId: "surrogatePairWithoutUFlag", + * suggestions: [{ + * messageId: "suggestUnicodeFlag", + * output: "const pattern = /[πŸ‘]/g; RegExp(pattern, 'iu');" + * }] + * }] + * }, + */ + + // report only on regex literal if no flags are supplied + { + code: "RegExp(/[πŸ‘]/)", + errors: [{ + column: 10, + endColumn: 12, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[πŸ‘]/u)" }] + }] + }, + + // report only on RegExp call if a regex literal and flags are supplied + { + code: 
"RegExp(/[πŸ‘]/, 'i');", + errors: [{ + column: 10, + endColumn: 12, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[πŸ‘]/, 'iu');" }] + }] + }, + + // ignore RegExp if not built-in + { + code: "RegExp(/[πŸ‘]/, 'g');", + languageOptions: { globals: { RegExp: "off" } }, + errors: [{ + column: 10, + endColumn: 12, + messageId: "surrogatePairWithoutUFlag", + suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[πŸ‘]/u, 'g');" }] + }] + }, + + { + code: String.raw` + + // "[" and "]" escaped as "\x5B" and "\u005D" + new RegExp("\x5B \\ufe0f\u005D") + + `, + errors: [{ + column: 29, + endColumn: 37, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // backslash escaped as "\u{5c}" + new RegExp("[ \u{5c}ufe0f]") + + `, + errors: [{ + column: 26, + endColumn: 38, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // "0" escaped as "\60" + new RegExp("[ \\ufe\60f]") + + `, + languageOptions: { sourceType: "script" }, + errors: [{ + column: 26, + endColumn: 36, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // "e" escaped as "\e" + new RegExp("[ \\uf\e0f]") + + `, + errors: [{ + column: 26, + endColumn: 35, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // line continuation: backslash + + + new RegExp('[ \\ufe0f]') + + `.replace("", "\\\r\n"), + errors: [{ + line: 4, + column: 26, + endLine: 5, + endColumn: 5, + messageId: "combiningClass", + suggestions: null + }] + }, + { + code: String.raw` + + // just a backslash escaped as "\\" + new RegExp([.\\u200D.]) + + `.replaceAll("", "`"), + errors: [{ + column: 26, + endColumn: 35, + messageId: "zwj", + suggestions: null + }] + }, + { + code: String.raw` + + // "u" escaped as "\x75" + new RegExp([.\\\x75200D.]) + + `.replaceAll("", "`"), + errors: [{ + column: 26, + endColumn: 38, + 
messageId: "zwj", + suggestions: null + }] + }, + + /* eslint-disable lines-around-comment, internal-rules/multiline-comment-style -- see https://github.com/eslint/eslint/issues/18081 */ + + { + code: String.raw` + + // unescaped counts as a single character + new RegExp([\\u200D.]) + + `.replaceAll("", "`").replace("", "\n"), + errors: [{ + line: 4, + column: 26, + endLine: 5, + endColumn: 9, + messageId: "zwj", + suggestions: null + }] + }, // ES2024 + { code: "var r = /[[πŸ‘ΆπŸ»]]/v", languageOptions: { ecmaVersion: 2024 }, @@ -1348,17 +1579,41 @@ ruleTester.run("no-misleading-character-class", rule, { }] }, { - code: "var r = /[πŸ‘]/", + code: "new RegExp(/^[πŸ‘]$/v, '')", languageOptions: { - ecmaVersion: 2015 + ecmaVersion: 2024 }, errors: [{ - column: 11, - endColumn: 13, + column: 15, + endColumn: 17, messageId: "surrogatePairWithoutUFlag", - suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = /[πŸ‘]/u" }] + suggestions: [{ messageId: "suggestUnicodeFlag", output: "new RegExp(/^[πŸ‘]$/v, 'u')" }] }] } + /* + * This test case has been disabled because of a limitation in Node.js 18, see https://github.com/eslint/eslint/pull/18082#discussion_r1506142421. 
+         *
+         * {
+         *     code: "var r = /[👶🏻]/v; RegExp(r, 'v');",
+         *     languageOptions: {
+         *         ecmaVersion: 2024
+         *     },
+         *     errors: [{
+         *         column: 11,
+         *         endColumn: 15,
+         *         messageId: "emojiModifier",
+         *         suggestions: null
+         *     }, {
+         *         column: 27,
+         *         endColumn: 28,
+         *         messageId: "emojiModifier",
+         *         suggestions: null
+         *     }]
+         * }
+         */
+
+        /* eslint-enable lines-around-comment, internal-rules/multiline-comment-style -- re-enable rule */
+
     ]
 });
diff --git a/tests/lib/rules/utils/char-source.js b/tests/lib/rules/utils/char-source.js
new file mode 100644
index 00000000000..2f37d9f3c0f
--- /dev/null
+++ b/tests/lib/rules/utils/char-source.js
@@ -0,0 +1,256 @@
+"use strict";
+
+const assertStrict = require("node:assert/strict");
+const { parseStringLiteral, parseTemplateToken } = require("../../../../lib/rules/utils/char-source");
+
+describe(
+    "parseStringLiteral",
+    () => {
+        const TESTS = [
+            {
+                description: "works with an empty string",
+                source: '""',
+                expectedCodeUnits: []
+            },
+            {
+                description: "works with surrogate pairs",
+                source: '"a𝄞z"',
+                expectedCodeUnits: [
+                    { start: 1, source: "a" },
+                    { start: 2, source: "\ud834" },
+                    { start: 3, source: "\udd1e" },
+                    { start: 4, source: "z" }
+                ]
+            },
+            {
+                description: "works with escape sequences for single characters",
+                source: '"a\\x40\\u231Bz"',
+                expectedCodeUnits: [
+                    { start: 1, source: "a" },
+                    { start: 2, source: "\\x40" },
+                    { start: 6, source: "\\u231B" },
+                    { start: 12, source: "z" }
+                ]
+            },
+            {
+                description: "works with escape sequences for code points",
+                source: '"a\\u{ffff}\\u{10000}\\u{10ffff}z"',
+                expectedCodeUnits: [
+                    { start: 1, source: "a" },
+                    { start: 2, source: "\\u{ffff}" },
+                    { start: 10, source: "\\u{10000}" },
+                    { start: 10, source: "\\u{10000}" },
+                    { start: 19, source: "\\u{10ffff}" },
+                    { start: 19, source: "\\u{10ffff}" },
+                    { start: 29, source: "z" }
+                ]
+            },
+            {
+                description: "works with line continuations",
+                source: '"a\\\n\\\r\n\\\u2028\\\u2029z"',
+                expectedCodeUnits: [
+                    { start: 1, source: "a" },
+                    { start: 11, source: "z" }
+                ]
+            },
+            {
+                description: "works with simple escape sequences",
+                source: '"\\"\\0\\b\\f\\n\\r\\t\\v"',
+                expectedCodeUnits: ['\\"', "\\0", "\\b", "\\f", "\\n", "\\r", "\\t", "\\v"]
+                    .map((source, index) => ({ source, start: 1 + index * 2 }))
+            },
+            {
+                description: "works with a character outside of a line continuation",
+                source: '"a\u2028z"',
+                expectedCodeUnits: [
+                    { start: 1, source: "a" },
+                    { start: 2, source: "\u2028" },
+                    { start: 3, source: "z" }
+                ]
+            },
+            {
+                description: "works with a character outside of a line continuation",
+                source: '"a\u2029z"',
+                expectedCodeUnits: [
+                    { start: 1, source: "a" },
+                    { start: 2, source: "\u2029" },
+                    { start: 3, source: "z" }
+                ]
+            },
+            {
+                description: "works with octal escape sequences",
+                source: '"\\0123\\456"',
+                expectedCodeUnits: [
+                    { source: "\\012", start: 1 },
+                    { source: "3", start: 5 },
+                    { source: "\\45", start: 6 },
+                    { source: "6", start: 9 }
+                ]
+            },
+            {
+                description: "works with an escaped 7",
+                source: '"\\7"',
+                expectedCodeUnits: [{ source: "\\7", start: 1 }]
+            },
+            {
+                description: "works with an escaped 8",
+                source: '"\\8"',
+                expectedCodeUnits: [{ source: "\\8", start: 1 }]
+            },
+            {
+                description: "works with an escaped 9",
+                source: '"\\9"',
+                expectedCodeUnits: [{ source: "\\9", start: 1 }]
+            },
+            {
+                description: 'works with the escaped sequence "00"',
+                source: '"\\00"',
+                expectedCodeUnits: [{ source: "\\00", start: 1 }]
+            },
+            {
+                description: "works with an escaped 0 followed by 8",
+                source: '"\\08"',
+                expectedCodeUnits: [
+                    { source: "\\0", start: 1 },
+                    { source: "8", start: 3 }
+                ]
+            },
+            {
+                description: "works with an escaped 0 followed by 9",
+                source: '"\\09"',
+                expectedCodeUnits: [
+                    { source: "\\0", start: 1 },
+                    { source: "9", start: 3 }
+                ]
+            }
+        ];
+
+        for (const { description, source, expectedCodeUnits, only } of TESTS) {
+            (only ? it.only : it)(
+                description,
+                () => {
+                    const codeUnits = parseStringLiteral(source);
+                    const expectedCharCount = expectedCodeUnits.length;
+
+                    assertStrict.equal(codeUnits.length, expectedCharCount);
+                    for (let index = 0; index < expectedCharCount; ++index) {
+                        const codeUnit = codeUnits[index];
+                        const expectedUnit = expectedCodeUnits[index];
+                        const message = `Expected values to be strictly equal at index ${index}`;
+
+                        assertStrict.equal(codeUnit.start, expectedUnit.start, message);
+                        assertStrict.equal(codeUnit.source, expectedUnit.source, message);
+                    }
+                }
+            );
+        }
+    }
+);
+
+describe(
+    "parseTemplateToken",
+    () => {
+        const TESTS =
+        [
+            {
+                description: "works with an empty template",
+                source: "``",
+                expectedCodeUnits: []
+            },
+            {
+                description: "works with surrogate pairs",
+                source: "`A𝄞Z`",
+                expectedCodeUnits: [
+                    { start: 1, source: "A" },
+                    { start: 2, source: "\ud834" },
+                    { start: 3, source: "\udd1e" },
+                    { start: 4, source: "Z" }
+                ]
+            },
+            {
+                description: "works with escape sequences for single characters",
+                source: "`A\\x40\\u231BZ${",
+                expectedCodeUnits: [
+                    { start: 1, source: "A" },
+                    { start: 2, source: "\\x40" },
+                    { start: 6, source: "\\u231B" },
+                    { start: 12, source: "Z" }
+                ]
+            },
+            {
+                description: "works with escape sequences for code points",
+                source: "}A\\u{FFFF}\\u{10000}\\u{10FFFF}Z${",
+                expectedCodeUnits: [
+                    { start: 1, source: "A" },
+                    { start: 2, source: "\\u{FFFF}" },
+                    { start: 10, source: "\\u{10000}" },
+                    { start: 10, source: "\\u{10000}" },
+                    { start: 19, source: "\\u{10FFFF}" },
+                    { start: 19, source: "\\u{10FFFF}" },
+                    { start: 29, source: "Z" }
+                ]
+            },
+            {
+                description: "works with line continuations",
+                source: "}A\\\n\\\r\n\\\u2028\\\u2029Z`",
+                expectedCodeUnits: [
+                    { start: 1, source: "A" },
+                    { start: 11, source: "Z" }
+                ]
+            },
+            {
+                description: "works with simple escape sequences",
+                source: "`\\0\\`\\b\\f\\n\\r\\t\\v`",
+                expectedCodeUnits: ["\\0", "\\`", "\\b", "\\f", "\\n", "\\r", "\\t", "\\v"]
+                    .map((source, index) => ({ source, start: 1 + index * 2 }))
+            },
+            {
+                description: "works with a character outside of a line continuation",
+                source: "`a\u2028z`",
+                expectedCodeUnits: [
+                    { start: 1, source: "a" },
+                    { start: 2, source: "\u2028" },
+                    { start: 3, source: "z" }
+                ]
+            },
+            {
+                description: "works with a character outside of a line continuation",
+                source: "`a\u2029z`",
+                expectedCodeUnits: [
+                    { start: 1, source: "a" },
+                    { start: 2, source: "\u2029" },
+                    { start: 3, source: "z" }
+                ]
+            },
+            {
+                description: "works with unescaped sequences",
+                source: "`A\r\nZ`",
+                expectedCodeUnits: [
+                    { start: 1, source: "A" },
+                    { start: 2, source: "\r\n" },
+                    { start: 4, source: "Z" }
+                ]
+            }
+        ];
+
+        for (const { description, source, expectedCodeUnits, only } of TESTS) {
+            (only ? it.only : it)(
+                description,
+                () => {
+                    const codeUnits = parseTemplateToken(source);
+                    const expectedCharCount = expectedCodeUnits.length;
+
+                    assertStrict.equal(codeUnits.length, expectedCharCount);
+                    for (let index = 0; index < expectedCharCount; ++index) {
+                        const codeUnit = codeUnits[index];
+                        const expectedUnit = expectedCodeUnits[index];
+                        const message = `Expected values to be strictly equal at index ${index}`;
+
+                        assertStrict.equal(codeUnit.start, expectedUnit.start, message);
+                        assertStrict.equal(codeUnit.source, expectedUnit.source, message);
+                    }
+                }
+            );
+        }
+    }
+);