From a451b32b33535a57b4b7e24291f30760f65460ba Mon Sep 17 00:00:00 2001
From: Francesco Trotta <github@fasttime.org>
Date: Tue, 5 Mar 2024 13:05:04 +0100
Subject: [PATCH] feat: make `no-misleading-character-class` report more
 granular errors (#18082)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: report granular errors on arbitrary literals

* use npm dependency

* test with unescaped CRLF

* inline `createReportLocationGenerator`

* unit test for templates with expressions

* restore old name `getNodeReportLocations`

* update JSDoc

* `charInfos` → `codeUnits`

* extract char-source to a utility module

* add `read` method to `SourceReader`

* add `advance` method and JSDoc

* fix logic

* `SourceReader` → `TextReader`

* handle `RegExp` calls with regex patterns

* fix for browser test

* fix for Node.js 18

* limit applicability of `getStaticValue` for Node.js 18 compatibility

* fix for `RegExp()` without arguments

* update JSDoc for `getStaticValueOrRegex`
---
 lib/rules/no-misleading-character-class.js    | 173 ++++---
 lib/rules/utils/char-source.js                | 240 ++++++++++
 .../rules/no-misleading-character-class.js    | 423 ++++++++++++++----
 tests/lib/rules/utils/char-source.js          | 256 +++++++++++
 4 files changed, 945 insertions(+), 147 deletions(-)
 create mode 100644 lib/rules/utils/char-source.js
 create mode 100644 tests/lib/rules/utils/char-source.js

diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js
index 8d818665790..fa50e226f97 100644
--- a/lib/rules/no-misleading-character-class.js
+++ b/lib/rules/no-misleading-character-class.js
@@ -3,11 +3,18 @@
  */
 "use strict";
 
-const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils");
+const {
+    CALL,
+    CONSTRUCT,
+    ReferenceTracker,
+    getStaticValue,
+    getStringIfConstant
+} = require("@eslint-community/eslint-utils");
 const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
 const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
 const astUtils = require("./utils/ast-utils.js");
 const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
+const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source");
 
 //------------------------------------------------------------------------------
 // Helpers
@@ -193,6 +200,33 @@ const findCharacterSequences = {
 
 const kinds = Object.keys(findCharacterSequences);
 
+/**
+ * Gets a static representation of the given node: its `regex` property if it is a regular expression literal, otherwise its static value unless that value is a `RegExp` object.
+ * Regular expression literals are recognized syntactically, so their pattern and flags are available without evaluating them.
+ * The purpose of this function is to provide a replacement for `getStaticValue` in environments where certain regular expressions cannot be evaluated.
+ * A known example is Node.js 18 which does not support the `v` flag.
+ * Calling `getStaticValue` on a regular expression node with the `v` flag on Node.js 18 always returns `null`.
+ * A limitation of this function is that it can only detect a regular expression if the specified node is itself a regular expression literal node.
+ * @param {ASTNode | undefined} node The node to be inspected. A missing node yields `null`.
+ * @param {Scope} initialScope Scope to start finding variables. This function tries to resolve identifier references which are in the given scope.
+ * @returns {{ value: any } | { regex: { pattern: string, flags: string } } | null} `{ regex }` for a regular expression literal, the `getStaticValue` result for other nodes unless it holds a `RegExp` object, or `null`.
+ */
+function getStaticValueOrRegex(node, initialScope) {
+    if (!node) {
+        return null;
+    }
+    if (node.type === "Literal" && node.regex) {
+        return { regex: node.regex }; // taken straight from the AST, no evaluation needed
+    }
+
+    const staticValue = getStaticValue(node, initialScope);
+
+    if (staticValue?.value instanceof RegExp) {
+        return null; // regex objects reached indirectly (e.g. through a variable) are not handled
+    }
+    return staticValue;
+}
+
 //------------------------------------------------------------------------------
 // Rule Definition
 //------------------------------------------------------------------------------
@@ -225,62 +259,7 @@ module.exports = {
     create(context) {
         const sourceCode = context.sourceCode;
         const parser = new RegExpParser();
-
-        /**
-         * Generates a granular loc for context.report, if directly calculable.
-         * @param {Character[]} chars Individual characters being reported on.
-         * @param {Node} node Parent string node to report within.
-         * @returns {Object | null} Granular loc for context.report, if directly calculable.
-         * @see https://github.com/eslint/eslint/pull/17515
-         */
-        function generateReportLocation(chars, node) {
-
-            // Limit to to literals and expression-less templates with raw values === their value.
-            switch (node.type) {
-                case "TemplateLiteral":
-                    if (node.expressions.length || sourceCode.getText(node).slice(1, -1) !== node.quasis[0].value.cooked) {
-                        return null;
-                    }
-                    break;
-
-                case "Literal":
-                    if (typeof node.value === "string" && node.value !== node.raw.slice(1, -1)) {
-                        return null;
-                    }
-                    break;
-
-                default:
-                    return null;
-            }
-
-            return {
-                start: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[0].start),
-                end: sourceCode.getLocFromIndex(node.range[0] + 1 + chars.at(-1).end)
-            };
-        }
-
-        /**
-         * Finds the report loc(s) for a range of matches.
-         * @param {Character[][]} matches Characters that should trigger a report.
-         * @param {Node} node The node to report.
-         * @returns {Object | null} Node loc(s) for context.report.
-         */
-        function getNodeReportLocations(matches, node) {
-            const locs = [];
-
-            for (const chars of matches) {
-                const loc = generateReportLocation(chars, node);
-
-                // If a report can't match to a range, don't report any others
-                if (!loc) {
-                    return [node.loc];
-                }
-
-                locs.push(loc);
-            }
-
-            return locs;
-        }
+        const checkedPatternNodes = new Set();
 
         /**
          * Verify a given regular expression.
@@ -320,12 +299,58 @@ module.exports = {
                             } else {
                                 foundKindMatches.set(kind, [...findCharacterSequences[kind](chars)]);
                             }
-
                         }
                     }
                 }
             });
 
+            let codeUnits = null;
+
+            /**
+             * Finds the report loc(s) for a range of matches within the node being verified.
+             * Only literals and expression-less templates generate granular errors; any other node is reported as a whole, once.
+             * @param {Character[][]} matches Lists of individual characters being reported on.
+             * @returns {Location[]} One loc per match for literals and expression-less templates; otherwise `[node.loc]`, or `[]` when there are no matches.
+             * @see https://github.com/eslint/eslint/pull/17515
+             */
+            function getNodeReportLocations(matches) {
+                if (!astUtils.isStaticTemplateLiteral(node) && node.type !== "Literal") {
+                    return matches.length ? [node.loc] : [];
+                }
+                return matches.map(chars => {
+                    const firstIndex = chars[0].start; // offset of the match's first code unit in the pattern
+                    const lastIndex = chars.at(-1).end - 1; // `end` is exclusive, so this is the offset of the last code unit
+                    let start;
+                    let end;
+
+                    if (node.type === "TemplateLiteral") {
+                        const source = sourceCode.getText(node);
+                        const offset = node.range[0];
+
+                        codeUnits ??= parseTemplateToken(source); // parsed lazily and reused for the remaining matches
+                        start = offset + codeUnits[firstIndex].start;
+                        end = offset + codeUnits[lastIndex].end;
+                    } else if (typeof node.value === "string") { // String Literal
+                        const source = node.raw;
+                        const offset = node.range[0];
+
+                        codeUnits ??= parseStringLiteral(source); // parsed lazily and reused for the remaining matches
+                        start = offset + codeUnits[firstIndex].start;
+                        end = offset + codeUnits[lastIndex].end;
+                    } else { // RegExp Literal: the pattern is written verbatim, so pattern offsets map directly to source offsets
+                        const offset = node.range[0] + 1; // Add 1 to skip the leading slash.
+
+                        start = offset + firstIndex;
+                        end = offset + lastIndex + 1;
+                    }
+
+                    return {
+                        start: sourceCode.getLocFromIndex(start),
+                        end: sourceCode.getLocFromIndex(end)
+                    };
+                });
+            }
+
             for (const [kind, matches] of foundKindMatches) {
                 let suggest;
 
@@ -336,7 +361,7 @@ module.exports = {
                     }];
                 }
 
-                const locs = getNodeReportLocations(matches, node);
+                const locs = getNodeReportLocations(matches);
 
                 for (const loc of locs) {
                     context.report({
@@ -351,6 +376,9 @@ module.exports = {
 
         return {
             "Literal[regex]"(node) {
+                if (checkedPatternNodes.has(node)) {
+                    return;
+                }
                 verify(node, node.regex.pattern, node.regex.flags, fixer => {
                     if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) {
                         return null;
@@ -371,12 +399,31 @@ module.exports = {
                 for (const { node: refNode } of tracker.iterateGlobalReferences({
                     RegExp: { [CALL]: true, [CONSTRUCT]: true }
                 })) {
+                    let pattern, flags;
                     const [patternNode, flagsNode] = refNode.arguments;
-                    const pattern = getStringIfConstant(patternNode, scope);
-                    const flags = getStringIfConstant(flagsNode, scope);
+                    const evaluatedPattern = getStaticValueOrRegex(patternNode, scope);
+
+                    if (!evaluatedPattern) {
+                        continue;
+                    }
+                    if (flagsNode) {
+                        if (evaluatedPattern.regex) {
+                            pattern = evaluatedPattern.regex.pattern;
+                            checkedPatternNodes.add(patternNode);
+                        } else {
+                            pattern = String(evaluatedPattern.value);
+                        }
+                        flags = getStringIfConstant(flagsNode, scope);
+                    } else {
+                        if (evaluatedPattern.regex) {
+                            continue;
+                        }
+                        pattern = String(evaluatedPattern.value);
+                        flags = "";
+                    }
 
-                    if (typeof pattern === "string") {
-                        verify(patternNode, pattern, flags || "", fixer => {
+                    if (typeof flags === "string") {
+                        verify(patternNode, pattern, flags, fixer => {
 
                             if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
                                 return null;
diff --git a/lib/rules/utils/char-source.js b/lib/rules/utils/char-source.js
new file mode 100644
index 00000000000..70738625b94
--- /dev/null
+++ b/lib/rules/utils/char-source.js
@@ -0,0 +1,240 @@
+/**
+ * @fileoverview Utility functions to locate the source text of each code unit in the value of a string literal or template token.
+ * @author Francesco Trotta
+ */
+
+"use strict";
+
+/**
+ * Represents a code unit produced by the evaluation of a JavaScript common token like a string
+ * literal or template token, together with the location of the source text that produced it.
+ */
+class CodeUnit {
+    constructor(start, source) {
+        this.start = start; // offset of `source` within the token text
+        this.source = source; // raw text that evaluates to this code unit, e.g. a single character or an escape sequence
+    }
+
+    get end() { // exclusive end offset of `source` within the token text
+        return this.start + this.length;
+    }
+
+    get length() { // length of the source text, not of the evaluated value
+        return this.source.length;
+    }
+}
+
+/**
+ * A cursor over a source text: it keeps track of the position where the next characters will be read.
+ */
+class TextReader {
+    constructor(source) {
+        this.source = source; // the full text being read
+        this.pos = 0; // reading starts at the first character
+    }
+
+    /**
+     * Advances the reading position by the specified number of characters.
+     * @param {number} length Number of characters to advance.
+     * @returns {void}
+     */
+    advance(length) {
+        this.pos += length;
+    }
+
+    /**
+     * Reads characters from the source without moving the reading position.
+     * @param {number} [offset=0] The offset where reading starts, relative to the current position.
+     * @param {number} [length=1] Number of characters to read.
+     * @returns {string} A substring of source characters; shorter than `length` (possibly empty) if the source ends first.
+     */
+    read(offset = 0, length = 1) {
+        const start = offset + this.pos;
+
+        return this.source.slice(start, start + length);
+    }
+}
+
+const SIMPLE_ESCAPE_SEQUENCES =
+{ __proto__: null, b: "\b", f: "\f", n: "\n", r: "\r", t: "\t", v: "\v" }; // escaped letter -> character it denotes; the null prototype keeps inherited properties out of lookups
+
+/**
+ * Reads a fixed number of hexadecimal digits as one code unit and moves the reader past them.
+ * @param {TextReader} reader The reader should be positioned on the first hexadecimal digit.
+ * @param {number} length The number of hexadecimal digits.
+ * @returns {string} A code unit.
+ */
+function readHexSequence(reader, length) {
+    const str = reader.read(0, length);
+    const charCode = parseInt(str, 16);
+
+    reader.advance(length); // consume the digits that were just read
+    return String.fromCharCode(charCode);
+}
+
+/**
+ * Reads a Unicode escape sequence, either in the braced code point form (`\u{...}`) or as four hexadecimal digits.
+ * @param {TextReader} reader The reader should be positioned after the "u".
+ * @returns {string} A string of one or two code units; two when a braced code point lies outside the BMP.
+ */
+function readUnicodeSequence(reader) {
+    const regExp = /\{(?<hexDigits>[\dA-Fa-f]+)\}/uy;
+
+    regExp.lastIndex = reader.pos; // the sticky flag makes the match start exactly at the reader position
+    const match = regExp.exec(reader.source);
+
+    if (match) {
+        const codePoint = parseInt(match.groups.hexDigits, 16);
+
+        reader.pos = regExp.lastIndex; // resume right after the closing brace
+        return String.fromCodePoint(codePoint);
+    }
+    return readHexSequence(reader, 4); // not the braced form, so exactly four hexadecimal digits follow
+}
+
+/**
+ * Reads an octal escape sequence (this also covers `\0`) and moves the reader past its remaining digits.
+ * @param {TextReader} reader The reader should be positioned after the first octal digit.
+ * @param {number} maxLength The maximum number of octal digits, counting the first one that was already consumed.
+ * @returns {string} A code unit.
+ */
+function readOctalSequence(reader, maxLength) {
+    const [octalStr] = reader.read(-1, maxLength).match(/^[0-7]+/u); // step back one character to include the first digit
+
+    reader.advance(octalStr.length - 1); // the first digit was already skipped before this call
+    const octal = parseInt(octalStr, 8);
+
+    return String.fromCharCode(octal);
+}
+
+/**
+ * Reads an escape sequence or line continuation, leaving the reader positioned right after it.
+ * @param {TextReader} reader The reader should be positioned on the backslash.
+ * @returns {string} A string of zero, one or two code units: the evaluated value of the sequence (empty for a line continuation).
+ */
+function readEscapeSequenceOrLineContinuation(reader) {
+    const char = reader.read(1);
+
+    reader.advance(2); // move past the backslash and the character that follows it
+    const unitChar = SIMPLE_ESCAPE_SEQUENCES[char];
+
+    if (unitChar) {
+        return unitChar;
+    }
+    switch (char) {
+        case "x":
+            return readHexSequence(reader, 2);
+        case "u":
+            return readUnicodeSequence(reader);
+        case "\r":
+            if (reader.read() === "\n") { // a CRLF pair is consumed as a single line terminator
+                reader.advance(1);
+            }
+
+            // fallthrough
+        case "\n":
+        case "\u2028":
+        case "\u2029":
+            return ""; // line continuation: contributes nothing to the value
+        case "0":
+        case "1":
+        case "2":
+        case "3":
+            return readOctalSequence(reader, 3); // up to three digits, i.e. at most 0o377
+        case "4":
+        case "5":
+        case "6":
+        case "7":
+            return readOctalSequence(reader, 2); // up to two digits, i.e. at most 0o77
+        default:
+            return char; // any other escaped character stands for itself
+    }
+}
+
+/**
+ * Reads an escape sequence or line continuation and generates one `CodeUnit` per code unit of its value, all sharing the start and source text of the whole sequence.
+ * @param {TextReader} reader The reader should be positioned on the backslash.
+ * @returns {Generator<CodeUnit>} Zero `CodeUnit` elements for a line continuation, otherwise one or two.
+ */
+function *mapEscapeSequenceOrLineContinuation(reader) {
+    const start = reader.pos;
+    const str = readEscapeSequenceOrLineContinuation(reader);
+    const end = reader.pos; // the read above moved the reader past the whole sequence
+    const source = reader.source.slice(start, end);
+
+    switch (str.length) {
+        case 0: // line continuation: nothing to report
+            break;
+        case 1:
+            yield new CodeUnit(start, source);
+            break;
+        default: // two code units (e.g. from `\u{1F44D}`): both map to the same source text
+            yield new CodeUnit(start, source);
+            yield new CodeUnit(start, source);
+            break;
+    }
+}
+
+/**
+ * Parses a string literal into the code units of its value, each with the position and text of its source.
+ * @param {string} source The string literal to parse, including the delimiting quotes. It must be complete: parsing only stops at the closing quote.
+ * @returns {CodeUnit[]} A list of code units produced by the string literal.
+ */
+function parseStringLiteral(source) {
+    const reader = new TextReader(source);
+    const quote = reader.read(); // the opening quote determines which character closes the literal
+
+    reader.advance(1);
+    const codeUnits = [];
+
+    for (;;) {
+        const char = reader.read();
+
+        if (char === quote) {
+            break;
+        }
+        if (char === "\\") {
+            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
+        } else {
+            codeUnits.push(new CodeUnit(reader.pos, char)); // an unescaped character is its own source
+            reader.advance(1);
+        }
+    }
+    return codeUnits;
+}
+
+/**
+ * Parses a template token into the code units of its value, each with the position and text of its source.
+ * @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`. It must be complete: parsing only stops at a closing delimiter.
+ * @returns {CodeUnit[]} A list of code units produced by the template token.
+ */
+function parseTemplateToken(source) {
+    const reader = new TextReader(source);
+
+    reader.advance(1); // skip the one-character opening delimiter
+    const codeUnits = [];
+
+    for (;;) {
+        const char = reader.read();
+
+        if (char === "`" || char === "$" && reader.read(1) === "{") { // closing backtick, or the start of a substitution
+            break;
+        }
+        if (char === "\\") {
+            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
+        } else {
+            let unitSource;
+
+            if (char === "\r" && reader.read(1) === "\n") { // an unescaped CRLF yields one code unit whose source spans both characters
+                unitSource = "\r\n";
+            } else {
+                unitSource = char;
+            }
+            codeUnits.push(new CodeUnit(reader.pos, unitSource));
+            reader.advance(unitSource.length);
+        }
+    }
+    return codeUnits;
+}
+
+module.exports = { parseStringLiteral, parseTemplateToken };
diff --git a/tests/lib/rules/no-misleading-character-class.js b/tests/lib/rules/no-misleading-character-class.js
index 6ad54d42d4a..6a276ae12c2 100644
--- a/tests/lib/rules/no-misleading-character-class.js
+++ b/tests/lib/rules/no-misleading-character-class.js
@@ -40,6 +40,13 @@ ruleTester.run("no-misleading-character-class", rule, {
         "var r = /🇯🇵/",
         "var r = /[JP]/",
         "var r = /👨‍👩‍👦/",
+        "new RegExp()",
+        "var r = RegExp(/[👍]/u)",
+        "const regex = /[👍]/u; new RegExp(regex);",
+        {
+            code: "new RegExp('[👍]')",
+            languageOptions: { globals: { RegExp: "off" } }
+        },
 
         // Ignore solo lead/tail surrogate.
         "var r = /[\\uD83D]/",
@@ -72,6 +79,16 @@ ruleTester.run("no-misleading-character-class", rule, {
         { code: "var r = new globalThis.RegExp('[Á] [ ');", languageOptions: { ecmaVersion: 2020 } },
         { code: "var r = globalThis.RegExp('{ [Á]', 'u');", languageOptions: { ecmaVersion: 2020 } },
 
+        // don't report on templates with expressions
+        "var r = RegExp(`${x}[👍]`)",
+
+        // don't report on unknown flags
+        "var r = new RegExp('[🇯🇵]', `${foo}`)",
+        String.raw`var r = new RegExp("[👍]", flags)`,
+
+        // don't report on spread arguments
+        "const args = ['[👍]', 'i']; new RegExp(...args);",
+
         // ES2024
         { code: "var r = /[👍]/v", languageOptions: { ecmaVersion: 2024 } },
         { code: String.raw`var r = /^[\q{👶🏻}]$/v`, languageOptions: { ecmaVersion: 2024 } },
@@ -625,23 +642,14 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: "var r = new RegExp(`\r\n[❇️]`)",
             errors: [{
-                line: 1,
-                column: 20,
+                line: 2,
+                column: 2,
                 endLine: 2,
-                endColumn: 6,
+                endColumn: 4,
                 messageId: "combiningClass",
                 suggestions: null
             }]
         },
-        {
-            code: String.raw`var r = new RegExp("[👍]", flags)`,
-            errors: [{
-                column: 22,
-                endColumn: 24,
-                messageId: "surrogatePairWithoutUFlag",
-                suggestions: null
-            }]
-        },
         {
             code: String.raw`const flags = ""; var r = new RegExp("[👍]", flags)`,
             errors: [{
@@ -654,8 +662,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = RegExp("[\\uD83D\\uDC4D]", "")`,
             errors: [{
-                column: 16,
-                endColumn: 34,
+                column: 18,
+                endColumn: 32,
                 messageId: "surrogatePairWithoutUFlag",
                 suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("[\\uD83D\\uDC4D]", "u")` }]
             }]
@@ -663,8 +671,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = RegExp("before[\\uD83D\\uDC4D]after", "")`,
             errors: [{
-                column: 16,
-                endColumn: 45,
+                column: 24,
+                endColumn: 38,
                 messageId: "surrogatePairWithoutUFlag",
                 suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("before[\\uD83D\\uDC4D]after", "u")` }]
             }]
@@ -672,8 +680,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = RegExp("[before\\uD83D\\uDC4Dafter]", "")`,
             errors: [{
-                column: 16,
-                endColumn: 45,
+                column: 24,
+                endColumn: 38,
                 messageId: "surrogatePairWithoutUFlag",
                 suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("[before\\uD83D\\uDC4Dafter]", "u")` }]
             }]
@@ -681,8 +689,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = RegExp("\t\t\t👍[👍]")`,
             errors: [{
-                column: 16,
-                endColumn: 30,
+                column: 26,
+                endColumn: 28,
                 messageId: "surrogatePairWithoutUFlag",
                 suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = RegExp("\t\t\t👍[👍]", "u")` }]
             }]
@@ -690,8 +698,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("\u1234[\\uD83D\\uDC4D]")`,
             errors: [{
-                column: 20,
-                endColumn: 44,
+                column: 28,
+                endColumn: 42,
                 messageId: "surrogatePairWithoutUFlag",
                 suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = new RegExp("\u1234[\\uD83D\\uDC4D]", "u")` }]
             }]
@@ -699,8 +707,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("\\u1234\\u5678👎[👍]")`,
             errors: [{
-                column: 20,
-                endColumn: 42,
+                column: 38,
+                endColumn: 40,
                 messageId: "surrogatePairWithoutUFlag",
                 suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = new RegExp("\\u1234\\u5678👎[👍]", "u")` }]
             }]
@@ -708,8 +716,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("\\u1234\\u5678👍[👍]")`,
             errors: [{
-                column: 20,
-                endColumn: 42,
+                column: 38,
+                endColumn: 40,
                 messageId: "surrogatePairWithoutUFlag",
                 suggestions: [{ messageId: "suggestUnicodeFlag", output: String.raw`var r = new RegExp("\\u1234\\u5678👍[👍]", "u")` }]
             }]
@@ -737,8 +745,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[👍]\\a", "")`,
             errors: [{
-                column: 20,
-                endColumn: 29,
+                column: 22,
+                endColumn: 24,
                 messageId: "surrogatePairWithoutUFlag",
                 suggestions: null // pattern would be invalid with the 'u' flag
             }]
@@ -784,8 +792,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\u0041\\u0301]", "")`,
             errors: [{
-                column: 20,
-                endColumn: 38,
+                column: 22,
+                endColumn: 36,
                 messageId: "combiningClass",
                 suggestions: null
             }]
@@ -793,8 +801,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\u0041\\u0301]", "u")`,
             errors: [{
-                column: 20,
-                endColumn: 38,
+                column: 22,
+                endColumn: 36,
                 messageId: "combiningClass",
                 suggestions: null
             }]
@@ -802,8 +810,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\u{41}\\u{301}]", "u")`,
             errors: [{
-                column: 20,
-                endColumn: 39,
+                column: 22,
+                endColumn: 37,
                 messageId: "combiningClass",
                 suggestions: null
             }]
@@ -829,8 +837,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`new RegExp("[ \\ufe0f]", "")`,
             errors: [{
-                column: 12,
-                endColumn: 24,
+                column: 14,
+                endColumn: 22,
                 messageId: "combiningClass",
                 suggestions: null
             }]
@@ -838,8 +846,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`new RegExp("[ \\ufe0f]", "u")`,
             errors: [{
-                column: 12,
-                endColumn: 24,
+                column: 14,
+                endColumn: 22,
                 messageId: "combiningClass",
                 suggestions: null
             }]
@@ -848,8 +856,14 @@ ruleTester.run("no-misleading-character-class", rule, {
             code: String.raw`new RegExp("[ \\ufe0f][ \\ufe0f]")`,
             errors: [
                 {
-                    column: 12,
-                    endColumn: 34,
+                    column: 14,
+                    endColumn: 22,
+                    messageId: "combiningClass",
+                    suggestions: null
+                },
+                {
+                    column: 24,
+                    endColumn: 32,
                     messageId: "combiningClass",
                     suggestions: null
                 }
@@ -858,8 +872,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\u2747\\uFE0F]", "")`,
             errors: [{
-                column: 20,
-                endColumn: 38,
+                column: 22,
+                endColumn: 36,
                 messageId: "combiningClass",
                 suggestions: null
             }]
@@ -867,8 +881,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\u2747\\uFE0F]", "u")`,
             errors: [{
-                column: 20,
-                endColumn: 38,
+                column: 22,
+                endColumn: 36,
                 messageId: "combiningClass",
                 suggestions: null
             }]
@@ -876,8 +890,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\u{2747}\\u{FE0F}]", "u")`,
             errors: [{
-                column: 20,
-                endColumn: 42,
+                column: 22,
+                endColumn: 40,
                 messageId: "combiningClass",
                 suggestions: null
             }]
@@ -911,8 +925,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\uD83D\\uDC76\\uD83C\\uDFFB]", "u")`,
             errors: [{
-                column: 20,
-                endColumn: 52,
+                column: 22,
+                endColumn: 50,
                 messageId: "emojiModifier",
                 suggestions: null
             }]
@@ -920,8 +934,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\u{1F476}\\u{1F3FB}]", "u")`,
             errors: [{
-                column: 20,
-                endColumn: 44,
+                column: 22,
+                endColumn: 42,
                 messageId: "emojiModifier",
                 suggestions: null
             }]
@@ -938,8 +952,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: "var r = RegExp(`\\t\\t\\t👍[👍]`)",
             errors: [{
-                column: 16,
-                endColumn: 30,
+                column: 26,
+                endColumn: 28,
                 messageId: "surrogatePairWithoutUFlag",
                 suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = RegExp(`\\t\\t\\t👍[👍]`, \"u\")" }]
             }]
@@ -995,23 +1009,6 @@ ruleTester.run("no-misleading-character-class", rule, {
                 }
             ]
         },
-        {
-            code: "var r = new RegExp('[🇯🇵]', `${foo}`)",
-            errors: [
-                {
-                    column: 22,
-                    endColumn: 24,
-                    messageId: "surrogatePairWithoutUFlag",
-                    suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = new RegExp('[🇯🇵]', `${foo}u`)" }]
-                },
-                {
-                    column: 24,
-                    endColumn: 26,
-                    messageId: "surrogatePairWithoutUFlag",
-                    suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = new RegExp('[🇯🇵]', `${foo}u`)" }]
-                }
-            ]
-        },
         {
             code: String.raw`var r = new RegExp("[🇯🇵]")`,
             errors: [
@@ -1111,8 +1108,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\uD83C\\uDDEF\\uD83C\\uDDF5]", "u")`,
             errors: [{
-                column: 20,
-                endColumn: 52,
+                column: 22,
+                endColumn: 50,
                 messageId: "regionalIndicatorSymbol",
                 suggestions: null
             }]
@@ -1120,8 +1117,8 @@ ruleTester.run("no-misleading-character-class", rule, {
         {
             code: String.raw`var r = new RegExp("[\\u{1F1EF}\\u{1F1F5}]", "u")`,
             errors: [{
-                column: 20,
-                endColumn: 44,
+                column: 22,
+                endColumn: 42,
                 messageId: "regionalIndicatorSymbol",
                 suggestions: null
             }]
@@ -1238,8 +1235,8 @@ ruleTester.run("no-misleading-character-class", rule, {
             code: String.raw`var r = new RegExp("[\\uD83D\\uDC68\\u200D\\uD83D\\uDC69\\u200D\\uD83D\\uDC66]", "u")`,
             errors: [
                 {
-                    column: 20,
-                    endColumn: 80,
+                    column: 22,
+                    endColumn: 78,
                     messageId: "zwj",
                     suggestions: null
                 }
@@ -1249,8 +1246,8 @@ ruleTester.run("no-misleading-character-class", rule, {
             code: String.raw`var r = new RegExp("[\\u{1F468}\\u{200D}\\u{1F469}\\u{200D}\\u{1F466}]", "u")`,
             errors: [
                 {
-                    column: 20,
-                    endColumn: 72,
+                    column: 22,
+                    endColumn: 70,
                     messageId: "zwj",
                     suggestions: null
                 }
@@ -1299,8 +1296,8 @@ ruleTester.run("no-misleading-character-class", rule, {
             languageOptions: { ecmaVersion: 2020 },
             errors: [
                 {
-                    column: 31,
-                    endColumn: 83,
+                    column: 33,
+                    endColumn: 81,
                     messageId: "zwj",
                     suggestions: null
                 }
@@ -1335,8 +1332,242 @@ ruleTester.run("no-misleading-character-class", rule, {
             }]
         },
 
+        // no granular reports on templates with expressions
+        {
+            code: 'new RegExp(`${"[👍🇯🇵]"}[😊]`);',
+            errors: [{
+                column: 12,
+                endColumn: 31,
+                messageId: "surrogatePairWithoutUFlag",
+                suggestions: [{
+                    messageId: "suggestUnicodeFlag",
+                    output: 'new RegExp(`${"[👍🇯🇵]"}[😊]`, "u");'
+                }]
+            }]
+        },
+
+        // no granular reports on identifiers
+        {
+            code: 'const pattern = "[👍]"; new RegExp(pattern);',
+            errors: [{
+                column: 36,
+                endColumn: 43,
+                messageId: "surrogatePairWithoutUFlag",
+                suggestions: [{
+                    messageId: "suggestUnicodeFlag",
+                    output: 'const pattern = "[👍]"; new RegExp(pattern, "u");'
+                }]
+            }]
+        },
+
+        // second argument in RegExp should override flags in regex literal
+        {
+            code: "RegExp(/[a👍z]/u, '');",
+            errors: [{
+                column: 11,
+                endColumn: 13,
+                messageId: "surrogatePairWithoutUFlag",
+                suggestions: [{
+                    messageId: "suggestUnicodeFlag",
+                    output: "RegExp(/[a👍z]/u, 'u');"
+                }]
+            }]
+        },
+
+        /*
+         * These test cases have been disabled because of a limitation in Node.js 18, see https://github.com/eslint/eslint/pull/18082#discussion_r1506142421.
+         *
+         * {
+         *     code: "const pattern = /[👍]/u; RegExp(pattern, '');",
+         *     errors: [{
+         *         column: 33,
+         *         endColumn: 40,
+         *         messageId: "surrogatePairWithoutUFlag",
+         *         suggestions: [{
+         *             messageId: "suggestUnicodeFlag",
+         *             output: "const pattern = /[👍]/u; RegExp(pattern, 'u');"
+         *         }]
+         *     }]
+         * },
+         * {
+         *     code: "const pattern = /[👍]/g; RegExp(pattern, 'i');",
+         *     errors: [{
+         *         column: 19,
+         *         endColumn: 21,
+         *         messageId: "surrogatePairWithoutUFlag",
+         *         suggestions: [{
+         *             messageId: "suggestUnicodeFlag",
+         *             output: "const pattern = /[👍]/gu; RegExp(pattern, 'i');"
+         *         }]
+         *     }, {
+         *         column: 33,
+         *         endColumn: 40,
+         *         messageId: "surrogatePairWithoutUFlag",
+         *         suggestions: [{
+         *             messageId: "suggestUnicodeFlag",
+         *             output: "const pattern = /[👍]/g; RegExp(pattern, 'iu');"
+         *         }]
+         *     }]
+         * },
+         */
+
+        // report only on regex literal if no flags are supplied
+        {
+            code: "RegExp(/[👍]/)",
+            errors: [{
+                column: 10,
+                endColumn: 12,
+                messageId: "surrogatePairWithoutUFlag",
+                suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[👍]/u)" }]
+            }]
+        },
+
+        // report only on RegExp call if a regex literal and flags are supplied
+        {
+            code: "RegExp(/[👍]/, 'i');",
+            errors: [{
+                column: 10,
+                endColumn: 12,
+                messageId: "surrogatePairWithoutUFlag",
+                suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[👍]/, 'iu');" }]
+            }]
+        },
+
+        // ignore RegExp if not built-in
+        {
+            code: "RegExp(/[👍]/, 'g');",
+            languageOptions: { globals: { RegExp: "off" } },
+            errors: [{
+                column: 10,
+                endColumn: 12,
+                messageId: "surrogatePairWithoutUFlag",
+                suggestions: [{ messageId: "suggestUnicodeFlag", output: "RegExp(/[👍]/u, 'g');" }]
+            }]
+        },
+
+        {
+            code: String.raw`
+
+            // "[" and "]" escaped as "\x5B" and "\u005D"
+            new RegExp("\x5B \\ufe0f\u005D")
+
+            `,
+            errors: [{
+                column: 29,
+                endColumn: 37,
+                messageId: "combiningClass",
+                suggestions: null
+            }]
+        },
+        {
+            code: String.raw`
+
+            // backslash escaped as "\u{5c}"
+            new RegExp("[ \u{5c}ufe0f]")
+
+            `,
+            errors: [{
+                column: 26,
+                endColumn: 38,
+                messageId: "combiningClass",
+                suggestions: null
+            }]
+        },
+        {
+            code: String.raw`
+
+            // "0" escaped as "\60"
+            new RegExp("[ \\ufe\60f]")
+
+            `,
+            languageOptions: { sourceType: "script" },
+            errors: [{
+                column: 26,
+                endColumn: 36,
+                messageId: "combiningClass",
+                suggestions: null
+            }]
+        },
+        {
+            code: String.raw`
+
+            // "e" escaped as "\e"
+            new RegExp("[ \\uf\e0f]")
+
+            `,
+            errors: [{
+                column: 26,
+                endColumn: 35,
+                messageId: "combiningClass",
+                suggestions: null
+            }]
+        },
+        {
+            code: String.raw`
+
+            // line continuation: backslash + <CR> + <LF>
+            new RegExp('[ \\u<line continuation>fe0f]')
+
+            `.replace("<line continuation>", "\\\r\n"),
+            errors: [{
+                line: 4,
+                column: 26,
+                endLine: 5,
+                endColumn: 5,
+                messageId: "combiningClass",
+                suggestions: null
+            }]
+        },
+        {
+            code: String.raw`
+
+            // just a backslash escaped as "\\"
+            new RegExp(<backtick>[.\\u200D.]<backtick>)
+
+            `.replaceAll("<backtick>", "`"),
+            errors: [{
+                column: 26,
+                endColumn: 35,
+                messageId: "zwj",
+                suggestions: null
+            }]
+        },
+        {
+            code: String.raw`
+
+            // "u" escaped as "\x75"
+            new RegExp(<backtick>[.\\\x75200D.]<backtick>)
+
+            `.replaceAll("<backtick>", "`"),
+            errors: [{
+                column: 26,
+                endColumn: 38,
+                messageId: "zwj",
+                suggestions: null
+            }]
+        },
+
+        /* eslint-disable lines-around-comment, internal-rules/multiline-comment-style -- see https://github.com/eslint/eslint/issues/18081 */
+
+        {
+            code: String.raw`
+
+            // unescaped <CR> <LF> counts as a single character
+            new RegExp(<backtick>[<crlf>\\u200D.]<backtick>)
+
+            `.replaceAll("<backtick>", "`").replace("<crlf>", "\r\n"),
+            errors: [{
+                line: 4,
+                column: 26,
+                endLine: 5,
+                endColumn: 9,
+                messageId: "zwj",
+                suggestions: null
+            }]
+        },
 
         // ES2024
+
         {
             code: "var r = /[[👶🏻]]/v",
             languageOptions: { ecmaVersion: 2024 },
@@ -1348,17 +1579,41 @@ ruleTester.run("no-misleading-character-class", rule, {
             }]
         },
         {
-            code: "var r = /[👍]/",
+            code: "new RegExp(/^[👍]$/v, '')",
             languageOptions: {
-                ecmaVersion: 2015
+                ecmaVersion: 2024
             },
             errors: [{
-                column: 11,
-                endColumn: 13,
+                column: 15,
+                endColumn: 17,
                 messageId: "surrogatePairWithoutUFlag",
-                suggestions: [{ messageId: "suggestUnicodeFlag", output: "var r = /[👍]/u" }]
+                suggestions: [{ messageId: "suggestUnicodeFlag", output: "new RegExp(/^[👍]$/v, 'u')" }]
             }]
         }
 
+        /*
+         * This test case has been disabled because of a limitation in Node.js 18, see https://github.com/eslint/eslint/pull/18082#discussion_r1506142421.
+         *
+         * {
+         *     code: "var r = /[👶🏻]/v; RegExp(r, 'v');",
+         *     languageOptions: {
+         *         ecmaVersion: 2024
+         *     },
+         *     errors: [{
+         *         column: 11,
+         *         endColumn: 15,
+         *         messageId: "emojiModifier",
+         *         suggestions: null
+         *     }, {
+         *         column: 27,
+         *         endColumn: 28,
+         *         messageId: "emojiModifier",
+         *         suggestions: null
+         *     }]
+         * }
+         */
+
+        /* eslint-enable lines-around-comment, internal-rules/multiline-comment-style -- re-enable rule */
+
     ]
 });
diff --git a/tests/lib/rules/utils/char-source.js b/tests/lib/rules/utils/char-source.js
new file mode 100644
index 00000000000..2f37d9f3c0f
--- /dev/null
+++ b/tests/lib/rules/utils/char-source.js
@@ -0,0 +1,256 @@
+"use strict";
+
+const assertStrict = require("node:assert/strict");
+const { parseStringLiteral, parseTemplateToken } = require("../../../../lib/rules/utils/char-source");
+
+describe("parseStringLiteral", () => {
+
+    /*
+     * Each case lists the code units expected from `parseStringLiteral(source)`: `start` is the
+     * offset of a unit within the literal text and the unit's `source` is the text that produces it.
+     */
+    const TESTS = [
+        {
+            description: "works with an empty string",
+            source: '""',
+            expectedCodeUnits: []
+        },
+        {
+            description: "works with surrogate pairs",
+            source: '"a𝄞z"',
+            expectedCodeUnits: [
+                { start: 1, source: "a" },
+                { start: 2, source: "\ud834" },
+                { start: 3, source: "\udd1e" },
+                { start: 4, source: "z" }
+            ]
+        },
+        {
+            description: "works with escape sequences for single characters",
+            source: '"a\\x40\\u231Bz"',
+            expectedCodeUnits: [
+                { start: 1, source: "a" },
+                { start: 2, source: "\\x40" },
+                { start: 6, source: "\\u231B" },
+                { start: 12, source: "z" }
+            ]
+        },
+        {
+            description: "works with escape sequences for code points",
+            source: '"a\\u{ffff}\\u{10000}\\u{10ffff}z"',
+            expectedCodeUnits: [
+                { start: 1, source: "a" },
+                { start: 2, source: "\\u{ffff}" },
+                { start: 10, source: "\\u{10000}" },
+                { start: 10, source: "\\u{10000}" },
+                { start: 19, source: "\\u{10ffff}" },
+                { start: 19, source: "\\u{10ffff}" },
+                { start: 29, source: "z" }
+            ]
+        },
+        {
+            description: "works with line continuations",
+            source: '"a\\\n\\\r\n\\\u2028\\\u2029z"',
+            expectedCodeUnits: [
+                { start: 1, source: "a" },
+                { start: 11, source: "z" }
+            ]
+        },
+        {
+            description: "works with simple escape sequences",
+            source: '"\\"\\0\\b\\f\\n\\r\\t\\v"',
+            expectedCodeUnits: ['\\"', "\\0", "\\b", "\\f", "\\n", "\\r", "\\t", "\\v"]
+                .map((source, index) => ({ start: 1 + index * 2, source }))
+        },
+        {
+            description: "works with a <LS> character outside of a line continuation",
+            source: '"a\u2028z"',
+            expectedCodeUnits: [
+                { start: 1, source: "a" },
+                { start: 2, source: "\u2028" },
+                { start: 3, source: "z" }
+            ]
+        },
+        {
+            description: "works with a <PS> character outside of a line continuation",
+            source: '"a\u2029z"',
+            expectedCodeUnits: [
+                { start: 1, source: "a" },
+                { start: 2, source: "\u2029" },
+                { start: 3, source: "z" }
+            ]
+        },
+        {
+            description: "works with octal escape sequences",
+            source: '"\\0123\\456"',
+            expectedCodeUnits: [
+                { start: 1, source: "\\012" },
+                { start: 5, source: "3" },
+                { start: 6, source: "\\45" },
+                { start: 9, source: "6" }
+            ]
+        },
+        {
+            description: "works with an escaped 7",
+            source: '"\\7"',
+            expectedCodeUnits: [{ start: 1, source: "\\7" }]
+        },
+        {
+            description: "works with an escaped 8",
+            source: '"\\8"',
+            expectedCodeUnits: [{ start: 1, source: "\\8" }]
+        },
+        {
+            description: "works with an escaped 9",
+            source: '"\\9"',
+            expectedCodeUnits: [{ start: 1, source: "\\9" }]
+        },
+        {
+            description: 'works with the escaped sequence "00"',
+            source: '"\\00"',
+            expectedCodeUnits: [{ start: 1, source: "\\00" }]
+        },
+        {
+            description: "works with an escaped 0 followed by 8",
+            source: '"\\08"',
+            expectedCodeUnits: [
+                { start: 1, source: "\\0" },
+                { start: 3, source: "8" }
+            ]
+        },
+        {
+            description: "works with an escaped 0 followed by 9",
+            source: '"\\09"',
+            expectedCodeUnits: [
+                { start: 1, source: "\\0" },
+                { start: 3, source: "9" }
+            ]
+        }
+    ];
+
+    for (const { description, source, expectedCodeUnits, only } of TESTS) {
+        const defineTest = only ? it.only : it;
+
+        defineTest(description, () => {
+            const actualUnits = parseStringLiteral(source);
+
+            // Check the count first so that a mismatch is reported before any per-unit comparison.
+            assertStrict.equal(actualUnits.length, expectedCodeUnits.length);
+            expectedCodeUnits.forEach((expected, index) => {
+                const actual = actualUnits[index];
+                const message = `Expected values to be strictly equal at index ${index}`;
+
+                assertStrict.equal(actual.start, expected.start, message);
+                assertStrict.equal(actual.source, expected.source, message);
+            });
+        });
+    }
+});
+
+describe("parseTemplateToken", () => {
+
+    /*
+     * Each case lists the code units expected from `parseTemplateToken(source)`: `start` is the
+     * offset of a unit within the token text and the unit's `source` is the text that produces it.
+     * The tokens used here open with "`" or "}" and close with "`" or "${".
+     */
+    const TESTS = [
+        {
+            description: "works with an empty template",
+            source: "``",
+            expectedCodeUnits: []
+        },
+        {
+            description: "works with surrogate pairs",
+            source: "`A𝄞Z`",
+            expectedCodeUnits: [
+                { start: 1, source: "A" },
+                { start: 2, source: "\ud834" },
+                { start: 3, source: "\udd1e" },
+                { start: 4, source: "Z" }
+            ]
+        },
+        {
+            description: "works with escape sequences for single characters",
+            source: "`A\\x40\\u231BZ${",
+            expectedCodeUnits: [
+                { start: 1, source: "A" },
+                { start: 2, source: "\\x40" },
+                { start: 6, source: "\\u231B" },
+                { start: 12, source: "Z" }
+            ]
+        },
+        {
+            description: "works with escape sequences for code points",
+            source: "}A\\u{FFFF}\\u{10000}\\u{10FFFF}Z${",
+            expectedCodeUnits: [
+                { start: 1, source: "A" },
+                { start: 2, source: "\\u{FFFF}" },
+                { start: 10, source: "\\u{10000}" },
+                { start: 10, source: "\\u{10000}" },
+                { start: 19, source: "\\u{10FFFF}" },
+                { start: 19, source: "\\u{10FFFF}" },
+                { start: 29, source: "Z" }
+            ]
+        },
+        {
+            description: "works with line continuations",
+            source: "}A\\\n\\\r\n\\\u2028\\\u2029Z`",
+            expectedCodeUnits: [
+                { start: 1, source: "A" },
+                { start: 11, source: "Z" }
+            ]
+        },
+        {
+            description: "works with simple escape sequences",
+            source: "`\\0\\`\\b\\f\\n\\r\\t\\v`",
+            expectedCodeUnits: ["\\0", "\\`", "\\b", "\\f", "\\n", "\\r", "\\t", "\\v"]
+                .map((source, index) => ({ start: 1 + index * 2, source }))
+        },
+        {
+            description: "works with a <LS> character outside of a line continuation",
+            source: "`a\u2028z`",
+            expectedCodeUnits: [
+                { start: 1, source: "a" },
+                { start: 2, source: "\u2028" },
+                { start: 3, source: "z" }
+            ]
+        },
+        {
+            description: "works with a <PS> character outside of a line continuation",
+            source: "`a\u2029z`",
+            expectedCodeUnits: [
+                { start: 1, source: "a" },
+                { start: 2, source: "\u2029" },
+                { start: 3, source: "z" }
+            ]
+        },
+        {
+            description: "works with unescaped <CR> <LF> sequences",
+            source: "`A\r\nZ`",
+            expectedCodeUnits: [
+                { start: 1, source: "A" },
+                { start: 2, source: "\r\n" },
+                { start: 4, source: "Z" }
+            ]
+        }
+    ];
+
+    for (const { description, source, expectedCodeUnits, only } of TESTS) {
+        const defineTest = only ? it.only : it;
+
+        defineTest(description, () => {
+            const actualUnits = parseTemplateToken(source);
+
+            // Check the count first so that a mismatch is reported before any per-unit comparison.
+            assertStrict.equal(actualUnits.length, expectedCodeUnits.length);
+            expectedCodeUnits.forEach((expected, index) => {
+                const actual = actualUnits[index];
+                const message = `Expected values to be strictly equal at index ${index}`;
+
+                assertStrict.equal(actual.start, expected.start, message);
+                assertStrict.equal(actual.source, expected.source, message);
+            });
+        });
+    }
+});