extract char-source to a utility module

eslint · Feb 8, 2024 · 53e262a · 53e262a
1 parent 90bee47
commit 53e262a
Show file tree

Hide file tree

Showing 4 changed files with 483 additions and 2 deletions.
diff --git a/lib/rules/no-misleading-character-class.js b/lib/rules/no-misleading-character-class.js
@@ -8,7 +8,7 @@ const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
 const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
 const astUtils = require("./utils/ast-utils.js");
 const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
-const { parseStringLiteral, parseTemplateToken } = require("char-source");
+const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source");
 
 //------------------------------------------------------------------------------
 // Helpers

diff --git a/lib/rules/utils/char-source.js b/lib/rules/utils/char-source.js
@@ -0,0 +1,226 @@
+/**
+ * @fileoverview Utility functions to locate the source text of each code unit in the value of a string literal or template token.
+ * @author Francesco Trotta
+ */
+
+"use strict";
+
+/**
+ * Represents a code unit produced by the evaluation of a JavaScript common token like a string
+ * literal or template token.
+ */
+class CodeUnit {
+    constructor(start, source) {
+        this.start = start;
+        this.source = source;
+    }
+
+    get end() {
+        return this.start + this.length;
+    }
+
+    get length() {
+        return this.source.length;
+    }
+}
+
+/**
+ * An object used to keep track of the position in a source text where the next characters will be read.
+ */
+class SourceReader {
+    constructor(source) {
+        this.source = source;
+        this.pos = 0;
+    }
+}
+
+const SIMPLE_ESCAPE_SEQUENCES =
+{ __proto__: null, b: "\b", f: "\f", n: "\n", r: "\r", t: "\t", v: "\v" };
+
+/**
+ * Reads a hex escape sequence.
+ * @param {SourceReader} reader The reader should be positioned on the first hexadecimal digit.
+ * @param {number} length The number of hexadecimal digits.
+ * @returns {string} A code unit.
+ */
+function readHexSequence(reader, length) {
+    const { source, pos } = reader;
+    const str = source.slice(pos, pos + length);
+    const charCode = parseInt(str, 16);
+
+    reader.pos = pos + length;
+    return String.fromCharCode(charCode);
+}
+
+/**
+ * Reads a Unicode escape sequence.
+ * @param {SourceReader} reader The reader should be positioned after the "u".
+ * @returns {string} A code unit.
+ */
+function readUnicodeSequence(reader) {
+    const { source, pos } = reader;
+    const regExp = /\{(?<hexDigits>[\dA-Fa-f]+)\}/uy;
+
+    regExp.lastIndex = pos;
+    const match = regExp.exec(source);
+
+    if (match) {
+        const codePoint = parseInt(match.groups.hexDigits, 16);
+
+        reader.pos = regExp.lastIndex;
+        return String.fromCodePoint(codePoint);
+    }
+    return readHexSequence(reader, 4);
+}
+
+/**
+ * Reads an octal escape sequence.
+ * @param {SourceReader} reader The reader should be positioned after the first octal digit.
+ * @param {number} maxLength The maximum number of octal digits.
+ * @returns {string} A code unit.
+ */
+function readOctalSequence(reader, maxLength) {
+    const posAfterBackslash = reader.pos - 1;
+    const [octalStr] = reader.source.slice(posAfterBackslash, posAfterBackslash + maxLength).match(/^[0-7]+/u);
+
+    reader.pos = posAfterBackslash + octalStr.length;
+    const octal = parseInt(octalStr, 8);
+
+    return String.fromCharCode(octal);
+}
+
+/**
+ * Reads an escape sequence or line continuation.
+ * @param {SourceReader} reader The reader should be positioned after the backslash.
+ * @returns {string} A string of zero, one or two code units.
+ */
+function readEscapeSequenceOrLineContinuation(reader) {
+    const { source, pos } = reader;
+    const char = source[pos];
+
+    reader.pos = pos + 1;
+    const unitChar = SIMPLE_ESCAPE_SEQUENCES[char];
+
+    if (unitChar) {
+        return unitChar;
+    }
+    switch (char) {
+        case "x":
+            return readHexSequence(reader, 2);
+        case "u":
+            return readUnicodeSequence(reader);
+        case "\r":
+            if (source[pos + 1] === "\n") {
+                reader.pos = pos + 2;
+            }
+
+            // fallthrough
+        case "\n":
+        case "\u2028":
+        case "\u2029":
+            return "";
+        case "0":
+        case "1":
+        case "2":
+        case "3":
+            return readOctalSequence(reader, 3);
+        case "4":
+        case "5":
+        case "6":
+        case "7":
+            return readOctalSequence(reader, 2);
+        default:
+            return char;
+    }
+}
+
+/**
+ * Reads an escape sequence or line continuation and generates the respective `CodeUnit` elements.
+ * @param {SourceReader} reader The reader should be positioned on the backslash.
+ * @returns {Generator<CodeUnit>} Zero, one or two `CodeUnit` elements.
+ */
+function *mapEscapeSequenceOrLineContinuation(reader) {
+    const start = reader.pos++;
+    const str = readEscapeSequenceOrLineContinuation(reader);
+    const end = reader.pos;
+    const source = reader.source.slice(start, end);
+
+    switch (str.length) {
+        case 0:
+            break;
+        case 1:
+            yield new CodeUnit(start, source);
+            break;
+        default:
+            yield new CodeUnit(start, source);
+            yield new CodeUnit(start, source);
+            break;
+    }
+}
+
+/**
+ * Parses a string literal.
+ * @param {string} source The string literal to parse, including the delimiting quotes.
+ * @returns {CodeUnit[]} A list of code units produced by the string literal.
+ */
+function parseStringLiteral(source) {
+    const reader = new SourceReader(source);
+    const quote = source[0];
+
+    reader.pos = 1;
+    const codeUnits = [];
+
+    for (;;) {
+        const { pos } = reader;
+        const char = source[pos];
+
+        if (char === quote) {
+            break;
+        }
+        if (char === "\\") {
+            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
+        } else {
+            reader.pos = pos + 1;
+            codeUnits.push(new CodeUnit(pos, char));
+        }
+    }
+    return codeUnits;
+}
+
+/**
+ * Parses a template token.
+ * @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`.
+ * @returns {CodeUnit[]} A list of code units produced by the template token.
+ */
+function parseTemplateToken(source) {
+    const reader = new SourceReader(source);
+
+    reader.pos = 1;
+    const codeUnits = [];
+
+    for (;;) {
+        const { pos } = reader;
+        const char = source[pos];
+
+        if (char === "`" || char === "$" && source[pos + 1] === "{") {
+            break;
+        }
+        if (char === "\\") {
+            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
+        } else {
+            let unitSource;
+
+            if (char === "\r" && source[pos + 1] === "\n") {
+                unitSource = "\r\n";
+                reader.pos = pos + 2;
+            } else {
+                unitSource = char;
+                reader.pos = pos + 1;
+            }
+            codeUnits.push(new CodeUnit(pos, unitSource));
+        }
+    }
+    return codeUnits;
+}
+
+module.exports = { parseStringLiteral, parseTemplateToken };
diff --git a/package.json b/package.json
@@ -72,7 +72,6 @@
     "@nodelib/fs.walk": "^1.2.8",
     "ajv": "^6.12.4",
     "chalk": "^4.0.0",
-    "char-source": "^0.0.0",
     "cross-spawn": "^7.0.2",
     "debug": "^4.3.2",
     "escape-string-regexp": "^4.0.0",