Skip to content

Commit

Permalink
extract char-source to a utility module
Browse files Browse the repository at this point in the history
  • Loading branch information
fasttime committed Feb 8, 2024
1 parent 90bee47 commit 53e262a
Show file tree
Hide file tree
Showing 4 changed files with 483 additions and 2 deletions.
2 changes: 1 addition & 1 deletion lib/rules/no-misleading-character-class.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp");
const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode");
const astUtils = require("./utils/ast-utils.js");
const { isValidWithUnicodeFlag } = require("./utils/regular-expressions");
const { parseStringLiteral, parseTemplateToken } = require("char-source");
const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source");

//------------------------------------------------------------------------------
// Helpers
Expand Down
226 changes: 226 additions & 0 deletions lib/rules/utils/char-source.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@
/**
* @fileoverview Utility functions to locate the source text of each code unit in the value of a string literal or template token.
* @author Francesco Trotta
*/

"use strict";

/**
 * A single code unit in the value of a JavaScript token such as a string literal or
 * template token, paired with the piece of source text that produced it.
 */
class CodeUnit {

    /**
     * @param {number} start Index at which the producing source text begins.
     * @param {string} source The source text that produced this code unit.
     */
    constructor(start, source) {
        Object.assign(this, { start, source });
    }

    /**
     * Index just past the last character of the producing source text.
     * @returns {number} `start` plus the length of `source`.
     */
    get end() {
        const { start, length } = this;

        return start + length;
    }

    /**
     * Number of characters in the producing source text.
     * @returns {number} The length of `source`.
     */
    get length() {
        const { source } = this;

        return source.length;
    }
}

/**
 * Holds a source text together with a cursor (`pos`) marking where the next characters
 * will be read from. The cursor starts at index 0.
 */
class SourceReader {

    /**
     * @param {string} source The source text to read from.
     */
    constructor(source) {
        Object.assign(this, { source, pos: 0 });
    }
}

/**
 * Maps the character that follows a backslash in a single-character escape sequence
 * to the code unit it evaluates to. The object is created without a prototype, so only
 * the keys listed here can ever be found by a lookup.
 */
const SIMPLE_ESCAPE_SEQUENCES = {
    __proto__: null,
    b: "\b",
    f: "\f",
    n: "\n",
    r: "\r",
    t: "\t",
    v: "\v"
};

/**
 * Reads a fixed number of hexadecimal digits and advances the reader past them.
 * @param {SourceReader} reader The reader should be positioned on the first hexadecimal digit.
 * @param {number} length The number of hexadecimal digits.
 * @returns {string} A code unit.
 */
function readHexSequence(reader, length) {
    const firstDigitPos = reader.pos;
    const posAfterDigits = firstDigitPos + length;
    const hexDigits = reader.source.slice(firstDigitPos, posAfterDigits);

    reader.pos = posAfterDigits;
    return String.fromCharCode(parseInt(hexDigits, 16));
}

/**
 * Reads a Unicode escape sequence in either the braced form (`{` hex digits `}`) or the
 * form with exactly four hexadecimal digits, and advances the reader past it.
 * @param {SourceReader} reader The reader should be positioned after the "u".
 * @returns {string} A code unit, or two code units when the braced form denotes a code point
 * that needs a surrogate pair.
 */
function readUnicodeSequence(reader) {
    const bracedForm = /\{(?<hexDigits>[\dA-Fa-f]+)\}/uy;

    // The sticky flag makes the pattern match only at the reader's current position.
    bracedForm.lastIndex = reader.pos;
    const bracedMatch = bracedForm.exec(reader.source);

    if (bracedMatch === null) {
        return readHexSequence(reader, 4);
    }

    const codePoint = parseInt(bracedMatch.groups.hexDigits, 16);

    reader.pos = bracedForm.lastIndex;
    return String.fromCodePoint(codePoint);
}

/**
 * Reads an octal escape sequence and leaves the reader just past its last octal digit.
 * @param {SourceReader} reader The reader should be positioned after the first octal digit.
 * @param {number} maxLength The maximum number of octal digits.
 * @returns {string} A code unit.
 */
function readOctalSequence(reader, maxLength) {

    // The first octal digit has already been consumed, so step back one character to include it.
    const firstDigitPos = reader.pos - 1;
    const candidate = reader.source.slice(firstDigitPos, firstDigitPos + maxLength);
    const octalDigits = /^[0-7]+/u.exec(candidate)[0];

    reader.pos = firstDigitPos + octalDigits.length;
    return String.fromCharCode(parseInt(octalDigits, 8));
}

/**
 * Reads whatever follows a backslash: a simple escape, a hex, Unicode or octal escape,
 * a line continuation, or any other escaped character. The reader is advanced past
 * everything that was consumed.
 * @param {SourceReader} reader The reader should be positioned after the backslash.
 * @returns {string} A string of zero, one or two code units.
 */
function readEscapeSequenceOrLineContinuation(reader) {
    const { source } = reader;
    const escapePos = reader.pos;
    const escapeChar = source[escapePos];

    reader.pos = escapePos + 1;

    const simpleEscape = SIMPLE_ESCAPE_SEQUENCES[escapeChar];

    if (simpleEscape) {
        return simpleEscape;
    }
    if (escapeChar === "x") {
        return readHexSequence(reader, 2);
    }
    if (escapeChar === "u") {
        return readUnicodeSequence(reader);
    }
    if (escapeChar === "\r") {

        // A CR LF pair forms a single line continuation, so consume the LF as well.
        if (source[escapePos + 1] === "\n") {
            reader.pos = escapePos + 2;
        }
        return "";
    }
    if (escapeChar === "\n" || escapeChar === "\u2028" || escapeChar === "\u2029") {

        // A line continuation contributes nothing to the value.
        return "";
    }
    if (/^[0-7]$/u.test(escapeChar)) {

        // Sequences starting with 0-3 may span three octal digits, those starting with 4-7 only two.
        return readOctalSequence(reader, escapeChar <= "3" ? 3 : 2);
    }

    // Any other escaped character stands for itself.
    return escapeChar;
}

/**
 * Consumes an escape sequence or line continuation and yields one `CodeUnit` per code unit
 * it evaluates to (at most two). Every yielded element refers to the same source text:
 * the whole sequence, starting at the backslash.
 * @param {SourceReader} reader The reader should be positioned on the backslash.
 * @returns {Generator<CodeUnit>} Zero, one or two `CodeUnit` elements.
 */
function *mapEscapeSequenceOrLineContinuation(reader) {
    const backslashPos = reader.pos;

    // Step over the backslash before reading what follows it.
    reader.pos = backslashPos + 1;
    const value = readEscapeSequenceOrLineContinuation(reader);
    const sequenceText = reader.source.slice(backslashPos, reader.pos);
    const unitCount = Math.min(value.length, 2);

    for (let index = 0; index < unitCount; index++) {
        yield new CodeUnit(backslashPos, sequenceText);
    }
}

/**
 * Parses a string literal and locates the source text of each code unit in its value.
 *
 * Scanning starts after the opening quote and stops at the matching closing quote. If the
 * closing quote is missing, scanning stops at the end of `source` rather than looping forever,
 * and the code units found up to that point are returned.
 * @param {string} source The string literal to parse, including the delimiting quotes.
 * @returns {CodeUnit[]} A list of code units produced by the string literal.
 */
function parseStringLiteral(source) {
    const reader = new SourceReader(source);
    const quote = source[0];
    const codeUnits = [];

    reader.pos = 1;

    // Bounding the scan by the source length guarantees termination on unterminated input.
    while (reader.pos < source.length) {
        const { pos } = reader;
        const char = source[pos];

        if (char === quote) {
            break;
        }
        if (char === "\\") {
            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
        } else {
            reader.pos = pos + 1;
            codeUnits.push(new CodeUnit(pos, char));
        }
    }
    return codeUnits;
}

/**
 * Parses a template token and locates the source text of each code unit in its value.
 *
 * Scanning starts after the first character of the token and stops at a closing `` ` `` or at
 * the start of a `${` sequence. If neither is present, scanning stops at the end of `source`
 * rather than looping forever, and the code units found up to that point are returned.
 * An unescaped CR LF pair in the source is reported as a single code unit.
 * @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`.
 * @returns {CodeUnit[]} A list of code units produced by the template token.
 */
function parseTemplateToken(source) {
    const reader = new SourceReader(source);
    const codeUnits = [];

    reader.pos = 1;

    // Bounding the scan by the source length guarantees termination on empty or unterminated input.
    while (reader.pos < source.length) {
        const { pos } = reader;
        const char = source[pos];

        if (char === "`" || (char === "$" && source[pos + 1] === "{")) {
            break;
        }
        if (char === "\\") {
            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
            continue;
        }

        // CR LF is kept together so that both characters map to one code unit.
        const unitSource = char === "\r" && source[pos + 1] === "\n" ? "\r\n" : char;

        reader.pos = pos + unitSource.length;
        codeUnits.push(new CodeUnit(pos, unitSource));
    }
    return codeUnits;
}

module.exports = { parseStringLiteral, parseTemplateToken };
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@
"@nodelib/fs.walk": "^1.2.8",
"ajv": "^6.12.4",
"chalk": "^4.0.0",
"char-source": "^0.0.0",
"cross-spawn": "^7.0.2",
"debug": "^4.3.2",
"escape-string-regexp": "^4.0.0",
Expand Down
Loading

0 comments on commit 53e262a

Please sign in to comment.