New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: make no-misleading-character-class
report more granular errors
#18082
Changes from 16 commits
82d7b3c
1e8525f
45006ec
836fe39
d8fd769
d351f21
b2cb30e
90bee47
53e262a
448d9a9
b1cf05f
7bb7e55
072f256
6792594
ea6f183
ed8d1cd
7eaf8b4
ebe11f8
64813fb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
fasttime marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,11 +3,18 @@ | |
*/ | ||
"use strict"; | ||
|
||
const { CALL, CONSTRUCT, ReferenceTracker, getStringIfConstant } = require("@eslint-community/eslint-utils"); | ||
const { | ||
CALL, | ||
CONSTRUCT, | ||
ReferenceTracker, | ||
getStaticValue, | ||
getStringIfConstant | ||
} = require("@eslint-community/eslint-utils"); | ||
const { RegExpParser, visitRegExpAST } = require("@eslint-community/regexpp"); | ||
const { isCombiningCharacter, isEmojiModifier, isRegionalIndicatorSymbol, isSurrogatePair } = require("./utils/unicode"); | ||
const astUtils = require("./utils/ast-utils.js"); | ||
const { isValidWithUnicodeFlag } = require("./utils/regular-expressions"); | ||
const { parseStringLiteral, parseTemplateToken } = require("./utils/char-source"); | ||
|
||
//------------------------------------------------------------------------------ | ||
// Helpers | ||
|
@@ -193,6 +200,15 @@ const findCharacterSequences = { | |
|
||
const kinds = Object.keys(findCharacterSequences); | ||
|
||
/** | ||
* Determines if a specified value is a regular expression object. | ||
* @param {any} value The value to check. | ||
* @returns {boolean} `true` if the value is a regular expression object, otherwise `false`. | ||
*/ | ||
function isRegExp(value) { | ||
return value instanceof RegExp; | ||
} | ||
|
||
//------------------------------------------------------------------------------ | ||
// Rule Definition | ||
//------------------------------------------------------------------------------ | ||
|
@@ -225,62 +241,7 @@ module.exports = { | |
create(context) { | ||
const sourceCode = context.sourceCode; | ||
const parser = new RegExpParser(); | ||
|
||
/** | ||
* Generates a granular loc for context.report, if directly calculable. | ||
* @param {Character[]} chars Individual characters being reported on. | ||
* @param {Node} node Parent string node to report within. | ||
* @returns {Object | null} Granular loc for context.report, if directly calculable. | ||
* @see https://github.com/eslint/eslint/pull/17515 | ||
*/ | ||
function generateReportLocation(chars, node) { | ||
|
||
// Limit to to literals and expression-less templates with raw values === their value. | ||
switch (node.type) { | ||
case "TemplateLiteral": | ||
if (node.expressions.length || sourceCode.getText(node).slice(1, -1) !== node.quasis[0].value.cooked) { | ||
return null; | ||
} | ||
break; | ||
|
||
case "Literal": | ||
if (typeof node.value === "string" && node.value !== node.raw.slice(1, -1)) { | ||
return null; | ||
} | ||
break; | ||
|
||
default: | ||
return null; | ||
} | ||
|
||
return { | ||
start: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[0].start), | ||
end: sourceCode.getLocFromIndex(node.range[0] + 1 + chars.at(-1).end) | ||
}; | ||
} | ||
|
||
/** | ||
* Finds the report loc(s) for a range of matches. | ||
* @param {Character[][]} matches Characters that should trigger a report. | ||
* @param {Node} node The node to report. | ||
* @returns {Object | null} Node loc(s) for context.report. | ||
*/ | ||
function getNodeReportLocations(matches, node) { | ||
const locs = []; | ||
|
||
for (const chars of matches) { | ||
const loc = generateReportLocation(chars, node); | ||
|
||
// If a report can't match to a range, don't report any others | ||
if (!loc) { | ||
return [node.loc]; | ||
} | ||
|
||
locs.push(loc); | ||
} | ||
|
||
return locs; | ||
} | ||
const checkedPatternNodes = new Set(); | ||
|
||
/** | ||
* Verify a given regular expression. | ||
|
@@ -320,12 +281,58 @@ module.exports = { | |
} else { | ||
foundKindMatches.set(kind, [...findCharacterSequences[kind](chars)]); | ||
} | ||
|
||
} | ||
} | ||
} | ||
}); | ||
|
||
let codeUnits = null; | ||
|
||
/** | ||
* Finds the report loc(s) for a range of matches. | ||
* Only literals and expression-less templates generate granular errors. | ||
* @param {Character[][]} matches Lists of individual characters being reported on. | ||
* @returns {Location[]} locs for context.report. | ||
* @see https://github.com/eslint/eslint/pull/17515 | ||
*/ | ||
function getNodeReportLocations(matches) { | ||
if (!astUtils.isStaticTemplateLiteral(node) && node.type !== "Literal") { | ||
return matches.length ? [node.loc] : []; | ||
} | ||
return matches.map(chars => { | ||
const firstIndex = chars[0].start; | ||
const lastIndex = chars.at(-1).end - 1; | ||
let start; | ||
let end; | ||
|
||
if (node.type === "TemplateLiteral") { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should check if the template literal has expressions. For example, this crashes: /* eslint no-misleading-character-class: "error" */
new RegExp(`${"[👍]"}`);
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct! Those cases should be covered by unit tests. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in 7bb7e55. |
||
const source = sourceCode.getText(node); | ||
const offset = node.range[0]; | ||
|
||
codeUnits ??= parseTemplateToken(source); | ||
start = offset + codeUnits[firstIndex].start; | ||
end = offset + codeUnits[lastIndex].end; | ||
} else if (typeof node.value === "string") { // String Literal | ||
const source = node.raw; | ||
const offset = node.range[0]; | ||
|
||
codeUnits ??= parseStringLiteral(source); | ||
start = offset + codeUnits[firstIndex].start; | ||
end = offset + codeUnits[lastIndex].end; | ||
} else { // RegExp Literal | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in 7bb7e55. |
||
const offset = node.range[0] + 1; // Add 1 to skip the leading slash. | ||
|
||
start = offset + firstIndex; | ||
end = offset + lastIndex + 1; | ||
} | ||
|
||
return { | ||
start: sourceCode.getLocFromIndex(start), | ||
end: sourceCode.getLocFromIndex(end) | ||
}; | ||
}); | ||
} | ||
|
||
for (const [kind, matches] of foundKindMatches) { | ||
let suggest; | ||
|
||
|
@@ -336,7 +343,7 @@ module.exports = { | |
}]; | ||
} | ||
|
||
const locs = getNodeReportLocations(matches, node); | ||
const locs = getNodeReportLocations(matches); | ||
|
||
for (const loc of locs) { | ||
context.report({ | ||
|
@@ -351,6 +358,9 @@ module.exports = { | |
|
||
return { | ||
"Literal[regex]"(node) { | ||
if (checkedPatternNodes.has(node)) { | ||
return; | ||
} | ||
verify(node, node.regex.pattern, node.regex.flags, fixer => { | ||
if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, node.regex.pattern)) { | ||
return null; | ||
|
@@ -371,12 +381,31 @@ module.exports = { | |
for (const { node: refNode } of tracker.iterateGlobalReferences({ | ||
RegExp: { [CALL]: true, [CONSTRUCT]: true } | ||
})) { | ||
let pattern, flags; | ||
const [patternNode, flagsNode] = refNode.arguments; | ||
const pattern = getStringIfConstant(patternNode, scope); | ||
const flags = getStringIfConstant(flagsNode, scope); | ||
const evaluatedPattern = getStaticValue(patternNode, scope); | ||
|
||
if (!evaluatedPattern) { | ||
continue; | ||
} | ||
if (flagsNode) { | ||
if (isRegExp(evaluatedPattern.value)) { | ||
pattern = evaluatedPattern.value.source; | ||
checkedPatternNodes.add(patternNode); | ||
} else { | ||
pattern = String(evaluatedPattern.value); | ||
} | ||
flags = getStringIfConstant(flagsNode, scope); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add this test: {
code: "new RegExp(/^[👍]$/v, '')",
languageOptions: {
ecmaVersion: 2024
},
errors: [{
column: 15,
endColumn: 17,
messageId: "surrogatePairWithoutUFlag",
suggestions: [{ messageId: "suggestUnicodeFlag", output: "new RegExp(/^[👍]$/v, 'u')" }]
}]
} It's interesting to see what will happen on Node 18 because it doesn't support the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! This will fail in Node.js 18 because the parser silently sets the The only way I found to fix this in ESLint directly is by patching the AST with the missing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think that's out of scope for this change, but we could evaluate it separately. As for this particular case, maybe we could additionally check if the first argument is a regex literal? That would retain the current functionality where an error is reported regardless of the Node.js version. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That would not work as expected when the first argument is a variable, e.g.: var r = /[👶🏻]/v; // error
RegExp(r, 'v'); // error In Node.js 18, the second line would not be flagged because Maybe we should just check if the node is a regex literal without using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
What would the rule check for the RegExp constructors node with two arguments in case the first argument isn't a regex literal? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
In this case, the regex literal would be reported in all Node.js versions, but the RegExp constructor would only in Node.js > 18? That's definitely something we should aim to avoid, but I think this was the case before this change as well so we're at least not introducing new cases where the behavior depends on the runtime. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If the first argument is not a regex literal we could call
Yes, if it's okay to keep the existing bug that's also an option. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Good idea! Let's do that to fix inconsistencies. At some future point, we could see if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, I've done the changes in 7eaf8b4. I had to comment out some of the unit tests that were no longer passing. Maybe we could also update the tests to make them pass and add a note that the expected results are not really what was intended because of the limitations with Node.js 18. When I have some time I would like to look at how |
||
} else { | ||
if (isRegExp(evaluatedPattern.value)) { | ||
continue; | ||
} | ||
pattern = String(evaluatedPattern.value); | ||
flags = ""; | ||
} | ||
|
||
if (typeof pattern === "string") { | ||
verify(patternNode, pattern, flags || "", fixer => { | ||
if (typeof flags === "string") { | ||
verify(patternNode, pattern, flags, fixer => { | ||
|
||
if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) { | ||
return null; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should definitely not be modifying the AST at this point.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that was a sort of PoC but way too hazardous. It's reverted now. At some point I would like to work with eslint-community to find a solution to improve the usability of eslint-utils in Node.js 18 without introducing any breaking changes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just to note that
v
flag in Node.js 18 is just one example we have at the moment. It can be assumed that there will always be some new regex syntax that isn't supported in some (if not all) runtime environments we support.