Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: no-misleading-character-class granular errors #17515

Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
25107bc
feat: `no-misleading-character-class` granular errors
JoshuaKGoldberg Aug 30, 2023
abaefa3
fix: column offsets
JoshuaKGoldberg Sep 5, 2023
cf27ff2
fix: missing CallExpression
JoshuaKGoldberg Sep 5, 2023
a822015
Apply suggestions from code review
JoshuaKGoldberg Sep 5, 2023
f87a046
Merge branch 'main'
JoshuaKGoldberg Oct 10, 2023
f31d8e1
All tests passing again
JoshuaKGoldberg Oct 11, 2023
4472129
Edge case: quadruple back slashes
JoshuaKGoldberg Oct 11, 2023
629fa22
Apply suggestions from code review
JoshuaKGoldberg Oct 13, 2023
206f0e2
Update lib/rules/no-misleading-character-class.js
JoshuaKGoldberg Oct 13, 2023
5bca639
Adjusted for repeat characters
JoshuaKGoldberg Oct 13, 2023
c370130
Separated kinds into a Set
JoshuaKGoldberg Oct 13, 2023
66e472d
Update lib/rules/no-misleading-character-class.js
JoshuaKGoldberg Oct 16, 2023
1df22bc
Adjust unit tests for accepted changes
JoshuaKGoldberg Oct 16, 2023
a54104d
Split into adjustment function for sequence pairs
JoshuaKGoldberg Oct 16, 2023
11b0abf
Reduced complexity because of edge cases, as reqiested
JoshuaKGoldberg Nov 1, 2023
59845bd
Use chars array as suggested - mostly there
JoshuaKGoldberg Nov 4, 2023
a7c2107
Checked that slices include first and last
JoshuaKGoldberg Nov 4, 2023
06c23c7
Used sourceCode.getText(node) over node.raw, and added comments
JoshuaKGoldberg Nov 11, 2023
482949d
Update lib/rules/no-misleading-character-class.js
JoshuaKGoldberg Nov 11, 2023
a2c192a
Drastically limit applicability
JoshuaKGoldberg Nov 13, 2023
d0313a4
Restricted regexp literals on \u
JoshuaKGoldberg Nov 19, 2023
6e21dd2
Update lib/rules/no-misleading-character-class.js
JoshuaKGoldberg Nov 24, 2023
27358ab
Corrected erroneous escaped u exclusion
JoshuaKGoldberg Nov 27, 2023
fb94bb0
Update lib/rules/no-misleading-character-class.js
JoshuaKGoldberg Dec 12, 2023
46b4368
If a report range is missing, skip other reports
JoshuaKGoldberg Dec 13, 2023
ed5d5f2
Update lib/rules/no-misleading-character-class.js
JoshuaKGoldberg Dec 15, 2023
9b2b365
Merge branch 'main'
JoshuaKGoldberg Dec 15, 2023
3244450
Implement fasttime suggestion for a Map
JoshuaKGoldberg Dec 15, 2023
f1480c1
nit: for loop
JoshuaKGoldberg Dec 15, 2023
c869b2d
Unify zwj reports
JoshuaKGoldberg Dec 15, 2023
1441bb9
Merge branch 'main'
JoshuaKGoldberg Dec 15, 2023
7d21099
Added a couple of multiline tests
JoshuaKGoldberg Dec 15, 2023
15ebc24
Merge branch 'main' into no-misleading-character-class-granular-errors
JoshuaKGoldberg Dec 28, 2023
b838339
use .at(-1)
mdjermanovic Jan 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
256 changes: 186 additions & 70 deletions lib/rules/no-misleading-character-class.js
Expand Up @@ -62,7 +62,6 @@
}
}


/**
* Checks whether the given character node is a Unicode code point escape or not.
* @param {Character} char the character node to check.
Expand All @@ -73,80 +72,120 @@
}

/**
* Each function returns `true` if it detects that kind of problem.
* @type {Record<string, (chars: Character[]) => boolean>}
* Each function returns matched characters if it detects that kind of problem.
* @type {Record<string, (char: Character, index: number, chars: Character[]) => Character[] | null>}
*/
const hasCharacterSequence = {
surrogatePairWithoutUFlag(chars) {
return chars.some((c, i) => {
if (i === 0) {
return false;
}
const c1 = chars[i - 1];

return (
isSurrogatePair(c1.value, c.value) &&
!isUnicodeCodePointEscape(c1) &&
!isUnicodeCodePointEscape(c)
);
});
const characterSequenceIndexFilters = {
surrogatePairWithoutUFlag(char, index, chars) {
JoshuaKGoldberg marked this conversation as resolved.
Show resolved Hide resolved
if (index === 0) {
return null;
}

const previous = chars[index - 1];

if (
isSurrogatePair(previous.value, char.value) &&
!isUnicodeCodePointEscape(previous) &&
!isUnicodeCodePointEscape(char)
) {
return [previous, char];
}

return null;
},

surrogatePair(chars) {
return chars.some((c, i) => {
if (i === 0) {
return false;
}
const c1 = chars[i - 1];

return (
isSurrogatePair(c1.value, c.value) &&
(
isUnicodeCodePointEscape(c1) ||
isUnicodeCodePointEscape(c)
)
);
});
surrogatePair(char, index, chars) {
if (index === 0) {
return null;
}

const previous = chars[index - 1];

if (
isSurrogatePair(previous.value, char.value) &&
(
isUnicodeCodePointEscape(previous) ||
isUnicodeCodePointEscape(char)
)
) {
return [previous, char];
}

return null;
},

combiningClass(chars) {
return chars.some((c, i) => (
i !== 0 &&
isCombiningCharacter(c.value) &&
!isCombiningCharacter(chars[i - 1].value)
));
combiningClass(char, index, chars) {
if (
index !== 0 &&
isCombiningCharacter(char.value) &&
!isCombiningCharacter(chars[index - 1].value)
) {
return [chars[index - 1], char];
}

return null;
},

emojiModifier(chars) {
return chars.some((c, i) => (
i !== 0 &&
isEmojiModifier(c.value) &&
!isEmojiModifier(chars[i - 1].value)
));
emojiModifier(char, index, chars) {
if (
index !== 0 &&
isEmojiModifier(char.value) &&
!isEmojiModifier(chars[index - 1].value)
) {
return [chars[index - 1], char];
}

return null;
},

regionalIndicatorSymbol(chars) {
return chars.some((c, i) => (
i !== 0 &&
isRegionalIndicatorSymbol(c.value) &&
isRegionalIndicatorSymbol(chars[i - 1].value)
));
regionalIndicatorSymbol(char, index, chars) {
if (
index !== 0 &&
isRegionalIndicatorSymbol(char.value) &&
isRegionalIndicatorSymbol(chars[index - 1].value)
) {
return [chars[index - 1], char];
}

return null;
},

zwj(chars) {
const lastIndex = chars.length - 1;
zwj(char, index, chars) {
if (
index !== 0 &&
index !== chars.length - 1 &&
char.value === 0x200d &&
chars[index - 1].value !== 0x200d &&
chars[index + 1].value !== 0x200d
) {
return chars.slice(index - 1, index + 2);
}

return chars.some((c, i) => (
i !== 0 &&
i !== lastIndex &&
c.value === 0x200d &&
chars[i - 1].value !== 0x200d &&
chars[i + 1].value !== 0x200d
));
return null;
}
};

const kinds = Object.keys(hasCharacterSequence);
const kinds = Object.keys(characterSequenceIndexFilters);

/**
* Collects the indices where the filter returns an array.
* @param {Character[]} chars Characters to run the filter on.
* @param {(char: Character, index: number, chars: Character[]) => Character[] | null} filter Finds matches for an index.
* @returns {Character[][]} Indices where the filter returned true.
*/
function accumulate(chars, filter) {
const matchingChars = [];

chars.forEach((char, index) => {
const matches = filter(char, index, chars);

if (matches) {
matchingChars.push(matches);
}
});

return matchingChars;
}

//------------------------------------------------------------------------------
// Rule Definition
Expand Down Expand Up @@ -181,6 +220,62 @@
const sourceCode = context.sourceCode;
const parser = new RegExpParser();

/**
* Generates a granular loc for context.report, if directly calculable.
* @param {Character[]} chars Individual characters being reported on.
* @param {Node} node Parent string node to report within.
* @returns {Object | null} Granular loc for context.report, if directly calculable.
* @see https://github.com/eslint/eslint/pull/17515
*/
function generateReportLocation(chars, node) {
JoshuaKGoldberg marked this conversation as resolved.
Show resolved Hide resolved

// Limit to to literals and expression-less templates with raw values === their value.
switch (node.type) {
case "TemplateLiteral":
if (node.expressions.length || node.quasis[0].value.raw !== node.quasis[0].value.cooked) {
return null;
}
break;

case "Literal":
if (typeof node.value === "string" && node.value !== node.raw.slice(1, -1)) {
return null;
}
break;

default:
return null;
}

return {
start: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[0].start),
end: sourceCode.getLocFromIndex(node.range[0] + 1 + chars[chars.length - 1].end)

Check failure on line 252 in lib/rules/no-misleading-character-class.js

View workflow job for this annotation

GitHub Actions / Verify Files

Prefer `.at(…)` over `[….length - index]`
mdjermanovic marked this conversation as resolved.
Show resolved Hide resolved
};
}

JoshuaKGoldberg marked this conversation as resolved.
Show resolved Hide resolved
/**
* Finds the report loc(s) for a range of matches.
* @param {Character[][]} matches Characters that should trigger a report.
* @param {Node} node The node to report.
* @returns {Object | null} Node loc(s) for context.report.
*/
function getNodeReportLocations(matches, node) {
const locs = [];

for (const chars of matches) {
const loc = generateReportLocation(chars, node);

// If a report can't match to a range, don't report any others
if (!loc) {
return [node.loc];
}

locs.push(loc);
}

return locs;
}

/**
* Verify a given regular expression.
* @param {Node} node The node to report.
Expand Down Expand Up @@ -208,21 +303,26 @@
return;
}

const foundKinds = new Set();
const foundKindMatches = new Map();

visitRegExpAST(patternNode, {
onCharacterClassEnter(ccNode) {
for (const chars of iterateCharacterSequence(ccNode.elements)) {
for (const kind of kinds) {
if (hasCharacterSequence[kind](chars)) {
foundKinds.add(kind);
const matches = accumulate(chars, characterSequenceIndexFilters[kind]);

if (foundKindMatches.has(kind)) {
foundKindMatches.get(kind).push(...matches);
} else {
foundKindMatches.set(kind, matches);
}

}
}
}
});

for (const kind of foundKinds) {
for (const [kind, matches] of foundKindMatches) {
let suggest;

if (kind === "surrogatePairWithoutUFlag") {
Expand All @@ -232,11 +332,27 @@
}];
}

context.report({
node,
messageId: kind,
suggest
});
const locs = getNodeReportLocations(matches, node);

// Grapheme zero-width joiners (e.g. in 👨‍👩‍👦) visually show as one emoji
if (kind === "zwj" && locs.length > 1) {
context.report({
loc: {
start: locs[0].start,
end: locs[1].end
},
messageId: kind,
suggest
});
Comment on lines +338 to +346
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This wouldn't show all if there are more of these in the pattern, but we can fix that in a follow-up.

} else {
for (const loc of locs) {
context.report({
loc,
messageId: kind,
suggest
});
}
}
JoshuaKGoldberg marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down Expand Up @@ -267,7 +383,7 @@
const flags = getStringIfConstant(flagsNode, scope);

if (typeof pattern === "string") {
verify(refNode, pattern, flags || "", fixer => {
verify(patternNode, pattern, flags || "", fixer => {

if (!isValidWithUnicodeFlag(context.languageOptions.ecmaVersion, pattern)) {
return null;
Expand Down