Skip to content

Commit

Permalink
[Regression] pattern attribute handling for invalid regular expressio…
Browse files Browse the repository at this point in the history
…ns (unicode-invalid-01)

https://bugs.webkit.org/show_bug.cgi?id=261077
rdar://115234392

Reviewed by Yusuke Suzuki.

Fixed issue where RegExp parsing code allowed escaped ClassSetReservedPunctuator characters anywhere in an expression with the 'v' flag.
The ECMAScript standard actually only allows escaped ClassSetReservedPunctuator characters within a ClassSetExpression or a ClassStringDisjunction.
See https://tc39.es/ecma262/#prod-ClassSetExpression and https://tc39.es/ecma262/#prod-ClassStringDisjunction and follow the productions to
ClassSetCharacter which has a production \ ClassSetReservedPunctuator.

Added tests to check the syntax errors when ClassSetReservedPunctuator characters are used incorrectly as well as tests with ClassSetReservedPunctuator
characters in both a Class Set and a Class String Disjunction.

* JSTests/stress/regexp-vflag-property-of-strings.js:
* Source/JavaScriptCore/yarr/YarrParser.h:
(JSC::Yarr::Parser::isIdentityEscapeAnError):
(JSC::Yarr::Parser::parseEscape):

Canonical link: https://commits.webkit.org/269502@main
  • Loading branch information
msaboff committed Oct 19, 2023
1 parent 3937d68 commit cd16ef7
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 10 deletions.
61 changes: 61 additions & 0 deletions JSTests/stress/regexp-vflag-property-of-strings.js
Original file line number Diff line number Diff line change
Expand Up @@ -506,3 +506,64 @@ testRegExp(/[a&&[\q{a|ab}]]/v, "ab", ["a"]);
testRegExp(/[a--[\q{ab}]]/v, "ab", ["a"]);
testRegExp(/[[\q{a|ab}]&&a]/v, "ab", ["a"]);
testRegExp(/[[\q{a|ab}]--a]/v, "ab", ["ab"]);
testRegExpSyntaxError("\\-", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");

// Test 251
testRegExpSyntaxError("\\!", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\#", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\%", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\,", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\:", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");

// Test 256
testRegExpSyntaxError("\\;", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\<", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\=", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\>", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\@", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");

// Test 261
testRegExpSyntaxError("\\`", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\~", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\&]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\!]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\#]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");

// Test 266
testRegExpSyntaxError("[\\%]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\,]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\:]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\;]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\<]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");

// Test 271
testRegExpSyntaxError("[\\=]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\>]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\@]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\`]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("[\\~]", "u", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");

// Test 276
testRegExpSyntaxError("\\&", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\-", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\!", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\#", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\%", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");

// Test 281
testRegExpSyntaxError("\\,", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\:", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\;", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\<", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\=", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");

// Test 286
testRegExpSyntaxError("\\>", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\@", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\`", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\~", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");
testRegExpSyntaxError("\\q{a}", "v", "SyntaxError: Invalid regular expression: invalid escaped character for Unicode pattern");

// Test 291
testRegExp(/[\&\-\!\#\%\,\:\;\<\=\>\@\`\~]*/v, "&-!#%,:;<=>@`~", ["&-!#%,:;<=>@`~"]);
testRegExp(/[\q{\&\-\!\#\%\,\:\;\<\=\>\@\`\~}X]*/v, "X&-!#%,:;<=>@`~X", ["X&-!#%,:;<=>@`~X"]);
21 changes: 11 additions & 10 deletions Source/JavaScriptCore/yarr/YarrParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -781,14 +781,15 @@ class Parser {
}

// The handling of IdentityEscapes is different depending on which unicode flag if any is active.
// For Unicode patterns, IdentityEscapes only include SyntaxCharacters or '/'.
// For UnicodeSet patterns, adds to the UnicodePatterns IdentityEscapes
// ClassSetReservedPunctionation which includes &-!#%,:;<=>@`~
// For both Unicode and UnicodeSet patterns, IdentityEscapes only include SyntaxCharacters or '/'.
// For UnicodeSet patterns when parsing ClassSet expressions and ClassStringDisjunctions, escapes include SyntaxCharacters, '/'
// and ClassSetReservedPunctuator, which is any of &-!#%,:;<=>@`~
// For non-unicode patterns, most any character can be escaped.
template<ParseEscapeMode parseEscapeMode>
bool isIdentityEscapeAnError(int ch)
{
if (isEitherUnicodeCompilation()
&& ((isASCII(ch) && !strchr(isUnicodeCompilation() ? "^$\\.*+?()[]{}|/" : "^$\\.*+?()[]{}|/&-!#%,:;<=>@`~" , ch)) || !ch)) {
&& ((isASCII(ch) && !strchr((parseEscapeMode == ParseEscapeMode::ClassSet || parseEscapeMode == ParseEscapeMode::ClassStringDisjunction) ? "^$\\.*+?()[]{}|/&-!#%,:;<=>@`~" : "^$\\.*+?()[]{}|/", ch)) || !ch)) {
m_errorCode = ErrorCode::InvalidIdentityEscape;
return true;
}
Expand Down Expand Up @@ -850,7 +851,7 @@ class Parser {
case 'B':
consume();
if (parseEscapeMode != ParseEscapeMode::Normal) {
if (isIdentityEscapeAnError('B'))
if (isIdentityEscapeAnError<parseEscapeMode>('B'))
break;

delegate.atomPatternCharacter('B');
Expand Down Expand Up @@ -1026,7 +1027,7 @@ class Parser {
consume();
int x = tryConsumeHex(2);
if (x == -1) {
if (isIdentityEscapeAnError('x'))
if (isIdentityEscapeAnError<parseEscapeMode>('x'))
break;

delegate.atomPatternCharacter('x');
Expand Down Expand Up @@ -1059,7 +1060,7 @@ class Parser {
}

restoreState(state);
if (!isIdentityEscapeAnError('k')) {
if (!isIdentityEscapeAnError<parseEscapeMode>('k')) {
delegate.atomPatternCharacter('k');
m_kIdentityEscapeSeen = true;
}
Expand All @@ -1072,7 +1073,7 @@ class Parser {
int escapeChar = consume();

if (isLegacyCompilation() || parseEscapeMode == ParseEscapeMode::ClassStringDisjunction) {
if (isIdentityEscapeAnError(escapeChar))
if (isIdentityEscapeAnError<parseEscapeMode>(escapeChar))
break;
delegate.atomPatternCharacter(escapeChar);
break;
Expand Down Expand Up @@ -1112,7 +1113,7 @@ class Parser {
m_errorCode = ErrorCode::InvalidUnicodePropertyExpression;
}

if (isIdentityEscapeAnError(escapeChar))
if (isIdentityEscapeAnError<parseEscapeMode>(escapeChar))
break;

delegate.atomPatternCharacter(escapeChar);
Expand All @@ -1139,7 +1140,7 @@ class Parser {
break;
}

if (isIdentityEscapeAnError(ch))
if (isIdentityEscapeAnError<parseEscapeMode>(ch))
break;

delegate.atomPatternCharacter(consume());
Expand Down

0 comments on commit cd16ef7

Please sign in to comment.