From 541d42da6c7883dc9d97a7ad404ae336911d1f67 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Mon, 22 May 2023 21:47:22 +0900 Subject: [PATCH 1/8] Add support for ES2024 Regex v flag --- acorn/src/regexp.js | 320 ++++++++++++++++++++++++++++- acorn/src/unicode-property-data.js | 13 ++ bin/test262.unsupported-features | 1 - test/run.js | 1 + test/tests-regexp-2024.js | 78 +++++++ 5 files changed, 403 insertions(+), 10 deletions(-) create mode 100644 test/tests-regexp-2024.js diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index 2a1d54825..dfe9bb33b 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -8,17 +8,19 @@ const pp = Parser.prototype export class RegExpValidationState { constructor(parser) { this.parser = parser - this.validFlags = `gim${parser.options.ecmaVersion >= 6 ? "uy" : ""}${parser.options.ecmaVersion >= 9 ? "s" : ""}${parser.options.ecmaVersion >= 13 ? "d" : ""}` + this.validFlags = `gim${parser.options.ecmaVersion >= 6 ? "uy" : ""}${parser.options.ecmaVersion >= 9 ? "s" : ""}${parser.options.ecmaVersion >= 13 ? "d" : ""}${parser.options.ecmaVersion >= 15 ? "v" : ""}` this.unicodeProperties = UNICODE_PROPERTY_VALUES[parser.options.ecmaVersion >= 14 ? 14 : parser.options.ecmaVersion] this.source = "" this.flags = "" this.start = 0 this.switchU = false + this.switchV = false this.switchN = false this.pos = 0 this.lastIntValue = 0 this.lastStringValue = "" this.lastAssertionIsQuantifiable = false + this.lastMayContainStrings = false this.numCapturingParens = 0 this.maxBackReference = 0 this.groupNames = [] @@ -26,12 +28,20 @@ export class RegExpValidationState { } reset(start, pattern, flags) { + const unicodeSets = flags.indexOf("v") !== -1 const unicode = flags.indexOf("u") !== -1 this.start = start | 0 this.source = pattern + "" this.flags = flags - this.switchU = unicode && this.parser.options.ecmaVersion >= 6 - this.switchN = unicode && this.parser.options.ecmaVersion >= 9 + if (unicodeSets && this.parser.options.ecmaVersion >= 15) { + this.switchU = true + this.switchV = true + this.switchN = true + } else { + this.switchU = unicode && this.parser.options.ecmaVersion >= 6 + this.switchV = false + this.switchN = unicode && this.parser.options.ecmaVersion >= 9 + } } raise(message) { @@ -99,6 +109,9 @@ pp.validateRegExpFlags = function(state) { const validFlags = state.validFlags const flags = state.flags + let u = false + let v = false + for (let i = 0; i < flags.length; i++) { const flag = flags.charAt(i) if (validFlags.indexOf(flag) === -1) { @@ -107,6 +120,11 @@ pp.validateRegExpFlags = function(state) { if (flags.indexOf(flag, i + 1) > -1) { this.raise(state.start, "Duplicate regular expression flag") } + if (flag === "u") u = true + if (flag === "v") v = true + } + if (this.options.ecmaVersion >= 15 && u && v) { + this.raise(state.start, "Invalid regular expression flag") } } @@ -136,6 +154,7 @@ pp.regexp_pattern = function(state) { state.lastIntValue = 0 state.lastStringValue = "" state.lastAssertionIsQuantifiable = false + state.lastMayContainStrings = false state.numCapturingParens = 0 state.maxBackReference = 0 state.groupNames.length = 0 @@ -726,13 +745,15 @@ pp.regexp_eatCharacterClassEscape = function(state) { if (isCharacterClassEscape(ch)) { state.lastIntValue = -1 state.advance() + state.lastMayContainStrings = false return true } + let negate = false if ( state.switchU && this.options.ecmaVersion >= 9 && - (ch === 0x50 /* P */ || ch === 0x70 /* p */) + ((negate = ch === 0x50 /* P */) || ch === 0x70 /* p */) ) { state.lastIntValue = -1 state.advance() @@ -741,6 +762,9 @@ pp.regexp_eatCharacterClassEscape = function(state) { this.regexp_eatUnicodePropertyValueExpression(state) && state.eat(0x7D /* } */) ) { + if (negate && state.lastMayContainStrings) { + state.raise("Invalid property name") + } return true } state.raise("Invalid property name") @@ -771,6 +795,7 @@ pp.regexp_eatUnicodePropertyValueExpression = function(state) { if (this.regexp_eatUnicodePropertyValue(state)) { const value = state.lastStringValue this.regexp_validateUnicodePropertyNameAndValue(state, name, value) + state.lastMayContainStrings = false return true } } @@ -791,8 +816,15 @@ pp.regexp_validateUnicodePropertyNameAndValue = function(state, name, value) { state.raise("Invalid property value") } pp.regexp_validateUnicodePropertyNameOrValue = function(state, nameOrValue) { - if (!state.unicodeProperties.binary.test(nameOrValue)) - state.raise("Invalid property name") + if (state.unicodeProperties.binary.test(nameOrValue)) { + state.lastMayContainStrings = false + return + } + if (state.switchV && state.unicodeProperties.binaryOfStrings.test(nameOrValue)) { + state.lastMayContainStrings = true + return + } + state.raise("Invalid property name") } // UnicodePropertyName :: @@ -834,21 +866,50 @@ pp.regexp_eatLoneUnicodePropertyNameOrValue = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-CharacterClass pp.regexp_eatCharacterClass = function(state) { if (state.eat(0x5B /* [ */)) { - state.eat(0x5E /* ^ */) - this.regexp_classRanges(state) + const negate = state.eat(0x5E /* ^ */) + this.regexp_classContents(state) if (state.eat(0x5D /* ] */)) { + if (negate && state.lastMayContainStrings) { + state.raise("Negated character class may contain strings") + } return true } + if (state.switchV) { + const start = state.pos + this.regexp_nonEmptyClassRanges(state) + const terminated = state.current() === 0x5D /* ] */ + state.pos = start + if (terminated) { + // Make the same message as V8. + state.raise("Invalid character in character class") + } + } + // Unreachable since it threw "unterminated regular expression" error before. state.raise("Unterminated character class") } return false } +// https://tc39.es/ecma262/#prod-ClassContents // https://www.ecma-international.org/ecma-262/8.0/#prod-ClassRanges +pp.regexp_classContents = function(state) { + if (state.current() === 0x5D /* ] */) { + // empty + state.lastMayContainStrings = false + return + } + if (state.switchV) { + this.regexp_classSetExpression(state) + } else { + this.regexp_nonEmptyClassRanges(state) + state.lastMayContainStrings = false + } +} + // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRanges // https://www.ecma-international.org/ecma-262/8.0/#prod-NonemptyClassRangesNoDash -pp.regexp_classRanges = function(state) { +pp.regexp_nonEmptyClassRanges = function(state) { while (this.regexp_eatClassAtom(state)) { const left = state.lastIntValue if (state.eat(0x2D /* - */) && this.regexp_eatClassAtom(state)) { @@ -920,6 +981,247 @@ pp.regexp_eatClassEscape = function(state) { ) } +// https://tc39.es/ecma262/#prod-ClassSetExpression +// https://tc39.es/ecma262/#prod-ClassUnion +// https://tc39.es/ecma262/#prod-ClassIntersection +// https://tc39.es/ecma262/#prod-ClassSubtraction +pp.regexp_classSetExpression = function(state) { + let nextMayContainStrings = false + if (this.regexp_eatClassSetRange(state)) + ; + else if (this.regexp_eatClassSetOperand(state)) { + nextMayContainStrings = state.lastMayContainStrings + let pos = state.pos + const start = pos + // https://tc39.es/ecma262/#prod-ClassIntersection + while ( + state.eat(0x26 /* & */) && state.eat(0x26 /* & */) && + state.current() !== 0x26 /* & */ && + this.regexp_eatClassSetOperand(state) + ) { + if (!state.lastMayContainStrings) nextMayContainStrings = false + pos = state.pos + } + state.pos = pos + if (state.pos !== start) { + state.lastMayContainStrings = nextMayContainStrings + return + } + // https://tc39.es/ecma262/#prod-ClassSubtraction + while ( + state.eat(0x2D /* - */) && state.eat(0x2D /* - */) && + this.regexp_eatClassSetOperand(state) + ) { + pos = state.pos + } + state.pos = pos + if (state.pos !== start) { + state.lastMayContainStrings = nextMayContainStrings + return + } + } else { + const ch = state.current() + if (ch === 0x5C /* \ */ && state.lookahead() !== 0x71 /* q */) { + // Make the same message as V8. + state.raise("Invalid escape") + } + if (ch === state.lookahead() && isClassSetReservedDoublePunctuatorCharacter(ch)) { + // Make the same message as V8. + state.raise("Invalid set operation in character class") + } + state.raise("Invalid character in character class") + } + // https://tc39.es/ecma262/#prod-ClassUnion + for (;;) { + if (this.regexp_eatClassSetRange(state)) { + if (state.lastMayContainStrings) nextMayContainStrings = true + continue + } + if (this.regexp_eatClassSetOperand(state)) { + continue + } + break + } + state.lastMayContainStrings = nextMayContainStrings +} + +// https://tc39.es/ecma262/#prod-ClassSetRange +pp.regexp_eatClassSetRange = function(state) { + const start = state.pos + if (this.regexp_eatClassSetCharacter(state)) { + const left = state.lastIntValue + if (state.eat(0x2D /* - */) && this.regexp_eatClassSetCharacter(state)) { + const right = state.lastIntValue + if (left !== -1 && right !== -1 && left > right) { + state.raise("Range out of order in character class") + } + return true + } + state.pos = start + } + return false +} + +// https://tc39.es/ecma262/#prod-ClassSetOperand +pp.regexp_eatClassSetOperand = function(state) { + if (this.regexp_eatClassSetCharacter(state)) { + state.lastMayContainStrings = false + return true + } + return ( + this.regexp_eatClassStringDisjunction(state) || + this.regexp_eatNestedClass(state) + ) +} + +// https://tc39.es/ecma262/#prod-NestedClass +pp.regexp_eatNestedClass = function(state) { + const start = state.pos + if (state.eat(0x5B /* [ */)) { + const negate = state.eat(0x5E /* ^ */) + this.regexp_classContents(state) + if (state.eat(0x5D /* ] */)) { + if (negate && state.lastMayContainStrings) { + state.raise("Negated character class may contain strings") + } + return true + } + state.pos = start + } + if (state.eat(0x5C /* \ */) && this.regexp_eatCharacterClassEscape(state)) { + return true + } + state.pos = start + return false +} + +// https://tc39.es/ecma262/#prod-ClassStringDisjunction +pp.regexp_eatClassStringDisjunction = function(state) { + const start = state.pos + if (state.eat(0x5C /* \ */)) { + if (state.eat(0x71 /* q */)) { + if (state.eat(0x7B /* { */)) { + this.regexp_classStringDisjunctionContents(state) + if (state.eat(0x7D /* } */)) { + return true + } + } else { + // Make the same message as V8. + state.raise("Invalid escape") + } + } + state.pos = start + } + return false +} + +// https://tc39.es/ecma262/#prod-ClassStringDisjunctionContents +pp.regexp_classStringDisjunctionContents = function(state) { + this.regexp_classString(state) + let nextMayContainStrings = state.lastMayContainStrings + while (state.eat(0x7C /* | */)) { + this.regexp_classString(state) + if (state.lastMayContainStrings) nextMayContainStrings = true + } + state.lastMayContainStrings = nextMayContainStrings +} + +// https://tc39.es/ecma262/#prod-ClassString +// https://tc39.es/ecma262/#prod-NonEmptyClassString +pp.regexp_classString = function(state) { + if (!this.regexp_eatClassSetCharacter(state)) { + // empty + state.lastMayContainStrings = true + return + } + let nextMayContainStrings = false + while (this.regexp_eatClassSetCharacter(state)) + nextMayContainStrings = true + state.lastMayContainStrings = nextMayContainStrings +} + +// https://tc39.es/ecma262/#prod-ClassSetCharacter +pp.regexp_eatClassSetCharacter = function(state) { + const start = state.pos + if (state.eat(0x5C /* \ */)) { + if ( + this.regexp_eatCharacterEscape(state) || + this.regexp_eatClassSetReservedPunctuator(state) + ) { + return true + } + if (state.eat(0x62 /* b */)) { + state.lastIntValue = 0x08 /* */ + return true + } + state.pos = start + return false + } + const ch = state.current() + if (ch === state.lookahead() && isClassSetReservedDoublePunctuatorCharacter(ch)) { + return false + } + if (isClassSetSyntaxCharacter(ch)) { + return false + } + state.advance() + state.lastIntValue = ch + return true +} + +// https://tc39.es/ecma262/#prod-ClassSetReservedDoublePunctuator +function isClassSetReservedDoublePunctuatorCharacter(ch) { + return ( + ch === 0x21 /* ! */ || + ch >= 0x23 /* # */ && ch <= 0x26 /* & */ || + ch >= 0x2A /* * */ && ch <= 0x2C /* , */ || + ch === 0x2E /* . */ || + ch >= 0x3A /* : */ && ch <= 0x40 /* @ */ || + ch === 0x5E /* ^ */ || + ch === 0x60 /* ` */ || + ch === 0x7E /* ~ */ + ) +} + +// https://tc39.es/ecma262/#prod-ClassSetSyntaxCharacter +function isClassSetSyntaxCharacter(ch) { + return ( + ch === 0x28 /* ( */ || + ch === 0x29 /* ) */ || + ch === 0x2D /* - */ || + ch === 0x2F /* / */ || + ch >= 0x5B /* [ */ && ch <= 0x5D /* ] */ || + ch >= 0x7B /* { */ && ch <= 0x7D /* } */ + ) +} + +// https://tc39.es/ecma262/#prod-ClassSetReservedPunctuator +pp.regexp_eatClassSetReservedPunctuator = function(state) { + const ch = state.current() + if (isClassSetReservedPunctuator(ch)) { + state.lastIntValue = ch + state.advance() + return true + } + return false +} + +// https://tc39.es/ecma262/#prod-ClassSetReservedPunctuator +function isClassSetReservedPunctuator(ch) { + return ( + ch === 0x21 /* ! */ || + ch === 0x23 /* # */ || + ch === 0x25 /* % */ || + ch === 0x26 /* & */ || + ch === 0x2C /* , */ || + ch === 0x2D /* - */ || + ch >= 0x3A /* : */ && ch <= 0x3E /* > */ || + ch === 0x40 /* @ */ || + ch === 0x60 /* ` */ || + ch === 0x7E /* ~ */ + ) +} + // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-ClassControlLetter pp.regexp_eatClassControlLetter = function(state) { const ch = state.current() diff --git a/acorn/src/unicode-property-data.js b/acorn/src/unicode-property-data.js index c7a775627..7fdbf43e1 100644 --- a/acorn/src/unicode-property-data.js +++ b/acorn/src/unicode-property-data.js @@ -21,6 +21,18 @@ const unicodeBinaryProperties = { 14: ecma14BinaryProperties } +// #table-binary-unicode-properties-of-strings +const ecma14BinaryPropertiesOfStrings = "Basic_Emoji Emoji_Keycap_Sequence RGI_Emoji_Modifier_Sequence RGI_Emoji_Flag_Sequence RGI_Emoji_Tag_Sequence RGI_Emoji_ZWJ_Sequence RGI_Emoji" + +const unicodeBinaryPropertiesOfStrings = { + 9: "", + 10: "", + 11: "", + 12: "", + 13: "", + 14: ecma14BinaryPropertiesOfStrings +} + // #table-unicode-general-category-values const unicodeGeneralCategoryValues = "Cased_Letter LC Close_Punctuation Pe Connector_Punctuation Pc Control Cc cntrl Currency_Symbol Sc Dash_Punctuation Pd Decimal_Number Nd digit Enclosing_Mark Me Final_Punctuation Pf Format Cf Initial_Punctuation Pi Letter L Letter_Number Nl Line_Separator Zl Lowercase_Letter Ll Mark M Combining_Mark Math_Symbol Sm Modifier_Letter Lm Modifier_Symbol Sk Nonspacing_Mark Mn Number N Open_Punctuation Ps Other C Other_Letter Lo Other_Number No Other_Punctuation Po Other_Symbol So Paragraph_Separator Zp Private_Use Co Punctuation P punct Separator Z Space_Separator Zs Spacing_Mark Mc Surrogate Cs Symbol S Titlecase_Letter Lt Unassigned Cn Uppercase_Letter Lu" @@ -45,6 +57,7 @@ const data = {} function buildUnicodeData(ecmaVersion) { const d = data[ecmaVersion] = { binary: wordsRegexp(unicodeBinaryProperties[ecmaVersion] + " " + unicodeGeneralCategoryValues), + binaryOfStrings: wordsRegexp(unicodeBinaryPropertiesOfStrings[ecmaVersion]), nonBinary: { General_Category: wordsRegexp(unicodeGeneralCategoryValues), Script: wordsRegexp(unicodeScriptValues[ecmaVersion]) diff --git a/bin/test262.unsupported-features b/bin/test262.unsupported-features index d7bd6a317..5ab02064c 100644 --- a/bin/test262.unsupported-features +++ b/bin/test262.unsupported-features @@ -1,4 +1,3 @@ decorators import-assertions regexp-duplicate-named-groups -regexp-v-flag diff --git a/test/run.js b/test/run.js index 84f1b1875..05087c483 100644 --- a/test/run.js +++ b/test/run.js @@ -14,6 +14,7 @@ require("./tests-regexp-2018.js"); require("./tests-regexp-2020.js"); require("./tests-regexp-2022.js"); + require("./tests-regexp-2024.js"); require("./tests-json-superset.js"); require("./tests-optional-catch-binding.js"); require("./tests-bigint.js"); diff --git a/test/tests-regexp-2024.js b/test/tests-regexp-2024.js new file mode 100644 index 000000000..f13d2aabb --- /dev/null +++ b/test/tests-regexp-2024.js @@ -0,0 +1,78 @@ +if (typeof exports !== "undefined") { + var test = require("./driver.js").test + var testFail = require("./driver.js").testFail +} +test("/[\\p{Basic_Emoji}][^]/v", {}, { ecmaVersion: 2024 }) + +test("/a/v", {}, { ecmaVersion: 2024 }) +testFail("/a/v", "Invalid regular expression flag (1:1)", { ecmaVersion: 2023 }) +testFail("/a/uv", "Invalid regular expression flag (1:1)", { ecmaVersion: 2024 }) +test("/[]/v", {}, { ecmaVersion: 2024 }) +test("/[^]/v", {}, { ecmaVersion: 2024 }) +test("/[&]/v", {}, { ecmaVersion: 2024 }) +test("/[\\b]/v", {}, { ecmaVersion: 2024 }) +test("/[\\&]/v", {}, { ecmaVersion: 2024 }) +testFail("/[\\z]/v", "Invalid regular expression: /[\\z]/: Invalid escape (1:1)", { ecmaVersion: 2024 }) +testFail("/[a\\z]/v", "Invalid regular expression: /[a\\z]/: Invalid escape (1:1)", { ecmaVersion: 2024 }) +// Union +test("/[abc]/v", {}, { ecmaVersion: 2024 }) +test("/[a-c]/v", {}, { ecmaVersion: 2024 }) +test("/[a-bc]/v", {}, { ecmaVersion: 2024 }) +test("/[ab-c]/v", {}, { ecmaVersion: 2024 }) +testFail("/[c-a]/v", "Invalid regular expression: /[c-a]/: Range out of order in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a-\\b]/v", "Invalid regular expression: /[a-\\b]/: Range out of order in character class (1:1)", { ecmaVersion: 2024 }) +// Expression +test("/[a&&b]/v", {}, { ecmaVersion: 2024 }) +test("/[a--b]/v", {}, { ecmaVersion: 2024 }) +test("/[a&&b&&c]/v", {}, { ecmaVersion: 2024 }) +test("/[a--b--c]/v", {}, { ecmaVersion: 2024 }) +testFail("/[a--]/v", "Invalid regular expression: /[a--]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a&&]/v", "Invalid regular expression: /[a&&]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a--b&&c]/v", "Invalid regular expression: /[a--b&&c]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a&&b--c]/v", "Invalid regular expression: /[a&&b--c]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a&&&]/v", "Invalid regular expression: /[a&&&]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +test("/[a&&\\&]/v", {}, { ecmaVersion: 2024 }) +testFail("/[&&]/v", "Invalid regular expression: /[&&]/: Invalid set operation in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[!!]/v", "Invalid regular expression: /[!!]/: Invalid set operation in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[##]/v", "Invalid regular expression: /[##]/: Invalid set operation in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[--]/v", "Invalid regular expression: /[--]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +// Nested +test("/[[a&&b]--[c&&d]]/v", {}, { ecmaVersion: 2024 }) +test("/[[a--b]&&[c--d]]/v", {}, { ecmaVersion: 2024 }) +test("/[[a&&b][c--d][ef]]/v", {}, { ecmaVersion: 2024 }) +// Class String +test("/[\\q{a|b}]/v", {}, { ecmaVersion: 2024 }) +test("/[\\q{abc}]/v", {}, { ecmaVersion: 2024 }) +test("/[\\q{}]/v", {}, { ecmaVersion: 2024 }) +test("/[\\q{abc|def}]/v", {}, { ecmaVersion: 2024 }) +test("/[\\q{abc|d|ef}]/v", {}, { ecmaVersion: 2024 }) +test("/[\\q{|||abc||||}]/v", {}, { ecmaVersion: 2024 }) +testFail("/\\q{a|b}/v", "Invalid regular expression: /\\q{a|b}/: Invalid escape (1:1)", { ecmaVersion: 2024 }) +testFail("/[\\q{a|b]/v", "Invalid regular expression: /[\\q{a|b]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[\\q{a|b}]/u", "Invalid regular expression: /[\\q{a|b}]/: Invalid escape (1:1)", { ecmaVersion: 2024 }) +// Unicode binary properties of strings +test("/\\p{Basic_Emoji}/v", {}, { ecmaVersion: 2024 }) +testFail("/\\p{Basic_Emoji}/u", "Invalid regular expression: /\\p{Basic_Emoji}/: Invalid property name (1:1)", { ecmaVersion: 2024 }) +test("/\\p{Basic_Emoji}/", {}, { ecmaVersion: 2024 }) // Non unicode binary properties of strings +// MayContainStrings +testFail("/\\P{Basic_Emoji}/v", "Invalid regular expression: /\\P{Basic_Emoji}/: Invalid property name (1:1)", { ecmaVersion: 2024 }) +test("/\\p{ASCII}/v", {}, { ecmaVersion: 2024 }) +test("/\\P{General_Category=Letter}/v", {}, { ecmaVersion: 2024 }) +testFail("/[^\\p{Basic_Emoji}]/v", "Invalid regular expression: /[^\\p{Basic_Emoji}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) +testFail("/[^[\\p{Basic_Emoji}]]/v", "Invalid regular expression: /[^[\\p{Basic_Emoji}]]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) +test("/[^\\d]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\D]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\s]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\S]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\w]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\W]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\p{ASCII}]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\p{General_Category=Letter}]/v", {}, { ecmaVersion: 2024 }) +test("/[^[^\\p{ASCII}]]/v", {}, { ecmaVersion: 2024 }) +test("/[\\p{Basic_Emoji}][^]/v", {}, { ecmaVersion: 2024 }) +test("/[^[\\p{ASCII}]]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\q{a}]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\q{a|b}]/v", {}, { ecmaVersion: 2024 }) +testFail("/[^\\q{}]/v", "Invalid regular expression: /[^\\q{}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) +testFail("/[^\\q{ab}]/v", "Invalid regular expression: /[^\\q{ab}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) +testFail("/[^\\q{a|bc}]/v", "Invalid regular expression: /[^\\q{a|bc}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) From 966be764822234cd4ff2fa8b6c9a2599ef7e563a Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Tue, 23 May 2023 03:15:30 +0900 Subject: [PATCH 2/8] add test cases and fix bug --- acorn/src/regexp.js | 2 +- test/tests-regexp-2024.js | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index dfe9bb33b..0d7256eb3 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -1034,10 +1034,10 @@ pp.regexp_classSetExpression = function(state) { // https://tc39.es/ecma262/#prod-ClassUnion for (;;) { if (this.regexp_eatClassSetRange(state)) { - if (state.lastMayContainStrings) nextMayContainStrings = true continue } if (this.regexp_eatClassSetOperand(state)) { + if (state.lastMayContainStrings) nextMayContainStrings = true continue } break diff --git a/test/tests-regexp-2024.js b/test/tests-regexp-2024.js index f13d2aabb..47b942647 100644 --- a/test/tests-regexp-2024.js +++ b/test/tests-regexp-2024.js @@ -2,7 +2,6 @@ if (typeof exports !== "undefined") { var test = require("./driver.js").test var testFail = require("./driver.js").testFail } -test("/[\\p{Basic_Emoji}][^]/v", {}, { ecmaVersion: 2024 }) test("/a/v", {}, { ecmaVersion: 2024 }) testFail("/a/v", "Invalid regular expression flag (1:1)", { ecmaVersion: 2023 }) @@ -76,3 +75,10 @@ test("/[^\\q{a|b}]/v", {}, { ecmaVersion: 2024 }) testFail("/[^\\q{}]/v", "Invalid regular expression: /[^\\q{}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) testFail("/[^\\q{ab}]/v", "Invalid regular expression: /[^\\q{ab}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) testFail("/[^\\q{a|bc}]/v", "Invalid regular expression: /[^\\q{a|bc}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) +test("/[^\\q{a}\\q{b}]/v", {}, { ecmaVersion: 2024 }) +testFail("/[^\\q{a}\\q{bc}]/v", "Invalid regular expression: /[^\\q{a}\\q{bc}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) +test("/[^\\q{a}&&\\q{bc}]/v", {}, { ecmaVersion: 2024 }) +test("/[^\\q{ab}&&\\q{c}]/v", {}, { ecmaVersion: 2024 }) +testFail("/[^\\q{ab}&&\\q{cd}]/v", "Invalid regular expression: /[^\\q{ab}&&\\q{cd}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) +test("/[^\\q{a}--\\q{bc}]/v", {}, { ecmaVersion: 2024 }) +testFail("/[^\\q{ab}--\\q{c}]/v", "Invalid regular expression: /[^\\q{ab}--\\q{c}]/: Negated character class may contain strings (1:1)", { ecmaVersion: 2024 }) From 990916164f1f7b337f546ec268e3f1a5087078e3 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Tue, 23 May 2023 03:32:00 +0900 Subject: [PATCH 3/8] refactor guard for regexp_eatNestedClass --- acorn/src/regexp.js | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index 0d7256eb3..9d9409d7b 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -1088,10 +1088,12 @@ pp.regexp_eatNestedClass = function(state) { } state.pos = start } - if (state.eat(0x5C /* \ */) && this.regexp_eatCharacterClassEscape(state)) { - return true + if (state.eat(0x5C /* \ */)) { + if (this.regexp_eatCharacterClassEscape(state)) { + return true + } + state.pos = start } - state.pos = start return false } From 8dd203ee7d96d24a6970e4f092d08a5f3f0249c5 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Tue, 23 May 2023 18:03:56 +0900 Subject: [PATCH 4/8] refactor semicolon to empty branches --- acorn/src/regexp.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index 9d9409d7b..0b48a62a7 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -987,9 +987,9 @@ pp.regexp_eatClassEscape = function(state) { // https://tc39.es/ecma262/#prod-ClassSubtraction pp.regexp_classSetExpression = function(state) { let nextMayContainStrings = false - if (this.regexp_eatClassSetRange(state)) - ; - else if (this.regexp_eatClassSetOperand(state)) { + if (this.regexp_eatClassSetRange(state)) { + // Continue with ClassUnion processing. + } else if (this.regexp_eatClassSetOperand(state)) { nextMayContainStrings = state.lastMayContainStrings let pos = state.pos const start = pos From 0659703377c44e786e230412cb16d39f8458b636 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Tue, 23 May 2023 18:27:42 +0900 Subject: [PATCH 5/8] add function that eats multiple characters. --- acorn/src/regexp.js | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index 0b48a62a7..fa13efeec 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -97,6 +97,19 @@ export class RegExpValidationState { } return false } + + eatChars(chs, forceU = false) { + let pos = this.pos + for (const ch of chs) { + const current = this.at(pos, forceU) + if (current === -1 || current !== ch) { + return false + } + pos = this.nextIndex(pos, forceU) + } + this.pos = pos + return true + } } /** @@ -995,7 +1008,7 @@ pp.regexp_classSetExpression = function(state) { const start = pos // https://tc39.es/ecma262/#prod-ClassIntersection while ( - state.eat(0x26 /* & */) && state.eat(0x26 /* & */) && + state.eatChars([0x26, 0x26] /* && */) && state.current() !== 0x26 /* & */ && this.regexp_eatClassSetOperand(state) ) { @@ -1009,7 +1022,7 @@ pp.regexp_classSetExpression = function(state) { } // https://tc39.es/ecma262/#prod-ClassSubtraction while ( - state.eat(0x2D /* - */) && state.eat(0x2D /* - */) && + state.eatChars([0x2D, 0x2D] /* -- */) && this.regexp_eatClassSetOperand(state) ) { pos = state.pos @@ -1100,17 +1113,15 @@ pp.regexp_eatNestedClass = function(state) { // https://tc39.es/ecma262/#prod-ClassStringDisjunction pp.regexp_eatClassStringDisjunction = function(state) { const start = state.pos - if (state.eat(0x5C /* \ */)) { - if (state.eat(0x71 /* q */)) { - if (state.eat(0x7B /* { */)) { - this.regexp_classStringDisjunctionContents(state) - if (state.eat(0x7D /* } */)) { - return true - } - } else { - // Make the same message as V8. - state.raise("Invalid escape") + if (state.eatChars([0x5C, 0x71] /* \q */)) { + if (state.eat(0x7B /* { */)) { + this.regexp_classStringDisjunctionContents(state) + if (state.eat(0x7D /* } */)) { + return true } + } else { + // Make the same message as V8. + state.raise("Invalid escape") } state.pos = start } From 826f6f7af1ae2ea91e3105f89a7fe7df9bc9d3c4 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Tue, 23 May 2023 19:38:28 +0900 Subject: [PATCH 6/8] remove complex logic for identifying error messages --- acorn/src/regexp.js | 19 ------------------- test/tests-regexp-2024.js | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index fa13efeec..c7625ef72 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -887,16 +887,6 @@ pp.regexp_eatCharacterClass = function(state) { } return true } - if (state.switchV) { - const start = state.pos - this.regexp_nonEmptyClassRanges(state) - const terminated = state.current() === 0x5D /* ] */ - state.pos = start - if (terminated) { - // Make the same message as V8. - state.raise("Invalid character in character class") - } - } // Unreachable since it threw "unterminated regular expression" error before. state.raise("Unterminated character class") @@ -1033,15 +1023,6 @@ pp.regexp_classSetExpression = function(state) { return } } else { - const ch = state.current() - if (ch === 0x5C /* \ */ && state.lookahead() !== 0x71 /* q */) { - // Make the same message as V8. - state.raise("Invalid escape") - } - if (ch === state.lookahead() && isClassSetReservedDoublePunctuatorCharacter(ch)) { - // Make the same message as V8. - state.raise("Invalid set operation in character class") - } state.raise("Invalid character in character class") } // https://tc39.es/ecma262/#prod-ClassUnion diff --git a/test/tests-regexp-2024.js b/test/tests-regexp-2024.js index 47b942647..8111ec106 100644 --- a/test/tests-regexp-2024.js +++ b/test/tests-regexp-2024.js @@ -11,8 +11,8 @@ test("/[^]/v", {}, { ecmaVersion: 2024 }) test("/[&]/v", {}, { ecmaVersion: 2024 }) test("/[\\b]/v", {}, { ecmaVersion: 2024 }) test("/[\\&]/v", {}, { ecmaVersion: 2024 }) -testFail("/[\\z]/v", "Invalid regular expression: /[\\z]/: Invalid escape (1:1)", { ecmaVersion: 2024 }) -testFail("/[a\\z]/v", "Invalid regular expression: /[a\\z]/: Invalid escape (1:1)", { ecmaVersion: 2024 }) +testFail("/[\\z]/v", "Invalid regular expression: /[\\z]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a\\z]/v", "Invalid regular expression: /[a\\z]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) // Union test("/[abc]/v", {}, { ecmaVersion: 2024 }) test("/[a-c]/v", {}, { ecmaVersion: 2024 }) @@ -25,15 +25,15 @@ test("/[a&&b]/v", {}, { ecmaVersion: 2024 }) test("/[a--b]/v", {}, { ecmaVersion: 2024 }) test("/[a&&b&&c]/v", {}, { ecmaVersion: 2024 }) test("/[a--b--c]/v", {}, { ecmaVersion: 2024 }) -testFail("/[a--]/v", "Invalid regular expression: /[a--]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) -testFail("/[a&&]/v", "Invalid regular expression: /[a&&]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) -testFail("/[a--b&&c]/v", "Invalid regular expression: /[a--b&&c]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) -testFail("/[a&&b--c]/v", "Invalid regular expression: /[a&&b--c]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) -testFail("/[a&&&]/v", "Invalid regular expression: /[a&&&]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a--]/v", "Invalid regular expression: /[a--]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a&&]/v", "Invalid regular expression: /[a&&]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a--b&&c]/v", "Invalid regular expression: /[a--b&&c]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a&&b--c]/v", "Invalid regular expression: /[a&&b--c]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a&&&]/v", "Invalid regular expression: /[a&&&]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) test("/[a&&\\&]/v", {}, { ecmaVersion: 2024 }) -testFail("/[&&]/v", "Invalid regular expression: /[&&]/: Invalid set operation in character class (1:1)", { ecmaVersion: 2024 }) -testFail("/[!!]/v", "Invalid regular expression: /[!!]/: Invalid set operation in character class (1:1)", { ecmaVersion: 2024 }) -testFail("/[##]/v", "Invalid regular expression: /[##]/: Invalid set operation in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[&&]/v", "Invalid regular expression: /[&&]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[!!]/v", "Invalid regular expression: /[!!]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[##]/v", "Invalid regular expression: /[##]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) testFail("/[--]/v", "Invalid regular expression: /[--]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) // Nested test("/[[a&&b]--[c&&d]]/v", {}, { ecmaVersion: 2024 }) From d09dc4e187291677f6c4225c8606311026c162c1 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Wed, 24 May 2023 05:34:01 +0900 Subject: [PATCH 7/8] change to exchange MayContainStrings with result. --- acorn/src/regexp.js | 108 ++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 58 deletions(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index c7625ef72..8e93c531e 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -20,7 +20,6 @@ export class RegExpValidationState { this.lastIntValue = 0 this.lastStringValue = "" this.lastAssertionIsQuantifiable = false - this.lastMayContainStrings = false this.numCapturingParens = 0 this.maxBackReference = 0 this.groupNames = [] @@ -167,7 +166,6 @@ pp.regexp_pattern = function(state) { state.lastIntValue = 0 state.lastStringValue = "" state.lastAssertionIsQuantifiable = false - state.lastMayContainStrings = false state.numCapturingParens = 0 state.maxBackReference = 0 state.groupNames.length = 0 @@ -758,8 +756,7 @@ pp.regexp_eatCharacterClassEscape = function(state) { if (isCharacterClassEscape(ch)) { state.lastIntValue = -1 state.advance() - state.lastMayContainStrings = false - return true + return {} } let negate = false @@ -770,20 +767,21 @@ pp.regexp_eatCharacterClassEscape = function(state) { ) { state.lastIntValue = -1 state.advance() + let result if ( state.eat(0x7B /* { */) && - this.regexp_eatUnicodePropertyValueExpression(state) && + (result = this.regexp_eatUnicodePropertyValueExpression(state)) && state.eat(0x7D /* } */) ) { - if (negate && state.lastMayContainStrings) { + if (negate && result.mayContainStrings) { state.raise("Invalid property name") } - return true + return result } state.raise("Invalid property name") } - return false + return null } function isCharacterClassEscape(ch) { return ( @@ -808,8 +806,7 @@ pp.regexp_eatUnicodePropertyValueExpression = function(state) { if (this.regexp_eatUnicodePropertyValue(state)) { const value = state.lastStringValue this.regexp_validateUnicodePropertyNameAndValue(state, name, value) - state.lastMayContainStrings = false - return true + return {} } } state.pos = start @@ -817,10 +814,9 @@ pp.regexp_eatUnicodePropertyValueExpression = function(state) { // LoneUnicodePropertyNameOrValue if (this.regexp_eatLoneUnicodePropertyNameOrValue(state)) { const nameOrValue = state.lastStringValue - this.regexp_validateUnicodePropertyNameOrValue(state, nameOrValue) - return true + return this.regexp_validateUnicodePropertyNameOrValue(state, nameOrValue) } - return false + return null } pp.regexp_validateUnicodePropertyNameAndValue = function(state, name, value) { if (!hasOwn(state.unicodeProperties.nonBinary, name)) @@ -830,12 +826,10 @@ pp.regexp_validateUnicodePropertyNameAndValue = function(state, name, value) { } pp.regexp_validateUnicodePropertyNameOrValue = function(state, nameOrValue) { if (state.unicodeProperties.binary.test(nameOrValue)) { - state.lastMayContainStrings = false - return + return {} } if (state.switchV && state.unicodeProperties.binaryOfStrings.test(nameOrValue)) { - state.lastMayContainStrings = true - return + return {mayContainStrings: true} } state.raise("Invalid property name") } @@ -880,18 +874,18 @@ pp.regexp_eatLoneUnicodePropertyNameOrValue = function(state) { pp.regexp_eatCharacterClass = function(state) { if (state.eat(0x5B /* [ */)) { const negate = state.eat(0x5E /* ^ */) - this.regexp_classContents(state) + const result = this.regexp_classContents(state) if (state.eat(0x5D /* ] */)) { - if (negate && state.lastMayContainStrings) { + if (negate && result.mayContainStrings) { state.raise("Negated character class may contain strings") } - return true + return result } // Unreachable since it threw "unterminated regular expression" error before. state.raise("Unterminated character class") } - return false + return null } // https://tc39.es/ecma262/#prod-ClassContents @@ -899,14 +893,13 @@ pp.regexp_eatCharacterClass = function(state) { pp.regexp_classContents = function(state) { if (state.current() === 0x5D /* ] */) { // empty - state.lastMayContainStrings = false - return + return {} } if (state.switchV) { - this.regexp_classSetExpression(state) + return this.regexp_classSetExpression(state) } else { this.regexp_nonEmptyClassRanges(state) - state.lastMayContainStrings = false + return {} } } @@ -989,26 +982,26 @@ pp.regexp_eatClassEscape = function(state) { // https://tc39.es/ecma262/#prod-ClassIntersection // https://tc39.es/ecma262/#prod-ClassSubtraction pp.regexp_classSetExpression = function(state) { - let nextMayContainStrings = false + let mayContainStrings = false + let result if (this.regexp_eatClassSetRange(state)) { // Continue with ClassUnion processing. - } else if (this.regexp_eatClassSetOperand(state)) { - nextMayContainStrings = state.lastMayContainStrings + } else if (result = this.regexp_eatClassSetOperand(state)) { + mayContainStrings = result.mayContainStrings let pos = state.pos const start = pos // https://tc39.es/ecma262/#prod-ClassIntersection while ( state.eatChars([0x26, 0x26] /* && */) && state.current() !== 0x26 /* & */ && - this.regexp_eatClassSetOperand(state) + (result = this.regexp_eatClassSetOperand(state)) ) { - if (!state.lastMayContainStrings) nextMayContainStrings = false + if (!result.mayContainStrings) mayContainStrings = false pos = state.pos } state.pos = pos if (state.pos !== start) { - state.lastMayContainStrings = nextMayContainStrings - return + return {mayContainStrings} } // https://tc39.es/ecma262/#prod-ClassSubtraction while ( @@ -1019,8 +1012,7 @@ pp.regexp_classSetExpression = function(state) { } state.pos = pos if (state.pos !== start) { - state.lastMayContainStrings = nextMayContainStrings - return + return {mayContainStrings} } } else { state.raise("Invalid character in character class") @@ -1030,13 +1022,14 @@ pp.regexp_classSetExpression = function(state) { if (this.regexp_eatClassSetRange(state)) { continue } - if (this.regexp_eatClassSetOperand(state)) { - if (state.lastMayContainStrings) nextMayContainStrings = true + const result = this.regexp_eatClassSetOperand(state) + if (result) { + if (result.mayContainStrings) mayContainStrings = true continue } break } - state.lastMayContainStrings = nextMayContainStrings + return {mayContainStrings} } // https://tc39.es/ecma262/#prod-ClassSetRange @@ -1059,8 +1052,7 @@ pp.regexp_eatClassSetRange = function(state) { // https://tc39.es/ecma262/#prod-ClassSetOperand pp.regexp_eatClassSetOperand = function(state) { if (this.regexp_eatClassSetCharacter(state)) { - state.lastMayContainStrings = false - return true + return {} } return ( this.regexp_eatClassStringDisjunction(state) || @@ -1073,22 +1065,23 @@ pp.regexp_eatNestedClass = function(state) { const start = state.pos if (state.eat(0x5B /* [ */)) { const negate = state.eat(0x5E /* ^ */) - this.regexp_classContents(state) + const result = this.regexp_classContents(state) if (state.eat(0x5D /* ] */)) { - if (negate && state.lastMayContainStrings) { + if (negate && result.mayContainStrings) { state.raise("Negated character class may contain strings") } - return true + return result } state.pos = start } if (state.eat(0x5C /* \ */)) { - if (this.regexp_eatCharacterClassEscape(state)) { - return true + const result = this.regexp_eatCharacterClassEscape(state) + if (result) { + return result } state.pos = start } - return false + return null } // https://tc39.es/ecma262/#prod-ClassStringDisjunction @@ -1096,9 +1089,9 @@ pp.regexp_eatClassStringDisjunction = function(state) { const start = state.pos if (state.eatChars([0x5C, 0x71] /* \q */)) { if (state.eat(0x7B /* { */)) { - this.regexp_classStringDisjunctionContents(state) + const result = this.regexp_classStringDisjunctionContents(state) if (state.eat(0x7D /* } */)) { - return true + return result } } else { // Make the same message as V8. @@ -1106,18 +1099,18 @@ pp.regexp_eatClassStringDisjunction = function(state) { } state.pos = start } - return false + return null } // https://tc39.es/ecma262/#prod-ClassStringDisjunctionContents pp.regexp_classStringDisjunctionContents = function(state) { - this.regexp_classString(state) - let nextMayContainStrings = state.lastMayContainStrings + const result = this.regexp_classString(state) + let mayContainStrings = result.mayContainStrings while (state.eat(0x7C /* | */)) { - this.regexp_classString(state) - if (state.lastMayContainStrings) nextMayContainStrings = true + const result = this.regexp_classString(state) + if (result.mayContainStrings) mayContainStrings = true } - state.lastMayContainStrings = nextMayContainStrings + return {mayContainStrings} } // https://tc39.es/ecma262/#prod-ClassString @@ -1125,13 +1118,12 @@ pp.regexp_classStringDisjunctionContents = function(state) { pp.regexp_classString = function(state) { if (!this.regexp_eatClassSetCharacter(state)) { // empty - state.lastMayContainStrings = true - return + return {mayContainStrings: true} } - let nextMayContainStrings = false + let mayContainStrings = false while (this.regexp_eatClassSetCharacter(state)) - nextMayContainStrings = true - state.lastMayContainStrings = nextMayContainStrings + mayContainStrings = true + return {mayContainStrings} } // https://tc39.es/ecma262/#prod-ClassSetCharacter From 0265d1b5933e49841c50088b956abd3760155097 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Wed, 24 May 2023 06:04:55 +0900 Subject: [PATCH 8/8] change to raise error if no operand follows operator --- acorn/src/regexp.js | 35 +++++++++++++++++------------------ test/tests-regexp-2024.js | 8 +++++--- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index 8e93c531e..f2f4714b2 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -988,30 +988,29 @@ pp.regexp_classSetExpression = function(state) { // Continue with ClassUnion processing. } else if (result = this.regexp_eatClassSetOperand(state)) { mayContainStrings = result.mayContainStrings - let pos = state.pos - const start = pos // https://tc39.es/ecma262/#prod-ClassIntersection - while ( - state.eatChars([0x26, 0x26] /* && */) && - state.current() !== 0x26 /* & */ && - (result = this.regexp_eatClassSetOperand(state)) - ) { - if (!result.mayContainStrings) mayContainStrings = false - pos = state.pos + const start = state.pos + while (state.eatChars([0x26, 0x26] /* && */)) { + if ( + state.current() !== 0x26 /* & */ && + (result = this.regexp_eatClassSetOperand(state)) + ) { + if (!result.mayContainStrings) mayContainStrings = false + continue + } + state.raise("Invalid character in character class") } - state.pos = pos - if (state.pos !== start) { + if (start !== state.pos) { return {mayContainStrings} } // https://tc39.es/ecma262/#prod-ClassSubtraction - while ( - state.eatChars([0x2D, 0x2D] /* -- */) && - this.regexp_eatClassSetOperand(state) - ) { - pos = state.pos + while (state.eatChars([0x2D, 0x2D] /* -- */)) { + if (this.regexp_eatClassSetOperand(state)) { + continue + } + state.raise("Invalid character in character class") } - state.pos = pos - if (state.pos !== start) { + if (start !== state.pos) { return {mayContainStrings} } } else { diff --git a/test/tests-regexp-2024.js b/test/tests-regexp-2024.js index 8111ec106..dacc631b3 100644 --- a/test/tests-regexp-2024.js +++ b/test/tests-regexp-2024.js @@ -25,11 +25,13 @@ test("/[a&&b]/v", {}, { ecmaVersion: 2024 }) test("/[a--b]/v", {}, { ecmaVersion: 2024 }) test("/[a&&b&&c]/v", {}, { ecmaVersion: 2024 }) test("/[a--b--c]/v", {}, { ecmaVersion: 2024 }) -testFail("/[a--]/v", "Invalid regular expression: /[a--]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) -testFail("/[a&&]/v", "Invalid regular expression: /[a&&]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a--]/v", "Invalid regular expression: /[a--]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a&&]/v", "Invalid regular expression: /[a&&]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) testFail("/[a--b&&c]/v", "Invalid regular expression: /[a--b&&c]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) testFail("/[a&&b--c]/v", "Invalid regular expression: /[a&&b--c]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) -testFail("/[a&&&]/v", "Invalid regular expression: /[a&&&]/: Unterminated character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a&&&]/v", "Invalid regular expression: /[a&&&]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a&&b&&]/v", "Invalid regular expression: /[a&&b&&]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) +testFail("/[a--b--]/v", "Invalid regular expression: /[a--b--]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) test("/[a&&\\&]/v", {}, { ecmaVersion: 2024 }) testFail("/[&&]/v", "Invalid regular expression: /[&&]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 }) testFail("/[!!]/v", "Invalid regular expression: /[!!]/: Invalid character in character class (1:1)", { ecmaVersion: 2024 })