Skip to content

Commit

Permalink
perf: compare charcode on valid_regex_flags
Browse files Browse the repository at this point in the history
baseline 256 small regexp literal with all flags: 4342 ops/sec ±1.57% (0.23ms)
baseline 512 small regexp literal with all flags: 2213 ops/sec ±1.51% (0.452ms)
baseline 1024 small regexp literal with all flags: 1144 ops/sec ±0.22% (0.874ms)
baseline 2048 small regexp literal with all flags: 541 ops/sec ±1.12% (1.849ms)
current 256 small regexp literal with all flags: 4643 ops/sec ±1.3% (0.215ms)
current 512 small regexp literal with all flags: 2355 ops/sec ±1.14% (0.425ms)
current 1024 small regexp literal with all flags: 1176 ops/sec ±0.87% (0.85ms)
current 2048 small regexp literal with all flags: 553 ops/sec ±1.31% (1.807ms)
  • Loading branch information
JLHwung committed Jun 10, 2021
1 parent 0f2b924 commit f377065
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 9 deletions.
@@ -0,0 +1,22 @@
import Benchmark from "benchmark";
import baseline from "../../lib/index-v2.js";
import current from "../../lib/index.js";
import { report } from "../util.mjs";

// Single suite shared by the whole script: benchCases() registers every case
// on it, and it is run once at the bottom of the file.
const suite = new Benchmark.Suite();
/**
 * Build the benchmark source text: `length` consecutive copies of the
 * statement `/x/dgimsuy;`, a minimal regexp literal carrying the flags
 * d, g, i, m, s, u and y.
 *
 * @param {number} length - How many regexp-literal statements to emit.
 * @returns {string} The repeated statement, with no separator in between.
 */
function createInput(length) {
  const regexpStatement = "/x/dgimsuy;";
  return regexpStatement.repeat(length);
}
/**
 * Register one benchmark case per input size (256, 512, 1024 and 2048
 * regexp literals) on the shared `suite`.
 *
 * The input string is built once per size, outside the timed callback, so
 * only the `parse` call itself is measured.
 *
 * @param {string} name - Label prepended to each case title.
 * @param {{ parse: Function }} implementation - Object whose `parse(input, options)` is benchmarked.
 * @param {*} [options] - Forwarded unchanged as the second argument of `parse`.
 */
function benchCases(name, implementation, options) {
  const sizes = [256, 512, 1024, 2048];
  sizes.forEach(size => {
    const source = createInput(size);
    const title = `${name} ${size} small regexp literal with all flags`;
    suite.add(title, () => {
      implementation.parse(source, options);
    });
  });
}

// Register the same cases for both parser builds (baseline first, then
// current); no parser options are passed.
const implementations = [
  ["baseline", baseline],
  ["current", current],
];
for (const [label, parser] of implementations) {
  benchCases(label, parser);
}

// Report after each completed cycle, then start the run.
suite.on("cycle", report).run();
24 changes: 15 additions & 9 deletions packages/babel-parser/src/tokenizer/index.js
Expand Up @@ -20,7 +20,15 @@ import {
import State from "./state";
import type { LookaheadState } from "./state";

const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u", "d"]);
// Character codes of the flag letters accepted after a regexp literal
// (g, m, s, i, y, u, d). Stored as numbers rather than one-character strings
// so the code point read by the tokenizer can be looked up directly.
const VALID_REGEX_FLAGS = new Set([
charCodes.lowercaseG,
charCodes.lowercaseM,
charCodes.lowercaseS,
charCodes.lowercaseI,
charCodes.lowercaseY,
charCodes.lowercaseU,
charCodes.lowercaseD,
]);

// The following character codes are forbidden from being
// an immediate sibling of NumericLiteralSeparator _
Expand Down Expand Up @@ -1003,17 +1011,15 @@ export default class Tokenizer extends ParserErrors {
let mods = "";

while (pos < this.length) {
const char = this.input[pos];
const charCode = this.codePointAtPos(pos);
const cp = this.codePointAtPos(pos);
// It doesn't matter if cp > 0xffff, the loop will either throw or break because we check on cp
const char = String.fromCharCode(cp);

if (VALID_REGEX_FLAGS.has(char)) {
if (mods.indexOf(char) > -1) {
if (VALID_REGEX_FLAGS.has(cp)) {
if (mods.includes(char)) {
this.raise(pos + 1, Errors.DuplicateRegExpFlags);
}
} else if (
isIdentifierChar(charCode) ||
charCode === charCodes.backslash
) {
} else if (isIdentifierChar(cp) || cp === charCodes.backslash) {
this.raise(pos + 1, Errors.MalformedRegExpFlags);
} else {
break;
Expand Down

0 comments on commit f377065

Please sign in to comment.