perf: compare charcode on valid_regex_flags

baseline 256 small regexp literal with all flags: 4342 ops/sec ±1.57% (0.23ms)
baseline 512 small regexp literal with all flags: 2213 ops/sec ±1.51% (0.452ms)
baseline 1024 small regexp literal with all flags: 1144 ops/sec ±0.22% (0.874ms)
baseline 2048 small regexp literal with all flags: 541 ops/sec ±1.12% (1.849ms)
current 256 small regexp literal with all flags: 4643 ops/sec ±1.3% (0.215ms)
current 512 small regexp literal with all flags: 2355 ops/sec ±1.14% (0.425ms)
current 1024 small regexp literal with all flags: 1176 ops/sec ±0.87% (0.85ms)
current 2048 small regexp literal with all flags: 553 ops/sec ±1.31% (1.807ms)
babel · Jun 10, 2021 · f377065 · f377065
1 parent 0f2b924
commit f377065
Show file tree

Hide file tree

Showing 2 changed files with 37 additions and 9 deletions.
diff --git a/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs b/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs
@@ -0,0 +1,22 @@
+import Benchmark from "benchmark";
+import baseline from "../../lib/index-v2.js";
+import current from "../../lib/index.js";
+import { report } from "../util.mjs";
+
+const suite = new Benchmark.Suite(); // single shared suite; benchCases() adds every case to it
+function createInput(length) { // Build the benchmark source text: `length` copies of one regexp literal statement.
+  return "/x/dgimsuy;".repeat(length); // each copy is the literal /x/ followed by the flags d, g, i, m, s, u, y
+}
+function benchCases(name, implementation, options) { // Add one case per input size to `suite`; `name` prefixes each case label.
+  for (const length of [256, 512, 1024, 2048]) { // number of regexp literals in the generated input
+    const input = createInput(length); // built once per size, outside the callback handed to suite.add
+    suite.add(`${name} ${length} small regexp literal with all flags`, () => {
+      implementation.parse(input, options); // `options` is forwarded as-is; the calls in this file omit it, so it is undefined
+    });
+  }
+}
+
+benchCases("baseline", baseline); // `baseline` is the module imported from ../../lib/index-v2.js
+benchCases("current", current); // `current` is the module imported from ../../lib/index.js
+
+suite.on("cycle", report).run(); // register `report` (from ../util.mjs) for "cycle" events, then run the suite
diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js
@@ -20,7 +20,15 @@ import {
 import State from "./state";
 import type { LookaheadState } from "./state";
 
-const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u", "d"]);
+const VALID_REGEX_FLAGS = new Set([ // holds character codes rather than strings; membership is tested with a code point
+  charCodes.lowercaseG, // g
+  charCodes.lowercaseM, // m
+  charCodes.lowercaseS, // s
+  charCodes.lowercaseI, // i
+  charCodes.lowercaseY, // y
+  charCodes.lowercaseU, // u
+  charCodes.lowercaseD, // d
+]);
 
 // The following character codes are forbidden from being
 // an immediate sibling of NumericLiteralSeparator _
@@ -1003,17 +1011,15 @@ export default class Tokenizer extends ParserErrors {
     let mods = "";
 
     while (pos < this.length) {
-      const char = this.input[pos];
-      const charCode = this.codePointAtPos(pos);
+      const cp = this.codePointAtPos(pos);
+      // It doesn't matter if cp > 0xffff, the loop will either throw or break because we check on cp
+      const char = String.fromCharCode(cp);
 
-      if (VALID_REGEX_FLAGS.has(char)) {
-        if (mods.indexOf(char) > -1) {
+      if (VALID_REGEX_FLAGS.has(cp)) {
+        if (mods.includes(char)) {
           this.raise(pos + 1, Errors.DuplicateRegExpFlags);
         }
-      } else if (
-        isIdentifierChar(charCode) ||
-        charCode === charCodes.backslash
-      ) {
+      } else if (isIdentifierChar(cp) || cp === charCodes.backslash) {
         this.raise(pos + 1, Errors.MalformedRegExpFlags);
       } else {
         break;