-
-
Notifications
You must be signed in to change notification settings - Fork 5.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Faster tokenizer lookahead #13341
Faster tokenizer lookahead #13341
Changes from all commits
37afbd9
723ff6e
2660173
b1ff7ea
1453b86
7a47bb2
c1a6957
8dafa65
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import Benchmark from "benchmark"; | ||
import baseline from "@babel-baseline/parser"; | ||
import current from "../../lib/index.js"; | ||
import { report } from "../util.mjs"; | ||
|
||
const suite = new Benchmark.Suite(); | ||
function createInput(length) { | ||
return "type A = " + "| (x) => void".repeat(length); | ||
} | ||
function benchCases(name, implementation, options) { | ||
for (const length of [256, 512, 1024, 2048]) { | ||
const input = createInput(length); | ||
suite.add(`${name} ${length} arrow function types`, () => { | ||
implementation.parse(input, options); | ||
}); | ||
} | ||
} | ||
|
||
benchCases("baseline", baseline, { plugins: ["flow"] }); | ||
benchCases("current", current, { plugins: ["flow"] }); | ||
|
||
suite.on("cycle", report).run(); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ import { | |
skipWhiteSpace, | ||
} from "../util/whitespace"; | ||
import State from "./state"; | ||
import type { LookaheadState } from "./state"; | ||
|
||
const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u"]); | ||
|
||
|
@@ -144,11 +145,9 @@ export default class Tokenizer extends ParserErrors { | |
// Move to the next token | ||
|
||
next(): void { | ||
if (!this.isLookahead) { | ||
this.checkKeywordEscapes(); | ||
if (this.options.tokens) { | ||
this.pushToken(new Token(this.state)); | ||
} | ||
this.checkKeywordEscapes(); | ||
if (this.options.tokens) { | ||
this.pushToken(new Token(this.state)); | ||
} | ||
|
||
this.state.lastTokEnd = this.state.end; | ||
|
@@ -175,14 +174,51 @@ export default class Tokenizer extends ParserErrors { | |
return this.state.type === type; | ||
} | ||
|
||
// TODO | ||
/** | ||
* Create a LookaheadState from current parser state | ||
* | ||
* @param {State} state | ||
* @returns {LookaheadState} | ||
* @memberof Tokenizer | ||
*/ | ||
createLookaheadState(state: State): LookaheadState { | ||
return { | ||
pos: state.pos, | ||
value: null, | ||
type: state.type, | ||
start: state.start, | ||
end: state.end, | ||
lastTokEnd: state.end, | ||
context: [this.curContext()], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't have to copy the whole context because lookahead never updates contexts. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here you got 2x |
||
exprAllowed: state.exprAllowed, | ||
inType: state.inType, | ||
}; | ||
} | ||
|
||
lookahead(): State { | ||
/** | ||
* lookahead peeks the next token, skipping changes to token context and | ||
* comment stack. For performance it returns a limited LookaheadState | ||
* instead of full parser state. | ||
* | ||
* The { column, line } Loc info is not included in lookahead since such usage | ||
* is rare. Although it may return other location properties e.g. `curLine` and | ||
* `lineStart`, these properties are not listed in the LookaheadState interface | ||
* and thus the returned value is _NOT_ reliable. | ||
* | ||
* The tokenizer should make best efforts to avoid using any parser state | ||
* other than those defined in LookaheadState | ||
* | ||
* @returns {LookaheadState} | ||
* @memberof Tokenizer | ||
*/ | ||
lookahead(): LookaheadState { | ||
const old = this.state; | ||
this.state = old.clone(true); | ||
// For performance we use a simplified tokenizer state structure | ||
// $FlowIgnore | ||
this.state = this.createLookaheadState(old); | ||
|
||
this.isLookahead = true; | ||
this.next(); | ||
this.nextToken(); | ||
this.isLookahead = false; | ||
|
||
const curr = this.state; | ||
|
@@ -247,17 +283,16 @@ export default class Tokenizer extends ParserErrors { | |
|
||
nextToken(): void { | ||
const curContext = this.curContext(); | ||
if (!curContext?.preserveSpace) this.skipSpace(); | ||
if (!curContext.preserveSpace) this.skipSpace(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The default value of |
||
this.state.start = this.state.pos; | ||
this.state.startLoc = this.state.curPosition(); | ||
if (!this.isLookahead) this.state.startLoc = this.state.curPosition(); | ||
if (this.state.pos >= this.length) { | ||
this.finishToken(tt.eof); | ||
return; | ||
} | ||
|
||
const override = curContext?.override; | ||
if (override) { | ||
override(this); | ||
if (curContext === ct.template) { | ||
this.readTmplToken(); | ||
} else { | ||
this.getTokenFromCode(this.codePointAtPos(this.state.pos)); | ||
} | ||
|
@@ -285,7 +320,8 @@ export default class Tokenizer extends ParserErrors { | |
} | ||
|
||
skipBlockComment(): void { | ||
const startLoc = this.state.curPosition(); | ||
let startLoc; | ||
if (!this.isLookahead) startLoc = this.state.curPosition(); | ||
const start = this.state.pos; | ||
const end = this.input.indexOf("*/", this.state.pos + 2); | ||
if (end === -1) throw this.raise(start, Errors.UnterminatedComment); | ||
|
@@ -304,6 +340,7 @@ export default class Tokenizer extends ParserErrors { | |
// If we are doing a lookahead right now we need to advance the position (above code) | ||
// but we do not want to push the comment to the state. | ||
if (this.isLookahead) return; | ||
/*:: invariant(startLoc) */ | ||
|
||
this.pushComment( | ||
true, | ||
|
@@ -317,7 +354,8 @@ export default class Tokenizer extends ParserErrors { | |
|
||
skipLineComment(startSkip: number): void { | ||
const start = this.state.pos; | ||
const startLoc = this.state.curPosition(); | ||
let startLoc; | ||
if (!this.isLookahead) startLoc = this.state.curPosition(); | ||
let ch = this.input.charCodeAt((this.state.pos += startSkip)); | ||
if (this.state.pos < this.length) { | ||
while (!isNewLine(ch) && ++this.state.pos < this.length) { | ||
|
@@ -328,6 +366,7 @@ export default class Tokenizer extends ParserErrors { | |
// If we are doing a lookahead right now we need to advance the position (above code) | ||
// but we do not want to push the comment to the state. | ||
if (this.isLookahead) return; | ||
/*:: invariant(startLoc) */ | ||
|
||
this.pushComment( | ||
false, | ||
|
@@ -398,12 +437,14 @@ export default class Tokenizer extends ParserErrors { | |
|
||
finishToken(type: TokenType, val: any): void { | ||
this.state.end = this.state.pos; | ||
this.state.endLoc = this.state.curPosition(); | ||
const prevType = this.state.type; | ||
this.state.type = type; | ||
this.state.value = val; | ||
|
||
if (!this.isLookahead) this.updateContext(prevType); | ||
if (!this.isLookahead) { | ||
this.state.endLoc = this.state.curPosition(); | ||
this.updateContext(prevType); | ||
} | ||
} | ||
|
||
// ### Token reading | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -178,3 +178,14 @@ export default class State { | |
return state; | ||
} | ||
} | ||
|
||
export type LookaheadState = { | ||
pos: number, | ||
value: any, | ||
type: TokenType, | ||
start: number, | ||
end: number, | ||
/* Used only in readSlashToken */ | ||
exprAllowed: boolean, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can be 5% faster if we get rid of There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does copying a couple of booleans influence perf that much? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah. Unlike C, in V8 the boolean literal occupies 64 bits of memory, essentially an address that points to the In Babel 7.7 we improved the traverser performance by compressing 3 booleans to a bit array: https://hackmd.io/UMdqwvVgQGaofjCZHfGKrA?view#Compress-boolean-flags Note that the benchmark is specifically constructed to highlight the performance impact of There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @JLHwung Funny to see devs playing with perf :) Your suggested changes are only a micro-optimization and not scalable, but yeah, you are on the right track. You really should reduce the amount of object access too. This is expensive There are many other things you can do to optimize the Babel parser. I mentioned only a few. |
||
inType: boolean, | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/*1*/ export /*2*/ { /*3*/ A /*4*/, /*5*/ B /*6*/ as /*7*/ C /*8*/ } /*9*/ from /*10*/ "foo"; | ||
/*1*/ export /*2*/ * /*3*/ from /*4*/ "foo" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The `isLookahead` condition is removed because we call `nextToken` in `lookahead()`.