Skip to content

Commit

Permalink
Simplify state management in the Javascript parser.
Browse files Browse the repository at this point in the history
  • Loading branch information
inspirer committed Sep 20, 2017
1 parent e224d05 commit 9245e08
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 165 deletions.
2 changes: 1 addition & 1 deletion tm-parsers/js/const_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ func TestTokenRanges(t *testing.T) {
}

func TestStateValues(t *testing.T) {
if StateDiv&^1 != StateInitial || StateJsxTemplateDiv&^1 != StateJsxTemplate || StateTemplateDiv&^1 != StateTemplate {
if StateDiv&^1 != StateInitial || StateTemplateDiv&^1 != StateTemplate || StateTemplateExprDiv&^1 != StateTemplateExpr {
t.Error("div states must be odd and one greater than non-div states")
}
}
87 changes: 43 additions & 44 deletions tm-parsers/js/js.tm
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ extraTypes = ["InsertedSemicolon"]

:: lexer

%s initial, div, template, templateDiv, templateExpr, templateExprDiv, jsxTemplate, jsxTemplateDiv;
%s initial, div, template, templateDiv, templateExpr, templateExprDiv;
%x jsxTag, jsxClosingTag, jsxText;

# Accept end-of-input in all states.
Expand All @@ -23,7 +23,7 @@ extraTypes = ["InsertedSemicolon"]
invalid_token:
error:

<initial, div, template, templateDiv, templateExpr, templateExprDiv, jsxTemplate, jsxTemplateDiv, jsxTag, jsxClosingTag> {
<initial, div, template, templateDiv, templateExpr, templateExprDiv, jsxTag, jsxClosingTag> {
WhiteSpace: /[\t\x0b\x0c\x20\xa0\ufeff\p{Zs}]/ (space)
}

Expand Down Expand Up @@ -237,10 +237,10 @@ StringLiteral: /'{ssChar}*'/
tplChars = /([^\$`\\]|\$*{escape}|\$*{lineCont}|\$+[^\$\{`\\])*\$*/
<initial, div, templateExpr, templateExprDiv, jsxTemplate, jsxTemplateDiv>
<initial, div, templateExpr, templateExprDiv>
'}': /\}/
<initial, div, template, templateDiv, templateExpr, templateExprDiv, jsxTemplate, jsxTemplateDiv> {
<initial, div, template, templateDiv, templateExpr, templateExprDiv> {
NoSubstitutionTemplate: /`{tplChars}`/
TemplateHead: /`{tplChars}\$\{/
}
Expand All @@ -250,7 +250,7 @@ TemplateMiddle: /\}{tplChars}\$\{/
TemplateTail: /\}{tplChars}`/
}
<initial, template, jsxTemplate> {
<initial, template, templateExpr> {
reBS = /\\[^\n\r\u2028\u2029]/
reClass = /\[([^\n\r\u2028\u2029\]\\]|{reBS})*\]/
reFirst = /[^\n\r\u2028\u2029\*\[\\\/]|{reBS}|{reClass}/
Expand All @@ -260,7 +260,7 @@ reFlags = /[a-z]*/
RegularExpressionLiteral: /\/{reFirst}{reChar}*\/{reFlags}/
}
<div, templateDiv, templateExprDiv, jsxTemplateDiv> {
<div, templateDiv, templateExprDiv> {
'/': /\//
'/=': /\/=/
}
Expand All @@ -279,7 +279,7 @@ jsxStringLiteral: /"[^"]*"/

jsxIdentifier: /{identifierStart}({identifierPart}|-)*/
# Note: the following rule disables backtracking for incomplete identifiers.
invalid_token: /({identifierStart}({identifierPart}|-)*)?{brokenEscapeSequence}/
invalid_token: /({identifierStart}({identifierPart}|-)*)?{brokenEscapeSequence}/
}

<jsxText> {
Expand Down Expand Up @@ -1612,20 +1612,17 @@ ${template go_lexer.stateVars}
Dialect Dialect
token Token // last token
Stack []int // stack of JSX states, non-empty for StateJsx*
Opened []int // number of opened curly braces per jsxTemplate* state
${end}

${template go_lexer.initStateVars-}
l.Dialect = Javascript
l.token = UNAVAILABLE
l.Stack = nil
l.Opened = nil
${end}

${template go_parser.setupLookaheadLexer-}
var alloc2, alloc3 [8]int
var alloc2 [8]int
lexer.Stack = alloc2[:0]
lexer.Opened = alloc3[:0]
${end}

${template go_lexer.onBeforeNext-}
Expand All @@ -1643,7 +1640,7 @@ ${template go_lexer.onAfterNext}
// See the following thread for more details:
// http://stackoverflow.com/questions/5519596/when-parsing-javascript-what

if l.State <= StateJsxTemplateDiv {
if l.State <= StateTemplateExprDiv {
// The lowest bit of "l.State" determines how to interpret a forward
// slash if it happens to be the next character.
// unset: start of a regular expression literal
Expand All @@ -1652,14 +1649,12 @@ ${template go_lexer.onAfterNext}
case NEW, DELETE, VOID, TYPEOF, INSTANCEOF, IN, DO, RETURN, CASE, THROW, ELSE:
l.State &^= 1
case TEMPLATEHEAD:
l.Stack = append(l.Stack, l.State|1)
l.Opened = append(l.Opened, 1)
fallthrough
l.State |= 1
l.pushState(StateTemplate)
case TEMPLATEMIDDLE:
l.State = StateTemplate
case TEMPLATETAIL:
l.State = l.Stack[len(l.Stack)-1]
l.Stack = l.Stack[:len(l.Stack)-1]
l.popState()
case RPAREN, RBRACK:
// TODO support if (...) /aaaa/;
l.State |= 1
Expand All @@ -1675,34 +1670,22 @@ ${template go_lexer.onAfterNext}
if l.State&1 == 0 {
// Start a new JSX tag.
if l.Dialect != Typescript {
l.Stack = append(l.Stack, l.State|1)
l.State = StateJsxTag
l.State |= 1
l.pushState(StateJsxTag)
}
} else {
l.State &^= 1
}
case LBRACE:
l.State &^= 1
if l.State >= StateTemplate {
l.Opened[len(l.Opened)-1]++
if l.State < StateTemplateExpr {
l.State = StateTemplateExpr
}
l.pushState(StateTemplateExpr)
}
l.State &^= 1
case RBRACE:
l.State &^= 1
if l.State >= StateTemplate {
last := len(l.Opened) - 1
l.Opened[last]--
if l.Opened[last] == 0 {
l.Opened = l.Opened[:last]
l.State = l.Stack[len(l.Stack)-1]
l.Stack = l.Stack[:len(l.Stack)-1]
break
} else if l.Opened[last] == 1 && l.State <= StateTemplateExprDiv {
l.State = StateTemplate
}
l.popState()
}
l.State &^= 1
case SINGLELINECOMMENT, MULTILINECOMMENT:
break
default:
Expand All @@ -1716,30 +1699,46 @@ ${template go_lexer.onAfterNext}
// Handling JSX states.
switch token {
case DIV:
if l.State == StateJsxTag && l.token == LT && l.Stack[len(l.Stack)-1] == StateJsxText {
if l.State == StateJsxTag && l.token == LT {
l.State = StateJsxClosingTag
l.Stack = l.Stack[:len(l.Stack)-1]
if len(l.Stack) > 0 {
l.Stack = l.Stack[:len(l.Stack)-1]
}
}
case GT:
if l.State == StateJsxClosingTag || l.token == DIV {
l.State = l.Stack[len(l.Stack)-1]
l.Stack = l.Stack[:len(l.Stack)-1]
l.popState()
} else {
l.State = StateJsxText
}
case LBRACE:
l.Opened = append(l.Opened, 1)
l.Stack = append(l.Stack, l.State)
l.State = StateJsxTemplate
l.pushState(StateTemplateExpr)
case LT:
// Start a new JSX tag.
l.Stack = append(l.Stack, l.State)
l.State = StateJsxTag
l.pushState(StateJsxTag)
}
}
l.token = token
${end}

${template go_lexer.lexerNext-}
${call base-}

// pushState remembers the state the lexer is currently in on top of l.Stack
// and then makes newState the active state. The remembered state is the one
// popState switches back to.
func (l *Lexer) pushState(newState int) {
	prev := l.State
	l.State = newState
	l.Stack = append(l.Stack, prev)
}

// popState makes the most recently pushed state active again and removes it
// from l.Stack. When the stack is empty there is nothing to restore, so the
// lexer switches to StateDiv and the stack is left untouched (no indexing of
// an empty slice takes place).
func (l *Lexer) popState() {
	top := len(l.Stack) - 1
	if top < 0 {
		l.State = StateDiv
		return
	}
	l.State = l.Stack[top]
	l.Stack = l.Stack[:top]
}
${end}

${template go_parser.parser-}
package ${self->go.package()}

Expand Down
74 changes: 34 additions & 40 deletions tm-parsers/js/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,9 @@ const (
StateTemplateDiv = 3
StateTemplateExpr = 4
StateTemplateExprDiv = 5
StateJsxTemplate = 6
StateJsxTemplateDiv = 7
StateJsxTag = 8
StateJsxClosingTag = 9
StateJsxText = 10
StateJsxTag = 6
StateJsxClosingTag = 7
StateJsxText = 8
)

type Dialect int
Expand Down Expand Up @@ -48,7 +46,6 @@ type Lexer struct {
Dialect Dialect
token Token // last token
Stack []int // stack of JSX states, non-empty for StateJsx*
Opened []int // number of opened curly braces per jsxTemplate* state
}

var bomSeq = "\xef\xbb\xbf"
Expand All @@ -68,7 +65,6 @@ func (l *Lexer) Init(source string) {
l.Dialect = Javascript
l.token = UNAVAILABLE
l.Stack = nil
l.Opened = nil

if strings.HasPrefix(source, bomSeq) {
l.scanOffset += len(bomSeq)
Expand Down Expand Up @@ -500,7 +496,7 @@ restart:
// See the following thread for more details:
// http://stackoverflow.com/questions/5519596/when-parsing-javascript-what

if l.State <= StateJsxTemplateDiv {
if l.State <= StateTemplateExprDiv {
// The lowest bit of "l.State" determines how to interpret a forward
// slash if it happens to be the next character.
// unset: start of a regular expression literal
Expand All @@ -509,14 +505,12 @@ restart:
case NEW, DELETE, VOID, TYPEOF, INSTANCEOF, IN, DO, RETURN, CASE, THROW, ELSE:
l.State &^= 1
case TEMPLATEHEAD:
l.Stack = append(l.Stack, l.State|1)
l.Opened = append(l.Opened, 1)
fallthrough
l.State |= 1
l.pushState(StateTemplate)
case TEMPLATEMIDDLE:
l.State = StateTemplate
case TEMPLATETAIL:
l.State = l.Stack[len(l.Stack)-1]
l.Stack = l.Stack[:len(l.Stack)-1]
l.popState()
case RPAREN, RBRACK:
// TODO support if (...) /aaaa/;
l.State |= 1
Expand All @@ -532,34 +526,22 @@ restart:
if l.State&1 == 0 {
// Start a new JSX tag.
if l.Dialect != Typescript {
l.Stack = append(l.Stack, l.State|1)
l.State = StateJsxTag
l.State |= 1
l.pushState(StateJsxTag)
}
} else {
l.State &^= 1
}
case LBRACE:
l.State &^= 1
if l.State >= StateTemplate {
l.Opened[len(l.Opened)-1]++
if l.State < StateTemplateExpr {
l.State = StateTemplateExpr
}
l.pushState(StateTemplateExpr)
}
l.State &^= 1
case RBRACE:
l.State &^= 1
if l.State >= StateTemplate {
last := len(l.Opened) - 1
l.Opened[last]--
if l.Opened[last] == 0 {
l.Opened = l.Opened[:last]
l.State = l.Stack[len(l.Stack)-1]
l.Stack = l.Stack[:len(l.Stack)-1]
break
} else if l.Opened[last] == 1 && l.State <= StateTemplateExprDiv {
l.State = StateTemplate
}
l.popState()
}
l.State &^= 1
case SINGLELINECOMMENT, MULTILINECOMMENT:
break
default:
Expand All @@ -573,31 +555,43 @@ restart:
// Handling JSX states.
switch token {
case DIV:
if l.State == StateJsxTag && l.token == LT && l.Stack[len(l.Stack)-1] == StateJsxText {
if l.State == StateJsxTag && l.token == LT {
l.State = StateJsxClosingTag
l.Stack = l.Stack[:len(l.Stack)-1]
if len(l.Stack) > 0 {
l.Stack = l.Stack[:len(l.Stack)-1]
}
}
case GT:
if l.State == StateJsxClosingTag || l.token == DIV {
l.State = l.Stack[len(l.Stack)-1]
l.Stack = l.Stack[:len(l.Stack)-1]
l.popState()
} else {
l.State = StateJsxText
}
case LBRACE:
l.Opened = append(l.Opened, 1)
l.Stack = append(l.Stack, l.State)
l.State = StateJsxTemplate
l.pushState(StateTemplateExpr)
case LT:
// Start a new JSX tag.
l.Stack = append(l.Stack, l.State)
l.State = StateJsxTag
l.pushState(StateJsxTag)
}
}
l.token = token
return token
}

// pushState remembers the state the lexer is currently in on top of l.Stack
// and then makes newState the active state. The remembered state is the one
// popState switches back to.
func (l *Lexer) pushState(newState int) {
	prev := l.State
	l.State = newState
	l.Stack = append(l.Stack, prev)
}

// popState makes the most recently pushed state active again and removes it
// from l.Stack. When the stack is empty there is nothing to restore, so the
// lexer switches to StateDiv and the stack is left untouched (no indexing of
// an empty slice takes place).
func (l *Lexer) popState() {
	top := len(l.Stack) - 1
	if top < 0 {
		l.State = StateDiv
		return
	}
	l.State = l.Stack[top]
	l.Stack = l.Stack[:top]
}

// Pos returns the start and end positions of the last token returned by Next().
func (l *Lexer) Pos() (start, end int) {
start = l.tokenOffset
Expand Down
Loading

0 comments on commit 9245e08

Please sign in to comment.