Skip to content

Commit

Permalink
Add optional BatchLexer interface usable by PeekingLexer
Browse files Browse the repository at this point in the history
  • Loading branch information
Peter Dolak committed Aug 13, 2022
1 parent 0635297 commit c2ffb31
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 8 deletions.
5 changes: 5 additions & 0 deletions lexer/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ type Lexer interface {
Next() (Token, error)
}

// BatchLexer consumes and returns a batch of tokens at a time. A Lexer may choose to implement this for extra speed.
//
// Upgrade type-asserts its Lexer to BatchLexer and, when that succeeds, reads
// tokens exclusively through NextBatch instead of Next. Upgrade appends each
// batch as-is and inspects only the last token of the batch to detect the end
// of input, without checking the batch length first. Implementations should
// therefore:
//   - never return an empty batch together with a nil error (Upgrade would
//     index past the end of the slice), and
//   - place the EOF token as the final element of the final batch; an EOF
//     token anywhere else in a batch does not stop Upgrade from asking for
//     another batch.
type BatchLexer interface {
// NextBatch returns the next group of tokens, or an error if lexing failed.
NextBatch() ([]Token, error)
}

// SymbolsByRune returns a map of lexer symbol names keyed by their TokenType.
func SymbolsByRune(def Definition) map[TokenType]string {
symbols := def.Symbols()
Expand Down
29 changes: 22 additions & 7 deletions lexer/peek.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,31 @@ func Upgrade(lex Lexer, elide ...TokenType) (*PeekingLexer, error) {
for _, rn := range elide {
r.elide[rn] = true
}
for {
t, err := lex.Next()
if err != nil {
return r, err
if batchLex, ok := lex.(BatchLexer); ok {
for {
batch, err := batchLex.NextBatch()
if err != nil {
return r, err
}
r.tokens = append(r.tokens, batch...)
last := batch[len(batch)-1]
if last.EOF() {
break
}
}
r.tokens = append(r.tokens, t)
if t.EOF() {
break
} else {
for {
t, err := lex.Next()
if err != nil {
return r, err
}
r.tokens = append(r.tokens, t)
if t.EOF() {
break
}
}
}

r.advanceToNonElided()
return r, nil
}
Expand Down
32 changes: 31 additions & 1 deletion lexer/peek_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,21 @@ func (s *staticLexer) Next() (lexer.Token, error) {
return t, nil
}

func TestUpgrade(t *testing.T) {
// batchLexer is a test double that serves pre-built token batches. Its
// NextBatch method hands the batches out one at a time, in slice order.
type batchLexer struct {
// batches holds the batches that have not been returned yet.
batches [][]lexer.Token
}

// Next gives batchLexer the single-token Next method so it can be passed to
// lexer.Upgrade. It always panics: the tests using this type expect tokens to
// be pulled through NextBatch only, so any call here signals a regression.
func (b *batchLexer) Next() (lexer.Token, error) {
panic("shouldn't be called")
}

// NextBatch pops the first remaining batch and returns it with a nil error.
//
// It deliberately does not guard against running out of batches: once a batch
// ending in EOF has been returned, no further call is expected, so a call at
// that point panics and surfaces the mistake in the test.
func (b *batchLexer) NextBatch() ([]lexer.Token, error) {
	head, rest := b.batches[0], b.batches[1:]
	b.batches = rest
	return head, nil
}

func TestUpgrade_Lexer(t *testing.T) {
t0 := lexer.Token{Type: 1, Value: "moo"}
ts := lexer.Token{Type: 3, Value: " "}
t1 := lexer.Token{Type: 2, Value: "blah"}
Expand All @@ -33,6 +47,22 @@ func TestUpgrade(t *testing.T) {
require.Equal(t, tokens, l.Range(0, 3))
}

func TestUpgrade_BatchLexer(t *testing.T) {
batches := [][]lexer.Token{
{{Type: 1, Value: "x"}, {Type: 3, Value: " "}},
{{Type: 1, Value: "y"}},
{{Type: 3, Value: " "}, {Type: 2, Value: "z"}, lexer.EOFToken(lexer.Position{})},
}
l, err := lexer.Upgrade(&batchLexer{batches: batches}, 3)
require.NoError(t, err)
require.Equal(t, 1, l.Peek().Type)
require.Equal(t, "x", l.Next().Value)
require.Equal(t, "y", l.Next().Value)
require.Equal(t, "z", l.Next().Value)
require.Equal(t, lexer.EOF, l.Next().Type)
require.Equal(t, lexer.EOF, l.Peek().Type)
}

func TestPeekingLexer_Peek_Next_Checkpoint(t *testing.T) {
slexdef := lexer.MustSimple([]lexer.SimpleRule{
{"Ident", `\w+`},
Expand Down

0 comments on commit c2ffb31

Please sign in to comment.