From c2ffb31d2f501ab780f1fbd831fb9c533f6b0dfb Mon Sep 17 00:00:00 2001
From: Peter Dolak
Date: Sat, 13 Aug 2022 13:51:10 +0200
Subject: [PATCH] Add optional BatchLexer interface usable by PeekingLexer

---
 lexer/api.go       |  5 +++++
 lexer/peek.go      | 29 ++++++++++++++++++++++-------
 lexer/peek_test.go | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/lexer/api.go b/lexer/api.go
index 9f89d7aa..dd49ad53 100644
--- a/lexer/api.go
+++ b/lexer/api.go
@@ -47,6 +47,11 @@ type Lexer interface {
 	Next() (Token, error)
 }
 
+// BatchLexer consumes and returns a batch of tokens at a time. A Lexer may choose to implement this for extra speed.
+type BatchLexer interface {
+	NextBatch() ([]Token, error)
+}
+
 // SymbolsByRune returns a map of lexer symbol names keyed by rune.
 func SymbolsByRune(def Definition) map[TokenType]string {
 	symbols := def.Symbols()
diff --git a/lexer/peek.go b/lexer/peek.go
index 30865142..7316d124 100644
--- a/lexer/peek.go
+++ b/lexer/peek.go
@@ -29,16 +29,31 @@ func Upgrade(lex Lexer, elide ...TokenType) (*PeekingLexer, error) {
 	for _, rn := range elide {
 		r.elide[rn] = true
 	}
-	for {
-		t, err := lex.Next()
-		if err != nil {
-			return r, err
+	if batchLex, ok := lex.(BatchLexer); ok {
+		for {
+			batch, err := batchLex.NextBatch()
+			if err != nil {
+				return r, err
+			}
+			r.tokens = append(r.tokens, batch...)
+			// An empty batch has no last token to inspect; treat it as "nothing happened" and keep reading instead of indexing out of range.
+			if n := len(batch); n > 0 && batch[n-1].EOF() {
+				break
+			}
 		}
-		r.tokens = append(r.tokens, t)
-		if t.EOF() {
-			break
+	} else {
+		for {
+			t, err := lex.Next()
+			if err != nil {
+				return r, err
+			}
+			r.tokens = append(r.tokens, t)
+			if t.EOF() {
+				break
+			}
 		}
 	}
+
 	r.advanceToNonElided()
 	return r, nil
 }
diff --git a/lexer/peek_test.go b/lexer/peek_test.go
index 34c0a27f..bb23e70c 100644
--- a/lexer/peek_test.go
+++ b/lexer/peek_test.go
@@ -21,7 +21,21 @@ func (s *staticLexer) Next() (lexer.Token, error) {
 	return t, nil
 }
 
-func TestUpgrade(t *testing.T) {
+type batchLexer struct {
+	batches [][]lexer.Token
+}
+
+func (b *batchLexer) Next() (lexer.Token, error) {
+	panic("shouldn't be called")
+}
+
+func (b *batchLexer) NextBatch() ([]lexer.Token, error) {
+	ret := b.batches[0] // Should never be called after EOF is returned at the end of a batch
+	b.batches = b.batches[1:]
+	return ret, nil
+}
+
+func TestUpgrade_Lexer(t *testing.T) {
 	t0 := lexer.Token{Type: 1, Value: "moo"}
 	ts := lexer.Token{Type: 3, Value: " "}
 	t1 := lexer.Token{Type: 2, Value: "blah"}
@@ -33,6 +47,22 @@ func TestUpgrade(t *testing.T) {
 	require.Equal(t, tokens, l.Range(0, 3))
 }
 
+func TestUpgrade_BatchLexer(t *testing.T) {
+	batches := [][]lexer.Token{
+		{{Type: 1, Value: "x"}, {Type: 3, Value: " "}},
+		{{Type: 1, Value: "y"}},
+		{{Type: 3, Value: " "}, {Type: 2, Value: "z"}, lexer.EOFToken(lexer.Position{})},
+	}
+	l, err := lexer.Upgrade(&batchLexer{batches: batches}, 3)
+	require.NoError(t, err)
+	require.Equal(t, 1, l.Peek().Type)
+	require.Equal(t, "x", l.Next().Value)
+	require.Equal(t, "y", l.Next().Value)
+	require.Equal(t, "z", l.Next().Value)
+	require.Equal(t, lexer.EOF, l.Next().Type)
+	require.Equal(t, lexer.EOF, l.Peek().Type)
+}
+
 func TestPeekingLexer_Peek_Next_Checkpoint(t *testing.T) {
 	slexdef := lexer.MustSimple([]lexer.SimpleRule{
 		{"Ident", `\w+`},