Remove quote hacks in text/scanner based lexer.
This lexer predated participle.Unquote() and automatically removed
quotes where it really shouldn't have. It also supported single-quoted
strings via a very ugly hack. Neither behaviour is supported any more.
alecthomas committed Nov 26, 2020
1 parent c1e0275 commit 0a1bf01
Showing 9 changed files with 112 additions and 92 deletions.
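
For context before the per-file diffs: unquoting is now opt-in through the participle.Unquote() parser option rather than performed implicitly by the text/scanner lexer. A minimal sketch of the new pattern follows; the Greeting struct mirrors the testHello grammar in parser_test.go below, and the rest is illustrative.

package main

import (
	"fmt"

	"github.com/alecthomas/participle/v2"
)

// Greeting mirrors the testHello grammar in parser_test.go below.
type Greeting struct {
	Hello string `@Ident`
	To    string `@String`
}

func main() {
	// Unquote() strips and unescapes String tokens at parse time,
	// which the text/scanner lexer no longer does implicitly.
	parser, err := participle.Build(&Greeting{}, participle.Unquote())
	if err != nil {
		panic(err)
	}
	g := &Greeting{}
	if err := parser.ParseString("", `hello "Bobby Brown"`, g); err != nil {
		panic(err)
	}
	fmt.Println(g.To) // Bobby Brown (quotes removed by the option, not the lexer)
}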
30 changes: 2 additions & 28 deletions v2/lexer/text_scanner.go
@@ -2,12 +2,9 @@ package lexer
 
 import (
 	"bytes"
-	"fmt"
 	"io"
-	"strconv"
 	"strings"
 	"text/scanner"
-	"unicode/utf8"
 )
 
 // TextScannerLexer is a lexer that uses the text/scanner module.
@@ -95,32 +92,9 @@ func (t *textScannerLexer) Next() (Token, error) {
 	if t.err != nil {
 		return Token{}, t.err
 	}
-	return textScannerTransform(Token{
+	return Token{
 		Type:  typ,
 		Value: text,
 		Pos:   pos,
-	})
-}
-
-func textScannerTransform(token Token) (Token, error) {
-	// Unquote strings.
-	switch token.Type {
-	case scanner.Char:
-		// FIXME(alec): This is pretty hacky...we convert a single quoted char into a double
-		// quoted string in order to support single quoted strings.
-		token.Value = fmt.Sprintf("\"%s\"", token.Value[1:len(token.Value)-1])
-		fallthrough
-	case scanner.String:
-		s, err := strconv.Unquote(token.Value)
-		if err != nil {
-			return Token{}, Errorf(token.Pos, "%s: %q", err.Error(), token.Value)
-		}
-		token.Value = s
-		if token.Type == scanner.Char && utf8.RuneCountInString(s) > 1 {
-			token.Type = scanner.String
-		}
-	case scanner.RawString:
-		token.Value = token.Value[1 : len(token.Value)-1]
-	}
-	return token, nil
+	}, nil
 }
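
A minimal sketch of the resulting lexer contract, using the package-level LexString helper that the tests below exercise (assuming a call from outside the package, hence the lexer. prefix):

l := lexer.LexString("", `"hello world"`)
tok, err := l.Next()
if err != nil {
	panic(err)
}
// tok.Value holds the raw source text with the surrounding double quotes
// intact. Unquoting now happens in the parser, via participle.Unquote().
fmt.Println(tok.Value)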
17 changes: 0 additions & 17 deletions v2/lexer/text_scanner_go110_test.go

This file was deleted.

17 changes: 0 additions & 17 deletions v2/lexer/text_scanner_go111_test.go

This file was deleted.

10 changes: 5 additions & 5 deletions v2/lexer/text_scanner_test.go
@@ -24,21 +24,21 @@ func TestLexer(t *testing.T) {
 }
 
 func TestLexString(t *testing.T) {
-	lexer := LexString("", `"hello\nworld"`)
+	lexer := LexString("", "\"hello world\"")
 	token, err := lexer.Next()
 	require.NoError(t, err)
-	require.Equal(t, Token{Type: scanner.String, Value: "hello\nworld", Pos: Position{Line: 1, Column: 1}}, token)
+	require.Equal(t, token, Token{Type: scanner.String, Value: "\"hello world\"", Pos: Position{Line: 1, Column: 1}})
 }
 
 func TestLexSingleString(t *testing.T) {
-	lexer := LexString("", `'hello\nworld'`)
+	lexer := LexString("", "`hello world`")
 	token, err := lexer.Next()
 	require.NoError(t, err)
-	require.Equal(t, Token{Type: scanner.String, Value: "hello\nworld", Pos: Position{Line: 1, Column: 1}}, token)
+	require.Equal(t, Token{Type: scanner.RawString, Value: "`hello world`", Pos: Position{Line: 1, Column: 1}}, token)
 	lexer = LexString("", `'\U00008a9e'`)
 	token, err = lexer.Next()
 	require.NoError(t, err)
-	require.Equal(t, Token{Type: scanner.Char, Value: "\U00008a9e", Pos: Position{Line: 1, Column: 1}}, token)
+	require.Equal(t, Token{Type: scanner.Char, Value: `'\U00008a9e'`, Pos: Position{Line: 1, Column: 1}}, token)
 }
 
 func BenchmarkTextScannerLexer(b *testing.B) {
11 changes: 5 additions & 6 deletions v2/lookahead_test.go
@@ -20,22 +20,21 @@ func TestIssue3Example1(t *testing.T) {
 	}
 
 	g := &LAT1Module{}
-	p := mustTestParser(t, g, participle.UseLookahead(5))
+	p := mustTestParser(t, g, participle.UseLookahead(5), participle.Unquote())
 	err := p.ParseString("", `
 source_filename = "foo.c"
 target datalayout = "bar"
 target triple = "baz"
 `, g)
 	require.NoError(t, err)
 	require.Equal(t,
-		g,
 		&LAT1Module{
 			Decls: []*LAT1Decl{
 				{SourceFilename: "foo.c"},
 				{DataLayout: "bar"},
 				{TargetTriple: "baz"},
 			},
-		})
+		}, g)
 }
 
 type LAT2Config struct {
@@ -59,7 +58,7 @@ type LAT2Group struct {
 
 func TestIssue3Example2(t *testing.T) {
 	g := &LAT2Config{}
-	p := mustTestParser(t, g, participle.UseLookahead(2))
+	p := mustTestParser(t, g, participle.UseLookahead(2), participle.Unquote())
 	err := p.ParseString("", `
 key = "value"
 block {
@@ -68,7 +67,6 @@ func TestIssue3Example2(t *testing.T) {
 `, g)
 	require.NoError(t, err)
 	require.Equal(t,
-		g,
 		&LAT2Config{
 			Entries: []*LAT2Entry{
 				{Attribute: &LAT2Attribute{Key: "key", Value: "value"}},
@@ -82,6 +80,7 @@ func TestIssue3Example2(t *testing.T) {
 				},
 			},
 		},
+		g,
 	)
 }
 
@@ -228,7 +227,7 @@ type issue28Value struct {
 }
 
 func TestIssue28(t *testing.T) {
-	p := mustTestParser(t, &issue28Term{}, participle.UseLookahead(5))
+	p := mustTestParser(t, &issue28Term{}, participle.UseLookahead(5), participle.Unquote())
 
 	actual := &issue28Term{}
 	err := p.ParseString("", `"key": "value"`, actual)
26 changes: 23 additions & 3 deletions v2/map.go
@@ -86,16 +86,36 @@ func Elide(types ...string) Option {
 
 // Apply a Mapping to all tokens coming out of a Lexer.
 type mappingLexerDef struct {
-	lexer.Definition
+	l      lexer.Definition
 	mapper Mapper
 }
 
+var _ lexer.Definition = &mappingLexerDef{}
+
+func (m *mappingLexerDef) Symbols() map[string]rune { return m.l.Symbols() }
+
+func (m *mappingLexerDef) LexString(filename string, s string) (lexer.Lexer, error) {
+	l, err := m.l.LexString(filename, s)
+	if err != nil {
+		return nil, err
+	}
+	return &mappingLexer{l, m.mapper}, nil
+}
+
+func (m *mappingLexerDef) LexBytes(filename string, b []byte) (lexer.Lexer, error) {
+	l, err := m.l.LexBytes(filename, b)
+	if err != nil {
+		return nil, err
+	}
+	return &mappingLexer{l, m.mapper}, nil
+}
+
 func (m *mappingLexerDef) LexReader(filename string, r io.Reader) (lexer.Lexer, error) {
-	lexer, err := m.Definition.LexReader("", r)
+	l, err := m.l.LexReader(filename, r)
 	if err != nil {
 		return nil, err
 	}
-	return &mappingLexer{lexer, m.mapper}, nil
+	return &mappingLexer{l, m.mapper}, nil
 }
 
 type mappingLexer struct {
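
For orientation, a hedged sketch of how a Mapper reaches this code path through the participle.Map option that mappingLexerDef backs; the comment-trimming mapper, the "Comment" token name, and the SomeGrammar struct are illustrative, not from this commit:

stripComments := participle.Map(func(t lexer.Token) (lexer.Token, error) {
	// Trim a leading "//" from a hypothetical Comment token.
	t.Value = strings.TrimPrefix(t.Value, "//")
	return t, nil
}, "Comment")
// SomeGrammar is a placeholder grammar struct.
parser, err := participle.Build(&SomeGrammar{}, stripComments)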
4 changes: 0 additions & 4 deletions v2/parser.go
@@ -41,10 +41,6 @@ func Build(grammar interface{}, options ...Option) (parser *Parser, err error) {
 		useLookahead: 1,
 	}
 	for _, option := range options {
-		if option == nil {
-			return nil, fmt.Errorf("nil Option passed, signature has changed; " +
-				"if you intended to provide a custom Lexer, try participle.Build(grammar, participle.Lexer(lexer))")
-		}
 		if err = option(p); err != nil {
 			return nil, err
 		}
18 changes: 9 additions & 9 deletions v2/parser_test.go
@@ -277,7 +277,7 @@ type EBNF struct {
 }
 
 func TestEBNFParser(t *testing.T) {
-	parser := mustTestParser(t, &EBNF{})
+	parser := mustTestParser(t, &EBNF{}, participle.Unquote())
 
 	expected := &EBNF{
 		Productions: []*Production{
@@ -508,9 +508,9 @@ func TestHello(t *testing.T) {
 		To    string `@String`
 	}
 
-	parser := mustTestParser(t, &testHello{})
+	parser := mustTestParser(t, &testHello{}, participle.Unquote())
 
-	expected := &testHello{"hello", "Bobby Brown"}
+	expected := &testHello{"hello", `Bobby Brown`}
 	actual := &testHello{}
 	err := parser.ParseString("", `hello "Bobby Brown"`, actual)
 	require.NoError(t, err)
@@ -655,7 +655,7 @@ func TestLiteralTypeConstraint(t *testing.T) {
 		Literal string `@"123456":String`
 	}
 
-	parser := mustTestParser(t, &grammar{})
+	parser := mustTestParser(t, &grammar{}, participle.Unquote())
 
 	actual := &grammar{}
 	expected := &grammar{Literal: "123456"}
@@ -681,7 +681,7 @@ func TestStructCaptureInterface(t *testing.T) {
 		Capture *nestedCapture `@String`
 	}
 
-	parser, err := participle.Build(&grammar{})
+	parser, err := participle.Build(&grammar{}, participle.Unquote())
 	require.NoError(t, err)
 
 	actual := &grammar{}
@@ -711,7 +711,7 @@ func TestParseable(t *testing.T) {
 		Inner *parseableStruct `@@`
 	}
 
-	parser, err := participle.Build(&grammar{})
+	parser, err := participle.Build(&grammar{}, participle.Unquote())
 	require.NoError(t, err)
 
 	actual := &grammar{}
@@ -1239,16 +1239,16 @@ func TestNegationWithPattern(t *testing.T) {
 		EverythingMoreComplex *[]string `@!(';' String)* @';' @String`
 	}
 
-	p := mustTestParser(t, &grammar{})
+	p := mustTestParser(t, &grammar{}, participle.Unquote())
 	// j, err := json.MarshalIndent(p.root, "", " ")
 	// log.Print(j)
 	// log.Print(stringer(p.root))
 	ast := &grammar{}
-	err := p.ParseString("", `hello world ; 'some-str'`, ast)
+	err := p.ParseString("", `hello world ; "some-str"`, ast)
 	require.NoError(t, err)
 	require.Equal(t, &[]string{"hello", "world", ";", `some-str`}, ast.EverythingMoreComplex)
 
-	err = p.ParseString("", `hello ; world ; 'hey'`, ast)
+	err = p.ParseString("", `hello ; world ; "hey"`, ast)
 	require.NoError(t, err)
 	require.Equal(t, &[]string{"hello", ";", "world", ";", `hey`}, ast.EverythingMoreComplex)
 
71 changes: 68 additions & 3 deletions v2/struct.go
@@ -3,6 +3,10 @@ package participle
 import (
 	"fmt"
 	"reflect"
+	"strconv"
+	"strings"
+	"text/scanner"
+	"unicode/utf8"
 
 	"github.com/alecthomas/participle/v2/lexer"
 )
@@ -26,7 +30,7 @@ func lexStruct(s reflect.Type) (*structLexer, error) {
 	}
 	if len(slex.indexes) > 0 {
 		tag := fieldLexerTag(slex.Field().StructField)
-		slex.lexer, err = lexer.Upgrade(lexer.LexString(s.Name(), tag))
+		slex.lexer, err = lexer.Upgrade(newTagLexer(s.Name(), tag))
 		if err != nil {
 			return nil, err
 		}
@@ -77,7 +81,7 @@ func (s *structLexer) Peek() (lexer.Token, error) {
 		}
 		ft := s.GetField(field).StructField
 		tag := fieldLexerTag(ft)
-		lex, err = lexer.Upgrade(lexer.LexString(ft.Name, tag))
+		lex, err = lexer.Upgrade(newTagLexer(ft.Name, tag))
 		if err != nil {
 			return token, err
 		}
@@ -99,7 +103,7 @@ func (s *structLexer) Next() (lexer.Token, error) {
 	s.field++
 	ft := s.Field().StructField
 	tag := fieldLexerTag(ft)
-	s.lexer, err = lexer.Upgrade(lexer.LexString(ft.Name, tag))
+	s.lexer, err = lexer.Upgrade(newTagLexer(ft.Name, tag))
 	if err != nil {
 		return token, err
 	}
@@ -140,3 +144,64 @@ func collectFieldIndexes(s reflect.Type) (out [][]int, err error) {
 	}
 	return
 }
+
+// tagLexer is a Lexer based on text/scanner.Scanner
+type tagLexer struct {
+	scanner  *scanner.Scanner
+	filename string
+	err      error
+}
+
+func newTagLexer(filename string, tag string) *tagLexer {
+	s := &scanner.Scanner{}
+	s.Init(strings.NewReader(tag))
+	lexer := &tagLexer{
+		filename: filename,
+		scanner:  s,
+	}
+	lexer.scanner.Error = func(s *scanner.Scanner, msg string) {
+		// This is to support single quoted strings. Hacky.
+		if !strings.HasSuffix(msg, "char literal") {
+			lexer.err = fmt.Errorf("%s: %s", lexer.scanner.Pos(), msg)
+		}
+	}
+	return lexer
+}
+
+func (t *tagLexer) Next() (lexer.Token, error) {
+	typ := t.scanner.Scan()
+	text := t.scanner.TokenText()
+	pos := lexer.Position(t.scanner.Position)
+	pos.Filename = t.filename
+	if t.err != nil {
+		return lexer.Token{}, t.err
+	}
+	return textScannerTransform(lexer.Token{
+		Type:  typ,
+		Value: text,
+		Pos:   pos,
+	})
+}
+
+func textScannerTransform(token lexer.Token) (lexer.Token, error) {
+	// Unquote strings.
+	switch token.Type {
+	case scanner.Char:
+		// FIXME(alec): This is pretty hacky...we convert a single quoted char into a double
+		// quoted string in order to support single quoted strings.
+		token.Value = fmt.Sprintf("\"%s\"", token.Value[1:len(token.Value)-1])
+		fallthrough
+	case scanner.String:
+		s, err := strconv.Unquote(token.Value)
+		if err != nil {
+			return lexer.Token{}, Errorf(token.Pos, "%s: %q", err.Error(), token.Value)
+		}
+		token.Value = s
+		if token.Type == scanner.Char && utf8.RuneCountInString(s) > 1 {
+			token.Type = scanner.String
+		}
+	case scanner.RawString:
+		token.Value = token.Value[1 : len(token.Value)-1]
+	}
+	return token, nil
+}
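
The transform removed from the lexer survives here because grammar definitions in struct tags rely on single-quoted literals, which tagLexer still normalises into plain strings. A short illustrative grammar (not from this commit) showing the tag syntax this preserves:

// Assignment is a hypothetical grammar; '=' below is a single-quoted
// literal in the tag, which tagLexer converts just as the old lexer did.
type Assignment struct {
	Key   string `@Ident '='`
	Value string `@String`
}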
