Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fixes, improvements, optimization & refactoring before parser generation #288

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 9 additions & 6 deletions context.go
Expand Up @@ -29,8 +29,8 @@ type parseContext struct {
allowTrailing bool
}

func newParseContext(lex *lexer.PeekingLexer, lookahead int, caseInsensitive map[lexer.TokenType]bool) *parseContext {
return &parseContext{
func newParseContext(lex *lexer.PeekingLexer, lookahead int, caseInsensitive map[lexer.TokenType]bool) parseContext {
return parseContext{
PeekingLexer: *lex,
caseInsensitive: caseInsensitive,
lookahead: lookahead,
Expand Down Expand Up @@ -90,11 +90,14 @@ func (p *parseContext) MaybeUpdateError(err error) {

// Stop returns true if parsing should terminate after the given "branch" failed to match.
//
// Additionally, "err" should be the branch error, if any. This will be tracked to
// aid in error reporting under the assumption that the deepest occurring error is more
// useful than errors further up.
// Additionally, track the deepest error in the branch - the deeper the error, the more useful it usually is.
// It could already be the deepest error in the branch (only if deeper than current parent context deepest),
// or it could be "err", the latest error on the branch (even if same depth; the lexer holds the position).
func (p *parseContext) Stop(err error, branch *parseContext) bool {
if branch.PeekingLexer.Cursor() >= p.deepestErrorDepth {
if branch.deepestErrorDepth > p.deepestErrorDepth {
p.deepestError = branch.deepestError
p.deepestErrorDepth = branch.deepestErrorDepth
} else if branch.PeekingLexer.Cursor() >= p.deepestErrorDepth {
p.deepestError = err
p.deepestErrorDepth = maxInt(branch.PeekingLexer.Cursor(), branch.deepestErrorDepth)
}
Expand Down
10 changes: 1 addition & 9 deletions ebnf.go
Expand Up @@ -62,7 +62,7 @@ func buildEBNF(root bool, n node, seen map[node]bool, p *ebnfp, outp *[]*ebnfp)
p = &ebnfp{name: name}
*outp = append(*outp, p)
seen[n] = true
for i, next := range n.nodeMembers {
for i, next := range n.disjunction.nodes {
if i > 0 {
p.out += " | "
}
Expand Down Expand Up @@ -111,14 +111,6 @@ func buildEBNF(root bool, n node, seen map[node]bool, p *ebnfp, outp *[]*ebnfp)
case *reference:
p.out += "<" + strings.ToLower(n.identifier) + ">"

case *optional:
buildEBNF(false, n.node, seen, p, outp)
p.out += "?"

case *repetition:
buildEBNF(false, n.node, seen, p, outp)
p.out += "*"

case *negation:
p.out += "~"
buildEBNF(false, n.node, seen, p, outp)
Expand Down
9 changes: 6 additions & 3 deletions error.go
Expand Up @@ -42,15 +42,18 @@ func FormatError(err Error) string {
// This is useful for composing parsers in order to detect when a sub-parser has terminated.
type UnexpectedTokenError struct {
Unexpected lexer.Token
at node
Expect string
alecthomas marked this conversation as resolved.
Show resolved Hide resolved
expectNode node // Usable instead of Expect, delays creating the string representation until necessary
}

func (u *UnexpectedTokenError) Error() string { return FormatError(u) }

func (u *UnexpectedTokenError) Message() string { // nolint: golint
var expected string
if u.at != nil {
expected = fmt.Sprintf(" (expected %s)", u.at)
if u.expectNode != nil {
expected = fmt.Sprintf(" (expected %s)", u.expectNode)
} else if u.Expect != "" {
expected = fmt.Sprintf(" (expected %s)", u.Expect)
}
return fmt.Sprintf("unexpected token %q%s", u.Unexpected, expected)
}
Expand Down
19 changes: 12 additions & 7 deletions error_test.go
Expand Up @@ -5,14 +5,16 @@ import (
"testing"

require "github.com/alecthomas/assert/v2"

"github.com/alecthomas/participle/v2"
"github.com/alecthomas/participle/v2/lexer"
)

func TestErrorReporting(t *testing.T) {
type cls struct {
Visibility string `@"public"?`
Class string `"class" @Ident`
Visibility string `@"public"?`
Class string `"class" @Ident`
Bases []string `('(' @Ident (',' @Ident)+ ')')?`
}
type union struct {
Visibility string `@"public"?`
Expand All @@ -27,17 +29,20 @@ func TestErrorReporting(t *testing.T) {
}
p := mustTestParser[grammar](t, participle.UseLookahead(5))

ast, err := p.ParseString("", `public class A;`)
ast, err := p.ParseString("", `public class A(B, C); class D; public union A;`)
require.NoError(t, err)
require.Equal(t, &grammar{Decls: []*decl{
{Class: &cls{Visibility: "public", Class: "A"}},
{Class: &cls{Visibility: "public", Class: "A", Bases: []string{"B", "C"}}},
{Class: &cls{Class: "D"}},
{Union: &union{Visibility: "public", Union: "A"}},
}}, ast)
_, err = p.ParseString("", `public union A;`)
require.NoError(t, err)

_, err = p.ParseString("", `public struct Bar;`)
require.EqualError(t, err, `1:8: unexpected token "struct" (expected "union" <ident>)`)
_, err = p.ParseString("", `public class 1;`)
require.EqualError(t, err, `1:14: unexpected token "1" (expected <ident>)`)
require.EqualError(t, err, `1:14: unexpected token "1" (expected <ident> ("(" <ident> ("," <ident>)+ ")")?)`)
_, err = p.ParseString("", `public class A(B,C,);`)
require.EqualError(t, err, `1:20: unexpected token ")" (expected <ident>)`)
}

func TestErrorWrap(t *testing.T) {
Expand Down
7 changes: 5 additions & 2 deletions grammar.go
Expand Up @@ -30,7 +30,7 @@ func (g *generatorContext) addUnionDefs(defs []unionDef) error {
}
unionNode := &union{
unionDef: def,
nodeMembers: make([]node, 0, len(def.members)),
disjunction: disjunction{nodes: make([]node, 0, len(def.members))},
}
g.typeNodes[def.typ], unionNodes[i] = unionNode, unionNode
}
Expand All @@ -41,7 +41,7 @@ func (g *generatorContext) addUnionDefs(defs []unionDef) error {
if err != nil {
return err
}
unionNode.nodeMembers = append(unionNode.nodeMembers, memberNode)
unionNode.disjunction.nodes = append(unionNode.disjunction.nodes, memberNode)
}
}
return nil
Expand All @@ -61,6 +61,9 @@ func (g *generatorContext) addCustomDefs(defs []customDef) error {
func (g *generatorContext) parseType(t reflect.Type) (_ node, returnedError error) {
t = indirectType(t)
if n, ok := g.typeNodes[t]; ok {
if s, ok := n.(*strct); ok {
s.usages++
}
return n, nil
}
if t.Implements(parseableType) {
Expand Down
110 changes: 26 additions & 84 deletions nodes.go
Expand Up @@ -97,16 +97,15 @@ func (c *custom) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.V
// @@ (for a union)
type union struct {
unionDef
nodeMembers []node
disjunction disjunction
}

func (u *union) String() string { return ebnf(u) }
func (u *union) GoString() string { return u.typ.Name() }

func (u *union) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(u)()
temp := disjunction{u.nodeMembers}
vals, err := temp.Parse(ctx, parent)
vals, err := u.disjunction.Parse(ctx, parent)
if err != nil {
return nil, err
}
Expand All @@ -123,11 +122,13 @@ type strct struct {
tokensFieldIndex []int
posFieldIndex []int
endPosFieldIndex []int
usages int
}

func newStrct(typ reflect.Type) *strct {
s := &strct{
typ: typ,
typ: typ,
usages: 1,
}
field, ok := typ.FieldByName("Pos")
if ok && field.Type == positionType {
Expand Down Expand Up @@ -295,19 +296,18 @@ type lookaheadGroup struct {
negative bool
}

func (n *lookaheadGroup) String() string { return ebnf(n) }
func (n *lookaheadGroup) GoString() string { return "lookaheadGroup{}" }
func (l *lookaheadGroup) String() string { return ebnf(l) }
func (l *lookaheadGroup) GoString() string { return "lookaheadGroup{}" }

func (n *lookaheadGroup) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(n)()
func (l *lookaheadGroup) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(l)()
// Create a branch to avoid advancing the parser as any match will be discarded
branch := ctx.Branch()
out, err = n.expr.Parse(branch, parent)
out, err = l.expr.Parse(branch, parent)
matchedLookahead := err == nil && out != nil
expectingMatch := !n.negative
expectingMatch := !l.negative
if matchedLookahead != expectingMatch {
peek := ctx.Peek()
return nil, Errorf(peek.Pos, "unexpected '%s'", peek.Value)
return nil, &UnexpectedTokenError{Unexpected: *ctx.Peek()}
}
return []reflect.Value{}, nil // Empty match slice means a match, unlike nil
}
Expand Down Expand Up @@ -382,7 +382,7 @@ func (s *sequence) Parse(ctx *parseContext, parent reflect.Value) (out []reflect
return nil, nil
}
token := ctx.Peek()
return out, &UnexpectedTokenError{Unexpected: *token, at: n}
return out, &UnexpectedTokenError{Unexpected: *token, expectNode: n}
}
// Special-case for when children return an empty match.
// Appending an empty, non-nil slice to a nil slice returns a nil slice.
Expand Down Expand Up @@ -440,72 +440,6 @@ func (r *reference) Parse(ctx *parseContext, parent reflect.Value) (out []reflec
return []reflect.Value{reflect.ValueOf(token.Value)}, nil
}

// [ <expr> ] <sequence>
type optional struct {
node node
}

func (o *optional) String() string { return ebnf(o) }
func (o *optional) GoString() string { return "optional{}" }

func (o *optional) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(o)()
branch := ctx.Branch()
out, err = o.node.Parse(branch, parent)
if err != nil {
// Optional part failed to match.
if ctx.Stop(err, branch) {
return out, err
}
} else {
ctx.Accept(branch)
}
if out == nil {
out = []reflect.Value{}
}
return out, nil
}

// { <expr> } <sequence>
type repetition struct {
node node
}

func (r *repetition) String() string { return ebnf(r) }
func (r *repetition) GoString() string { return "repetition{}" }

// Parse a repetition. Once a repetition is encountered it will always match, so grammars
// should ensure that branches are differentiated prior to the repetition.
func (r *repetition) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
defer ctx.printTrace(r)()
i := 0
for ; i < MaxIterations; i++ {
branch := ctx.Branch()
v, err := r.node.Parse(branch, parent)
out = append(out, v...)
if err != nil {
// Optional part failed to match.
if ctx.Stop(err, branch) {
return out, err
}
break
} else {
ctx.Accept(branch)
}
if v == nil {
break
}
}
if i >= MaxIterations {
t := ctx.Peek()
return nil, Errorf(t.Pos, "too many iterations of %s (> %d)", r, MaxIterations)
}
if out == nil {
out = []reflect.Value{}
}
return out, nil
}

// Match a token literal exactly "..."[:<type>].
type literal struct {
s string
Expand Down Expand Up @@ -556,7 +490,7 @@ func (n *negation) Parse(ctx *parseContext, parent reflect.Value) (out []reflect
out, err = n.node.Parse(branch, parent)
if out != nil && err == nil {
// out being non-nil means that what we don't want is actually here, so we report nomatch
return nil, Errorf(notEOF.Pos, "unexpected '%s'", notEOF.Value)
return nil, &UnexpectedTokenError{Unexpected: *notEOF}
}

// Just give the next token
Expand Down Expand Up @@ -591,15 +525,15 @@ func conform(t reflect.Type, values []reflect.Value) (out []reflect.Value, err e
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
n, err := strconv.ParseInt(v.String(), 0, sizeOfKind(kind))
if err != nil {
return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err)
return nil, err
}
v = reflect.New(t).Elem()
v.SetInt(n)

case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
n, err := strconv.ParseUint(v.String(), 0, sizeOfKind(kind))
if err != nil {
return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err)
return nil, err
}
v = reflect.New(t).Elem()
v.SetUint(n)
Expand All @@ -610,7 +544,7 @@ func conform(t reflect.Type, values []reflect.Value) (out []reflect.Value, err e
case reflect.Float32, reflect.Float64:
n, err := strconv.ParseFloat(v.String(), sizeOfKind(kind))
if err != nil {
return nil, fmt.Errorf("invalid integer %q: %s", v.String(), err)
return nil, err
}
v = reflect.New(t).Elem()
v.SetFloat(n)
Expand Down Expand Up @@ -735,9 +669,14 @@ func setField(tokens []lexer.Token, strct reflect.Value, field structLexerField,
if err != nil {
return err
}
if len(fieldValue) == 0 {
return nil
}
accumulated := f.String()
for _, v := range fieldValue {
f.Set(reflect.ValueOf(f.String() + v.String()).Convert(f.Type()))
accumulated += v.String()
}
f.SetString(accumulated)
return nil
}

Expand All @@ -755,6 +694,9 @@ func setField(tokens []lexer.Token, strct reflect.Value, field structLexerField,
if err != nil {
return err
}
if len(fieldValue) == 0 {
return nil // Nothing to capture, can happen when trying to get a partial parse tree
}

fv := fieldValue[0]

Expand Down