diff --git a/nodes.go b/nodes.go
index 9f448330..fda2ebe4 100644
--- a/nodes.go
+++ b/nodes.go
@@ -26,6 +26,8 @@ type node interface {
 	// Nodes should panic if parsing fails.
 	Parse(lex lexer.Lexer, parent reflect.Value) []reflect.Value
 	String() string
+	// Definition returns the text that stands for this node inside an EBNF definition.
+	Definition() string
 }
 
 func decorate(name string) {
@@ -56,6 +58,11 @@ func (p *parseable) Parse(lex lexer.Lexer, parent reflect.Value) (out []reflect.
 	return []reflect.Value{rv.Elem()}
 }
 
+// Definition returns p.t rendered with its String method.
+func (p *parseable) Definition() string {
+	return p.t.String()
+}
+
 type strct struct {
 	typ  reflect.Type
 	expr node
@@ -65,6 +72,11 @@ func (s *strct) String() string {
 	return s.expr.String()
 }
 
+// Definition returns the string form of the struct's type.
+func (s *strct) Definition() string {
+	return s.typ.String()
+}
+
 func (s *strct) maybeInjectPos(pos lexer.Position, v reflect.Value) {
 	// Fast path
 	if f := v.FieldByName("Pos"); f.IsValid() && f.Type() == positionType {
@@ -102,6 +114,15 @@ func (e disjunction) String() string {
 	return strings.Join(out, " | ")
 }
 
+// Definition returns the definitions of all alternatives joined by " | ".
+func (e disjunction) Definition() string {
+	out := make([]string, 0, len(e))
+	for _, n := range e {
+		out = append(out, n.Definition())
+	}
+	return strings.Join(out, " | ")
+}
+
 func (e disjunction) Parse(lex lexer.Lexer, parent reflect.Value) (out []reflect.Value) {
 	for _, a := range e {
 		if value := a.Parse(lex, parent); value != nil {
@@ -118,6 +139,15 @@ func (a sequence) String() string {
 	return a[0].String()
 }
 
+// Definition returns the definitions of all elements joined by single spaces.
+func (a sequence) Definition() string {
+	out := make([]string, 0, len(a))
+	for _, n := range a {
+		out = append(out, n.Definition())
+	}
+	return strings.Join(out, " ")
+}
+
 func (a sequence) Parse(lex lexer.Lexer, parent reflect.Value) (out []reflect.Value) {
 	for i, n := range a {
 		// If first value doesn't match, we early exit, otherwise all values must match.
@@ -147,6 +177,11 @@ func (r *reference) String() string {
 	return r.field.Name + ":" + r.node.String()
 }
 
+// Definition returns the name of the referenced field.
+func (r *reference) Definition() string {
+	return r.field.Name
+}
+
 func (r *reference) Parse(lex lexer.Lexer, parent reflect.Value) (out []reflect.Value) {
 	pos := lex.Peek().Pos
 	v := r.node.Parse(lex, parent)
@@ -175,6 +210,11 @@ func (t *tokenReference) Parse(lex lexer.Lexer, parent reflect.Value) (out []ref
 	return []reflect.Value{reflect.ValueOf(token.Value)}
 }
 
+// Definition returns the token identifier.
+func (t *tokenReference) Definition() string {
+	return t.identifier
+}
+
 // [ ]
 type optional struct {
 	node node
@@ -192,6 +232,11 @@ func (o *optional) Parse(lex lexer.Lexer, parent reflect.Value) (out []reflect.V
 	return v
 }
 
+// Definition returns the wrapped node's definition enclosed in "[" and "]".
+func (o *optional) Definition() string {
+	return "[" + o.node.Definition() + "]"
+}
+
 // { }
 type repetition struct {
 	node node
@@ -215,6 +260,11 @@ func (r *repetition) Parse(lex lexer.Lexer, parent reflect.Value) (out []reflect
 	return out
 }
 
+// Definition returns the wrapped node's definition enclosed in "{" and "}".
+func (r *repetition) Definition() string {
+	return "{" + r.node.Definition() + "}"
+}
+
 // Match a token literal exactly "...".
 type literal struct {
 	s string
@@ -236,6 +286,11 @@ func (s *literal) Parse(lex lexer.Lexer, parent reflect.Value) (out []reflect.Va
 	return nil
 }
 
+// Definition returns the literal's text.
+func (s *literal) Definition() string {
+	return s.s
+}
+
 // Attempt to transform values to given type.
 //
 // This will dereference pointers, and attempt to parse strings into integer values, floats, etc.
diff --git a/parser.go b/parser.go
index 5a5198b7..88a2d7d2 100644
--- a/parser.go
+++ b/parser.go
@@ -111,3 +111,8 @@ func (p *Parser) ParseBytes(b []byte, v interface{}) error {
 func (p *Parser) String() string {
 	return dumpNode(p.root)
 }
+
+// Ebnf returns an EBNF representation of the grammar.
+func (p *Parser) Ebnf() string {
+	return dumpEbnfNode(p.root)
+}
diff --git a/printer.go b/printer.go
index 2a4545a9..b6a47633 100644
--- a/printer.go
+++ b/printer.go
@@ -52,3 +52,130 @@ func nodePrinter(seen map[reflect.Value]bool, v node) string {
 	}
 	return "?"
 }
+
+// definitionsList is an insertion-ordered collection of named grammar
+// definitions: definitionsMap gives lookup by name and definitionsList records
+// the order in which names were first added.
+type definitionsList struct {
+	definitionsMap  map[string]node
+	definitionsList []string
+}
+
+// addDefinition records n under definitionName. If the name has already been
+// added, the existing entry is kept and the call does nothing.
+func (d *definitionsList) addDefinition(definitionName string, n node) {
+	if _, ok := d.definitionsMap[definitionName]; ok {
+		return
+	}
+
+	d.definitionsMap[definitionName] = n
+	d.definitionsList = append(d.definitionsList, definitionName)
+}
+
+// getKeys returns the definition names in the order they were first added.
+func (d *definitionsList) getKeys() []string {
+	return d.definitionsList
+}
+
+// getValue returns the node stored under key, or nil if there is none.
+func (d *definitionsList) getValue(key string) node {
+	return d.definitionsMap[key]
+}
+
+// dumpEbnfNode renders the grammar rooted at v as one "name := definition . "
+// line per definition, in the order the definitions were found.
+func dumpEbnfNode(v node) string {
+	seen := map[reflect.Value]bool{}
+	definitions := &definitionsList{
+		definitionsMap:  map[string]node{},
+		definitionsList: []string{},
+	}
+	findDefinitions(seen, definitions, v)
+
+	names := definitions.getKeys()
+	lines := make([]string, 0, len(names))
+	for _, name := range names {
+		def := definitions.getValue(name)
+		lines = append(lines, fmt.Sprintf("%s := %s . \n", name, parseDefinition(def)))
+	}
+	return strings.Join(lines, "")
+}
+
+// findDefinitions walks the nodes reachable from v and registers the node of
+// every reference under its field name. Nodes already recorded in seen are
+// skipped.
+func findDefinitions(seen map[reflect.Value]bool, definitions *definitionsList, v node) {
+	key := reflect.ValueOf(v)
+	if seen[key] {
+		return
+	}
+	seen[key] = true
+
+	switch n := v.(type) {
+	case disjunction:
+		for _, child := range n {
+			findDefinitions(seen, definitions, child)
+		}
+
+	case *strct:
+		findDefinitions(seen, definitions, n.expr)
+
+	case sequence:
+		for _, child := range n {
+			findDefinitions(seen, definitions, child)
+		}
+
+	case *reference:
+		definitions.addDefinition(n.field.Name, n.node)
+		findDefinitions(seen, definitions, n.node)
+
+	case *optional:
+		findDefinitions(seen, definitions, n.node)
+
+	case *repetition:
+		findDefinitions(seen, definitions, n.node)
+
+	case *tokenReference, *literal:
+		// Leaves: nothing to register and nothing to descend into.
+	}
+}
+
+// parseDefinition renders the right-hand side of the definition for v. Children
+// of a disjunction or sequence are rendered with their Definition method. It
+// returns "?" for a node type it does not handle.
+func parseDefinition(v node) string {
+	switch n := v.(type) {
+	case disjunction:
+		out := make([]string, 0, len(n))
+		for _, alt := range n {
+			out = append(out, alt.Definition())
+		}
+		return strings.Join(out, "|")
+
+	case *strct:
+		return parseDefinition(n.expr)
+
+	case sequence:
+		out := make([]string, 0, len(n))
+		for _, item := range n {
+			out = append(out, item.Definition())
+		}
+		return strings.Join(out, " ")
+
+	case *reference:
+		return n.field.Name
+
+	case *tokenReference:
+		return fmt.Sprintf("token(%q)", n.Definition())
+
+	case *optional:
+		return fmt.Sprintf("[%s]", parseDefinition(n.node))
+
+	case *repetition:
+		return fmt.Sprintf("{ %s }", parseDefinition(n.node))
+
+	case *literal:
+		return n.Definition()
+	}
+	return "?"
+}