// Copyright (c) 2018, Cogent Core. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package parser does the parsing stage after lexing, using a top-down recursive-descent
// (TDRD) strategy, with a special reverse mode to deal with left-associative binary expressions
// which otherwise end up being right-associative for TDRD parsing.
// Higher-level rules provide scope to lower-level ones, with a special EOS end-of-statement
// scope recognized for a given rule.
package parser
import (
"fmt"
"io"
"reflect"
"strconv"
"strings"
"text/tabwriter"
"cogentcore.org/core/gox/indent"
"cogentcore.org/core/parse/lexer"
"cogentcore.org/core/parse/syms"
"cogentcore.org/core/parse/token"
"cogentcore.org/core/tree"
)
// Set GuiActive to true if the gui (piview) is active -- this ensures that the
// Ast tree is updated when nodes are swapped in reverse mode, among other things.
var GuiActive = false
// DepthLimit is the infinite recursion prevention cutoff
var DepthLimit = 10000
// parser.Rule operates on the lexically-tokenized input, not the raw source.
//
// The overall strategy is pragmatically based on the current known form of
// most languages, which are organized around a sequence of statements having
// a clear scoping defined by the EOS (end of statement), which is identified
// in a first pass through tokenized output in PassTwo.
//
// We use a top-down, recursive-descent style of parsing, with flexible
// lookahead based on scoping provided by the EOS tags. Each rule progressively
// scopes down the space, using token matches etc. to bracket the space for
// flexible elements.
//
// There are two different rule types:
// 1. Parents with multiple children (i.e., Groups), which are all the different
// variations for satisfying that rule, with precedence encoded directly in the
// ordering of the children. These have an empty Rule string and no Rules.
// 2. Explicit rules specified in the Rule string.
//
// The first step is matching, which searches in order for matches within the
// children of parent nodes; for explicit rule nodes, it looks first through
// all the explicit tokens in the rule. If there are no explicit tokens,
// then matching defers to ONLY the first node listed by default -- you can
// add a @ prefix to indicate a rule that is also essential to match.
//
// After a rule matches, it then proceeds through the rules, narrowing the scope
// and calling the sub-nodes.
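// For example, a hypothetical grammar fragment (illustrative only, not taken
// from any actual language definition) could define rules whose Rule strings
// look like this:
//
//	ExprStmt:  Expr 'EOS'      // statement scoped by end-of-statement
//	AddExpr:   -Expr '+' Expr  // reverse (-) rule for left associativity
//	ParenExpr: '(' @Expr ')'   // @ requires the Expr sub-rule to match
//
// where the name before the colon is the rule node's name and the rest is its
// Rule string; precedence among alternatives is encoded by the ordering of
// children under a group rule.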
type Rule struct {
tree.NodeBase
// disable this rule -- useful for testing and exploration
Off bool
// description / comments about this rule
Desc string
// the rule as a space-separated list of rule names and token(s) -- use single
// quotes around 'tokens' (using token.Tokens names or symbols). For keywords
// use 'key:keyword'. All tokens are matched at the same nesting depth as the
// start of the scope of this rule, unless they have a +D relative depth value
// differential before the token. Use the @ prefix for a sub-rule to require
// that rule to match -- by default explicit tokens are used if available, and
// only the first sub-rule failing that. Use ! by itself to define the start of
// an exclusionary rule -- the rule doesn't match when those rule elements DO
// match. Use the : prefix for a special group node that matches a single token
// at the start of the scope, and then defers to the child rules to perform the
// full match -- this is used for FirstTokenMap when there are multiple versions
// of a given keyword rule. Use the - prefix for tokens anchored by the end
// (next token) instead of the previous one -- typically just for the token
// prior to 'EOS', but a block of tokens that needs to go backward in the middle
// of a sequence to avoid ambiguity can also be marked with -.
Rule string
// if present, this rule only fires if stack has this on it
StackMatch string
// what action should be taken for this node when it matches
Ast AstActs
// actions to perform based on parsed Ast tree data, when this rule is done executing
Acts Acts
// for group-level rules having lots of children, lots of recursiveness, and
// high frequency: when we first encounter such a rule, make a map of all the
// tokens in the entire scope, and use that for a first-pass rejection of
// matching tokens
OptTokenMap bool
// for group-level rules with a number of rules that match based on first
// tokens / keywords, build a map to go directly to that rule -- all of these
// rules must also be organized sequentially from the start -- if there is no
// match, matching goes directly to the first non-lookup case
FirstTokenMap bool
// rule elements compiled from Rule string
Rules RuleList `json:"-" xml:"-"`
// strategic matching order for matching the rules
Order []int `edit:"-" json:"-" xml:"-"`
// map from first tokens / keywords to rules for FirstTokenMap case
FiTokenMap map[string]*Rule `edit:"-" json:"-" xml:"-" set:"-"`
// for FirstTokenMap, the start of the else cases not covered by the map
FiTokenElseIndex int `edit:"-" json:"-" xml:"-" set:"-"`
// exclusionary key index -- this is the token in Rules that we need to exclude matches for using ExclFwd and ExclRev rules
ExclKeyIndex int `edit:"-" json:"-" xml:"-" set:"-"`
// exclusionary forward-search rule elements compiled from Rule string
ExclFwd RuleList `edit:"-" json:"-" xml:"-" set:"-"`
// exclusionary reverse-search rule elements compiled from Rule string
ExclRev RuleList `edit:"-" json:"-" xml:"-" set:"-"`
}
// RuleFlags define bitflags for rule options compiled from rule syntax
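// Flags are set during Compile and queried during parsing; a minimal sketch
// (using the SetFlag and Is calls that appear throughout this file):
//
//	pr.SetFlag(true, SetsScope)
//	if pr.Is(SetsScope) { /* rule scopes itself via its terminal EOS */ }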
type RuleFlags tree.Flags //enums:bitflag
const (
// SetsScope means that this rule sets its own scope, because it ends with EOS
SetsScope RuleFlags = RuleFlags(tree.FlagsN) + iota
// Reverse means that this rule runs in reverse (starts with - sign) -- for arithmetic
// binary expressions only: this is needed to produce proper associativity result for
// mathematical expressions in the recursive descent parser.
// Only for rules of form: Expr '+' Expr -- two sub-rules with a token operator
// in the middle.
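// For example, with a hypothetical rule of the form:
//
//	SubExpr: -Expr '-' Expr
//
// the input 1 - 2 - 3 parses as (1 - 2) - 3 (left-associative), instead of
// the 1 - (2 - 3) that plain recursive descent would otherwise produce.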
Reverse
// NoTokens means that this rule doesn't have any explicit tokens -- only refers to
// other rules
NoTokens
// OnlyTokens means that this rule only has explicit tokens for matching -- can be
// optimized
OnlyTokens
// MatchEOS means that the rule ends with a *matched* EOS with StInc = 1.
// SetsScope applies for optional and matching EOS rules alike.
MatchEOS
// MultiEOS means that the rule has multiple EOS tokens within it --
// changes some of the logic
MultiEOS
// TokenMatchGroup is a group node that also has a single token match, so it can
// be used in a FirstTokenMap to optimize lookup of rules
TokenMatchGroup
)
// Parser is the interface type for parsers -- likely not necessary in general,
// but it is essential for defining the BaseIface for the gui when making new nodes.
type Parser interface {
tree.Node
// Compile compiles string rules into their runnable elements
Compile(ps *State) bool
// Validate checks for any errors in the rules and issues warnings,
// returns true if valid (no err) and false if invalid (errs)
Validate(ps *State) bool
// Parse tries to apply the rule to the given input state; it returns the rule
// that matched, or nil. parent is the parent rule that we're being called
// from, and ast is the current ast node that we add to.
Parse(ps *State, parent *Rule, ast *Ast, scope lexer.Reg, optMap lexer.TokenMap, depth int) *Rule
// AsParseRule returns object as a parser.Rule
AsParseRule() *Rule
}
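// A minimal driver sketch (assuming a properly initialized State ps and a
// rule tree rooted at pr; see CompileAll, Validate, and StartParse below):
//
//	if pr.CompileAll(ps) && pr.Validate(ps) {
//	    for !ps.AtEofNext() {
//	        pr.StartParse(ps) // advances past one top-level match or EOS
//	    }
//	}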
// check that Rule implements Parser interface
var _ Parser = (*Rule)(nil)
// RuleEl is an element of a parsing rule -- either a pointer to another rule or a token
type RuleEl struct {
// sub-rule for this position -- nil if token
Rule *Rule
// token, None if rule
Token token.KeyToken
// start increment for matching -- this is the number of non-optional, non-match items between (start | last match) and this item -- increments start region for matching
StInc int
// if true, this rule must match for the overall rule to fire -- by default only
// tokens are used for matching and, failing that, the first sub-rule -- use @
// to require a match
Match bool
// this rule is optional -- will absorb tokens if they exist -- indicated with ? prefix
Opt bool
// match this rule working backward from the next token -- triggered by - (minus) prefix and optimizes cases where there can be a lot of tokens going forward but few going from end -- must be anchored by a terminal EOS or other FromNext elements and is ignored if at the very end
FromNext bool
}
func (re RuleEl) IsRule() bool {
return re.Rule != nil
}
func (re RuleEl) IsToken() bool {
return re.Rule == nil
}
// RuleList is a list (slice) of rule elements
type RuleList []RuleEl
// Last returns the last rule element -- only used in cases where there are rules
func (rl RuleList) Last() *RuleEl {
return &rl[len(rl)-1]
}
// RuleMap is a map of all the rule names, for quick lookup
var RuleMap map[string]*Rule
// Matches encodes the regions of each match, Err for no match
type Matches []lexer.Reg
// StartEnd returns the first and last non-zero positions in the Matches list as a region
func (mm Matches) StartEnd() lexer.Reg {
reg := lexer.RegZero
for _, mp := range mm {
if mp.St != lexer.PosZero {
if reg.St == lexer.PosZero {
reg.St = mp.St
}
reg.Ed = mp.Ed
}
}
return reg
}
// StartEndExcl returns the first and last non-zero positions in the Matches
// list as a region, moving the end to the next token to make it the usual
// exclusive end position.
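// For example (hypothetical positions): if mm contains a match at 1:0, a zero
// (unmatched) entry, and a match at 1:4, StartEnd returns the region
// 1:0-1:4 (skipping the zero entry), and StartEndExcl advances the end just
// past the token at 1:4.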
func (mm Matches) StartEndExcl(ps *State) lexer.Reg {
reg := mm.StartEnd()
reg.Ed, _ = ps.Src.NextTokenPos(reg.Ed)
return reg
}
///////////////////////////////////////////////////////////////////////
// Rule
func (pr *Rule) BaseIface() reflect.Type {
return reflect.TypeOf((*Parser)(nil)).Elem()
}
func (pr *Rule) AsParseRule() *Rule {
return pr.This().(*Rule)
}
// IsGroup returns true if this node is a group, else it should have rules
func (pr *Rule) IsGroup() bool {
return pr.HasChildren()
}
// SetRuleMap is called on the top-level Rule and initializes the RuleMap
func (pr *Rule) SetRuleMap(ps *State) {
RuleMap = map[string]*Rule{}
pr.WalkDown(func(k tree.Node) bool {
pri := k.(*Rule)
if epr, has := RuleMap[pri.Nm]; has {
ps.Error(lexer.PosZero, fmt.Sprintf("Parser Compile: multiple rules with same name: %v and %v", pri.Path(), epr.Path()), pri)
} else {
RuleMap[pri.Nm] = pri
}
return true
})
}
// CompileAll is called on the top-level Rule to compile all nodes;
// it calls SetRuleMap first.
// Returns true if everything is ok, false if there were compile errors.
func (pr *Rule) CompileAll(ps *State) bool {
pr.SetRuleMap(ps)
allok := true
pr.WalkDown(func(k tree.Node) bool {
pri := k.(*Rule)
ok := pri.Compile(ps)
if !ok {
allok = false
}
return true
})
return allok
}
// Compile compiles string rules into their runnable elements.
// Returns true if everything is ok, false if there were compile errors.
func (pr *Rule) Compile(ps *State) bool {
if pr.Off {
pr.SetProperty("inactive", true)
} else {
pr.DeleteProperty("inactive")
}
if pr.Rule == "" { // parent
pr.Rules = nil
pr.SetFlag(false, SetsScope)
return true
}
valid := true
rstr := pr.Rule
if pr.Rule[0] == '-' {
rstr = rstr[1:]
pr.SetFlag(true, Reverse)
} else {
pr.SetFlag(false, Reverse)
}
rs := strings.Split(rstr, " ")
nr := len(rs)
pr.Rules = make(RuleList, nr)
pr.ExclFwd = nil
pr.ExclRev = nil
pr.SetFlag(false, NoTokens)
pr.SetFlag(true, OnlyTokens) // default is this..
pr.SetFlag(false, SetsScope)
pr.SetFlag(false, MatchEOS)
pr.SetFlag(false, MultiEOS)
pr.SetFlag(false, TokenMatchGroup)
pr.Order = nil
nmatch := 0
ntok := 0
curStInc := 0
eoses := 0
for ri := range rs {
rn := strings.TrimSpace(rs[ri])
if len(rn) == 0 {
ps.Error(lexer.PosZero, "Compile: Rules has empty string -- make sure there is only one space between rule elements", pr)
valid = false
break
}
if rn == "!" { // exclusionary rule
nr = ri
pr.Rules = pr.Rules[:ri]
pr.CompileExcl(ps, rs, ri+1)
break
}
if rn[0] == ':' {
pr.SetFlag(true, TokenMatchGroup)
}
rr := &pr.Rules[ri]
tokst := strings.Index(rn, "'")
if tokst >= 0 {
if rn[0] == '?' {
rr.Opt = true
} else {
rr.StInc = curStInc
rr.Match = true // all tokens match by default
pr.Order = append(pr.Order, ri)
nmatch++
ntok++
curStInc = 0
}
sz := len(rn)
if rn[0] == '+' {
td, _ := strconv.ParseInt(rn[1:tokst], 10, 64)
rr.Token.Depth = int(td)
} else if rn[0] == '-' {
rr.FromNext = true
}
tn := rn[tokst+1 : sz-1]
if len(tn) > 4 && tn[:4] == "key:" {
rr.Token.Token = token.Keyword
rr.Token.Key = tn[4:]
} else {
if pmt, has := token.OpPunctMap[tn]; has {
rr.Token.Token = pmt
} else {
err := rr.Token.Token.SetString(tn)
if err != nil {
ps.Error(lexer.PosZero, fmt.Sprintf("Compile: token convert error: %v", err.Error()), pr)
valid = false
}
}
}
if rr.Token.Token == token.EOS {
eoses++
if eoses > 1 {
pr.SetFlag(true, MultiEOS)
}
if ri == nr-1 {
rr.StInc = eoses
pr.SetFlag(true, SetsScope)
if rr.Match && eoses == 1 {
pr.SetFlag(true, MatchEOS)
}
}
}
} else {
st := 0
if rn[:2] == "?@" || rn[:2] == "@?" {
st = 2
rr.Opt = true
rr.Match = true
} else if rn[0] == '?' {
st = 1
rr.Opt = true
} else if rn[0] == '@' {
st = 1
rr.Match = true
pr.SetFlag(false, OnlyTokens)
pr.Order = append(pr.Order, ri)
nmatch++
} else {
curStInc++
}
rp, ok := RuleMap[rn[st:]]
if !ok {
ps.Error(lexer.PosZero, fmt.Sprintf("Compile: refers to rule %v not found", rn), pr)
valid = false
} else {
rr.Rule = rp
}
}
}
if pr.Is(Reverse) {
pr.Ast = AnchorAst // must be
}
if ntok == 0 && nmatch == 0 {
pr.Rules[0].Match = true
pr.Order = append(pr.Order, 0)
pr.SetFlag(true, NoTokens)
} else {
pr.OptimizeOrder(ps)
}
return valid
}
// OptimizeOrder optimizes the order of processing rule elements, including:
// * A block of reversed elements that match from next
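// For example (hypothetical order, tracing the code below): with
// Order = [0 1 2 3], where elements 1-3 all have FromNext set, the interior
// of the block is reversed while its final element stays in place as the
// anchor:
//
//	[0 1 2 3] -> [0 2 1 3]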
func (pr *Rule) OptimizeOrder(ps *State) {
osz := len(pr.Order)
if osz == 0 {
return
}
nfmnxt := 0
fmnSt := -1
fmnEd := -1
lastwas := false
for oi := 0; oi < osz; oi++ {
ri := pr.Order[oi]
rr := &pr.Rules[ri]
if rr.FromNext {
nfmnxt++
if fmnSt < 0 {
fmnSt = oi
}
if lastwas {
fmnEd = oi // end of block
}
lastwas = true
} else {
lastwas = false
}
}
if nfmnxt > 1 && fmnEd > 0 {
nword := make([]int, osz)
for oi := 0; oi < fmnSt; oi++ {
nword[oi] = pr.Order[oi]
}
idx := fmnSt
for oi := fmnEd - 1; oi >= fmnSt; oi-- {
nword[idx] = pr.Order[oi]
idx++
}
for oi := fmnEd; oi < osz; oi++ {
nword[oi] = pr.Order[oi]
}
pr.Order = nword
}
}
// CompileTokMap compiles first token map
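// For example (hypothetical child rules): if a group's children begin with the
// tokens 'key:if', 'key:for', and 'key:return', the map routes matching
// directly to the corresponding child rule, and FiTokenElseIndex marks where
// any remaining alternatives that do not start with a token begin.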
func (pr *Rule) CompileTokMap(ps *State) bool {
valid := true
pr.FiTokenMap = make(map[string]*Rule, len(pr.Kids))
pr.FiTokenElseIndex = len(pr.Kids)
for i, kpri := range pr.Kids {
kpr := kpri.(*Rule)
if len(kpr.Rules) == 0 || !kpr.Rules[0].IsToken() {
pr.FiTokenElseIndex = i
break
}
fr := kpr.Rules[0]
skey := fr.Token.StringKey()
if _, has := pr.FiTokenMap[skey]; has {
ps.Error(lexer.PosZero, fmt.Sprintf("CompileFirstTokenMap: multiple rules have the same first token: %v -- must be unique -- use a :'tok' group to match that first token and put all the sub-rules as children of that node", fr.Token), pr)
pr.FiTokenElseIndex = 0
valid = false
} else {
pr.FiTokenMap[skey] = kpr
}
}
return valid
}
// CompileExcl compiles exclusionary rules starting at the given point;
// currently only working for single-token matching rules.
func (pr *Rule) CompileExcl(ps *State, rs []string, rist int) bool {
valid := true
nr := len(rs)
var ktok token.KeyToken
ktoki := -1
for ri := 0; ri < rist; ri++ {
rr := &pr.Rules[ri]
if !rr.IsToken() {
continue
}
ktok = rr.Token
ktoki = ri
break
}
if ktoki < 0 {
ps.Error(lexer.PosZero, "CompileExcl: no token found for matching exclusion rules", pr)
return false
}
pr.ExclRev = make(RuleList, nr-rist)
ki := -1
for ri := rist; ri < nr; ri++ {
rn := strings.TrimSpace(rs[ri])
rr := &pr.ExclRev[ri-rist]
if rn[0] == '?' {
rr.Opt = true
}
tokst := strings.Index(rn, "'")
if tokst < 0 {
continue // pure optional
}
if !rr.Opt {
rr.Match = true // all tokens match by default
}
sz := len(rn)
if rn[0] == '+' {
td, _ := strconv.ParseInt(rn[1:tokst], 10, 64)
rr.Token.Depth = int(td)
}
tn := rn[tokst+1 : sz-1]
if len(tn) > 4 && tn[:4] == "key:" {
rr.Token.Token = token.Keyword
rr.Token.Key = tn[4:]
} else {
if pmt, has := token.OpPunctMap[tn]; has {
rr.Token.Token = pmt
} else {
err := rr.Token.Token.SetString(tn)
if err != nil {
ps.Error(lexer.PosZero, fmt.Sprintf("CompileExcl: token convert error: %v", err.Error()), pr)
valid = false
}
}
}
if rr.Token.Equal(ktok) {
ki = ri
}
}
if ki < 0 {
ps.Error(lexer.PosZero, fmt.Sprintf("CompileExcl: key token: %v not found in exclusion rule", ktok), pr)
valid = false
return valid
}
pr.ExclKeyIndex = ktoki
pr.ExclFwd = pr.ExclRev[ki+1-rist:]
pr.ExclRev = pr.ExclRev[:ki-rist]
return valid
}
// Validate checks for any errors in the rules and issues warnings,
// returns true if valid (no err) and false if invalid (errs)
func (pr *Rule) Validate(ps *State) bool {
valid := true
// do this here so everything else is compiled
if len(pr.Rules) == 0 && pr.FirstTokenMap {
pr.CompileTokMap(ps)
}
if len(pr.Rules) == 0 && !pr.HasChildren() && !tree.IsRoot(pr) {
ps.Error(lexer.PosZero, "Validate: rule has no rules and no children", pr)
valid = false
}
if !pr.Is(TokenMatchGroup) && len(pr.Rules) > 0 && pr.HasChildren() {
ps.Error(lexer.PosZero, "Validate: rule has both rules and children -- should be either-or", pr)
valid = false
}
if pr.Is(Reverse) {
if len(pr.Rules) != 3 {
ps.Error(lexer.PosZero, "Validate: a Reverse (-) rule must have 3 children -- for binary operator expressions only", pr)
valid = false
} else {
if !pr.Rules[1].IsToken() {
ps.Error(lexer.PosZero, "Validate: a Reverse (-) rule must have a token to be recognized in the middle of two rules -- for binary operator expressions only", pr)
}
}
}
if len(pr.Rules) > 0 {
if pr.Rules[0].IsRule() && (pr.Rules[0].Rule == pr || pr.ParentLevel(pr.Rules[0].Rule) >= 0) { // left recursive
if pr.Rules[0].Match {
ps.Error(lexer.PosZero, fmt.Sprintf("Validate: rule refers to itself recursively in first sub-rule: %v and that sub-rule is marked as a Match -- this is infinite recursion and is not allowed! Must use distinctive tokens in rule to match this rule, and then left-recursive elements will be filled in when the rule runs, but they cannot be used for matching rule.", pr.Rules[0].Rule.Name()), pr)
valid = false
}
ntok := 0
for _, rr := range pr.Rules {
if rr.IsToken() {
ntok++
}
}
if ntok == 0 {
ps.Error(lexer.PosZero, fmt.Sprintf("Validate: rule refers to itself recursively in first sub-rule: %v, and does not have any tokens in the rule -- MUST promote tokens to this rule to disambiguate match, otherwise will just do infinite recursion!", pr.Rules[0].Rule.Name()), pr)
valid = false
}
}
}
// now we iterate over our kids
for _, kpri := range pr.Kids {
kpr := kpri.(*Rule)
if !kpr.Validate(ps) {
valid = false
}
}
return valid
}
// StartParse is called on the root of the parse rule tree to start the parsing process
func (pr *Rule) StartParse(ps *State) *Rule {
if ps.AtEofNext() || !pr.HasChildren() {
ps.GotoEof()
return nil
}
kpr := pr.Kids[0].(*Rule) // first rule is special set of valid top-level matches
var parAst *Ast
scope := lexer.Reg{St: ps.Pos}
if ps.Ast.HasChildren() {
parAst = ps.Ast.ChildAst(0)
} else {
parAst = ps.Ast.NewChild(AstType, kpr.Name()).(*Ast)
ok := false
scope.St, ok = ps.Src.ValidTokenPos(scope.St)
if !ok {
ps.GotoEof()
return nil
}
ps.Pos = scope.St
}
didErr := false
for {
cpos := ps.Pos
mrule := kpr.Parse(ps, pr, parAst, scope, nil, 0)
ps.ResetNonMatches()
if ps.AtEof() {
return nil
}
if cpos == ps.Pos {
if !didErr {
ps.Error(cpos, "did not advance position -- need more rules to match current input -- skipping to next EOS", pr)
didErr = true
}
cp, ok := ps.Src.NextTokenPos(ps.Pos)
if !ok {
ps.GotoEof()
return nil
}
ep, ok := ps.Src.NextEosAnyDepth(cp)
if !ok {
ps.GotoEof()
return nil
}
ps.Pos = ep
} else {
return mrule
}
}
}
// Parse tries to apply rule to given input state, returns rule that matched or nil
// parent is the parent rule that we're being called from.
// parAst is the current ast node that we add to.
// scope is the region to search within, defined by the parent, or by the EOS
// if we have a terminal one.
func (pr *Rule) Parse(ps *State, parent *Rule, parAst *Ast, scope lexer.Reg, optMap lexer.TokenMap, depth int) *Rule {
if pr.Off {
return nil
}
if depth >= DepthLimit {
ps.Error(scope.St, "depth limit exceeded -- parser rules error -- look for recursive cases", pr)
return nil
}
nr := len(pr.Rules)
if !pr.Is(TokenMatchGroup) && nr > 0 {
return pr.ParseRules(ps, parent, parAst, scope, optMap, depth)
}
if optMap == nil && pr.OptTokenMap {
optMap = ps.Src.TokenMapReg(scope)
if ps.Trace.On {
ps.Trace.Out(ps, pr, Run, scope.St, scope, parAst, fmt.Sprintf("made optmap of size: %d", len(optMap)))
}
}
// pure group types just iterate over kids
for _, kpri := range pr.Kids {
kpr := kpri.(*Rule)
if mrule := kpr.Parse(ps, pr, parAst, scope, optMap, depth+1); mrule != nil {
return mrule
}
}
return nil
}
// ParseRules parses rules and returns this rule if it matches, nil if not
func (pr *Rule) ParseRules(ps *State, parent *Rule, parAst *Ast, scope lexer.Reg, optMap lexer.TokenMap, depth int) *Rule {
ok := false
if pr.Is(SetsScope) {
scope, ok = pr.Scope(ps, parAst, scope)
if !ok {
return nil
}
} else if GuiActive {
if scope == lexer.RegZero {
ps.Error(scope.St, "scope is empty and no EOS in rule -- invalid rules -- starting rules must all have EOS", pr)
return nil
}
}
match, nscope, mpos := pr.Match(ps, parAst, scope, 0, optMap)
if !match {
return nil
}
rparent := parent.Par.(*Rule)
if parent.Ast != NoAst && parent.IsGroup() {
if parAst.Nm != parent.Nm {
mreg := mpos.StartEndExcl(ps)
newAst := ps.AddAst(parAst, parent.Name(), mreg)
if parent.Ast == AnchorAst {
parAst = newAst
}
}
} else if parent.IsGroup() && rparent.Ast != NoAst && rparent.IsGroup() { // two-level group...
if parAst.Nm != rparent.Nm {
mreg := mpos.StartEndExcl(ps)
newAst := ps.AddAst(parAst, rparent.Name(), mreg)
if rparent.Ast == AnchorAst {
parAst = newAst
}
}
}
valid := pr.DoRules(ps, parent, parAst, nscope, mpos, optMap, depth) // returns validity but we don't care once matched..
if !valid {
return nil
}
return pr
}
// Scope finds the potential scope region for looking for tokens -- either from
// EOS position or State ScopeStack pushed from parents.
// Returns new scope and false if no valid scope found.
func (pr *Rule) Scope(ps *State, parAst *Ast, scope lexer.Reg) (lexer.Reg, bool) {
// prf := profile.Start("Scope")
// defer prf.End()
nscope := scope
creg := scope
lr := pr.Rules.Last()
for ei := 0; ei < lr.StInc; ei++ {
stlx := ps.Src.LexAt(creg.St)
ep, ok := ps.Src.NextEos(creg.St, stlx.Token.Depth)
if !ok {
// ps.Error(creg.St, "could not find EOS at target nesting depth -- parens / bracket / brace mismatch?", pr)
return nscope, false
}
if scope.Ed != lexer.PosZero && lr.Opt && scope.Ed.IsLess(ep) {
// optional tokens can't take us out of scope
return scope, true
}
if ei == lr.StInc-1 {
nscope.Ed = ep
if ps.Trace.On {
ps.Trace.Out(ps, pr, SubMatch, nscope.St, nscope, parAst, fmt.Sprintf("from EOS: starting scope: %v new scope: %v end pos: %v depth: %v", scope, nscope, ep, stlx.Token.Depth))
}
} else {
creg.St, ok = ps.Src.NextTokenPos(ep) // advance
if !ok {
// ps.Error(scope.St, "end of file looking for EOS tokens -- premature file end?", pr)
return nscope, false
}
}
}
return nscope, true
}
// Match attempts to match the rule, returns true if it matches, and the
// match positions, along with any update to the scope
func (pr *Rule) Match(ps *State, parAst *Ast, scope lexer.Reg, depth int, optMap lexer.TokenMap) (bool, lexer.Reg, Matches) {
if pr.Off {
return false, scope, nil
}
if depth > DepthLimit {
ps.Error(scope.St, "depth limit exceeded -- parser rules error -- look for recursive cases", pr)
return false, scope, nil
}
if ps.IsNonMatch(scope, pr) {
return false, scope, nil
}
if pr.StackMatch != "" {
if ps.Stack.Top() != pr.StackMatch {
return false, scope, nil
}
}
// mprf := profile.Start("Match")
// defer mprf.End()
// Note: uncomment the following to see which rules are taking the most
// time -- very helpful for focusing effort on optimizing those rules.
// prf := profile.Start(pr.Nm)
// defer prf.End()
nr := len(pr.Rules)
if pr.Is(TokenMatchGroup) || nr == 0 { // Group
return pr.MatchGroup(ps, parAst, scope, depth, optMap)
}
// prf := profile.Start("IsMatch")
if mst, match := ps.IsMatch(pr, scope); match {
// prf.End()
return true, scope, mst.Regs
}
// prf.End()
var mpos Matches
match := false
if pr.Is(NoTokens) {
match, mpos = pr.MatchNoToks(ps, parAst, scope, depth, optMap)
} else if pr.Is(OnlyTokens) {
match, mpos = pr.MatchOnlyToks(ps, parAst, scope, depth, optMap)
} else {
match, mpos = pr.MatchMixed(ps, parAst, scope, depth, optMap)
}
if !match {
ps.AddNonMatch(scope, pr)
return false, scope, nil
}
if len(pr.ExclFwd) > 0 || len(pr.ExclRev) > 0 {
ktpos := mpos[pr.ExclKeyIndex]
if pr.MatchExclude(ps, scope, ktpos, depth, optMap) {
if ps.Trace.On {
ps.Trace.Out(ps, pr, NoMatch, ktpos.St, scope, parAst, "Exclude criteria matched")
}
ps.AddNonMatch(scope, pr)
return false, scope, nil
}
}
mreg := mpos.StartEnd()
ps.AddMatch(pr, scope, mpos)
if ps.Trace.On {
ps.Trace.Out(ps, pr, Match, mreg.St, scope, parAst, fmt.Sprintf("Full Match reg: %v", mreg))
}
return true, scope, mpos
}
// MatchOnlyToks matches rules having only tokens
func (pr *Rule) MatchOnlyToks(ps *State, parAst *Ast, scope lexer.Reg, depth int, optMap lexer.TokenMap) (bool, Matches) {
nr := len(pr.Rules)
var mpos Matches
scstlx := ps.Src.LexAt(scope.St) // scope starting lex
scstDepth := scstlx.Token.Depth
creg := scope
osz := len(pr.Order)
for oi := 0; oi < osz; oi++ {
ri := pr.Order[oi]
rr := &pr.Rules[ri]
kt := rr.Token
if optMap != nil && !optMap.Has(kt.Token) { // not even a possibility
return false, nil
}
if rr.FromNext {
if mpos == nil {
mpos = make(Matches, nr) // make on demand -- cuts out a lot of allocations!
}
mpos[nr-1] = lexer.Reg{scope.Ed, scope.Ed}
}
kt.Depth += scstDepth // always use starting scope depth
match, tpos := pr.MatchToken(ps, rr, ri, kt, &creg, mpos, parAst, scope, depth, optMap)
if !match {
if ps.Trace.On {
if tpos != lexer.PosZero {
tlx := ps.Src.LexAt(tpos)
ps.Trace.Out(ps, pr, NoMatch, creg.St, creg, parAst, fmt.Sprintf("%v token: %v, was: %v", ri, kt.String(), tlx.String()))
} else {
ps.Trace.Out(ps, pr, NoMatch, creg.St, creg, parAst, fmt.Sprintf("%v token: %v, nil region", ri, kt.String()))
}
}
return false, nil
}
if mpos == nil {
mpos = make(Matches, nr) // make on demand -- cuts out a lot of allocations!
}
mpos[ri] = lexer.Reg{tpos, tpos}
if ps.Trace.On {
ps.Trace.Out(ps, pr, SubMatch, creg.St, creg, parAst, fmt.Sprintf("%v token: %v", ri, kt.String()))
}
}
return true, mpos
}
// MatchToken matches one token sub-rule -- returns true for match and
// false if no match -- and the position where it was / should have been
func (pr *Rule) MatchToken(ps *State, rr *RuleEl, ri int, kt token.KeyToken, creg *lexer.Reg, mpos Matches, parAst *Ast, scope lexer.Reg, depth int, optMap lexer.TokenMap) (bool, lexer.Pos) {
nr := len(pr.Rules)
ok := false
matchst := false // match start of creg
matched := false // match end of creg
var tpos lexer.Pos
if ri == 0 {
matchst = true
} else if mpos != nil {
lpos := mpos[ri-1].Ed
if lpos != lexer.PosZero { // previous has matched
matchst = true
} else if ri < nr-1 && rr.FromNext {
lpos := mpos[ri+1].St
if lpos != lexer.PosZero { // next has matched
creg.Ed, _ = ps.Src.PrevTokenPos(lpos)
matched = true
}
}
}
for stinc := 0; stinc < rr.StInc; stinc++ {
creg.St, _ = ps.Src.NextTokenPos(creg.St)
}
if ri == nr-1 && rr.Token.Token == token.EOS {
return true, scope.Ed
}
if creg.IsNil() && !matched {
return false, tpos
}
if matchst { // start token must be right here
if !ps.MatchToken(kt, creg.St) {
return false, creg.St
}
tpos = creg.St
} else if matched {
if !ps.MatchToken(kt, creg.Ed) {
return false, creg.Ed
}
tpos = creg.Ed
} else {
// prf := profile.Start("FindToken")
if pr.Is(Reverse) {
tpos, ok = ps.FindTokenReverse(kt, *creg)
} else {
tpos, ok = ps.FindToken(kt, *creg)
}
// prf.End()
if !ok {
return false, tpos
}
}
creg.St, _ = ps.Src.NextTokenPos(tpos) // always ratchet up
return true, tpos
}
// MatchMixed matches mixed tokens and non-tokens
func (pr *Rule) MatchMixed(ps *State, parAst *Ast, scope lexer.Reg, depth int, optMap lexer.TokenMap) (bool, Matches) {
nr := len(pr.Rules)
var mpos Matches
scstlx := ps.Src.LexAt(scope.St) // scope starting lex
scstDepth := scstlx.Token.Depth
creg := scope
osz := len(pr.Order)
// first pass filter on tokens
if optMap != nil {
for oi := 0; oi < osz; oi++ {
ri := pr.Order[oi]
rr := &pr.Rules[ri]
if rr.IsToken() {
kt := rr.Token
if !optMap.Has(kt.Token) { // not even a possibility
return false, nil
}
}
}
}
for oi := 0; oi < osz; oi++ {