-
Notifications
You must be signed in to change notification settings - Fork 328
/
scanner.go
175 lines (155 loc) · 5.35 KB
/
scanner.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
// Copyright (C) 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package lingo
import (
"bytes"
"context"
"regexp"
"unicode/utf8"
)
// Skipper is a function that is handed the scanner it should operate on.
// A Skipper is installed with Scanner.SetSkip and invoked by Scanner.Skip.
// NOTE(review): presumably used to consume whitespace/comments between tokens — confirm against the generated parsers.
type Skipper func(*Scanner)
// Marker is a stored point in a scanner that can be reset to.
// Markers are produced by Scanner.Mark and Scanner.PreMark, and are consumed by
// Scanner.Reset, Scanner.Register and Scanner.MustProgress.
type Marker struct {
// offset is the scanner's byte offset into the input when the marker was taken (-1 for a PreMark marker).
offset int
// count is the number of parse records held when the marker was taken; it stays 0 when the scanner keeps no records.
count int
}
// Scanner is a basic implementation of the functions used by generated parsers.
// It expects the full byte slice of the source file.
type Scanner struct {
// ctx is the context supplied when the scanner was constructed.
ctx context.Context
// name is the name supplied when the scanner was constructed.
// NOTE(review): presumably the source name used when reporting errors — confirm.
name string
// data is the complete input being scanned.
data []byte
// offset is the index into data of the next unread byte.
offset int
// skipper is the function run by Skip; it is nil until SetSkip installs one.
skipper Skipper
// skipping is true while skipper is running, so that nested Skip calls return immediately.
skipping bool
// records is the optional parse Record list; when nil, no records are kept.
records *Records
// watermark is the error value returned by Watermark.
watermark scanError
}
// NewByteScanner creates a Scanner that reads from the supplied byte slice.
// The slice is used directly, it is not copied.
// When records is nil no cst is maintained; otherwise the parse Record list
// is written into it as nodes are registered.
func NewByteScanner(ctx context.Context, name string, input []byte, records *Records) *Scanner {
	s := new(Scanner)
	s.ctx = ctx
	s.name = name
	s.data = input
	s.records = records
	return s
}
// NewStringScanner creates a Scanner that reads from the supplied string.
// The string is converted to a byte slice and handed to NewByteScanner.
// When records is nil no cst is maintained; otherwise the parse Record list
// is written into it as nodes are registered.
func NewStringScanner(ctx context.Context, name string, input string, records *Records) *Scanner {
	data := []byte(input)
	return NewByteScanner(ctx, name, data, records)
}
// WasOk is a helper function called by generated parser code.
// It discards the value half of a (value, error) result and reports whether
// the error half was nil.
// It is intended for optional sub-parsers whose result is not needed.
func WasOk(_ interface{}, err error) bool {
	succeeded := err == nil
	return succeeded
}
// SetSkip installs skipper as the scanner's skip function.
// The previously installed skip function is returned so the caller can
// restore it later.
func (s *Scanner) SetSkip(skipper Skipper) Skipper {
	previous := s.skipper
	s.skipper = skipper
	return previous
}
// Skip invokes the current skip function if one is set.
// It does nothing when no skip function has been installed, or when it is
// called again while the skip function is already running.
func (s *Scanner) Skip() {
	// A scanner starts with no skipper; calling a nil func would panic.
	if s.skipper == nil || s.skipping {
		return
	}
	// The flag makes nested Skip calls issued by the skipper itself a no-op.
	s.skipping = true
	s.skipper(s)
	s.skipping = false
}
// EOF reports whether the scan position has reached (or passed) the end of
// the input.
func (s *Scanner) EOF() bool {
	return len(s.data) <= s.offset
}
// Mark captures the current scan position and state in a new Marker.
// The marker holds the current offset and, when records are being kept,
// the current number of records.
func (s *Scanner) Mark() Marker {
	count := 0
	if s.records != nil {
		count = len(*s.records)
	}
	return Marker{offset: s.offset, count: count}
}
// PreMark returns an invalid marker whose offset is -1.
// It is used as a "before the start" sentinel.
func (s *Scanner) PreMark() Marker {
	var sentinel Marker
	sentinel.offset = -1
	return sentinel
}
// MustProgress checks that the scanner has moved forwards past the marker m.
// It returns a fresh marker for the current position, together with an error
// built by s.Error if the scanner offset is not beyond m.
// Used to catch when the grammar is broken.
func (s *Scanner) MustProgress(m Marker) (Marker, error) {
	if s.offset > m.offset {
		return s.Mark(), nil
	}
	// The error is built before the new marker is taken.
	var err error = s.Error(nil, "Failed to make progress")
	return s.Mark(), err
}
// Watermark returns the error that was generated furthest into the parse stream.
// This is normally included in errors automatically, and often indicates the point where
// the best match failed, and thus the actual error in the source.
// NOTE(review): the watermark field has the concrete type scanError, which is declared
// outside this view; depending on that type the returned error may be non-nil even when
// nothing has been recorded — confirm before comparing the result against nil.
func (s *Scanner) Watermark() error {
return s.watermark
}
// Register adds a node to the cst covering the input from the start marker
// to the current position.
// The new record is inserted at the record index stored in start, so it ends
// up in front of any records added since the marker was taken.
// It does nothing when the scanner is not keeping records.
func (s *Scanner) Register(start Marker, object interface{}) {
	if s.records == nil {
		return
	}
	at := start.count
	// Grow the list by one slot first, so there is room to shift into.
	*s.records = append(*s.records, Record{})
	list := *s.records
	// Move everything from the insertion point onwards up by one.
	copy(list[at+1:], list[at:])
	// Drop the new record into the gap.
	list[at] = Record{Start: start.offset, End: s.offset, Object: object}
}
// Reset rewinds the scanner to the state captured by the start Marker.
// The offset is restored and, when records are being kept, the record list
// is truncated back to the length it had when the marker was taken.
func (s *Scanner) Reset(start Marker) {
	s.offset = start.offset
	if s.records == nil {
		return
	}
	kept := (*s.records)[:start.count]
	*s.records = kept
}
// Rune is a parser for a single rune.
// If the next rune in the stream is a match, the rune will be consumed and the error will be nil.
// Otherwise an error will be returned and nothing is consumed.
// In either case, the requested rune is returned as the value.
func (s *Scanner) Rune(r rune) (rune, error) {
	v, size := utf8.DecodeRune(s.data[s.offset:])
	// DecodeRune reports (utf8.RuneError, 0) when there is no input left.
	// Without the size check a request for U+FFFD would "match" at the end
	// of the input without consuming anything.
	if size == 0 || v != r {
		return r, scanFailure
	}
	s.offset += size
	return v, nil
}
// Literal is a parser for a literal string.
// If the literal string is next in the stream, it will be consumed and the error will be nil.
// Otherwise an error will be returned, nothing is consumed and the value will be the empty string.
func (s *Scanner) Literal(str string) (string, error) {
	// HasPrefix covers both the "not enough input left" and the "bytes differ" cases.
	if !bytes.HasPrefix(s.data[s.offset:], []byte(str)) {
		return "", scanFailure
	}
	s.offset += len(str)
	return str, nil
}
// Pattern is a parser for a regular expression.
// If the pattern matches at the start of the remaining stream, the matching string will be
// consumed and returned.
// Otherwise an error will be returned, nothing is consumed and the value will be the empty string.
func (s *Scanner) Pattern(re *regexp.Regexp) (string, error) {
	remains := s.data[s.offset:]
	match := re.FindIndex(remains)
	// FindIndex reports the leftmost match anywhere in remains, so a match
	// that begins after index 0 has to be rejected; accepting it would
	// silently consume the unmatched bytes in front of it.
	if match == nil || match[0] != 0 {
		return "", scanFailure
	}
	s.offset += match[1]
	return string(remains[:match[1]]), nil
}