-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.go
executable file
·245 lines (220 loc) · 4.32 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
// Package lexer is generated by GoGLL. Do not edit.
package lexer
import (
// "fmt"
"io/ioutil"
"strings"
"unicode"
"lex4/token"
)
// state is the index of a lexer DFA state; it indexes the accept and
// nextState tables below.
type state int

// nullState marks the absence of a transition: the DFA has no move on
// the current input rune.
const nullState state = -1
// Lexer contains both the input slice of runes and the slice of tokens
// parsed from the input.
type Lexer struct {
	// I is the input slice of runes.
	I []rune

	// Tokens is the slice of tokens constructed by the lexer from I,
	// always terminated by a token.EOF entry (see New).
	Tokens []*token.Token
}
// NewFile reads fname and returns a Lexer over its contents.
//
// When fname ends in ".md" everything outside fenced code blocks is
// first blanked to whitespace, so only code-block text is lexed; any
// other file is lexed verbatim. A read error causes a panic.
func NewFile(fname string) *Lexer {
	content, err := ioutil.ReadFile(fname)
	if err != nil {
		panic(err)
	}
	runes := []rune(string(content))
	if strings.HasSuffix(fname, ".md") {
		loadMd(runes)
	}
	return New(runes)
}
// loadMd blanks out, in place, every rune of input that lies outside a
// fenced markdown code block (``` ... ```), replacing it with a space.
// Newlines are preserved so GetLineColumn still reports correct
// positions for the surviving code-block text. The three-backtick
// fence markers themselves are also blanked.
func loadMd(input []rune) {
	inText := true // true while outside a code fence
	for i := 0; i < len(input); {
		// A ``` fence toggles between text and code; blank the marker
		// and skip past it.
		if i+3 <= len(input) && input[i] == '`' && input[i+1] == '`' && input[i+2] == '`' {
			inText = !inText
			input[i], input[i+1], input[i+2] = ' ', ' ', ' '
			i += 3
			if i >= len(input) {
				break
			}
		}
		// Outside code blocks every rune except newline becomes a space.
		// (The original also had a branch assigning '\n' to itself — a
		// no-op, removed here.)
		if inText && input[i] != '\n' {
			input[i] = ' '
		}
		i++
	}
}
// New constructs a Lexer from a slice of runes, treating every rune as
// input text. Unicode whitespace between tokens is skipped, and a
// final EOF token is always appended to Tokens.
func New(input []rune) *Lexer {
	l := &Lexer{
		I:      input,
		Tokens: make([]*token.Token, 0, 2048),
	}
	pos := 0
	for pos < len(l.I) {
		if unicode.IsSpace(l.I[pos]) {
			pos++
			continue
		}
		tok := l.scan(pos)
		pos = tok.Rext()
		l.addToken(tok)
	}
	l.add(token.EOF, len(input), len(input))
	return l
}
// scan runs the lexer DFA starting at input index i and returns the
// longest token it can match (maximal munch). If no accepting state is
// ever reached the returned token has type token.Error and its extent
// includes the offending rune.
func (l *Lexer) scan(i int) *token.Token {
	// fmt.Printf("lexer.scan\n")
	s, typ, rext := state(0), token.Error, i
	for s != nullState {
		// fmt.Printf("S%d '%c' @ %d\n", s, l.I[rext], rext)
		if rext >= len(l.I) {
			// End of input: accept whatever the current state allows.
			typ = accept[s]
			s = nullState
		} else {
			typ = accept[s]
			s = nextState[s](l.I[rext])
			// Advance on a real transition, or once past the offending
			// rune while no token has been recognised yet.
			if s != nullState || typ == token.Error {
				rext++
			}
		}
	}
	return token.New(typ, i, rext, l.I)
}
// escape returns a printable representation of r, rendering the double
// quote, the backslash, and the common control characters (\r, \n, \t)
// as backslash escapes. Every other rune is returned unchanged.
func escape(r rune) string {
	switch r {
	case '"':
		// Fix: previously returned a bare `"` (identical to string(r),
		// making the case a no-op); a double quote must be escaped to
		// stay consistent with the other cases.
		return "\\\""
	case '\\':
		return "\\\\"
	case '\r':
		return "\\r"
	case '\n':
		return "\\n"
	case '\t':
		return "\\t"
	}
	return string(r)
}
// GetLineColumn returns the 1-based line and column of rune[i] in the
// input. A tab advances the column by four; a newline starts a new
// line at column one.
func (l *Lexer) GetLineColumn(i int) (line, col int) {
	line, col = 1, 1
	for _, r := range l.I[:i] {
		switch r {
		case '\n':
			line, col = line+1, 1
		case '\t':
			col += 4
		default:
			col++
		}
	}
	return line, col
}
// GetLineColumnOfToken returns the line and column in the input at
// which token i starts.
func (l *Lexer) GetLineColumnOfToken(i int) (line, col int) {
	tok := l.Tokens[i]
	return l.GetLineColumn(tok.Lext())
}
// GetString returns the input text spanning from the left extent of
// Tokens[lext] through the right extent of Tokens[rext].
func (l *Lexer) GetString(lext, rext int) string {
	from := l.Tokens[lext].Lext()
	to := l.Tokens[rext].Rext()
	return string(l.I[from:to])
}
// add constructs a token of type t spanning [lext, rext) over the
// input and appends it to the token stream.
func (l *Lexer) add(t token.Type, lext, rext int) {
	tok := token.New(t, lext, rext, l.I)
	l.addToken(tok)
}
// addToken appends tok to the lexer's token stream.
func (l *Lexer) addToken(tok *token.Token) {
	l.Tokens = append(l.Tokens, tok)
}
// any reports whether r occurs anywhere in set.
func any(r rune, set []rune) bool {
	for i := 0; i < len(set); i++ {
		if set[i] == r {
			return true
		}
	}
	return false
}
// not reports whether r occurs nowhere in set.
func not(r rune, set []rune) bool {
	found := false
	for _, member := range set {
		if member == r {
			found = true
			break
		}
	}
	return !found
}
// accept maps each DFA state to the token type recognised when the
// scan halts in that state; token.Error marks non-accepting states.
var accept = []token.Type{
	token.Error, // S0: start state, nothing matched yet
	token.Type0, // S1
	token.Type2, // S2
	token.Error, // S3: partial identifier prefix, not yet accepting
	token.Type1, // S4
}
// nextState holds one transition function per DFA state: given the
// current input rune it returns the successor state, or nullState when
// the state has no move on that rune.
var nextState = []func(r rune) state{
	// Set0: start state.
	func(r rune) state {
		switch {
		case r == '&':
			return 1
		case r == '|':
			return 2
		case unicode.IsLetter(r):
			return 3
		}
		return nullState
	},
	// Set1: accepting state with no outgoing transitions.
	func(r rune) state {
		switch {
		}
		return nullState
	},
	// Set2: accepting state with no outgoing transitions.
	func(r rune) state {
		switch {
		}
		return nullState
	},
	// Set3: after a leading letter; continues on letter/digit/backtick.
	func(r rune) state {
		switch {
		case r == '`':
			return 4
		case unicode.IsLetter(r):
			return 4
		case unicode.IsNumber(r):
			return 4
		}
		return nullState
	},
	// Set4: same transitions as Set3; loops while the token continues.
	func(r rune) state {
		switch {
		case r == '`':
			return 4
		case unicode.IsLetter(r):
			return 4
		case unicode.IsNumber(r):
			return 4
		}
		return nullState
	},
}