/
lexer_impl.go
197 lines (167 loc) · 3.97 KB
/
lexer_impl.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
package parser
import (
"os"
s "strings"
"unsafe"
"github.com/antlr/antlr4/runtime/Go/antlr"
"github.com/cornelk/hashmap"
"github.com/sirupsen/logrus"
)
//nolint:gochecknoglobals
var (
	// syslLexerLog enables per-token logging in getNextToken when the
	// SYSL_LEXER_LOG environment variable is set to any non-empty value.
	syslLexerLog = os.Getenv("SYSL_LEXER_LOG") != ""
	// keywords are the Sysl keywords matched (case-insensitively, as
	// prefixes) by startsWithKeyword. Order is irrelevant: any match wins.
	keywords = [...]string{
		"sequence of",
		"set of",
		"return",
		"for",
		"one of",
		"else",
		"if",
		"loop",
		"until",
		"alt",
		"while",
	}
	// Antlr doesn't support reentrant Go lexer state, so we work around it with
	// a fast lock-free hash map. Keyed by lexer pointer; see ls/DeleteLexerState.
	lexerStates = &hashmap.HashMap{}
)
// importKeyword is only treated as a keyword before the first application
// definition (see lexerState.noMoreImports).
const importKeyword = "import"
// lexerState holds the per-lexer mutable state that the generated ANTLR
// lexer cannot carry itself (see lexerStates). One instance exists per
// live SyslLexer; getNextToken reads and updates it on every token.
type lexerState struct {
	prevToken []antlr.Token // FIFO queue of synthesized INDENT/DEDENT tokens plus the pending real token
	level stack // stack of indentation widths currently open
	spaces int // indentation width of the current line (presumably set by grammar actions — not visible in this file)
	linenum int
	inSqBrackets int // nesting depth of [ ] — TODO confirm maintained by grammar actions
	parens int // nesting depth of ( ) — TODO confirm maintained by grammar actions
	blockTextLine int
	gotNewLine bool // true once a NEWLINE has been seen and indent handling is pending
	gotHTTPVerb bool
	gotView bool
	noMoreImports bool // Used to allow the import keyword after the application definition has started
}
// ls returns the lexerState associated with lexer l, creating and
// registering a zero-valued one on first use. The lexer's address
// (uintptr of its pointer) is the map key, since the generated ANTLR
// lexer offers no per-instance storage hook.
//
// NOTE(review): the Get-then-Set pair is not atomic; this is only safe
// if a single goroutine drives any given lexer instance — confirm.
func ls(l *SyslLexer) *lexerState {
	key := uintptr(unsafe.Pointer(l))
	if state, has := lexerStates.Get(key); has {
		return state.(*lexerState)
	}
	state := &lexerState{}
	lexerStates.Set(key, state)
	return state
}
// DeleteLexerState removes the state registered for lexer l. Callers
// should invoke it once lexing finishes, otherwise the entry leaks in
// the package-level lexerStates map.
func DeleteLexerState(l *SyslLexer) {
	key := uintptr(unsafe.Pointer(l))
	lexerStates.Del(key)
}
// calcSpaces returns the indentation width of text: each space counts
// as one column and each tab as four. All other bytes contribute
// nothing (they are simply skipped, matching the original behavior).
func calcSpaces(text string) int {
	// Local renamed from `s` to `n`: the file imports strings under the
	// alias `s`, and the old name shadowed it inside this function.
	n := 0
	for i := 0; i < len(text); i++ {
		switch text[i] {
		case ' ':
			n++
		case '\t':
			n += 4 // tabs are fixed at 4 columns
		}
	}
	return n
}
// startsWithKeyword reports whether text begins with a Sysl keyword.
// Keyword matching is case-insensitive. "import" additionally counts
// as a keyword, but only while imports are still allowed — i.e. before
// the application definition has started (l.noMoreImports is false) —
// and it is matched case-sensitively.
func startsWithKeyword(l *lexerState, text string) bool {
	lowered := s.ToLower(text)
	for _, kw := range keywords {
		if s.HasPrefix(lowered, kw) {
			return true
		}
	}
	return s.HasPrefix(text, importKeyword) && !l.noMoreImports
}
// createDedentToken synthesizes a DEDENT token (zero-width, default
// channel) attributed to the given token source, for injection into
// the stream by getNextToken.
func createDedentToken(source *antlr.TokenSourceCharStreamPair) *antlr.CommonToken {
	return antlr.NewCommonToken(source, SyslLexerDEDENT, 0, 0, 0)
}
// createIndentToken synthesizes an INDENT token (zero-width, default
// channel) attributed to the given token source, for injection into
// the stream by getNextToken.
func createIndentToken(source *antlr.TokenSourceCharStreamPair) *antlr.CommonToken {
	return antlr.NewCommonToken(source, SyslLexerINDENT, 0, 0, 0)
}
// stack is a simple LIFO of ints, used by the lexer to track the
// currently open indentation levels.
type stack []int

// Push places v on top of the stack.
func (st *stack) Push(v int) {
	*st = append(*st, v)
}

// Pop removes and returns the top element. It panics on an empty
// stack; callers are responsible for checking Size first.
func (st *stack) Pop() int {
	old := *st
	top := old[len(old)-1]
	*st = old[:len(old)-1]
	return top
}

// Size reports how many elements the stack holds.
func (st *stack) Size() int {
	return len(*st)
}

// Peek returns the top element without removing it. Panics when empty.
func (st *stack) Peek() int {
	return (*st)[len(*st)-1]
}
// getPreviousIndent returns the most recently recorded indentation
// level, or 0 when none has been recorded yet. The stack itself is
// never modified (peek only).
func getPreviousIndent(lvl stack) int {
	if lvl.Size() > 0 {
		return lvl.Peek()
	}
	return 0
}
// trimText returns the lexer's current token text with leading and
// trailing whitespace removed.
func trimText(l *SyslLexer) string {
	text := l.GetText()
	return s.TrimSpace(text)
}
// getNextToken is the lexer's token-fetch hook. It wraps the generated
// ANTLR lexer's NextToken and injects synthetic INDENT/DEDENT tokens
// based on the indentation recorded in the lexer's state (ls.spaces is
// presumably maintained by grammar actions — not visible in this file).
//
// Tokens already synthesized on a previous call are queued in
// ls.prevToken and drained FIFO before the underlying lexer is asked
// for more input.
func getNextToken(l *SyslLexer) antlr.Token {
	ls := ls(l)
	if len(ls.prevToken) > 0 {
		// poll, retrieve head
		nextTok := ls.prevToken[0]
		ls.prevToken = ls.prevToken[1:]
		return nextTok
	}
	next := l.BaseLexer.NextToken()
	if syslLexerLog {
		logrus.Info(next)
	}
	// return NEWLINE
	// A newline was already seen: further newline/comment variants are
	// passed straight through without triggering indent processing.
	if ls.gotNewLine {
		switch next.GetTokenType() {
		case SyslLexerNEWLINE, SyslLexerNEWLINE_2, SyslLexerEMPTY_LINE,
			SyslLexerE_NL, SyslLexerE_EMPTY_LINE, SyslLexerTMPL_NL:
			fallthrough
		case SyslLexerINDENTED_COMMENT, SyslLexerEMPTY_COMMENT, SyslLexerE_INDENTED_COMMENT:
			fallthrough
		case SyslLexerE_DOT_NAME_NL:
			return next
		}
	}
	// regular whitespace, return as is.
	// return from here only when we encounter HIDDEN after INDENT has been generated
	// after processing NL.
	if !ls.gotNewLine && next.GetChannel() == antlr.TokenHiddenChannel {
		ls.spaces = 0
		return next
	} else if next.GetTokenType() == SyslLexerSYSL_COMMENT {
		ls.spaces = 0
		return next
	}
	if next.GetTokenType() == antlr.TokenEOF {
		ls.spaces = 0 // done with the file; forces the loop below to emit DEDENTs for all open levels
	} else if !ls.gotNewLine {
		// Mid-line token with no pending newline: no indent handling needed.
		return next
	}
	// Reconcile the current line's indentation with the open levels:
	// deeper -> push level and queue an INDENT; shallower -> pop and
	// queue a DEDENT, repeating until the widths agree.
	for ls.spaces != getPreviousIndent(ls.level) {
		if ls.spaces > getPreviousIndent(ls.level) {
			ls.level.Push(ls.spaces)
			ls.prevToken = append(ls.prevToken, createIndentToken(next.GetSource()))
		} else {
			ls.level.Pop()
			ls.prevToken = append(ls.prevToken, createDedentToken(next.GetSource()))
		}
	}
	ls.gotNewLine = false
	// The real token is queued behind any INDENT/DEDENTs, then the
	// queue head (which may be a synthetic token) is returned.
	ls.prevToken = append(ls.prevToken, next)
	// poll, retrieve head
	temp := ls.prevToken[0]
	ls.prevToken = ls.prevToken[1:]
	return temp
}