/
tokenizer.go
122 lines (113 loc) · 1.81 KB
/
tokenizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package main
// TokenName identifies the lexical category of a Token. Its values are the
// constants declared in this file (LPARENTHESE, RPARENTHESE, SYMBOL, BOOLEAN,
// NUMBER, STRING).
type TokenName int
// Token is a single lexical item produced by Tokenization.
type Token struct {
// Value is the item text exactly as it was cut from the source code.
Value string
// Name is the category that getType assigned to Value.
Name TokenName
}
// Token categories, numbered from 0 in declaration order via iota.
const (
// LPARENTHESE is the category of the item "(".
LPARENTHESE TokenName = iota
// RPARENTHESE is the category of the item ")".
RPARENTHESE
// SYMBOL is the fallback category for items that match no other category.
SYMBOL
// BOOLEAN is the category of items for which IsBool reports true.
BOOLEAN
// NUMBER is the category of items for which IsNumeric reports true.
NUMBER
// STRING is the category of items for which IsString reports true.
STRING
)
// Tokenization splits code into a flat list of tokens.
//
// Whitespace between items is skipped with trimLeft, each item is cut out
// with nextItem, and its category is determined with getType. Scanning stops
// when only whitespace (or nothing) remains, or when nextItem reports that it
// cannot produce an item (for example an unterminated string literal); in
// that case the tokens collected so far are returned and the remainder of the
// input is dropped without an error.
//
// The result is always a non-nil slice, empty when no token was found.
func Tokenization(code string) []Token {
	result := []Token{}
	cursor := 0
	for {
		start := trimLeft(code, cursor)
		if start < 0 {
			// Nothing but whitespace is left.
			return result
		}
		text, end := nextItem(code, start)
		if end < 0 {
			// nextItem could not cut a complete item; stop here.
			return result
		}
		result = append(result, Token{Value: text, Name: getType(text)})
		cursor = end
	}
}
// isSpliter reports whether r is one of the whitespace bytes that separate
// items: space, horizontal tab, carriage return or line feed.
func isSpliter(r byte) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// getString cuts a double-quoted string literal out of code.
//
// i must be the index of the opening quote; the byte at i is taken as the
// start of the literal without being inspected. The literal ends at the first
// '"' found after i. There is no escape handling, so a backslash does not
// protect a quote.
//
// It returns the literal including both quote characters, and the index of
// the byte that follows the closing quote. If there is no closing quote after
// i (including the case where i is the last byte of code), it returns "", -1.
func getString(code string, i int) (string, int) {
	for end := i + 1; end < len(code); end++ {
		if code[end] == '"' {
			// Slice the literal out in one step rather than appending one
			// byte per iteration, which reallocated the string every time.
			return code[i : end+1], end + 1
		}
	}
	return "", -1
}
// trimLeft skips separator bytes (as defined by isSpliter) starting at index
// i and returns the index of the first byte that is not a separator.
//
// It returns -1 when i is at or past the end of code, or when every byte from
// i to the end is a separator.
func trimLeft(code string, i int) int {
	for pos := i; pos < len(code); pos++ {
		if !isSpliter(code[pos]) {
			return pos
		}
	}
	return -1
}
// nextItem cuts the item that starts at code[i] and returns it together with
// the index at which scanning should continue.
//
//   - If i is at or past the end of code, it returns "", -1.
//   - A '(' or ')' at i is returned as a one-character item.
//   - A '"' at i delegates to getString, which returns the quoted literal, or
//     "", -1 when the literal is not terminated.
//   - Otherwise the item is the run of bytes from i up to (not including) the
//     next separator byte, '(' or ')', or the end of code. A '"' inside such
//     a run does not end it.
//
// The caller is expected to have skipped leading separators; if code[i] is a
// separator the result is "" with the index unchanged.
func nextItem(code string, i int) (string, int) {
	if i >= len(code) {
		return "", -1
	}
	switch code[i] {
	case '(', ')':
		return code[i : i+1], i + 1
	case '"':
		return getString(code, i)
	}
	// Find the end of the run first, then slice once; this avoids rebuilding
	// the item string on every byte.
	end := i
	for end < len(code) && !isSpliter(code[end]) && code[end] != '(' && code[end] != ')' {
		end++
	}
	return code[i:end], end
}
// getType classifies an item's text into a TokenName.
//
// The checks run in a fixed order and the first match wins: the literal "("
// and ")", then IsBool, IsString and IsNumeric. Anything that matches none of
// them is a SYMBOL.
func getType(item string) TokenName {
	if item == "(" {
		return LPARENTHESE
	}
	if item == ")" {
		return RPARENTHESE
	}
	if IsBool(item) {
		return BOOLEAN
	}
	if IsString(item) {
		return STRING
	}
	if IsNumeric(item) {
		return NUMBER
	}
	return SYMBOL
}