/
tokenizer.go
143 lines (115 loc) · 2.61 KB
/
tokenizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
package dynp
import (
"fmt"
)
// mode is the scanner state used by Tokenize while walking the input.
type mode int
// tokenType tells whether a Token holds static text or parameter content.
type tokenType int
// Scanner states of the tokenizer.
const (
// modeNorm: outside of any parameter; bytes are collected as static text.
modeNorm mode = iota
// modeBeginParam: a '$' directly followed by '{' was just seen; the '{'
// is consumed next and a parameter token is started.
modeBeginParam
// modeInParam: inside "${...}", collecting bytes until the closing '}'.
modeInParam
)
// Kinds of tokens produced by the tokenizer.
const (
// typeStaticPart marks a token holding text found outside of "${...}".
typeStaticPart tokenType = iota
// typeParamPart marks a token holding the content found between "${" and
// its closing '}'.
typeParamPart
)
// TokenizeError gives details about an error encountered while tokenizing input.
type TokenizeError struct {
// what is a short description of the failure.
what string
// pos is the byte index into the input at which the failure was detected
// (equal to the input length when the input ended inside a parameter).
pos int
// token is the token that was being built when the failure was detected.
token *Token
}
// Error implements the error interface. The message has the form
// "<what> at pos <pos> (<token bytes>)". If the error carries no token,
// the parenthesized suffix is omitted instead of dereferencing a nil pointer.
func (e *TokenizeError) Error() string {
	if e.token == nil {
		return fmt.Sprintf("%s at pos %d", e.what, e.pos)
	}
	return fmt.Sprintf("%s at pos %d (%s)", e.what, e.pos, e.token.part)
}
// Token is a single token produced by the tokenizer.
type Token struct {
// part holds the token's bytes; the "${" and closing '}' delimiters of a
// parameter are not included.
part []byte
// tkType is typeStaticPart for plain text and typeParamPart for parameter content.
tkType tokenType
// withNestedParam is set when a '{' was encountered inside a parameter token.
withNestedParam bool
}
// Tokenizer wraps the input that is to be split into tokens.
type Tokenizer struct {
// in is the raw input scanned by Tokenize.
in []byte
}
// NewTokenizer returns a Tokenizer reading from the given byte slice.
// The slice is stored as-is, without copying.
func NewTokenizer(input []byte) *Tokenizer {
	tk := new(Tokenizer)
	tk.in = input
	return tk
}
// NewTokenizerFromString returns a Tokenizer reading from the bytes of
// the given string.
func NewTokenizerFromString(inputString string) *Tokenizer {
	var tk Tokenizer
	tk.in = []byte(inputString)
	return &tk
}
// newToken returns an empty token of type tt. Its byte buffer is allocated
// with a capacity equal to the length of the tokenizer's input, which is
// enough to hold any token cut from that input.
func (t *Tokenizer) newToken(tt tokenType) *Token {
	return &Token{
		part:   make([]byte, 0, len(t.in)),
		tkType: tt,
	}
}
// Tokenize splits the tokenizer's input into static and parameter tokens.
//
// A parameter starts with "${" and ends at the matching '}'. Braces inside
// a parameter are counted, so "${a${b}}" yields one parameter token with the
// content "a${b}" and withNestedParam set. The "${" and closing '}'
// delimiters are not part of any token. A '$' that is not directly followed
// by '{' is ordinary static text. Empty static stretches produce no token.
//
// It returns a *TokenizeError when a parameter is empty ("${}") or when the
// input ends inside a parameter. In both cases the tokens collected up to
// that point are returned alongside the error.
func (t *Tokenizer) Tokenize() ([]*Token, error) {
	res := make([]*Token, 0, 10)
	n := len(t.in)
	state := modeNorm
	cur := t.newToken(typeStaticPart)
	depth := 0 // nesting depth of '{' inside the current parameter

	for i := 0; i < n; i++ {
		c := t.in[i]
		// consumed marks delimiter bytes that must not be copied into cur.
		consumed := false

		switch state {
		case modeNorm:
			depth = 0
			// Peek one byte ahead: only "${" opens a parameter.
			if c == '$' && i+1 < n && t.in[i+1] == '{' {
				state = modeBeginParam
				consumed = true
			}
		case modeBeginParam:
			if c == '{' {
				// Flush the static text collected before the parameter.
				if len(cur.part) > 0 {
					res = append(res, cur)
				}
				cur = t.newToken(typeParamPart)
				state = modeInParam
				consumed = true
			}
		case modeInParam:
			switch c {
			case '{':
				depth++
				cur.withNestedParam = true
			case '}':
				depth--
				// A negative depth means this '}' closes the parameter itself.
				if depth < 0 {
					if len(cur.part) == 0 {
						return res, &TokenizeError{what: "empty params not allowed", pos: i, token: cur}
					}
					res = append(res, cur)
					cur = t.newToken(typeStaticPart)
					state = modeNorm
					consumed = true
				}
			}
		}

		if !consumed {
			cur.part = append(cur.part, c)
		}
	}

	// Flush whatever was being collected when the input ended.
	if len(cur.part) > 0 {
		res = append(res, cur)
	}
	if state == modeInParam {
		return res, &TokenizeError{what: "invalid bracket balance", pos: n, token: cur}
	}
	return res, nil
}