/
token.go
151 lines (137 loc) · 3.01 KB
/
token.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
package svn
import (
"bufio"
"fmt"
"io"
)
// A TokenType is the type of a Token.
type TokenType int

const (
	// ErrorToken means that an error occurred during tokenization.
	ErrorToken TokenType = iota
	// WordToken is a bare word: a letter followed by letters, digits, or hyphens.
	WordToken
	// NumberToken is a non-negative decimal integer.
	NumberToken
	// StringToken is a length-prefixed string (e.g. "3:abc" on the wire).
	StringToken
	// LeftParenToken is a "(" opening a list.
	LeftParenToken
	// RightParenToken is a ")" closing a list.
	RightParenToken
)
// A Tokenizer returns a stream of SVN Tokens.
type Tokenizer struct {
	r     *bufio.Reader // buffered source of input bytes
	token Token         // most recent token produced by Scan
	err   error         // first error encountered, if any (may be io.EOF)
	done  bool          // set once scanning has stopped; Scan then returns false
}
// A Token describes a token in a SVN conversation.
// There are only 5 types of tokens: word, number, string, left and right parenthesis.
type Token struct {
	Type TokenType
	// Number is the value of a NumberToken. For a StringToken it holds
	// the byte length that prefixed the string.
	Number uint
	// Text is the contents of a WordToken or StringToken.
	Text string
}
// NewTokenizer returns a new SVN Tokenizer for the given Reader.
// The reader is wrapped in a bufio.Reader, so the caller need not
// provide buffering of its own.
func NewTokenizer(r io.Reader) *Tokenizer {
	return &Tokenizer{r: bufio.NewReader(r)}
}
// readByte returns the next byte of input. On failure (including
// io.EOF) it records the error, marks the tokenizer as done, and
// returns the zero byte that bufio.Reader.ReadByte yields on error.
func (t *Tokenizer) readByte() byte {
	b, err := t.r.ReadByte()
	if err == nil {
		return b
	}
	t.done = true
	t.err = err
	return b
}
// Scan advances the Tokenizer to the next token, which will then be available through the Token method.
// It returns false when the scan stops, either by reaching the end of the input or an error.
// After Scan returns false, the Err method will return any error that occurred during scanning.
func (t *Tokenizer) Scan() bool {
	if t.done {
		return false
	}
	t.token.Type = ErrorToken

	// Skip the whitespace separating tokens.
	var b byte
	for b = t.readByte(); isspace(b); b = t.readByte() {
	}
	if t.err != nil {
		// Input ended (or failed) before a token started.
		return false
	}

	switch {
	case isnum(b):
		// Number: a run of decimal digits. A number immediately
		// followed by ':' is the length prefix of a string.
		var number uint
		for isnum(b) {
			number *= 10
			number += uint(b - '0')
			b = t.readByte()
		}
		t.token.Type = NumberToken
		t.token.Number = number
		if b == ':' {
			t.token.Type = StringToken
			octets := make([]byte, number)
			if _, err := io.ReadFull(t.r, octets); err != nil {
				t.token.Type = ErrorToken
				t.done = true
				t.err = err
				return false
			}
			t.token.Text = string(octets)
			b = t.readByte()
		}
	case b == '(':
		t.token.Type = LeftParenToken
		b = t.readByte()
	case b == ')':
		t.token.Type = RightParenToken
		b = t.readByte()
	case isalpha(b):
		// Word: a letter followed by letters, digits, or hyphens.
		// Accumulate in a byte slice rather than repeated string
		// concatenation, which is quadratic.
		t.token.Type = WordToken
		var word []byte
		for isalnum(b) || b == '-' {
			word = append(word, b)
			b = t.readByte()
		}
		t.token.Text = string(word)
	default:
		t.token.Type = ErrorToken
		t.err = fmt.Errorf("syntax error: unexpected \"%c\"", b)
		t.done = true
		return false
	}

	// The byte after the token must be whitespace (or end of input).
	// Check t.err first: previously an io.EOF recorded by readByte was
	// overwritten here with a bogus "expected space" syntax error,
	// discarding the final, fully-parsed token of any input that does
	// not end in whitespace.
	if t.err != nil {
		if t.err == io.EOF {
			// The input ended immediately after a complete token.
			// Report the token now; the next Scan returns false
			// because readByte already set t.done.
			return true
		}
		// A genuine read error: preserve it rather than masking it.
		t.token.Type = ErrorToken
		return false
	}
	if !isspace(b) {
		t.err = fmt.Errorf("syntax error: expected space after %q", t.token)
		t.token.Type = ErrorToken
		t.done = true
		return false
	}
	return true
}
// Token returns the most recent token generated by a call to Scan.
// Its value is meaningful only after Scan has returned true at least once.
func (t *Tokenizer) Token() Token {
	tok := t.token
	return tok
}
// Err returns the first error that was encountered by the Tokenizer.
// Note that it may be io.EOF when the input was simply exhausted.
func (t *Tokenizer) Err() error {
	err := t.err
	return err
}
// String returns a string representation of the Token, using the wire
// form for strings ("length:text") and a fixed marker for errors.
// An unrecognized Type yields the empty string.
func (t Token) String() string {
	switch t.Type {
	case WordToken:
		return t.Text
	case NumberToken:
		return fmt.Sprint(t.Number)
	case StringToken:
		return fmt.Sprintf("%d:%s", len(t.Text), t.Text)
	case LeftParenToken:
		return "("
	case RightParenToken:
		return ")"
	case ErrorToken:
		return "**ERROR**"
	}
	return ""
}