forked from ekanite/ekanite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
lexer.go
119 lines (100 loc) · 2.54 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
package query
import (
"bufio"
"bytes"
"io"
)
// eof is the sentinel rune returned by read when the underlying
// reader has no more input (or any read error occurs).
var eof = rune(0)
// Lexer represents a lexer.
type Lexer struct {
	r *bufio.Reader // buffered source of runes; buffering enables unread()
}
// NewLexer returns a new instance of a Lexer reading from r.
func NewLexer(r io.Reader) *Lexer {
	l := &Lexer{
		r: bufio.NewReader(r),
	}
	return l
}
// read reads the next rune from the buffered reader.
// It returns the eof sentinel if an error occurs (including io.EOF).
func (s *Lexer) read() rune {
	if r, _, err := s.r.ReadRune(); err == nil {
		return r
	}
	return eof
}
// unread puts the previously read rune back on the buffer.
func (s *Lexer) unread() {
	// UnreadRune can only fail if nothing was read beforehand;
	// callers always pair it with a prior read, so the error is ignored.
	_ = s.r.UnreadRune()
}
// Lex returns the next token and associated literal value.
//
// Runs of whitespace collapse into a single WS token; '(', ')' and ':'
// produce their punctuation tokens; eof produces EOF. Any other rune
// starts a string, which is then checked against the reserved-word
// table via Lookup so keywords are reported with their own token type.
func (s *Lexer) Lex() (tok Token, lit string) {
	ch := s.read()

	// A bare switch reads more cleanly than a cascaded if/else-if chain.
	switch {
	case isWhitespace(ch):
		// Put the rune back so lexWhitespace consumes the whole run.
		s.unread()
		return s.lexWhitespace()
	case ch == eof:
		return EOF, ""
	case ch == '(':
		return LPAREN, "("
	case ch == ')':
		return RPAREN, ")"
	case ch == ':':
		return COLON, ":"
	}

	// Anything else begins a string; unread the rune so lexString sees it.
	s.unread()
	tok, lit = s.lexString()

	// Check for keyword match.
	if kw, ok := Lookup(lit); ok {
		return kw, lit
	}
	return tok, lit
}
// lexWhitespace consumes the current rune and all contiguous whitespace,
// returning the accumulated run as a single WS token.
func (s *Lexer) lexWhitespace() (tok Token, lit string) {
	// Seed the buffer with the rune the caller unread.
	var sb bytes.Buffer
	sb.WriteRune(s.read())

	// Accumulate whitespace until EOF or a non-whitespace rune appears.
	for {
		ch := s.read()
		if ch == eof {
			return WS, sb.String()
		}
		if !isWhitespace(ch) {
			s.unread()
			return WS, sb.String()
		}
		sb.WriteRune(ch)
	}
}
// lexString consumes the current rune and all contiguous string runes,
// returning them as a STRING token. A string ends at a colon, a paren,
// whitespace, or EOF; the terminating rune (if any) is unread so the
// caller's next Lex sees it.
//
// NOTE: the original doc comment named this "lexIdent"; the function is
// lexString, and the comment now matches the Go convention of starting
// with the function's name.
func (s *Lexer) lexString() (tok Token, lit string) {
	// Seed the buffer with the rune the caller unread.
	var buf bytes.Buffer
	buf.WriteRune(s.read())

	// Accumulate runes until a terminator or EOF.
	for {
		ch := s.read()
		if ch == eof {
			break
		}
		if ch == ':' || isWhitespace(ch) || isParen(ch) {
			// End of string: leave the terminator for the next token.
			s.unread()
			break
		}
		_, _ = buf.WriteRune(ch)
	}

	// Return as a regular string; Lex checks for keywords afterwards.
	return STRING, buf.String()
}
// isWhitespace reports whether ch is a space, tab, newline, or carriage return.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	default:
		return false
	}
}
// isLetter reports whether ch is an ASCII letter (a-z or A-Z).
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z':
		return true
	default:
		return false
	}
}
// isParen reports whether ch is an opening or closing parenthesis.
func isParen(ch rune) bool {
	switch ch {
	case '(', ')':
		return true
	default:
		return false
	}
}