
Commit 73b0e41

Get started on writing a lexer for GraphQL
1 parent 88a3e1b commit 73b0e41

3 files changed: +255 -0 lines changed


gql/lexer/lexer.go

Lines changed: 127 additions & 0 deletions
package gqlex

import (
	"fmt"
	"strings"
	"unicode/utf8"
)

type itemType int

const (
	itemError itemType = iota
	itemEOF
	itemLeftCurl   // left curly bracket
	itemRightCurl  // right curly bracket
	itemString     // quoted string
	itemText       // plain text
	itemIdentifier // variables
)

const EOF = -1

// item represents a token returned from the scanner.
type item struct {
	typ itemType
	val string
}

func (i item) String() string {
	switch i.typ {
	case itemEOF:
		return "EOF"
	case itemError:
		return i.val
	case itemIdentifier:
		return fmt.Sprintf("var: [%v]", i.val)
	}
	/*
		if len(i.val) > 10 {
			return fmt.Sprintf("%.10q...", i.val)
		}
	*/
	return fmt.Sprintf("%q", i.val)
}

type lexer struct {
	// NOTE: Using a text scanner wouldn't work because it's designed for parsing
	// Golang. It won't keep track of start position, or allow us to retrieve
	// slice from [start:pos]. Better to just use normal string.
	input string    // string being scanned.
	start int       // start position of this item.
	pos   int       // current position of this item.
	width int       // width of last rune read from input.
	items chan item // channel of scanned items.
	depth int       // nesting of {}
}

// newLexer creates a scanner for the input string and starts lexing it
// in a separate goroutine.
func newLexer(input string) *lexer {
	l := &lexer{
		input: input,
		items: make(chan item),
	}
	go l.run()
	return l
}

// errorf emits an error token and returns nil to stop the run loop.
func (l *lexer) errorf(format string,
	args ...interface{}) stateFn {
	l.items <- item{
		typ: itemError,
		val: fmt.Sprintf(format, args...),
	}
	return nil
}

// emit passes an item back to the client over the items channel.
func (l *lexer) emit(t itemType) {
	l.items <- item{
		typ: t,
		val: l.input[l.start:l.pos],
	}
	l.start = l.pos
}

// run lexes the input by executing state functions until the state is nil.
func (l *lexer) run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	close(l.items) // No more tokens.
}

// next returns the next rune in the input, or EOF when the input is exhausted.
func (l *lexer) next() (result rune) {
	if l.pos >= len(l.input) {
		l.width = 0
		return EOF
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = w
	l.pos += l.width
	return r
}

// backup steps back one rune. It can only be called once per call of next.
func (l *lexer) backup() {
	l.pos -= l.width
}

// peek returns but does not consume the next rune in the input.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// ignore skips over the pending input before this point.
func (l *lexer) ignore() {
	l.start = l.pos
}

// accept consumes the next rune if it's from the valid set.
func (l *lexer) accept(valid string) bool {
	if strings.IndexRune(valid, l.next()) >= 0 {
		return true
	}
	l.backup()
	return false
}

// acceptRun consumes a run of runes from the valid set.
func (l *lexer) acceptRun(valid string) {
	for strings.IndexRune(valid, l.next()) >= 0 {
	}
	l.backup()
}
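
Nothing in this commit calls accept or acceptRun yet. As a rough sketch of how they are meant to be combined, in the style of text/template's lexer, a numeric scanner could look like the following. This is hypothetical code, not part of the commit: the itemNumber constant and lexNumber function are invented purely for illustration.

// Example-only token kind, not defined in this commit; the value sits well
// above the iota block in lexer.go so it cannot collide with real kinds.
const itemNumber itemType = 100

// lexNumber shows how accept and acceptRun advance the scanner: an optional
// sign, a run of digits, an optional fractional part, then a single emit.
func lexNumber(l *lexer) stateFn {
	l.accept("+-")
	const digits = "0123456789"
	l.acceptRun(digits)
	if l.accept(".") {
		l.acceptRun(digits)
	}
	l.emit(itemNumber)
	return lexInside
}

Both helpers rely on the same discipline as next: they always read one rune too far and then back up, which is why the lexer tracks width.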

gql/lexer/lexer_test.go

Lines changed: 23 additions & 0 deletions
package gqlex

import (
	"fmt"
	"testing"
)

func TestNewLexer(t *testing.T) {
	input := `
	{
		me {
			name
			city
			friends {
				name
			}
		}
	}`
	l := newLexer(input)
	for item := range l.items {
		fmt.Println(item.String())
	}
}
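
The test above only prints the token stream for manual inspection. If it ever needs to assert on the output, one option is to drain the channel into a slice and check it. The sketch below is not part of this commit: collectItems, TestLexerTokens, and the expected count of seven items (assuming whitespace is dropped by lexInside as it is today) are all assumptions introduced for illustration.

// Hypothetical helper, not part of this commit: drain the lexer's channel so
// the emitted items can be compared against expectations.
func collectItems(l *lexer) []item {
	var out []item
	for it := range l.items {
		out = append(out, it)
	}
	return out
}

func TestLexerTokens(t *testing.T) {
	l := newLexer("{ me { name } }")
	items := collectItems(l)
	// Expected: {, me, {, name, }, }, then EOF -- seven items in total.
	if len(items) != 7 {
		t.Fatalf("got %d items, want 7", len(items))
	}
	if items[len(items)-1].typ != itemEOF {
		t.Errorf("last item = %v, want EOF", items[len(items)-1])
	}
}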

gql/lexer/state.go

Lines changed: 105 additions & 0 deletions
package gqlex

import (
	"strings"
	"unicode"
)

const (
	leftCurl  = "{"
	rightCurl = "}"
)

// stateFn represents the state of the scanner as a function that
// returns the next state.
type stateFn func(*lexer) stateFn

// lexText scans plain text until it reaches an opening curly bracket,
// emitting any text collected so far before handing off to lexLeftCurl.
func lexText(l *lexer) stateFn {
	for {
		if strings.HasPrefix(l.input[l.pos:], leftCurl) {
			if l.pos > l.start {
				l.emit(itemText)
			}
			return lexLeftCurl
		}
		if strings.HasPrefix(l.input[l.pos:], rightCurl) {
			return l.errorf("Too many right brackets")
		}
		if l.next() == EOF {
			break
		}
	}
	// Correctly reached EOF.
	if l.pos > l.start {
		l.emit(itemText)
	}
	l.emit(itemEOF)
	return nil // Stop the run loop.
}

// lexLeftCurl emits the left curly bracket and descends one nesting level.
func lexLeftCurl(l *lexer) stateFn {
	l.pos += len(leftCurl)
	l.depth += 1
	l.emit(itemLeftCurl)
	return lexInside(l)
}

// lexRightCurl emits the right curly bracket and ascends one nesting level,
// returning to lexText once all brackets are balanced.
func lexRightCurl(l *lexer) stateFn {
	l.pos += len(rightCurl)
	l.depth -= 1
	l.emit(itemRightCurl)

	if l.depth == 0 {
		return lexText
	} else {
		return lexInside
	}
}

// lexInside scans the contents between curly brackets: whitespace and line
// breaks are dropped, identifiers are handed off to lexIdentifier.
func lexInside(l *lexer) stateFn {
	for {
		if strings.HasPrefix(l.input[l.pos:], rightCurl) {
			return lexRightCurl
		}
		if strings.HasPrefix(l.input[l.pos:], leftCurl) {
			return lexLeftCurl
		}

		switch r := l.next(); {
		case r == EOF:
			return l.errorf("unclosed action")
		case isSpace(r) || isEndOfLine(r):
			l.ignore()
		case isAlphaNumeric(r):
			l.backup()
			return lexIdentifier
		}
	}
}

// lexIdentifier scans an alphanumeric identifier and emits it.
func lexIdentifier(l *lexer) stateFn {
Loop:
	for {
		switch r := l.next(); {
		case isAlphaNumeric(r):
			// absorb.
		default:
			l.backup()
			l.emit(itemIdentifier)
			break Loop
		}
	}
	return lexInside
}

func isSpace(r rune) bool {
	return r == ' ' || r == '\t'
}

func isEndOfLine(r rune) bool {
	return r == '\r' || r == '\n'
}

func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}
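
Both errorf call sites above ("Too many right brackets" in lexText and "unclosed action" in lexInside) end the scan, because errorf returns nil after sending the error item and run then closes the channel. A hypothetical test that would exercise that path if added to lexer_test.go (TestUnclosedQuery is not part of this commit):

// Hypothetical sketch: for unbalanced input the last item on the channel is
// an itemError, after which run closes the items channel.
func TestUnclosedQuery(t *testing.T) {
	l := newLexer("{ me { name ") // braces are never closed
	var last item
	for it := range l.items {
		last = it
	}
	if last.typ != itemError {
		t.Errorf("last item = %v, want an itemError", last)
	}
}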
