Permalink
Browse files

Speed up lexer more by changing from rune to byte type

benchmark                   old ns/op     new ns/op     delta
BenchmarkNumbers-8          4749          1923          -59.51%
BenchmarkKeywords-8         5689          2313          -59.34%
BenchmarkNames-8            4901          2186          -55.40%
BenchmarkStrings-8          5079          2282          -55.07%
BenchmarkRegex-8            4183          1966          -53.00%
BenchmarkProgram-8          2563          1483          -42.14%
BenchmarkChoiceTokens-8     2065          2127          +3.00%
BenchmarkSimpleTokens-8     902           918           +1.77%
  • Loading branch information...
benhoyt committed Sep 7, 2018
1 parent 0fa32f9 commit 43af0cbd2f7b19273b58a75bf0fab20f91a755bf
Showing with 28 additions and 28 deletions.
  1. +28 −28 lexer/lexer.go
@@ -17,7 +17,7 @@ import (
type Lexer struct {
src []byte
offset int
ch rune
ch byte
errorMsg string
pos Position
nextPos Position
@@ -78,11 +78,11 @@ func (l *Lexer) scan() (Position, Token, string) {
if l.ch == '#' {
// Skip comment till end of line
l.next()
for l.ch != '\n' && l.ch >= 0 {
for l.ch != '\n' && l.ch != 0 {
l.next()
}
}
if l.ch < 0 {
if l.ch == 0 {
if l.errorMsg != "" {
return l.pos, ILLEGAL, l.errorMsg
}
@@ -98,12 +98,12 @@ func (l *Lexer) scan() (Position, Token, string) {

// Names: keywords and functions
if isNameStart(ch) {
runes := []rune{ch}
chars := []byte{ch}
for isNameStart(l.ch) || (l.ch >= '0' && l.ch <= '9') {
runes = append(runes, l.ch)
chars = append(chars, l.ch)
l.next()
}
name := string(runes)
name := string(chars)
tok, isKeyword := keywordTokens[name]
if !isKeyword {
tok = NAME
@@ -212,49 +212,49 @@ func (l *Lexer) scan() (Position, Token, string) {
case '|':
tok = l.choice('|', PIPE, OR)
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
runes := []rune{ch}
chars := []byte{ch}
gotDigit := false
if ch != '.' {
gotDigit = true
for l.ch >= '0' && l.ch <= '9' {
runes = append(runes, l.ch)
chars = append(chars, l.ch)
l.next()
}
if l.ch == '.' {
runes = append(runes, l.ch)
chars = append(chars, l.ch)
l.next()
}
}
for l.ch >= '0' && l.ch <= '9' {
gotDigit = true
runes = append(runes, l.ch)
chars = append(chars, l.ch)
l.next()
}
if !gotDigit {
return l.pos, ILLEGAL, "expected digits"
}
if l.ch == 'e' || l.ch == 'E' {
runes = append(runes, l.ch)
chars = append(chars, l.ch)
l.next()
if l.ch == '+' || l.ch == '-' {
runes = append(runes, l.ch)
chars = append(chars, l.ch)
l.next()
}
for l.ch >= '0' && l.ch <= '9' {
runes = append(runes, l.ch)
chars = append(chars, l.ch)
l.next()
}
}
tok = NUMBER
val = string(runes)
val = string(chars)
case '"', '\'':
// Note: the POSIX awk spec doesn't allow single-quoted strings,
// but accepting them helps with shell quoting, especially on Windows
// where the shell quote character is " (double quote).
runes := []rune{}
chars := []byte{}
for l.ch != ch {
c := l.ch
if c < 0 {
if c == 0 {
return l.pos, ILLEGAL, "didn't find end quote in string"
}
if c == '\r' || c == '\n' {
@@ -273,12 +273,12 @@ func (l *Lexer) scan() (Position, Token, string) {
c = l.ch
}
}
runes = append(runes, c)
chars = append(chars, c)
l.next()
}
l.next()
tok = STRING
val = string(runes)
val = string(chars)
default:
tok = ILLEGAL
val = fmt.Sprintf("unexpected %q", ch)
@@ -298,21 +298,21 @@ func (l *Lexer) ScanRegex() (Position, Token, string) {

func (l *Lexer) scanRegex() (Position, Token, string) {
pos := l.pos
runes := []rune{}
chars := []byte{}
switch l.lastTok {
case DIV:
// Regex after '/' (the usual case)
pos.Column -= 1
case DIV_ASSIGN:
// Regex after '/=' (possible when regex starts with '=')
pos.Column -= 2
runes = append(runes, '=')
chars = append(chars, '=')
default:
return l.pos, ILLEGAL, fmt.Sprintf("unexpected %s preceding regex", l.lastTok)
}
for l.ch != '/' {
c := l.ch
if c < 0 {
if c == 0 {
return l.pos, ILLEGAL, "didn't find end slash in regex"
}
if c == '\r' || c == '\n' {
@@ -321,21 +321,21 @@ func (l *Lexer) scanRegex() (Position, Token, string) {
if c == '\\' {
l.next()
if l.ch != '/' {
runes = append(runes, '\\')
chars = append(chars, '\\')
}
c = l.ch
}
runes = append(runes, c)
chars = append(chars, c)
l.next()
}
l.next()
return pos, REGEX, string(runes)
return pos, REGEX, string(chars)
}

func (l *Lexer) next() {
l.pos = l.nextPos
if l.offset >= len(l.src) {
l.ch = -1
l.ch = 0
return
}
ch := l.src[l.offset]
@@ -345,15 +345,15 @@ func (l *Lexer) next() {
} else {
l.nextPos.Column++
}
l.ch = rune(ch)
l.ch = ch
l.offset++
}

// isNameStart reports whether ch can begin a name: it returns true for an
// underscore or an ASCII letter (a-z, A-Z) and false for anything else.
func isNameStart(ch rune) bool {
func isNameStart(ch byte) bool {
return ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')
}

func (l *Lexer) choice(ch rune, one, two Token) Token {
func (l *Lexer) choice(ch byte, one, two Token) Token {
if l.ch == ch {
l.next()
return two

0 comments on commit 43af0cb

Please sign in to comment.