Permalink
Browse files

Speed up lexer by reducing allocations

benchmark                   old ns/op     new ns/op     delta
BenchmarkStrings-8          2282          1234          -45.92%
BenchmarkNumbers-8          1923          1194          -37.91%
BenchmarkRegex-8            1966          1348          -31.43%
BenchmarkKeywords-8         2313          1739          -24.82%
BenchmarkNames-8            2186          1843          -15.69%
BenchmarkProgram-8          1483          1293          -12.81%
BenchmarkSimpleTokens-8     918           879           -4.25%
BenchmarkChoiceTokens-8     2127          2090          -1.74%
  • Loading branch information...
benhoyt committed Sep 7, 2018
1 parent 43af0cb commit c5a32eb08f817b4622ce11e7ad858ed131e3cad7
Showing with 9 additions and 5 deletions.
  1. +3 −1 goawk.go
  2. +6 −4 lexer/lexer.go
@@ -31,13 +31,15 @@ package main
TODO:
- performance testing: I/O, allocations, CPU
+ optimize lexer
- order cases in switch by most common first
- wait, do we actually need handling of UTF-8 in source code? what about errors?
+ other TODOs in interp.go and parser.go
+ other uses of make() in interp.go
+ resolve array variables at parse time (by index instead of name)
+ resolve array parameters to functions at parse time and clean up userCall
+ benchmark against awk/gawk with some real awk scripts
+ ways to optimize sub()/gsub()
+ optimize lexer
+ optimize parser
- move ast (except Program) to "internal" package?
- break up interp.go? structure it better and add comments
@@ -98,7 +98,8 @@ func (l *Lexer) scan() (Position, Token, string) {

// Names: keywords and functions
if isNameStart(ch) {
-chars := []byte{ch}
+chars := make([]byte, 1, 16) // most won't require heap allocation
+chars[0] = ch
for isNameStart(l.ch) || (l.ch >= '0' && l.ch <= '9') {
chars = append(chars, l.ch)
l.next()
@@ -212,7 +213,8 @@ func (l *Lexer) scan() (Position, Token, string) {
case '|':
tok = l.choice('|', PIPE, OR)
case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
-chars := []byte{ch}
+chars := make([]byte, 1, 16) // most won't require heap allocation
+chars[0] = ch
gotDigit := false
if ch != '.' {
gotDigit = true
@@ -251,7 +253,7 @@ func (l *Lexer) scan() (Position, Token, string) {
// Note: POSIX awk spec doesn't allow single-quoted strings,
// but this helps without quoting, especially on Windows
// where the shell quote character is " (double quote).
-chars := []byte{}
+chars := make([]byte, 0, 32) // most won't require heap allocation
for l.ch != ch {
c := l.ch
if c == 0 {
@@ -298,7 +300,7 @@ func (l *Lexer) ScanRegex() (Position, Token, string) {

func (l *Lexer) scanRegex() (Position, Token, string) {
pos := l.pos
-chars := []byte{}
+chars := make([]byte, 0, 32) // most won't require heap allocation
switch l.lastTok {
case DIV:
// Regex after '/' (the usual case)

0 comments on commit c5a32eb

Please sign in to comment.