Permalink
Browse files

Speed up string to number conversions by avoiding text/scanner

text/scanner is heavyweight and does far more than we need here.
Replacing it with a small hand-written prefix parser makes explicit
string-to-number conversions about 10x faster.

$ time ./goawk_before 'BEGIN { for (i=0; i<1000000; i++) { "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; } }'
real    0m10.692s

$ time ./goawk_after 'BEGIN { for (i=0; i<1000000; i++) { "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; "1.5e1"+"1"; } }'
real    0m0.983s
  • Loading branch information...
benhoyt committed Sep 8, 2018
1 parent 4b4840e commit 12b8520948e78ef19e3ed99bcffe25b3e893e447
Showing with 64 additions and 31 deletions.
  1. +0 −1 goawk.go
  2. +4 −4 interp/interp.go
  3. +2 −0 interp/interp_test.go
  4. +58 −26 interp/value.go
@@ -31,7 +31,6 @@ package main
TODO:
- performance testing: I/O, allocations, CPU
+ value.go: scanner is relatively slow and produces garbage for our purposes
+ other TODOs in interp.go and parser.go
+ other uses of make() in interp.go
+ resolve array variables at parse time (by index instead of name)
@@ -746,8 +746,8 @@ func (p *interp) eval(expr Expr) (value, error) {
if err != nil {
return value{}, err
}
indexNum, err := index.numChecked()
if err != nil {
indexNum, ok := index.numChecked()
if !ok {
return value{}, newError("field index not a number: %q", p.toString(index))
}
return p.getField(int(indexNum))
@@ -1739,8 +1739,8 @@ func (p *interp) assign(left Expr, right value) error {
if err != nil {
return err
}
indexNum, err := index.numChecked()
if err != nil {
indexNum, ok := index.numChecked()
if !ok {
return newError("field index not a number: %q", p.toString(index))
}
return p.setField(int(indexNum), p.toString(right))
@@ -150,6 +150,8 @@ BEGIN {
{`BEGIN { print 1, 1., .1, 1e0, -1 }`, "", "1 1 0.1 1 -1\n", "", ""},
{`BEGIN { print '\"' '\'' 'xy' "z" "'" '\"' }`, "", "\"'xyz'\"\n", "", "syntax error"}, // Check support for single-quoted strings
{`{ print /foo/ }`, "food\nfoo\nxfooz\nbar\n", "1\n1\n1\n0\n", "", ""},
{`BEGIN { print "-12"+0, "+12"+0, " \t\r\n7foo"+0, ".5"+0, "5."+0, "+."+0 }`, "", "-12 12 7 0.5 5 0\n", "", ""},
{`BEGIN { print "1e3"+0, "1.2e-1"+0, "1e+1"+0, "1e"+0, "1e+"+0 }`, "", "1000 0.12 10 1 1\n", "", ""},

// Conditional ?: expression
{`{ print /x/?"t":"f" }`, "x\ny\nxx\nz\n", "t\nf\nt\nf\n", "", ""},
@@ -7,7 +7,6 @@ import (
"math"
"strconv"
"strings"
"text/scanner"
)

const (
@@ -83,39 +82,72 @@ func (v value) num() float64 {
return f
}

// numChecked converts v to a float64 and reports whether the conversion
// succeeded. A number, or a string already flagged as numeric (isNumStr),
// returns its stored v.n. Any other string is converted from its leading
// numeric prefix by parseFloatPrefix. Any other type yields 0 and success.
//
// NOTE(review): this span is rendered from a diff whose +/- markers were
// lost, so lines of the previous text/scanner-based implementation (the
// ones returning an error / nil) are interleaved with the current
// bool-returning implementation.
func (v value) numChecked() (float64, error) {
func (v value) numChecked() (float64, bool) {
switch v.typ {
case typeNum:
return v.n, nil
return v.n, true
case typeStr:
if v.isNumStr {
// If it's a numeric string, we already have the float
// value from the numStr() call
return v.n, nil
return v.n, true
}
// TODO: scanner is relatively slow and allocates a bunch, do this by hand
// Note that converting to number directly (in contrast to
// "numeric strings") allows things like "1.5foo"
var scan scanner.Scanner
scan.Init(strings.NewReader(v.s))
scan.Error = func(*scanner.Scanner, string) {}
tok := scan.Scan()
negative := tok == '-'
if tok == '-' || tok == '+' {
tok = scan.Scan()
}
if scan.ErrorCount != 0 || (tok != scanner.Float && tok != scanner.Int) {
return 0, fmt.Errorf("invalid number %q", v.s)
// Otherwise ensure string starts with a float and convert it
return parseFloatPrefix(v.s)
default:
return 0, true
}
}

// parseFloatPrefix is like strconv.ParseFloat, but it only requires that s
// *starts* with a decimal floating-point number, so input such as "1.5foo"
// is accepted and yields 1.5.
//
// Leading spaces, tabs, newlines and carriage returns are skipped. The number
// is an optional sign, digits with an optional '.', and an optional exponent.
// An exponent marker that is not followed by at least one digit ("1e", "1e+")
// is treated as trailing garbage rather than an error.
//
// It returns the parsed value and true, or 0 and false if s does not start
// with at least one mantissa digit. A well-formed number whose magnitude
// overflows float64 (for example "1e999") yields +Inf or -Inf and true.
func parseFloatPrefix(s string) (float64, bool) {
	// Skip whitespace at start
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t' || s[i] == '\n' || s[i] == '\r') {
		i++
	}
	start := i

	// Parse mantissa: optional sign, initial digit(s), optional '.',
	// then more digits
	gotDigit := false
	if i < len(s) && (s[i] == '+' || s[i] == '-') {
		i++
	}
	for i < len(s) && s[i] >= '0' && s[i] <= '9' {
		gotDigit = true
		i++
	}
	if i < len(s) && s[i] == '.' {
		i++
	}
	for i < len(s) && s[i] >= '0' && s[i] <= '9' {
		gotDigit = true
		i++
	}
	if !gotDigit {
		return 0, false
	}

	// Parse exponent. "1e" and similar are allowed here, but ParseFloat
	// rejects them, so end only advances past the exponent once at least
	// one exponent digit has been seen.
	end := i
	if i < len(s) && (s[i] == 'e' || s[i] == 'E') {
		i++
		if i < len(s) && (s[i] == '+' || s[i] == '-') {
			i++
		}
		for i < len(s) && s[i] >= '0' && s[i] <= '9' {
			i++
			end = i
		}
	}

	floatStr := s[start:end]
	f, err := strconv.ParseFloat(floatStr, 64)
	if err != nil {
		// The syntax was validated above, so the only expected failure is
		// an out-of-range value, for which ParseFloat returns ±Inf. Accept
		// that result instead of crashing on input like "1e999".
		if numErr, ok := err.(*strconv.NumError); ok && numErr.Err == strconv.ErrRange {
			return f, true
		}
		// Anything else means the scan above is out of sync with ParseFloat.
		panic(fmt.Sprintf("unexpected error parsing %q: %v", floatStr, err))
	}
	return f, true
}

0 comments on commit 12b8520

Please sign in to comment.