Permalink
Browse files

Big perf improvements: resolve variables at parse time

This necessitated a significant refactor of the exported API, which no
longer exposes the Interp struct (but that's probably better anyway).
The commit also removes ParseExpr and Eval, as parsing a stand-alone
expression was problematic.

For now, function names and array parameters to functions are still
resolved by name at runtime. I intend to work on that later, but this
gets the low-hanging fruit.

This gives a significant performance improvement for many benchmarks
(see below), because we're doing variable lookups by integer index
instead of by name, and reducing the number of allocations in many
cases.

benchmark                           old ns/op     new ns/op     delta
BenchmarkFuncCall-8                 13710         5313          -61.25%
BenchmarkRecursiveFunc-8            60507         30719         -49.23%
BenchmarkForInLoop-8                4092          2467          -39.71%
BenchmarkLocalVars-8                2959          1827          -38.26%
BenchmarkForLoop-8                  15706         10349         -34.11%
BenchmarkIncrDecr-8                 2441          1647          -32.53%
BenchmarkGlobalVars-8               2628          1812          -31.05%
BenchmarkCondExpr-8                 867           604           -30.33%
BenchmarkIfStatement-8              1194          885           -25.88%
BenchmarkComparisons-8              739           606           -18.00%
BenchmarkRegexMatch-8               2224          1828          -17.81%
BenchmarkConcatSmall-8              1120          946           -15.54%
BenchmarkBuiltinIndex-8             4659          4221          -9.40%
BenchmarkBinaryOperators-8          993           905           -8.86%
BenchmarkBuiltinLength-8            3040          2775          -8.72%
BenchmarkBuiltinSubstr-8            4651          4303          -7.48%
BenchmarkBuiltinMatch-8             4964          4640          -6.53%
BenchmarkConcatLarge-8              7203914       6767577       -6.06%
BenchmarkBuiltinSplitSpace-8        119347        114082        -4.41%
BenchmarkSetField-8                 1599          1536          -3.94%
BenchmarkBuiltinGsub-8              18242         17691         -3.02%
BenchmarkSimplePattern-8            248           241           -2.82%
BenchmarkBuiltinSubAmpersand-8      15731         15311         -2.67%
BenchmarkArrayOperations-8          2350          2410          +2.55%
BenchmarkBuiltinGsubAmpersand-8     19415         18994         -2.17%
BenchmarkGetField-8                 585           573           -2.05%
BenchmarkBuiltinSplitRegex-8        152834        149869        -1.94%
BenchmarkBuiltinSub-8               15305         15092         -1.39%
BenchmarkSimpleBuiltins-8           6828          6746          -1.20%
BenchmarkBuiltinSprintf-8           15972         15927         -0.28%
  • Loading branch information...
benhoyt committed Sep 3, 2018
1 parent 177fef5 commit e0d7287ac1580bd0f144c763b222b9db8a858c54
Showing with 589 additions and 478 deletions.
  1. +210 −210 benchmarks.txt
  2. +18 −11 goawk.go
  3. +7 −3 goawk_test.go
  4. +226 −189 interp/interp.go
  5. +19 −28 interp/interp_test.go
  6. +14 −10 parser/ast.go
  7. +49 −27 parser/parser.go
  8. +46 −0 parser/specialvars.go

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -1,6 +1,6 @@
// Package goawk is an implementation of AWK written in Go.
//
// You can use the command-line "goawk" command or call AWK from your
// You can use the command-line "goawk" command or run AWK from your
// Go programs using the "interp" package. The command-line program
// has the same interface as regular awk:
//
@@ -30,13 +30,18 @@ package main
/*
TODO:
- break up interp.go?
- performance testing: I/O, allocations, CPU
+ defer in eval/exec -- will this help?
+ other TODOs in interp.go and parser.go
+ resolve array variables at parse time (by index instead of name)
+ resolve array parameters to functions at parse time and clean up userCall
- move ast (except Program) to "internal" package?
- break up interp.go? structure it better and add comments
- think about length() and substr() chars vs bytes:
https://github.com/benhoyt/goawk/issues/2#issuecomment-415314000
- get goawk_test.go working in TravisCI
- performance testing: I/O, allocations, CPU
+ getVar/setVar overhead -- can resolve stuff at compile-time
+ defer in eval/exec -- will this help?
- try out Go 2 error handling proposal with the GoAWK codebase:
https://go.googlesource.com/proposal/+/master/design/go2draft-error-handling.md
NICE TO HAVE:
- fix broken (commented-out) interp tests due to syntax handling
@@ -131,18 +136,20 @@ func main() {
}
}

p := interp.New(nil, nil)
p.SetVar("FS", *fieldSep)
config := &interp.Config{
Argv0: filepath.Base(os.Args[0]),
Args: args,
Vars: []string{"FS", *fieldSep},
}
for _, v := range vars {
parts := strings.SplitN(v, "=", 2)
if len(parts) != 2 {
errorExit("-v flag must be in format name=value")
}
p.SetVar(parts[0], parts[1])
config.Vars = append(config.Vars, parts[0], parts[1])
}
p.SetArgv0(filepath.Base(os.Args[0]))

err = p.Exec(prog, os.Stdin, args)
status, err := interp.ExecProgram(prog, config)
if err != nil {
errorExit("%s", err)
}
@@ -163,7 +170,7 @@ func main() {
f.Close()
}

os.Exit(p.ExitStatus())
os.Exit(status)
}

func showSourceLine(src []byte, pos lexer.Position, dividerLen int) {
@@ -145,9 +145,12 @@ func parseGoAWK(srcPath string) (*parser.Program, error) {
func interpGoAWK(prog *parser.Program, inputPath string) ([]byte, error) {
outBuf := &bytes.Buffer{}
errBuf := &bytes.Buffer{}
p := interp.New(outBuf, errBuf)
p.SetArgs([]string{"goawk_test", inputPath})
err := p.Exec(prog, nil, []string{inputPath})
config := &interp.Config{
Output: outBuf,
Error: errBuf,
Args: []string{inputPath},
}
_, err := interp.ExecProgram(prog, config)
result := outBuf.Bytes()
result = append(result, errBuf.Bytes()...)
return result, err
@@ -227,6 +230,7 @@ func TestCommandLine(t *testing.T) {
{[]string{"-v", "A=1", "-f", "testdata/g.3", "B=2", "/dev/null"}, "",
"A=1, B=0\n\tARGV[1] = B=2\n\tARGV[2] = /dev/null\nA=1, B=2\n"},
{[]string{`END { print (x==42) }`, "x=42.0"}, "", "1\n"},
{[]string{"-v", "x=42.0", `BEGIN { print (x==42) }`}, "", "1\n"},
}
for _, test := range tests {
testName := strings.Join(test.args, " ")
Oops, something went wrong.

0 comments on commit e0d7287

Please sign in to comment.