Skip to content

Commit

Permalink
Big perf improvements: resolve variables at parse time
Browse files Browse the repository at this point in the history
This necessitated a significant refactor of the exported API, which no
longer exposes the Interp struct (but that's probably better anyway).
It also removes ParseExpr and Eval, as parsing a stand-alone
expression was problematic.

For now, function names and array parameters to functions are still
resolved by name at runtime. I intend to work on that later, but this
gets the low-hanging fruit.

This gives a significant performance improvement for many benchmarks
(see below), because we're doing variable lookups by integer index
instead of name, and reducing the number of allocations in many
cases.

benchmark                           old ns/op     new ns/op     delta
BenchmarkFuncCall-8                 13710         5313          -61.25%
BenchmarkRecursiveFunc-8            60507         30719         -49.23%
BenchmarkForInLoop-8                4092          2467          -39.71%
BenchmarkLocalVars-8                2959          1827          -38.26%
BenchmarkForLoop-8                  15706         10349         -34.11%
BenchmarkIncrDecr-8                 2441          1647          -32.53%
BenchmarkGlobalVars-8               2628          1812          -31.05%
BenchmarkCondExpr-8                 867           604           -30.33%
BenchmarkIfStatement-8              1194          885           -25.88%
BenchmarkComparisons-8              739           606           -18.00%
BenchmarkRegexMatch-8               2224          1828          -17.81%
BenchmarkConcatSmall-8              1120          946           -15.54%
BenchmarkBuiltinIndex-8             4659          4221          -9.40%
BenchmarkBinaryOperators-8          993           905           -8.86%
BenchmarkBuiltinLength-8            3040          2775          -8.72%
BenchmarkBuiltinSubstr-8            4651          4303          -7.48%
BenchmarkBuiltinMatch-8             4964          4640          -6.53%
BenchmarkConcatLarge-8              7203914       6767577       -6.06%
BenchmarkBuiltinSplitSpace-8        119347        114082        -4.41%
BenchmarkSetField-8                 1599          1536          -3.94%
BenchmarkBuiltinGsub-8              18242         17691         -3.02%
BenchmarkSimplePattern-8            248           241           -2.82%
BenchmarkBuiltinSubAmpersand-8      15731         15311         -2.67%
BenchmarkArrayOperations-8          2350          2410          +2.55%
BenchmarkBuiltinGsubAmpersand-8     19415         18994         -2.17%
BenchmarkGetField-8                 585           573           -2.05%
BenchmarkBuiltinSplitRegex-8        152834        149869        -1.94%
BenchmarkBuiltinSub-8               15305         15092         -1.39%
BenchmarkSimpleBuiltins-8           6828          6746          -1.20%
BenchmarkBuiltinSprintf-8           15972         15927         -0.28%
  • Loading branch information
benhoyt committed Sep 3, 2018
1 parent 177fef5 commit e0d7287
Show file tree
Hide file tree
Showing 8 changed files with 589 additions and 478 deletions.
420 changes: 210 additions & 210 deletions benchmarks.txt

Large diffs are not rendered by default.

29 changes: 18 additions & 11 deletions goawk.go
@@ -1,6 +1,6 @@
// Package goawk is an implementation of AWK written in Go.
//
// You can use the command-line "goawk" command or call AWK from your
// You can use the command-line "goawk" command or run AWK from your
// Go programs using the "interp" package. The command-line program
// has the same interface as regular awk:
//
Expand Down Expand Up @@ -30,13 +30,18 @@ package main
/*
TODO:
- break up interp.go?
- performance testing: I/O, allocations, CPU
+ defer in eval/exec -- will this help?
+ other TODOs in interp.go and parser.go
+ resolve array variables at parse time (by index instead of name)
+ resolve array parameters to functions at parse time and clean up userCall
- move ast (except Program) to "internal" package?
- break up interp.go? structure it better and add comments
- think about length() and substr() chars vs bytes:
https://github.com/benhoyt/goawk/issues/2#issuecomment-415314000
- get goawk_test.go working in TravisCI
- performance testing: I/O, allocations, CPU
+ getVar/setVar overhead -- can resolve stuff at compile-time
+ defer in eval/exec -- will this help?
- try out Go 2 error handling proposal with the GoAWK codebase:
https://go.googlesource.com/proposal/+/master/design/go2draft-error-handling.md
NICE TO HAVE:
- fix broken (commented-out) interp tests due to syntax handling
Expand Down Expand Up @@ -131,18 +136,20 @@ func main() {
}
}

p := interp.New(nil, nil)
p.SetVar("FS", *fieldSep)
config := &interp.Config{
Argv0: filepath.Base(os.Args[0]),
Args: args,
Vars: []string{"FS", *fieldSep},
}
for _, v := range vars {
parts := strings.SplitN(v, "=", 2)
if len(parts) != 2 {
errorExit("-v flag must be in format name=value")
}
p.SetVar(parts[0], parts[1])
config.Vars = append(config.Vars, parts[0], parts[1])
}
p.SetArgv0(filepath.Base(os.Args[0]))

err = p.Exec(prog, os.Stdin, args)
status, err := interp.ExecProgram(prog, config)
if err != nil {
errorExit("%s", err)
}
Expand All @@ -163,7 +170,7 @@ func main() {
f.Close()
}

os.Exit(p.ExitStatus())
os.Exit(status)
}

func showSourceLine(src []byte, pos lexer.Position, dividerLen int) {
Expand Down
10 changes: 7 additions & 3 deletions goawk_test.go
Expand Up @@ -145,9 +145,12 @@ func parseGoAWK(srcPath string) (*parser.Program, error) {
func interpGoAWK(prog *parser.Program, inputPath string) ([]byte, error) {
outBuf := &bytes.Buffer{}
errBuf := &bytes.Buffer{}
p := interp.New(outBuf, errBuf)
p.SetArgs([]string{"goawk_test", inputPath})
err := p.Exec(prog, nil, []string{inputPath})
config := &interp.Config{
Output: outBuf,
Error: errBuf,
Args: []string{inputPath},
}
_, err := interp.ExecProgram(prog, config)
result := outBuf.Bytes()
result = append(result, errBuf.Bytes()...)
return result, err
Expand Down Expand Up @@ -227,6 +230,7 @@ func TestCommandLine(t *testing.T) {
{[]string{"-v", "A=1", "-f", "testdata/g.3", "B=2", "/dev/null"}, "",
"A=1, B=0\n\tARGV[1] = B=2\n\tARGV[2] = /dev/null\nA=1, B=2\n"},
{[]string{`END { print (x==42) }`, "x=42.0"}, "", "1\n"},
{[]string{"-v", "x=42.0", `BEGIN { print (x==42) }`}, "", "1\n"},
}
for _, test := range tests {
testName := strings.Join(test.args, " ")
Expand Down

0 comments on commit e0d7287

Please sign in to comment.