-
Notifications
You must be signed in to change notification settings - Fork 18.8k
Description
What version of Go are you using (go version)?
$ go version go version go1.15 darwin/amd64
Does this issue reproduce with the latest release?
Yes, even on yesterday's tip.
What operating system and processor architecture are you using (go env)?
go env Output
$ go env GO111MODULE="" GOARCH="amd64" GOBIN="" GOCACHE="/Users/okokes/Library/Caches/go-build" GOENV="/Users/okokes/Library/Application Support/go/env" GOEXE="" GOFLAGS="" GOHOSTARCH="amd64" GOHOSTOS="darwin" GOINSECURE="" GOMODCACHE="/Users/okokes/go/pkg/mod" GONOPROXY="" GONOSUMDB="" GOOS="darwin" GOPATH="/Users/okokes/go" GOPRIVATE="" GOPROXY="https://proxy.golang.org,direct" GOROOT="/usr/local/Cellar/go/1.15/libexec" GOSUMDB="sum.golang.org" GOTMPDIR="" GOTOOLDIR="/usr/local/Cellar/go/1.15/libexec/pkg/tool/darwin_amd64" GCCGO="gccgo" AR="ar" CC="clang" CXX="clang++" CGO_ENABLED="1" GOMOD="/Users/okokes/git/assorted/go-strconv-perf/go.mod" CGO_CFLAGS="-g -O2" CGO_CPPFLAGS="" CGO_CXXFLAGS="-g -O2" CGO_FFLAGS="-g -O2" CGO_LDFLAGS="-g -O2" PKG_CONFIG="pkg-config" GOGCCFLAGS="-fPIC -m64 -pthread -fno-caret-diagnostics -Qunused-arguments -fmessage-length=0 -fdebug-prefix-map=/var/folders/81/4jydp7kn51n6p68z88sqnkzc0000gn/T/go-build391862424=/tmp/go-build -gno-record-gcc-switches -fno-common"
What did you do?
I ran strconv.ParseInt (and ParseFloat) on valid input, e.g. 123, and invalid input, e.g. foo.
What did you expect to see?
I expected comparable performance for both valid and invalid inputs.
What did you see instead?
In the error case, ParseXXX functions allocate and that dominates the runtime. I tried replacing all the custom errors with a simple var errNotInt = errors.New("not an int") and got this:
$ go test -bench=. -benchmem
goos: darwin
goarch: amd64
BenchmarkParseInt/strconv.ParseInt-valid-12 80329828 14.0 ns/op 0 B/op 0 allocs/op
BenchmarkParseInt/strconv.ParseInt-invalid-12 21108784 56.1 ns/op 48 B/op 1 allocs/op
BenchmarkParseInt/custom_with_no_allocs-valid-12 79545568 13.8 ns/op 0 B/op 0 allocs/op
BenchmarkParseInt/custom_with_no_allocs-invalid-12 85985926 14.5 ns/op 0 B/op 0 allocs/op
PASS
The benchmark code:
Details
package mystrconv
import (
"strconv"
"testing"
)
func isIntStdLib(s string) bool {
if _, err := strconv.ParseInt(s, 10, 64); err != nil {
return false
}
return true
}
func isIntMine(s string) bool {
if _, err := ParseInt(s, 10, 64); err != nil {
return false
}
return true
}
var res bool
func BenchmarkParseInt(b *testing.B) {
type testCase struct {
name string
val string
fnc func(string) bool
}
cases := []testCase{
{"strconv.ParseInt-valid", "123", isIntStdLib},
{"strconv.ParseInt-invalid", "123g", isIntStdLib},
{"custom with no allocs-valid", "123", isIntMine},
{"custom with no allocs-invalid", "123g", isIntMine},
}
for _, test := range cases {
b.Run(test.name, func(b *testing.B) {
for j := 0; j < b.N; j++ {
res = test.fnc(test.val)
}
})
}
}
The library code (taking strconv and changing its error handling and putting it all in one file):
Details
// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file.package mystrconv
import "errors"
var errNotInt = errors.New("not an int")
const intSize = 32 << (^uint(0) >> 63)
// IntSize is the size in bits of an int or uint value.
const IntSize = intSizeconst maxUint64 = 1<<64 - 1
func lower(c byte) byte {
return c | ('x' - 'X')
}// ParseUint is like ParseInt but for unsigned numbers.
func ParseUint(s string, base int, bitSize int) (uint64, error) {
const fnParseUint = "ParseUint"if s == "" { return 0, errNotInt } base0 := base == 0 s0 := s switch { case 2 <= base && base <= 36: // valid base; nothing to do case base == 0: // Look for octal, hex prefix. base = 10 if s[0] == '0' { switch { case len(s) >= 3 && lower(s[1]) == 'b': base = 2 s = s[2:] case len(s) >= 3 && lower(s[1]) == 'o': base = 8 s = s[2:] case len(s) >= 3 && lower(s[1]) == 'x': base = 16 s = s[2:] default: base = 8 s = s[1:] } } default: return 0, errNotInt } if bitSize == 0 { bitSize = IntSize } else if bitSize < 0 || bitSize > 64 { return 0, errNotInt } // Cutoff is the smallest number such that cutoff*base > maxUint64. // Use compile-time constants for common cases. var cutoff uint64 switch base { case 10: cutoff = maxUint64/10 + 1 case 16: cutoff = maxUint64/16 + 1 default: cutoff = maxUint64/uint64(base) + 1 } maxVal := uint64(1)<<uint(bitSize) - 1 underscores := false var n uint64 for _, c := range []byte(s) { var d byte switch { case c == '_' && base0: underscores = true continue case '0' <= c && c <= '9': d = c - '0' case 'a' <= lower(c) && lower(c) <= 'z': d = lower(c) - 'a' + 10 default: return 0, errNotInt } if d >= byte(base) { return 0, errNotInt } if n >= cutoff { // n*base overflows return maxVal, errNotInt } n *= uint64(base) n1 := n + uint64(d) if n1 < n || n1 > maxVal { // n+v overflows return maxVal, errNotInt } n = n1 } if underscores && !underscoreOK(s0) { return 0, errNotInt } return n, nil}
func ParseInt(s string, base int, bitSize int) (i int64, err error) {
const fnParseInt = "ParseInt"if s == "" { return 0, errNotInt } // Pick off leading sign. neg := false if s[0] == '+' { s = s[1:] } else if s[0] == '-' { neg = true s = s[1:] } // Convert unsigned and check range. var un uint64 un, err = ParseUint(s, base, bitSize) if err != nil { return 0, err } if bitSize == 0 { bitSize = IntSize } cutoff := uint64(1 << uint(bitSize-1)) if !neg && un >= cutoff { return int64(cutoff - 1), errNotInt } if neg && un > cutoff { return -int64(cutoff), errNotInt } n := int64(un) if neg { n = -n } return n, nil}
func underscoreOK(s string) bool {
// saw tracks the last character (class) we saw:
// ^ for beginning of number,
// 0 for a digit or base prefix,
// _ for an underscore,
// ! for none of the above.
saw := '^'
i := 0// Optional sign. if len(s) >= 1 && (s[0] == '-' || s[0] == '+') { s = s[1:] } // Optional base prefix. hex := false if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') { i = 2 saw = '0' // base prefix counts as a digit for "underscore as digit separator" hex = lower(s[1]) == 'x' } // Number proper. for ; i < len(s); i++ { // Digits are always okay. if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' { saw = '0' continue } // Underscore must follow digit. if s[i] == '_' { if saw != '0' { return false } saw = '_' continue } // Underscore must also be followed by digit. if saw == '_' { return false } // Saw non-digit, non-underscore. saw = '!' } return saw != '_'}