Skip to content

strconv: allocation in the error case dominates the runtime #43241

@kokes

Description

@kokes

What version of Go are you using (go version)?

$ go version
go version go1.15 darwin/amd64

Does this issue reproduce with the latest release?

Yes, even on yesterday's tip.

What operating system and processor architecture are you using (go env)?

go env Output
$ go env
GO111MODULE=""
GOARCH="amd64"
GOBIN=""
GOCACHE="/Users/okokes/Library/Caches/go-build"
GOENV="/Users/okokes/Library/Application Support/go/env"
GOEXE=""
GOFLAGS=""
GOHOSTARCH="amd64"
GOHOSTOS="darwin"
GOINSECURE=""
GOMODCACHE="/Users/okokes/go/pkg/mod"
GONOPROXY=""
GONOSUMDB=""
GOOS="darwin"
GOPATH="/Users/okokes/go"
GOPRIVATE=""
GOPROXY="https://proxy.golang.org,direct"
GOROOT="/usr/local/Cellar/go/1.15/libexec"
GOSUMDB="sum.golang.org"
GOTMPDIR=""
GOTOOLDIR="/usr/local/Cellar/go/1.15/libexec/pkg/tool/darwin_amd64"
GCCGO="gccgo"
AR="ar"
CC="clang"
CXX="clang++"
CGO_ENABLED="1"
GOMOD="/Users/okokes/git/assorted/go-strconv-perf/go.mod"
CGO_CFLAGS="-g -O2"
CGO_CPPFLAGS=""
CGO_CXXFLAGS="-g -O2"
CGO_FFLAGS="-g -O2"
CGO_LDFLAGS="-g -O2"
PKG_CONFIG="pkg-config"
GOGCCFLAGS="-fPIC -m64 -pthread -fno-caret-diagnostics -Qunused-arguments -fmessage-length=0 -fdebug-prefix-map=/var/folders/81/4jydp7kn51n6p68z88sqnkzc0000gn/T/go-build391862424=/tmp/go-build -gno-record-gcc-switches -fno-common"

What did you do?

I ran strconv.ParseInt (and ParseFloat) on valid input, e.g. 123, and invalid input, e.g. foo.

What did you expect to see?

I expected comparable performance for both valid and invalid inputs.

What did you see instead?

In the error case, ParseXXX functions allocate and that dominates the runtime. I tried replacing all the custom errors with a simple var errNotInt = errors.New("not an int") and got this:

$ go test -bench=. -benchmem
goos: darwin
goarch: amd64
BenchmarkParseInt/strconv.ParseInt-valid-12         	80329828	        14.0 ns/op	       0 B/op	       0 allocs/op
BenchmarkParseInt/strconv.ParseInt-invalid-12       	21108784	        56.1 ns/op	      48 B/op	       1 allocs/op
BenchmarkParseInt/custom_with_no_allocs-valid-12    	79545568	        13.8 ns/op	       0 B/op	       0 allocs/op
BenchmarkParseInt/custom_with_no_allocs-invalid-12  	85985926	        14.5 ns/op	       0 B/op	       0 allocs/op
PASS

The benchmark code:

Details
package mystrconv

import (
	"strconv"
	"testing"
)

// isIntStdLib reports whether strconv.ParseInt accepts s as a base-10
// integer that fits in 64 bits.
func isIntStdLib(s string) bool {
	_, err := strconv.ParseInt(s, 10, 64)
	return err == nil
}

// isIntMine reports whether this package's ParseInt accepts s as a base-10
// integer that fits in 64 bits.
func isIntMine(s string) bool {
	_, err := ParseInt(s, 10, 64)
	return err == nil
}

// res receives the result of every call made inside BenchmarkParseInt.
// NOTE(review): presumably a package-level sink so the compiler cannot
// discard the benchmarked call as dead code — confirm.
var res bool

// BenchmarkParseInt runs isIntStdLib and isIntMine as sub-benchmarks, each
// once on the input "123" and once on the input "123g". Every result is
// stored in the package-level res.
func BenchmarkParseInt(b *testing.B) {
	benchmarks := []struct {
		label string
		input string
		check func(string) bool
	}{
		{label: "strconv.ParseInt-valid", input: "123", check: isIntStdLib},
		{label: "strconv.ParseInt-invalid", input: "123g", check: isIntStdLib},
		{label: "custom with no allocs-valid", input: "123", check: isIntMine},
		{label: "custom with no allocs-invalid", input: "123g", check: isIntMine},
	}

	for idx := range benchmarks {
		// Copy the entry so the closure below owns its own value.
		bm := benchmarks[idx]
		b.Run(bm.label, func(b *testing.B) {
			for iter := 0; iter < b.N; iter++ {
				res = bm.check(bm.input)
			}
		})
	}
}

The library code (strconv's integer-parsing code with its error handling changed, all put into one file):

Details
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package mystrconv

import "errors"

// errNotInt is the one error value ParseUint and ParseInt return for every
// failure. It is created once, at package initialization.
var errNotInt = errors.New("not an int")

const (
	// intSize evaluates to 64 when uint is 64 bits wide and to 32 otherwise:
	// ^uint(0)>>63 is 1 only if bit 63 of a uint exists.
	intSize = 32 << (^uint(0) >> 63)

	// IntSize is the size in bits of an int or uint value.
	IntSize = intSize

	// maxUint64 is the largest value a uint64 can hold.
	maxUint64 = 1<<64 - 1
)

// lower sets the bit that separates 'X' from 'x' in c and returns the result.
// An ASCII upper-case letter becomes its lower-case form; lower-case letters
// and the digits '0' to '9' come back unchanged. No check is made that c is a
// letter, so other bytes may be altered (for example '_' becomes 0x7f).
func lower(c byte) byte {
	const caseBit = 'x' - 'X'
	return c | caseBit
}

// ParseUint is like ParseInt but for unsigned numbers.
//
// base must be 0 or in the range 2 to 36. When base is 0 the base is taken
// from the start of s: "0b" selects 2, "0o" selects 8 and "0x" selects 16
// (the prefix letter may be upper case, and at least one more byte must
// follow the prefix); any other leading "0" selects 8; everything else is
// read as base 10. Underscores are accepted only when base is 0, and only in
// the positions underscoreOK allows.
//
// bitSize must be in the range 0 to 64; 0 means IntSize.
//
// Every failure is reported with the shared errNotInt value, so the error
// path never builds a new error. The value returned alongside it is 0, except
// on overflow, where it is the largest value that fits in bitSize bits.
func ParseUint(s string, base int, bitSize int) (uint64, error) {
	if s == "" {
		return 0, errNotInt
	}

	base0 := base == 0

	// Keep the unmodified input: the underscore check at the end needs to see
	// the base prefix that is stripped from s below.
	s0 := s
	switch {
	case 2 <= base && base <= 36:
		// valid base; nothing to do

	case base == 0:
		// Look for octal, hex prefix.
		base = 10
		if s[0] == '0' {
			switch {
			case len(s) >= 3 && lower(s[1]) == 'b':
				base = 2
				s = s[2:]
			case len(s) >= 3 && lower(s[1]) == 'o':
				base = 8
				s = s[2:]
			case len(s) >= 3 && lower(s[1]) == 'x':
				base = 16
				s = s[2:]
			default:
				base = 8
				s = s[1:]
			}
		}

	default:
		return 0, errNotInt
	}

	if bitSize == 0 {
		bitSize = IntSize
	} else if bitSize < 0 || bitSize > 64 {
		return 0, errNotInt
	}

	// Cutoff is the smallest number such that cutoff*base > maxUint64.
	// Use compile-time constants for common cases.
	var cutoff uint64
	switch base {
	case 10:
		cutoff = maxUint64/10 + 1
	case 16:
		cutoff = maxUint64/16 + 1
	default:
		cutoff = maxUint64/uint64(base) + 1
	}

	// For bitSize == 64 the shift yields 0 and the subtraction wraps around
	// to the maximum uint64, which is the intended limit.
	maxVal := uint64(1)<<uint(bitSize) - 1

	underscores := false
	var n uint64
	for _, c := range []byte(s) {
		var d byte
		switch {
		case c == '_' && base0:
			// Placement is validated once, after the loop.
			underscores = true
			continue
		case '0' <= c && c <= '9':
			d = c - '0'
		case 'a' <= lower(c) && lower(c) <= 'z':
			d = lower(c) - 'a' + 10
		default:
			return 0, errNotInt
		}

		if d >= byte(base) {
			return 0, errNotInt
		}

		if n >= cutoff {
			// n*base overflows
			return maxVal, errNotInt
		}
		n *= uint64(base)

		n1 := n + uint64(d)
		if n1 < n || n1 > maxVal {
			// n+v overflows
			return maxVal, errNotInt
		}
		n = n1
	}

	if underscores && !underscoreOK(s0) {
		return 0, errNotInt
	}

	return n, nil
}

// ParseInt parses s as a signed integer. s may start with a single '+' or
// '-'; the remainder is handed to ParseUint together with base and bitSize,
// so the rules for those two arguments are ParseUint's. A bitSize of 0 means
// IntSize.
//
// Every failure is reported with the shared errNotInt value. When s is empty
// or ParseUint fails, the returned value is 0. When the magnitude parses but
// does not fit in a signed integer of bitSize bits, the returned value is the
// nearest representable one: the maximum for a non-negative input, the
// minimum for a negative input.
func ParseInt(s string, base int, bitSize int) (i int64, err error) {
	if s == "" {
		return 0, errNotInt
	}

	// Pick off leading sign.
	neg := false
	switch s[0] {
	case '+':
		s = s[1:]
	case '-':
		neg = true
		s = s[1:]
	}

	// Convert unsigned and check range.
	un, err := ParseUint(s, base, bitSize)
	if err != nil {
		return 0, err
	}

	if bitSize == 0 {
		bitSize = IntSize
	}

	// cutoff is 2^(bitSize-1): one past the largest positive value, and the
	// magnitude of the most negative value.
	cutoff := uint64(1 << uint(bitSize-1))
	if !neg && un >= cutoff {
		return int64(cutoff - 1), errNotInt
	}
	if neg && un > cutoff {
		return -int64(cutoff), errNotInt
	}
	n := int64(un)
	if neg {
		n = -n
	}
	return n, nil
}

// underscoreOK reports whether every underscore in s sits directly between
// two digits, where a base prefix ("0b", "0o" or "0x", in either case) counts
// as a digit on the left. A single leading '+' or '-' is skipped first. The
// letters a-f count as digits only after a "0x" prefix. Nothing else about s
// is checked: bytes that are neither digits nor underscores are allowed
// unless they come directly after an underscore.
func underscoreOK(s string) bool {
	// Classes of the previously seen byte.
	const (
		sawStart      = '^' // nothing seen yet
		sawDigit      = '0' // a digit or the base prefix
		sawUnderscore = '_' // an underscore
		sawOther      = '!' // anything else
	)

	// Optional sign.
	if s != "" && (s[0] == '+' || s[0] == '-') {
		s = s[1:]
	}

	// Optional base prefix.
	prev := sawStart
	isHex := false
	if len(s) >= 2 && s[0] == '0' {
		switch lower(s[1]) {
		case 'b', 'o', 'x':
			isHex = lower(s[1]) == 'x'
			// The prefix counts as a digit, so "0x_1" is acceptable.
			prev = sawDigit
			s = s[2:]
		}
	}

	// Number proper.
	for _, c := range []byte(s) {
		switch {
		case '0' <= c && c <= '9', isHex && 'a' <= lower(c) && lower(c) <= 'f':
			// Digits are always okay.
			prev = sawDigit
		case c == '_':
			// Underscore must follow digit.
			if prev != sawDigit {
				return false
			}
			prev = sawUnderscore
		case prev == sawUnderscore:
			// Underscore must also be followed by digit.
			return false
		default:
			// Saw non-digit, non-underscore.
			prev = sawOther
		}
	}

	// A trailing underscore has no digit after it.
	return prev != sawUnderscore
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    NeedsInvestigation (Someone must examine and confirm this is a valid issue and not a duplicate of an existing one.), Performance

    Type

    No type

    Projects

    No projects

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions