Permalink
Browse files

strconv: optimize decimal ints formatting with smallsString

Benchmark results for GOARCH=amd64:

name                                     old time/op  new time/op  delta
FormatInt-4                              2.51µs ± 2%  2.40µs ± 2%   -4.51%  (p=0.000 n=9+10)
AppendInt-4                              1.67µs ± 2%  1.61µs ± 3%   -3.74%  (p=0.000 n=9+9)
FormatUint-4                              698ns ± 2%   643ns ± 3%   -7.95%  (p=0.000 n=10+8)
AppendUint-4                              478ns ± 1%   418ns ± 2%  -12.61%  (p=0.000 n=8+10)
AppendUintVarlen/1-4                     9.30ns ± 6%  9.15ns ± 1%     ~     (p=0.199 n=9+10)
AppendUintVarlen/12-4                    9.12ns ± 0%  9.16ns ± 2%     ~     (p=0.307 n=9+9)
AppendUintVarlen/123-4                   18.6ns ± 2%  18.7ns ± 0%     ~     (p=0.091 n=10+6)
AppendUintVarlen/1234-4                  19.1ns ± 4%  17.7ns ± 1%   -7.35%  (p=0.000 n=10+9)
AppendUintVarlen/12345-4                 21.5ns ± 3%  20.7ns ± 3%   -3.78%  (p=0.002 n=9+10)
AppendUintVarlen/123456-4                23.5ns ± 3%  20.9ns ± 1%  -11.14%  (p=0.000 n=10+9)
AppendUintVarlen/1234567-4               25.0ns ± 2%  23.6ns ± 7%   -5.48%  (p=0.004 n=9+10)
AppendUintVarlen/12345678-4              26.8ns ± 2%  23.4ns ± 2%  -12.79%  (p=0.000 n=9+10)
AppendUintVarlen/123456789-4             29.8ns ± 3%  26.5ns ± 5%  -11.03%  (p=0.000 n=10+10)
AppendUintVarlen/1234567890-4            31.6ns ± 3%  26.9ns ± 3%  -14.95%  (p=0.000 n=10+9)
AppendUintVarlen/12345678901-4           33.8ns ± 3%  29.3ns ± 5%  -13.21%  (p=0.000 n=10+10)
AppendUintVarlen/123456789012-4          35.5ns ± 4%  29.2ns ± 4%  -17.82%  (p=0.000 n=10+10)
AppendUintVarlen/1234567890123-4         37.6ns ± 4%  31.4ns ± 3%  -16.48%  (p=0.000 n=10+10)
AppendUintVarlen/12345678901234-4        39.8ns ± 6%  32.0ns ± 7%  -19.60%  (p=0.000 n=10+10)
AppendUintVarlen/123456789012345-4       40.7ns ± 0%  34.4ns ± 4%  -15.55%  (p=0.000 n=6+10)
AppendUintVarlen/1234567890123456-4      45.4ns ± 6%  35.1ns ± 4%  -22.66%  (p=0.000 n=10+10)
AppendUintVarlen/12345678901234567-4     45.1ns ± 1%  36.7ns ± 4%  -18.77%  (p=0.000 n=9+10)
AppendUintVarlen/123456789012345678-4    46.9ns ± 0%  36.4ns ± 3%  -22.49%  (p=0.000 n=9+10)
AppendUintVarlen/1234567890123456789-4   50.6ns ± 6%  38.8ns ± 3%  -23.28%  (p=0.000 n=10+10)
AppendUintVarlen/12345678901234567890-4  51.3ns ± 2%  38.4ns ± 0%  -25.00%  (p=0.000 n=9+8)

Benchmark results for GOARCH=386:

name                                     old time/op  new time/op  delta
FormatInt-4                              6.21µs ± 0%  6.14µs ± 0%  -1.11%  (p=0.008 n=5+5)
AppendInt-4                              4.95µs ± 0%  4.85µs ± 0%  -1.99%  (p=0.016 n=5+4)
FormatUint-4                             1.89µs ± 1%  1.83µs ± 1%  -2.94%  (p=0.008 n=5+5)
AppendUint-4                             1.59µs ± 0%  1.57µs ± 2%  -1.72%  (p=0.040 n=5+5)
FormatIntSmall-4                         8.48ns ± 0%  8.48ns ± 0%    ~     (p=0.905 n=5+5)
AppendIntSmall-4                         12.2ns ± 0%  12.2ns ± 0%    ~     (all equal)
AppendUintVarlen/1-4                     10.6ns ± 1%  10.7ns ± 0%    ~     (p=0.238 n=5+4)
AppendUintVarlen/12-4                    10.7ns ± 0%  10.7ns ± 1%    ~     (p=0.333 n=4+5)
AppendUintVarlen/123-4                   29.9ns ± 1%  30.2ns ± 0%  +1.07%  (p=0.016 n=5+4)
AppendUintVarlen/1234-4                  32.4ns ± 1%  30.4ns ± 0%  -6.30%  (p=0.008 n=5+5)
AppendUintVarlen/12345-4                 35.1ns ± 2%  34.9ns ± 0%    ~     (p=0.238 n=5+5)
AppendUintVarlen/123456-4                36.6ns ± 0%  35.3ns ± 0%  -3.55%  (p=0.029 n=4+4)
AppendUintVarlen/1234567-4               38.9ns ± 0%  39.6ns ± 0%  +1.80%  (p=0.029 n=4+4)
AppendUintVarlen/12345678-4              41.3ns ± 0%  40.1ns ± 0%  -2.91%  (p=0.000 n=5+4)
AppendUintVarlen/123456789-4             44.9ns ± 1%  44.8ns ± 0%    ~     (p=0.667 n=5+5)
AppendUintVarlen/1234567890-4            65.6ns ± 0%  66.2ns ± 1%  +0.88%  (p=0.016 n=4+5)
AppendUintVarlen/12345678901-4           77.9ns ± 0%  76.3ns ± 0%  -2.00%  (p=0.000 n=4+5)
AppendUintVarlen/123456789012-4          80.7ns ± 0%  79.1ns ± 1%  -2.01%  (p=0.008 n=5+5)
AppendUintVarlen/1234567890123-4         83.6ns ± 0%  80.2ns ± 1%  -4.07%  (p=0.008 n=5+5)
AppendUintVarlen/12345678901234-4        86.2ns ± 1%  83.3ns ± 0%  -3.39%  (p=0.008 n=5+5)
AppendUintVarlen/123456789012345-4       88.5ns ± 0%  83.7ns ± 0%  -5.42%  (p=0.008 n=5+5)
AppendUintVarlen/1234567890123456-4      90.6ns ± 0%  88.3ns ± 0%  -2.54%  (p=0.008 n=5+5)
AppendUintVarlen/12345678901234567-4     92.7ns ± 0%  89.0ns ± 1%  -4.01%  (p=0.008 n=5+5)
AppendUintVarlen/123456789012345678-4    95.6ns ± 1%  92.6ns ± 0%  -3.18%  (p=0.016 n=5+4)
AppendUintVarlen/1234567890123456789-4    118ns ± 0%   114ns ± 0%    ~     (p=0.079 n=4+5)
AppendUintVarlen/12345678901234567890-4   138ns ± 0%   136ns ± 0%  -1.45%  (p=0.008 n=5+5)

Updates #19445

Change-Id: Iafbe5c074898187c150dc3854e5b9fc19c10be05
Reviewed-on: https://go-review.googlesource.com/38255
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
  • Loading branch information...
valyala authored and griesemer committed Mar 16, 2017
1 parent 33266df commit 4c8023bfed57276498427ce17235c1bfe210d611
Showing with 73 additions and 10 deletions.
  1. +26 −10 src/strconv/itoa.go
  2. +47 −0 src/strconv/itoa_test.go
View
@@ -121,26 +121,42 @@ func formatBits(dst []byte, u uint64, base int, neg, append_ bool) (d []byte, s
// are calculated by runtime functions on 32bit machines.
q := u / 1e9
us := uint(u - q*1e9) // u % 1e9 fits into a uint
- for j := 9; j > 0; j-- {
- i--
- a[i] = byte(us%10 + '0')
- us /= 10
+ for j := 4; j > 0; j-- {
+ is := us % 100 * 2
+ us /= 100
+ i -= 2
+ a[i+1] = smallsString[is+1]
+ a[i+0] = smallsString[is+0]
}
+
+ // us < 10, since it contains the last digit
+ // from the initial 9-digit us.
+ i--
+ a[i] = smallsString[us*2+1]
+
u = q
}
// u < 1e9
}
// u guaranteed to fit into a uint
us := uint(u)
- for us >= 10 {
- i--
- a[i] = byte(us%10 + '0')
- us /= 10
+ for us >= 100 {
+ is := us % 100 * 2
+ us /= 100
+ i -= 2
+ a[i+1] = smallsString[is+1]
+ a[i+0] = smallsString[is+0]
}
- // us < 10
+
+ // us < 100
+ is := us * 2
i--
- a[i] = byte(us + '0')
+ a[i] = smallsString[is+1]
+ if us >= 10 {
+ i--
+ a[i] = smallsString[is]
+ }
} else if s := shifts[base]; s > 0 {
// base is power of 2: use shifts and masks instead of / and %
View
@@ -126,6 +126,41 @@ func TestUitoa(t *testing.T) {
}
}
+var varlenUints = []struct {
+ in uint64
+ out string
+}{
+ {1, "1"},
+ {12, "12"},
+ {123, "123"},
+ {1234, "1234"},
+ {12345, "12345"},
+ {123456, "123456"},
+ {1234567, "1234567"},
+ {12345678, "12345678"},
+ {123456789, "123456789"},
+ {1234567890, "1234567890"},
+ {12345678901, "12345678901"},
+ {123456789012, "123456789012"},
+ {1234567890123, "1234567890123"},
+ {12345678901234, "12345678901234"},
+ {123456789012345, "123456789012345"},
+ {1234567890123456, "1234567890123456"},
+ {12345678901234567, "12345678901234567"},
+ {123456789012345678, "123456789012345678"},
+ {1234567890123456789, "1234567890123456789"},
+ {12345678901234567890, "12345678901234567890"},
+}
+
+func TestFormatUintVarlen(t *testing.T) {
+ for _, test := range varlenUints {
+ s := FormatUint(test.in, 10)
+ if s != test.out {
+ t.Errorf("FormatUint(%v, 10) = %v want %v", test.in, s, test.out)
+ }
+ }
+}
+
func BenchmarkFormatInt(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, test := range itob64tests {
@@ -181,4 +216,16 @@ func BenchmarkAppendIntSmall(b *testing.B) {
}
}
+func BenchmarkAppendUintVarlen(b *testing.B) {
+ for _, test := range varlenUints {
+ b.Run(test.out, func(b *testing.B) {
+ dst := make([]byte, 0, 30)
+ for j := 0; j < b.N; j++ {
+ dst = AppendUint(dst[:0], test.in, 10)
+ BenchSink += len(dst)
+ }
+ })
+ }
+}
+
var BenchSink int // make sure compiler cannot optimize away benchmarks

0 comments on commit 4c8023b

Please sign in to comment.