Skip to content

Commit

Permalink
bytes, strings: optimize Split*
Browse files Browse the repository at this point in the history
The relevant benchmark results on linux/amd64:

bytes:

SplitSingleByteSeparator-4   25.7ms ± 5%   9.1ms ± 4%  -64.40%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.8ms ±20%   4.3ms ± 8%  -69.23%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.88µs ± 9%  0.88µs ± 4%  -53.25%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   4.83µs ±10%  1.32µs ± 9%  -72.65%  (p=0.000 n=10+10)

strings:

name                         old time/op  new time/op  delta
SplitSingleByteSeparator-4   21.4ms ± 8%   8.5ms ± 5%  -60.19%  (p=0.000 n=10+10)
SplitMultiByteSeparator-4    13.2ms ± 9%   3.9ms ± 4%  -70.29%  (p=0.000 n=10+10)
SplitNSingleByteSeparator-4  1.54µs ± 5%  0.75µs ± 7%  -51.21%  (p=0.000 n=10+10)
SplitNMultiByteSeparator-4   3.57µs ± 8%  1.01µs ±11%  -71.76%  (p=0.000 n=10+10)

Fixes #18973

Change-Id: Ie4bc010c6cc389001e72eab530497c81e5b26f34
Reviewed-on: https://go-review.googlesource.com/36510
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
  • Loading branch information
valyala authored and bradfitz committed Feb 8, 2017
1 parent c026845 commit 8946502
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 26 deletions.
23 changes: 12 additions & 11 deletions src/bytes/bytes.go
Expand Up @@ -215,20 +215,21 @@ func genSplit(s, sep []byte, sepSave, n int) [][]byte {
if n < 0 {
n = Count(s, sep) + 1
}
c := sep[0]
start := 0

a := make([][]byte, n)
na := 0
for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
if s[i] == c && (len(sep) == 1 || Equal(s[i:i+len(sep)], sep)) {
a[na] = s[start : i+sepSave]
na++
start = i + len(sep)
i += len(sep) - 1
n--
i := 0
for i < n {
m := Index(s, sep)
if m < 0 {
break
}
a[i] = s[:m+sepSave]
s = s[m+len(sep):]
i++
}
a[na] = s[start:]
return a[0 : na+1]
a[i] = s
return a[:i+1]
}

// SplitN slices s into subslices separated by sep and returns a slice of
Expand Down
53 changes: 53 additions & 0 deletions src/bytes/bytes_test.go
Expand Up @@ -1432,6 +1432,59 @@ func BenchmarkTrimSpace(b *testing.B) {
}
}

func makeBenchInputHard() []byte {
tokens := [...]string{
"<a>", "<p>", "<b>", "<strong>",
"</a>", "</p>", "</b>", "</strong>",
"hello", "world",
}
x := make([]byte, 0, 1<<20)
for {
i := rand.Intn(len(tokens))
if len(x)+len(tokens[i]) >= 1<<20 {
break
}
x = append(x, tokens[i]...)
}
return x
}

var benchInputHard = makeBenchInputHard()

func BenchmarkSplitEmptySeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, nil)
}
}

func BenchmarkSplitSingleByteSeparator(b *testing.B) {
sep := []byte("/")
for i := 0; i < b.N; i++ {
Split(benchInputHard, sep)
}
}

func BenchmarkSplitMultiByteSeparator(b *testing.B) {
sep := []byte("hello")
for i := 0; i < b.N; i++ {
Split(benchInputHard, sep)
}
}

func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
sep := []byte("/")
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, sep, 10)
}
}

func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
sep := []byte("hello")
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, sep, 10)
}
}

func BenchmarkRepeat(b *testing.B) {
for i := 0; i < b.N; i++ {
Repeat([]byte("-"), 80)
Expand Down
25 changes: 13 additions & 12 deletions src/strings/strings.go
Expand Up @@ -239,20 +239,21 @@ func genSplit(s, sep string, sepSave, n int) []string {
if n < 0 {
n = Count(s, sep) + 1
}
c := sep[0]
start := 0

a := make([]string, n)
na := 0
for i := 0; i+len(sep) <= len(s) && na+1 < n; i++ {
if s[i] == c && (len(sep) == 1 || s[i:i+len(sep)] == sep) {
a[na] = s[start : i+sepSave]
na++
start = i + len(sep)
i += len(sep) - 1
}
n--
i := 0
for i < n {
m := Index(s, sep)
if m < 0 {
break
}
a[i] = s[:m+sepSave]
s = s[m+len(sep):]
i++
}
a[na] = s[start:]
return a[0 : na+1]
a[i] = s
return a[:i+1]
}

// SplitN slices s into substrings separated by sep and returns a slice of
Expand Down
18 changes: 15 additions & 3 deletions src/strings/strings_test.go
Expand Up @@ -1476,24 +1476,36 @@ func BenchmarkFieldsFunc(b *testing.B) {
}
}

func BenchmarkSplit1(b *testing.B) {
func BenchmarkSplitEmptySeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "")
}
}

func BenchmarkSplit2(b *testing.B) {
func BenchmarkSplitSingleByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "/")
}
}

func BenchmarkSplit3(b *testing.B) {
func BenchmarkSplitMultiByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
Split(benchInputHard, "hello")
}
}

func BenchmarkSplitNSingleByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, "/", 10)
}
}

func BenchmarkSplitNMultiByteSeparator(b *testing.B) {
for i := 0; i < b.N; i++ {
SplitN(benchInputHard, "hello", 10)
}
}

func BenchmarkRepeat(b *testing.B) {
for i := 0; i < b.N; i++ {
Repeat("-", 80)
Expand Down

1 comment on commit 8946502

@dongweigogo
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@valyala , Great!

Please sign in to comment.