From c3b4c360c7bae6b3b2456aadcff03268f6168e4e Mon Sep 17 00:00:00 2001 From: aimuz Date: Wed, 1 May 2024 21:36:59 +0800 Subject: [PATCH] bytes, strings: optimize Cut for single-byte separators MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimize the Cut function in both the bytes and strings packages with a fast path for separators that are exactly one byte long, avoiding the more general Index search logic. The check is on the byte length, so a multi-byte UTF-8 character (len(sep) > 1) still takes the existing path. This change reduces the execution time for single-byte separators by roughly 14-20%, as the strings benchmark results below show. The optimization checks whether the length of the separator is one before proceeding with the existing search strategy. If so, it calls IndexByte directly to locate the separator. Separators of any other length pay for the extra length check, which shows up as a regression of about 3% in most of the Cut-Two results below. Additionally, benchmarks have been added to both packages to measure Cut with one-byte and two-byte separators across several input sizes. goos: darwin goarch: arm64 pkg: strings cpu: Apple M2 Max │ old-cut.txt │ new-cut.txt │ │ sec/op │ sec/op vs base │ Cut/Cut-One/2-12 4.026n ± 2% 3.274n ± 2% -18.68% (p=0.000 n=10) Cut/Cut-Two/2-12 8.093n ± 0% 8.357n ± 0% +3.27% (p=0.000 n=10) Cut/Cut-One/4-12 4.048n ± 1% 3.324n ± 2% -17.91% (p=0.000 n=10) Cut/Cut-Two/4-12 8.105n ± 0% 8.377n ± 1% +3.35% (p=0.000 n=10) Cut/Cut-One/8-12 4.089n ± 1% 3.290n ± 1% -19.53% (p=0.000 n=10) Cut/Cut-Two/8-12 8.107n ± 1% 8.359n ± 1% +3.10% (p=0.000 n=10) Cut/Cut-One/16-12 4.127n ± 1% 3.328n ± 1% -19.35% (p=0.000 n=10) Cut/Cut-Two/16-12 8.119n ± 1% 8.374n ± 1% +3.15% (p=0.000 n=10) Cut/Cut-One/32-12 4.545n ± 2% 3.675n ± 1% -19.14% (p=0.000 n=10) Cut/Cut-Two/32-12 8.708n ± 1% 8.963n ± 1% +2.92% (p=0.000 n=10) Cut/Cut-One/64-12 4.825n ± 2% 4.146n ± 1% -14.08% (p=0.000 n=10) Cut/Cut-Two/64-12 9.286n ± 0% 9.315n ± 1% ~ (p=0.105 n=10) geomean 5.983n 5.486n -8.32% │ old-cut.txt │ new-cut.txt │ │ B/op │ B/op vs base │ Cut/Cut-One/2-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/2-12 
0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/4-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/4-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/8-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/8-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/16-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/16-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/32-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/32-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/64-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/64-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean │ old-cut.txt │ new-cut.txt │ │ allocs/op │ allocs/op vs base │ Cut/Cut-One/2-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/2-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/4-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/4-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/8-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/8-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/16-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/16-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/32-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/32-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-One/64-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ Cut/Cut-Two/64-12 0.000 ± 0% 0.000 ± 0% ~ (p=1.000 n=10) ¹ geomean ² +0.00% ² ¹ all samples are equal ² summaries must be >0 to compute geomean For #67101 --- src/bytes/bytes.go | 6 ++++++ src/bytes/bytes_test.go | 18 ++++++++++++++++++ src/strings/strings.go | 7 +++++++ src/strings/strings_test.go | 18 ++++++++++++++++++ 4 files changed, 49 insertions(+) diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index 1871814c6e00d5..72ccdfc98eb512 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -1330,6 +1330,12 @@ func Index(s, sep []byte) int { // // Cut returns 
slices of the original slice s, not copies. func Cut(s, sep []byte) (before, after []byte, found bool) { + if len(sep) == 1 { + if i := IndexByte(s, sep[0]); i >= 0 { + return s[:i], s[i+1:], true + } + return s, nil, false + } if i := Index(s, sep); i >= 0 { return s[:i], s[i+len(sep):], true } diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go index 5e8cf85fd90ab9..e800a2f51e9e69 100644 --- a/src/bytes/bytes_test.go +++ b/src/bytes/bytes_test.go @@ -2263,3 +2263,21 @@ func TestClone(t *testing.T) { } } } + +func BenchmarkCut(b *testing.B) { + b.ReportAllocs() + + for _, skip := range [...]int{2, 4, 8, 16, 32, 64} { + s := Repeat(append(append(Repeat([]byte(" "), skip), 'a', 'a'), Repeat([]byte(" "), skip)...), 1<<16/skip) + b.Run(fmt.Sprintf("Cut-One/%d", skip), func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _, _ = Cut(s, []byte{'a'}) + } + }) + b.Run(fmt.Sprintf("Cut-Two/%d", skip), func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _, _ = Cut(s, []byte{'a', 'a'}) + } + }) + } +} diff --git a/src/strings/strings.go b/src/strings/strings.go index f53ae1f9a785a3..fdd5eda97ecba1 100644 --- a/src/strings/strings.go +++ b/src/strings/strings.go @@ -1309,9 +1309,16 @@ func Index(s, substr string) int { // The found result reports whether sep appears in s. // If sep does not appear in s, cut returns s, "", false. 
func Cut(s, sep string) (before, after string, found bool) { + if len(sep) == 1 { + if i := IndexByte(s, sep[0]); i >= 0 { + return s[:i], s[i+1:], true + } + return s, "", false + } if i := Index(s, sep); i >= 0 { return s[:i], s[i+len(sep):], true } + return s, "", false } diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go index ac493c7dcd43b0..2d2f64165d599f 100644 --- a/src/strings/strings_test.go +++ b/src/strings/strings_test.go @@ -2071,3 +2071,21 @@ func BenchmarkReplaceAll(b *testing.B) { stringSink = ReplaceAll("banana", "a", "<>") } } + +func BenchmarkCut(b *testing.B) { + b.ReportAllocs() + + for _, skip := range [...]int{2, 4, 8, 16, 32, 64} { + s := Repeat(Repeat(" ", skip)+"aa"+Repeat(" ", skip), 1<<16/skip) + b.Run(fmt.Sprintf("Cut-One/%d", skip), func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _, _ = Cut(s, "a") + } + }) + b.Run(fmt.Sprintf("Cut-Two/%d", skip), func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, _, _ = Cut(s, "aa") + } + }) + } +}