-
Notifications
You must be signed in to change notification settings - Fork 18.8k
Description
For strings.Index there already exists a specialized version strings.IndexByte besides strings.IndexRune.
There currently is no Byte variant of strings.Contains but strings.ContainsRune exists.
Sampling from a large Go code corpus:
- ~1/3 of calls to strings.Contains use a string of length 1 with an ASCII character
- strings.Contains appears ~100 times more frequently than strings.ContainsRune
- strings.Contains appears ~6 times more frequently than strings.Index and strings.IndexByte together.
Using Contains or ContainsRune instead of ContainsByte (which uses IndexByte) adds overhead: larger binary code size, loading the string from memory, and a general performance loss.
Make the inliner sufficiently aware of Contains and ContainsRune (+possible code layout change) to inline IndexByte calls directly.
Alternative
Add a specialized ContainsByte function to strings and bytes package.
Downside: current code does not immediately get the benefits without change.
Data
name time/op
Contains 5.88ns ± 1%
ContainsByte 3.30ns ± 0%
ContainsRune 5.40ns ± 1%
ContainsRuneFastPath 5.33ns ± 1%
Benchmark (I have seen variances of 2 clock cycles due to branch addresses, depending on where the benchmark loops are placed; the numbers here favor Contains on my workstation)
// Package-level state shared by the benchmarks in this file.
var (
	// global receives the last result computed by each benchmark loop,
	// presumably so the compiler cannot discard the measured call.
	global bool

	// data is the string every benchmark searches.
	data = "golang:contains"
)
// ContainsByte reports whether the byte b occurs anywhere in s.
// It is a thin wrapper around strings.IndexByte.
func ContainsByte(s string, b byte) bool {
	idx := strings.IndexByte(s, b)
	// IndexByte signals "not present" with -1.
	return idx != -1
}
// ContainsRuneFastPath reports whether the Unicode code point r occurs in s.
//
// Runes in the range [0, utf8.RuneSelf) are encoded as exactly one byte, so
// they are looked up with strings.IndexByte. Every other value, including
// negative and otherwise invalid runes, is delegated to strings.IndexRune.
func ContainsRuneFastPath(s string, r rune) bool {
	// The upper bound is exclusive: utf8.RuneSelf (0x80) is the first value
	// that is NOT a single byte, and searching for byte(0x80) would match
	// continuation bytes of unrelated multi-byte runes. The lower bound keeps
	// negative runes off the byte path, where byte(r) would truncate them.
	if 0 <= r && r < utf8.RuneSelf {
		return strings.IndexByte(s, byte(r)) >= 0
	}
	return strings.IndexRune(s, r) >= 0
}
// BenchmarkContains times strings.Contains searching data for the
// one-character string ":".
func BenchmarkContains(b *testing.B) {
	found := false
	for n := 0; n < b.N; n++ {
		// Excerpt of the call-site instructions recorded by the author,
		// with their encoded sizes (surrounding instructions elided):
		//   LEAQ 0x4c655(IP), AX   // 7 bytes
		//   MOVQ AX, 0x10(SP)      // 5 bytes
		//   MOVQ $0x1, 0x18(SP)    // 9 bytes
		found = strings.Contains(data, ":")
	}
	// Store the last result in the package-level variable.
	global = found
}
// BenchmarkContainsByte times the ContainsByte helper searching data for
// the byte ':'.
func BenchmarkContainsByte(b *testing.B) {
	found := false
	for n := 0; n < b.N; n++ {
		// Excerpt of the call-site instructions recorded by the author
		// (surrounding instructions elided):
		//   MOVB $0x3a, 0x10(SP)   // 5 bytes
		//   CALL internal/bytealg.IndexByteString(SB)
		found = ContainsByte(data, ':')
	}
	// Store the last result in the package-level variable.
	global = found
}
// BenchmarkContainsRune times strings.ContainsRune searching data for the
// rune ':'.
func BenchmarkContainsRune(b *testing.B) {
	found := false
	for n := 0; n < b.N; n++ {
		// Excerpt of the call-site instructions recorded by the author
		// (surrounding instructions elided):
		//   MOVL $0x3a, 0x10(SP)   // 8 bytes
		found = strings.ContainsRune(data, ':')
	}
	// Store the last result in the package-level variable.
	global = found
}
// BenchmarkContainsRuneFastPath times the ContainsRuneFastPath helper
// searching data for the rune ':'.
func BenchmarkContainsRuneFastPath(b *testing.B) {
	found := false
	for n := 0; n < b.N; n++ {
		found = ContainsRuneFastPath(data, ':')
	}
	// Store the last result in the package-level variable.
	global = found
}