-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Open
Labels
NeedsInvestigationSomeone must examine and confirm this is a valid issue and not a duplicate of an existing one.Someone must examine and confirm this is a valid issue and not a duplicate of an existing one.Performancecompiler/runtimeIssues related to the Go compiler and/or runtime.Issues related to the Go compiler and/or runtime.
Milestone
Description
The clear and append built-ins can result in the need to zero an arbitrary amount of memory. For byte slices, the compiler appears to use a call to runtime.memclrNoHeapPointers. That function cannot be preempted, which can lead to arbitrary delays when another goroutine wants to stop the world (such as to start or end a GC cycle).
Applications that use bytes.Buffer can experience this when a call to bytes.(*Buffer).Write leads to a call to bytes.growSlice which uses append, as seen in one of the execution traces from #68399.
The runtime and compiler should collaborate to allow opportunities for preemption when zeroing large amounts of memory.
CC @golang/runtime @mknyszek
Reproducer, using `clear` built-in plus `runtime.ReadMemStats` to provide STWs
package memclr
import (
"context"
"fmt"
"math"
"runtime"
"runtime/metrics"
"sync"
"testing"
"time"
)
// BenchmarkMemclr runs one sub-benchmark per buffer size, covering the
// powers of ten from 10^4 through 10^9 bytes. Each sub-benchmark is built
// by testcaseMemclr and repeatedly clears a byte slice of that size.
func BenchmarkMemclr(b *testing.B) {
	const (
		minExp = 4
		maxExp = 9
	)
	for e := minExp; e <= maxExp; e++ {
		n := int(math.Pow10(e))
		label := fmt.Sprintf("bytes=10^%d", e)
		b.Run(label, testcaseMemclr(n))
	}
}
// testcaseMemclr returns a benchmark body that allocates a byte slice of
// length l once and then clears it b.N times using the clear built-in.
// The slice length is reported through b.SetBytes so the benchmark output
// includes throughput.
func testcaseMemclr(l int) func(b *testing.B) {
	body := func(b *testing.B) {
		b.SetBytes(int64(l))
		buf := make([]byte, l)
		for i := 0; i < b.N; i++ {
			clear(buf)
		}
	}
	return body
}
// BenchmarkSTW runs one sub-benchmark per buffer size, covering the powers
// of ten from 10^4 through 10^9 bytes. Each sub-benchmark is built by
// testcaseSTW, which reports stop-the-world pause metrics observed while a
// background goroutine clears a buffer of that size.
func BenchmarkSTW(b *testing.B) {
	const (
		minExp = 4
		maxExp = 9
	)
	for e := minExp; e <= maxExp; e++ {
		n := int(math.Pow10(e))
		label := fmt.Sprintf("bytes=10^%d", e)
		b.Run(label, testcaseSTW(n))
	}
}
// testcaseSTW returns a benchmark body that observes the
// "/sched/pauses/stopping/other:seconds" runtime metric while a background
// goroutine continuously clears a byte slice of the given size.
//
// The benchmark loop calls runtime.ReadMemStats b.N times, sleeping briefly
// after each call. The metric histogram is sampled before and after the
// loop, and the difference is reported as three custom metrics:
// "worst-ns" and "avg-ns" (derived via worst and avg, scaled from seconds
// to nanoseconds) and "clears" (the number of completed clear calls).
func testcaseSTW(size int) func(*testing.B) {
	const name = "/sched/pauses/stopping/other:seconds"

	return func(b *testing.B) {
		ctx, cancel := context.WithCancel(context.Background())

		// clears is written only by the goroutine below and read only
		// after done.Wait() returns, so no further synchronization is used.
		var (
			clears int
			done   sync.WaitGroup
		)
		done.Add(1)
		go func() {
			defer done.Done()
			buf := make([]byte, size)
			for {
				if ctx.Err() != nil {
					return
				}
				clear(buf)
				clears++
			}
		}()

		start := readMetric(name)
		var ms runtime.MemStats
		for i := 0; i < b.N; i++ {
			runtime.ReadMemStats(&ms)
			time.Sleep(10 * time.Microsecond) // allow others to make progress
		}
		end := readMetric(name)

		// Stop the clearing goroutine and wait for it before reading clears.
		cancel()
		done.Wait()

		nsPerSecond := float64(time.Second.Nanoseconds())
		hist := delta(start.Float64Histogram(), end.Float64Histogram())
		b.ReportMetric(worst(hist)*nsPerSecond, "worst-ns")
		b.ReportMetric(avg(hist)*nsPerSecond, "avg-ns")
		b.ReportMetric(float64(clears), "clears")
	}
}
// readMetric reads the single runtime metric identified by name via
// metrics.Read and returns its sampled value.
func readMetric(name string) metrics.Value {
	var sample [1]metrics.Sample
	sample[0].Name = name
	metrics.Read(sample[:])
	return sample[0].Value
}
// delta returns a new histogram whose counts are b's counts minus a's
// counts, bucket by bucket. The result shares a's Buckets slice; its Counts
// slice is a fresh copy, so neither input's counts are modified.
//
// It indexes the copy of b.Counts by every index of a.Counts, so a.Counts
// must not be longer than b.Counts.
func delta(a, b *metrics.Float64Histogram) *metrics.Float64Histogram {
	var counts []uint64
	counts = append(counts, b.Counts...)
	for i, prev := range a.Counts {
		counts[i] -= prev
	}
	return &metrics.Float64Histogram{
		Buckets: a.Buckets,
		Counts:  counts,
	}
}
// worst returns h.Buckets[i] for the highest index i whose count is
// non-zero, or 0 when every count is zero.
func worst(h *metrics.Float64Histogram) float64 {
	// Scan from the top so the first hit is the highest populated bucket.
	for i := len(h.Counts) - 1; i >= 0; i-- {
		if h.Counts[i] > 0 {
			return h.Buckets[i]
		}
	}
	return 0
}
// avg returns the count-weighted mean of h.Buckets[i] over all indices i of
// h.Counts, skipping any index whose h.Buckets[i] is infinite or NaN.
//
// When no counts contribute, the result is the floating-point quotient 0/0,
// i.e. NaN.
func avg(h *metrics.Float64Histogram) float64 {
	var (
		sum   float64
		total uint64
	)
	for i, count := range h.Counts {
		bound := h.Buckets[i]
		if math.IsInf(bound, 0) || math.IsNaN(bound) {
			continue
		}
		sum += float64(count) * bound
		total += count
	}
	return sum / float64(total)
}
Reproducer results, showing average time to stop the world is more than 1 ms (instead of less than 10 µs) when another part of the app is clearing a 100 MB byte slice
$ GOGC=off go test -cpu=2 -bench=. ./memclr
goos: darwin
goarch: arm64
pkg: issues/memclr
cpu: Apple M1
BenchmarkMemclr/bytes=10^4-2 9421521 122.2 ns/op 81857.90 MB/s
BenchmarkMemclr/bytes=10^5-2 1000000 1433 ns/op 69779.61 MB/s
BenchmarkMemclr/bytes=10^6-2 99464 15148 ns/op 66016.50 MB/s
BenchmarkMemclr/bytes=10^7-2 8704 153918 ns/op 64969.54 MB/s
BenchmarkMemclr/bytes=10^8-2 758 1632702 ns/op 61248.17 MB/s
BenchmarkMemclr/bytes=10^9-2 67 16443990 ns/op 60812.49 MB/s
BenchmarkSTW/bytes=10^4-2 29718 40598 ns/op 5473 avg-ns 2912452 clears 98304 worst-ns
BenchmarkSTW/bytes=10^5-2 29895 38866 ns/op 4920 avg-ns 560027 clears 81920 worst-ns
BenchmarkSTW/bytes=10^6-2 26226 44481 ns/op 8116 avg-ns 70132 clears 16384 worst-ns
BenchmarkSTW/bytes=10^7-2 8925 164844 ns/op 120482 avg-ns 8919 clears 655360 worst-ns
BenchmarkSTW/bytes=10^8-2 2184 1571734 ns/op 1376487 avg-ns 2102 clears 4194304 worst-ns
BenchmarkSTW/bytes=10^9-2 1209 7075640 ns/op 6506152 avg-ns 529.0 clears 16777216 worst-ns
PASS
ok issues/memclr 29.098s
`bytes.growSlice` calling `runtime.memclrNoHeapPointers`
$ go version
go version go1.23.0 darwin/arm64
$ go tool objdump -s 'bytes.growSlice$' `which go` | grep CALL
buffer.go:249 0x10012a3cc 97fd23cd CALL runtime.growslice(SB)
buffer.go:249 0x10012a3f0 97fd3db8 CALL runtime.memclrNoHeapPointers(SB)
buffer.go:250 0x10012a43c 97fd3e0d CALL runtime.memmove(SB)
buffer.go:251 0x10012a464 94000ef3 CALL bytes.growSlice.func1(SB)
buffer.go:251 0x10012a484 97fd3ca3 CALL runtime.panicSliceAcap(SB)
buffer.go:249 0x10012a488 97fc9d82 CALL runtime.panicmakeslicelen(SB)
buffer.go:249 0x10012a490 97fc2cb8 CALL runtime.deferreturn(SB)
buffer.go:229 0x10012a4c0 97fd332c CALL runtime.morestack_noctxt.abi0(SB)
black23eep
Metadata
Metadata
Assignees
Labels
NeedsInvestigationSomeone must examine and confirm this is a valid issue and not a duplicate of an existing one.Someone must examine and confirm this is a valid issue and not a duplicate of an existing one.Performancecompiler/runtimeIssues related to the Go compiler and/or runtime.Issues related to the Go compiler and/or runtime.
Type
Projects
Status
Todo