Skip to content

runtime: non-preemptible zeroing in clear, append, bytes.growSlice, etc #69327

@rhysh

Description

@rhysh

The clear and append built-ins can result in the need to zero an arbitrary amount of memory. For byte slices, the compiler appears to use a call to runtime.memclrNoHeapPointers. That function cannot be preempted, which can lead to arbitrary delays when another goroutine wants to stop the world (such as to start or end a GC cycle).

Applications that use bytes.Buffer can experience this when a call to bytes.(*Buffer).Write leads to a call to bytes.growSlice which uses append, as seen in one of the execution traces from #68399.

The runtime and compiler should collaborate to allow opportunities for preemption when zeroing large amounts of memory.

CC @golang/runtime @mknyszek


Reproducer, using `clear` built-in plus `runtime.ReadMemStats` to provide STWs
package memclr

import (
	"context"
	"fmt"
	"math"
	"runtime"
	"runtime/metrics"
	"sync"
	"testing"
	"time"
)

// BenchmarkMemclr runs one sub-benchmark per slice length, covering the
// powers of ten from 10^4 through 10^9 bytes. Each sub-benchmark is named
// "bytes=10^<exp>" and its body comes from testcaseMemclr.
func BenchmarkMemclr(b *testing.B) {
	const minExp, maxExp = 4, 9
	for e := minExp; e <= maxExp; e++ {
		n := int(math.Pow10(e))
		label := fmt.Sprintf("bytes=10^%d", e)
		b.Run(label, testcaseMemclr(n))
	}
}

// testcaseMemclr returns a benchmark body that allocates a single byte slice
// of length l and applies the clear built-in to it b.N times. It registers l
// as the number of bytes processed per iteration via b.SetBytes.
func testcaseMemclr(l int) func(b *testing.B) {
	run := func(b *testing.B) {
		b.SetBytes(int64(l))
		buf := make([]byte, l)
		for i := 0; i < b.N; i++ {
			clear(buf)
		}
	}
	return run
}

// BenchmarkSTW runs one sub-benchmark per background slice length, covering
// the powers of ten from 10^4 through 10^9 bytes. Each sub-benchmark is named
// "bytes=10^<exp>" and its body comes from testcaseSTW.
func BenchmarkSTW(b *testing.B) {
	const minExp, maxExp = 4, 9
	for e := minExp; e <= maxExp; e++ {
		n := int(math.Pow10(e))
		label := fmt.Sprintf("bytes=10^%d", e)
		b.Run(label, testcaseSTW(n))
	}
}

// testcaseSTW returns a benchmark body that samples the
// "/sched/pauses/stopping/other:seconds" histogram while a background
// goroutine repeatedly clears a byte slice of the given size.
//
// Each benchmark iteration calls runtime.ReadMemStats and then sleeps for
// 10µs. The histogram is read before and after that loop, and the difference
// is reported through three custom metrics:
//
//   - "worst-ns": worst(diff) scaled from seconds to nanoseconds
//   - "avg-ns":   avg(diff) scaled from seconds to nanoseconds
//   - "clears":   number of clear calls completed by the background goroutine
func testcaseSTW(size int) func(*testing.B) {
	const name = "/sched/pauses/stopping/other:seconds"

	return func(b *testing.B) {
		ctx, stop := context.WithCancel(context.Background())

		// cleared is written only by the goroutine below and is read only
		// after done.Wait has returned.
		cleared := 0
		var done sync.WaitGroup
		done.Add(1)
		go func() {
			defer done.Done()
			buf := make([]byte, size)
			for ctx.Err() == nil {
				clear(buf)
				cleared++
			}
		}()

		start := readMetric(name)
		var stats runtime.MemStats
		for i := 0; i < b.N; i++ {
			runtime.ReadMemStats(&stats)
			time.Sleep(10 * time.Microsecond) // allow others to make progress
		}
		end := readMetric(name)

		// Stop the background goroutine and wait for it before reading cleared.
		stop()
		done.Wait()

		// The metric is expressed in seconds; convert to nanoseconds for reporting.
		const nsPerSecond = float64(time.Second)
		pauses := delta(start.Float64Histogram(), end.Float64Histogram())
		b.ReportMetric(worst(pauses)*nsPerSecond, "worst-ns")
		b.ReportMetric(avg(pauses)*nsPerSecond, "avg-ns")
		b.ReportMetric(float64(cleared), "clears")
	}
}

// readMetric reads the single runtime metric identified by name and returns
// its current value as reported by metrics.Read.
func readMetric(name string) metrics.Value {
	var sample [1]metrics.Sample
	sample[0].Name = name
	metrics.Read(sample[:])
	return sample[0].Value
}

// delta returns a new histogram whose counts are b's counts minus a's counts,
// bucket by bucket. The result uses a's Buckets slice directly (not a copy);
// neither a.Counts nor b.Counts is modified.
func delta(a, b *metrics.Float64Histogram) *metrics.Float64Histogram {
	// Copy b's counts so the subtraction below does not touch the input.
	diff := append([]uint64(nil), b.Counts...)
	for i, earlier := range a.Counts {
		diff[i] -= earlier
	}
	return &metrics.Float64Histogram{
		Counts:  diff,
		Buckets: a.Buckets,
	}
}

// worst returns h.Buckets[i] for the highest index i whose count is non-zero,
// or 0 when every count is zero (or there are no counts at all).
func worst(h *metrics.Float64Histogram) float64 {
	// Scan from the top so the first populated bucket found is the answer.
	for i := len(h.Counts) - 1; i >= 0; i-- {
		if h.Counts[i] > 0 {
			return h.Buckets[i]
		}
	}
	return 0
}

// avg returns the count-weighted mean of the bucket values h.Buckets[i],
// using each bucket's entry in h.Counts as its weight. Buckets whose value is
// infinite or NaN are skipped entirely: they contribute neither to the sum
// nor to the sample count.
//
// If no samples remain after skipping (empty histogram, all counts zero, or
// all samples in skipped buckets), avg returns 0 rather than dividing by zero
// and producing NaN. This matches worst, which also returns 0 when there is
// nothing to report.
func avg(h *metrics.Float64Histogram) float64 {
	var sum float64
	var samples uint64
	for i, n := range h.Counts {
		bound := h.Buckets[i]
		if math.IsInf(bound, 0) || math.IsNaN(bound) {
			continue
		}
		sum += float64(n) * bound
		samples += n
	}
	if samples == 0 {
		return 0
	}
	return sum / float64(samples)
}
Reproducer results, showing average time to stop the world is more than 1 ms (instead of less than 10 µs) when another part of the app is clearing a 100 MB byte slice
$ GOGC=off go test -cpu=2 -bench=. ./memclr
goos: darwin
goarch: arm64
pkg: issues/memclr
cpu: Apple M1
BenchmarkMemclr/bytes=10^4-2             9421521               122.2 ns/op      81857.90 MB/s
BenchmarkMemclr/bytes=10^5-2             1000000              1433 ns/op        69779.61 MB/s
BenchmarkMemclr/bytes=10^6-2               99464             15148 ns/op        66016.50 MB/s
BenchmarkMemclr/bytes=10^7-2                8704            153918 ns/op        64969.54 MB/s
BenchmarkMemclr/bytes=10^8-2                 758           1632702 ns/op        61248.17 MB/s
BenchmarkMemclr/bytes=10^9-2                  67          16443990 ns/op        60812.49 MB/s
BenchmarkSTW/bytes=10^4-2                  29718             40598 ns/op              5473 avg-ns          2912452 clears            98304 worst-ns
BenchmarkSTW/bytes=10^5-2                  29895             38866 ns/op              4920 avg-ns           560027 clears            81920 worst-ns
BenchmarkSTW/bytes=10^6-2                  26226             44481 ns/op              8116 avg-ns            70132 clears            16384 worst-ns
BenchmarkSTW/bytes=10^7-2                   8925            164844 ns/op            120482 avg-ns             8919 clears           655360 worst-ns
BenchmarkSTW/bytes=10^8-2                   2184           1571734 ns/op           1376487 avg-ns             2102 clears          4194304 worst-ns
BenchmarkSTW/bytes=10^9-2                   1209           7075640 ns/op           6506152 avg-ns              529.0 clears       16777216 worst-ns
PASS
ok      issues/memclr   29.098s
`bytes.growSlice` calling `runtime.memclrNoHeapPointers`
$ go version
go version go1.23.0 darwin/arm64

$ go tool objdump -s 'bytes.growSlice$' `which go` | grep CALL
  buffer.go:249         0x10012a3cc             97fd23cd                CALL runtime.growslice(SB)              
  buffer.go:249         0x10012a3f0             97fd3db8                CALL runtime.memclrNoHeapPointers(SB)   
  buffer.go:250         0x10012a43c             97fd3e0d                CALL runtime.memmove(SB)                
  buffer.go:251         0x10012a464             94000ef3                CALL bytes.growSlice.func1(SB)          
  buffer.go:251         0x10012a484             97fd3ca3                CALL runtime.panicSliceAcap(SB)         
  buffer.go:249         0x10012a488             97fc9d82                CALL runtime.panicmakeslicelen(SB)      
  buffer.go:249         0x10012a490             97fc2cb8                CALL runtime.deferreturn(SB)            
  buffer.go:229         0x10012a4c0             97fd332c                CALL runtime.morestack_noctxt.abi0(SB)

Metadata

Metadata

Assignees

No one assigned

    Labels

    NeedsInvestigation (Someone must examine and confirm this is a valid issue and not a duplicate of an existing one.), Performance, compiler/runtime (Issues related to the Go compiler and/or runtime.)

    Type

    No type

    Projects

    Status

    Todo

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions