From 0b9974d3f09fe3132b4bc4aef67b839e3f84a8c8 Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 2 Feb 2023 22:13:18 -0800 Subject: [PATCH] math/rand: use fastrand64 if possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the top-level math/rand functions are auto-seeded by default (issue #54880), use the runtime fastrand64 function when 1) Seed has not been called; 2) the GODEBUG randautoseed=0 is not used. The benchmarks run quickly and are relatively noisy, but they show significant improvements for parallel calls to the top-level functions. goos: linux goarch: amd64 pkg: math/rand cpu: 11th Gen Intel(R) Core(TM) i7-11850H @ 2.50GHz │ /tmp/foo.1 │ /tmp/foo.2 │ │ sec/op │ sec/op vs base │ Int63Threadsafe-16 11.605n ± 1% 3.094n ± 1% -73.34% (p=0.000 n=10) Int63ThreadsafeParallel-16 67.8350n ± 2% 0.4000n ± 1% -99.41% (p=0.000 n=10) Int63Unthreadsafe-16 1.947n ± 3% 1.924n ± 2% ~ (p=0.189 n=10) Intn1000-16 4.295n ± 2% 4.287n ± 3% ~ (p=0.517 n=10) Int63n1000-16 4.379n ± 0% 4.192n ± 2% -4.27% (p=0.000 n=10) Int31n1000-16 3.641n ± 3% 3.506n ± 0% -3.69% (p=0.000 n=10) Float32-16 3.330n ± 7% 3.250n ± 2% -2.40% (p=0.017 n=10) Float64-16 2.194n ± 6% 2.056n ± 4% -6.31% (p=0.004 n=10) Perm3-16 43.39n ± 9% 38.28n ± 12% -11.77% (p=0.015 n=10) Perm30-16 324.4n ± 6% 315.9n ± 19% ~ (p=0.315 n=10) Perm30ViaShuffle-16 175.4n ± 1% 143.6n ± 2% -18.15% (p=0.000 n=10) ShuffleOverhead-16 223.4n ± 2% 215.8n ± 1% -3.38% (p=0.000 n=10) Read3-16 5.428n ± 3% 5.406n ± 2% ~ (p=0.780 n=10) Read64-16 41.55n ± 5% 40.14n ± 3% -3.38% (p=0.000 n=10) Read1000-16 622.9n ± 4% 594.9n ± 2% -4.50% (p=0.000 n=10) Concurrent-16 136.300n ± 2% 4.647n ± 26% -96.59% (p=0.000 n=10) geomean 23.40n 12.15n -48.08% Fixes #49892 Change-Id: Iba75b326145512ab0b7ece233b98ac3d4e1fb504 Reviewed-on: https://go-review.googlesource.com/c/go/+/465037 Run-TryBot: Ian Lance Taylor TryBot-Result: Gopher Robot Reviewed-by: Ian Lance Taylor Reviewed-by: Brad Fitzpatrick Reviewed-by: Russ Cox Auto-Submit: Ian Lance Taylor --- src/math/rand/default_test.go | 148 ++++++++++++++++++++++++++++++ src/math/rand/rand.go | 164 +++++++++++++++++++++++++--------- src/math/rand/rand_test.go | 16 ++++ 3 files changed, 284 insertions(+), 44 deletions(-) create mode 100644 src/math/rand/default_test.go diff --git a/src/math/rand/default_test.go b/src/math/rand/default_test.go new file mode 100644 index 0000000000000..19fd75dfd1770 --- /dev/null +++ b/src/math/rand/default_test.go @@ -0,0 +1,148 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package rand_test + +import ( + "fmt" + "internal/race" + "internal/testenv" + . "math/rand" + "os" + "runtime" + "strconv" + "sync" + "testing" +) + +// Test that racy access to the default functions behaves reasonably. +func TestDefaultRace(t *testing.T) { + // Skip the test in short mode, but even in short mode run + // the test if we are using the race detector, because part + // of this is to see whether the race detector reports any problems. + if testing.Short() && !race.Enabled { + t.Skip("skipping starting another executable in short mode") + } + + const env = "GO_RAND_TEST_HELPER_CODE" + if v := os.Getenv(env); v != "" { + doDefaultTest(t, v) + return + } + + t.Parallel() + + for i := 0; i < 6; i++ { + i := i + t.Run(strconv.Itoa(i), func(t *testing.T) { + t.Parallel() + exe, err := os.Executable() + if err != nil { + exe = os.Args[0] + } + cmd := testenv.Command(t, exe, "-test.run=TestDefaultRace") + cmd = testenv.CleanCmdEnv(cmd) + cmd.Env = append(cmd.Env, fmt.Sprintf("GO_RAND_TEST_HELPER_CODE=%d", i/2)) + if i%2 != 0 { + cmd.Env = append(cmd.Env, "GODEBUG=randautoseed=0") + } + out, err := cmd.CombinedOutput() + if len(out) > 0 { + t.Logf("%s", out) + } + if err != nil { + t.Error(err) + } + }) + } +} + +// doDefaultTest should be run before there have been any calls to the +// top-level math/rand functions. Make sure that we can make concurrent +// calls to top-level functions and to Seed without any duplicate values. +// This will also give the race detector a change to report any problems. +func doDefaultTest(t *testing.T, v string) { + code, err := strconv.Atoi(v) + if err != nil { + t.Fatalf("internal error: unrecognized code %q", v) + } + + goroutines := runtime.GOMAXPROCS(0) + if goroutines < 4 { + goroutines = 4 + } + + ch := make(chan uint64, goroutines*3) + var wg sync.WaitGroup + + // The various tests below should not cause race detector reports + // and should not produce duplicate results. + // + // Note: these tests can theoretically fail when using fastrand64 + // in that it is possible to coincidentally get the same random + // number twice. That could happen something like 1 / 2**64 times, + // which is rare enough that it may never happen. We don't worry + // about that case. + + switch code { + case 0: + // Call Seed and Uint64 concurrently. + wg.Add(goroutines) + for i := 0; i < goroutines; i++ { + go func(s int64) { + defer wg.Done() + Seed(s) + }(int64(i) + 100) + } + wg.Add(goroutines) + for i := 0; i < goroutines; i++ { + go func() { + defer wg.Done() + ch <- Uint64() + }() + } + case 1: + // Call Uint64 concurrently with no Seed. + wg.Add(goroutines) + for i := 0; i < goroutines; i++ { + go func() { + defer wg.Done() + ch <- Uint64() + }() + } + case 2: + // Start with Uint64 to pick the fast source, then call + // Seed and Uint64 concurrently. + ch <- Uint64() + wg.Add(goroutines) + for i := 0; i < goroutines; i++ { + go func(s int64) { + defer wg.Done() + Seed(s) + }(int64(i) + 100) + } + wg.Add(goroutines) + for i := 0; i < goroutines; i++ { + go func() { + defer wg.Done() + ch <- Uint64() + }() + } + default: + t.Fatalf("internal error: unrecognized code %d", code) + } + + go func() { + wg.Wait() + close(ch) + }() + + m := make(map[uint64]bool) + for i := range ch { + if m[i] { + t.Errorf("saw %d twice", i) + } + m[i] = true + } +} diff --git a/src/math/rand/rand.go b/src/math/rand/rand.go index 7448ee1751e67..612b34d53e3b8 100644 --- a/src/math/rand/rand.go +++ b/src/math/rand/rand.go @@ -20,6 +20,7 @@ package rand import ( "internal/godebug" "sync" + "sync/atomic" _ "unsafe" // for go:linkname ) @@ -269,8 +270,11 @@ func (r *Rand) Shuffle(n int, swap func(i, j int)) { // always returns len(p) and a nil error. // Read should not be called concurrently with any other Rand method. func (r *Rand) Read(p []byte) (n int, err error) { - if lk, ok := r.src.(*lockedSource); ok { - return lk.read(p, &r.readVal, &r.readPos) + switch src := r.src.(type) { + case *lockedSource: + return src.read(p, &r.readVal, &r.readPos) + case *fastSource: + return src.read(p, &r.readVal, &r.readPos) } return read(p, r.src, &r.readVal, &r.readPos) } @@ -301,7 +305,75 @@ func read(p []byte, src Source, readVal *int64, readPos *int8) (n int, err error * Top-level convenience functions */ -var globalRand = New(new(lockedSource)) +// globalRandGenerator is the source of random numbers for the top-level +// convenience functions. When possible it uses the runtime fastrand64 +// function to avoid locking. This is not possible if the user called Seed, +// either explicitly or implicitly via GODEBUG=randautoseed=0. +var globalRandGenerator atomic.Pointer[Rand] + +var randautoseed = godebug.New("randautoseed") + +// globalRand returns the generator to use for the top-level convenience +// functions. +func globalRand() *Rand { + if r := globalRandGenerator.Load(); r != nil { + return r + } + + // This is the first call. Initialize based on GODEBUG. + var r *Rand + if randautoseed.Value() == "0" { + randautoseed.IncNonDefault() + r = New(new(lockedSource)) + r.Seed(1) + } else { + r = &Rand{ + src: &fastSource{}, + s64: &fastSource{}, + } + } + + if !globalRandGenerator.CompareAndSwap(nil, r) { + // Two different goroutines called some top-level + // function at the same time. While the results in + // that case are unpredictable, if we just use r here, + // and we are using a seed, we will most likely return + // the same value for both calls. That doesn't seem ideal. + // Just use the first one to get in. + return globalRandGenerator.Load() + } + + return r +} + +//go:linkname fastrand64 +func fastrand64() uint64 + +// fastSource is an implementation of Source64 that uses the runtime +// fastrand functions. +type fastSource struct { + // The mutex is used to avoid race conditions in Read. + mu sync.Mutex +} + +func (*fastSource) Int63() int64 { + return int64(fastrand64() & rngMask) +} + +func (*fastSource) Seed(int64) { + panic("internal error: call to fastSource.Seed") +} + +func (*fastSource) Uint64() uint64 { + return fastrand64() +} + +func (fs *fastSource) read(p []byte, readVal *int64, readPos *int8) (n int, err error) { + fs.mu.Lock() + n, err = read(p, fs, readVal, readPos) + fs.mu.Unlock() + return +} // Seed uses the provided seed value to initialize the default Source to a // deterministic state. Seed values that have the same remainder when @@ -321,65 +393,90 @@ var globalRand = New(new(lockedSource)) // from the global random source. To avoid such breakages, programs // that need a specific result sequence should use NewRand(NewSource(seed)) // to obtain a random generator that other packages cannot access. -func Seed(seed int64) { globalRand.Seed(seed) } +func Seed(seed int64) { + orig := globalRandGenerator.Load() + + // If we are already using a lockedSource, we can just re-seed it. + if orig != nil { + if _, ok := orig.src.(*lockedSource); ok { + orig.Seed(seed) + return + } + } + + // Otherwise either + // 1) orig == nil, which is the normal case when Seed is the first + // top-level function to be called, or + // 2) orig is already a fastSource, in which case we need to change + // to a lockedSource. + // Either way we do the same thing. + + r := New(new(lockedSource)) + r.Seed(seed) + + if !globalRandGenerator.CompareAndSwap(orig, r) { + // Something changed underfoot. Retry to be safe. + Seed(seed) + } +} // Int63 returns a non-negative pseudo-random 63-bit integer as an int64 // from the default Source. -func Int63() int64 { return globalRand.Int63() } +func Int63() int64 { return globalRand().Int63() } // Uint32 returns a pseudo-random 32-bit value as a uint32 // from the default Source. -func Uint32() uint32 { return globalRand.Uint32() } +func Uint32() uint32 { return globalRand().Uint32() } // Uint64 returns a pseudo-random 64-bit value as a uint64 // from the default Source. -func Uint64() uint64 { return globalRand.Uint64() } +func Uint64() uint64 { return globalRand().Uint64() } // Int31 returns a non-negative pseudo-random 31-bit integer as an int32 // from the default Source. -func Int31() int32 { return globalRand.Int31() } +func Int31() int32 { return globalRand().Int31() } // Int returns a non-negative pseudo-random int from the default Source. -func Int() int { return globalRand.Int() } +func Int() int { return globalRand().Int() } // Int63n returns, as an int64, a non-negative pseudo-random number in the half-open interval [0,n) // from the default Source. // It panics if n <= 0. -func Int63n(n int64) int64 { return globalRand.Int63n(n) } +func Int63n(n int64) int64 { return globalRand().Int63n(n) } // Int31n returns, as an int32, a non-negative pseudo-random number in the half-open interval [0,n) // from the default Source. // It panics if n <= 0. -func Int31n(n int32) int32 { return globalRand.Int31n(n) } +func Int31n(n int32) int32 { return globalRand().Int31n(n) } // Intn returns, as an int, a non-negative pseudo-random number in the half-open interval [0,n) // from the default Source. // It panics if n <= 0. -func Intn(n int) int { return globalRand.Intn(n) } +func Intn(n int) int { return globalRand().Intn(n) } // Float64 returns, as a float64, a pseudo-random number in the half-open interval [0.0,1.0) // from the default Source. -func Float64() float64 { return globalRand.Float64() } +func Float64() float64 { return globalRand().Float64() } // Float32 returns, as a float32, a pseudo-random number in the half-open interval [0.0,1.0) // from the default Source. -func Float32() float32 { return globalRand.Float32() } +func Float32() float32 { return globalRand().Float32() } // Perm returns, as a slice of n ints, a pseudo-random permutation of the integers // in the half-open interval [0,n) from the default Source. -func Perm(n int) []int { return globalRand.Perm(n) } +func Perm(n int) []int { return globalRand().Perm(n) } // Shuffle pseudo-randomizes the order of elements using the default Source. // n is the number of elements. Shuffle panics if n < 0. // swap swaps the elements with indexes i and j. -func Shuffle(n int, swap func(i, j int)) { globalRand.Shuffle(n, swap) } +func Shuffle(n int, swap func(i, j int)) { globalRand().Shuffle(n, swap) } // Read generates len(p) random bytes from the default Source and // writes them into p. It always returns len(p) and a nil error. // Read, unlike the Rand.Read method, is safe for concurrent use. // // Deprecated: For almost all use cases, crypto/rand.Read is more appropriate. -func Read(p []byte) (n int, err error) { return globalRand.Read(p) } +func Read(p []byte) (n int, err error) { return globalRand().Read(p) } // NormFloat64 returns a normally distributed float64 in the range // [-math.MaxFloat64, +math.MaxFloat64] with @@ -389,7 +486,7 @@ func Read(p []byte) (n int, err error) { return globalRand.Read(p) } // adjust the output using: // // sample = NormFloat64() * desiredStdDev + desiredMean -func NormFloat64() float64 { return globalRand.NormFloat64() } +func NormFloat64() float64 { return globalRand().NormFloat64() } // ExpFloat64 returns an exponentially distributed float64 in the range // (0, +math.MaxFloat64] with an exponential distribution whose rate parameter @@ -398,44 +495,23 @@ func NormFloat64() float64 { return globalRand.NormFloat64() } // callers can adjust the output using: // // sample = ExpFloat64() / desiredRateParameter -func ExpFloat64() float64 { return globalRand.ExpFloat64() } +func ExpFloat64() float64 { return globalRand().ExpFloat64() } type lockedSource struct { lk sync.Mutex - s *rngSource // nil if not yet allocated -} - -//go:linkname fastrand64 -func fastrand64() uint64 - -var randautoseed = godebug.New("randautoseed") - -// source returns r.s, allocating and seeding it if needed. -// The caller must have locked r. -func (r *lockedSource) source() *rngSource { - if r.s == nil { - var seed int64 - if randautoseed.Value() == "0" { - randautoseed.IncNonDefault() - seed = 1 - } else { - seed = int64(fastrand64()) - } - r.s = newSource(seed) - } - return r.s + s *rngSource } func (r *lockedSource) Int63() (n int64) { r.lk.Lock() - n = r.source().Int63() + n = r.s.Int63() r.lk.Unlock() return } func (r *lockedSource) Uint64() (n uint64) { r.lk.Lock() - n = r.source().Uint64() + n = r.s.Uint64() r.lk.Unlock() return } @@ -467,7 +543,7 @@ func (r *lockedSource) seed(seed int64) { // read implements Read for a lockedSource without a race condition. func (r *lockedSource) read(p []byte, readVal *int64, readPos *int8) (n int, err error) { r.lk.Lock() - n, err = read(p, r.source(), readVal, readPos) + n, err = read(p, r.s, readVal, readPos) r.lk.Unlock() return } diff --git a/src/math/rand/rand_test.go b/src/math/rand/rand_test.go index 462de8b73b4d5..7eba1dc0d3256 100644 --- a/src/math/rand/rand_test.go +++ b/src/math/rand/rand_test.go @@ -14,6 +14,7 @@ import ( . "math/rand" "os" "runtime" + "sync" "testing" "testing/iotest" ) @@ -683,3 +684,18 @@ func BenchmarkRead1000(b *testing.B) { r.Read(buf) } } + +func BenchmarkConcurrent(b *testing.B) { + const goroutines = 4 + var wg sync.WaitGroup + wg.Add(goroutines) + for i := 0; i < goroutines; i++ { + go func() { + defer wg.Done() + for n := b.N; n > 0; n-- { + Int63() + } + }() + } + wg.Wait() +}