Skip to content

Commit 497c347

Browse files
committed
vfs: deflake TestOnDiskFull tests
Deflake the TestOnDiskFull* tests. Previously these tests relied on time.Sleep so that several goroutines would observe the same injected ENOSPC in parallel. This commit switches to using a mutex and condition variable to ensure all the injected ENOSPCs are observed before any of the goroutines continue. Fix #5197.
1 parent 56cf871 commit 497c347

File tree

1 file changed

+47
-24
lines changed

1 file changed

+47
-24
lines changed

vfs/disk_full_test.go

Lines changed: 47 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"sync/atomic"
1212
"syscall"
1313
"testing"
14-
"time"
1514

1615
"github.com/cockroachdb/errors"
1716
"github.com/stretchr/testify/require"
@@ -41,7 +40,7 @@ func TestOnDiskFull_FS(t *testing.T) {
4140
for name, fn := range filesystemWriteOps {
4241
t.Run(name, func(t *testing.T) {
4342
innerFS := &enospcMockFS{}
44-
innerFS.enospcs.Store(1)
43+
innerFS.addENOSPC(1)
4544
var callbackInvocations int
4645
fs := OnDiskFull(innerFS, func() {
4746
callbackInvocations++
@@ -55,7 +54,7 @@ func TestOnDiskFull_FS(t *testing.T) {
5554
require.Equal(t, 1, callbackInvocations)
5655
// The inner filesystem should be invoked twice because of the
5756
// retry.
58-
require.Equal(t, uint32(2), innerFS.invocations.Load())
57+
require.Equal(t, 2, innerFS.mu.invocations)
5958
})
6059
}
6160
}
@@ -72,7 +71,7 @@ func TestOnDiskFull_File(t *testing.T) {
7271
require.NoError(t, err)
7372

7473
// The next Write should ENOSPC.
75-
innerFS.enospcs.Store(1)
74+
innerFS.addENOSPC(1)
7675

7776
// Call the Write method on the wrapped file. The first call should return
7877
// ENOSPC, but also that six bytes were written. Our registered callback
@@ -84,7 +83,7 @@ func TestOnDiskFull_File(t *testing.T) {
8483
require.Equal(t, 1, callbackInvocations)
8584
// The inner filesystem should be invoked 3 times. Once during Create
8685
// and twice during Write.
87-
require.Equal(t, uint32(3), innerFS.invocations.Load())
86+
require.Equal(t, 3, innerFS.mu.invocations)
8887
})
8988
t.Run("Sync", func(t *testing.T) {
9089
innerFS := &enospcMockFS{bytesWritten: 6}
@@ -98,30 +97,29 @@ func TestOnDiskFull_File(t *testing.T) {
9897

9998
// The next Sync should ENOSPC. The callback should be invoked, but a
10099
// Sync cannot be retried.
101-
innerFS.enospcs.Store(1)
100+
innerFS.addENOSPC(1)
102101

103102
err = f.Sync()
104103
require.Error(t, err)
105104
require.Equal(t, 1, callbackInvocations)
106105
// The inner filesystem should be invoked 2 times. Once during Create
107106
// and once during Sync.
108-
require.Equal(t, uint32(2), innerFS.invocations.Load())
107+
require.Equal(t, 2, innerFS.mu.invocations)
109108
})
110109
}
111110

112111
func TestOnDiskFull_Concurrent(t *testing.T) {
113-
innerFS := &enospcMockFS{
114-
opDelay: 10 * time.Millisecond,
115-
}
116-
innerFS.enospcs.Store(10)
112+
const concurrentWriters = 10
113+
innerFS := &enospcMockFS{}
114+
innerFS.mu.enospcs = concurrentWriters
117115
var callbackInvocations atomic.Int32
118116
fs := OnDiskFull(innerFS, func() {
119117
callbackInvocations.Add(1)
120118
})
121119

122120
var wg sync.WaitGroup
123-
for i := 0; i < 10; i++ {
124-
wg.Add(1)
121+
wg.Add(concurrentWriters)
122+
for range concurrentWriters {
125123
go func() {
126124
defer wg.Done()
127125
_, err := fs.Create("foo", WriteCategoryUnspecified)
@@ -133,26 +131,51 @@ func TestOnDiskFull_Concurrent(t *testing.T) {
133131
// Since all operations should start before the first one returns an
134132
// ENOSPC, the callback should only be invoked once.
135133
require.Equal(t, int32(1), callbackInvocations.Load())
136-
require.Equal(t, uint32(20), innerFS.invocations.Load())
134+
require.Equal(t, 2*concurrentWriters, innerFS.mu.invocations)
137135
}
138136

139137
type enospcMockFS struct {
140138
FS
141-
opDelay time.Duration
142139
bytesWritten int
143-
enospcs atomic.Int32
144-
invocations atomic.Uint32
140+
141+
mu struct {
142+
sync.Mutex
143+
cond sync.Cond
144+
enospcs int
145+
invocations int
146+
}
145147
}
146148

147-
func (fs *enospcMockFS) maybeENOSPC() error {
148-
fs.invocations.Add(1)
149-
v := fs.enospcs.Add(-1)
149+
func (fs *enospcMockFS) addENOSPC(n int) {
150+
fs.mu.Lock()
151+
defer fs.mu.Unlock()
152+
fs.mu.enospcs += n
153+
}
150154

151-
// Sleep before returning so that tests may issue concurrent writes that
152-
// fall into the same write generation.
153-
time.Sleep(fs.opDelay)
155+
func (fs *enospcMockFS) maybeENOSPC() error {
156+
fs.mu.Lock()
157+
defer fs.mu.Unlock()
158+
if fs.mu.cond.L == nil {
159+
fs.mu.cond.L = &fs.mu.Mutex
160+
}
154161

155-
if v >= 0 {
162+
fs.mu.invocations++
163+
164+
// If there are any ENOSPCs left, inject one and return an error.
165+
if fs.mu.enospcs > 0 {
166+
fs.mu.enospcs--
167+
// If there are remaining ENOSPCs, we wait for the other goroutines to
168+
// reach this point. We wait on the condition variable until all ENOSPCs
169+
// have been consumed. If we are the last goroutine to reach this point,
170+
// we wake up any other goroutines waiting on the condition variable.
171+
// This ensures that all the gorouintes part of the same 'write
172+
// generation' are blocked together.
173+
if fs.mu.enospcs == 0 {
174+
fs.mu.cond.Broadcast()
175+
}
176+
for fs.mu.enospcs > 0 {
177+
fs.mu.cond.Wait()
178+
}
156179
// Wrap the error to test error unwrapping.
157180
err := &os.PathError{Op: "mock", Path: "mock", Err: syscall.ENOSPC}
158181
return errors.Wrap(err, "uh oh")

0 commit comments

Comments
 (0)