@@ -25,6 +25,7 @@ import (
25
25
"github.com/cockroachdb/datadriven"
26
26
"github.com/cockroachdb/errors"
27
27
"github.com/cockroachdb/pebble/internal/base"
28
+ "github.com/cockroachdb/pebble/internal/buildtags"
28
29
"github.com/cockroachdb/pebble/internal/cache"
29
30
"github.com/cockroachdb/pebble/internal/testkeys"
30
31
"github.com/cockroachdb/pebble/internal/testutils"
@@ -2160,6 +2161,8 @@ func TestRecycleLogs(t *testing.T) {
2160
2161
2161
2162
type sstAndLogFileBlockingFS struct {
2162
2163
vfs.FS
2164
+ blockWAL bool
2165
+ blockSST bool
2163
2166
unblocker sync.WaitGroup
2164
2167
}
2165
2168
@@ -2168,7 +2171,8 @@ var _ vfs.FS = &sstAndLogFileBlockingFS{}
2168
2171
func (fs * sstAndLogFileBlockingFS ) Create (
2169
2172
name string , category vfs.DiskWriteCategory ,
2170
2173
) (vfs.File , error ) {
2171
- if strings .HasSuffix (name , ".log" ) || strings .HasSuffix (name , ".sst" ) {
2174
+ if (strings .HasSuffix (name , ".log" ) && fs .blockWAL ) ||
2175
+ (strings .HasSuffix (name , ".sst" ) && fs .blockSST ) {
2172
2176
fs .unblocker .Wait ()
2173
2177
}
2174
2178
return fs .FS .Create (name , category )
@@ -2178,16 +2182,16 @@ func (fs *sstAndLogFileBlockingFS) unblock() {
2178
2182
fs .unblocker .Done ()
2179
2183
}
2180
2184
2181
- func newBlockingFS (fs vfs.FS ) * sstAndLogFileBlockingFS {
2182
- lfbfs := & sstAndLogFileBlockingFS {FS : fs }
2185
+ func newBlockingFS (fs vfs.FS , blockWAL , blockSST bool ) * sstAndLogFileBlockingFS {
2186
+ lfbfs := & sstAndLogFileBlockingFS {FS : fs , blockWAL : blockWAL , blockSST : blockSST }
2183
2187
lfbfs .unblocker .Add (1 )
2184
2188
return lfbfs
2185
2189
}
2186
2190
2187
2191
func TestWALFailoverAvoidsWriteStall (t * testing.T ) {
2188
2192
mem := vfs .NewMem ()
2189
2193
// All sst and log creation is blocked.
2190
- primaryFS := newBlockingFS (mem )
2194
+ primaryFS := newBlockingFS (mem , true /*blockWAL*/ , true /*blockSST*/ )
2191
2195
// Secondary for WAL failover can do log creation.
2192
2196
secondary := wal.Dir {FS : mem , Dirname : "secondary" }
2193
2197
walFailover := & WALFailoverOptions {Secondary : secondary , FailoverOptions : wal.FailoverOptions {
@@ -2221,6 +2225,73 @@ func TestWALFailoverAvoidsWriteStall(t *testing.T) {
2221
2225
primaryFS .unblock ()
2222
2226
}
2223
2227
2228
+ type testLogManager struct {
2229
+ wal.Manager
2230
+ elevateWriteStallThreshold atomic.Bool
2231
+ }
2232
+
2233
+ func (tlm * testLogManager ) ElevateWriteStallThresholdForFailover () bool {
2234
+ return tlm .elevateWriteStallThreshold .Load ()
2235
+ }
2236
+
2237
+ func TestElevateThresholdAfterWriteStallUnblocksStall (t * testing.T ) {
2238
+ mem := vfs .NewMem ()
2239
+ // All sst writes are blocked.
2240
+ blockingFS := newBlockingFS (mem , false /*blockWAL*/ , true /*blockSST*/ )
2241
+ writeStallBeginCh := make (chan struct {}, 1 )
2242
+ el := EventListener {
2243
+ WriteStallBegin : func (_ WriteStallBeginInfo ) {
2244
+ writeStallBeginCh <- struct {}{}
2245
+ },
2246
+ }
2247
+ o := & Options {
2248
+ FS : blockingFS ,
2249
+ MemTableSize : 4 << 20 ,
2250
+ MemTableStopWritesThreshold : 2 ,
2251
+ Logger : testutils.Logger {T : t },
2252
+ EventListener : & el ,
2253
+ }
2254
+ d , err := Open ("" , o )
2255
+ // Replace the log manager with one that can elevate the write stall threshold.
2256
+ d .mu .Lock ()
2257
+ testWALManager := & testLogManager {Manager : d .mu .log .manager }
2258
+ d .mu .log .manager = testWALManager
2259
+ d .mu .Unlock ()
2260
+ require .NoError (t , err )
2261
+ value := make ([]byte , 1 << 20 )
2262
+ for i := range value {
2263
+ value [i ] = byte (rand .Uint32 ())
2264
+ }
2265
+ go func () {
2266
+ // Wait for write stall to begin.
2267
+ <- writeStallBeginCh
2268
+ t .Logf ("write stall has begun" )
2269
+ testWALManager .elevateWriteStallThreshold .Store (true )
2270
+ }()
2271
+ done := make (chan struct {})
2272
+ go func () {
2273
+ // After ~8 writes, the default write stall threshold is exceeded.
2274
+ // It is observed by the above goroutine, which removes the stall.
2275
+ for i := 0 ; i < 200 ; i ++ {
2276
+ require .NoError (t , d .Set ([]byte (fmt .Sprintf ("%d" , i )), value , nil ))
2277
+ }
2278
+ done <- struct {}{}
2279
+ }()
2280
+ timeout := 15 * time .Second
2281
+ if buildtags .SlowBuild {
2282
+ timeout = time .Minute
2283
+ }
2284
+ select {
2285
+ case <- time .After (timeout ):
2286
+ t .Fatalf ("write stall did not terminate" )
2287
+ case <- done :
2288
+ }
2289
+ require .True (t , testWALManager .elevateWriteStallThreshold .Load ())
2290
+ // Unblock the writes to allow the DB to close.
2291
+ blockingFS .unblock ()
2292
+ require .NoError (t , d .Close ())
2293
+ }
2294
+
2224
2295
// TestDeterminism is a datadriven test intended to validate determinism of
2225
2296
// operations in the face of concurrency or randomizing of operations. The test
2226
2297
// data defines a sequence of commands run sequentially. Then the test may
0 commit comments