@@ -10,6 +10,7 @@ import (
10
10
randv1 "math/rand"
11
11
"math/rand/v2"
12
12
"reflect"
13
+ "slices"
13
14
"strings"
14
15
"testing"
15
16
"testing/quick"
@@ -18,6 +19,7 @@ import (
18
19
"github.com/cockroachdb/pebble"
19
20
"github.com/cockroachdb/pebble/bloom"
20
21
"github.com/cockroachdb/pebble/cockroachkvs"
22
+ "github.com/cockroachdb/pebble/internal/base"
21
23
"github.com/cockroachdb/pebble/metamorphic"
22
24
"github.com/cockroachdb/pebble/sstable"
23
25
"github.com/cockroachdb/pebble/vfs"
@@ -42,7 +44,7 @@ func TestIteratorErrors(t *testing.T) {
42
44
// WriteOpConfig. We'll perform ~10,000 random operations that mutate the
43
45
// state of the database.
44
46
kf := metamorphic .TestkeysKeyFormat
45
- testOpts := metamorphic .RandomOptions (rng , kf , nil /* custom opt parsers */ )
47
+ testOpts := metamorphic .RandomOptions (rng , kf , metamorphic. RandomOptionsCfg {} )
46
48
// With even a very small injection probability, it's relatively
47
49
// unlikely that pebble.DebugCheckLevels will successfully complete
48
50
// without being interrupted by an ErrInjected. Omit these checks.
@@ -251,12 +253,174 @@ func buildSeparatedValuesDB(
251
253
return db , keys
252
254
}
253
255
256
+ // TestDoubleRestart checks that we are not in a precarious state immediately
257
+ // after restart. This could happen if we remove WALs before all necessary
258
+ // flushes are complete. Steps:
259
+ //
260
+ // 1. Use metamorphic to run operations on a store; the resulting filesystem is
261
+ // cloned multiple times in subsequent steps.
262
+ // 2. Clone the FS and start a "golden" store; read all KVs that will be used
263
+ // as the source of truth.
264
+ // 3. Independently clone the FS again and open a store; sleep for a small
265
+ // random amount then close and reopen the store. Then read KVs and
266
+ // cross-check against the "golden" KVs in step 2.
267
+ // 4. Repeat step 3 multiple times, each time with a new independent clone.
268
+ //
269
+ // This test was used to reproduce the failure in
270
+ // https://github.com/cockroachdb/cockroach/issues/148419.
271
+ func TestDoubleRestart (t * testing.T ) {
272
+ seed := time .Now ().UnixNano ()
273
+ t .Logf ("Using seed %d" , seed )
274
+ rng := rand .New (rand .NewPCG (0 , uint64 (seed )))
275
+
276
+ kf := metamorphic .TestkeysKeyFormat
277
+ testOpts := metamorphic .RandomOptions (rng , kf , metamorphic.RandomOptionsCfg {
278
+ AlwaysStrictFS : true ,
279
+ NoRemoteStorage : true ,
280
+ })
281
+ metaFS := testOpts .Opts .FS .(* vfs.MemFS )
282
+
283
+ var metaTestOutput bytes.Buffer
284
+ {
285
+ // Disable restart (it changes the FS internally and we don't have access to
286
+ // the new one).
287
+ opCfg := metamorphic .WriteOpConfig ().WithOpWeight (metamorphic .OpDBRestart , 0 )
288
+ test , err := metamorphic .New (
289
+ metamorphic .GenerateOps (rng , 2000 , kf , opCfg ),
290
+ testOpts , "" /* dir */ , & metaTestOutput ,
291
+ )
292
+ require .NoError (t , err )
293
+ require .NoError (t , metamorphic .Execute (test ))
294
+ }
295
+ defer func () {
296
+ if t .Failed () {
297
+ t .Logf ("Meta test output:\n %s" , metaTestOutput .String ())
298
+ }
299
+ }()
300
+
301
+ // makeOpts creates the options for a db that starts from a clone of metaFS.
302
+ makeOpts := func (logger base.Logger ) * pebble.Options {
303
+ opts := testOpts .Opts .Clone ()
304
+
305
+ // Make a copy of the metaFS.
306
+ opts .FS = vfs .NewMem ()
307
+ ok , err := vfs .Clone (metaFS , opts .FS , "" , "" )
308
+ require .NoError (t , err )
309
+ require .True (t , ok )
310
+
311
+ opts .FS = errorfs .Wrap (opts .FS , errorfs .RandomLatency (
312
+ errorfs .Randomly (0.8 , rng .Int64 ()),
313
+ 10 * time .Microsecond ,
314
+ rng .Int64 (),
315
+ time .Millisecond ,
316
+ ))
317
+
318
+ if opts .WALFailover != nil {
319
+ wf := * opts .WALFailover
320
+ wf .Secondary .FS = opts .FS
321
+ opts .WALFailover = & wf
322
+ }
323
+ opts .Logger = logger
324
+ opts .LoggerAndTracer = nil
325
+ lel := pebble .MakeLoggingEventListener (logger )
326
+ opts .EventListener = & lel
327
+ return opts
328
+ }
329
+
330
+ // Open the "golden" filesystem and scan it to get the expected KVs.
331
+ goldenLog := & base.InMemLogger {}
332
+ defer func () {
333
+ if t .Failed () {
334
+ t .Logf ("Golden db logs:\n %s\n " , goldenLog .String ())
335
+ }
336
+ }()
337
+ db , err := pebble .Open ("" , makeOpts (goldenLog ))
338
+ require .NoError (t , err )
339
+ goldenKeys , goldenVals := getKVs (t , db )
340
+ require .NoError (t , db .Close ())
341
+
342
+ // Repeatedly open and quickly close the database, verifying that we did not
343
+ // lose any data during this restart (e.g. because we prematurely deleted
344
+ // WALs).
345
+ for iter := 0 ; iter < 10 ; iter ++ {
346
+ func () {
347
+ dbLog := & base.InMemLogger {}
348
+ defer func () {
349
+ if t .Failed () {
350
+ t .Logf ("Db logs:\n %s\n " , dbLog .String ())
351
+ }
352
+ }()
353
+ opts := makeOpts (dbLog )
354
+ // Sometimes reduce the memtable size to trigger the large batch recovery
355
+ // code path.
356
+ if rng .IntN (2 ) == 0 {
357
+ opts .MemTableSize = 1200
358
+ }
359
+ dbLog .Infof ("Opening db\n " )
360
+ db , err := pebble .Open ("" , opts )
361
+ require .NoError (t , err )
362
+ if rng .IntN (2 ) == 0 {
363
+ d := time .Duration (rng .IntN (1000 )) * time .Microsecond
364
+ dbLog .Infof ("Sleeping %s" , d )
365
+ time .Sleep (d )
366
+ }
367
+ dbLog .Infof ("Closing db" )
368
+ require .NoError (t , db .Close ())
369
+
370
+ dbLog .Infof ("Reopening db" )
371
+ db , err = pebble .Open ("" , opts )
372
+ require .NoError (t , err )
373
+ dbLog .Infof ("Checking KVs" )
374
+ checkKVs (t , db , opts .Comparer , goldenKeys , goldenVals )
375
+ dbLog .Infof ("Closing db" )
376
+ require .NoError (t , db .Close ())
377
+ }()
378
+ }
379
+ }
380
+
381
+ // getKVs retrieves and returns all keys and values from the database in order.
382
+ func getKVs (t * testing.T , db * pebble.DB ) (keys [][]byte , vals [][]byte ) {
383
+ t .Helper ()
384
+ it , err := db .NewIter (& pebble.IterOptions {})
385
+ require .NoError (t , err )
386
+ for valid := it .First (); valid ; valid = it .Next () {
387
+ keys = append (keys , slices .Clone (it .Key ()))
388
+ val , err := it .ValueAndErr ()
389
+ require .NoError (t , err )
390
+ vals = append (vals , slices .Clone (val ))
391
+ }
392
+ require .NoError (t , it .Close ())
393
+ return keys , vals
394
+ }
395
+
396
+ // checkKVs checks that the keys and values in the database match the expected ones.
397
+ func checkKVs (
398
+ t * testing.T , db * pebble.DB , cmp * base.Comparer , expectedKeys [][]byte , expectedVals [][]byte ,
399
+ ) {
400
+ keys , vals := getKVs (t , db )
401
+ for i := 0 ; i < len (keys ) || i < len (expectedKeys ); i ++ {
402
+ if i < len (keys ) && i < len (expectedKeys ) && cmp .Equal (keys [i ], expectedKeys [i ]) {
403
+ continue
404
+ }
405
+ if i < len (keys ) && (i == len (expectedKeys ) || cmp .Compare (keys [i ], expectedKeys [i ]) < 0 ) {
406
+ t .Fatalf ("extra key: %q\n " , cmp .FormatKey (keys [i ]))
407
+ }
408
+ t .Fatalf ("missing key: %q\n " , cmp .FormatKey (expectedKeys [i ]))
409
+ }
410
+ for i := range vals {
411
+ // require.Equalf by itself fails if one is nil and the other is a non-nil empty slice.
412
+ if ! bytes .Equal (vals [i ], expectedVals [i ]) {
413
+ require .Equalf (t , expectedVals [i ], vals [i ], "key %q value msimatch" , cmp .FormatKey (keys [i ]))
414
+ }
415
+ }
416
+ }
417
+
254
418
func TestOptionsClone (t * testing.T ) {
255
419
seed := time .Now ().UnixNano ()
256
420
t .Logf ("Using seed %d" , seed )
257
421
rng := rand .New (rand .NewPCG (0 , uint64 (seed )))
258
422
259
- a := metamorphic .RandomOptions (rng , metamorphic .TestkeysKeyFormat , nil /* custom opt parsers */ ).Opts
423
+ a := metamorphic .RandomOptions (rng , metamorphic .TestkeysKeyFormat , metamorphic. RandomOptionsCfg {} ).Opts
260
424
b := a .Clone ()
261
425
if rng .IntN (2 ) == 0 {
262
426
a , b = b , a
0 commit comments