Skip to content

Commit b445f35

Browse files
committed
db: add TestWALHardCrashRandomized
Add a new randomized unit test adapted from TestWALFailoverRandomized that stresses recovery from a hard crash. Fix #4651.
1 parent b9a25dd commit b445f35

File tree

1 file changed

+77
-10
lines changed

1 file changed

+77
-10
lines changed

open_test.go

Lines changed: 77 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1586,7 +1586,7 @@ func TestMkdirAllAndSyncParents(t *testing.T) {
15861586
})
15871587
}
15881588

1589-
// TestWALFailoverRandomized is a randomzied test exercising recovery in the
1589+
// TestWALFailoverRandomized is a randomized test exercising recovery in the
15901590
// presence of WAL failover. It repeatedly opens a database, writes a number of
15911591
// batches concurrently and simulates a hard crash using vfs.NewCrashableMem. It
15921592
// ensures that the resulting DB state opens successfully, and the contents of
@@ -1596,7 +1596,6 @@ func TestMkdirAllAndSyncParents(t *testing.T) {
15961596
func TestWALFailoverRandomized(t *testing.T) {
15971597
seed := time.Now().UnixNano()
15981598
t.Logf("seed %d", seed)
1599-
mem := vfs.NewCrashableMem()
16001599
makeOptions := func(mem *vfs.MemFS) *Options {
16011600
failoverOpts := WALFailoverOptions{
16021601
Secondary: wal.Dir{FS: mem, Dirname: "secondary"},
@@ -1625,7 +1624,28 @@ func TestWALFailoverRandomized(t *testing.T) {
16251624
WALFailover: &failoverOpts,
16261625
}
16271626
}
1627+
runRandomizedCrashTest(t, randomizedCrashTestOptions{
1628+
makeOptions: makeOptions,
1629+
maxValueSize: 4096,
1630+
seed: seed,
1631+
unsyncedDataPercent: 50,
1632+
numOps: 1000,
1633+
opCrashWeight: 1,
1634+
opBatchWeight: 20,
1635+
})
1636+
}
1637+
1638+
type randomizedCrashTestOptions struct {
1639+
makeOptions func(*vfs.MemFS) *Options
1640+
maxValueSize int
1641+
numOps int
1642+
seed int64
1643+
unsyncedDataPercent int
1644+
opCrashWeight int
1645+
opBatchWeight int
1646+
}
16281647

1648+
func runRandomizedCrashTest(t *testing.T, opts randomizedCrashTestOptions) {
16291649
// KV state tracking.
16301650
//
16311651
// This test uses all uint16 big-endian integers as a keyspace. Values are
@@ -1697,29 +1717,33 @@ func TestWALFailoverRandomized(t *testing.T) {
16971717
require.NoError(t, it.Close())
16981718
}
16991719

1700-
d, err := Open("primary", makeOptions(mem))
1720+
mem := vfs.NewCrashableMem()
1721+
d, err := Open("primary", opts.makeOptions(mem))
17011722
require.NoError(t, err)
1702-
rng := rand.New(rand.NewPCG(0, uint64(seed)))
1723+
rng := rand.New(rand.NewPCG(0, uint64(opts.seed)))
17031724
var wg sync.WaitGroup
17041725
var n uint64
1726+
v := make([]byte, max(opts.maxValueSize, 2))
17051727
randomOps := metamorphic.Weighted[func()]{
1706-
{Weight: 1, Item: func() {
1728+
{Weight: opts.opCrashWeight, Item: func() {
17071729
time.Sleep(time.Microsecond * time.Duration(rand.IntN(30)))
17081730
t.Log("initiating hard crash")
17091731
setIsCrashing(true)
17101732
// Take a crash-consistent clone of the filesystem and use that going forward.
1711-
mem = mem.CrashClone(vfs.CrashCloneCfg{UnsyncedDataPercent: 50, RNG: rng})
1733+
mem = mem.CrashClone(vfs.CrashCloneCfg{
1734+
UnsyncedDataPercent: opts.unsyncedDataPercent,
1735+
RNG: rng,
1736+
})
17121737
wg.Wait() // Wait for outstanding batch commits to finish.
17131738
_ = d.Close()
1714-
d, err = Open("primary", makeOptions(mem))
1739+
d, err = Open("primary", opts.makeOptions(mem))
17151740
require.NoError(t, err)
17161741
validateState(d)
17171742
setIsCrashing(false)
17181743
}},
1719-
{Weight: 20, Item: func() {
1744+
{Weight: opts.opBatchWeight, Item: func() {
17201745
count := rng.IntN(14) + 1
17211746
var k [2]byte
1722-
var v [4096]byte
17231747
b := d.NewBatch()
17241748
for i := 0; i < count; i++ {
17251749
j := uint16((n + uint64(i)) % keyspaceSize)
@@ -1745,11 +1769,54 @@ func TestWALFailoverRandomized(t *testing.T) {
17451769
}},
17461770
}
17471771
nextRandomOp := randomOps.RandomDeck(randv1.New(randv1.NewSource(rng.Int64())))
1748-
for o := 0; o < 1000; o++ {
1772+
for o := 0; o < opts.numOps; o++ {
17491773
nextRandomOp()()
17501774
}
17511775
}
17521776

1777+
// TestWALHardCrashRandomized is a randomized test exercising recovery in the
1778+
// presence of a hard crash. It repeatedly opens a database, writes a number of
1779+
// batches concurrently and simulates a hard crash using vfs.NewCrashableMem. It
1780+
// ensures that the resulting DB state opens successfully, and the contents of
1781+
// the DB match the expectations based on the keys written.
1782+
func TestWALHardCrashRandomized(t *testing.T) {
1783+
for i := 0; i < 4; i++ {
1784+
func() {
1785+
seed := time.Now().UnixNano()
1786+
t.Logf("seed %d", seed)
1787+
prng := rand.New(rand.NewPCG(0, uint64(seed)))
1788+
makeOptions := func(mem *vfs.MemFS) *Options {
1789+
var fs vfs.FS = mem
1790+
mean := time.Duration(rand.ExpFloat64() * float64(time.Microsecond))
1791+
p := rand.Float64()
1792+
t.Logf("Injecting mean %s of latency with p=%.3f", mean, p)
1793+
fs = errorfs.Wrap(mem, errorfs.RandomLatency(errorfs.Randomly(p, seed), mean, seed, time.Second))
1794+
opts := &Options{
1795+
FS: fs,
1796+
FormatMajorVersion: internalFormatNewest,
1797+
Logger: testLogger{t},
1798+
MemTableSize: 32 << (10 + prng.IntN(6)), // [32 KiB, 1 MiB]
1799+
MemTableStopWritesThreshold: 4,
1800+
}
1801+
testingRandomized(t, opts)
1802+
return opts
1803+
}
1804+
// The configuration options are randomized to exercise different failure
1805+
// scenarios. Some runs result in a sufficient number of memtable rotations
1806+
// between crashes that we do get recycled logs.
1807+
runRandomizedCrashTest(t, randomizedCrashTestOptions{
1808+
makeOptions: makeOptions,
1809+
maxValueSize: 1 << (prng.IntN(19)), // [1, 256 KiB]
1810+
unsyncedDataPercent: prng.IntN(101), // [0, 100]
1811+
seed: seed,
1812+
numOps: 250,
1813+
opCrashWeight: 1,
1814+
opBatchWeight: 20 << prng.IntN(3),
1815+
})
1816+
}()
1817+
}
1818+
}
1819+
17531820
func TestWALCorruption(t *testing.T) {
17541821
fs := vfs.NewMem()
17551822
d, err := Open("", testingRandomized(t, &Options{

0 commit comments

Comments
 (0)