Skip to content

Commit

Permalink
db: fix WAL checksum error handling from previous versions
Browse files Browse the repository at this point in the history
A database that was created using RocksDB or a previous version of
Pebble does not guarantee that its closed WALs were closed cleanly.
Detect these cases by the absence of a special private
'strict_wal_tail' option. If this special option is missing from the
OPTIONS file, interpret WALs permissively as they're interpreted in the
RocksDB and Pebble versions that wrote them.

See cockroachdb/cockroach#52729.
  • Loading branch information
jbowens committed Sep 30, 2020
1 parent a69a94e commit a33ead9
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 14 deletions.
19 changes: 11 additions & 8 deletions open.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ func Open(dirname string, opts *Options) (db *DB, _ error) {
name string
}
var logFiles []fileNumAndName
var strictWALTail bool
for _, filename := range ls {
ft, fn, ok := base.ParseFilename(opts.FS, filename)
if !ok {
Expand All @@ -250,7 +251,8 @@ func Open(dirname string, opts *Options) (db *DB, _ error) {
d.logRecycler.minRecycleLogNum = fn + 1
}
case fileTypeOptions:
if err := checkOptions(opts, opts.FS.PathJoin(dirname, filename)); err != nil {
strictWALTail, err = checkOptions(opts, opts.FS.PathJoin(dirname, filename))
if err != nil {
return nil, err
}
case fileTypeTemp:
Expand All @@ -271,7 +273,8 @@ func Open(dirname string, opts *Options) (db *DB, _ error) {
var ve versionEdit
for i, lf := range logFiles {
lastWAL := i == len(logFiles)-1
maxSeqNum, err := d.replayWAL(jobID, &ve, opts.FS, opts.FS.PathJoin(d.walDirname, lf.name), lf.num, lastWAL)
maxSeqNum, err := d.replayWAL(jobID, &ve, opts.FS,
opts.FS.PathJoin(d.walDirname, lf.name), lf.num, strictWALTail && !lastWAL)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -433,7 +436,7 @@ func GetVersion(dir string, fs vfs.FS) (string, error) {
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) replayWAL(
jobID int, ve *versionEdit, fs vfs.FS, filename string, logNum FileNum, lastWAL bool,
jobID int, ve *versionEdit, fs vfs.FS, filename string, logNum FileNum, strictWALTail bool,
) (maxSeqNum uint64, err error) {
file, err := fs.Open(filename)
if err != nil {
Expand Down Expand Up @@ -504,7 +507,7 @@ func (d *DB) replayWAL(
// to otherwise treat them like EOF.
if err == io.EOF {
break
} else if record.IsInvalidRecord(err) && lastWAL {
} else if record.IsInvalidRecord(err) && !strictWALTail {
break
}
return 0, errors.Wrap(err, "pebble: error when replaying WAL")
Expand Down Expand Up @@ -577,16 +580,16 @@ func (d *DB) replayWAL(
return maxSeqNum, err
}

func checkOptions(opts *Options, path string) error {
func checkOptions(opts *Options, path string) (strictWALTail bool, err error) {
f, err := opts.FS.Open(path)
if err != nil {
return err
return false, err
}
defer f.Close()

data, err := ioutil.ReadAll(f)
if err != nil {
return err
return false, err
}
return opts.Check(string(data))
return opts.checkOptions(string(data))
}
66 changes: 66 additions & 0 deletions open_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,72 @@ func TestTwoWALReplayCorrupt(t *testing.T) {
require.Error(t, err, "pebble: corruption")
}

// TestTwoWALReplayCorrupt tests WAL-replay behavior when the first of the two
// WALs is corrupted with an sstable checksum error and the OPTIONS file does
// not enable the private strict_wal_tail option, indicating that the WAL was
// produced by a database that did not guarantee clean WAL tails. See #864.
func TestTwoWALReplayPermissive(t *testing.T) {
// Use the real filesystem so that we can seek and overwrite WAL data
// easily.
dir, err := ioutil.TempDir("", "wal-replay")
require.NoError(t, err)
defer os.RemoveAll(dir)

opts := &Options{
MemTableStopWritesThreshold: 4,
MemTableSize: 2048,
}
opts.EnsureDefaults()
d, err := Open(dir, opts)
require.NoError(t, err)
d.mu.Lock()
d.mu.compact.flushing = true
d.mu.Unlock()
require.NoError(t, d.Set([]byte("1"), []byte(strings.Repeat("a", 1024)), nil))
require.NoError(t, d.Set([]byte("2"), nil, nil))
d.mu.Lock()
d.mu.compact.flushing = false
d.mu.Unlock()
require.NoError(t, d.Close())

// We should have two WALs.
var logs []string
var optionFilename string
ls, err := vfs.Default.List(dir)
require.NoError(t, err)
for _, name := range ls {
if filepath.Ext(name) == ".log" {
logs = append(logs, name)
}
if strings.HasPrefix(filepath.Base(name), "OPTIONS") {
optionFilename = name
}
}
sort.Strings(logs)
if len(logs) < 2 {
t.Fatalf("expected at least two log files, found %d", len(logs))
}

// Corrupt the (n-1)th WAL by zeroing four bytes, 100 bytes from the end
// of the file.
f, err := os.OpenFile(filepath.Join(dir, logs[len(logs)-2]), os.O_RDWR, os.ModePerm)
require.NoError(t, err)
off, err := f.Seek(-100, 2)
require.NoError(t, err)
_, err = f.Write([]byte{0, 0, 0, 0})
require.NoError(t, err)
require.NoError(t, f.Close())
t.Logf("zeored four bytes in %s at offset %d\n", logs[len(logs)-2], off)

// Remove the OPTIONS file containing the strict_wal_tail option.
require.NoError(t, vfs.Default.Remove(filepath.Join(dir, optionFilename)))

// Re-opening the database should not report the corruption.
d, err = Open(dir, nil)
require.NoError(t, err)
require.NoError(t, d.Close())
}

// TestOpenWALReplayReadOnlySeqNums tests opening a database:
// * in read-only mode
// * with multiple unflushed log files that must replayed
Expand Down
38 changes: 32 additions & 6 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,8 +438,19 @@ type Options struct {
// changing options dynamically?
WALMinSyncInterval func() time.Duration

// private options are only used by internal tests.
// private options are only used by internal tests or are used internally
// for facilitating upgrade paths of unconfigurable functionality.
private struct {
// strictWALTail configures whether or not a database's WALs created
// prior to the most recent one should be interpreted strictly,
// requiring a clean EOF. RocksDB 6.2.1 and the version of Pebble
// included in CockroachDB 20.1 do not guarantee that closed WALs end
// cleanly. If this option is set within an OPTIONS file, Pebble
// interprets previous WALs strictly, requiring a clean EOF.
// Otherwise, it interprets them permissively in the same manner as
// RocksDB 6.2.1.
strictWALTail bool

// TODO(peter): A private option to enable flush/compaction pacing. Only used
// by tests. Compaction/flush pacing is disabled until we fix the impact on
// throughput.
Expand Down Expand Up @@ -535,6 +546,7 @@ func (o *Options) EnsureDefaults() *Options {
if o.Merger == nil {
o.Merger = DefaultMerger
}
o.private.strictWALTail = true
if o.private.minCompactionRate == 0 {
o.private.minCompactionRate = 4 << 20 // 4 MB/s
}
Expand Down Expand Up @@ -635,6 +647,7 @@ func (o *Options) String() string {
fmt.Fprintf(&buf, " min_compaction_rate=%d\n", o.private.minCompactionRate)
fmt.Fprintf(&buf, " min_flush_rate=%d\n", o.private.minFlushRate)
fmt.Fprintf(&buf, " merger=%s\n", o.Merger.Name)
fmt.Fprintf(&buf, " strict_wal_tail=%t\n", o.private.strictWALTail)
fmt.Fprintf(&buf, " table_property_collectors=[")
for i := range o.TablePropertyCollectors {
if i > 0 {
Expand Down Expand Up @@ -807,6 +820,8 @@ func (o *Options) Parse(s string, hooks *ParseHooks) error {
o.private.minCompactionRate, err = strconv.Atoi(value)
case "min_flush_rate":
o.private.minFlushRate, err = strconv.Atoi(value)
case "strict_wal_tail":
o.private.strictWALTail, err = strconv.ParseBool(value)
case "merger":
switch value {
case "nullptr":
Expand Down Expand Up @@ -905,11 +920,9 @@ func (o *Options) Parse(s string, hooks *ParseHooks) error {
})
}

// Check verifies the options are compatible with the previous options
// serialized by Options.String(). For example, the Comparer and Merger must be
// the same, or data will not be able to be properly read from the DB.
func (o *Options) Check(s string) error {
return parseOptions(s, func(section, key, value string) error {
func (o *Options) checkOptions(s string) (strictWALTail bool, err error) {
// TODO(jackson): Refactor to avoid awkwardness of the strictWALTail return value.
return strictWALTail, parseOptions(s, func(section, key, value string) error {
switch section + "." + key {
case "Options.comparer":
if value != o.Comparer.Name {
Expand All @@ -923,11 +936,24 @@ func (o *Options) Check(s string) error {
return errors.Errorf("pebble: merger name from file %q != merger name from options %q",
errors.Safe(value), errors.Safe(o.Merger.Name))
}
case "Options.strict_wal_tail":
strictWALTail, err = strconv.ParseBool(value)
if err != nil {
return errors.Errorf("pebble: error parsing strict_wal_tail value %q: %w", value, err)
}
}
return nil
})
}

// Check verifies the options are compatible with the previous options
// serialized by Options.String(). For example, the Comparer and Merger must be
// the same, or data will not be able to be properly read from the DB.
func (o *Options) Check(s string) error {
_, err := o.checkOptions(s)
return err
}

// Validate verifies that the options are mutually consistent. For example,
// L0StopWritesThreshold must be >= L0CompactionThreshold, otherwise a write
// stall would persist indefinitely.
Expand Down
1 change: 1 addition & 0 deletions options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func TestOptionsString(t *testing.T) {
min_compaction_rate=4194304
min_flush_rate=1048576
merger=pebble.concatenate
strict_wal_tail=true
table_property_collectors=[]
wal_dir=
wal_bytes_per_sync=0
Expand Down

0 comments on commit a33ead9

Please sign in to comment.