Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

db: fix WAL checksum error handling during upgrade #948

Merged
merged 1 commit into from
Sep 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions open.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ func Open(dirname string, opts *Options) (db *DB, _ error) {
name string
}
var logFiles []fileNumAndName
var strictWALTail bool
for _, filename := range ls {
ft, fn, ok := base.ParseFilename(opts.FS, filename)
if !ok {
Expand All @@ -250,7 +251,8 @@ func Open(dirname string, opts *Options) (db *DB, _ error) {
d.logRecycler.minRecycleLogNum = fn + 1
}
case fileTypeOptions:
if err := checkOptions(opts, opts.FS.PathJoin(dirname, filename)); err != nil {
strictWALTail, err = checkOptions(opts, opts.FS.PathJoin(dirname, filename))
if err != nil {
return nil, err
}
case fileTypeTemp:
Expand All @@ -271,7 +273,8 @@ func Open(dirname string, opts *Options) (db *DB, _ error) {
var ve versionEdit
for i, lf := range logFiles {
lastWAL := i == len(logFiles)-1
maxSeqNum, err := d.replayWAL(jobID, &ve, opts.FS, opts.FS.PathJoin(d.walDirname, lf.name), lf.num, lastWAL)
maxSeqNum, err := d.replayWAL(jobID, &ve, opts.FS,
opts.FS.PathJoin(d.walDirname, lf.name), lf.num, strictWALTail && !lastWAL)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -433,7 +436,7 @@ func GetVersion(dir string, fs vfs.FS) (string, error) {
// d.mu must be held when calling this, but the mutex may be dropped and
// re-acquired during the course of this method.
func (d *DB) replayWAL(
jobID int, ve *versionEdit, fs vfs.FS, filename string, logNum FileNum, lastWAL bool,
jobID int, ve *versionEdit, fs vfs.FS, filename string, logNum FileNum, strictWALTail bool,
) (maxSeqNum uint64, err error) {
file, err := fs.Open(filename)
if err != nil {
Expand Down Expand Up @@ -504,7 +507,7 @@ func (d *DB) replayWAL(
// to otherwise treat them like EOF.
if err == io.EOF {
break
} else if record.IsInvalidRecord(err) && lastWAL {
} else if record.IsInvalidRecord(err) && !strictWALTail {
break
}
return 0, errors.Wrap(err, "pebble: error when replaying WAL")
Expand Down Expand Up @@ -577,16 +580,16 @@ func (d *DB) replayWAL(
return maxSeqNum, err
}

func checkOptions(opts *Options, path string) error {
func checkOptions(opts *Options, path string) (strictWALTail bool, err error) {
f, err := opts.FS.Open(path)
if err != nil {
return err
return false, err
}
defer f.Close()

data, err := ioutil.ReadAll(f)
if err != nil {
return err
return false, err
}
return opts.Check(string(data))
return opts.checkOptions(string(data))
}
66 changes: 66 additions & 0 deletions open_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -563,6 +563,72 @@ func TestTwoWALReplayCorrupt(t *testing.T) {
require.Error(t, err, "pebble: corruption")
}

// TestTwoWALReplayCorrupt tests WAL-replay behavior when the first of the two
// WALs is corrupted with an sstable checksum error and the OPTIONS file does
// not enable the private strict_wal_tail option, indicating that the WAL was
// produced by a database that did not guarantee clean WAL tails. See #864.
func TestTwoWALReplayPermissive(t *testing.T) {
// Use the real filesystem so that we can seek and overwrite WAL data
// easily.
dir, err := ioutil.TempDir("", "wal-replay")
require.NoError(t, err)
defer os.RemoveAll(dir)

opts := &Options{
MemTableStopWritesThreshold: 4,
MemTableSize: 2048,
}
opts.EnsureDefaults()
d, err := Open(dir, opts)
require.NoError(t, err)
d.mu.Lock()
d.mu.compact.flushing = true
d.mu.Unlock()
require.NoError(t, d.Set([]byte("1"), []byte(strings.Repeat("a", 1024)), nil))
require.NoError(t, d.Set([]byte("2"), nil, nil))
d.mu.Lock()
d.mu.compact.flushing = false
d.mu.Unlock()
require.NoError(t, d.Close())

// We should have two WALs.
var logs []string
var optionFilename string
ls, err := vfs.Default.List(dir)
require.NoError(t, err)
for _, name := range ls {
if filepath.Ext(name) == ".log" {
logs = append(logs, name)
}
if strings.HasPrefix(filepath.Base(name), "OPTIONS") {
optionFilename = name
}
}
sort.Strings(logs)
if len(logs) < 2 {
t.Fatalf("expected at least two log files, found %d", len(logs))
}

// Corrupt the (n-1)th WAL by zeroing four bytes, 100 bytes from the end
// of the file.
f, err := os.OpenFile(filepath.Join(dir, logs[len(logs)-2]), os.O_RDWR, os.ModePerm)
require.NoError(t, err)
off, err := f.Seek(-100, 2)
require.NoError(t, err)
_, err = f.Write([]byte{0, 0, 0, 0})
require.NoError(t, err)
require.NoError(t, f.Close())
t.Logf("zeored four bytes in %s at offset %d\n", logs[len(logs)-2], off)

// Remove the OPTIONS file containing the strict_wal_tail option.
require.NoError(t, vfs.Default.Remove(filepath.Join(dir, optionFilename)))

// Re-opening the database should not report the corruption.
d, err = Open(dir, nil)
require.NoError(t, err)
require.NoError(t, d.Close())
}

// TestOpenWALReplayReadOnlySeqNums tests opening a database:
// * in read-only mode
// * with multiple unflushed log files that must replayed
Expand Down
38 changes: 32 additions & 6 deletions options.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,8 +438,19 @@ type Options struct {
// changing options dynamically?
WALMinSyncInterval func() time.Duration

// private options are only used by internal tests.
// private options are only used by internal tests or are used internally
// for facilitating upgrade paths of unconfigurable functionality.
private struct {
// strictWALTail configures whether or not a database's WALs created
// prior to the most recent one should be interpreted strictly,
// requiring a clean EOF. RocksDB 6.2.1 and the version of Pebble
// included in CockroachDB 20.1 do not guarantee that closed WALs end
// cleanly. If this option is set within an OPTIONS file, Pebble
// interprets previous WALs strictly, requiring a clean EOF.
// Otherwise, it interprets them permissively in the same manner as
// RocksDB 6.2.1.
strictWALTail bool

// TODO(peter): A private option to enable flush/compaction pacing. Only used
// by tests. Compaction/flush pacing is disabled until we fix the impact on
// throughput.
Expand Down Expand Up @@ -535,6 +546,7 @@ func (o *Options) EnsureDefaults() *Options {
if o.Merger == nil {
o.Merger = DefaultMerger
}
o.private.strictWALTail = true
if o.private.minCompactionRate == 0 {
o.private.minCompactionRate = 4 << 20 // 4 MB/s
}
Expand Down Expand Up @@ -635,6 +647,7 @@ func (o *Options) String() string {
fmt.Fprintf(&buf, " min_compaction_rate=%d\n", o.private.minCompactionRate)
fmt.Fprintf(&buf, " min_flush_rate=%d\n", o.private.minFlushRate)
fmt.Fprintf(&buf, " merger=%s\n", o.Merger.Name)
fmt.Fprintf(&buf, " strict_wal_tail=%t\n", o.private.strictWALTail)
fmt.Fprintf(&buf, " table_property_collectors=[")
for i := range o.TablePropertyCollectors {
if i > 0 {
Expand Down Expand Up @@ -807,6 +820,8 @@ func (o *Options) Parse(s string, hooks *ParseHooks) error {
o.private.minCompactionRate, err = strconv.Atoi(value)
case "min_flush_rate":
o.private.minFlushRate, err = strconv.Atoi(value)
case "strict_wal_tail":
o.private.strictWALTail, err = strconv.ParseBool(value)
case "merger":
switch value {
case "nullptr":
Expand Down Expand Up @@ -905,11 +920,9 @@ func (o *Options) Parse(s string, hooks *ParseHooks) error {
})
}

// Check verifies the options are compatible with the previous options
// serialized by Options.String(). For example, the Comparer and Merger must be
// the same, or data will not be able to be properly read from the DB.
func (o *Options) Check(s string) error {
return parseOptions(s, func(section, key, value string) error {
func (o *Options) checkOptions(s string) (strictWALTail bool, err error) {
// TODO(jackson): Refactor to avoid awkwardness of the strictWALTail return value.
return strictWALTail, parseOptions(s, func(section, key, value string) error {
switch section + "." + key {
case "Options.comparer":
if value != o.Comparer.Name {
Expand All @@ -923,11 +936,24 @@ func (o *Options) Check(s string) error {
return errors.Errorf("pebble: merger name from file %q != merger name from options %q",
errors.Safe(value), errors.Safe(o.Merger.Name))
}
case "Options.strict_wal_tail":
strictWALTail, err = strconv.ParseBool(value)
if err != nil {
return errors.Errorf("pebble: error parsing strict_wal_tail value %q: %w", value, err)
}
}
return nil
})
}

// Check verifies the options are compatible with the previous options
// serialized by Options.String(). For example, the Comparer and Merger must be
// the same, or data will not be able to be properly read from the DB.
func (o *Options) Check(s string) error {
_, err := o.checkOptions(s)
return err
}

// Validate verifies that the options are mutually consistent. For example,
// L0StopWritesThreshold must be >= L0CompactionThreshold, otherwise a write
// stall would persist indefinitely.
Expand Down
1 change: 1 addition & 0 deletions options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func TestOptionsString(t *testing.T) {
min_compaction_rate=4194304
min_flush_rate=1048576
merger=pebble.concatenate
strict_wal_tail=true
table_property_collectors=[]
wal_dir=
wal_bytes_per_sync=0
Expand Down