Skip to content

Commit 837c19b

Browse files
committed
db: fix double Close of directory lock during failed Open
Previously if an error occurred during Open after the WAL manager had been initialized, it was possible for a directory lock to be Closed multiple times. This commit refactors the error handling to ensure we release the WALDir and WALFailover.Secondary locks at most once.
1 parent b35ff00 commit 837c19b

File tree

4 files changed

+57
-50
lines changed

4 files changed

+57
-50
lines changed

internal/base/directory_lock.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ type DirLock struct {
6464
//
6565
// When acquired by the client and passed to Open, refs = 1 and the Open
6666
// call increments it to 2. When the database is closed, it's decremented to
67-
// 1. Finally when the original caller, calls Close on the Lock, it's
68-
// drecemented to zero and the underlying file lock is released.
67+
// 1. Finally when the original caller calls Close on the Lock, it's
68+
// decremented to zero and the underlying file lock is released.
6969
//
7070
// When Open acquires the file lock, refs remains at 1 until the database is
7171
// closed.
@@ -78,7 +78,7 @@ func (l *DirLock) refForOpen() error {
7878
// it's 2, the lock is already in use by another database within the
7979
// process.
8080
if !l.refs.CompareAndSwap(1, 2) {
81-
return errors.Errorf("pebble: unexpected Lock reference count; is the lock already in use?")
81+
return errors.Errorf("pebble: unexpected %q DirLock reference count; is the lock already in use?", l.dirname)
8282
}
8383
return nil
8484
}
@@ -92,8 +92,11 @@ func (l *DirLock) Refs() int {
9292
// database. Close must not be called until after a database using the Lock has
9393
// been closed.
9494
func (l *DirLock) Close() error {
95-
if l.refs.Add(-1) > 0 {
95+
v := l.refs.Add(-1)
96+
if v > 0 {
9697
return nil
98+
} else if v < 0 {
99+
return errors.AssertionFailedf("pebble: unexpected %q DirLock reference count %d", l.dirname, v)
97100
}
98101
defer func() { l.fileLock = nil }()
99102
return l.fileLock.Close()

open.go

Lines changed: 46 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -101,52 +101,25 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
101101

102102
// In all error cases, we return db = nil; this is used by various
103103
// deferred cleanups.
104+
maybeCleanUp := func(fn func() error) {
105+
if db == nil {
106+
err = errors.CombineErrors(err, fn())
107+
}
108+
}
104109

105110
// Open the database and WAL directories first.
106111
walDirname, secondaryWalDirName, dataDir, err := prepareAndOpenDirs(dirname, opts)
107112
if err != nil {
108113
return nil, errors.Wrapf(err, "error opening database at %q", dirname)
109114
}
110-
defer func() {
111-
if db == nil {
112-
dataDir.Close()
113-
}
114-
}()
115-
116-
// Lock all directories that will be written to.
117-
var dirLocks [3]*base.DirLock
118-
defer func() {
119-
for _, lock := range dirLocks {
120-
if db == nil && lock != nil {
121-
_ = lock.Close()
122-
}
123-
}
124-
}()
115+
defer maybeCleanUp(dataDir.Close)
125116

126117
// Lock the database directory.
127118
fileLock, err := base.AcquireOrValidateDirectoryLock(opts.Lock, dirname, opts.FS)
128119
if err != nil {
129120
return nil, err
130121
}
131-
dirLocks[0] = fileLock
132-
133-
// Lock the WAL directories, if configured.
134-
var walDirLock, walFailoverLock *base.DirLock
135-
if walDirname != dirname {
136-
walDirLock, err = base.AcquireOrValidateDirectoryLock(opts.WALDirLock, walDirname, opts.FS)
137-
if err != nil {
138-
return nil, err
139-
}
140-
dirLocks[1] = walDirLock
141-
}
142-
if opts.WALFailover != nil && secondaryWalDirName != dirname && secondaryWalDirName != walDirname {
143-
walFailoverLock, err = base.AcquireOrValidateDirectoryLock(
144-
opts.WALFailover.Secondary.Lock, secondaryWalDirName, opts.WALFailover.Secondary.FS)
145-
if err != nil {
146-
return nil, err
147-
}
148-
dirLocks[2] = walFailoverLock
149-
}
122+
defer maybeCleanUp(fileLock.Close)
150123

151124
// List the directory contents. This also happens to include WAL log files, if
152125
// they are in the same dir, but we will ignore those below. The provider is
@@ -358,8 +331,9 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
358331
d.mu.log.metrics.fsyncLatency = prometheus.NewHistogram(prometheus.HistogramOpts{
359332
Buckets: FsyncLatencyBuckets,
360333
})
334+
361335
walOpts := wal.Options{
362-
Primary: wal.Dir{Lock: walDirLock, FS: opts.FS, Dirname: walDirname},
336+
Primary: wal.Dir{FS: opts.FS, Dirname: walDirname},
363337
Secondary: wal.Dir{},
364338
MinUnflushedWALNum: wal.NumWAL(d.mu.versions.minUnflushedLogNum),
365339
MaxNumRecyclableLogs: opts.MemTableStopWritesThreshold + 1,
@@ -373,10 +347,45 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
373347
EventListener: walEventListenerAdaptor{l: opts.EventListener},
374348
WriteWALSyncOffsets: func() bool { return d.FormatMajorVersion() >= FormatWALSyncChunks },
375349
}
350+
// Ensure we release the WAL directory locks if we fail to open the
351+
// database. If we fail before initializing the WAL manager, this defer is
352+
// responsible for releasing the locks. If we fail after initializing the
353+
// WAL manager, closing the WAL manager will release the locks.
354+
//
355+
// TODO(jackson): Open's cleanup error handling logic is convoluted; can we
356+
// simplify it?
357+
defer maybeCleanUp(func() (err error) {
358+
if d.mu.log.manager == nil {
359+
if walOpts.Primary.Lock != nil {
360+
err = errors.CombineErrors(err, walOpts.Primary.Lock.Close())
361+
}
362+
if walOpts.Secondary.Lock != nil {
363+
err = errors.CombineErrors(err, walOpts.Secondary.Lock.Close())
364+
}
365+
return err
366+
}
367+
return nil
368+
})
369+
370+
// Lock the dedicated WAL directory, if configured.
371+
if walDirname != dirname {
372+
walOpts.Primary.Lock, err = base.AcquireOrValidateDirectoryLock(opts.WALDirLock, walDirname, opts.FS)
373+
if err != nil {
374+
return nil, err
375+
}
376+
}
376377
if opts.WALFailover != nil {
377378
walOpts.Secondary = opts.WALFailover.Secondary
379+
// Lock the secondary WAL directory, if distinct from the data directory
380+
// and primary WAL directory.
381+
if secondaryWalDirName != dirname && secondaryWalDirName != walDirname {
382+
walOpts.Secondary.Lock, err = base.AcquireOrValidateDirectoryLock(
383+
opts.WALFailover.Secondary.Lock, secondaryWalDirName, opts.WALFailover.Secondary.FS)
384+
if err != nil {
385+
return nil, err
386+
}
387+
}
378388
walOpts.Secondary.Dirname = secondaryWalDirName
379-
walOpts.Secondary.Lock = walFailoverLock
380389
walOpts.FailoverOptions = opts.WALFailover.FailoverOptions
381390
walOpts.FailoverWriteAndSyncLatency = prometheus.NewHistogram(prometheus.HistogramOpts{
382391
Buckets: FsyncLatencyBuckets,
@@ -416,12 +425,7 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
416425
if err != nil {
417426
return nil, err
418427
}
419-
defer func() {
420-
if db == nil {
421-
_ = walManager.Close()
422-
}
423-
}()
424-
428+
defer maybeCleanUp(walManager.Close)
425429
d.mu.log.manager = walManager
426430

427431
d.cleanupManager = openCleanupManager(opts, d.objProvider, d.getDeletionPacerInfo)

testdata/cleaner

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ open-dir: db_wal
1515
close: db_wal
1616
open-dir: db
1717
lock: db/LOCK
18-
lock: db_wal/LOCK
1918
open-dir: db
2019
open-dir: db
2120
open-dir: db
@@ -24,6 +23,7 @@ sync: db/MANIFEST-000001
2423
create: db/marker.manifest.000001.MANIFEST-000001
2524
close: db/marker.manifest.000001.MANIFEST-000001
2625
sync: db
26+
lock: db_wal/LOCK
2727
open-dir: db_wal
2828
create: db_wal/000002.log
2929
sync: db_wal
@@ -146,7 +146,6 @@ open-dir: db1_wal
146146
close: db1_wal
147147
open-dir: db1
148148
lock: db1/LOCK
149-
lock: db1_wal/LOCK
150149
open-dir: db1
151150
open-dir: db1
152151
open-dir: db1
@@ -155,6 +154,7 @@ sync: db1/MANIFEST-000001
155154
create: db1/marker.manifest.000001.MANIFEST-000001
156155
close: db1/marker.manifest.000001.MANIFEST-000001
157156
sync: db1
157+
lock: db1_wal/LOCK
158158
open-dir: db1_wal
159159
create: db1_wal/000002.log
160160
sync: db1_wal
@@ -238,12 +238,12 @@ open-dir: db1_wal
238238
close: db1_wal
239239
open-dir: db1
240240
lock: db1/LOCK
241-
lock: db1_wal/LOCK
242241
open-dir: db1
243242
open-dir: db1
244243
open-dir: db1
245244
open: db1/MANIFEST-000001
246245
close: db1/MANIFEST-000001
246+
lock: db1_wal/LOCK
247247
open-dir: db1_wal
248248
open: db1/OPTIONS-000003
249249
close: db1/OPTIONS-000003

testdata/event_listener

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ open-dir: wal
1414
close: wal
1515
open-dir: db
1616
lock: db/LOCK
17-
lock: wal/LOCK
1817
open-dir: db
1918
open-dir: db
2019
open-dir: db
@@ -24,6 +23,7 @@ create: db/marker.manifest.000001.MANIFEST-000001
2423
close: db/marker.manifest.000001.MANIFEST-000001
2524
sync: db
2625
[JOB 1] MANIFEST created 000001
26+
lock: wal/LOCK
2727
open-dir: wal
2828
create: wal/000002.log
2929
sync: wal

0 commit comments

Comments
 (0)