From b7cb5a46f47c44d60d63493af535889786dc89d7 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Thu, 9 May 2024 14:53:43 -0400 Subject: [PATCH] db: avoid acquiring DB.mu during batch commit, unless rotating memtable Previously, DB.commitWrite would acquire the DB.mu before calling makeRoomForWrite to ensure the current memtable had sufficient space for the batch's contents. When DB.commitWrite is called, the caller already holds the commit pipeline's mutex providing mutual exclusion with other committers and, crucially, preventing the concurrent rotation of the memtable. This commit adjusts DB.commitWrite to attempt reserving space in the current mutable memtable before acquiring DB.mu. In the common case, if successful, there is no need to acquire DB.mu. If unsuccessful, the committer falls back to acuqiring DB.mu and rotating the memtable through calling makeRoomForWrite. Close #2680. --- db.go | 47 +++++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/db.go b/db.go index c707b356db..224bf0599d 100644 --- a/db.go +++ b/db.go @@ -390,7 +390,15 @@ type DB struct { } mem struct { - // The current mutable memTable. + // The current mutable memTable. Readers of the pointer may hold + // either DB.mu or commitPipeline.mu. + // + // Its internal fields are protected by commitPipeline.mu. This + // allows batch commits to be performed without DB.mu as long as no + // memtable rotation is required. + // + // Both commitPipeline.mu and DB.mu must be held when rotating the + // memtable. mutable *memTable // Queue of flushables (the mutable memtable is at end). Elements are // added to the end of the slice and removed from the beginning. Once an @@ -930,24 +938,31 @@ func (d *DB) commitWrite(b *Batch, syncWG *sync.WaitGroup, syncErr *error) (*mem } } - d.mu.Lock() - var err error + // Grab a reference to the memtable. We don't hold DB.mu, but that's okay + // for readers of d.mu.mem.mutable. + mem := d.mu.mem.mutable + // Batches which contain keys of kind InternalKeyKindIngestSST will + // never be applied to the memtable, so we don't need to make room for + // write. if !b.ingestedSSTBatch { - // Batches which contain keys of kind InternalKeyKindIngestSST will - // never be applied to the memtable, so we don't need to make room for - // write. For the other cases, switch out the memtable if there was not - // enough room to store the batch. - err = d.makeRoomForWrite(b) + // Flushable batches will require a rotation of the memtable regardless, + // so only attempt an optimistic reservation of space in the current + // memtable if this batch is not a large flushable batch. + if b.flushable == nil { + err = d.mu.mem.mutable.prepare(b) + } + if b.flushable != nil || err == arenaskl.ErrArenaFull { + // Slow path. + // We need to acquire DB.mu and rotate the memtable. + func() { + d.mu.Lock() + defer d.mu.Unlock() + err = d.makeRoomForWrite(b) + mem = d.mu.mem.mutable + }() + } } - - // Grab a reference to the memtable while holding DB.mu. Note that for - // non-flushable batches (b.flushable == nil) makeRoomForWrite() added a - // reference to the memtable which will prevent it from being flushed until - // we unreference it. This reference is dropped in DB.commitApply(). - mem := d.mu.mem.mutable - - d.mu.Unlock() if err != nil { return nil, err }