Skip to content

Commit

Permalink
combine memtables before flushing to L0
Browse files Browse the repository at this point in the history
Taken from PR #1696, commit b21f591
  • Loading branch information
mangalaman93 committed Feb 21, 2023
1 parent c65a8ac commit cf76b23
Showing 1 changed file with 64 additions and 53 deletions.
117 changes: 64 additions & 53 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ type DB struct {
lc *levelsController
vlog valueLog
writeCh chan *request
flushChan chan flushTask // For flushing memtables.
flushChan chan *memTable // For flushing memtables.
closeOnce sync.Once // For closing DB only once.

blockWrites int32
Expand Down Expand Up @@ -240,7 +240,7 @@ func Open(opt Options) (*DB, error) {

db := &DB{
imm: make([]*memTable, 0, opt.NumMemtables),
flushChan: make(chan flushTask, opt.NumMemtables),
flushChan: make(chan *memTable, opt.NumMemtables),
writeCh: make(chan *request, kvWriteChCapacity),
opt: opt,
manifest: manifestFile,
Expand Down Expand Up @@ -355,7 +355,7 @@ func Open(opt Options) (*DB, error) {
}()
// Flush them to disk asap.
for _, mt := range db.imm {
db.flushChan <- flushTask{mt: mt}
db.flushChan <- mt
}
}
// We do increment nextTxnTs below. So, no need to do it here.
Expand Down Expand Up @@ -568,12 +568,12 @@ func (db *DB) close() (err error) {
} else {
db.opt.Debugf("Flushing memtable")
for {
pushedFlushTask := func() bool {
pushedMemTable := func() bool {
db.lock.Lock()
defer db.lock.Unlock()
y.AssertTrue(db.mt != nil)
select {
case db.flushChan <- flushTask{mt: db.mt}:
case db.flushChan <- db.mt:
db.imm = append(db.imm, db.mt) // Flusher will attempt to remove this from s.imm.
db.mt = nil // Will segfault if we try writing!
db.opt.Debugf("pushed to flush chan\n")
Expand All @@ -586,7 +586,7 @@ func (db *DB) close() (err error) {
}
return false
}()
if pushedFlushTask {
if pushedMemTable {
break
}
time.Sleep(10 * time.Millisecond)
Expand Down Expand Up @@ -826,6 +826,7 @@ func (db *DB) writeRequests(reqs []*request) error {
}
count += len(b.Entries)
var i uint64
var err error
for err = db.ensureRoomForWrite(); err == errNoRoom; err = db.ensureRoomForWrite() {
i++
if i%100 == 0 {
Expand Down Expand Up @@ -987,7 +988,7 @@ func (db *DB) ensureRoomForWrite() error {
}

select {
case db.flushChan <- flushTask{mt: db.mt}:
case db.flushChan <- db.mt:
db.opt.Debugf("Flushing memtable, mt.size=%d size of flushChan: %d\n",
db.mt.sl.MemSize(), len(db.flushChan))
// We managed to push this memtable. Let's modify imm.
Expand All @@ -1009,12 +1010,12 @@ func arenaSize(opt Options) int64 {
}

// buildL0Table builds a new table from the memtable.
func buildL0Table(ft flushTask, bopts table.Options) *table.Builder {
iter := ft.mt.sl.NewIterator()
func buildL0Table(iter y.Iterator, dropPrefixes [][]byte, bopts table.Options) *table.Builder {
defer iter.Close()

b := table.NewTableBuilder(bopts)
for iter.SeekToFirst(); iter.Valid(); iter.Next() {
if len(ft.dropPrefixes) > 0 && hasAnyPrefixes(iter.Key(), ft.dropPrefixes) {
for iter.Rewind(); iter.Valid(); iter.Next() {
if len(dropPrefixes) > 0 && hasAnyPrefixes(iter.Key(), dropPrefixes) {
continue
}
vs := iter.Value()
Expand All @@ -1024,23 +1025,14 @@ func buildL0Table(ft flushTask, bopts table.Options) *table.Builder {
}
b.Add(iter.Key(), iter.Value(), vp.Len)
}
return b
}

type flushTask struct {
mt *memTable
dropPrefixes [][]byte
return b
}

// handleFlushTask must be run serially.
func (db *DB) handleFlushTask(ft flushTask) error {
// There can be a scenario, when empty memtable is flushed.
if ft.mt.sl.Empty() {
return nil
}

// handleMemTableFlush must be run serially.
func (db *DB) handleMemTableFlush(itr y.Iterator, dropPrefixes [][]byte) error {
bopts := buildTableOptions(db)
builder := buildL0Table(ft, bopts)
builder := buildL0Table(itr, nil, bopts)
defer builder.Close()

// buildL0Table can return nil if none of the items in the skiplist are
Expand Down Expand Up @@ -1069,39 +1061,62 @@ func (db *DB) handleFlushTask(ft flushTask) error {
return err
}

// flushMemtable must keep running until we send it an empty flushTask. If there
// are errors during handling the flush task, we'll retry indefinitely.
// flushMemtable must keep running until we send it an empty memtable. If there
// are errors during handling the memtable flush, we'll retry indefinitely.
func (db *DB) flushMemtable(lc *z.Closer) error {
defer lc.Done()

for ft := range db.flushChan {
if ft.mt == nil {
// We close db.flushChan now, instead of sending a nil ft.mt.
continue
}
for {
err := db.handleFlushTask(ft)
if err == nil {
var sz int64
var itrs []y.Iterator
var mts []*memTable
for { //nolint:gosimple
select {
case mt, ok := <-db.flushChan:
if mt != nil {
itrs = append(itrs, mt.sl.NewUniIterator(false))
mts = append(mts, mt)
sz += mt.sl.MemSize()
if sz < db.opt.MemTableSize {
continue
}
}

if !ok && len(mts) == 0 {
return nil
}
if len(mts) == 0 {
continue
}

mitr := table.NewMergeIterator(itrs, false)
for {
if err := db.handleMemTableFlush(mitr, nil); err != nil {
// Encountered error. Retry indefinitely.
db.opt.Errorf("error flushing memtable to disk: %v, retrying", err)
time.Sleep(time.Second)
continue
}

// Update db.imm. Need a lock.
db.lock.Lock()
// This is a single-threaded operation. ft.mt corresponds to the head of
// db.imm list. Once we flush it, we advance db.imm. The next ft.mt
// This is a single-threaded operation. mt corresponds to the head of
// db.imm list. Once we flush it, we advance db.imm. The next mt
// which would arrive here would match db.imm[0], because we acquire a
// lock over DB when pushing to flushChan.
// TODO: This logic is dirty AF. Any change and this could easily break.
y.AssertTrue(ft.mt == db.imm[0])
db.imm = db.imm[1:]
ft.mt.DecrRef() // Return memory.
for _, mt := range mts {
y.AssertTrue(mt == db.imm[0])
db.imm = db.imm[1:]
mt.DecrRef() // Return memory.
}
db.lock.Unlock()

break
}
// Encountered error. Retry indefinitely.
db.opt.Errorf("Failure while flushing memtable to disk: %v. Retrying...\n", err)
time.Sleep(time.Second)

// Reset everything.
itrs, mts, sz = itrs[:0], mts[:0], 0
}
}
return nil
}

func exists(path string) (bool, error) {
Expand Down Expand Up @@ -1521,7 +1536,7 @@ func (db *DB) startCompactions() {
func (db *DB) startMemoryFlush() {
// Start memory flusher.
if db.closers.memtable != nil {
db.flushChan = make(chan flushTask, db.opt.NumMemtables)
db.flushChan = make(chan *memTable, db.opt.NumMemtables)
db.closers.memtable = z.NewCloser(1)
go func() {
_ = db.flushMemtable(db.closers.memtable)
Expand Down Expand Up @@ -1627,7 +1642,7 @@ func (db *DB) prepareToDrop() (func(), error) {
panic("Attempting to drop data in read-only mode.")
}
// In order to prepare for drop, we need to block the incoming writes and
// write it to db. Then, flush all the pending flushtask. So that, we
// write it to db. Then, flush all the pending memtables, so that we
// don't miss any entries.
if err := db.blockWrite(); err != nil {
return nil, err
Expand Down Expand Up @@ -1676,7 +1691,7 @@ func (db *DB) dropAll() (func(), error) {
if err != nil {
return f, err
}
// prepareToDrop will stop all the incomming write and flushes any pending flush tasks.
// prepareToDrop will stop all the incoming writes and flush any pending memtables.
// Before we drop, we'll stop the compaction because all the data is going to
// be deleted anyway.
db.stopCompactions()
Expand Down Expand Up @@ -1758,13 +1773,9 @@ func (db *DB) DropPrefix(prefixes ...[]byte) error {
memtable.DecrRef()
continue
}
task := flushTask{
mt: memtable,
// Ensure that the head of value log gets persisted to disk.
dropPrefixes: filtered,
}
itr := memtable.sl.NewUniIterator(false)
db.opt.Debugf("Flushing memtable")
if err := db.handleFlushTask(task); err != nil {
if err := db.handleMemTableFlush(itr, filtered); err != nil {
db.opt.Errorf("While trying to flush memtable: %v", err)
return err
}
Expand Down

0 comments on commit cf76b23

Please sign in to comment.