Skip to content
Permalink
Browse files

Implement SingleDelete

In Pebble, when a SingleDelete meets a Delete or Merge record for the same key during compaction, the SingleDelete is converted into a Delete.
  • Loading branch information
hueypark committed Sep 5, 2019
1 parent ab51a21 commit 3b18283e53d35b1465b158fadb2e025c84ae9de3
Showing with 756 additions and 25 deletions.
  1. +53 −0 batch.go
  2. +10 −0 batch_test.go
  3. +55 −6 compaction_iter.go
  4. +31 −0 db.go
  5. +187 −0 db_test.go
  6. +1 −0 internal.go
  7. +16 −14 internal/base/internal.go
  8. +3 −3 iterator.go
  9. +4 −2 mem_table.go
  10. +251 −0 testdata/compaction_iter
  11. +143 −0 testdata/iterator
  12. +2 −0 tool/wal.go
@@ -526,6 +526,59 @@ func (b *Batch) DeleteDeferred(keyLen int, _ *WriteOptions) (*DeferredBatchOp, e
return &b.deferredOp, nil
}

// SingleDelete appends a single-delete operation for key to the batch. The
// semantics of a single delete are described on Writer.SingleDelete.
//
// The key bytes are copied into the batch, so it is safe to modify the
// contents of the arguments after SingleDelete returns.
func (b *Batch) SingleDelete(key []byte, _ *WriteOptions) error {
	op, err := b.SingleDeleteDeferred(len(key), nil)
	if err != nil {
		return err
	}
	copy(op.Key, key)

	// TODO(peter): Manually inline DeferredBatchOp.Finish(). Mid-stack inlining
	// in go1.13 will remove the need for this.
	if b.index == nil {
		// Batch has no index, so there is nothing further to record.
		return nil
	}
	if addErr := b.index.Add(op.offset); addErr != nil {
		// We never add duplicate entries, so an error should never occur.
		panic(addErr)
	}
	return nil
}

// SingleDeleteDeferred is the deferred form of SingleDelete: it adds a single
// delete operation to the batch, but takes only the key length instead of the
// key bytes. The caller encodes the key into the Key slice of the returned
// object and then calls Finish() on it. A single delete record carries no
// value, so the returned object's Value is nil.
//
// ErrInvalidBatch is returned if the batch's entry count cannot be
// incremented.
func (b *Batch) SingleDeleteDeferred(keyLen int, _ *WriteOptions) (*DeferredBatchOp, error) {
	// The encoding logic is written out here rather than shared with the other
	// batch operations so that the variants which are handed byte slices
	// directly can preserve their fast path.
	if len(b.storage.data) == 0 {
		b.init(keyLen + binary.MaxVarintLen64 + batchHeaderLen)
	}
	if !b.increment() {
		return nil, ErrInvalidBatch
	}

	b.memTableSize += memTableEntrySize(keyLen, 0)

	op := &b.deferredOp

	// Record where this entry begins before the buffer is extended.
	off := len(b.storage.data)
	op.offset = uint32(off)

	// Reserve room for the kind byte, a worst-case length varint and the key.
	b.grow(1 + maxVarintLen32 + keyLen)
	b.storage.data[off] = byte(InternalKeyKindSingleDelete)
	off++
	lenBytes := putUvarint32(b.storage.data[off:], uint32(keyLen))
	off += lenBytes
	op.Key = b.storage.data[off : off+keyLen]
	op.Value = nil

	// Give back the varint bytes that were reserved but not needed.
	unused := maxVarintLen32 - lenBytes
	b.storage.data = b.storage.data[:len(b.storage.data)-unused]

	op.index = b.index
	return op, nil
}

// DeleteRange deletes all of the keys (and values) in the range [start,end)
// (inclusive on start, exclusive on end).
//
@@ -47,13 +47,16 @@ func TestBatch(t *testing.T) {
{InternalKeyKindSet, "roses", "red"},
{InternalKeyKindSet, "violets", "blue"},
{InternalKeyKindDelete, "roses", ""},
{InternalKeyKindSingleDelete, "roses", ""},
{InternalKeyKindSet, "", ""},
{InternalKeyKindSet, "", "non-empty"},
{InternalKeyKindDelete, "", ""},
{InternalKeyKindSingleDelete, "", ""},
{InternalKeyKindSet, "grass", "green"},
{InternalKeyKindSet, "grass", "greener"},
{InternalKeyKindSet, "eleventy", strings.Repeat("!!11!", 100)},
{InternalKeyKindDelete, "nosuchkey", ""},
{InternalKeyKindSingleDelete, "nosuchkey", ""},
{InternalKeyKindSet, "binarydata", "\x00"},
{InternalKeyKindSet, "binarydata", "\xff"},
{InternalKeyKindMerge, "merge", "mergedata"},
@@ -73,6 +76,8 @@ func TestBatch(t *testing.T) {
_ = b.Merge([]byte(tc.key), []byte(tc.value), nil)
case InternalKeyKindDelete:
_ = b.Delete([]byte(tc.key), nil)
case InternalKeyKindSingleDelete:
_ = b.SingleDelete([]byte(tc.key), nil)
case InternalKeyKindRangeDelete:
_ = b.DeleteRange([]byte(tc.key), []byte(tc.value), nil)
case InternalKeyKindLogData:
@@ -103,6 +108,11 @@ func TestBatch(t *testing.T) {
copy(d.Key, key)
copy(d.Value, value)
d.Finish()
case InternalKeyKindSingleDelete:
d, _ := b.SingleDeleteDeferred(len(key), nil)
copy(d.Key, key)
copy(d.Value, value)
d.Finish()
case InternalKeyKindRangeDelete:
d, _ := b.DeleteRangeDeferred(len(key), len(value), nil)
copy(d.Key, key)
@@ -207,7 +207,7 @@ func (i *compactionIter) Next() (*InternalKey, []byte) {
for i.iterKey != nil {
i.key = *i.iterKey
switch i.key.Kind() {
case InternalKeyKindDelete:
case InternalKeyKindDelete, InternalKeyKindSingleDelete:
// If we're at the last snapshot stripe and the tombstone can be elided
// skip to the next stripe (which will be the next user key).
if i.curSnapshotIdx == 0 && i.elideTombstone(i.key.UserKey) {
@@ -216,11 +216,21 @@ func (i *compactionIter) Next() (*InternalKey, []byte) {
continue
}

i.saveKey()
i.value = i.iterValue
i.valid = true
i.skip = true
return &i.key, i.value
switch i.key.Kind() {
case InternalKeyKindDelete:
i.saveKey()
i.value = i.iterValue
i.valid = true
i.skip = true
return &i.key, i.value

case InternalKeyKindSingleDelete:
if i.singleDeleteNext() {
return &i.key, i.value
}

continue
}

case InternalKeyKindRangeDelete:
i.key = i.cloneKey(i.key)
@@ -384,6 +394,45 @@ func (i *compactionIter) mergeNext() (*InternalKey, []byte) {
}
}

// singleDeleteNext processes a SingleDelete tombstone that the iterator is
// currently positioned on. It saves the tombstone as the current key/value and
// then steps through the following entries (via nextInStripe) to decide what
// to do with it.
//
// It returns true when i.key / i.value hold an entry the caller should emit,
// and false when nothing should be emitted for this tombstone and the caller
// should carry on with its own loop. On a false return i.valid is always
// false; i.err is additionally set if an unexpected key kind was encountered.
//
// The outcomes are:
//   - nextInStripe reports no further entry: the SingleDelete is emitted
//     unchanged, with i.skip cleared.
//   - Delete or Merge follows: the SingleDelete is rewritten into a Delete and
//     emitted, with i.skip set.
//   - Set follows: the iterator is stepped past the Set and neither the
//     SingleDelete nor the Set is emitted.
//   - another SingleDelete follows: it is passed over and the scan continues.
//   - RangeDelete follows: nothing is emitted.
func (i *compactionIter) singleDeleteNext() bool {
	// Save the current key and its value. The caller returns i.value alongside
	// i.key, so it must be refreshed here; otherwise a value left over from a
	// previously returned entry would be emitted with the tombstone.
	i.saveKey()
	i.value = i.iterValue
	i.valid = true

	// Loop until finds a key to be passed to the next level.
	for {
		if !i.nextInStripe() {
			i.skip = false
			return true
		}

		key := i.iterKey
		switch key.Kind() {
		case InternalKeyKindDelete, InternalKeyKindMerge:
			// We've hit a Delete or Merge, transform the SingleDelete into a full Delete.
			i.key.SetKind(InternalKeyKindDelete)
			i.skip = true
			return true

		case InternalKeyKindSet:
			// Step past the Set; its result is deliberately not inspected
			// because nothing is emitted either way.
			i.nextInStripe()
			i.valid = false
			return false

		case InternalKeyKindSingleDelete:
			continue

		case InternalKeyKindRangeDelete:
			i.valid = false
			return false

		default:
			i.err = fmt.Errorf("invalid internal key kind: %d", key.Kind())
			// Do not leave the iterator marked valid after a failure; every
			// other path that returns false clears this flag as well.
			i.valid = false
			return false
		}
	}
}

func (i *compactionIter) saveKey() {
i.keyBuf = append(i.keyBuf[:0], i.iterKey.UserKey...)
i.key.UserKey = i.keyBuf
31 db.go
@@ -89,6 +89,26 @@ type Writer interface {
// It is safe to modify the contents of the arguments after Delete returns.
Delete(key []byte, o *WriteOptions) error

// SingleDelete is similar to Delete in that it deletes the value for the given key. Like Delete,
// it is a blind operation that will succeed even if the given key does not exist.
//
// WARNING: Undefined (non-deterministic) behavior will result if a key is overwritten and
// then deleted using SingleDelete. The record may appear deleted immediately, but be
// resurrected at a later time after compactions have been performed. Or the record may
// be deleted permanently. A Delete operation lays down a "tombstone" which shadows all
// previous versions of a key. The SingleDelete operation is akin to "anti-matter" and will
// only delete the most recently written version for a key. These different semantics allow
// the DB to avoid propagating a SingleDelete operation during a compaction as soon as the
// corresponding Set operation is encountered. These semantics require extreme care to handle
// properly. Only use if you have a workload where the performance gain is critical and you
// can guarantee that a record is written once and then deleted once.
//
// SingleDelete is internally transformed into a Delete if the most recent record for a key is either
// a Merge or Delete record.
//
// It is safe to modify the contents of the arguments after SingleDelete returns.
SingleDelete(key []byte, o *WriteOptions) error

// DeleteRange deletes all of the keys (and values) in the range [start,end)
// (inclusive on start, exclusive on end).
//
@@ -328,6 +348,17 @@ func (d *DB) Delete(key []byte, opts *WriteOptions) error {
return d.Apply(b, opts)
}

// SingleDelete single deletes the entry for key by building a batch that
// contains just that operation and applying it to the DB with opts.
// See Writer.SingleDelete for more details on the semantics of SingleDelete.
//
// An error from either adding the operation to the batch or applying the
// batch is returned to the caller.
//
// It is safe to modify the contents of the arguments after SingleDelete returns.
func (d *DB) SingleDelete(key []byte, opts *WriteOptions) error {
	b := newBatch(d)
	defer b.release()
	// Batch.SingleDelete can fail; do not apply the batch if the operation
	// was never recorded in it.
	if err := b.SingleDelete(key, opts); err != nil {
		return err
	}
	return d.Apply(b, opts)
}

// DeleteRange deletes all of the keys (and values) in the range [start,end)
// (inclusive on start, exclusive on end).
//

0 comments on commit 3b18283

Please sign in to comment.
You can’t perform that action at this time.