From f065d7a4d05611ae10611927f7eca5f7c15b5adb Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Fri, 1 Dec 2023 11:10:33 -0500 Subject: [PATCH] db: fix SINGLEDEL, MERGE + DEL interaction Previously, when a MERGE consumed a deletion tombstone during a compaction, the MERGE key was transformed into a SET. This SET lost knowledge of the previous deletion. Previously, a subsequent SingleDelete (intended to remove the MERGE key) could elide the SET, resurrecting internal keys that were deleted by the consumed deletion tombstone. This commit adjusts the compaction iterator to produce a SETWITHDEL (see #1255) that preserves knowledge of the consumed deletion tombstone. This ensures that a SINGLEDEL that meets the merged key yields a DEL. --- compaction_iter.go | 7 +++--- testdata/compaction_iter | 14 ++++++------ testdata/compaction_iter_delete_sized | 32 ++++++++++++++++++++------- testdata/compaction_iter_set_with_del | 14 ++++++------ 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/compaction_iter.go b/compaction_iter.go index cc753a6ac4..0fb9e45f26 100644 --- a/compaction_iter.go +++ b/compaction_iter.go @@ -522,7 +522,7 @@ func (i *compactionIter) Next() (*InternalKey, []byte) { // into a SET. var includesBase bool switch i.key.Kind() { - case InternalKeyKindSet: + case InternalKeyKindSet, InternalKeyKindSetWithDelete: includesBase = true case InternalKeyKindMerge: default: @@ -832,7 +832,8 @@ func (i *compactionIter) mergeNext(valueMerger ValueMerger) stripeChangeType { // We've hit a deletion tombstone. Return everything up to this point and // then skip entries until the next snapshot stripe. We change the kind // of the result key to a Set so that it shadows keys in lower - // levels. That is, MERGE+DEL -> SET. + // levels. That is, MERGE+DEL -> SETWITHDEL. + // // We do the same for SingleDelete since SingleDelete is only // permitted (with deterministic behavior) for keys that have been // set once since the last SingleDelete/Delete, so everything @@ -845,7 +846,7 @@ func (i *compactionIter) mergeNext(valueMerger ValueMerger) stripeChangeType { // single Set, and then merge in any following Sets, but that is // complicated wrt code and unnecessary given the narrow permitted // use of SingleDelete. - i.key.SetKind(InternalKeyKindSet) + i.key.SetKind(InternalKeyKindSetWithDelete) i.skip = true return sameStripeSkippable diff --git a/testdata/compaction_iter b/testdata/compaction_iter index 32fe720eba..3e831167d4 100644 --- a/testdata/compaction_iter +++ b/testdata/compaction_iter @@ -787,7 +787,7 @@ iter first next ---- -a#6,1:b[base] +a#6,18:b[base] . # Non-deterministic use of SINGLEDEL where there are two older SETs that have @@ -803,7 +803,7 @@ iter first next ---- -a#6,1:b[base] +a#6,18:b[base] . define @@ -1067,21 +1067,21 @@ iter first next ---- -a#5,1:5[base] +a#5,18:5[base] . iter allow-zero-seqnum=true first next ---- -a#0,1:5[base] +a#0,18:5[base] . iter elide-tombstones=true first next ---- -a#5,1:5[base] +a#5,18:5[base] . iter snapshots=2 @@ -1089,7 +1089,7 @@ first next next ---- -a#5,1:5[base] +a#5,18:5[base] a#1,2:1 . @@ -1098,7 +1098,7 @@ first next next ---- -a#5,1:5[base] +a#5,18:5[base] a#1,2:1 . diff --git a/testdata/compaction_iter_delete_sized b/testdata/compaction_iter_delete_sized index f9e8c761c1..275927e472 100644 --- a/testdata/compaction_iter_delete_sized +++ b/testdata/compaction_iter_delete_sized @@ -786,7 +786,7 @@ iter first next ---- -a#6,1:b[base] +a#6,18:b[base] . # Non-deterministic use of SINGLEDEL where there are two older SETs that have @@ -802,7 +802,7 @@ iter first next ---- -a#6,1:b[base] +a#6,18:b[base] . define @@ -1066,21 +1066,21 @@ iter first next ---- -a#5,1:5[base] +a#5,18:5[base] . iter allow-zero-seqnum=true first next ---- -a#0,1:5[base] +a#0,18:5[base] . iter elide-tombstones=true first next ---- -a#5,1:5[base] +a#5,18:5[base] . iter snapshots=2 @@ -1088,7 +1088,7 @@ first next next ---- -a#5,1:5[base] +a#5,18:5[base] a#1,2:1 . @@ -1097,7 +1097,7 @@ first next next ---- -a#5,1:5[base] +a#5,18:5[base] a#1,2:1 . @@ -1863,7 +1863,7 @@ missized-dels=0 # Regression test for #3087. # -# Whne a DELSIZED and a SINGLEDEL meet in a compaction, a DEL key should be +# When a DELSIZED and a SINGLEDEL meet in a compaction, a DEL key should be # emitted. define @@ -1879,3 +1879,19 @@ next ---- a#5,0: . + +# When a MERGE and a DEL[SIZED] meet in a compaction, a SETWITHDEL key (NOT a +# SET) should be emitted. Otherwise, a sequence such as SINGLEDDEL, MERGE, DEL, +# SET could result in the SET re-appearing. + +define +a.MERGE.5:foo +a.DEL.3: +---- + +iter +first +next +---- +a#5,18:foo[base] +. diff --git a/testdata/compaction_iter_set_with_del b/testdata/compaction_iter_set_with_del index ea1363331e..a0924af2ca 100644 --- a/testdata/compaction_iter_set_with_del +++ b/testdata/compaction_iter_set_with_del @@ -786,7 +786,7 @@ iter first next ---- -a#6,1:b[base] +a#6,18:b[base] . # Non-deterministic use of SINGLEDEL where there are two older SETs that have @@ -802,7 +802,7 @@ iter first next ---- -a#6,1:b[base] +a#6,18:b[base] . define @@ -1066,21 +1066,21 @@ iter first next ---- -a#5,1:5[base] +a#5,18:5[base] . iter allow-zero-seqnum=true first next ---- -a#0,1:5[base] +a#0,18:5[base] . iter elide-tombstones=true first next ---- -a#5,1:5[base] +a#5,18:5[base] . iter snapshots=2 @@ -1088,7 +1088,7 @@ first next next ---- -a#5,1:5[base] +a#5,18:5[base] a#1,2:1 . @@ -1097,7 +1097,7 @@ first next next ---- -a#5,1:5[base] +a#5,18:5[base] a#1,2:1 .