Skip to content

Commit 09c3396

Browse files
committed
blob: deduplicate values during rewrite
Because of the existence of virtual sstables and the lack of accurate blob value liveness for virtual sstables, a blob file rewrite may observe multiple references to the same value. Blob file rewriting must deduplicate these value IDs to preserve the correct indexing of values within the virtual block. Note that it's not sufficient to deduplicate referencing sstables by backing sstable. Consider a value V contained within virtual sstable s1, backed by b1. Imagine there's another virtual sstable s2 also backed by b1. If s1 is compacted, the reference to value V will be carried forward to a new physical sstable b2, which will encode the reference in its blob value liveness index. However, the original backing file b1 is still in use (by s2), so its blob value liveness index, which also references V, remains in use; the rewrite therefore observes V twice, once via b1 and once via b2. Ultimately, we will fix this issue with accurate virtual sstable blob value liveness (#4915). For now, we deduplicate the value IDs at rewrite time.
1 parent 8e65e15 commit 09c3396

File tree

4 files changed

+82
-1
lines changed

4 files changed

+82
-1
lines changed

compaction_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,12 @@ func TestCompaction(t *testing.T) {
10221022
}
10231023
return s
10241024

1025+
case "excise":
1026+
if err := runExciseCmd(td, d); err != nil {
1027+
return err.Error()
1028+
}
1029+
return describeLSM(d, verbose)
1030+
10251031
case "excise-dryrun":
10261032
ve, err := runExciseDryRunCmd(td, d)
10271033
if err != nil {
@@ -1081,6 +1087,31 @@ func TestCompaction(t *testing.T) {
10811087
require.NoError(t, b.Commit(nil))
10821088
return fmt.Sprintf("wrote %d keys\n", count)
10831089

1090+
case "run-blob-rewrite-compaction":
1091+
err := func() error {
1092+
d.mu.Lock()
1093+
defer d.mu.Unlock()
1094+
d.mu.versions.logLock()
1095+
env := d.makeCompactionEnvLocked()
1096+
require.NotNil(t, env)
1097+
picker := d.mu.versions.picker.(*compactionPickerByScore)
1098+
pc := picker.pickBlobFileRewriteCompaction(*env)
1099+
if pc == nil {
1100+
d.mu.versions.logUnlock()
1101+
return errors.New("no blob file rewrite compaction")
1102+
}
1103+
d.mu.versions.logUnlock()
1104+
d.runPickedCompaction(pc, noopGrantHandle{})
1105+
for d.mu.compact.compactingCount > 0 {
1106+
d.mu.compact.cond.Wait()
1107+
}
1108+
return nil
1109+
}()
1110+
if err != nil {
1111+
return err.Error()
1112+
}
1113+
return describeLSM(d, verbose)
1114+
10841115
case "auto-compact":
10851116
expectedCount := int64(1)
10861117
td.MaybeScanArgs(t, "count", &expectedCount)

sstable/blob/rewrite.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,14 @@ func (rw *FileRewriter) CopyBlock(
6969

7070
previousValueID := -1
7171
for _, valueID := range valueIDs {
72+
// Subsequent logic depends on the valueIDs being unique.
73+
// TODO(jackson): This is a workaround because we don't have per-sstable
74+
// liveness data. See https://github.com/cockroachdb/pebble/issues/4915.
75+
// If we had per-sstable liveness data, we should be able to make this
76+
// an assertion failure.
77+
if previousValueID == valueID {
78+
continue
79+
}
7280
// If there is a gap in the referenced Value IDs within this block, we
7381
// need to represent this sparseness as empty values within the block.
7482
// We can represent sparseness at the tail of a block or between blocks
@@ -84,6 +92,10 @@ func (rw *FileRewriter) CopyBlock(
8492
if err != nil {
8593
return err
8694
}
95+
// We don't know the value size, but we know it must not be empty.
96+
if len(value) == 0 {
97+
return errors.AssertionFailedf("value is empty")
98+
}
8799
rw.w.stats.ValueCount++
88100
rw.w.stats.UncompressedValueBytes += uint64(len(value))
89101
rw.w.valuesEncoder.AddValue(value)

testdata/blob_rewrite

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,4 +76,4 @@ rewrite-blob 000002 000001 000003
7676
Successfully rewrote blob file 000002 to 000004
7777
Input SSTables: [000001 000003]
7878
SSTables with blob references: 2
79-
{BlockCount: 1, ValueCount: 4, UncompressedValueBytes: 30, FileLen: 123}
79+
{BlockCount: 1, ValueCount: 2, UncompressedValueBytes: 15, FileLen: 106}

testdata/compaction/value_separation

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,44 @@ Filter utility: 0.0%
392392
Ingestions: 0 as flushable: 0 (0B in 0 tables)
393393
Cgo memory usage: 0B block cache: 0B (data: 0B, maps: 0B, entries: 0B) memtables: 0B
394394

395+
# Test a blob file rewrite compaction with virtual sstable references.
396+
397+
define value-separation=(true,1,10,0s,0.01)
398+
----
399+
400+
batch
401+
set a apple
402+
set b banana
403+
set c coconut
404+
----
405+
406+
compact a-b
407+
----
408+
L6:
409+
000005:[a#10,SET-c#12,SET] seqnums:[10-12] points:[a#10,SET-c#12,SET] size:862 blobrefs:[(B000006: 18); depth:1]
410+
Blob files:
411+
B000006 physical:{000006 size:[109 (109B)] vals:[18 (18B)]}
412+
413+
excise b ba
414+
----
415+
L6:
416+
000007(000005):[a#10,SET-a#10,SET] seqnums:[10-12] points:[a#10,SET-a#10,SET] size:104(862) blobrefs:[(B000006: 2); depth:1]
417+
000008(000005):[c#12,SET-c#12,SET] seqnums:[10-12] points:[c#12,SET-c#12,SET] size:104(862) blobrefs:[(B000006: 2); depth:1]
418+
Blob files:
419+
B000006 physical:{000006 size:[109 (109B)] vals:[18 (18B)]}
420+
421+
# Run a blob-rewrite compaction. It'll rewrite the blob file, but it won't
422+
# actually reclaim disk space. This is a known limitation due to the lack of
423+
# accurate blob value liveness for virtual sstables. See
424+
# https://github.com/cockroachdb/pebble/issues/4915.
425+
426+
run-blob-rewrite-compaction
427+
----
428+
L6:
429+
000007(000005):[a#10,SET-a#10,SET] seqnums:[10-12] points:[a#10,SET-a#10,SET] size:104(862) blobrefs:[(B000006: 2); depth:1]
430+
000008(000005):[c#12,SET-c#12,SET] seqnums:[10-12] points:[c#12,SET-c#12,SET] size:104(862) blobrefs:[(B000006: 2); depth:1]
431+
Blob files:
432+
B000006 physical:{000009 size:[110 (110B)] vals:[18 (18B)]}
395433

396434
define value-separation=(true,5,5,0s,1.0) l0-compaction-threshold=1
397435
----

0 commit comments

Comments
 (0)