Skip to content

Commit 5e3e420

Browse files
committed
db: preserve blob reference metadata across excise
During excise, we now preserve the original table's top-level blob references on the virtual sstables that are created. The value size of each blob reference is approximated by scaling the original value size by the ratio of the virtual table's size to the original table's size. Fixes: #4595
1 parent b445f35 commit 5e3e420

File tree

6 files changed

+78
-31
lines changed

6 files changed

+78
-31
lines changed

compaction_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,13 @@ func TestCompaction(t *testing.T) {
10041004
}
10051005
return s
10061006

1007+
case "excise-dryrun":
1008+
ve, err := runExciseDryRunCmd(td, d)
1009+
if err != nil {
1010+
td.Fatalf(t, err.Error())
1011+
}
1012+
return fmt.Sprintf("would excise %d files.\n%s", len(ve.DeletedTables), ve.DebugString(base.DefaultFormatter))
1013+
10071014
case "file-sizes":
10081015
return runTableFileSizesCmd(td, d)
10091016

data_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,6 +1410,42 @@ func runExciseCmd(td *datadriven.TestData, d *DB) error {
14101410
return d.Excise(context.Background(), exciseSpan)
14111411
}
14121412

1413+
func runExciseDryRunCmd(td *datadriven.TestData, d *DB) (*versionEdit, error) {
1414+
ve := &versionEdit{
1415+
DeletedTables: map[deletedFileEntry]*tableMetadata{},
1416+
}
1417+
var exciseSpan KeyRange
1418+
if len(td.CmdArgs) != 2 {
1419+
panic("insufficient args for excise-dryrun command")
1420+
}
1421+
exciseSpan.Start = []byte(td.CmdArgs[0].Key)
1422+
exciseSpan.End = []byte(td.CmdArgs[1].Key)
1423+
1424+
d.mu.Lock()
1425+
d.mu.versions.logLock()
1426+
defer func() {
1427+
d.mu.Lock()
1428+
d.mu.versions.logUnlock()
1429+
d.mu.Unlock()
1430+
}()
1431+
d.mu.Unlock()
1432+
current := d.mu.versions.currentVersion()
1433+
1434+
exciseBounds := exciseSpan.UserKeyBounds()
1435+
for l, ls := range current.AllLevelsAndSublevels() {
1436+
iter := ls.Iter()
1437+
for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest().UserKey, exciseSpan.End) < 0; m = iter.Next() {
1438+
leftTable, rightTable, err := d.exciseTable(context.Background(), exciseBounds, m, l.Level(), tightExciseBounds)
1439+
if err != nil {
1440+
return nil, errors.Errorf("error when excising %s: %s", m.FileNum, err.Error())
1441+
}
1442+
applyExciseToVersionEdit(ve, m, leftTable, rightTable, l.Level())
1443+
}
1444+
}
1445+
1446+
return ve, nil
1447+
}
1448+
14131449
func runIngestAndExciseCmd(td *datadriven.TestData, d *DB) error {
14141450
var exciseSpan KeyRange
14151451
paths := make([]string, 0, len(td.CmdArgs))

excise.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ func (d *DB) exciseTable(
135135
// TODO(bilal): Some of this work can happen without grabbing the manifest
136136
// lock; we could grab one currentVersion, release the lock, calculate excised
137137
// files, then grab the lock again and recalculate for just the files that
138-
// have changed since our previous calculation. Do this optimiaztino as part of
138+
// have changed since our previous calculation. Do this optimization as part of
139139
// https://github.com/cockroachdb/pebble/issues/2112 .
140140
if d.cmp(m.Smallest().UserKey, exciseBounds.Start) < 0 {
141141
leftTable = &tableMetadata{
@@ -147,6 +147,7 @@ func (d *DB) exciseTable(
147147
LargestSeqNum: m.LargestSeqNum,
148148
LargestSeqNumAbsolute: m.LargestSeqNumAbsolute,
149149
SyntheticPrefixAndSuffix: m.SyntheticPrefixAndSuffix,
150+
BlobReferenceDepth: m.BlobReferenceDepth,
150151
}
151152
if looseBounds {
152153
looseLeftTableBounds(d.cmp, m, leftTable, exciseBounds.Start)
@@ -159,6 +160,7 @@ func (d *DB) exciseTable(
159160
if err := determineExcisedTableSize(d.fileCache, m, leftTable); err != nil {
160161
return nil, nil, err
161162
}
163+
determineExcisedTableBlobReferences(m.BlobReferences, m.Size, leftTable)
162164
if err := leftTable.Validate(d.cmp, d.opts.Comparer.FormatKey); err != nil {
163165
return nil, nil, err
164166
}
@@ -182,6 +184,7 @@ func (d *DB) exciseTable(
182184
LargestSeqNum: m.LargestSeqNum,
183185
LargestSeqNumAbsolute: m.LargestSeqNumAbsolute,
184186
SyntheticPrefixAndSuffix: m.SyntheticPrefixAndSuffix,
187+
BlobReferenceDepth: m.BlobReferenceDepth,
185188
}
186189
if looseBounds {
187190
// We already checked that the end bound is exclusive.
@@ -194,6 +197,7 @@ func (d *DB) exciseTable(
194197
if err := determineExcisedTableSize(d.fileCache, m, rightTable); err != nil {
195198
return nil, nil, err
196199
}
200+
determineExcisedTableBlobReferences(m.BlobReferences, m.Size, rightTable)
197201
if err := rightTable.Validate(d.cmp, d.opts.Comparer.FormatKey); err != nil {
198202
return nil, nil, err
199203
}
@@ -434,6 +438,21 @@ func determineExcisedTableSize(
434438
return nil
435439
}
436440

441+
// determineExcisedTableBlobReferences copies blob references from the original
442+
// table to the excised table, scaling each blob reference's value size
443+
// proportionally based on the ratio of the excised table's size to the original
444+
// table's size.
445+
func determineExcisedTableBlobReferences(
446+
originalBlobReferences manifest.BlobReferences, originalSize uint64, excisedTable *tableMetadata,
447+
) {
448+
newBlobReferences := make(manifest.BlobReferences, len(originalBlobReferences))
449+
for i, bf := range originalBlobReferences {
450+
bf.ValueSize = bf.ValueSize * excisedTable.Size / originalSize
451+
newBlobReferences[i] = bf
452+
}
453+
excisedTable.BlobReferences = newBlobReferences
454+
}
455+
437456
// applyExciseToVersionEdit updates ve with a table deletion for the original
438457
// table and table additions for the left and/or right table.
439458
//

excise_test.go

Lines changed: 3 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -282,36 +282,10 @@ func TestExcise(t *testing.T) {
282282
}
283283
return ""
284284
case "excise-dryrun":
285-
ve := &versionEdit{
286-
DeletedTables: map[deletedFileEntry]*tableMetadata{},
287-
}
288-
var exciseSpan KeyRange
289-
if len(td.CmdArgs) != 2 {
290-
panic("insufficient args for compact command")
291-
}
292-
exciseSpan.Start = []byte(td.CmdArgs[0].Key)
293-
exciseSpan.End = []byte(td.CmdArgs[1].Key)
294-
295-
d.mu.Lock()
296-
d.mu.versions.logLock()
297-
d.mu.Unlock()
298-
current := d.mu.versions.currentVersion()
299-
300-
exciseBounds := exciseSpan.UserKeyBounds()
301-
for l, ls := range current.AllLevelsAndSublevels() {
302-
iter := ls.Iter()
303-
for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest().UserKey, exciseSpan.End) < 0; m = iter.Next() {
304-
leftTable, rightTable, err := d.exciseTable(context.Background(), exciseBounds, m, l.Level(), tightExciseBounds)
305-
if err != nil {
306-
td.Fatalf(t, "error when excising %s: %s", m.FileNum, err.Error())
307-
}
308-
applyExciseToVersionEdit(ve, m, leftTable, rightTable, l.Level())
309-
}
285+
ve, err := runExciseDryRunCmd(td, d)
286+
if err != nil {
287+
td.Fatalf(t, err.Error())
310288
}
311-
312-
d.mu.Lock()
313-
d.mu.versions.logUnlock()
314-
d.mu.Unlock()
315289
return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedTables), ve.DebugString(base.DefaultFormatter))
316290
case "confirm-backing":
317291
// Confirms that the files have the same FileBacking.

internal/manifest/blob_metadata.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ type BlobReference struct {
2525
// FileNum identifies the referenced blob file.
2626
FileNum base.DiskFileNum
2727
// ValueSize is the sum of the lengths of the uncompressed values within the
28-
// blob file for which there exists a reference in the sstable.
28+
// blob file for which there exists a reference in the sstable. Note that if
29+
// any of the referencing tables are virtualized tables, the ValueSize may
30+
// be approximate.
2931
//
3032
// INVARIANT: ValueSize <= Metadata.ValueSize
3133
ValueSize uint64

testdata/compaction/value_separation

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,3 +342,12 @@ Blob files:
342342
000038: 1686698 physical bytes, 1681728 value bytes
343343
000040: 1691574 physical bytes, 1686592 value bytes
344344
000042: 113626 physical bytes, 113280 value bytes
345+
346+
347+
excise-dryrun b c
348+
----
349+
would excise 1 files.
350+
del-table: L6 000044
351+
add-table: L6 000053(000044):[a@1#0,SET-azzz@1#0,SET] seqnums:[0-0] points:[a@1#0,SET-azzz@1#0,SET] size:238596 blobrefs:[(000006: 566234), (000008: 564260), (000010: 37624); depth:1]
352+
add-table: L6 000054(000044):[c@1#0,SET-czks@1#0,SET] seqnums:[0-0] points:[c@1#0,SET-czks@1#0,SET] size:235737 blobrefs:[(000006: 559450), (000008: 557499), (000010: 37173); depth:1]
353+
add-backing: 000044

0 commit comments

Comments
 (0)