Skip to content

Commit 3a1173a

Browse files
committed
blob: handle sparseness in AddVirtualBlockMapping
When rewriting a blob file, we need record the mapping from the original blob file's block ID to the new blob file's physical block in the 'virtual block' column. Previously the onus was on the user of the BlobFileWriter to call AddVirtualBlockMapping for every blockID in the original file, even for blockIDs that have no extant values. This commit removes the onus from the caller and handles populating sparseness within the implementation of BlobFileWriter.AddVirtualBlockMapping. This fixes a bug in blob-file rewriting when multiple blocks map to the first physical block. Additionally, this commit updates the virtual column entries for empty virtual blocks to reflect the sparseness storing 0xFFFFFFFF in the column (semantically equivalent to an entry mapping to the 0'th physical block with a valueID offset of 0xFFFFFFFF).
1 parent 6d965cb commit 3a1173a

File tree

3 files changed

+17
-12
lines changed

3 files changed

+17
-12
lines changed

blob_rewrite.go

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -456,7 +456,12 @@ func (rw *blobFileRewriter) copyBlockValues(ctx context.Context, finishedBlock b
456456
if shouldFlush {
457457
rw.writer.ForceFlush()
458458
}
459+
460+
// Record the mapping from the virtual block ID to the current physical
461+
// block and value ID offset.
462+
rw.writer.BeginNewVirtualBlock(finishedBlock.blockID)
459463
slices.Sort(finishedBlock.liveValueIDs)
464+
460465
for i, valueID := range finishedBlock.liveValueIDs {
461466
if i > 0 && finishedBlock.liveValueIDs[i-1]+1 != valueID {
462467
// There's a gap in the referenced value IDs.
@@ -489,11 +494,6 @@ func (rw *blobFileRewriter) Rewrite(ctx context.Context) (blob.FileWriterStats,
489494
// Begin constructing our output blob file. We maintain a map of blockID
490495
// to accumulated liveness data across all referencing sstables.
491496
firstBlock := heap.Pop(&rw.blkHeap).(*sstable.BlobRefLivenessEncoding)
492-
493-
// Add virtual block mappings for all blocks from 0 to the first block.
494-
for blockID := blob.BlockID(0); blockID < firstBlock.BlockID; blockID++ {
495-
rw.writer.BeginNewVirtualBlock(blockID)
496-
}
497497
pendingBlock := blockValues{
498498
blockID: firstBlock.BlockID,
499499
valuesSize: firstBlock.ValuesSize,
@@ -505,11 +505,6 @@ func (rw *blobFileRewriter) Rewrite(ctx context.Context) (blob.FileWriterStats,
505505
// If we are encountering a new block, write the last accumulated block
506506
// to the blob file.
507507
if pendingBlock.blockID != nextBlock.BlockID {
508-
// Add virtual block mappings for all blocks between the last block
509-
// we encountered and the current block.
510-
for blockID := pendingBlock.blockID; blockID < nextBlock.BlockID; blockID++ {
511-
rw.writer.BeginNewVirtualBlock(blockID)
512-
}
513508
// Write the last accumulated block's values to the blob file.
514509
if err := rw.copyBlockValues(ctx, pendingBlock); err != nil {
515510
return blob.FileWriterStats{}, err
@@ -522,7 +517,6 @@ func (rw *blobFileRewriter) Rewrite(ctx context.Context) (blob.FileWriterStats,
522517
}
523518

524519
// Copy the last accumulated block.
525-
rw.writer.BeginNewVirtualBlock(pendingBlock.blockID)
526520
if err := rw.copyBlockValues(ctx, pendingBlock); err != nil {
527521
return blob.FileWriterStats{}, err
528522
}

sstable/blob/blocks.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,16 @@ func (e *indexBlockEncoder) AddBlockHandle(h block.Handle) {
9595
func (e *indexBlockEncoder) AddVirtualBlockMapping(
9696
virtualBlockID BlockID, physicalBlockIndex int, valueIDOffset BlockValueID,
9797
) {
98-
if virtualBlockID != BlockID(e.countVirtualBlocks) {
98+
// Require that virtual blocks are added in order.
99+
if virtualBlockID < BlockID(e.countVirtualBlocks) {
99100
panic(errors.AssertionFailedf("virtual block ID %d is out of order; expected %d", virtualBlockID, e.countVirtualBlocks))
100101
}
102+
// If there's a gap within the virtual block IDs, we fill in the gap with
103+
// entries that clarify these blocks are empty.
104+
for id := BlockID(e.countVirtualBlocks); id < virtualBlockID; id++ {
105+
e.virtualBlocks.Set(int(id), virtualBlockIndexMask)
106+
e.countVirtualBlocks++
107+
}
101108
e.virtualBlocks.Set(int(virtualBlockID), uint64(physicalBlockIndex)|(uint64(valueIDOffset)<<32))
102109
e.countVirtualBlocks++
103110
}

sstable/blob/fetcher.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,10 @@ func (cr *cachedReader) GetUnsafeValue(
261261
var valueIDOffset BlockValueID
262262
if cr.indexBlock.dec.virtualBlockCount > 0 {
263263
physicalBlockIndex, valueIDOffset = cr.indexBlock.dec.RemapVirtualBlockID(vh.BlockID)
264+
if valueIDOffset == virtualBlockIndexMask {
265+
return nil, errors.AssertionFailedf("blob file indicates virtual block ID %d in %s should be unreferenced",
266+
vh.BlockID, vh.BlobFileID)
267+
}
264268
}
265269
invariants.CheckBounds(physicalBlockIndex, cr.indexBlock.dec.BlockCount())
266270

0 commit comments

Comments
 (0)