Skip to content

Commit f195c69

Browse files
committed
manifest: remove BlobReference.OriginalMetadata
Previously a TableMetadata's BlobReferences maintained a pointer to the original PhysicalBlobFile that existed when the reference was created. This was confusing state and only existed for the purpose of calculating the estimated physical size of the referenced values. This commit replaces the *PhysicalBlobFile pointer with a EstimatedPhysicalSize field that's calculated once upfront. Some of the version edit accumulation and application logic is also simplified, removing the need to track referenced *PhysicalBlobFiles. Informs #112.
1 parent 6f75f52 commit f195c69

File tree

7 files changed

+153
-151
lines changed

7 files changed

+153
-151
lines changed

internal/manifest/blob_metadata.go

Lines changed: 40 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -32,50 +32,40 @@ type BlobReference struct {
3232
// blob file for which there exists a reference in the sstable. Note that if
3333
// any of the referencing tables are virtualized tables, the ValueSize may
3434
// be approximate.
35-
//
36-
// INVARIANT: ValueSize <= Metadata.ValueSize
3735
ValueSize uint64
38-
39-
// OriginalMetadata is the metadata for the original physical blob file. It
40-
// is non-nil for blob references contained within active Versions. It is
41-
// expected to initially be nil when decoding a version edit as a part of
42-
// manfiest replay. When the version edit is accumulated into a bulk version
43-
// edit, the metadata is populated.
44-
//
45-
// The OriginalMetadata describes the physical blob file that existed when
46-
// the reference was originally created. The original physical blob file may
47-
// no longer exist. The BlobReference.FileID should be translated using a
48-
// Version's BlobFileSet.
49-
//
50-
// TODO(jackson): We should either remove the OriginalMetadata from the
51-
// BlobReference or use it to infer when a blob file has definitely NOT been
52-
// replaced and a lookup in Version.BlobFileSet is unnecessary.
53-
OriginalMetadata *PhysicalBlobFile
54-
}
55-
56-
// EstimatedPhysicalSize returns an estimate of the physical size of the blob
57-
// reference, in bytes. It's calculated by scaling the blob file's physical size
58-
// according to the ValueSize of the blob reference relative to the total
59-
// ValueSize of the blob file.
60-
func (br *BlobReference) EstimatedPhysicalSize() uint64 {
36+
// EstimatedPhysicalSize is an estimate of the physical size of the blob
37+
// reference, in bytes. It's calculated by scaling the blob file's physical
38+
// size according to the ValueSize of the blob reference relative to the
39+
// total ValueSize of the blob file.
40+
EstimatedPhysicalSize uint64
41+
}
42+
43+
// MakeBlobReference creates a BlobReference from the given file ID, value size,
44+
// and physical blob file.
45+
func MakeBlobReference(
46+
fileID base.BlobFileID, valueSize uint64, phys *PhysicalBlobFile,
47+
) BlobReference {
6148
if invariants.Enabled {
62-
if br.ValueSize > br.OriginalMetadata.ValueSize {
49+
switch {
50+
case valueSize > phys.ValueSize:
6351
panic(errors.AssertionFailedf("pebble: blob reference value size %d is greater than the blob file's value size %d",
64-
br.ValueSize, br.OriginalMetadata.ValueSize))
65-
}
66-
if br.ValueSize == 0 {
67-
panic(errors.AssertionFailedf("pebble: blob reference value size %d is zero", br.ValueSize))
68-
}
69-
if br.OriginalMetadata.ValueSize == 0 {
70-
panic(errors.AssertionFailedf("pebble: blob file value size %d is zero", br.OriginalMetadata.ValueSize))
52+
valueSize, phys.ValueSize))
53+
case valueSize == 0:
54+
panic(errors.AssertionFailedf("pebble: blob reference value size %d is zero", valueSize))
55+
case phys.ValueSize == 0:
56+
panic(errors.AssertionFailedf("pebble: blob file value size %d is zero", phys.ValueSize))
7157
}
7258
}
73-
// br.ValueSize
74-
// Reference size = ----------------------------- × br.OriginalMetadata.Size
75-
// br.OriginalMetadata.ValueSize
76-
//
77-
// We perform the multiplication first to avoid floating point arithmetic.
78-
return (br.ValueSize * br.OriginalMetadata.Size) / br.OriginalMetadata.ValueSize
59+
return BlobReference{
60+
FileID: fileID,
61+
ValueSize: valueSize,
62+
// valueSize
63+
// Reference size = ----------------- × phys.Size
64+
// phys.ValueSize
65+
//
66+
// We perform the multiplication first to avoid floating point arithmetic.
67+
EstimatedPhysicalSize: (valueSize * phys.Size) / phys.ValueSize,
68+
}
7969
}
8070

8171
// BlobFileMetadata encapsulates a blob file ID used to identify a particular
@@ -475,7 +465,7 @@ type currentBlobFile struct {
475465
// referencing table number. This would likely be more memory efficient,
476466
// reduce overall number of pointers to chase and suffer fewer allocations
477467
// (and we can pool the B-Tree nodes to further reduce allocs)
478-
references map[*TableMetadata]struct{}
468+
references map[base.TableNum]struct{}
479469
// referencedValueSize is the sum of the length of uncompressed values in
480470
// this blob file that are still live.
481471
referencedValueSize uint64
@@ -491,7 +481,7 @@ func (s *CurrentBlobFileSet) Init(bve *BulkVersionEdit) {
491481
for blobFileID, pbf := range bve.BlobFiles.Added {
492482
s.files[blobFileID] = &currentBlobFile{
493483
metadata: BlobFileMetadata{FileID: blobFileID, Physical: pbf},
494-
references: make(map[*TableMetadata]struct{}),
484+
references: make(map[base.TableNum]struct{}),
495485
}
496486
s.stats.Count++
497487
s.stats.PhysicalSize += pbf.Size
@@ -506,7 +496,7 @@ func (s *CurrentBlobFileSet) Init(bve *BulkVersionEdit) {
506496
if !ok {
507497
panic(errors.AssertionFailedf("pebble: referenced blob file %d not found", ref.FileID))
508498
}
509-
cbf.references[table] = struct{}{}
499+
cbf.references[table.TableNum] = struct{}{}
510500
cbf.referencedValueSize += ref.ValueSize
511501
s.stats.ReferencedValueSize += ref.ValueSize
512502
s.stats.ReferencesCount++
@@ -559,7 +549,7 @@ func (s *CurrentBlobFileSet) ApplyAndUpdateVersionEdit(ve *VersionEdit) error {
559549
}
560550

561551
blobFileID := m.FileID
562-
cbf := &currentBlobFile{references: make(map[*TableMetadata]struct{})}
552+
cbf := &currentBlobFile{references: make(map[base.TableNum]struct{})}
563553
cbf.metadata = BlobFileMetadata{FileID: blobFileID, Physical: m.Physical}
564554
s.files[blobFileID] = cbf
565555
s.stats.Count++
@@ -577,7 +567,7 @@ func (s *CurrentBlobFileSet) ApplyAndUpdateVersionEdit(ve *VersionEdit) error {
577567
if !ok {
578568
return errors.AssertionFailedf("pebble: referenced blob file %d not found", ref.FileID)
579569
}
580-
cbf.references[e.Meta] = struct{}{}
570+
cbf.references[e.Meta.TableNum] = struct{}{}
581571
cbf.referencedValueSize += ref.ValueSize
582572
s.stats.ReferencedValueSize += ref.ValueSize
583573
s.stats.ReferencesCount++
@@ -600,13 +590,15 @@ func (s *CurrentBlobFileSet) ApplyAndUpdateVersionEdit(ve *VersionEdit) error {
600590
return errors.AssertionFailedf("pebble: referenced value size %d for blob file %s is greater than the referenced value size %d",
601591
ref.ValueSize, cbf.metadata.FileID, cbf.referencedValueSize)
602592
}
603-
if _, ok := cbf.references[meta]; !ok {
604-
return errors.AssertionFailedf("pebble: deleted table %s's reference to blob file %d not known",
593+
if _, ok := cbf.references[meta.TableNum]; !ok {
594+
return errors.AssertionFailedf("pebble: deleted table %s's reference to blob file %s not known",
605595
meta.TableNum, ref.FileID)
606596
}
607597
}
608598

609-
// Decrement the stats for this reference.
599+
// Decrement the stats for this reference. We decrement even if the
600+
// table is being moved, because we incremented the stats when we
601+
// iterated over the version edit's new tables.
610602
cbf.referencedValueSize -= ref.ValueSize
611603
s.stats.ReferencedValueSize -= ref.ValueSize
612604
s.stats.ReferencesCount--
@@ -619,7 +611,7 @@ func (s *CurrentBlobFileSet) ApplyAndUpdateVersionEdit(ve *VersionEdit) error {
619611
continue
620612
}
621613
// Remove the reference of this table to this blob file.
622-
delete(cbf.references, meta)
614+
delete(cbf.references, meta.TableNum)
623615

624616
// If there are no more references to the blob file, remove it from
625617
// the set and add the removal of the blob file to the version edit.

internal/manifest/table_metadata.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ func (m *TableMetadata) VirtualMeta() *TableMetadata {
328328
func (m *TableMetadata) EstimatedReferenceSize() uint64 {
329329
var size uint64
330330
for i := range m.BlobReferences {
331-
size += m.BlobReferences[i].EstimatedPhysicalSize()
331+
size += m.BlobReferences[i].EstimatedPhysicalSize
332332
}
333333
return size
334334
}

internal/manifest/testdata/version_edit_apply

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ L2:
233233
apply v8
234234
add-table: L0 000004:[a#9,SET-z#9,DEL] seqnums:[9-9] points:[a#9,SET-z#9,DEL] blobrefs:[(B000003: 25935); depth:1]
235235
----
236-
error during Accumulate: blob file B000003 referenced by L0.000004 not found
236+
pebble: blob file B000003 referenced by L0.000004 not found
237237

238238
# Apply a version edit that introduces a new blob file and a corresponding table
239239
# that references its entirety.
@@ -258,7 +258,7 @@ Blob files:
258258
apply v8
259259
add-table: L0 000005:[a#10,SET-a#10,SET] seqnums:[10-10] points:[a#10,SET-a#10,SET] blobrefs:[(B000003: 205); depth:1]
260260
----
261-
error during Accumulate: blob file B000003 referenced by L0.000005 not found
261+
pebble: blob file B000003 referenced by L0.000005 not found
262262

263263
# We can add new tables referencing the blob file if we delete an existing table
264264
# with a reference to the file.

internal/manifest/version_edit.go

Lines changed: 16 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -984,13 +984,6 @@ type BulkVersionEdit struct {
984984
//
985985
// Deleted is keyed by blob file ID and points to the physical blob file.
986986
Deleted map[base.BlobFileID]*PhysicalBlobFile
987-
// DeletedReferences holds metadata of blob files referenced by tables
988-
// deleted in the accumulated version edits. This is used during replay
989-
// to populate the *BlobFileMetadata pointers of new blob references,
990-
// making use of the invariant that new blob references must correspond
991-
// to a file introduced in VersionEdit.AddedBlobFiles or a file
992-
// referenced by a deleted sstable.
993-
DeletedReferences map[base.BlobFileID]*PhysicalBlobFile
994987
}
995988

996989
// AddedFileBacking is a map to support lookup so that we can populate the
@@ -1013,26 +1006,6 @@ type BulkVersionEdit struct {
10131006
MarkedForCompactionCountDiff int
10141007
}
10151008

1016-
// getBlobFileMetadata returns the metadata for the specified blob file. When a
1017-
// new sstable with a reference to a blob file is accumulated to the
1018-
// BulkVersionEdit, it must be the case that either an already-accumulated
1019-
// version edit added the blob file to the BlobFiles.Added map, or the blob file
1020-
// was referenced by a file that was deleted (and the blob file was added to
1021-
// BlobFiles.DeletedReferences).
1022-
func (b *BulkVersionEdit) getBlobFileMetadata(fileID base.BlobFileID) *PhysicalBlobFile {
1023-
if b.BlobFiles.Added != nil {
1024-
if md, ok := b.BlobFiles.Added[fileID]; ok {
1025-
return md
1026-
}
1027-
}
1028-
if b.BlobFiles.DeletedReferences != nil {
1029-
if md, ok := b.BlobFiles.DeletedReferences[fileID]; ok {
1030-
return md
1031-
}
1032-
}
1033-
return nil
1034-
}
1035-
10361009
// Accumulate adds the file addition and deletions in the specified version
10371010
// edit to the bulk edit's internal state.
10381011
//
@@ -1106,17 +1079,6 @@ func (b *BulkVersionEdit) Accumulate(ve *VersionEdit) error {
11061079
// Present in b.Added for the same level.
11071080
delete(b.AddedTables[df.Level], df.FileNum)
11081081
}
1109-
// If this deleted sstable had any references to blob files, record the
1110-
// referenced blob file to BlobFiles.DeletedReferences. If the
1111-
// references are carried forward to new files (eg, during a compaction
1112-
// that decides not to rewrite the blob file), then we'll have the
1113-
// *BlobFileMetadata available when we process the NewTableEntry.
1114-
for _, blobRef := range m.BlobReferences {
1115-
if b.BlobFiles.DeletedReferences == nil {
1116-
b.BlobFiles.DeletedReferences = make(map[base.BlobFileID]*PhysicalBlobFile)
1117-
}
1118-
b.BlobFiles.DeletedReferences[blobRef.FileID] = blobRef.OriginalMetadata
1119-
}
11201082
}
11211083

11221084
// Generate state for Added backing files. Note that these must be generated
@@ -1165,20 +1127,6 @@ func (b *BulkVersionEdit) Accumulate(ve *VersionEdit) error {
11651127
if nf.Meta.MarkedForCompaction {
11661128
b.MarkedForCompactionCountDiff++
11671129
}
1168-
1169-
// If there are any new sstables with blob references without a pointer
1170-
// to the corresponding BlobFileMetadata, populate the pointer. This is
1171-
// the expected case during manifest replay.
1172-
for i := range nf.Meta.BlobReferences {
1173-
if nf.Meta.BlobReferences[i].OriginalMetadata != nil {
1174-
continue
1175-
}
1176-
nf.Meta.BlobReferences[i].OriginalMetadata = b.getBlobFileMetadata(nf.Meta.BlobReferences[i].FileID)
1177-
if nf.Meta.BlobReferences[i].OriginalMetadata == nil {
1178-
return errors.Errorf("blob file %s referenced by L%d.%s not found",
1179-
nf.Meta.BlobReferences[i].FileID, nf.Level, nf.Meta.TableNum)
1180-
}
1181-
}
11821130
}
11831131

11841132
for _, n := range ve.RemovedBackingTables {
@@ -1298,6 +1246,22 @@ func (b *BulkVersionEdit) Apply(curr *Version, readCompactionRate int64) (*Versi
12981246
f.AllowedSeeks.Store(allowedSeeks)
12991247
f.InitAllowedSeeks = allowedSeeks
13001248

1249+
// Validate that all referenced blob files exist.
1250+
for i, ref := range f.BlobReferences {
1251+
phys, ok := v.BlobFiles.LookupPhysical(ref.FileID)
1252+
if !ok {
1253+
return nil, errors.AssertionFailedf("pebble: blob file %s referenced by L%d.%s not found",
1254+
ref.FileID, level, f.TableNum)
1255+
}
1256+
// NB: It's possible that the reference already has an estimated
1257+
// physical size if the table was moved.
1258+
if ref.EstimatedPhysicalSize == 0 {
1259+
// We must call MakeBlobReference so that we compute the
1260+
// reference's physical estimated size.
1261+
f.BlobReferences[i] = MakeBlobReference(ref.FileID, ref.ValueSize, phys)
1262+
}
1263+
}
1264+
13011265
err := lm.insert(f)
13021266
if err != nil {
13031267
return nil, errors.Wrap(err, "pebble")

testdata/version_set

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -615,29 +615,56 @@ no obsolete tables
615615
no zombie blob files
616616
no obsolete blob files
617617

618+
# Compact the referencing sstable, removing half its referenced value size.
619+
620+
apply
621+
del-table: L4 000017
622+
add-table: L5 000018:[f#20,SET-g#20,SET] seqnums:[0-0] points:[f#20,SET-g#20,SET] size:1500 blobrefs:[(B000016: 5000); depth:1]
623+
----
624+
applied:
625+
last-seq-num: 99
626+
del-table: L4 000017
627+
add-table: L5 000018:[f#20,SET-g#20,SET]
628+
current version:
629+
L2:
630+
000001:[a#1,SET-c#1,SET] seqnums:[0-0] points:[a#1,SET-c#1,SET] size:100
631+
L3:
632+
000008:[a#1,SET-c#1,SET] seqnums:[0-0] points:[a#1,SET-c#1,SET] size:800
633+
000010:[d#1,SET-e#1,SET] seqnums:[0-0] points:[d#1,SET-e#1,SET] size:1000
634+
L5:
635+
000018:[f#20,SET-g#20,SET] seqnums:[0-0] points:[f#20,SET-g#20,SET] size:1800 blobrefs:[(B000016: 5000); depth:1]
636+
Blob files:
637+
B000016 physical:{000016 size:[10000 (9.8KB)] vals:[10000 (9.8KB)]}
638+
no virtual backings
639+
zombie tables: 000017
640+
no obsolete tables
641+
no zombie blob files
642+
no obsolete blob files
643+
644+
618645
# Apply the blob file replacement. Because we ref'd the previous version, the
619646
# previous physical blob file should be considered a zombie.
620647

621648
apply
622649
del-blob-file: B000016 000016
623-
add-blob-file: B000016 physical:{000018 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
650+
add-blob-file: B000016 physical:{000019 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
624651
----
625652
applied:
626653
last-seq-num: 99
627-
add-blob-file: B000016 physical:{000018 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
654+
add-blob-file: B000016 physical:{000019 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
628655
del-blob-file: B000016 000016
629656
current version:
630657
L2:
631658
000001:[a#1,SET-c#1,SET] seqnums:[0-0] points:[a#1,SET-c#1,SET] size:100
632659
L3:
633660
000008:[a#1,SET-c#1,SET] seqnums:[0-0] points:[a#1,SET-c#1,SET] size:800
634661
000010:[d#1,SET-e#1,SET] seqnums:[0-0] points:[d#1,SET-e#1,SET] size:1000
635-
L4:
636-
000017:[f#20,SET-g#20,SET] seqnums:[0-0] points:[f#20,SET-g#20,SET] size:1700 blobrefs:[(B000016: 10000); depth:1]
662+
L5:
663+
000018:[f#20,SET-g#20,SET] seqnums:[0-0] points:[f#20,SET-g#20,SET] size:1800 blobrefs:[(B000016: 5000); depth:1]
637664
Blob files:
638-
B000016 physical:{000018 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
665+
B000016 physical:{000019 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
639666
no virtual backings
640-
no zombie tables
667+
zombie tables: 000017
641668
no obsolete tables
642669
zombie blob files: 000016
643670
no obsolete blob files
@@ -654,13 +681,13 @@ current version:
654681
L3:
655682
000008:[a#1,SET-c#1,SET] seqnums:[0-0] points:[a#1,SET-c#1,SET] size:800
656683
000010:[d#1,SET-e#1,SET] seqnums:[0-0] points:[d#1,SET-e#1,SET] size:1000
657-
L4:
658-
000017:[f#20,SET-g#20,SET] seqnums:[0-0] points:[f#20,SET-g#20,SET] size:1700 blobrefs:[(B000016: 10000); depth:1]
684+
L5:
685+
000018:[f#20,SET-g#20,SET] seqnums:[0-0] points:[f#20,SET-g#20,SET] size:1800 blobrefs:[(B000016: 5000); depth:1]
659686
Blob files:
660-
B000016 physical:{000018 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
687+
B000016 physical:{000019 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
661688
no virtual backings
662689
no zombie tables
663-
no obsolete tables
690+
obsolete tables: 000017
664691
no zombie blob files
665692
obsolete blob files: 000016
666693

@@ -674,10 +701,10 @@ current version:
674701
L3:
675702
000008:[a#1,SET-c#1,SET] seqnums:[0-0] points:[a#1,SET-c#1,SET] size:800
676703
000010:[d#1,SET-e#1,SET] seqnums:[0-0] points:[d#1,SET-e#1,SET] size:1000
677-
L4:
678-
000017:[f#20,SET-g#20,SET] seqnums:[0-0] points:[f#20,SET-g#20,SET] size:1700 blobrefs:[(B000016: 10000); depth:1]
704+
L5:
705+
000018:[f#20,SET-g#20,SET] seqnums:[0-0] points:[f#20,SET-g#20,SET] size:1800 blobrefs:[(B000016: 5000); depth:1]
679706
Blob files:
680-
B000016 physical:{000018 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
707+
B000016 physical:{000019 size:[5000 (4.9KB)] vals:[5000 (4.9KB)]}
681708
no virtual backings
682709
no zombie tables
683710
no obsolete tables

0 commit comments

Comments
 (0)