Commit d5443f6

internal/manifest: define BlobReferenceDepth type

Define a BlobReferenceDepth type and move the documentation of the blob
reference depth from the TableMetadata field onto the type, making the
documentation more discoverable.
1 parent edebc31 commit d5443f6

6 files changed (+49, -45 lines)

internal/compact/run.go

Lines changed: 2 additions & 2 deletions
@@ -50,7 +50,7 @@ type OutputTable struct {
 	// BlobReferences is the list of blob references for the table.
 	BlobReferences []manifest.BlobReference
 	// BlobReferenceDepth is the depth of the blob references for the table.
-	BlobReferenceDepth int
+	BlobReferenceDepth manifest.BlobReferenceDepth
 }
 
 // OutputBlob contains metadata about a blob file that was created during a
@@ -140,7 +140,7 @@ type ValueSeparation interface {
 type ValueSeparationMetadata struct {
 	BlobReferences    []manifest.BlobReference
 	BlobReferenceSize uint64
-	BlobReferenceDepth int
+	BlobReferenceDepth manifest.BlobReferenceDepth
 
 	// The below fields are only populated if a new blob file was created.
 	BlobFileStats blob.FileWriterStats

internal/manifest/blob_metadata.go

Lines changed: 35 additions & 0 deletions
@@ -181,3 +181,38 @@ func ParseBlobFileMetadataDebug(s string) (_ *BlobFileMetadata, err error) {
 	}
 	return m, nil
 }
+
+// BlobReferenceDepth is a statistic maintained per-sstable, indicating an upper
+// bound on the number of blob files that a reader scanning the table would need
+// to keep open if they only open and close referenced blob files once. In other
+// words, it's the stack depth of blob files referenced by an sstable. If a
+// flush or compaction rewrites an sstable's values to a new blob file, the
+// resulting sstable has a blob reference depth of 1. When a compaction reuses
+// blob references, the max blob reference depth of the files in each level is
+// used, and then the depths are summed and assigned to the output. This is a
+// loose upper bound (assuming a worst-case distribution of keys in all inputs)
+// but avoids tracking key spans for references and using key comparisons.
+//
+// Because the blob reference depth is the size of the working set of blob files
+// referenced by the table, it cannot exceed the count of distinct blob file
+// references.
+//
+// Example: Consider a compaction of file f0 from L0 and files f1, f2, f3 from
+// L1, where the former has a blob reference depth of 1 and files f1, f2, f3 all
+// happen to have a blob reference depth of 1. Say we produce many output files,
+// one of which is f4. We are assuming here that the blobs referenced by f0
+// whose keys happened to be written to f4 are spread all across the key span of
+// f4. Say keys from f1 and f2 also made their way to f4. Then we will first
+// have keys that refer to blobs referenced by f1,f0, and at some point, once we
+// move past the keys of f1, we will have keys that refer to blobs referenced by
+// f2,f0. In some sense, we have a working set of 2 blob files at any point in
+// time, and this is similar to the idea of level stack depth for reads -- hence
+// we adopt the depth terminology. We want to keep this stack depth in check,
+// since locality is important, while allowing it to be higher than 1, since
+// otherwise we would need to rewrite blob files in every compaction (defeating
+// the write-amp benefit we are looking for). Like the level depth, this
+// simplistic analysis does not take into account the distribution of keys
+// involved in the compaction and which of them have blob references. The
+// locality is also actually better than in this analysis because more of the
+// keys will be from the lower level.
+type BlobReferenceDepth int
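
The rule described in the new comment (take the maximum blob reference depth within each input level, then sum the per-level maxima to form the output's bound) can be illustrated with a short Go sketch. This is a minimal, hypothetical illustration; the helper name and input shape are assumptions and not part of this commit.

package main

import "fmt"

// BlobReferenceDepth mirrors the type introduced by this commit.
type BlobReferenceDepth int

// compactionOutputDepth is a hypothetical helper illustrating the documented
// rule: take the maximum blob reference depth within each input level, then
// sum those maxima to form the (loose) upper bound assigned to the outputs.
func compactionOutputDepth(inputLevels [][]BlobReferenceDepth) BlobReferenceDepth {
	var total BlobReferenceDepth
	for _, level := range inputLevels {
		var levelMax BlobReferenceDepth
		for _, d := range level {
			if d > levelMax {
				levelMax = d
			}
		}
		total += levelMax
	}
	return total
}

func main() {
	// The example from the comment: f0 in L0 with depth 1, and f1, f2, f3 in
	// L1 each with depth 1. The output's depth bound is 1 + 1 = 2.
	fmt.Println(compactionOutputDepth([][]BlobReferenceDepth{{1}, {1, 1, 1}}))
}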

internal/manifest/version.go

Lines changed: 5 additions & 37 deletions
@@ -239,44 +239,12 @@ type TableMetadata struct {
 	// BlobReferences is a list of blob files containing values that are
 	// referenced by this sstable.
 	BlobReferences []BlobReference
-	// BlobReferenceDepth is an upper bound on the number of blob files that a
-	// reader scanning the table would need to keep open if they only open and
-	// close referenced blob files once. In other words, it's the stack depth of
-	// blob files referenced by this sstable. If a flush or compaction rewrites
-	// an sstable's values to a new blob file, the resulting sstable has a blob
-	// reference depth of 1. When a compaction reuses blob references, the max
-	// blob reference depth of the files in each level is used, and then the
-	// depth is summed, and assigned to the output. This is a loose upper bound
-	// (assuming worst case distribution of keys in all inputs) but avoids
-	// tracking key spans for references and using key comparisons.
-	//
-	// Because the blob reference depth is the size of the working set of blob
-	// files referenced by the table, it cannot exceed the count of distinct
-	// blob file references.
-	//
-	// Example: Consider a compaction of file f0 from L0 and files f1, f2, f3
-	// from L1, where the former has blob reference depth of 1 and files f1, f2,
-	// f3 all happen to have a blob-reference-depth of 1. Say we produce many
-	// output files, one of which is f4. We are assuming here that the blobs
-	// referenced by f0 whose keys happened to be written to f4 are spread all
-	// across the key span of f4. Say keys from f1 and f2 also made their way to
-	// f4. Then we will first have keys that refer to blobs referenced by f1,f0
-	// and at some point once we move past the keys of f1, we will have keys
-	// that refer to blobs referenced by f2,f0. In some sense, we have a working
-	// set of 2 blob files at any point in time, and this is similar to the idea
-	// of level stack depth for reads -- hence we adopt the depth terminology.
-	// We want to keep this stack depth in check, since locality is important,
-	// while allowing it to be higher than 1, since otherwise we will need to
-	// rewrite blob files in every compaction (defeating the write amp benefit
-	// we are looking for). Similar to the level depth, this simplistic analysis
-	// does not take into account distribution of keys involved in the
-	// compaction and which of them have blob references. Also the locality is
-	// actually better than in this analysis because more of the keys will be
-	// from the lower level.
+	// BlobReferenceDepth is the stack depth of blob files referenced by this
+	// sstable. See the comment on the BlobReferenceDepth type for more details.
 	//
 	// INVARIANT: BlobReferenceDepth == 0 iff len(BlobReferences) == 0
 	// INVARIANT: BlobReferenceDepth <= len(BlobReferences)
-	BlobReferenceDepth int
+	BlobReferenceDepth BlobReferenceDepth
 
 	// refs is the reference count for the table, used to determine when a table
 	// is obsolete. When a table's reference count falls to zero, the table is
@@ -970,7 +938,7 @@ func ParseTableMetadataDebug(s string) (_ *TableMetadata, err error) {
 			p.Expect(";")
 			p.Expect("depth")
 			p.Expect(":")
-			m.BlobReferenceDepth = int(p.Uint64())
+			m.BlobReferenceDepth = BlobReferenceDepth(p.Uint64())
 			p.Expect("]")
 
 		default:
@@ -1076,7 +1044,7 @@ func (m *TableMetadata) Validate(cmp Compare, formatKey base.FormatKey) error {
 	// Assert that there's a nonzero blob reference depth if and only if the
 	// table has a nonzero count of blob references. Additionally, the file's
 	// blob reference depth should be bounded by the number of blob references.
-	if (len(m.BlobReferences) == 0) != (m.BlobReferenceDepth == 0) || m.BlobReferenceDepth > len(m.BlobReferences) {
+	if (len(m.BlobReferences) == 0) != (m.BlobReferenceDepth == 0) || m.BlobReferenceDepth > BlobReferenceDepth(len(m.BlobReferences)) {
 		return base.CorruptionErrorf("table %s with %d blob refs but %d blob ref depth",
 			m.FileNum, len(m.BlobReferences), m.BlobReferenceDepth)
 	}
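
The two INVARIANT lines retained on the field correspond to the check in Validate above: the depth is zero iff there are no references, and it never exceeds the reference count. A standalone sketch of the same predicate, with hypothetical inputs rather than Pebble's actual test code, might look like:

package main

import "fmt"

type BlobReferenceDepth int

// validateBlobRefDepth is a hypothetical restatement of the invariant checked
// by TableMetadata.Validate: depth is zero iff there are no references, and it
// never exceeds the number of distinct references.
func validateBlobRefDepth(numRefs int, depth BlobReferenceDepth) error {
	if (numRefs == 0) != (depth == 0) || depth > BlobReferenceDepth(numRefs) {
		return fmt.Errorf("table with %d blob refs but %d blob ref depth", numRefs, depth)
	}
	return nil
}

func main() {
	fmt.Println(validateBlobRefDepth(3, 2)) // ok: <nil>
	fmt.Println(validateBlobRefDepth(0, 1)) // violates the iff invariant
	fmt.Println(validateBlobRefDepth(2, 3)) // depth exceeds the reference count
}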

internal/manifest/version_edit.go

Lines changed: 4 additions & 3 deletions
@@ -358,7 +358,7 @@ func (v *VersionEdit) Decode(r io.Reader) error {
 			var syntheticPrefix sstable.SyntheticPrefix
 			var syntheticSuffix sstable.SyntheticSuffix
 			var blobReferences []BlobReference
-			var blobReferenceDepth uint64
+			var blobReferenceDepth BlobReferenceDepth
 			if tag == tagNewFile4 || tag == tagNewFile5 {
 				for {
 					customTag, err := d.readUvarint()
@@ -414,10 +414,11 @@ func (v *VersionEdit) Decode(r io.Reader) error {
 					case customTagBlobReferences:
 						// The first varint encodes the 'blob reference depth'
 						// of the table.
-						blobReferenceDepth, err = d.readUvarint()
+						v, err := d.readUvarint()
 						if err != nil {
 							return err
 						}
+						blobReferenceDepth = BlobReferenceDepth(v)
 						n, err := d.readUvarint()
 						if err != nil {
 							return err
@@ -457,7 +458,7 @@ func (v *VersionEdit) Decode(r io.Reader) error {
 				LargestSeqNum:         largestSeqNum,
 				LargestSeqNumAbsolute: largestSeqNum,
 				BlobReferences:        blobReferences,
-				BlobReferenceDepth:    int(blobReferenceDepth),
+				BlobReferenceDepth:    blobReferenceDepth,
 				MarkedForCompaction:   markedForCompaction,
 				Virtual:               virtualState.virtual,
 				SyntheticPrefixAndSuffix: sstable.MakeSyntheticPrefixAndSuffix(syntheticPrefix, syntheticSuffix),
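
The decode path above reads the depth as a uvarint and then converts the raw uint64 into the new type. A minimal round-trip sketch of that conversion using the standard library's encoding/binary varint helpers, rather than Pebble's internal reader, is shown below; it only illustrates the encoding shape, not the version-edit format itself.

package main

import (
	"encoding/binary"
	"fmt"
)

type BlobReferenceDepth int

func main() {
	depth := BlobReferenceDepth(2)

	// Encode the depth as a uvarint, as the custom blob-references tag does.
	buf := binary.AppendUvarint(nil, uint64(depth))

	// Decode the uvarint and convert back to the typed value, mirroring the
	// BlobReferenceDepth(v) conversion in VersionEdit.Decode.
	v, n := binary.Uvarint(buf)
	if n <= 0 {
		panic("bad uvarint")
	}
	decoded := BlobReferenceDepth(v)
	fmt.Println(decoded == depth) // true
}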

value_separation.go

Lines changed: 2 additions & 2 deletions
@@ -193,7 +193,7 @@ type preserveBlobReferences struct {
 	// for every unique blob file referenced by input sstables.
 	// inputBlobMetadatas must be sorted by FileNum.
 	inputBlobMetadatas       []*manifest.BlobFileMetadata
-	outputBlobReferenceDepth int
+	outputBlobReferenceDepth manifest.BlobReferenceDepth
 
 	// state
 	buf []byte
@@ -321,6 +321,6 @@ func (vs *preserveBlobReferences) FinishOutput() (compact.ValueSeparationMetadat
 		// reflecting the worst-case overlap of referenced blob files. If this
 		// sstable references fewer unique blob files, reduce its depth to the
 		// count of unique files.
-		BlobReferenceDepth: min(vs.outputBlobReferenceDepth, len(references)),
+		BlobReferenceDepth: min(vs.outputBlobReferenceDepth, manifest.BlobReferenceDepth(len(references))),
	}, nil
 }
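
The min() above enforces the bound documented on the type: the depth is a working-set size, so it can never exceed the number of distinct blob files the output references. A tiny, hypothetical illustration of that clamp (the helper name is assumed, not from this commit):

package main

import "fmt"

type BlobReferenceDepth int

// clampDepth mirrors the min() applied in FinishOutput: an output table that
// inherited a summed depth of 3 from its input levels but references only 2
// unique blob files is recorded with depth 2.
func clampDepth(summed BlobReferenceDepth, uniqueRefs int) BlobReferenceDepth {
	return min(summed, BlobReferenceDepth(uniqueRefs))
}

func main() {
	fmt.Println(clampDepth(3, 2)) // 2
	fmt.Println(clampDepth(1, 4)) // 1
}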

value_separation_test.go

Lines changed: 1 addition & 1 deletion
@@ -251,6 +251,6 @@ func (d *defineDBValueSeparator) FinishOutput() (compact.ValueSeparationMetadata
 		return compact.ValueSeparationMetadata{}, err
 	}
 	// TODO(jackson): Support setting a specific depth from the datadriven test.
-	m.BlobReferenceDepth = len(m.BlobReferences)
+	m.BlobReferenceDepth = manifest.BlobReferenceDepth(len(m.BlobReferences))
 	return m, nil
 }
