Skip to content

Commit fb5d0a7

Browse files
committed
db: consolidate the file size annotators
We currently have three file size annotators, which are used for `EstimateDiskUsage`. As a result, partially overlapping remote files are inspected twice and external remote files three times. It also means 3x more annotation-related objects per B-Tree node. This change consolidates the three annotators into a single one. To support this, we generalize `SumAggregator` to allow arbitrary value types.
1 parent 3a12933 commit fb5d0a7

File tree

4 files changed

+78
-72
lines changed

4 files changed

+78
-72
lines changed

db.go

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -505,17 +505,7 @@ type DB struct {
505505
// validating is set to true when validation is running.
506506
validating bool
507507
}
508-
509-
// annotators contains various instances of manifest.TableAnnotator which
510-
// should be protected from concurrent access.
511-
annotators struct {
512-
// totalFileSize is the sum of the size of all files in the
513-
// database. This includes local, remote, and external sstables --
514-
// along with blob files.
515-
totalFileSize *manifest.TableAnnotator[uint64]
516-
remoteSize *manifest.TableAnnotator[uint64]
517-
externalSize *manifest.TableAnnotator[uint64]
518-
}
508+
fileSizeAnnotator *manifest.TableAnnotator[fileSizeByBacking]
519509
}
520510

521511
// problemSpans keeps track of spans of keys within LSM levels where

disk_usage.go

Lines changed: 56 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -48,40 +48,67 @@ func (d *DB) EstimateDiskUsageByBackingType(
4848
readState := d.loadReadState()
4949
defer readState.unref()
5050

51-
totalSize = *d.mu.annotators.totalFileSize.VersionRangeAnnotation(readState.current, bounds)
52-
remoteSize = *d.mu.annotators.remoteSize.VersionRangeAnnotation(readState.current, bounds)
53-
externalSize = *d.mu.annotators.externalSize.VersionRangeAnnotation(readState.current, bounds)
51+
sizes := d.mu.fileSizeAnnotator.VersionRangeAnnotation(readState.current, bounds)
52+
return sizes.totalSize, sizes.remoteSize, sizes.externalSize, nil
53+
}
5454

55-
return
55+
// fileSizeByBacking contains the estimated file size for LSM data within some
56+
// bounds. It is broken down by backing type. The file size refers to both the
57+
// sstable size and an estimate of the referenced blob sizes.
58+
type fileSizeByBacking struct {
59+
// totalSize is the estimated size of all files for the given bounds.
60+
totalSize uint64
61+
// remoteSize is the estimated size of remote files for the given bounds.
62+
remoteSize uint64
63+
// externalSize is the estimated size of external files for the given bounds.
64+
externalSize uint64
5665
}
5766

58-
// makeFileSizeAnnotator returns an annotator that computes the total
59-
// storage size of files that meet some criteria defined by filter. When
60-
// applicable, this includes both the sstable size and the size of any
61-
// referenced blob files.
62-
func (d *DB) makeFileSizeAnnotator(
63-
filter func(f *manifest.TableMetadata) bool,
64-
) *manifest.TableAnnotator[uint64] {
65-
return manifest.NewTableAnnotator[uint64](manifest.SumAggregator{
66-
AccumulateFunc: func(f *manifest.TableMetadata) (uint64, bool) {
67-
if filter(f) {
68-
return f.Size + f.EstimatedReferenceSize(), true
69-
}
70-
return 0, true
67+
func (d *DB) singleFileSizeByBacking(
68+
fileSize uint64, t *manifest.TableMetadata,
69+
) (_ fileSizeByBacking, ok bool) {
70+
res := fileSizeByBacking{
71+
totalSize: fileSize,
72+
}
73+
74+
objMeta, err := d.objProvider.Lookup(base.FileTypeTable, t.TableBacking.DiskFileNum)
75+
if err != nil {
76+
return res, false
77+
}
78+
if objMeta.IsRemote() {
79+
res.remoteSize += fileSize
80+
if objMeta.IsExternal() {
81+
res.externalSize += fileSize
82+
}
83+
}
84+
return res, true
85+
}
86+
87+
// makeFileSizeAnnotator returns an annotator that computes the storage size of
88+
// files. When applicable, this includes both the sstable size and the size of
89+
// any referenced blob files.
90+
func (d *DB) makeFileSizeAnnotator() *manifest.TableAnnotator[fileSizeByBacking] {
91+
return manifest.NewTableAnnotator[fileSizeByBacking](manifest.SumAggregator[fileSizeByBacking]{
92+
AddFunc: func(src, dst *fileSizeByBacking) {
93+
dst.totalSize += src.totalSize
94+
dst.remoteSize += src.remoteSize
95+
dst.externalSize += src.externalSize
96+
},
97+
AccumulateFunc: func(f *manifest.TableMetadata) (v fileSizeByBacking, cacheOK bool) {
98+
return d.singleFileSizeByBacking(f.Size+f.EstimatedReferenceSize(), f)
7199
},
72-
AccumulatePartialOverlapFunc: func(f *manifest.TableMetadata, bounds base.UserKeyBounds) uint64 {
73-
if filter(f) {
74-
overlappingFileSize, err := d.fileCache.estimateSize(f, bounds.Start, bounds.End.Key)
75-
if err != nil {
76-
return 0
77-
}
78-
overlapFraction := float64(overlappingFileSize) / float64(f.Size)
79-
// Scale the blob reference size proportionally to the file
80-
// overlap from the bounds to approximate only the blob
81-
// references that overlap with the requested bounds.
82-
return overlappingFileSize + uint64(float64(f.EstimatedReferenceSize())*overlapFraction)
100+
AccumulatePartialOverlapFunc: func(f *manifest.TableMetadata, bounds base.UserKeyBounds) fileSizeByBacking {
101+
overlappingFileSize, err := d.fileCache.estimateSize(f, bounds.Start, bounds.End.Key)
102+
if err != nil {
103+
return fileSizeByBacking{}
83104
}
84-
return 0
105+
overlapFraction := float64(overlappingFileSize) / float64(f.Size)
106+
// Scale the blob reference size proportionally to the file
107+
// overlap from the bounds to approximate only the blob
108+
// references that overlap with the requested bounds.
109+
size := overlappingFileSize + uint64(float64(f.EstimatedReferenceSize())*overlapFraction)
110+
res, _ := d.singleFileSizeByBacking(size, f)
111+
return res
85112
},
86113
})
87114
}

internal/manifest/annotator.go

Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -333,44 +333,46 @@ func (a *BlobFileAnnotator[T]) Annotation(blobFiles *BlobFileSet) *T {
333333
return v
334334
}
335335

336-
// SumAggregator defines an Aggregator which sums together a uint64 value
336+
// SumAggregator defines an Aggregator which sums values of type T
337337
// across files.
338-
type SumAggregator struct {
339-
AccumulateFunc func(f *TableMetadata) (v uint64, cacheOK bool)
340-
AccumulatePartialOverlapFunc func(f *TableMetadata, bounds base.UserKeyBounds) uint64
338+
type SumAggregator[T any] struct {
339+
AddFunc func(src, dst *T)
340+
AccumulateFunc func(f *TableMetadata) (v T, cacheOK bool)
341+
AccumulatePartialOverlapFunc func(f *TableMetadata, bounds base.UserKeyBounds) T
341342
}
342343

343344
// Zero implements AnnotationAggregator.Zero, returning a pointer to a new zero-valued T.
344-
func (sa SumAggregator) Zero() *uint64 {
345-
return new(uint64)
345+
func (sa SumAggregator[T]) Zero() *T {
346+
return new(T)
346347
}
347348

348349
// Accumulate implements AnnotationAggregator.Accumulate, accumulating a single
349350
// file's value of type T.
350-
func (sa SumAggregator) Accumulate(f *TableMetadata, dst *uint64) (v *uint64, cacheOK bool) {
351+
func (sa SumAggregator[T]) Accumulate(f *TableMetadata, dst *T) (v *T, cacheOK bool) {
351352
accumulated, ok := sa.AccumulateFunc(f)
352-
*dst += accumulated
353+
sa.AddFunc(&accumulated, dst)
353354
return dst, ok
354355
}
355356

356357
// AccumulatePartialOverlap implements
357358
// PartialOverlapAnnotationAggregator.AccumulatePartialOverlap, accumulating a
358359
// single file's value of type T for a file which only partially overlaps with the
359360
// range defined by bounds.
360-
func (sa SumAggregator) AccumulatePartialOverlap(
361-
f *TableMetadata, dst *uint64, bounds base.UserKeyBounds,
362-
) *uint64 {
361+
func (sa SumAggregator[T]) AccumulatePartialOverlap(
362+
f *TableMetadata, dst *T, bounds base.UserKeyBounds,
363+
) *T {
363364
if sa.AccumulatePartialOverlapFunc == nil {
364365
v, _ := sa.Accumulate(f, dst)
365366
return v
366367
}
367-
*dst += sa.AccumulatePartialOverlapFunc(f, bounds)
368+
accumulated := sa.AccumulatePartialOverlapFunc(f, bounds)
369+
sa.AddFunc(&accumulated, dst)
368370
return dst
369371
}
370372

371373
// Merge implements AnnotationAggregator.Merge by adding src into dst using AddFunc.
372-
func (sa SumAggregator) Merge(src *uint64, dst *uint64) *uint64 {
373-
*dst += *src
374+
func (sa SumAggregator[T]) Merge(src *T, dst *T) *T {
375+
sa.AddFunc(src, dst)
374376
return dst
375377
}
376378

@@ -380,7 +382,10 @@ func (sa SumAggregator) Merge(src *uint64, dst *uint64) *uint64 {
380382
func SumAnnotator(
381383
accumulate func(f *TableMetadata) (v uint64, cacheOK bool),
382384
) *TableAnnotator[uint64] {
383-
return NewTableAnnotator[uint64](SumAggregator{AccumulateFunc: accumulate})
385+
return NewTableAnnotator[uint64](SumAggregator[uint64]{
386+
AddFunc: func(src, dst *uint64) { *dst += *src },
387+
AccumulateFunc: accumulate,
388+
})
384389
}
385390

386391
// NumFilesAnnotator is a TableAnnotator which computes an annotation value

open.go

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -442,23 +442,7 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
442442
d.newIters = d.fileCache.newIters
443443
d.tableNewRangeKeyIter = tableNewRangeKeyIter(d.newIters)
444444

445-
d.mu.annotators.totalFileSize = d.makeFileSizeAnnotator(func(f *manifest.TableMetadata) bool {
446-
return true
447-
})
448-
d.mu.annotators.remoteSize = d.makeFileSizeAnnotator(func(f *manifest.TableMetadata) bool {
449-
meta, err := d.objProvider.Lookup(base.FileTypeTable, f.TableBacking.DiskFileNum)
450-
if err != nil {
451-
return false
452-
}
453-
return meta.IsRemote()
454-
})
455-
d.mu.annotators.externalSize = d.makeFileSizeAnnotator(func(f *manifest.TableMetadata) bool {
456-
meta, err := d.objProvider.Lookup(base.FileTypeTable, f.TableBacking.DiskFileNum)
457-
if err != nil {
458-
return false
459-
}
460-
return meta.IsRemote() && meta.Remote.CleanupMethod == objstorage.SharedNoCleanup
461-
})
445+
d.mu.fileSizeAnnotator = d.makeFileSizeAnnotator()
462446

463447
var previousOptionsFileNum base.DiskFileNum
464448
var previousOptionsFilename string

0 commit comments

Comments
 (0)