Skip to content

Commit 7b047cd

Browse files
db: refactor disk usage estimate using range annotations
This change updates `db.EstimateDiskUsage` to use range annotations to estimate the disk usage of a key range. This should improve the performance of repeated disk usage estimates for similar or identical key ranges. At the Cockroach layer we use `db.EstimateDiskUsage` in a few places, most notably when [computing MVCC span stats](https://github.com/cockroachdb/cockroach/blob/master/pkg/server/span_stats_server.go#L217). Informs: #3793
1 parent a70d5b3 commit 7b047cd

File tree

4 files changed

+132
-95
lines changed

4 files changed

+132
-95
lines changed

db.go

Lines changed: 43 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,14 @@ type DB struct {
501501
// validating is set to true when validation is running.
502502
validating bool
503503
}
504+
505+
// annotators contains various instances of manifest.Annotator which
506+
// should be protected from concurrent access.
507+
annotators struct {
508+
totalSize *manifest.Annotator[uint64]
509+
remoteSize *manifest.Annotator[uint64]
510+
externalSize *manifest.Annotator[uint64]
511+
}
504512
}
505513

506514
// Normally equal to time.Now() but may be overridden in tests.
@@ -2228,6 +2236,31 @@ func (d *DB) SSTables(opts ...SSTablesOption) ([][]SSTableInfo, error) {
22282236
return destLevels, nil
22292237
}
22302238

2239+
// makeFileSizeAnnotator returns an annotator that computes the total size of
2240+
// files that meet some criteria defined by filter.
2241+
func (d *DB) makeFileSizeAnnotator(filter func(f *fileMetadata) bool) *manifest.Annotator[uint64] {
2242+
return &manifest.Annotator[uint64]{
2243+
Aggregator: manifest.SumAggregator{
2244+
AccumulateFunc: func(f *fileMetadata) (uint64, bool) {
2245+
if filter(f) {
2246+
return f.Size, true
2247+
}
2248+
return 0, true
2249+
},
2250+
AccumulatePartialOverlapFunc: func(f *fileMetadata, bounds base.UserKeyBounds) uint64 {
2251+
if filter(f) {
2252+
size, err := d.tableCache.estimateSize(f, bounds.Start, bounds.End.Key)
2253+
if err != nil {
2254+
return 0
2255+
}
2256+
return size
2257+
}
2258+
return 0
2259+
},
2260+
},
2261+
}
2262+
}
2263+
22312264
// EstimateDiskUsage returns the estimated filesystem space used in bytes for
22322265
// storing the range `[start, end]`. The estimation is computed as follows:
22332266
//
@@ -2254,7 +2287,9 @@ func (d *DB) EstimateDiskUsageByBackingType(
22542287
if err := d.closed.Load(); err != nil {
22552288
panic(err)
22562289
}
2257-
if d.opts.Comparer.Compare(start, end) > 0 {
2290+
2291+
bounds := base.UserKeyBoundsInclusive(start, end)
2292+
if !bounds.Valid(d.cmp) {
22582293
return 0, 0, 0, errors.New("invalid key-range specified (start > end)")
22592294
}
22602295

@@ -2264,70 +2299,13 @@ func (d *DB) EstimateDiskUsageByBackingType(
22642299
readState := d.loadReadState()
22652300
defer readState.unref()
22662301

2267-
for level, files := range readState.current.Levels {
2268-
iter := files.Iter()
2269-
if level > 0 {
2270-
// We can only use `Overlaps` to restrict `files` at L1+ since at L0 it
2271-
// expands the range iteratively until it has found a set of files that
2272-
// do not overlap any other L0 files outside that set.
2273-
overlaps := readState.current.Overlaps(level, base.UserKeyBoundsInclusive(start, end))
2274-
iter = overlaps.Iter()
2275-
}
2276-
for file := iter.First(); file != nil; file = iter.Next() {
2277-
if d.opts.Comparer.Compare(start, file.Smallest.UserKey) <= 0 &&
2278-
d.opts.Comparer.Compare(file.Largest.UserKey, end) <= 0 {
2279-
// The range fully contains the file, so skip looking it up in
2280-
// table cache/looking at its indexes, and add the full file size.
2281-
meta, err := d.objProvider.Lookup(fileTypeTable, file.FileBacking.DiskFileNum)
2282-
if err != nil {
2283-
return 0, 0, 0, err
2284-
}
2285-
if meta.IsRemote() {
2286-
remoteSize += file.Size
2287-
if meta.Remote.CleanupMethod == objstorage.SharedNoCleanup {
2288-
externalSize += file.Size
2289-
}
2290-
}
2291-
totalSize += file.Size
2292-
} else if d.opts.Comparer.Compare(file.Smallest.UserKey, end) <= 0 &&
2293-
d.opts.Comparer.Compare(start, file.Largest.UserKey) <= 0 {
2294-
var size uint64
2295-
var err error
2296-
if file.Virtual {
2297-
err = d.tableCache.withVirtualReader(
2298-
file.VirtualMeta(),
2299-
func(r sstable.VirtualReader) (err error) {
2300-
size, err = r.EstimateDiskUsage(start, end)
2301-
return err
2302-
},
2303-
)
2304-
} else {
2305-
err = d.tableCache.withReader(
2306-
file.PhysicalMeta(),
2307-
func(r *sstable.Reader) (err error) {
2308-
size, err = r.EstimateDiskUsage(start, end)
2309-
return err
2310-
},
2311-
)
2312-
}
2313-
if err != nil {
2314-
return 0, 0, 0, err
2315-
}
2316-
meta, err := d.objProvider.Lookup(fileTypeTable, file.FileBacking.DiskFileNum)
2317-
if err != nil {
2318-
return 0, 0, 0, err
2319-
}
2320-
if meta.IsRemote() {
2321-
remoteSize += size
2322-
if meta.Remote.CleanupMethod == objstorage.SharedNoCleanup {
2323-
externalSize += size
2324-
}
2325-
}
2326-
totalSize += size
2327-
}
2328-
}
2329-
}
2330-
return totalSize, remoteSize, externalSize, nil
2302+
d.mu.Lock()
2303+
defer d.mu.Unlock()
2304+
2305+
totalSize = *d.mu.annotators.totalSize.VersionRangeAnnotation(readState.current, bounds)
2306+
remoteSize = *d.mu.annotators.remoteSize.VersionRangeAnnotation(readState.current, bounds)
2307+
externalSize = *d.mu.annotators.externalSize.VersionRangeAnnotation(readState.current, bounds)
2308+
return
23312309
}
23322310

23332311
func (d *DB) walPreallocateSize() int {

internal/manifest/annotator.go

Lines changed: 66 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ type AnnotationAggregator[T any] interface {
5959
Merge(src *T, dst *T) *T
6060
}
6161

62+
// A PartialOverlapAnnotationAggregator is an extension of AnnotationAggregator
63+
// that allows for custom accumulation of range annotations for files that only
64+
// partially overlap with the range.
65+
type PartialOverlapAnnotationAggregator[T any] interface {
66+
AnnotationAggregator[T]
67+
AccumulatePartialOverlap(f *FileMetadata, dst *T, bounds base.UserKeyBounds) *T
68+
}
69+
6270
type annotation struct {
6371
// annotator is a pointer to the Annotator that computed this annotation.
6472
// NB: This is untyped to allow AnnotationAggregator to use Go generics,
@@ -165,6 +173,15 @@ func (a *Annotator[T]) accumulateRangeAnnotation(
165173

166174
// Accumulate annotations from every item that overlaps the bounds.
167175
for i := leftItem; i < rightItem; i++ {
176+
if i == leftItem || i == rightItem-1 {
177+
if agg, ok := a.Aggregator.(PartialOverlapAnnotationAggregator[T]); ok {
178+
fb := n.items[i].UserKeyBounds()
179+
if cmp(bounds.Start, fb.Start) > 0 || bounds.End.CompareUpperBounds(cmp, fb.End) < 0 {
180+
a.scratch = agg.AccumulatePartialOverlap(n.items[i], a.scratch, bounds)
181+
continue
182+
}
183+
}
184+
}
168185
v, _ := a.Aggregator.Accumulate(n.items[i], a.scratch)
169186
a.scratch = v
170187
}
@@ -258,6 +275,26 @@ func (a *Annotator[T]) LevelRangeAnnotation(lm LevelMetadata, bounds base.UserKe
258275
return a.scratch
259276
}
260277

278+
// VersionRangeAnnotation calculates the annotation defined by this Annotator
279+
// for all files within the given Version which are within the range
280+
// defined by bounds.
281+
func (a *Annotator[T]) VersionRangeAnnotation(v *Version, bounds base.UserKeyBounds) *T {
282+
accumulateSlice := func(ls LevelSlice) {
283+
if ls.Empty() {
284+
return
285+
}
286+
a.accumulateRangeAnnotation(ls.iter.r, v.cmp.Compare, bounds, false, false)
287+
}
288+
a.scratch = a.Aggregator.Zero(a.scratch)
289+
for _, ls := range v.L0SublevelFiles {
290+
accumulateSlice(ls)
291+
}
292+
for _, lm := range v.Levels[1:] {
293+
accumulateSlice(lm.Slice())
294+
}
295+
return a.scratch
296+
}
297+
261298
// InvalidateAnnotation clears any cached annotations defined by Annotator. A
262299
// pointer to the Annotator is used as the key for pre-calculated values, so
263300
// the same Annotator must be used to clear the appropriate cached annotation.
@@ -270,27 +307,47 @@ func (a *Annotator[T]) InvalidateLevelAnnotation(lm LevelMetadata) {
270307
a.invalidateNodeAnnotation(lm.tree.root)
271308
}
272309

273-
// sumAggregator defines an Aggregator which sums together a uint64 value
310+
// SumAggregator defines an Aggregator which sums together a uint64 value
274311
// across files.
275-
type sumAggregator struct {
276-
accumulateFunc func(f *FileMetadata) (v uint64, cacheOK bool)
312+
type SumAggregator struct {
313+
AccumulateFunc func(f *FileMetadata) (v uint64, cacheOK bool)
314+
AccumulatePartialOverlapFunc func(f *FileMetadata, bounds base.UserKeyBounds) uint64
277315
}
278316

279-
func (sa sumAggregator) Zero(dst *uint64) *uint64 {
317+
// Zero implements AnnotationAggregator.Zero, returning a new uint64 set to 0.
318+
func (sa SumAggregator) Zero(dst *uint64) *uint64 {
280319
if dst == nil {
281320
return new(uint64)
282321
}
283322
*dst = 0
284323
return dst
285324
}
286325

287-
func (sa sumAggregator) Accumulate(f *FileMetadata, dst *uint64) (v *uint64, cacheOK bool) {
288-
accumulated, ok := sa.accumulateFunc(f)
326+
// Accumulate implements AnnotationAggregator.Accumulate, accumulating a single
327+
// file's uint64 value.
328+
func (sa SumAggregator) Accumulate(f *FileMetadata, dst *uint64) (v *uint64, cacheOK bool) {
329+
accumulated, ok := sa.AccumulateFunc(f)
289330
*dst += accumulated
290331
return dst, ok
291332
}
292333

293-
func (sa sumAggregator) Merge(src *uint64, dst *uint64) *uint64 {
334+
// AccumulatePartialOverlap implements
335+
// PartialOverlapAnnotationAggregator.AccumulatePartialOverlap, accumulating a
336+
// single file's uint64 value for a file which only partially overlaps with the
337+
// range defined by bounds.
338+
func (sa SumAggregator) AccumulatePartialOverlap(
339+
f *FileMetadata, dst *uint64, bounds base.UserKeyBounds,
340+
) *uint64 {
341+
if sa.AccumulatePartialOverlapFunc == nil {
342+
v, _ := sa.Accumulate(f, dst)
343+
return v
344+
}
345+
*dst += sa.AccumulatePartialOverlapFunc(f, bounds)
346+
return dst
347+
}
348+
349+
// Merge implements AnnotationAggregator.Merge by summing two uint64 values.
350+
func (sa SumAggregator) Merge(src *uint64, dst *uint64) *uint64 {
294351
*dst += *src
295352
return dst
296353
}
@@ -300,8 +357,8 @@ func (sa sumAggregator) Merge(src *uint64, dst *uint64) *uint64 {
300357
// files.
301358
func SumAnnotator(accumulate func(f *FileMetadata) (v uint64, cacheOK bool)) *Annotator[uint64] {
302359
return &Annotator[uint64]{
303-
Aggregator: sumAggregator{
304-
accumulateFunc: accumulate,
360+
Aggregator: SumAggregator{
361+
AccumulateFunc: accumulate,
305362
},
306363
}
307364
}

open.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,24 @@ func Open(dirname string, opts *Options) (db *DB, err error) {
408408
d.newIters = d.tableCache.newIters
409409
d.tableNewRangeKeyIter = tableNewRangeKeyIter(d.newIters)
410410

411+
d.mu.annotators.totalSize = d.makeFileSizeAnnotator(func(f *manifest.FileMetadata) bool {
412+
return true
413+
})
414+
d.mu.annotators.remoteSize = d.makeFileSizeAnnotator(func(f *manifest.FileMetadata) bool {
415+
meta, err := d.objProvider.Lookup(fileTypeTable, f.FileBacking.DiskFileNum)
416+
if err != nil {
417+
return false
418+
}
419+
return meta.IsRemote()
420+
})
421+
d.mu.annotators.externalSize = d.makeFileSizeAnnotator(func(f *manifest.FileMetadata) bool {
422+
meta, err := d.objProvider.Lookup(fileTypeTable, f.FileBacking.DiskFileNum)
423+
if err != nil {
424+
return false
425+
}
426+
return meta.IsRemote() && meta.Remote.CleanupMethod == objstorage.SharedNoCleanup
427+
})
428+
411429
var previousOptionsFileNum base.DiskFileNum
412430
var previousOptionsFilename string
413431
for _, filename := range ls {

table_cache.go

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -210,27 +210,11 @@ func (c *tableCacheContainer) metrics() (CacheMetrics, FilterMetrics) {
210210
func (c *tableCacheContainer) estimateSize(
211211
meta *fileMetadata, lower, upper []byte,
212212
) (size uint64, err error) {
213-
if meta.Virtual {
214-
err = c.withVirtualReader(
215-
meta.VirtualMeta(),
216-
func(r sstable.VirtualReader) (err error) {
217-
size, err = r.EstimateDiskUsage(lower, upper)
218-
return err
219-
},
220-
)
221-
} else {
222-
err = c.withReader(
223-
meta.PhysicalMeta(),
224-
func(r *sstable.Reader) (err error) {
225-
size, err = r.EstimateDiskUsage(lower, upper)
226-
return err
227-
},
228-
)
229-
}
230-
if err != nil {
231-
return 0, err
232-
}
233-
return size, nil
213+
c.withCommonReader(meta, func(cr sstable.CommonReader) error {
214+
size, err = cr.EstimateDiskUsage(lower, upper)
215+
return err
216+
})
217+
return size, err
234218
}
235219

236220
// createCommonReader creates a Reader for this file.

0 commit comments

Comments
 (0)