Skip to content

Commit 5e7485c

Browse files
committed
manifest: rework annotation implementation
The current annotation implementation purports to be thread-safe but in practice it is not: we store atomics in a slice which can get copied over when it is resized. We replace the dynamic slice with a static array. This requires assigning each annotator a unique index. The current implementation has a `valid` flag in addition to the `atomic.Value`. Storing a non-valid value is never necessary, so instead of using an extra flag we simply don't update `atomic.Value` unless the value is valid. We also rework the APIs to be as simple as possible and with less boilerplate.
1 parent 7a638c6 commit 5e7485c

File tree

9 files changed

+513
-539
lines changed

9 files changed

+513
-539
lines changed

compaction_picker.go

Lines changed: 33 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1560,39 +1560,42 @@ func (p *compactionPickerByScore) addScoresToPickedCompactionMetrics(
15601560
// the criteria, it chooses whichever file has the lowest LargestSeqNum. The
15611561
// lowest LargestSeqNum file will be the first eligible for an elision-only
15621562
// compaction once snapshots less than or equal to its LargestSeqNum are closed.
1563-
var elisionOnlyAnnotator = manifest.NewTableAnnotator[manifest.TableMetadata](manifest.PickFileAggregator{
1564-
Filter: func(f *manifest.TableMetadata) (eligible bool, cacheOK bool) {
1565-
if f.IsCompacting() {
1566-
return false, true
1567-
}
1563+
var elisionOnlyAnnotator = manifest.MakePickFileAnnotator(
1564+
manifest.NewTableAnnotationIdx(),
1565+
manifest.PickFileAnnotatorFuncs{
1566+
Filter: func(f *manifest.TableMetadata) (eligible bool, cacheOK bool) {
1567+
if f.IsCompacting() {
1568+
return false, true
1569+
}
15681570

1569-
backingProps, backingPropsValid := f.TableBacking.Properties()
1570-
stats, statsValid := f.Stats()
1571-
if !backingPropsValid || !statsValid {
1572-
return false, false
1573-
}
1571+
backingProps, backingPropsValid := f.TableBacking.Properties()
1572+
stats, statsValid := f.Stats()
1573+
if !backingPropsValid || !statsValid {
1574+
return false, false
1575+
}
15741576

1575-
// Bottommost files are large and not worthwhile to compact just
1576-
// to remove a few tombstones. Consider a file eligible only if
1577-
// either its own range deletions delete at least 10% of its data or
1578-
// its deletion tombstones make at least 10% of its entries.
1579-
//
1580-
// TODO(jackson): This does not account for duplicate user keys
1581-
// which may be collapsed. Ideally, we would have 'obsolete keys'
1582-
// statistics that would include tombstones, the keys that are
1583-
// dropped by tombstones and duplicated user keys. See #847.
1584-
//
1585-
// Note that tables that contain exclusively range keys (i.e. no point keys,
1586-
// `NumEntries` and `RangeDeletionsBytesEstimate` are both zero) are excluded
1587-
// from elision-only compactions.
1588-
// TODO(travers): Consider an alternative heuristic for elision of range-keys.
1589-
eligible = stats.RangeDeletionsBytesEstimate*10 >= f.Size || backingProps.NumDeletions*10 > backingProps.NumEntries
1590-
return eligible, true
1591-
},
1592-
Compare: func(f1 *manifest.TableMetadata, f2 *manifest.TableMetadata) bool {
1593-
return f1.LargestSeqNum < f2.LargestSeqNum
1577+
// Bottommost files are large and not worthwhile to compact just
1578+
// to remove a few tombstones. Consider a file eligible only if
1579+
// either its own range deletions delete at least 10% of its data or
1580+
// its deletion tombstones make at least 10% of its entries.
1581+
//
1582+
// TODO(jackson): This does not account for duplicate user keys
1583+
// which may be collapsed. Ideally, we would have 'obsolete keys'
1584+
// statistics that would include tombstones, the keys that are
1585+
// dropped by tombstones and duplicated user keys. See #847.
1586+
//
1587+
// Note that tables that contain exclusively range keys (i.e. no point keys,
1588+
// `NumEntries` and `RangeDeletionsBytesEstimate` are both zero) are excluded
1589+
// from elision-only compactions.
1590+
// TODO(travers): Consider an alternative heuristic for elision of range-keys.
1591+
eligible = stats.RangeDeletionsBytesEstimate*10 >= f.Size || backingProps.NumDeletions*10 > backingProps.NumEntries
1592+
return eligible, true
1593+
},
1594+
Compare: func(f1 *manifest.TableMetadata, f2 *manifest.TableMetadata) bool {
1595+
return f1.LargestSeqNum < f2.LargestSeqNum
1596+
},
15941597
},
1595-
})
1598+
)
15961599

15971600
// pickedCompactionFromCandidateFile creates a pickedCompaction from a *fileMetadata
15981601
// with various checks to ensure that the file still exists in the expected level

db.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -506,7 +506,7 @@ type DB struct {
506506
// validating is set to true when validation is running.
507507
validating bool
508508
}
509-
fileSizeAnnotator *manifest.TableAnnotator[fileSizeByBacking]
509+
fileSizeAnnotator manifest.TableAnnotator[fileSizeByBacking]
510510
}
511511

512512
// problemSpans keeps track of spans of keys within LSM levels where
@@ -1976,7 +1976,7 @@ func (d *DB) Metrics() *Metrics {
19761976
}
19771977

19781978
blobCompressionMetrics := blobCompressionStatsAnnotator.Annotation(&vers.BlobFiles)
1979-
metrics.BlobFiles.Compression.MergeWith(blobCompressionMetrics)
1979+
metrics.BlobFiles.Compression.MergeWith(&blobCompressionMetrics)
19801980

19811981
d.mu.Unlock()
19821982

disk_usage.go

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -84,31 +84,35 @@ func (d *DB) singleFileSizeByBacking(
8484
return res, true
8585
}
8686

87+
var fileSizeAnnotatorIdx = manifest.NewTableAnnotationIdx()
88+
8789
// makeFileSizeAnnotator returns an annotator that computes the storage size of
8890
// files. When applicable, this includes both the sstable size and the size of
8991
// any referenced blob files.
90-
func (d *DB) makeFileSizeAnnotator() *manifest.TableAnnotator[fileSizeByBacking] {
91-
return manifest.NewTableAnnotator[fileSizeByBacking](manifest.SumAggregator[fileSizeByBacking]{
92-
AddFunc: func(src, dst *fileSizeByBacking) {
93-
dst.totalSize += src.totalSize
94-
dst.remoteSize += src.remoteSize
95-
dst.externalSize += src.externalSize
96-
},
97-
AccumulateFunc: func(f *manifest.TableMetadata) (v fileSizeByBacking, cacheOK bool) {
98-
return d.singleFileSizeByBacking(f.Size+f.EstimatedReferenceSize(), f)
99-
},
100-
AccumulatePartialOverlapFunc: func(f *manifest.TableMetadata, bounds base.UserKeyBounds) fileSizeByBacking {
101-
overlappingFileSize, err := d.fileCache.estimateSize(f, bounds.Start, bounds.End.Key)
102-
if err != nil {
103-
return fileSizeByBacking{}
104-
}
105-
overlapFraction := float64(overlappingFileSize) / float64(f.Size)
106-
// Scale the blob reference size proportionally to the file
107-
// overlap from the bounds to approximate only the blob
108-
// references that overlap with the requested bounds.
109-
size := overlappingFileSize + uint64(float64(f.EstimatedReferenceSize())*overlapFraction)
110-
res, _ := d.singleFileSizeByBacking(size, f)
111-
return res
112-
},
113-
})
92+
func (d *DB) makeFileSizeAnnotator() manifest.TableAnnotator[fileSizeByBacking] {
93+
return manifest.MakeTableAnnotator[fileSizeByBacking](
94+
fileSizeAnnotatorIdx,
95+
manifest.TableAnnotatorFuncs[fileSizeByBacking]{
96+
Merge: func(dst *fileSizeByBacking, src fileSizeByBacking) {
97+
dst.totalSize += src.totalSize
98+
dst.remoteSize += src.remoteSize
99+
dst.externalSize += src.externalSize
100+
},
101+
Table: func(f *manifest.TableMetadata) (v fileSizeByBacking, cacheOK bool) {
102+
return d.singleFileSizeByBacking(f.Size+f.EstimatedReferenceSize(), f)
103+
},
104+
PartialOverlap: func(f *manifest.TableMetadata, bounds base.UserKeyBounds) fileSizeByBacking {
105+
overlappingFileSize, err := d.fileCache.estimateSize(f, bounds.Start, bounds.End.Key)
106+
if err != nil {
107+
return fileSizeByBacking{}
108+
}
109+
overlapFraction := float64(overlappingFileSize) / float64(f.Size)
110+
// Scale the blob reference size proportionally to the file
111+
// overlap from the bounds to approximate only the blob
112+
// references that overlap with the requested bounds.
113+
size := overlappingFileSize + uint64(float64(f.EstimatedReferenceSize())*overlapFraction)
114+
res, _ := d.singleFileSizeByBacking(size, f)
115+
return res
116+
},
117+
})
114118
}

0 commit comments

Comments
 (0)