Skip to content

Commit 8dea64b

Browse files
committed
db: incorporate separated values when estimating compression ratios
Table stats collection estimates compression ratios for the purpose of calculating the size of data deleted by tombstones. Previously, this compression ratio calculation included the size of both keys and values in the pre-compression sum but only included the size of sstables in the post-compression sum. This resulted in wildly overestimating the effectiveness of compression when values were separated into external blob files. This commit incorporates the estimated physical size of a table's blob references into the post-compression sum. Informs cockroachdb/cockroach#149147.
1 parent 9fd475a commit 8dea64b

File tree

3 files changed

+9
-7
lines changed

3 files changed

+9
-7
lines changed

sstable/properties.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ type CommonProperties struct {
8383
NumEntries uint64 `prop:"rocksdb.num.entries"`
8484
// Total raw key size.
8585
RawKeySize uint64 `prop:"rocksdb.raw.key.size"`
86-
// Total raw value size.
86+
// Total raw value size. If values are separated, this includes the size of
87+
// the separated value, NOT the value handle.
8788
RawValueSize uint64 `prop:"rocksdb.raw.value.size"`
8889
// Total raw key size of point deletion tombstones. This value is comparable
8990
// to RawKeySize.

table_stats.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -503,15 +503,15 @@ func (d *DB) estimateSizesBeneath(
503503
// resulting in a lower than expected avgValueLogicalSize. For an example of
504504
// this effect see the estimate in testdata/compaction_picker_scores (search
505505
// for "point-deletions-bytes-estimate: 163850").
506-
fileSum = meta.Size
506+
fileSum = meta.Size + meta.EstimatedReferenceSize()
507507
entryCount = fileProps.NumEntries
508508
keySum = fileProps.RawKeySize
509509
valSum = fileProps.RawValueSize
510510
)
511511

512512
for l := level + 1; l < numLevels; l++ {
513513
for tableBeneath := range v.Overlaps(l, meta.UserKeyBounds()).All() {
514-
fileSum += tableBeneath.Size
514+
fileSum += tableBeneath.Size + tableBeneath.EstimatedReferenceSize()
515515
if tableBeneath.StatsValid() {
516516
entryCount += tableBeneath.Stats.NumEntries
517517
keySum += tableBeneath.Stats.RawKeySize
@@ -727,7 +727,7 @@ func maybeSetStatsFromProperties(
727727
// doesn't require any additional IO and since the number of point
728728
// deletions in the file is low, the error introduced by this crude
729729
// estimate is expected to be small.
730-
avgValSize, compressionRatio := estimatePhysicalSizes(meta.Size, props)
730+
avgValSize, compressionRatio := estimatePhysicalSizes(meta, props)
731731
pointEstimate = pointDeletionsBytesEstimate(props, avgValSize, compressionRatio)
732732
}
733733

@@ -833,7 +833,7 @@ func pointDeletionsBytesEstimate(
833833
}
834834

835835
func estimatePhysicalSizes(
836-
fileSize uint64, props *sstable.CommonProperties,
836+
tableMeta *manifest.TableMetadata, props *sstable.CommonProperties,
837837
) (avgValLogicalSize, compressionRatio float64) {
838838
// RawKeySize and RawValueSize are uncompressed totals. Scale according to
839839
// the data size to account for compression, index blocks and metadata
@@ -847,8 +847,9 @@ func estimatePhysicalSizes(
847847
// ----------------------- × ----------
848848
// RawKeySize+RawValueSize NumEntries
849849
//
850+
physicalSize := tableMeta.Size + tableMeta.EstimatedReferenceSize()
850851
uncompressedSum := props.RawKeySize + props.RawValueSize
851-
compressionRatio = float64(fileSize) / float64(uncompressedSum)
852+
compressionRatio = float64(physicalSize) / float64(uncompressedSum)
852853
if compressionRatio > 1 {
853854
// We can get huge compression ratios due to the fixed overhead of files
854855
// containing a tiny amount of data. By setting this to 1, we are ignoring

testdata/table_stats

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -978,5 +978,5 @@ wait-pending-table-stats
978978
num-entries: 3
979979
num-deletions: 3
980980
num-range-key-sets: 0
981-
point-deletions-bytes-estimate: 283
981+
point-deletions-bytes-estimate: 3940
982982
range-deletions-bytes-estimate: 0

0 commit comments

Comments
 (0)