Skip to content

Commit 19f9afc

Browse files
committed
db: tweak average logical value size estimate
When calculating the estimated space reclaimed by an unsized point tombstone, we calculate the average logical value size of KVs in sstables overlapping a tombstone's sstable. Previously this calculation divided the aggregate value size by the total number of entries. Because that entry count includes deletions, which carry little or no value data, the resulting average was underestimated. This commit updates the calculation to divide the aggregate value size by the number of non-deletion keys.
1 parent b0b0eea commit 19f9afc

File tree

4 files changed

+41
-32
lines changed

4 files changed

+41
-32
lines changed

table_stats.go

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,16 @@ func (d *DB) loadTableRangeDelStats(
480480
return compactionHints, rangeDeletionsBytesEstimate, nil
481481
}
482482

483+
// estimateSizesBeneath calculates two statistics describing the data in the LSM
484+
// below the provided table metadata:
485+
//
486+
// 1. The average logical size of values: This is the precompression sum
487+
// of value sizes divided by the number of non-tombstone keys. It's
488+
// helpful for estimating how much data a DEL might delete.
489+
// 2. The compression ratio of the data beneath the table.
490+
//
491+
// estimateSizesBeneath walks the LSM table metadata for all tables beneath meta
492+
// (plus the table itself), computing the above statistics.
483493
func (d *DB) estimateSizesBeneath(
484494
ctx context.Context,
485495
v *manifest.Version,
@@ -496,14 +506,11 @@ func (d *DB) estimateSizesBeneath(
496506
// calculate a compression ratio of 0 which is not accurate for the file's
497507
// own tombstones.
498508
var (
499-
// TODO(sumeer): The entryCount includes the tombstones, which can be small,
500-
// resulting in a lower than expected avgValueLogicalSize. For an example of
501-
// this effect see the estimate in testdata/compaction_picker_scores (search
502-
// for "point-deletions-bytes-estimate: 163850").
503-
fileSum = meta.Size + meta.EstimatedReferenceSize()
504-
entryCount = fileProps.NumEntries
505-
keySum = fileProps.RawKeySize
506-
valSum = fileProps.RawValueSize
509+
fileSum = meta.Size + meta.EstimatedReferenceSize()
510+
entryCount = fileProps.NumEntries
511+
deletionCount = fileProps.NumDeletions
512+
keySum = fileProps.RawKeySize
513+
valSum = fileProps.RawValueSize
507514
)
508515

509516
for l := level + 1; l < numLevels; l++ {
@@ -527,6 +534,7 @@ func (d *DB) estimateSizesBeneath(
527534
}
528535

529536
entryCount += tableBeneath.ScaleStatistic(backingProps.NumEntries)
537+
deletionCount += tableBeneath.ScaleStatistic(backingProps.NumDeletions)
530538
keySum += tableBeneath.ScaleStatistic(backingProps.RawKeySize)
531539
valSum += tableBeneath.ScaleStatistic(backingProps.RawValueSize)
532540
continue
@@ -543,9 +551,9 @@ func (d *DB) estimateSizesBeneath(
543551
//
544552
// ↓
545553
//
546-
// FileSize RawValueSize
547-
// ----------------------- × ------------
548-
// RawKeySize+RawValueSize NumEntries
554+
//         FileSize               RawValueSize
555+
// -----------------------  ×  -------------------------
556+
// RawKeySize+RawValueSize     NumEntries - NumDeletions
549557
//
550558
// We return the average logical value size plus the compression ratio,
551559
// leaving the scaling to the caller. This allows the caller to perform
@@ -559,7 +567,9 @@ func (d *DB) estimateSizesBeneath(
559567
// such overhead is not large.
560568
compressionRatio = 1
561569
}
562-
avgValueLogicalSize = float64(valSum) / float64(entryCount)
570+
// When calculating the average value size, we subtract the number of
571+
// deletions from the total number of entries.
572+
avgValueLogicalSize = float64(valSum) / float64(max(1, invariants.SafeSub(entryCount, deletionCount)))
563573
return avgValueLogicalSize, compressionRatio, nil
564574
}
565575

@@ -716,8 +726,7 @@ func maybeSetStatsFromProperties(meta *manifest.TableMetadata, props *sstable.Pr
716726
if props.NumDeletions != 0 || props.NumRangeKeyDels != 0 {
717727
return false
718728
}
719-
var stats manifest.TableStats
720-
meta.PopulateStats(&stats)
729+
meta.PopulateStats(new(manifest.TableStats))
721730
return true
722731
}
723732

testdata/compaction_picker_scores

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ wait-pending-table-stats
9393
num-entries: 5
9494
num-deletions: 5
9595
num-range-key-sets: 0
96-
point-deletions-bytes-estimate: 163850
96+
point-deletions-bytes-estimate: 327690
9797
range-deletions-bytes-estimate: 0
9898
compression: None:128
9999

@@ -105,7 +105,7 @@ L1 0B 0.00 0.00 0.00
105105
L2 0B 0.00 0.00 0.00
106106
L3 0B 0.00 0.00 0.00
107107
L4 0B 0.00 0.00 0.00
108-
L5 709B 0.01 0.01 2.51
108+
L5 709B 0.01 0.01 5.01
109109
L6 321KB 1.11 1.11 1.11
110110

111111
# Run a similar test as above, but this time the table containing the DELs is
@@ -147,7 +147,7 @@ wait-pending-table-stats
147147
num-entries: 5
148148
num-deletions: 5
149149
num-range-key-sets: 0
150-
point-deletions-bytes-estimate: 163860
150+
point-deletions-bytes-estimate: 327700
151151
range-deletions-bytes-estimate: 0
152152
compression: None:129
153153

testdata/compaction_tombstones

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ wait-pending-table-stats
8383
num-entries: 2
8484
num-deletions: 1
8585
num-range-key-sets: 0
86-
point-deletions-bytes-estimate: 2
86+
point-deletions-bytes-estimate: 3
8787
range-deletions-bytes-estimate: 0
8888
compression: None:124
8989

@@ -123,7 +123,7 @@ wait-pending-table-stats
123123
num-entries: 6
124124
num-deletions: 2
125125
num-range-key-sets: 0
126-
point-deletions-bytes-estimate: 2
126+
point-deletions-bytes-estimate: 3
127127
range-deletions-bytes-estimate: 101
128128
compression: None:87,Snappy:96/129
129129

@@ -157,7 +157,7 @@ wait-pending-table-stats
157157
num-entries: 11
158158
num-deletions: 1
159159
num-range-key-sets: 0
160-
point-deletions-bytes-estimate: 2
160+
point-deletions-bytes-estimate: 3
161161
range-deletions-bytes-estimate: 0
162162
compression: None:36,Snappy:131/169
163163

@@ -240,7 +240,7 @@ wait-pending-table-stats
240240
num-entries: 3
241241
num-deletions: 3
242242
num-range-key-sets: 0
243-
point-deletions-bytes-estimate: 6150
243+
point-deletions-bytes-estimate: 12294
244244
range-deletions-bytes-estimate: 0
245245
compression: None:128
246246

@@ -371,7 +371,7 @@ wait-pending-table-stats
371371
num-entries: 2
372372
num-deletions: 1
373373
num-range-key-sets: 0
374-
point-deletions-bytes-estimate: 2459
374+
point-deletions-bytes-estimate: 3074
375375
range-deletions-bytes-estimate: 0
376376
compression: None:120
377377

@@ -419,7 +419,7 @@ wait-pending-table-stats
419419
num-entries: 2
420420
num-deletions: 1
421421
num-range-key-sets: 0
422-
point-deletions-bytes-estimate: 2459
422+
point-deletions-bytes-estimate: 3074
423423
range-deletions-bytes-estimate: 0
424424
compression: None:120
425425

@@ -492,7 +492,7 @@ wait-pending-table-stats
492492
num-entries: 5
493493
num-deletions: 3
494494
num-range-key-sets: 0
495-
point-deletions-bytes-estimate: 7
495+
point-deletions-bytes-estimate: 9
496496
range-deletions-bytes-estimate: 0
497497
compression: None:36,Snappy:95/108
498498

@@ -505,7 +505,7 @@ wait-pending-table-stats force-tombstone-density-ratio=0.9
505505
num-entries: 5
506506
num-deletions: 3
507507
num-range-key-sets: 0
508-
point-deletions-bytes-estimate: 7
508+
point-deletions-bytes-estimate: 9
509509
range-deletions-bytes-estimate: 0
510510
tombstone-dense-blocks-ratio: 0.9
511511
compression: None:36,Snappy:95/108
@@ -556,7 +556,7 @@ wait-pending-table-stats
556556
num-entries: 5
557557
num-deletions: 3
558558
num-range-key-sets: 0
559-
point-deletions-bytes-estimate: 7
559+
point-deletions-bytes-estimate: 9
560560
range-deletions-bytes-estimate: 0
561561
compression: None:36,Snappy:95/108
562562

@@ -567,7 +567,7 @@ wait-pending-table-stats force-tombstone-density-ratio=0.9
567567
num-entries: 5
568568
num-deletions: 3
569569
num-range-key-sets: 0
570-
point-deletions-bytes-estimate: 7
570+
point-deletions-bytes-estimate: 9
571571
range-deletions-bytes-estimate: 0
572572
tombstone-dense-blocks-ratio: 0.9
573573
compression: None:36,Snappy:95/108
@@ -617,7 +617,7 @@ wait-pending-table-stats
617617
num-entries: 5
618618
num-deletions: 3
619619
num-range-key-sets: 0
620-
point-deletions-bytes-estimate: 857149
620+
point-deletions-bytes-estimate: 1500007
621621
range-deletions-bytes-estimate: 0
622622
compression: None:36,Snappy:95/108
623623

@@ -628,7 +628,7 @@ wait-pending-table-stats force-tombstone-density-ratio=0.9
628628
num-entries: 5
629629
num-deletions: 3
630630
num-range-key-sets: 0
631-
point-deletions-bytes-estimate: 857149
631+
point-deletions-bytes-estimate: 1500007
632632
range-deletions-bytes-estimate: 0
633633
tombstone-dense-blocks-ratio: 0.9
634634
compression: None:36,Snappy:95/108

testdata/table_stats

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ wait-pending-table-stats
1515
num-entries: 3
1616
num-deletions: 1
1717
num-range-key-sets: 0
18-
point-deletions-bytes-estimate: 2
18+
point-deletions-bytes-estimate: 3
1919
range-deletions-bytes-estimate: 0
2020

2121
compact a-c
@@ -546,7 +546,7 @@ wait-pending-table-stats
546546
num-entries: 5
547547
num-deletions: 2
548548
num-range-key-sets: 0
549-
point-deletions-bytes-estimate: 111127
549+
point-deletions-bytes-estimate: 114303
550550
range-deletions-bytes-estimate: 0
551551

552552
# Try a missized point tombstone. It should appear in the Metrics after the
@@ -1037,7 +1037,7 @@ wait-pending-table-stats
10371037
num-entries: 3
10381038
num-deletions: 3
10391039
num-range-key-sets: 0
1040-
point-deletions-bytes-estimate: 3908
1040+
point-deletions-bytes-estimate: 4641
10411041
range-deletions-bytes-estimate: 0
10421042
compression: None:36,Snappy:73/84
10431043

0 commit comments

Comments
 (0)