@@ -496,6 +496,10 @@ func (d *DB) estimateSizesBeneath(
496496 // calculate a compression ratio of 0 which is not accurate for the file's
497497 // own tombstones.
498498 fileSum += file .Size
499+ // TODO(sumeer): The entryCount includes the tombstones, which can be small,
500+ // resulting in a lower than expected avgValueLogicalSize. For an example of
501+ // this effect see the estimate in testdata/compaction_picker_scores (search
502+ // for "point-deletions-bytes-estimate: 163850").
499503 entryCount += fileProps .NumEntries
500504 keySum += fileProps .RawKeySize
501505 valSum += fileProps .RawValueSize
@@ -551,6 +555,13 @@ func (d *DB) estimateSizesBeneath(
551555 // additional compression ratio scaling if necessary.
552556 uncompressedSum := float64 (keySum + valSum )
553557 compressionRatio = float64 (fileSum ) / uncompressedSum
558+ if compressionRatio > 1 {
559+ // The ratio is file size over raw key+value bytes, so it can be far above 1
560+ // due to the fixed overhead of files containing a tiny amount of data. By
561+ // setting this to 1, we are ignoring that overhead, but we accept that
562+ // tradeoff since the total bytes in such overhead is not large.
563+ compressionRatio = 1
564+ }
554565 avgValueLogicalSize = (float64 (valSum ) / float64 (entryCount ))
555566 return avgValueLogicalSize , compressionRatio , nil
556567}
@@ -568,6 +579,9 @@ func (d *DB) estimateReclaimedSizeBeneath(
568579 // Otherwise, estimating the range for the file requires
569580 // additional I/O to read the file's index blocks.
570581 hintSeqNum = math .MaxUint64
582+ // TODO(jbowens): When there are multiple sub-levels in L0 and the RANGEDEL
583+ // is from a higher sub-level, we incorrectly skip the files in the lower
584+ // sub-levels when estimating this overlap.
571585 for l := level + 1 ; l < numLevels ; l ++ {
572586 for file := range v .Overlaps (l , base .UserKeyBoundsEndExclusive (start , end )).All () {
573587 // Determine whether we need to update size estimates and hint seqnums
@@ -831,6 +845,13 @@ func estimatePhysicalSizes(
831845 //
832846 uncompressedSum := props .RawKeySize + props .RawValueSize
833847 compressionRatio = float64 (fileSize ) / float64 (uncompressedSum )
848+ if compressionRatio > 1 {
849+ // The ratio is file size over raw key+value bytes, so it can be far above 1
850+ // due to the fixed overhead of files containing a tiny amount of data. By
851+ // setting this to 1, we are ignoring that overhead, but we accept that
852+ // tradeoff since the total bytes in such overhead is not large.
853+ compressionRatio = 1
854+ }
834855 avgValLogicalSize = (float64 (props .RawValueSize ) / float64 (props .NumEntries ))
835856 return avgValLogicalSize , compressionRatio
836857}
@@ -1057,6 +1078,22 @@ var valueBlockSizeAnnotator = manifest.SumAnnotator(func(f *tableMetadata) (uint
10571078 return f .Stats .ValueBlocksSize , f .StatsValid ()
10581079})
10591080
1081+ // pointDeletionsBytesEstimateAnnotator is a manifest.Annotator that annotates
1082+ // B-tree nodes with the sum of the files' Stats.PointDeletionsBytesEstimate.
1083+ // That field may change once a table's stats are loaded asynchronously, so a
1084+ // file's value is marked cacheable only when f.StatsValid() reports true.
1085+ var pointDeletionsBytesEstimateAnnotator = manifest .SumAnnotator (func (f * tableMetadata ) (uint64 , bool ) {
1086+ return f .Stats .PointDeletionsBytesEstimate , f .StatsValid ()
1087+ })
1088+
1089+ // rangeDeletionsBytesEstimateAnnotator is a manifest.Annotator that annotates
1090+ // B-tree nodes with the sum of the files' Stats.RangeDeletionsBytesEstimate.
1091+ // That field may change once a table's stats are loaded asynchronously, so a
1092+ // file's value is marked cacheable only when f.StatsValid() reports true.
1093+ var rangeDeletionsBytesEstimateAnnotator = manifest .SumAnnotator (func (f * tableMetadata ) (uint64 , bool ) {
1094+ return f .Stats .RangeDeletionsBytesEstimate , f .StatsValid ()
1095+ })
1096+
10601097// compressionTypeAnnotator is a manifest.Annotator that annotates B-tree
10611098// nodes with the compression type of the file. Its annotation type is
10621099// compressionTypes. The compression type may change once a table's stats are
0 commit comments