Skip to content

Commit b0b0eea

Browse files
committed
db: always estimate deletions via the table stats collector
When a new sstable is added to the LSM we need to populate its table stats, including PointDeletionBytesEstimate and RangeDeletionBytesEstimate. These statistics are typically calculated by the table stats collector asynchronously, because their calculation requires walking the LSM table metadata beneath the file and sometimes I/O. Previously there was a fast path for sstables for which unsized point tombstones (DELs, SINGLEDELs but not DELSIZEDs) made up less than 10% of all of the table's entries. The previous logic reasoned that if there were not many deletions within the file, we could use the average value size and compression ratio of the file itself without introducing too much error overall. However, an incorrect compression ratio can introduce significant error, including for DELSIZED keys. In the previous logic, we could take the fast path even if the entirety of the table was DELSIZED tombstones. This commit removes the fast path for all sstables except for those with no deletions at all. The 'slow path' of allowing the asynchronous table stats collector to calculate these stats is not significantly more resource intensive, and does not need to perform any I/O for point tombstones. I suspect the fast path was a premature optimization. Informs cockroachdb/cockroach#151633.
1 parent e1b5f11 commit b0b0eea

File tree

3 files changed

+19
-71
lines changed

3 files changed

+19
-71
lines changed

compaction.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3518,9 +3518,7 @@ func (c *tableCompaction) makeVersionEdit(result compact.Result) (*manifest.Vers
35183518

35193519
// If the file didn't contain any range deletions, we can fill its
35203520
// table stats now, avoiding unnecessarily loading the table later.
3521-
maybeSetStatsFromProperties(
3522-
fileMeta.PhysicalMeta(), &t.WriterMeta.Properties, c.logger,
3523-
)
3521+
maybeSetStatsFromProperties(fileMeta.PhysicalMeta(), &t.WriterMeta.Properties)
35243522

35253523
if t.WriterMeta.HasPointKeys {
35263524
fileMeta.ExtendPointKeyBounds(c.comparer.Compare,

ingest.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ func ingestLoad1(
322322
// disallowing removal of an open file. Under MemFS, if we don't populate
323323
// meta.Stats here, the file will be loaded into the file cache for
324324
// calculating stats before we can remove the original link.
325-
maybeSetStatsFromProperties(meta.PhysicalMeta(), &props, opts.Logger)
325+
maybeSetStatsFromProperties(meta.PhysicalMeta(), &props)
326326

327327
var iterStats base.InternalIteratorStats
328328
env := sstable.ReadEnv{

table_stats.go

Lines changed: 17 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -692,52 +692,31 @@ func sanityCheckStats(
692692
// maybeSetStatsFromProperties sets the table backing properties and attempts to
693693
// set the table stats from the properties, for a table that was created by an
694694
// ingestion or compaction.
695-
func maybeSetStatsFromProperties(
696-
meta *manifest.TableMetadata, props *sstable.Properties, logger Logger,
697-
) bool {
698-
backingProps := meta.TableBacking.PopulateProperties(props)
695+
func maybeSetStatsFromProperties(meta *manifest.TableMetadata, props *sstable.Properties) bool {
696+
meta.TableBacking.PopulateProperties(props)
699697
if invariants.Enabled && meta.Virtual {
700698
panic("table expected to be physical")
701699
}
702-
// If a table contains range deletions or range key deletions, we defer the
703-
// stats collection. There are two main reasons for this:
700+
// If a table contains any deletions, we defer the stats collection. There
701+
// are two main reasons for this:
704702
//
705-
// 1. Estimating the potential for reclaimed space due to a range deletion
706-
// tombstone requires scanning the LSM - a potentially expensive operation
707-
// that should be deferred.
708-
// 2. Range deletions and / or range key deletions present an opportunity to
709-
// compute "deletion hints", which also requires a scan of the LSM to
710-
// compute tables that would be eligible for deletion.
703+
// 1. Estimating the potential for reclaimed space due to a deletion
704+
// requires scanning the LSM - a potentially expensive operation that
705+
// should be deferred.
706+
// 2. Range deletions present an opportunity to compute "deletion hints",
707+
// which also requires a scan of the LSM to compute tables that would be
708+
// eligible for deletion.
711709
//
712710
// These two tasks are deferred to the table stats collector goroutine.
713-
if props.NumRangeDeletions != 0 || props.NumRangeKeyDels != 0 {
714-
return false
715-
}
716-
717-
// If a table is more than 10% point deletions without user-provided size
718-
// estimates, don't calculate the PointDeletionsBytesEstimate statistic
719-
// using our limited knowledge. The table stats collector can populate the
720-
// stats and calculate an average of value size of all the tables beneath
721-
// the table in the LSM, which will be more accurate.
722-
if unsizedDels := invariants.SafeSub(props.NumDeletions, props.NumSizedDeletions); unsizedDels > props.NumEntries/10 {
711+
//
712+
// Note that even if the point deletions are sized (DELSIZEDs), an accurate
713+
// compression ratio is necessary to calculate an accurate estimate of the
714+
// physical disk space they reclaim. To do that, we need to scan the LSM
715+
// beneath the file.
716+
if props.NumDeletions != 0 || props.NumRangeKeyDels != 0 {
723717
return false
724718
}
725-
726-
var pointEstimate uint64
727-
if props.NumEntries > 0 {
728-
// Use the file's own average key and value sizes as an estimate. This
729-
// doesn't require any additional IO and since the number of point
730-
// deletions in the file is low, the error introduced by this crude
731-
// estimate is expected to be small.
732-
avgValSize, compressionRatio := estimatePhysicalSizes(meta, props)
733-
pointEstimate = pointDeletionsBytesEstimate(backingProps, avgValSize, compressionRatio)
734-
}
735-
736-
stats := manifest.TableStats{
737-
PointDeletionsBytesEstimate: pointEstimate,
738-
RangeDeletionsBytesEstimate: 0,
739-
}
740-
sanityCheckStats(meta, &stats, logger, "stats from properties")
719+
var stats manifest.TableStats
741720
meta.PopulateStats(&stats)
742721
return true
743722
}
@@ -832,35 +811,6 @@ func pointDeletionsBytesEstimate(
832811
return uint64((tombstonesLogicalSize + shadowedLogicalSize) * compressionRatio)
833812
}
834813

835-
func estimatePhysicalSizes(
836-
tableMeta *manifest.TableMetadata, props *sstable.Properties,
837-
) (avgValLogicalSize, compressionRatio float64) {
838-
// RawKeySize and RawValueSize are uncompressed totals. Scale according to
839-
// the data size to account for compression, index blocks and metadata
840-
// overhead. Eg:
841-
//
842-
// Compression rate × Average uncompressed value size
843-
//
844-
// ↓
845-
//
846-
// FileSize RawValSize
847-
// ----------------------- × ----------
848-
// RawKeySize+RawValueSize NumEntries
849-
//
850-
physicalSize := tableMeta.Size + tableMeta.EstimatedReferenceSize()
851-
uncompressedSum := props.RawKeySize + props.RawValueSize
852-
compressionRatio = float64(physicalSize) / float64(uncompressedSum)
853-
if compressionRatio > 1 {
854-
// We can get huge compression ratios due to the fixed overhead of files
855-
// containing a tiny amount of data. By setting this to 1, we are ignoring
856-
// that overhead, but we accept that tradeoff since the total bytes in
857-
// such overhead is not large.
858-
compressionRatio = 1
859-
}
860-
avgValLogicalSize = float64(props.RawValueSize) / float64(props.NumEntries)
861-
return avgValLogicalSize, compressionRatio
862-
}
863-
864814
// newCombinedDeletionKeyspanIter returns a keyspan.FragmentIterator that
865815
// returns "ranged deletion" spans for a single table, providing a combined view
866816
// of both range deletion and range key deletion spans. The

0 commit comments

Comments
 (0)