Skip to content

Commit a2f076b

Browse files
committed
db: add metrics for garbage due to point and range deletions
Also tweak the compressionRatio calculation in TableStats. Informs #4602
1 parent f098d82 commit a2f076b

File tree

9 files changed

+172
-27
lines changed

9 files changed

+172
-27
lines changed

compaction_picker.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -661,7 +661,7 @@ func (c *candidateLevelInfo) shouldCompact() bool {
661661
}
662662

663663
func fileCompensation(f *tableMetadata) uint64 {
664-
return uint64(f.Stats.PointDeletionsBytesEstimate) + f.Stats.RangeDeletionsBytesEstimate
664+
return f.Stats.PointDeletionsBytesEstimate + f.Stats.RangeDeletionsBytesEstimate
665665
}
666666

667667
// compensatedSize returns f's file size, inflated according to compaction
@@ -672,14 +672,6 @@ func compensatedSize(f *tableMetadata) uint64 {
672672
return f.Size + fileCompensation(f)
673673
}
674674

675-
// compensatedSizeAnnotator is a manifest.Annotator that annotates B-Tree
676-
// nodes with the sum of the files' compensated sizes. Compensated sizes may
677-
// change once a table's stats are loaded asynchronously, so its values are
678-
// marked as cacheable only if a file's stats have been loaded.
679-
var compensatedSizeAnnotator = manifest.SumAnnotator(func(f *tableMetadata) (uint64, bool) {
680-
return compensatedSize(f), f.StatsValid()
681-
})
682-
683675
// totalCompensatedSize computes the compensated size over a table metadata
684676
// iterator. Note that this function is linear in the files available to the
685677
// iterator. Use the compensatedSizeAnnotator if querying the total
@@ -947,7 +939,15 @@ func (p *compactionPickerByScore) calculateLevelScores(
947939
}
948940
sizeAdjust := calculateSizeAdjust(inProgressCompactions)
949941
for level := 1; level < numLevels; level++ {
950-
compensatedLevelSize := *compensatedSizeAnnotator.LevelAnnotation(p.vers.Levels[level]) + sizeAdjust[level].compensated()
942+
compensatedLevelSize :=
943+
// Actual file size.
944+
p.vers.Levels[level].Size() +
945+
// Point deletions.
946+
*pointDeletionsBytesEstimateAnnotator.LevelAnnotation(p.vers.Levels[level]) +
947+
// Range deletions.
948+
*rangeDeletionsBytesEstimateAnnotator.LevelAnnotation(p.vers.Levels[level]) +
949+
// Adjustments for in-progress compactions.
950+
sizeAdjust[level].compensated()
951951
scores[level].compensatedScore = float64(compensatedLevelSize) / float64(p.levelMaxBytes[level])
952952
scores[level].uncompensatedScore = float64(p.vers.Levels[level].Size()+sizeAdjust[level].actual()) / float64(p.levelMaxBytes[level])
953953
}

db.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2068,6 +2068,11 @@ func (d *DB) Metrics() *Metrics {
20682068
metrics.Keys.RangeKeySetsCount = *rangeKeySetsAnnotator.MultiLevelAnnotation(vers.RangeKeyLevels[:])
20692069
metrics.Keys.TombstoneCount = *tombstonesAnnotator.MultiLevelAnnotation(vers.Levels[:])
20702070

2071+
metrics.Table.Garbage.PointDeletionsBytesEstimate =
2072+
*pointDeletionsBytesEstimateAnnotator.MultiLevelAnnotation(vers.Levels[:])
2073+
metrics.Table.Garbage.RangeDeletionsBytesEstimate =
2074+
*rangeDeletionsBytesEstimateAnnotator.MultiLevelAnnotation(vers.Levels[:])
2075+
20712076
d.mu.versions.logLock()
20722077
metrics.private.manifestFileSize = uint64(d.mu.versions.manifest.Size())
20732078
backingCount, backingTotalSize := d.mu.versions.virtualBackings.Stats()

internal/manifest/version.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ type TableStats struct {
6666
// granularity and is not updated if compactions beneath the table reduce
6767
// the amount of reclaimable disk space. It also does not account for
6868
// overlapping data in L0 and ignores L0 sublevels, but the error that
69-
// introduces is expected to be small.
69+
// introduces is expected to be small. Similarly, multiple overlapping
70+
// RANGEDELs in different levels can count the same data to be deleted
71+
// multiple times.
7072
//
7173
// Tables in the bottommost level of the LSM may have a nonzero estimate if
7274
// snapshots or move compactions prevented the elision of their range

metrics.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,20 @@ type Metrics struct {
319319
ZombieCount uint64
320320
}
321321

322+
// Garbage bytes.
323+
Garbage struct {
324+
// PointDeletionsBytesEstimate is the estimated file bytes that will be
325+
// saved by compacting all point deletions. This is dependent on table
326+
// stats collection, so it can be very incomplete until
327+
// InitialStatsCollectionComplete becomes true.
328+
PointDeletionsBytesEstimate uint64
329+
// RangeDeletionsBytesEstimate is the estimated file bytes that will be
330+
// saved by compacting all range deletions. This is dependent on table
331+
// stats collection, so it can be very incomplete until
332+
// InitialStatsCollectionComplete becomes true.
333+
RangeDeletionsBytesEstimate uint64
334+
}
335+
322336
// Whether the initial stats collection (for existing tables on Open) is
323337
// complete.
324338
InitialStatsCollectionComplete bool
@@ -725,7 +739,11 @@ func (m *Metrics) SafeFormat(w redact.SafePrinter, _ rune) {
725739
w.Printf(" unknown: %d", redact.Safe(count))
726740
}
727741
w.Printf("\n")
728-
742+
if m.Table.Garbage.PointDeletionsBytesEstimate > 0 || m.Table.Garbage.RangeDeletionsBytesEstimate > 0 {
743+
w.Printf("Garbage: point-deletions %s range-deletions %s\n",
744+
humanize.Bytes.Uint64(m.Table.Garbage.PointDeletionsBytesEstimate),
745+
humanize.Bytes.Uint64(m.Table.Garbage.RangeDeletionsBytesEstimate))
746+
}
729747
w.Printf("Table stats: ")
730748
if !m.Table.InitialStatsCollectionComplete {
731749
w.Printf("initial load in progress")

table_stats.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,10 @@ func (d *DB) estimateSizesBeneath(
496496
// calculate a compression ratio of 0 which is not accurate for the file's
497497
// own tombstones.
498498
fileSum += file.Size
499+
// TODO(sumeer): The entryCount includes the tombstones, which can be small,
500+
// resulting in a lower than expected avgValueLogicalSize. For an example of
501+
// this effect see the estimate in testdata/compaction_picker_scores (search
502+
// for "point-deletions-bytes-estimate: 163850").
499503
entryCount += fileProps.NumEntries
500504
keySum += fileProps.RawKeySize
501505
valSum += fileProps.RawValueSize
@@ -551,6 +555,13 @@ func (d *DB) estimateSizesBeneath(
551555
// additional compression ratio scaling if necessary.
552556
uncompressedSum := float64(keySum + valSum)
553557
compressionRatio = float64(fileSum) / uncompressedSum
558+
if compressionRatio > 1 {
559+
// We can get huge compression ratios due to the fixed overhead of files
560+
// containing a tiny amount of data. By setting this to 1, we are ignoring
561+
// that overhead, but we accept that tradeoff since the total bytes in
562+
// such overhead is not large.
563+
compressionRatio = 1
564+
}
554565
avgValueLogicalSize = (float64(valSum) / float64(entryCount))
555566
return avgValueLogicalSize, compressionRatio, nil
556567
}
@@ -568,6 +579,9 @@ func (d *DB) estimateReclaimedSizeBeneath(
568579
// Otherwise, estimating the range for the file requires
569580
// additional I/O to read the file's index blocks.
570581
hintSeqNum = math.MaxUint64
582+
// TODO(jbowens): When there are multiple sub-levels in L0 and the RANGEDEL
583+
// is from a higher sub-level, we incorrectly skip the files in the lower
584+
// sub-levels when estimating this overlap.
571585
for l := level + 1; l < numLevels; l++ {
572586
for file := range v.Overlaps(l, base.UserKeyBoundsEndExclusive(start, end)).All() {
573587
// Determine whether we need to update size estimates and hint seqnums
@@ -831,6 +845,13 @@ func estimatePhysicalSizes(
831845
//
832846
uncompressedSum := props.RawKeySize + props.RawValueSize
833847
compressionRatio = float64(fileSize) / float64(uncompressedSum)
848+
if compressionRatio > 1 {
849+
// We can get huge compression ratios due to the fixed overhead of files
850+
// containing a tiny amount of data. By setting this to 1, we are ignoring
851+
// that overhead, but we accept that tradeoff since the total bytes in
852+
// such overhead is not large.
853+
compressionRatio = 1
854+
}
834855
avgValLogicalSize = (float64(props.RawValueSize) / float64(props.NumEntries))
835856
return avgValLogicalSize, compressionRatio
836857
}
@@ -1057,6 +1078,22 @@ var valueBlockSizeAnnotator = manifest.SumAnnotator(func(f *tableMetadata) (uint
10571078
return f.Stats.ValueBlocksSize, f.StatsValid()
10581079
})
10591080

1081+
// pointDeletionsBytesEstimateAnnotator is a manifest.Annotator that annotates
1082+
// B-Tree nodes with the sum of the files' PointDeletionsBytesEstimate. This
1083+
// value may change once a table's stats are loaded asynchronously, so its
1084+
// values are marked as cacheable only if a file's stats have been loaded.
1085+
var pointDeletionsBytesEstimateAnnotator = manifest.SumAnnotator(func(f *tableMetadata) (uint64, bool) {
1086+
return f.Stats.PointDeletionsBytesEstimate, f.StatsValid()
1087+
})
1088+
1089+
// rangeDeletionsBytesEstimateAnnotator is a manifest.Annotator that annotates
1090+
// B-Tree nodes with the sum of the files' RangeDeletionsBytesEstimate. This
1091+
// value may change once a table's stats are loaded asynchronously, so its
1092+
// values are marked as cacheable only if a file's stats have been loaded.
1093+
var rangeDeletionsBytesEstimateAnnotator = manifest.SumAnnotator(func(f *tableMetadata) (uint64, bool) {
1094+
return f.Stats.RangeDeletionsBytesEstimate, f.StatsValid()
1095+
})
1096+
10601097
// compressionTypeAnnotator is a manifest.Annotator that annotates B-tree
10611098
// nodes with the compression type of the file. Its annotation type is
10621099
// compressionTypes. The compression type may change once a table's stats are

testdata/compaction_picker_scores

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ wait-pending-table-stats
9292
num-entries: 5
9393
num-deletions: 5
9494
num-range-key-sets: 0
95-
point-deletions-bytes-estimate: 164798
95+
point-deletions-bytes-estimate: 163850
9696
range-deletions-bytes-estimate: 0
9797

9898
scores
@@ -145,7 +145,7 @@ wait-pending-table-stats
145145
num-entries: 5
146146
num-deletions: 5
147147
num-range-key-sets: 0
148-
point-deletions-bytes-estimate: 164806
148+
point-deletions-bytes-estimate: 163860
149149
range-deletions-bytes-estimate: 0
150150

151151
maybe-compact

testdata/compaction_tombstones

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ wait-pending-table-stats
8080
num-entries: 2
8181
num-deletions: 1
8282
num-range-key-sets: 0
83-
point-deletions-bytes-estimate: 100
83+
point-deletions-bytes-estimate: 2
8484
range-deletions-bytes-estimate: 0
8585

8686
maybe-compact
@@ -119,7 +119,7 @@ wait-pending-table-stats
119119
num-entries: 6
120120
num-deletions: 2
121121
num-range-key-sets: 0
122-
point-deletions-bytes-estimate: 47
122+
point-deletions-bytes-estimate: 2
123123
range-deletions-bytes-estimate: 101
124124

125125
maybe-compact
@@ -152,7 +152,7 @@ wait-pending-table-stats
152152
num-entries: 11
153153
num-deletions: 1
154154
num-range-key-sets: 0
155-
point-deletions-bytes-estimate: 23
155+
point-deletions-bytes-estimate: 2
156156
range-deletions-bytes-estimate: 0
157157

158158
close-snapshot
@@ -233,7 +233,7 @@ wait-pending-table-stats
233233
num-entries: 3
234234
num-deletions: 3
235235
num-range-key-sets: 0
236-
point-deletions-bytes-estimate: 6988
236+
point-deletions-bytes-estimate: 6150
237237
range-deletions-bytes-estimate: 0
238238

239239
# By plain file size, 000005 should be picked because it is larger and
@@ -243,7 +243,7 @@ range-deletions-bytes-estimate: 0
243243

244244
maybe-compact
245245
----
246-
[JOB 100] compacted(default) L5 [000004] (771B) Score=13.77 + L6 [000006] (13KB) Score=0.92 -> L6 [] (0B), in 1.0s (2.0s total), output rate 0B/s
246+
[JOB 100] compacted(default) L5 [000004] (771B) Score=12.86 + L6 [000006] (13KB) Score=0.92 -> L6 [] (0B), in 1.0s (2.0s total), output rate 0B/s
247247

248248
# A table containing only range keys is not eligible for elision.
249249
# RANGEKEYDEL or RANGEKEYUNSET.
@@ -359,7 +359,7 @@ wait-pending-table-stats
359359
num-entries: 2
360360
num-deletions: 1
361361
num-range-key-sets: 0
362-
point-deletions-bytes-estimate: 2795
362+
point-deletions-bytes-estimate: 2459
363363
range-deletions-bytes-estimate: 0
364364

365365
wait-pending-table-stats
@@ -376,7 +376,7 @@ range-deletions-bytes-estimate: 8380
376376
maybe-compact
377377
----
378378
[JOB 100] compacted(delete-only) L6 [000007] (13KB) Score=0.00 -> L6 [000000] (8.2KB), in 1.0s (2.0s total), output rate 8.2KB/s
379-
[JOB 101] compacted(default) L5 [000004] (763B) Score=24.34 + L6 [000006] (13KB) Score=0.52 -> L6 [000000] (4.7KB), in 1.0s (2.0s total), output rate 4.7KB/s
379+
[JOB 101] compacted(default) L5 [000004] (763B) Score=23.70 + L6 [000006] (13KB) Score=0.52 -> L6 [000000] (4.7KB), in 1.0s (2.0s total), output rate 4.7KB/s
380380

381381
# The same LSM as above. However, this time, with point tombstone weighting at
382382
# 2x, the table with the point tombstone (000004) will be selected as the
@@ -405,7 +405,7 @@ wait-pending-table-stats
405405
num-entries: 2
406406
num-deletions: 1
407407
num-range-key-sets: 0
408-
point-deletions-bytes-estimate: 2795
408+
point-deletions-bytes-estimate: 2459
409409
range-deletions-bytes-estimate: 0
410410

411411
wait-pending-table-stats
@@ -422,4 +422,4 @@ range-deletions-bytes-estimate: 8380
422422
maybe-compact
423423
----
424424
[JOB 100] compacted(delete-only) L6 [000007] (13KB) Score=0.00 -> L6 [000000] (8.2KB), in 1.0s (2.0s total), output rate 8.2KB/s
425-
[JOB 101] compacted(default) L5 [000004] (763B) Score=24.34 + L6 [000006] (13KB) Score=0.52 -> L6 [000000] (4.7KB), in 1.0s (2.0s total), output rate 4.7KB/s
425+
[JOB 101] compacted(default) L5 [000004] (763B) Score=23.70 + L6 [000006] (13KB) Score=0.52 -> L6 [000000] (4.7KB), in 1.0s (2.0s total), output rate 4.7KB/s

testdata/metrics

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,3 +1141,86 @@ Ingestions: 0 as flushable: 0 (0B in 0 tables)
11411141
Cgo memory usage: 0B block cache: 0B (data: 0B, maps: 0B, entries: 0B) memtables: 0B
11421142
Iter category stats:
11431143
pebble-compaction, non-latency: {BlockBytes:342 BlockBytesInCache:0 BlockReadDuration:30ms}
1144+
1145+
init
1146+
----
1147+
1148+
batch
1149+
set a 1
1150+
set b 2
1151+
----
1152+
1153+
flush
1154+
----
1155+
L0.0:
1156+
000005:[a#10,SET-a#10,SET]
1157+
000006:[b#11,SET-b#11,SET]
1158+
1159+
batch
1160+
del-sized c 500
1161+
----
1162+
1163+
flush
1164+
----
1165+
L0.0:
1166+
000005:[a#10,SET-a#10,SET]
1167+
000006:[b#11,SET-b#11,SET]
1168+
000008:[c#12,DELSIZED-c#12,DELSIZED]
1169+
1170+
compact a-z parallel
1171+
----
1172+
L6:
1173+
000005:[a#10,SET-a#10,SET]
1174+
000006:[b#11,SET-b#11,SET]
1175+
000008:[c#12,DELSIZED-c#12,DELSIZED]
1176+
1177+
batch
1178+
del-range a c
1179+
----
1180+
1181+
flush
1182+
----
1183+
L0.0:
1184+
000010:[a#13,RANGEDEL-b#inf,RANGEDEL]
1185+
000011:[b#13,RANGEDEL-c#inf,RANGEDEL]
1186+
L6:
1187+
000005:[a#10,SET-a#10,SET]
1188+
000006:[b#11,SET-b#11,SET]
1189+
000008:[c#12,DELSIZED-c#12,DELSIZED]
1190+
1191+
metrics
1192+
----
1193+
| | | | ingested | moved | written | | amp
1194+
level | tables size val-bl vtables | score uc c | in | tables size | tables size | tables size | read | r w
1195+
------+-----------------------------+----------------+-------+--------------+--------------+--------------+-------+---------
1196+
0 | 2 1.5KB 0B 0 | - 0.25 0.25 | 106B | 0 0B | 0 0B | 5 3.7KB | 0B | 1 35.7
1197+
1 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
1198+
2 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
1199+
3 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
1200+
4 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
1201+
5 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
1202+
6 | 3 2.2KB 0B 0 | - 0.00 0.00 | 0B | 0 0B | 3 2.2KB | 0 0B | 0B | 1 0
1203+
total | 5 3.7KB 0B 0 | - - - | 106B | 0 0B | 3 2.2KB | 5 3.8KB | 0B | 2 36.7
1204+
----------------------------------------------------------------------------------------------------------------------------
1205+
WAL: 1 files (0B) in: 57B written: 106B (86% overhead)
1206+
Flushes: 3
1207+
Compactions: 3 estimated debt: 3.7KB in progress: 0 (0B)
1208+
default: 0 delete: 0 elision: 0 move: 3 read: 0 tombstone-density: 0 rewrite: 0 copy: 0 multi-level: 0
1209+
MemTables: 1 (256KB) zombie: 1 (256KB)
1210+
Zombie tables: 0 (0B, local: 0B)
1211+
Backing tables: 0 (0B)
1212+
Virtual tables: 0 (0B)
1213+
Local tables size: 3.7KB
1214+
Compression types: snappy: 5
1215+
Garbage: point-deletions 502B range-deletions 1.5KB
1216+
Table stats: all loaded
1217+
Block cache: 2 entries (774B) hit rate: 0.0%
1218+
Table cache: 2 entries (1.6KB) hit rate: 0.0%
1219+
Range key sets: 0 Tombstones: 3 Total missized tombstones encountered: 0
1220+
Snapshots: 0 earliest seq num: 0
1221+
Table iters: 0
1222+
Filter utility: 0.0%
1223+
Ingestions: 0 as flushable: 0 (0B in 0 tables)
1224+
Cgo memory usage: 0B block cache: 0B (data: 0B, maps: 0B, entries: 0B) memtables: 0B
1225+
Iter category stats:
1226+
pebble-compaction, non-latency: {BlockBytes:0 BlockBytesInCache:0 BlockReadDuration:0s}

testdata/table_stats

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ wait-pending-table-stats
1515
num-entries: 3
1616
num-deletions: 1
1717
num-range-key-sets: 0
18-
point-deletions-bytes-estimate: 57
18+
point-deletions-bytes-estimate: 2
1919
range-deletions-bytes-estimate: 0
2020

2121
compact a-c
@@ -546,7 +546,7 @@ wait-pending-table-stats
546546
num-entries: 5
547547
num-deletions: 2
548548
num-range-key-sets: 0
549-
point-deletions-bytes-estimate: 112572
549+
point-deletions-bytes-estimate: 111127
550550
range-deletions-bytes-estimate: 0
551551

552552
# Try a missized point tombstone. It should appear in the Metrics after the
@@ -676,7 +676,7 @@ wait-pending-table-stats
676676
num-entries: 1
677677
num-deletions: 1
678678
num-range-key-sets: 0
679-
point-deletions-bytes-estimate: 39
679+
point-deletions-bytes-estimate: 3
680680
range-deletions-bytes-estimate: 0
681681

682682
wait-pending-table-stats
@@ -685,7 +685,7 @@ wait-pending-table-stats
685685
num-entries: 1
686686
num-deletions: 1
687687
num-range-key-sets: 0
688-
point-deletions-bytes-estimate: 39
688+
point-deletions-bytes-estimate: 3
689689
range-deletions-bytes-estimate: 0
690690

691691
# Create an sstable with a range key set.

0 commit comments

Comments
 (0)