@@ -191,7 +191,8 @@ type compaction struct {
191
191
// to cancel, such as if a conflicting excise operation raced it to manifest
192
192
// application. Only holders of the manifest lock will write to this atomic.
193
193
cancel atomic.Bool
194
-
194
+ // kind indicates the kind of compaction. Different compaction kinds have
195
+ // different semantics and mechanics. Some may have additional fields.
195
196
kind compactionKind
196
197
// isDownload is true if this compaction was started as part of a Download
197
198
// operation. In this case kind is compactionKindCopy or
@@ -253,8 +254,6 @@ type compaction struct {
253
254
// single output table with the tables in the grandparent level.
254
255
maxOverlapBytes uint64
255
256
256
- // flushing contains the flushables (aka memtables) that are being flushed.
257
- flushing flushableList
258
257
// bytesWritten contains the number of bytes that have been written to outputs.
259
258
bytesWritten atomic.Int64
260
259
@@ -271,10 +270,6 @@ type compaction struct {
271
270
// in the grandparent when this compaction finishes will be the same.
272
271
grandparents manifest.LevelSlice
273
272
274
- // Boundaries at which flushes to L0 should be split. Determined by
275
- // L0Sublevels. If nil, flushes aren't split.
276
- l0Limits [][]byte
277
-
278
273
delElision compact.TombstoneElision
279
274
rangeKeyElision compact.TombstoneElision
280
275
@@ -284,14 +279,31 @@ type compaction struct {
284
279
// lower level in the LSM during runCompaction.
285
280
allowedZeroSeqNum bool
286
281
287
- // deletionHints are set if this is a compactionKindDeleteOnly. Used to figure
288
- // out whether an input must be deleted in its entirety, or excised into
289
- // virtual sstables.
290
- deletionHints []deleteCompactionHint
291
-
292
- // exciseEnabled is set to true if this is a compactionKindDeleteOnly and
293
- // this compaction is allowed to excise files.
294
- exciseEnabled bool
282
+ // deleteOnly contains information specific to compactions with kind
283
+ // compactionKindDeleteOnly. A delete-only compaction is a special
284
+ // compaction that does not merge or write sstables. Instead, it only
285
+ // performs deletions either through removing whole sstables from the LSM or
286
+ // virtualizing them into virtual sstables.
287
+ deleteOnly struct {
288
+ // hints are collected by the table stats collector and describe range
289
+ // deletions and the files containing keys deleted by them.
290
+ hints []deleteCompactionHint
291
+ // exciseEnabled is set to true if this compaction is allowed to excise
292
+ // files. If false, the compaction will only remove sstables that
293
+ // are wholly contained within the bounds of range deletions.
294
+ exciseEnabled bool
295
+ }
296
+ // flush contains information specific to flushes (compactionKindFlush and
297
+ // compactionKindIngestedFlushable). A flush is modeled by a compaction
298
+ // because it has similar mechanics to a default compaction.
299
+ flush struct {
300
+ // flushables contains the flushables (aka memtables, large batches,
301
+ // flushable ingestions, etc.) that are being flushed.
302
+ flushables flushableList
303
+ // Boundaries at which sstables flushed to L0 should be split.
304
+ // Determined by L0Sublevels. If nil, flushes aren't split.
305
+ l0Limits [][]byte
306
+ }
295
307
296
308
metrics levelMetricsDelta
297
309
@@ -526,16 +538,16 @@ func newDeleteOnlyCompaction(
526
538
exciseEnabled bool ,
527
539
) * compaction {
528
540
c := & compaction {
529
- kind : compactionKindDeleteOnly ,
530
- comparer : opts .Comparer ,
531
- logger : opts .Logger ,
532
- version : cur ,
533
- beganAt : beganAt ,
534
- inputs : inputs ,
535
- deletionHints : hints ,
536
- exciseEnabled : exciseEnabled ,
537
- grantHandle : noopGrantHandle {},
538
- }
541
+ kind : compactionKindDeleteOnly ,
542
+ comparer : opts .Comparer ,
543
+ logger : opts .Logger ,
544
+ version : cur ,
545
+ beganAt : beganAt ,
546
+ inputs : inputs ,
547
+ grantHandle : noopGrantHandle {} ,
548
+ }
549
+ c . deleteOnly . hints = hints
550
+ c . deleteOnly . exciseEnabled = exciseEnabled
539
551
// Acquire a reference to the version to ensure that files and in-memory
540
552
// version state necessary for reading files remain available. Ignoring
541
553
// excises, this isn't strictly necessary for reading the sstables that are
@@ -657,10 +669,11 @@ func newFlush(
657
669
getValueSeparation : getValueSeparation ,
658
670
maxOutputFileSize : math .MaxUint64 ,
659
671
maxOverlapBytes : math .MaxUint64 ,
660
- flushing : flushing ,
661
672
grantHandle : noopGrantHandle {},
662
673
tableFormat : tableFormat ,
663
674
}
675
+ c .flush .flushables = flushing
676
+ c .flush .l0Limits = l0Organizer .FlushSplitKeys ()
664
677
c .startLevel = & c .inputs [0 ]
665
678
c .outputLevel = & c .inputs [1 ]
666
679
if len (flushing ) > 0 {
@@ -670,6 +683,14 @@ func newFlush(
670
683
}
671
684
c .kind = compactionKindIngestedFlushable
672
685
return c , nil
686
+ } else {
687
+ // Make sure there's no ingestedFlushable after the first flushable
688
+ // in the list.
689
+ for _ , f := range c .flush .flushables [1 :] {
690
+ if _ , ok := f .flushable .(* ingestedFlushable ); ok {
691
+ panic ("pebble: flushables shouldn't contain ingestedFlushable" )
692
+ }
693
+ }
673
694
}
674
695
}
675
696
@@ -683,16 +704,6 @@ func newFlush(
683
704
c .getValueSeparation = neverSeparateValues
684
705
}
685
706
686
- // Make sure there's no ingestedFlushable after the first flushable in the
687
- // list.
688
- for _ , f := range flushing {
689
- if _ , ok := f .flushable .(* ingestedFlushable ); ok {
690
- panic ("pebble: flushing shouldn't contain ingestedFlushable flushable" )
691
- }
692
- }
693
-
694
- c .l0Limits = l0Organizer .FlushSplitKeys ()
695
-
696
707
cmp := c .comparer .Compare
697
708
updatePointBounds := func (iter internalIterator ) {
698
709
if kv := iter .First (); kv != nil {
@@ -797,7 +808,7 @@ func (c *compaction) allowZeroSeqNum() bool {
797
808
// code doesn't know that L0 contains files and zeroing of seqnums should
798
809
// be disabled. That is fixable, but it seems safer to just match the
799
810
// RocksDB behavior for now.
800
- return len (c .flushing ) == 0 && c .delElision .ElidesEverything () && c .rangeKeyElision .ElidesEverything ()
811
+ return len (c .flush . flushables ) == 0 && c .delElision .ElidesEverything () && c .rangeKeyElision .ElidesEverything ()
801
812
}
802
813
803
814
// newInputIters returns an iterator over all the input tables in a compaction.
@@ -811,7 +822,7 @@ func (c *compaction) newInputIters(
811
822
cmp := c .comparer .Compare
812
823
813
824
// Validate the ordering of compaction input files for defense in depth.
814
- if len (c .flushing ) == 0 {
825
+ if len (c .flush . flushables ) == 0 {
815
826
if c .startLevel .level >= 0 {
816
827
err := manifest .CheckOrdering (c .comparer , manifest .Level (c .startLevel .level ),
817
828
c .startLevel .files .Iter ())
@@ -856,7 +867,7 @@ func (c *compaction) newInputIters(
856
867
// numInputLevels is an approximation of the number of iterator levels. Due
857
868
// to idiosyncrasies in iterator construction, we may (rarely) exceed this
858
869
// initial capacity.
859
- numInputLevels := max (len (c .flushing ), len (c .inputs ))
870
+ numInputLevels := max (len (c .flush . flushables ), len (c .inputs ))
860
871
iters := make ([]internalIterator , 0 , numInputLevels )
861
872
rangeDelIters := make ([]keyspan.FragmentIterator , 0 , numInputLevels )
862
873
rangeKeyIters := make ([]keyspan.FragmentIterator , 0 , numInputLevels )
@@ -883,11 +894,10 @@ func (c *compaction) newInputIters(
883
894
// Populate iters, rangeDelIters and rangeKeyIters with the appropriate
884
895
// constituent iterators. This depends on whether this is a flush or a
885
896
// compaction.
886
- if len (c .flushing ) != 0 {
897
+ if len (c .flush . flushables ) != 0 {
887
898
// If flushing, we need to build the input iterators over the memtables
888
- // stored in c.flushing.
889
- for i := range c .flushing {
890
- f := c .flushing [i ]
899
+ // stored in c.flush.flushables.
900
+ for _ , f := range c .flush .flushables {
891
901
iters = append (iters , f .newFlushIter (nil ))
892
902
rangeDelIter := f .newRangeDelIter (nil )
893
903
if rangeDelIter != nil {
@@ -1082,7 +1092,7 @@ func (c *compaction) newRangeDelIter(
1082
1092
}
1083
1093
1084
1094
func (c * compaction ) String () string {
1085
- if len (c .flushing ) != 0 {
1095
+ if len (c .flush . flushables ) != 0 {
1086
1096
return "flush\n "
1087
1097
}
1088
1098
@@ -1358,7 +1368,7 @@ func (d *DB) flush() {
1358
1368
// were ingested as flushables. Both DB.mu and the manifest lock must be held
1359
1369
// while runIngestFlush is called.
1360
1370
func (d * DB ) runIngestFlush (c * compaction ) (* manifest.VersionEdit , error ) {
1361
- if len (c .flushing ) != 1 {
1371
+ if len (c .flush . flushables ) != 1 {
1362
1372
panic ("pebble: ingestedFlushable must be flushed one at a time." )
1363
1373
}
1364
1374
@@ -1369,7 +1379,7 @@ func (d *DB) runIngestFlush(c *compaction) (*manifest.VersionEdit, error) {
1369
1379
baseLevel := d .mu .versions .picker .getBaseLevel ()
1370
1380
ve := & manifest.VersionEdit {}
1371
1381
var ingestSplitFiles []ingestSplitFile
1372
- ingestFlushable := c .flushing [0 ].flushable .(* ingestedFlushable )
1382
+ ingestFlushable := c .flush . flushables [0 ].flushable .(* ingestedFlushable )
1373
1383
1374
1384
updateLevelMetricsOnExcise := func (m * manifest.TableMetadata , level int , added []manifest.NewTableEntry ) {
1375
1385
levelMetrics := c .metrics [level ]
@@ -1633,7 +1643,7 @@ func (d *DB) flush1() (bytesFlushed uint64, err error) {
1633
1643
} else {
1634
1644
// c.kind == compactionKindIngestedFlushable && we could have deleted files due
1635
1645
// to ingest-time splits or excises.
1636
- ingestFlushable := c .flushing [0 ].flushable .(* ingestedFlushable )
1646
+ ingestFlushable := c .flush . flushables [0 ].flushable .(* ingestedFlushable )
1637
1647
exciseBounds := ingestFlushable .exciseSpan .UserKeyBounds ()
1638
1648
for c2 := range d .mu .compact .inProgress {
1639
1649
// Check if this compaction overlaps with the excise span. Note that just
@@ -2413,7 +2423,7 @@ func checkDeleteCompactionHints(
2413
2423
2414
2424
func (d * DB ) compactionPprofLabels (c * compaction ) pprof.LabelSet {
2415
2425
activity := "compact"
2416
- if len (c .flushing ) != 0 {
2426
+ if len (c .flush . flushables ) != 0 {
2417
2427
activity = "flush"
2418
2428
}
2419
2429
level := "L?"
@@ -3007,13 +3017,14 @@ func fragmentDeleteCompactionHints(
3007
3017
func (d * DB ) runDeleteOnlyCompaction (
3008
3018
jobID JobID , c * compaction , snapshots compact.Snapshots ,
3009
3019
) (ve * manifest.VersionEdit , stats compact.Stats , retErr error ) {
3010
- fragments := fragmentDeleteCompactionHints (d .cmp , c .deletionHints )
3020
+ fragments := fragmentDeleteCompactionHints (d .cmp , c .deleteOnly . hints )
3011
3021
ve = & manifest.VersionEdit {
3012
3022
DeletedTables : map [manifest.DeletedTableEntry ]* manifest.TableMetadata {},
3013
3023
}
3014
3024
for _ , cl := range c .inputs {
3015
3025
levelMetrics := & LevelMetrics {}
3016
- if err := d .runDeleteOnlyCompactionForLevel (cl , levelMetrics , ve , snapshots , fragments , c .exciseEnabled ); err != nil {
3026
+ err := d .runDeleteOnlyCompactionForLevel (cl , levelMetrics , ve , snapshots , fragments , c .deleteOnly .exciseEnabled )
3027
+ if err != nil {
3017
3028
return nil , stats , err
3018
3029
}
3019
3030
c .metrics [cl .level ] = levelMetrics
@@ -3245,7 +3256,7 @@ func (d *DB) compactAndWrite(
3245
3256
3246
3257
runnerCfg := compact.RunnerConfig {
3247
3258
CompactionBounds : c .bounds ,
3248
- L0SplitKeys : c .l0Limits ,
3259
+ L0SplitKeys : c .flush . l0Limits ,
3249
3260
Grandparents : c .grandparents ,
3250
3261
MaxGrandparentOverlapBytes : c .maxOverlapBytes ,
3251
3262
TargetOutputFileSize : c .maxOutputFileSize ,
@@ -3329,7 +3340,7 @@ func (c *compaction) makeVersionEdit(result compact.Result) (*manifest.VersionEd
3329
3340
TableBytesRead : c .outputLevel .files .TableSizeSum (),
3330
3341
BlobBytesCompacted : result .Stats .CumulativeBlobFileSize ,
3331
3342
}
3332
- if c .flushing != nil {
3343
+ if c .flush . flushables != nil {
3333
3344
outputMetrics .BlobBytesFlushed = result .Stats .CumulativeBlobFileSize
3334
3345
}
3335
3346
if len (c .extraLevels ) > 0 {
@@ -3338,7 +3349,7 @@ func (c *compaction) makeVersionEdit(result compact.Result) (*manifest.VersionEd
3338
3349
outputMetrics .TableBytesRead += outputMetrics .TableBytesIn
3339
3350
3340
3351
c .metrics [c .outputLevel .level ] = outputMetrics
3341
- if len (c .flushing ) == 0 && c .metrics [c .startLevel .level ] == nil {
3352
+ if len (c .flush . flushables ) == 0 && c .metrics [c .startLevel .level ] == nil {
3342
3353
c .metrics [c .startLevel .level ] = & LevelMetrics {}
3343
3354
}
3344
3355
if len (c .extraLevels ) > 0 {
@@ -3369,7 +3380,7 @@ func (c *compaction) makeVersionEdit(result compact.Result) (*manifest.VersionEd
3369
3380
BlobReferences : t .BlobReferences ,
3370
3381
BlobReferenceDepth : t .BlobReferenceDepth ,
3371
3382
}
3372
- if c .flushing == nil {
3383
+ if c .flush . flushables == nil {
3373
3384
// Set the file's LargestSeqNumAbsolute to be the maximum value of any
3374
3385
// of the compaction's input sstables.
3375
3386
// TODO(jackson): This could be narrowed to be the maximum of input
@@ -3408,7 +3419,7 @@ func (c *compaction) makeVersionEdit(result compact.Result) (*manifest.VersionEd
3408
3419
}
3409
3420
3410
3421
// Update metrics.
3411
- if c .flushing == nil {
3422
+ if c .flush . flushables == nil {
3412
3423
outputMetrics .TablesCompacted ++
3413
3424
outputMetrics .TableBytesCompacted += fileMeta .Size
3414
3425
} else {
0 commit comments