
Commit 97df4e1

db: compactions: calculate eventual output level
We improve the compaction code to check whether the levels below the output level have any overlap with the compaction bounds; when they don't, we adjust the output file size and other sstable writer options to correspond to the eventual level (after move compactions).
1 parent 437775a commit 97df4e1
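A minimal sketch of the rule this commit introduces, under stated assumptions: eventualOutputLevel here is an illustrative free function (not Pebble's struct field), hasOverlap stands in for Version.HasOverlap, and numLevels mirrors manifest.NumLevels from the diffs below.

// Sketch of the "eventual output level" rule: size compaction outputs for the
// deepest consecutive level below the picked output level that has no overlap
// with the compaction bounds, since move compactions will push them there.
package main

import "fmt"

const numLevels = 7 // mirrors manifest.NumLevels in Pebble

func eventualOutputLevel(outputLevel int, hasOverlap func(level int) bool) int {
	eventual := outputLevel
	// Intra-L0 compactions keep their picked level (see the TODO in the diff).
	if outputLevel == 0 {
		return eventual
	}
	// While the next level down has no overlap with the compaction bounds, any
	// output table is guaranteed to later be move-compacted into it, so size
	// the outputs for that deeper level instead.
	for eventual < numLevels-1 && !hasOverlap(eventual+1) {
		eventual++
	}
	return eventual
}

func main() {
	// Modeled on the testdata case at the end of this commit: a compaction into
	// L1 whose bounds overlap nothing in L2..L6, so outputs are sized for L6.
	noOverlapBelow := func(level int) bool { return false }
	fmt.Println(eventualOutputLevel(1, noOverlapBelow)) // prints 6
}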

6 files changed: 115 additions, 18 deletions


compaction.go

Lines changed: 40 additions & 11 deletions
@@ -270,6 +270,15 @@ type tableCompaction struct {

 	inputs []compactionLevel

+	// eventualOutputLevel is normally outputLevel.level, unless
+	// outputLevel.level+1 has no overlap with the compaction bounds (in which
+	// case it is the bottom-most consecutive level with no such overlap).
+	//
+	// Because of move compactions, we know that any sstables produced by this
+	// compaction will be later moved to eventualOutputLevel. So we use
+	// eventualOutputLevel when determining the target file size, compression
+	// options, etc.
+	eventualOutputLevel int
 	// maxOutputFileSize is the maximum size of an individual table created
 	// during compaction.
 	maxOutputFileSize uint64
@@ -280,9 +289,10 @@ type tableCompaction struct {
 	// The boundaries of the input data.
 	bounds base.UserKeyBounds

-	// grandparents are the tables in level+2 that overlap with the files being
-	// compacted. Used to determine output table boundaries. Do not assume that the actual files
-	// in the grandparent when this compaction finishes will be the same.
+	// grandparents are the tables in eventualOutputLevel+2 that overlap with the
+	// files being compacted. Used to determine output table boundaries. Do not
+	// assume that the actual files in the grandparent when this compaction
+	// finishes will be the same.
 	grandparents manifest.LevelSlice

 	delElision compact.TombstoneElision
@@ -572,7 +582,18 @@ func newCompaction(
 		grantHandle: grantHandle,
 	}

-	targetFileSize := opts.TargetFileSize(pc.outputLevel.level, pc.baseLevel)
+	// Determine eventual output level.
+	c.eventualOutputLevel = pc.outputLevel.level
+	// TODO(radu): for intra-L0 compactions, we could check if the compaction
+	// includes all L0 files within the bounds.
+	if pc.outputLevel.level != 0 {
+		for c.eventualOutputLevel < manifest.NumLevels-1 && !c.version.HasOverlap(c.eventualOutputLevel+1, c.bounds) {
+			// All output tables are guaranteed to be moved down.
+			c.eventualOutputLevel++
+		}
+	}
+
+	targetFileSize := opts.TargetFileSize(c.eventualOutputLevel, pc.baseLevel)
 	c.maxOutputFileSize = uint64(targetFileSize)
 	c.maxOverlapBytes = maxGrandparentOverlapBytes(targetFileSize)

@@ -607,8 +628,8 @@ func newCompaction(
 	}
 	// Compute the set of outputLevel+1 files that overlap this compaction (these
 	// are the grandparent sstables).
-	if c.outputLevel.level+1 < numLevels {
-		c.grandparents = c.version.Overlaps(max(c.outputLevel.level+1, pc.baseLevel), c.bounds)
+	if c.eventualOutputLevel < manifest.NumLevels-1 {
+		c.grandparents = c.version.Overlaps(max(c.eventualOutputLevel+1, pc.baseLevel), c.bounds)
 	}
 	c.delElision, c.rangeKeyElision = compact.SetupTombstoneElision(
 		c.comparer.Compare, c.version, pc.l0Organizer, c.outputLevel.level, c.bounds,
@@ -666,7 +687,10 @@ func (c *tableCompaction) maybeSwitchToMoveOrCopy(
 	// We avoid a move or copy if there is lots of overlapping grandparent data.
 	// Otherwise, the move could create a parent file that will require a very
 	// expensive merge later on.
-	if c.grandparents.AggregateSizeSum() > c.maxOverlapBytes {
+	//
+	// Note that if eventualOutputLevel != outputLevel, there are no
+	// "grandparents" on the output level.
+	if c.eventualOutputLevel == c.outputLevel.level && c.grandparents.AggregateSizeSum() > c.maxOverlapBytes {
 		return
 	}

@@ -840,13 +864,18 @@ func newFlush(
 		logger: opts.Logger,
 		inputs: []compactionLevel{{level: -1}, {level: 0}},
 		getValueSeparation: getValueSeparation,
-		maxOutputFileSize: math.MaxUint64,
-		maxOverlapBytes: math.MaxUint64,
-		grantHandle: noopGrantHandle{},
+		// TODO(radu): consider calculating the eventual output level for flushes.
+		// We expect the bounds to be very wide in practice, but perhaps we can do a
+		// finer-grained overlap analysis.
+		eventualOutputLevel: 0,
+		maxOutputFileSize: math.MaxUint64,
+		maxOverlapBytes: math.MaxUint64,
+		grantHandle: noopGrantHandle{},
 		metrics: compactionMetrics{
 			beganAt: beganAt,
 		},
 	}
+
 	c.flush.flushables = flushing
 	c.flush.l0Limits = l0Organizer.FlushSplitKeys()
 	c.startLevel = &c.inputs[0]
@@ -3440,7 +3469,7 @@ func (d *DB) compactAndWrite(
 		}
 		spanPolicyValid = true
 	}
-	writerOpts := d.makeWriterOptions(c.outputLevel.level)
+	writerOpts := d.makeWriterOptions(c.eventualOutputLevel)
 	if spanPolicy.ValueStoragePolicy.DisableSeparationBySuffix {
 		writerOpts.DisableValueBlocks = true
 	}

compaction_picker_test.go

Lines changed: 3 additions & 3 deletions
@@ -702,13 +702,13 @@ func TestCompactionPickerL0(t *testing.T) {
 	if ptc == nil {
 		return "no compaction"
 	}
-	c := newCompaction(ptc, opts, time.Now(), nil /* provider */, noopGrantHandle{}, sstable.TableFormatMinSupported, neverSeparateValues)
+	c := newCompaction(ptc, opts, time.Now(), nil /* provider */, noopGrantHandle{}, noSharedStorage, neverSeparateValues)
 	return fmt.Sprintf("%d", c.maxOutputFileSize)
 case "max-overlap-bytes":
 	if ptc == nil {
 		return "no compaction"
 	}
-	c := newCompaction(ptc, opts, time.Now(), nil /* provider */, noopGrantHandle{}, sstable.TableFormatMinSupported, neverSeparateValues)
+	c := newCompaction(ptc, opts, time.Now(), nil /* provider */, noopGrantHandle{}, noSharedStorage, neverSeparateValues)
 	return fmt.Sprintf("%d", c.maxOverlapBytes)
 }
 return fmt.Sprintf("unrecognized command: %s", td.Cmd)
@@ -1269,7 +1269,7 @@ func TestCompactionOutputFileSize(t *testing.T) {
 ptc := pc.(*pickedTableCompaction)
 fmt.Fprintf(&buf, "L%d -> L%d\n", ptc.startLevel.level, ptc.outputLevel.level)
 fmt.Fprintf(&buf, "L%d: %s\n", ptc.startLevel.level, tableNums(ptc.startLevel.files))
-c := newCompaction(ptc, opts, time.Now(), nil /* provider */, noopGrantHandle{}, sstable.TableFormatMinSupported, neverSeparateValues)
+c := newCompaction(ptc, opts, time.Now(), nil /* provider */, noopGrantHandle{}, noSharedStorage, neverSeparateValues)
 fmt.Fprintf(&buf, "maxOutputFileSize: %d\n", c.maxOutputFileSize)
 } else {
 return "nil"

data_test.go

Lines changed: 8 additions & 4 deletions
@@ -1429,31 +1429,35 @@ func runLayoutCmd(t *testing.T, td *datadriven.TestData, d *DB) string {
 func runPopulateCmd(t *testing.T, td *datadriven.TestData, b *Batch) {
 	var maxKeyLength, valLength int
 	var timestamps []int
+	var prefix string
 	td.ScanArgs(t, "keylen", &maxKeyLength)
 	td.MaybeScanArgs(t, "timestamps", &timestamps)
 	td.MaybeScanArgs(t, "vallen", &valLength)
+	td.MaybeScanArgs(t, "prefix", &prefix)
 	// Default to writing timestamps @1.
 	if len(timestamps) == 0 {
 		timestamps = append(timestamps, 1)
 	}

 	ks := testkeys.Alpha(maxKeyLength)
-	buf := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen)
+	buf := make([]byte, len(prefix)+ks.MaxLen()+testkeys.MaxSuffixLen)
+	copy(buf, prefix)
 	vbuf := make([]byte, valLength)
 	for i := uint64(0); i < ks.Count(); i++ {
 		for _, ts := range timestamps {
-			n := testkeys.WriteKeyAt(buf, ks, i, int64(ts))
+			n := testkeys.WriteKeyAt(buf[len(prefix):], ks, i, int64(ts))
+			key := buf[:len(prefix)+n]

 			// Default to using the key as the value, but if the user provided
 			// the vallen argument, generate a random value of the specified
 			// length.
-			value := buf[:n]
+			value := key
 			if valLength > 0 {
 				_, err := crand.Read(vbuf)
 				require.NoError(t, err)
 				value = vbuf
 			}
-			require.NoError(t, b.Set(buf[:n], value, nil))
+			require.NoError(t, b.Set(key, value, nil))
 		}
 	}
 }

internal/manifest/level_metadata.go

Lines changed: 7 additions & 0 deletions
@@ -415,6 +415,13 @@ func (ls LevelSlice) Overlaps(cmp Compare, bounds base.UserKeyBounds) LevelSlice
 	return newBoundedLevelSlice(startIter.iter.clone(), &startIter.iter, &endIter.iter)
 }

+// HasOverlap is equivalent to ls.Overlaps(cmp, bounds).Len() > 0 but is more efficient.
+func (ls LevelSlice) HasOverlap(cmp Compare, bounds base.UserKeyBounds) bool {
+	iter := ls.Iter()
+	t := iter.SeekGE(cmp, bounds.Start)
+	return t != nil && bounds.End.IsUpperBoundFor(cmp, t.Smallest().UserKey)
+}
+
 // KeyType is used to specify the type of keys we're looking for in
 // LevelIterator positioning operations. Files not containing any keys of the
 // desired type are skipped.

internal/manifest/version.go

Lines changed: 14 additions & 0 deletions
@@ -582,6 +582,20 @@ func (v *Version) Overlaps(level int, bounds base.UserKeyBounds) LevelSlice {
 	return v.Levels[level].Slice().Overlaps(v.cmp.Compare, bounds)
 }

+// HasOverlap is equivalent to v.Overlaps(level, bounds).Len() > 0 but is more
+// efficient.
+func (v *Version) HasOverlap(level int, bounds base.UserKeyBounds) bool {
+	if level == 0 {
+		for sublevel := range v.L0SublevelFiles {
+			if v.L0SublevelFiles[sublevel].HasOverlap(v.cmp.Compare, bounds) {
+				return true
+			}
+		}
+		return false
+	}
+	return v.Levels[level].Slice().HasOverlap(v.cmp.Compare, bounds)
+}
+
 // AllLevelsAndSublevels returns an iterator that produces a Layer, LevelSlice
 // pair for each L0 sublevel (from top to bottom) and each level below L0.
 func (v *Version) AllLevelsAndSublevels() iter.Seq2[Layer, LevelSlice] {
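As a side note on the design, the LevelSlice.HasOverlap check above needs only a single seek: find the first table whose range reaches the start bound, then test that table's smallest key against the end bound. Below is a small standalone model of that check, not Pebble's API; the table type, the hasOverlap helper, and the exclusive end bound are assumptions of this sketch.

package main

import (
	"fmt"
	"sort"
	"strings"
)

// table models an sstable's user-key range [smallest, largest] in a level
// below L0: tables are sorted by key and non-overlapping.
type table struct{ smallest, largest string }

// hasOverlap mirrors the SeekGE-based check: seek to the first table whose
// largest key is >= start, then report whether its smallest key falls below
// the (here exclusive) end bound.
func hasOverlap(level []table, start, end string) bool {
	i := sort.Search(len(level), func(i int) bool {
		return strings.Compare(level[i].largest, start) >= 0
	})
	return i < len(level) && strings.Compare(level[i].smallest, end) < 0
}

func main() {
	l6 := []table{{"a", "a"}}             // as in the testdata below: L6 holds only "a"
	fmt.Println(hasOverlap(l6, "b", "c")) // false: nothing overlaps bounds [b, c)
}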

testdata/compaction/l0_to_lbase_compaction

Lines changed: 43 additions & 0 deletions
@@ -119,3 +119,46 @@
 other files | 0 (0B) | 0 (0B)
 ----
 ----
+
+define target-file-sizes=(1, 1, 1, 1000) hide-size
+L0
+  ba#2,SET:v
+L0
+  bb#2,SET:v
+L0
+  ba#1,SET:v
+L0
+  bb#1,SET:v
+L4
+  a#3,SET:v
+L5
+  a#2,SET:v
+L6
+  a#1,SET:v
+----
+L0.1:
+  000004:[ba#2,SET-ba#2,SET]
+  000005:[bb#2,SET-bb#2,SET]
+L0.0:
+  000006:[ba#1,SET-ba#1,SET]
+  000007:[bb#1,SET-bb#1,SET]
+L4:
+  000008:[a#3,SET-a#3,SET]
+L5:
+  000009:[a#2,SET-a#2,SET]
+L6:
+  000010:[a#1,SET-a#1,SET]
+
+# The result of this compaction requires two files on all levels except L6. Even
+# though we are compacting into L1, we are using the L6 target file size,
+# knowing that the result will eventually be moved.
+compact b-c
+----
+L1:
+  000011:[ba#0,SET-bb#0,SET]
+L4:
+  000008:[a#3,SET-a#3,SET]
+L5:
+  000009:[a#2,SET-a#2,SET]
+L6:
+  000010:[a#1,SET-a#1,SET]
