Skip to content

Commit 6ac23ff

Browse files
committed
pebble: prefer L0->Lbase move compactions when possible
The existing compaction grow logic attempts to add additional start level files by expanding the output level's initial key range, which is based on the files that overlap with L0. Previously, we would always try to expand the compaction range even when the output key range was empty, causing the L0 files in those empty output ranges to be rewritten instead of being moved. With this commit, we only expand the compaction output range and inputs when there is at least one overlapping file in Lbase, preferring move compactions when possible. This behaviour mirrors the grow logic for LPositive-LPositive compactions. Fixes: #4872
1 parent e5fcf2f commit 6ac23ff

File tree

6 files changed

+164
-52
lines changed

6 files changed

+164
-52
lines changed

compaction_picker.go

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -527,29 +527,26 @@ func (pc *pickedTableCompaction) growL0ForBase(cmp base.Compare, maxExpandedByte
527527
panic(fmt.Sprintf("pc.startLevel.level is %d, expected 0", pc.startLevel.level))
528528
}
529529
}
530+
531+
if pc.outputLevel.files.Empty() {
532+
// If there are no overlapping files in the output level, we do not
533+
// attempt to expand the compaction to encourage move compactions.
534+
return false
535+
}
536+
530537
smallestBaseKey := base.InvalidInternalKey
531538
largestBaseKey := base.InvalidInternalKey
532-
if pc.outputLevel.files.Empty() {
533-
baseIter := pc.version.Levels[pc.outputLevel.level].Iter()
534-
if sm := baseIter.SeekLT(cmp, pc.bounds.Start); sm != nil {
539+
// NB: We use Reslice to access the underlying level's files, but
540+
// we discard the returned slice. The pc.outputLevel.files slice
541+
// is not modified.
542+
_ = pc.outputLevel.files.Reslice(func(start, end *manifest.LevelIterator) {
543+
if sm := start.Prev(); sm != nil {
535544
smallestBaseKey = sm.Largest()
536545
}
537-
if la := baseIter.SeekGE(cmp, pc.bounds.End.Key); la != nil {
546+
if la := end.Next(); la != nil {
538547
largestBaseKey = la.Smallest()
539548
}
540-
} else {
541-
// NB: We use Reslice to access the underlying level's files, but
542-
// we discard the returned slice. The pc.outputLevel.files slice
543-
// is not modified.
544-
_ = pc.outputLevel.files.Reslice(func(start, end *manifest.LevelIterator) {
545-
if sm := start.Prev(); sm != nil {
546-
smallestBaseKey = sm.Largest()
547-
}
548-
if la := end.Next(); la != nil {
549-
largestBaseKey = la.Smallest()
550-
}
551-
})
552-
}
549+
})
553550
oldLcf := pc.lcf.Clone()
554551
if !pc.l0Organizer.ExtendL0ForBaseCompactionTo(smallestBaseKey, largestBaseKey, pc.lcf) {
555552
return false

compaction_test.go

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ func TestPickCompaction(t *testing.T) {
206206
level: 0,
207207
baseLevel: 1,
208208
},
209-
want: "100,110 ",
209+
want: "100 ",
210210
},
211211

212212
{
@@ -1082,15 +1082,32 @@ func TestCompaction(t *testing.T) {
10821082
return fmt.Sprintf("wrote %d keys\n", count)
10831083

10841084
case "auto-compact":
1085-
d.mu.Lock()
1086-
prev := d.opts.DisableAutomaticCompactions
1087-
d.opts.DisableAutomaticCompactions = false
1088-
d.maybeScheduleCompaction()
1089-
for d.mu.compact.compactingCount > 0 {
1090-
d.mu.compact.cond.Wait()
1085+
expectedCount := int64(1)
1086+
td.MaybeScanArgs(t, "count", &expectedCount)
1087+
err := func() error {
1088+
d.mu.Lock()
1089+
defer d.mu.Unlock()
1090+
prevCount := d.mu.versions.metrics.Compact.Count
1091+
prev := d.opts.DisableAutomaticCompactions
1092+
d.opts.DisableAutomaticCompactions = false
1093+
err := try(100*time.Microsecond, 60*time.Second, func() error {
1094+
d.maybeScheduleCompaction()
1095+
for d.mu.compact.compactingCount > 0 {
1096+
d.mu.compact.cond.Wait()
1097+
}
1098+
compactions := d.mu.versions.metrics.Compact.Count - prevCount
1099+
if compactions < expectedCount {
1100+
return errors.Errorf("expectedCount at least %d automatic compaction(s), got %d, total: %d",
1101+
expectedCount, compactions, d.mu.versions.metrics.Compact.Count)
1102+
}
1103+
return nil
1104+
})
1105+
d.opts.DisableAutomaticCompactions = prev
1106+
return err
1107+
}()
1108+
if err != nil {
1109+
return err.Error() + "\n" + describeLSM(d, verbose)
10911110
}
1092-
d.opts.DisableAutomaticCompactions = prev
1093-
d.mu.Unlock()
10941111
return describeLSM(d, verbose)
10951112

10961113
case "set-disable-auto-compact":
@@ -1201,13 +1218,13 @@ func TestCompaction(t *testing.T) {
12011218
d.mu.Lock()
12021219
defer d.mu.Unlock()
12031220
if len(d.mu.compact.manual) != numBlocked {
1204-
return errors.Errorf("expected %d waiting manual compactions, versus actual %d",
1221+
return errors.Errorf("expectedCount %d waiting manual compactions, versus actual %d",
12051222
numBlocked, len(d.mu.compact.manual))
12061223
}
12071224
// Expect to be back to the fake ongoing compactions when the
12081225
// non-blocked manual compactions are done.
12091226
if d.mu.compact.compactingCount != 1 {
1210-
return errors.Errorf("expected 1 ongoing compaction, versus actual %d",
1227+
return errors.Errorf("expectedCount 1 ongoing compaction, versus actual %d",
12111228
d.mu.compact.compactingCount)
12121229
}
12131230
return nil
@@ -1368,6 +1385,10 @@ func TestCompaction(t *testing.T) {
13681385
minVersion: formatDeprecatedExperimentalValueSeparation,
13691386
maxVersion: formatDeprecatedExperimentalValueSeparation,
13701387
},
1388+
"l0_to_lbase_compaction": {
1389+
minVersion: formatDeprecatedExperimentalValueSeparation,
1390+
maxVersion: formatDeprecatedExperimentalValueSeparation,
1391+
},
13711392
}
13721393
datadriven.Walk(t, "testdata/compaction", func(t *testing.T, path string) {
13731394
filename := filepath.Base(path)

range_del_test.go

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -446,20 +446,21 @@ func TestRangeDelCompactionTruncation2(t *testing.T) {
446446
defer snap2.Close()
447447
require.NoError(t, d.DeleteRange([]byte("a"), []byte("d"), nil))
448448

449-
// Compact to produce the L1 tables.
450449
require.NoError(t, d.Compact(context.Background(), []byte("b"), []byte("b\x00"), false))
451450
expectLSM(`
451+
L0.0:
452+
000007:[a#12,RANGEDEL-b#inf,RANGEDEL]
452453
L6:
453-
000009:[a#12,RANGEDEL-d#inf,RANGEDEL]
454-
`)
454+
000009:[b#12,RANGEDEL-d#inf,RANGEDEL]`)
455455

456456
require.NoError(t, d.Set([]byte("c"), bytes.Repeat([]byte("d"), 100), nil))
457457
require.NoError(t, d.Compact(context.Background(), []byte("c"), []byte("c\x00"), false))
458458
expectLSM(`
459+
L0.0:
460+
000007:[a#12,RANGEDEL-b#inf,RANGEDEL]
459461
L6:
460-
000012:[a#12,RANGEDEL-c#inf,RANGEDEL]
461-
000013:[c#13,SET-d#inf,RANGEDEL]
462-
`)
462+
000012:[b#12,RANGEDEL-c#inf,RANGEDEL]
463+
000013:[c#13,SET-d#inf,RANGEDEL]`)
463464
}
464465

465466
// TODO(peter): rewrite this test, TestRangeDelCompactionTruncation, and
@@ -523,28 +524,31 @@ func TestRangeDelCompactionTruncation3(t *testing.T) {
523524
require.NoError(t, d.Compact(context.Background(), []byte("b"), []byte("b\x00"), false))
524525
}
525526
expectLSM(`
527+
L0.0:
528+
000007:[a#12,RANGEDEL-b#inf,RANGEDEL]
526529
L3:
527-
000009:[a#12,RANGEDEL-d#inf,RANGEDEL]
528-
`)
530+
000009:[b#12,RANGEDEL-d#inf,RANGEDEL]`)
529531

530532
require.NoError(t, d.Set([]byte("c"), bytes.Repeat([]byte("d"), 100), nil))
531533

532534
require.NoError(t, d.Compact(context.Background(), []byte("c"), []byte("c\x00"), false))
533535
expectLSM(`
536+
L0.0:
537+
000007:[a#12,RANGEDEL-b#inf,RANGEDEL]
534538
L3:
535-
000013:[a#12,RANGEDEL-b#inf,RANGEDEL]
536-
000014:[b#12,RANGEDEL-c#inf,RANGEDEL]
539+
000013:[b#12,RANGEDEL-c#inf,RANGEDEL]
537540
L4:
538-
000015:[c#13,SET-d#inf,RANGEDEL]
541+
000014:[c#13,SET-d#inf,RANGEDEL]
539542
`)
540543

541544
require.NoError(t, d.Compact(context.Background(), []byte("c"), []byte("c\x00"), false))
542545
expectLSM(`
546+
L0.0:
547+
000007:[a#12,RANGEDEL-b#inf,RANGEDEL]
543548
L3:
544-
000013:[a#12,RANGEDEL-b#inf,RANGEDEL]
545-
000014:[b#12,RANGEDEL-c#inf,RANGEDEL]
549+
000013:[b#12,RANGEDEL-c#inf,RANGEDEL]
546550
L5:
547-
000015:[c#13,SET-d#inf,RANGEDEL]
551+
000014:[c#13,SET-d#inf,RANGEDEL]
548552
`)
549553

550554
if _, _, err := d.Get([]byte("b")); err != ErrNotFound {
@@ -553,12 +557,12 @@ L5:
553557

554558
require.NoError(t, d.Compact(context.Background(), []byte("a"), []byte("a\x00"), false))
555559
expectLSM(`
560+
L1:
561+
000007:[a#12,RANGEDEL-b#inf,RANGEDEL]
556562
L3:
557-
000014:[b#12,RANGEDEL-c#inf,RANGEDEL]
558-
L4:
559-
000013:[a#12,RANGEDEL-b#inf,RANGEDEL]
563+
000013:[b#12,RANGEDEL-c#inf,RANGEDEL]
560564
L5:
561-
000015:[c#13,SET-d#inf,RANGEDEL]
565+
000014:[c#13,SET-d#inf,RANGEDEL]
562566
`)
563567

564568
if v, _, err := d.Get([]byte("b")); err != ErrNotFound {
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# Generate sstables with sequential, non-overlapping keys. When we trigger a compaction
2+
# from L0 to Lbase, we expect all the compactions to be moves, since there are no
3+
# overlapping keys being written.
4+
define l0-compaction-threshold=1 auto-compactions=off
5+
----
6+
7+
8+
populate keylen=4 timestamps=(1) vallen=64
9+
----
10+
wrote 475254 keys
11+
12+
13+
flush
14+
----
15+
L0.0:
16+
000005:[a@1#10,SET-boao@1#28148,SET]
17+
000006:[boap@1#28149,SET-dcbc@1#56285,SET]
18+
000007:[dcbd@1#56286,SET-eqbu@1#84424,SET]
19+
000008:[eqbv@1#84425,SET-gecj@1#112562,SET]
20+
000009:[geck@1#112563,SET-hsda@1#140701,SET]
21+
000010:[hsdb@1#140702,SET-jgdq@1#168839,SET]
22+
000011:[jgdr@1#168840,SET-kueg@1#196977,SET]
23+
000012:[kueh@1#196978,SET-miev@1#225114,SET]
24+
000013:[miew@1#225115,SET-nwfl@1#253252,SET]
25+
000014:[nwfm@1#253253,SET-pkga@1#281390,SET]
26+
000015:[pkgb@1#281391,SET-qygs@1#309529,SET]
27+
000016:[qygt@1#309530,SET-smhh@1#337667,SET]
28+
000017:[smhi@1#337668,SET-uahw@1#365804,SET]
29+
000018:[uahx@1#365805,SET-voim@1#393942,SET]
30+
000019:[voin@1#393943,SET-xcjb@1#422080,SET]
31+
000020:[xcjc@1#422081,SET-yqjt@1#450219,SET]
32+
000021:[yqju@1#450220,SET-zzzz@1#475263,SET]
33+
34+
35+
auto-compact count=17
36+
----
37+
L6:
38+
000005:[a@1#10,SET-boao@1#28148,SET]
39+
000006:[boap@1#28149,SET-dcbc@1#56285,SET]
40+
000007:[dcbd@1#56286,SET-eqbu@1#84424,SET]
41+
000008:[eqbv@1#84425,SET-gecj@1#112562,SET]
42+
000009:[geck@1#112563,SET-hsda@1#140701,SET]
43+
000010:[hsdb@1#140702,SET-jgdq@1#168839,SET]
44+
000011:[jgdr@1#168840,SET-kueg@1#196977,SET]
45+
000012:[kueh@1#196978,SET-miev@1#225114,SET]
46+
000013:[miew@1#225115,SET-nwfl@1#253252,SET]
47+
000014:[nwfm@1#253253,SET-pkga@1#281390,SET]
48+
000015:[pkgb@1#281391,SET-qygs@1#309529,SET]
49+
000016:[qygt@1#309530,SET-smhh@1#337667,SET]
50+
000017:[smhi@1#337668,SET-uahw@1#365804,SET]
51+
000018:[uahx@1#365805,SET-voim@1#393942,SET]
52+
000019:[voin@1#393943,SET-xcjb@1#422080,SET]
53+
000020:[xcjc@1#422081,SET-yqjt@1#450219,SET]
54+
000021:[yqju@1#450220,SET-zzzz@1#475263,SET]
55+
56+
57+
metrics
58+
----
59+
| | | | ingested | moved | written | | amp
60+
level | tables size val-bl vtables | score ff cff | in | tables size | tables size | tables size | read | r w
61+
------+-----------------------------+----------------+-------+--------------+--------------+--------------+-------+---------
62+
0 | 0 0B 0B 0 | - 0 0 | 33MB | 0 0B | 0 0B | 17 34MB | 0B | 0 1.02
63+
1 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
64+
2 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
65+
3 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
66+
4 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
67+
5 | 0 0B 0B 0 | - 0 0 | 0B | 0 0B | 0 0B | 0 0B | 0B | 0 0
68+
6 | 17 34MB 0B 0 | - 0.53 0.53 | 0B | 0 0B | 17 34MB | 0 0B | 0B | 1 0
69+
total | 17 34MB 0B 0 | - - - | 33MB | 0 0B | 17 34MB | 17 67MB | 0B | 1 2.02
70+
----------------------------------------------------------------------------------------------------------------------------
71+
WAL: 1 files (0B) in: 33MB written: 33MB (0% overhead)
72+
Flushes: 2
73+
Compactions: 17 estimated debt: 0B in progress: 0 (0B) canceled: 0 (0B) failed: 0 problem spans: 0
74+
default: 0 delete: 0 elision: 0 move: 17 read: 0 tombstone-density: 0 rewrite: 0 copy: 0 multi-level: 0 blob-file-rewrite: 0
75+
MemTables: 1 (512KB) zombie: 1 (512KB)
76+
Zombie tables: 0 (0B, local: 0B)
77+
Backing tables: 0 (0B)
78+
Virtual tables: 0 (0B)
79+
Local tables size: 34MB
80+
Compression types: snappy: 17
81+
Table stats: all loaded
82+
Block cache: 3.3K entries (7.0MB) hit rate: 0.1%
83+
File cache: 17 tables, 0 blobfiles (4.6KB) hit rate: 97.4%
84+
Range key sets: 0 Tombstones: 0 Total missized tombstones encountered: 0
85+
Snapshots: 0 earliest seq num: 0
86+
Table iters: 0
87+
Filter utility: 0.0%
88+
Ingestions: 0 as flushable: 0 (0B in 0 tables)
89+
Cgo memory usage: 0B block cache: 0B (data: 0B, maps: 0B, entries: 0B) memtables: 0B

testdata/compaction/score_compaction_picked_before_manual

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ compaction-log
1818
[JOB 1] compacted(move) L5 [000004] (797B) Score=0.00 + L6 [] (0B) Score=0.00 -> L6 [000004] (797B), in 1.0s (1.0s total), output rate 797B/s
1919

2020
# Do an auto score-based compaction with the same LSM as the previous test.
21-
define disable-multi-level lbase-max-bytes=1
21+
define disable-multi-level lbase-max-bytes=1 auto-compactions=off
2222
L5
2323
a.SET.2:v c.SET.4:v
2424
----

testdata/compaction/value_separation

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ Blob files:
302302
# garbage ratio of 0.0 (no garbage). With this configuration, any blob file that
303303
# contains any unreferenced values should be immediately compacted.
304304

305-
define value-separation=(true,1,2,0s,0.0)
305+
define value-separation=(true,1,2,0s,0.0) auto-compactions=off
306306
----
307307

308308
batch
@@ -450,12 +450,13 @@ Blob files:
450450
B000040 physical:{000040 size:[1707508 (1.6MB)] vals:[1645056 (1.6MB)]}
451451
B000042 physical:{000042 size:[886008 (865KB)] vals:[853568 (834KB)]}
452452

453-
# Schedule automatic compactions. These compactions should write data to L6. The
454-
# resulting sstables will reference multiple blob files but maintain a blob
455-
# reference depth of 1 because L6 has no referenced blob files and all the L0
453+
# Manually compact the key range so that files are merged instead of moved.
454+
# This compaction should write data to L6. The resulting sstables will
455+
# reference multiple blob files but maintain a blob reference depth of 1
456+
# because L6 has no referenced blob files and all the L0
456457
# input tables have a reference depth of 1.
457458

458-
auto-compact
459+
compact a-zzzz
459460
----
460461
L6:
461462
000044:[a@1#0,SET-czms@1#0,SET] seqnums:[0-0] points:[a@1#0,SET-czms@1#0,SET] size:715291 blobrefs:[(B000006: 1642240), (B000008: 1642432), (B000010: 201984); depth:1]

0 commit comments

Comments
 (0)