Skip to content

Commit a259666

Browse files
darshanimeRaduBerinde
authored andcommitted
allow delete pacing knobs to be configurable
1 parent 0f1c564 commit a259666

File tree

8 files changed

+105
-22
lines changed

8 files changed

+105
-22
lines changed

obsolete_files.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,16 @@ func openCleanupManager(
8888
opts: opts,
8989
objProvider: objProvider,
9090
onTableDeleteFn: onTableDeleteFn,
91-
deletePacer: newDeletionPacer(crtime.NowMono(), int64(opts.TargetByteDeletionRate), getDeletePacerInfo),
92-
jobsCh: make(chan *cleanupJob, jobsQueueDepth),
91+
deletePacer: newDeletionPacer(
92+
crtime.NowMono(),
93+
opts.FreeSpaceThresholdBytes,
94+
int64(opts.TargetByteDeletionRate),
95+
opts.FreeSpaceTimeframe,
96+
opts.ObsoleteBytesMaxRatio,
97+
opts.ObsoleteBytesTimeframe,
98+
getDeletePacerInfo,
99+
),
100+
jobsCh: make(chan *cleanupJob, jobsQueueDepth),
93101
}
94102
cm.mu.completedJobsCond.L = &cm.mu.Mutex
95103
cm.waitGroup.Add(1)

options.go

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,20 +1035,42 @@ type Options struct {
10351035
// changing options dynamically?
10361036
WALMinSyncInterval func() time.Duration
10371037

1038+
// The controls below manage deletion pacing, which slows down
1039+
// deletions when compactions finish or when readers close and
1040+
// obsolete files must be cleaned up. Rapid deletion of many
1041+
// files simultaneously can increase disk latency on certain
1042+
// SSDs, and this functionality helps protect against that.
1043+
10381044
// TargetByteDeletionRate is the rate (in bytes per second) at which sstable file
10391045
// deletions are limited to (under normal circumstances).
10401046
//
1041-
// Deletion pacing is used to slow down deletions when compactions finish up
1042-
// or readers close and newly-obsolete files need cleaning up. Deleting lots
1043-
// of files at once can cause disk latency to go up on some SSDs, which this
1044-
// functionality guards against.
1045-
//
10461047
// This value is only a best-effort target; the effective rate can be
10471048
// higher if deletions are falling behind or disk space is running low.
10481049
//
10491050
// Setting this to 0 disables deletion pacing, which is also the default.
10501051
TargetByteDeletionRate int
10511052

1053+
// FreeSpaceThresholdBytes specifies the minimum amount of free disk space that Pebble
1054+
// attempts to maintain. If free disk space drops below this threshold, deletions
1055+
// are accelerated above TargetByteDeletionRate until the threshold is restored.
1056+
// Default is 16 GiB (16 << 30 bytes).
1057+
FreeSpaceThresholdBytes uint64
1058+
1059+
// FreeSpaceTimeframe sets the duration within which Pebble attempts
1060+
// to restore the free disk space back to FreeSpaceThresholdBytes. A lower value means
1061+
// more aggressive deletions. Default is 10s.
1062+
FreeSpaceTimeframe time.Duration
1063+
1064+
// ObsoleteBytesMaxRatio specifies the maximum allowed ratio of obsolete bytes to
1064+
// live bytes. If this ratio is exceeded, Pebble speeds up deletions above the
1066+
// TargetByteDeletionRate until the ratio is restored. Default is 0.20.
1067+
ObsoleteBytesMaxRatio float64
1068+
1069+
// ObsoleteBytesTimeframe sets the duration within which Pebble aims
1070+
// to restore the obsolete-to-live bytes ratio below ObsoleteBytesMaxRatio. A lower
1071+
// value means more aggressive deletions. Default is 300s.
1072+
ObsoleteBytesTimeframe time.Duration
1073+
10521074
// EnableSQLRowSpillMetrics specifies whether the Pebble instance will only be used
10531075
// to temporarily persist data spilled to disk for row-oriented SQL query execution.
10541076
EnableSQLRowSpillMetrics bool
@@ -1135,6 +1157,22 @@ func (o *Options) EnsureDefaults() {
11351157
o.Cleaner = DeleteCleaner{}
11361158
}
11371159

1160+
if o.FreeSpaceThresholdBytes == 0 {
1161+
o.FreeSpaceThresholdBytes = 16 << 30 // 16 GB
1162+
}
1163+
1164+
if o.FreeSpaceTimeframe == 0 {
1165+
o.FreeSpaceTimeframe = 10 * time.Second
1166+
}
1167+
1168+
if o.ObsoleteBytesMaxRatio == 0 {
1169+
o.ObsoleteBytesMaxRatio = 0.20
1170+
}
1171+
1172+
if o.ObsoleteBytesTimeframe == 0 {
1173+
o.ObsoleteBytesTimeframe = 300 * time.Second
1174+
}
1175+
11381176
if o.Experimental.DisableIngestAsFlushable == nil {
11391177
o.Experimental.DisableIngestAsFlushable = func() bool { return false }
11401178
}
@@ -1393,6 +1431,10 @@ func (o *Options) String() string {
13931431
fmt.Fprintf(&buf, " mem_table_size=%d\n", o.MemTableSize)
13941432
fmt.Fprintf(&buf, " mem_table_stop_writes_threshold=%d\n", o.MemTableStopWritesThreshold)
13951433
fmt.Fprintf(&buf, " min_deletion_rate=%d\n", o.TargetByteDeletionRate)
1434+
fmt.Fprintf(&buf, " free_space_threshold_bytes=%d\n", o.FreeSpaceThresholdBytes)
1435+
fmt.Fprintf(&buf, " free_space_timeframe=%s\n", o.FreeSpaceTimeframe.String())
1436+
fmt.Fprintf(&buf, " obsolete_bytes_max_ratio=%f\n", o.ObsoleteBytesMaxRatio)
1437+
fmt.Fprintf(&buf, " obsolete_bytes_timeframe=%s\n", o.ObsoleteBytesTimeframe.String())
13961438
fmt.Fprintf(&buf, " merger=%s\n", o.Merger.Name)
13971439
if o.Experimental.MultiLevelCompactionHeuristic != nil {
13981440
fmt.Fprintf(&buf, " multilevel_compaction_heuristic=%s\n", o.Experimental.MultiLevelCompactionHeuristic.String())
@@ -1729,6 +1771,14 @@ func (o *Options) Parse(s string, hooks *ParseHooks) error {
17291771
// may be meaningful again eventually.
17301772
case "min_deletion_rate":
17311773
o.TargetByteDeletionRate, err = strconv.Atoi(value)
1774+
case "free_space_threshold_bytes":
1775+
o.FreeSpaceThresholdBytes, err = strconv.ParseUint(value, 10, 64)
1776+
case "free_space_timeframe":
1777+
o.FreeSpaceTimeframe, err = time.ParseDuration(value)
1778+
case "obsolete_bytes_max_ratio":
1779+
o.ObsoleteBytesMaxRatio, err = strconv.ParseFloat(value, 64)
1780+
case "obsolete_bytes_timeframe":
1781+
o.ObsoleteBytesTimeframe, err = time.ParseDuration(value)
17321782
case "min_flush_rate":
17331783
// Do nothing; option existed in older versions of pebble, and
17341784
// may be meaningful again eventually.

options_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ func TestDefaultOptionsString(t *testing.T) {
103103
mem_table_size=4194304
104104
mem_table_stop_writes_threshold=2
105105
min_deletion_rate=0
106+
free_space_threshold_bytes=17179869184
107+
free_space_timeframe=10s
108+
obsolete_bytes_max_ratio=0.200000
109+
obsolete_bytes_timeframe=5m0s
106110
merger=pebble.concatenate
107111
multilevel_compaction_heuristic=wamp(0.00, false)
108112
read_compaction_rate=16000

pacer.go

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,25 @@ const deletePacerHistory = 5 * time.Minute
6060
// normally limit deletes (when we are not falling behind or running out of
6161
// space). A value of 0.0 disables pacing.
6262
func newDeletionPacer(
63-
now crtime.Mono, targetByteDeletionRate int64, getInfo func() deletionPacerInfo,
63+
now crtime.Mono,
64+
freeSpaceThreshold uint64,
65+
targetByteDeletionRate int64,
66+
freeSpaceTimeframe time.Duration,
67+
obsoleteBytesMaxRatio float64,
68+
obsoleteBytesTimeframe time.Duration,
69+
getInfo func() deletionPacerInfo,
6470
) *deletionPacer {
6571
d := &deletionPacer{
66-
freeSpaceThreshold: 16 << 30, // 16 GB
67-
freeSpaceTimeframe: 10 * time.Second,
72+
freeSpaceThreshold: freeSpaceThreshold,
73+
freeSpaceTimeframe: freeSpaceTimeframe,
6874

69-
obsoleteBytesMaxRatio: 0.20,
70-
obsoleteBytesTimeframe: 5 * time.Minute,
75+
obsoleteBytesMaxRatio: obsoleteBytesMaxRatio,
76+
obsoleteBytesTimeframe: obsoleteBytesTimeframe,
7177

7278
targetByteDeletionRate: targetByteDeletionRate,
7379
getInfo: getInfo,
7480
}
81+
7582
d.mu.history.Init(now, deletePacerHistory)
7683
return d
7784
}

pacer_test.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ func TestDeletionPacer(t *testing.T) {
4646
expected: 304.8,
4747
},
4848
// As freeBytes is 10GB below the free space threshold, rate should be
49-
// increased to by 1GB/s.
49+
// increased by 1GB/s.
5050
{
5151
freeBytes: 6 * GB,
5252
obsoleteBytes: 1 * MB,
@@ -121,7 +121,17 @@ func TestDeletionPacer(t *testing.T) {
121121
}
122122
start := crtime.NowMono()
123123
last := start
124-
pacer := newDeletionPacer(start, 100*MB, getInfo)
124+
var opts Options
125+
opts.EnsureDefaults()
126+
pacer := newDeletionPacer(
127+
start,
128+
opts.FreeSpaceThresholdBytes,
129+
100*MB,
130+
opts.FreeSpaceTimeframe,
131+
opts.ObsoleteBytesMaxRatio,
132+
opts.ObsoleteBytesTimeframe,
133+
getInfo,
134+
)
125135
for _, h := range tc.history {
126136
last = start + crtime.Mono(time.Second*time.Duration(h[0]))
127137
pacer.ReportDeletion(last, uint64(h[1]))

replay/testdata/replay

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ tree
1111
614 000007.sst
1212
0 LOCK
1313
133 MANIFEST-000001
14-
1358 OPTIONS-000003
14+
1494 OPTIONS-000003
1515
0 marker.format-version.000001.013
1616
0 marker.manifest.000001.MANIFEST-000001
1717
simple/
@@ -21,7 +21,7 @@ tree
2121
25 000004.log
2222
586 000005.sst
2323
85 MANIFEST-000001
24-
1358 OPTIONS-000003
24+
1494 OPTIONS-000003
2525
0 marker.format-version.000001.013
2626
0 marker.manifest.000001.MANIFEST-000001
2727

@@ -55,6 +55,10 @@ cat build/OPTIONS-000003
5555
mem_table_size=4194304
5656
mem_table_stop_writes_threshold=2
5757
min_deletion_rate=0
58+
free_space_threshold_bytes=17179869184
59+
free_space_timeframe=10s
60+
obsolete_bytes_max_ratio=0.200000
61+
obsolete_bytes_timeframe=5m0s
5862
merger=pebble.concatenate
5963
multilevel_compaction_heuristic=wamp(0.00, false)
6064
read_compaction_rate=16000

replay/testdata/replay_paced

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ tree
1414
0 LOCK
1515
133 MANIFEST-000001
1616
205 MANIFEST-000010
17-
1358 OPTIONS-000003
17+
1494 OPTIONS-000003
1818
0 marker.format-version.000001.013
1919
0 marker.manifest.000002.MANIFEST-000010
2020
high_read_amp/
@@ -26,7 +26,7 @@ tree
2626
39 000008.log
2727
560 000009.sst
2828
157 MANIFEST-000010
29-
1358 OPTIONS-000003
29+
1494 OPTIONS-000003
3030
0 marker.format-version.000001.013
3131
0 marker.manifest.000001.MANIFEST-000010
3232

testdata/metrics

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ Iter category stats:
8888

8989
disk-usage
9090
----
91-
2.1KB
91+
2.3KB
9292

9393
batch
9494
set b 2
@@ -149,7 +149,7 @@ Iter category stats:
149149

150150
disk-usage
151151
----
152-
3.7KB
152+
3.8KB
153153

154154
# Closing iter a will release one of the zombie memtables.
155155

@@ -238,7 +238,7 @@ Iter category stats:
238238

239239
disk-usage
240240
----
241-
3.0KB
241+
3.1KB
242242

243243
# Closing iter b will release the last zombie sstable and the last zombie memtable.
244244

@@ -284,7 +284,7 @@ Iter category stats:
284284

285285
disk-usage
286286
----
287-
2.3KB
287+
2.4KB
288288

289289
additional-metrics
290290
----

0 commit comments

Comments
 (0)