Skip to content

Commit 01d4b24

Browse files
ijsong and RaduBerinde
authored and committed
fix cleanupManager.Close() hanging when deletion pacing is active
When `TargetByteDeletionRate` is set to a low value, `DB.Close()` can hang for a long time because deletion pacing uses `time.Sleep()` that can't be interrupted. This change modifies `maybePace()` to check for early termination and use interruptible `select` instead of `time.Sleep()`. Resolves: #5347
1 parent e1277c4 commit 01d4b24

File tree

2 files changed

+95
-13
lines changed

2 files changed

+95
-13
lines changed

obsolete_files.go

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ type cleanupManager struct {
4141
jobsCh chan *cleanupJob
4242
// waitGroup is used to wait for the background goroutine to exit.
4343
waitGroup sync.WaitGroup
44+
// stopCh is closed when the pacer is disabled after closing the cleanup manager.
45+
stopCh chan struct{}
4446

4547
mu struct {
4648
sync.Mutex
@@ -103,6 +105,7 @@ func openCleanupManager(
103105
getDeletePacerInfo,
104106
),
105107
jobsCh: make(chan *cleanupJob, jobsQueueDepth),
108+
stopCh: make(chan struct{}),
106109
}
107110
cm.mu.completedJobsCond.L = &cm.mu.Mutex
108111
cm.waitGroup.Add(1)
@@ -120,6 +123,7 @@ func openCleanupManager(
120123
// Delete pacing is disabled for the remaining jobs.
121124
func (cm *cleanupManager) Close() {
122125
close(cm.jobsCh)
126+
close(cm.stopCh)
123127
cm.waitGroup.Wait()
124128
}
125129

@@ -174,21 +178,18 @@ func (cm *cleanupManager) Wait() {
174178
func (cm *cleanupManager) mainLoop() {
175179
defer cm.waitGroup.Done()
176180

181+
paceTimer := time.NewTimer(time.Duration(0))
182+
defer paceTimer.Stop()
183+
177184
var tb tokenbucket.TokenBucket
178185
// Use a token bucket with 1 token / second refill rate and 1 token burst.
179186
tb.Init(1.0, 1.0)
180187
for job := range cm.jobsCh {
181-
for _, of := range job.obsoleteFiles {
182-
switch of.fileType {
183-
case base.FileTypeTable:
184-
cm.maybePace(&tb, &of)
185-
cm.deleteObsoleteObject(of.fileType, job.jobID, of.fileNum)
186-
case base.FileTypeBlob:
187-
cm.maybePace(&tb, &of)
188-
cm.deleteObsoleteObject(of.fileType, job.jobID, of.fileNum)
189-
default:
190-
cm.deleteObsoleteFile(of.fs, of.fileType, job.jobID, of.path, of.fileNum)
191-
}
188+
select {
189+
case <-cm.stopCh:
190+
cm.deleteObsoleteFilesInJob(job, nil, nil)
191+
default:
192+
cm.deleteObsoleteFilesInJob(job, &tb, paceTimer)
192193
}
193194
cm.mu.Lock()
194195
cm.mu.completedJobs++
@@ -199,9 +200,31 @@ func (cm *cleanupManager) mainLoop() {
199200
}
200201
}
201202

203+
// deleteObsoleteFilesInJob deletes all obsolete files in the given job. If tb
204+
// and paceTimer are provided, files that need pacing will be throttled
205+
// according to the deletion rate. If tb is nil, files are deleted immediately
206+
// without pacing (used when the cleanup manager is being closed).
207+
func (cm *cleanupManager) deleteObsoleteFilesInJob(
208+
job *cleanupJob, tb *tokenbucket.TokenBucket, paceTimer *time.Timer,
209+
) {
210+
for _, of := range job.obsoleteFiles {
211+
switch of.fileType {
212+
case base.FileTypeTable, base.FileTypeBlob:
213+
if tb != nil {
214+
cm.maybePace(tb, &of, paceTimer)
215+
}
216+
cm.deleteObsoleteObject(of.fileType, job.jobID, of.fileNum)
217+
default:
218+
cm.deleteObsoleteFile(of.fs, of.fileType, job.jobID, of.path, of.fileNum)
219+
}
220+
}
221+
}
222+
202223
// maybePace sleeps before deleting an object if appropriate. It is always
203224
// called from the background goroutine.
204-
func (cm *cleanupManager) maybePace(tb *tokenbucket.TokenBucket, of *obsoleteFile) {
225+
func (cm *cleanupManager) maybePace(
226+
tb *tokenbucket.TokenBucket, of *obsoleteFile, paceTimer *time.Timer,
227+
) {
205228
if !of.needsPacing() {
206229
return
207230
}
@@ -220,7 +243,12 @@ func (cm *cleanupManager) maybePace(tb *tokenbucket.TokenBucket, of *obsoleteFil
220243
if ok {
221244
break
222245
}
223-
time.Sleep(d)
246+
paceTimer.Reset(d)
247+
select {
248+
case <-paceTimer.C:
249+
case <-cm.stopCh:
250+
return
251+
}
224252
}
225253
}
226254

obsolete_files_test.go

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ import (
1010
"sort"
1111
"strings"
1212
"testing"
13+
"time"
1314

1415
"github.com/cockroachdb/datadriven"
1516
"github.com/cockroachdb/pebble/internal/base"
1617
"github.com/cockroachdb/pebble/internal/testutils"
18+
"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
1719
"github.com/cockroachdb/pebble/vfs"
1820
"github.com/stretchr/testify/require"
1921
)
@@ -139,3 +141,55 @@ func TestCleaner(t *testing.T) {
139141
}
140142
})
141143
}
144+
145+
func TestCleanupManagerCloseWithPacing(t *testing.T) {
146+
mem := vfs.NewMem()
147+
opts := &Options{
148+
FS: mem,
149+
TargetByteDeletionRate: 1024, // 1 KB/s - slow pacing
150+
}
151+
opts.EnsureDefaults()
152+
153+
objProvider, err := objstorageprovider.Open(objstorageprovider.Settings{
154+
FS: mem,
155+
FSDirName: "/",
156+
})
157+
require.NoError(t, err)
158+
defer objProvider.Close()
159+
160+
getDeletePacerInfo := func() deletionPacerInfo {
161+
return deletionPacerInfo{
162+
freeBytes: 10 << 30,
163+
}
164+
}
165+
166+
cm := openCleanupManager(opts, objProvider, getDeletePacerInfo)
167+
168+
// Create obsolete files that would normally take a long time to delete.
169+
// At 1 KB/s, 100 files of 10 KB each would take 1000 seconds.
170+
largeFiles := make([]obsoleteFile, 100)
171+
for i := range largeFiles {
172+
largeFiles[i] = obsoleteFile{
173+
fileType: base.FileTypeTable,
174+
fs: mem,
175+
path: fmt.Sprintf("test%02d.sst", i+1),
176+
fileNum: base.DiskFileNum(i + 1),
177+
fileSize: 10 << 10,
178+
isLocal: true,
179+
}
180+
}
181+
182+
cm.EnqueueJob(1, largeFiles, obsoleteObjectStats{})
183+
184+
done := make(chan struct{})
185+
go func() {
186+
defer close(done)
187+
cm.Close()
188+
}()
189+
190+
select {
191+
case <-done:
192+
case <-time.After(30 * time.Second):
193+
t.Fatalf("timed out waiting for cleanupManager.Close() to return")
194+
}
195+
}

0 commit comments

Comments (0)