Log compaction failure error and delete temporary blocks from disk #2261

Merged · 3 commits · Jun 29, 2022
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,9 @@

### Grafana Mimir

+* [CHANGE] Compactor: delete source and output blocks from local disk when compaction fails, to reduce the likelihood that subsequent compactions fail because there is no space left on disk. #2261
+* [BUGFIX] Compactor: log the actual error when compaction fails. #2261

### Mixin

### Jsonnet
10 changes: 5 additions & 5 deletions pkg/compactor/bucket_compactor.go
@@ -280,13 +280,13 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul

	defer func() {
		elapsed := time.Since(jobBeginTime)
-		level.Info(jobLogger).Log("msg", "compaction job finished", "success", rerr == nil, "duration", elapsed, "duration_ms", elapsed.Milliseconds())

-		// Leave the compact directory for inspection if it is a halt error
-		// or if it is not then so that possibly we would not have to download everything again.
-		if rerr != nil {
-			return
+		if rerr == nil {
+			level.Info(jobLogger).Log("msg", "compaction job succeeded", "duration", elapsed, "duration_ms", elapsed.Milliseconds())
+		} else {
+			level.Error(jobLogger).Log("msg", "compaction job failed", "duration", elapsed, "duration_ms", elapsed.Milliseconds(), "err", rerr)
		}

		if err := os.RemoveAll(subDir); err != nil {
			level.Error(jobLogger).Log("msg", "failed to remove compaction group work directory", "path", subDir, "err", err)
		}
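The new branch relies on Go's named error result: the deferred closure reads `rerr` to decide whether to log at info or error level, and it now removes the local work directory (`subDir`) regardless of the outcome, so a failed job no longer leaves blocks on disk. A minimal, self-contained sketch of that pattern follows; the names `runJob` and `workDir` are illustrative and not taken from the PR.

```go
package main

import (
	"errors"
	"log"
	"os"
	"path/filepath"
)

// runJob mirrors the deferred-cleanup pattern above: the closure inspects the
// named error return rerr to choose the log line, then removes the local work
// directory in both the success and the failure case.
func runJob(workDir string) (rerr error) {
	defer func() {
		if rerr == nil {
			log.Printf("job succeeded")
		} else {
			log.Printf("job failed: %v", rerr)
		}
		// Always clean up local state so a failed job cannot fill up the disk.
		if err := os.RemoveAll(workDir); err != nil {
			log.Printf("failed to remove work directory %s: %v", workDir, err)
		}
	}()

	// Simulate a compaction failure so the error branch is exercised.
	return errors.New("simulated compaction failure")
}

func main() {
	_ = runJob(filepath.Join(os.TempDir(), "compaction-job"))
}
```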
12 changes: 6 additions & 6 deletions pkg/compactor/compactor_test.go
@@ -537,15 +537,15 @@ func TestMultitenantCompactor_ShouldIterateOverUsersAndRunCompaction(t *testing.
`level=info component=compactor user=user-1 msg="start of GC"`,
`level=debug component=compactor user=user-1 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTVP434PA9VFXSW2JKB3392D (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FS51A7GQ1RQWV35DBVYQM4KF (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-1 msg="start of compactions"`,
-`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
+`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-1 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-1`,
`level=info component=compactor msg="starting compaction of user blocks" user=user-2`,
`level=info component=compactor user=user-2 msg="start sync of metas"`,
`level=info component=compactor user=user-2 msg="start of GC"`,
`level=debug component=compactor user=user-2 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTW0ZCPDDNV4BV83Q2SV4QAZ (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FRSF035J26D6CGX7STCSD1KG (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-2 msg="start of compactions"`,
-`level=info component=compactor user=user-2 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
+`level=info component=compactor user=user-2 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-2 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-2`,
}, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n")))
@@ -677,7 +677,7 @@ func TestMultitenantCompactor_ShouldStopCompactingTenantOnReachingMaxCompactionT
`level=debug component=compactor user=user-1 msg="grouper found a compactable blocks group" groupKey=0@414047632870839233-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTVP434PA9VFXSW2JKB3392D (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FS51A7GQ1RQWV35DBVYQM4KF (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-1 msg="start of compactions"`,
`level=info component=compactor user=user-1 msg="max compaction time reached, no more compactions will be started"`,
-`level=info component=compactor user=user-1 groupKey=0@12695595599644216241-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
+`level=info component=compactor user=user-1 groupKey=0@12695595599644216241-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-1 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-1`,
}, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n")))
@@ -1024,15 +1024,15 @@ func TestMultitenantCompactor_ShouldCompactAllUsersOnShardingEnabledButOnlyOneIn
`level=info component=compactor user=user-1 msg="start of GC"`,
`level=debug component=compactor user=user-1 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTVP434PA9VFXSW2JKB3392D (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FSTQ95C8FS0ZAGTQS2EF1NEG (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-1 msg="start of compactions"`,
-`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
+`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-1 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-1`,
`level=info component=compactor msg="starting compaction of user blocks" user=user-2`,
`level=info component=compactor user=user-2 msg="start sync of metas"`,
`level=info component=compactor user=user-2 msg="start of GC"`,
`level=debug component=compactor user=user-2 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTW0ZCPDDNV4BV83Q2SV4QAZ (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FSV54G6QFQH1G9QE93G3B9TB (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-2 msg="start of compactions"`,
-`level=info component=compactor user=user-2 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
+`level=info component=compactor user=user-2 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-2 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-2`,
}, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n")))
@@ -1241,7 +1241,7 @@ func TestMultitenantCompactor_ShouldSkipCompactionForJobsNoMoreOwnedAfterPlannin
`level=debug component=compactor user=user-1 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-split-1_of_4-1574863200000-1574870400000 job="stage: split, range start: 1574863200000, range end: 1574870400000, shard: 1_of_4, blocks: 01DTVP434PA9VFXSW2JK000002 (min time: 2019-11-27 14:00:00 +0000 UTC, max time: 2019-11-27 16:00:00 +0000 UTC)"`,
// The ownership check is failing because, to keep this test simple, we've just switched
// the instance state to LEAVING and there are no other instances in the ring.
-`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-split-4_of_4-1574776800000-1574784000000 msg="compaction job finished" success=true`,
+`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-split-4_of_4-1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-1 msg="skipped compaction because unable to check whether the job is owned by the compactor instance" groupKey=0@17241709254077376921-split-1_of_4-1574863200000-1574870400000 err="at least 1 live replicas required, could only find 0 - unhealthy instances: 1.2.3.4:0"`,
`level=info component=compactor user=user-1 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-1`,
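The expected strings in the test hunks above are logfmt lines produced by the compactor's go-kit logger (the `level.Info(...).Log(...)` calls in the diff). Below is a small sketch of how such a call renders into one of those lines, assuming `github.com/go-kit/log`; the keys and values are illustrative, not copied from the test setup.

```go
package main

import (
	"os"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
)

func main() {
	// A logfmt logger renders Log() key-value pairs in order, which is why the
	// tests expect lines such as:
	//   level=info component=compactor user=user-1 groupKey=... msg="compaction job succeeded"
	logger := log.NewLogfmtLogger(os.Stdout)
	logger = log.With(logger, "component", "compactor", "user", "user-1")

	level.Info(logger).Log("msg", "compaction job succeeded", "duration", "1.5s")
	// Prints something like:
	//   level=info component=compactor user=user-1 msg="compaction job succeeded" duration=1.5s
}
```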