Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions pkg/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -941,6 +941,25 @@ func TestRolloutController_ReconcileStatefulsetWithDownscaleDelay(t *testing.T)
"DELETE http://ingester-zone-b-2.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale",
},
},

"scale up succeeds even if DELETE returns 409 Conflict (i.e partition state locked)": {
statefulSets: []runtime.Object{
mockStatefulSet("ingester-zone-b", withReplicas(2, 2),
withMirrorReplicasAnnotations("test", customResourceGVK),
withDelayedDownscaleAnnotations(time.Hour, "http://pod/prepare-delayed-downscale")),
},
httpResponses: map[string]httpResponse{
"DELETE http://ingester-zone-b-0.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale": {statusCode: http.StatusConflict, body: "partition state is locked"},
"DELETE http://ingester-zone-b-1.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale": {statusCode: http.StatusConflict, body: "partition state is locked"},
},
customResourceScaleSpecReplicas: 5,
customResourceScaleStatusReplicas: 2,
expectedPatchedSets: map[string][]string{"ingester-zone-b": {`{"spec":{"replicas":5}}`}},
expectedHttpRequests: []string{
"DELETE http://ingester-zone-b-0.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale",
"DELETE http://ingester-zone-b-1.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale",
},
},
}

for testName, testData := range tests {
Expand Down
11 changes: 10 additions & 1 deletion pkg/controller/delay.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,8 @@ func callPrepareDownscaleAndReturnElapsedDurationsSinceInitiatedDownscale(ctx co
}

if resp.StatusCode/100 != 2 {
// Unlike `callCancelDelayedDownscale`, here we treat every non-2xx status code as an error that must block the downscale.
// This includes the case where the request fails because a partition state change is locked.
level.Error(epLogger).Log("msg", "unexpected status code returned when calling POST on endpoint", "status", resp.StatusCode, "response_body", string(body))
return fmt.Errorf("HTTP POST request returned non-2xx status code: %v", resp.StatusCode)
}
Expand Down Expand Up @@ -260,8 +262,15 @@ func callCancelDelayedDownscale(ctx context.Context, logger log.Logger, client h
defer resp.Body.Close()

if resp.StatusCode/100 != 2 {
err := errors.New("HTTP DELETE request returned non-2xx status code")
body, readError := io.ReadAll(resp.Body)

// Handle 409 Conflict separately - this typically means that the partition state is locked by an engineer
if resp.StatusCode == http.StatusConflict {
level.Info(epLogger).Log("msg", "HTTP DELETE request returned 409 status code, delayed downscale cancellation skipped", "status", resp.StatusCode, "response_body", string(body))
return nil
}

err := errors.New("HTTP DELETE request returned non-2xx status code")
level.Error(epLogger).Log("msg", "unexpected status code returned when calling DELETE on endpoint", "status", resp.StatusCode, "response_body", string(body))
return errors.Join(err, readError)
}
Expand Down