From c31c75d58c761f0a6bd50a3d21f240ab274aee79 Mon Sep 17 00:00:00 2001 From: Andrii Chubatiuk Date: Wed, 29 Apr 2026 23:47:59 +0300 Subject: [PATCH] reconcile: prevent from staying in expanding state --- docs/CHANGELOG.md | 1 + internal/controller/operator/controllers.go | 2 +- .../operator/reconcile_and_track_status_test.go | 8 +++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index ad7460f76..0d8f8f5d1 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -18,6 +18,7 @@ aliases: * BUGFIX: [converter](https://docs.victoriametrics.com/operator/integrations/prometheus/#objects-conversion): disable all prometheus controllers if CRD group was not found. See [#2838](https://github.com/VictoriaMetrics/helm-charts/issues/2838). * BUGFIX: [vmdistributed](https://docs.victoriametrics.com/operator/resources/vmdistributed/): change default load balancing policy for write requests from `first_available` to `least_loaded`. This should allow to evenly distribute write load across all VMAgents. +* BUGFIX: [vmoperator](https://docs.victoriametrics.com/operator/): return retryable reconcile errors (such as conflicts and interrupted waits) so that the reconcile is requeued and retried. Previously such errors were not propagated and the resource could hang in `expanding` state. 
## [v0.69.0](https://github.com/VictoriaMetrics/operator/releases/tag/v0.69.0) **Release date:** 22 April 2026 diff --git a/internal/controller/operator/controllers.go b/internal/controller/operator/controllers.go index 5b1871bc8..14afc7933 100644 --- a/internal/controller/operator/controllers.go +++ b/internal/controller/operator/controllers.go @@ -345,8 +345,8 @@ func reconcileAndTrackStatus[T client.Object, ST reconcile.StatusWithMetadata[ST resultStatus = vmv1beta1.UpdateStatusExpanding } else { resultStatus = vmv1beta1.UpdateStatusFailed - resultErr = err } + resultErr = err return } if specChanged { diff --git a/internal/controller/operator/reconcile_and_track_status_test.go b/internal/controller/operator/reconcile_and_track_status_test.go index e300fc0ec..7d02dbade 100644 --- a/internal/controller/operator/reconcile_and_track_status_test.go +++ b/internal/controller/operator/reconcile_and_track_status_test.go @@ -74,7 +74,7 @@ func TestReconcileAndTrackStatus(t *testing.T) { wantStatus: vmv1beta1.UpdateStatusOperational, }) - // retryable conflict error, operational → expanding + // retryable conflict error, operational → expanding, error propagated for requeue opSpec := vmv1beta1.VMAlertSpec{SelectAllByDefault: true} f(opts{ object: &vmv1beta1.VMAlert{ @@ -89,9 +89,10 @@ func TestReconcileAndTrackStatus(t *testing.T) { return ctrl.Result{}, k8serrors.NewConflict(schema.GroupResource{Group: "apps", Resource: "deployments"}, "test", fmt.Errorf("conflict")) }, wantStatus: vmv1beta1.UpdateStatusExpanding, + wantErr: true, }) - // retryable wait interrupted, operational → expanding + // retryable wait interrupted, operational → expanding, error propagated for requeue f(opts{ object: &vmv1beta1.VMAlert{ ObjectMeta: metav1.ObjectMeta{Name: "test-vmalert", Namespace: "default"}, @@ -105,6 +106,7 @@ func TestReconcileAndTrackStatus(t *testing.T) { return ctrl.Result{}, wait.ErrorInterrupted(fmt.Errorf("timeout")) }, wantStatus: vmv1beta1.UpdateStatusExpanding, + 
wantErr: true, }) // operational → expanding → operational @@ -237,7 +239,7 @@ func TestVMClusterRemainsExpandingDuringPVCResize(t *testing.T) { _, err := reconcileAndTrackStatus(context.Background(), fclient, cluster, func() (ctrl.Result, error) { return ctrl.Result{}, wait.ErrorInterrupted(fmt.Errorf("pvc resize still in progress")) }) - assert.NoError(t, err) + assert.Error(t, err) got := &vmv1beta1.VMCluster{} assert.NoError(t, fclient.Get(context.Background(), types.NamespacedName{Name: "test-vmcluster", Namespace: "default"}, got))