diff --git a/internal/controller/aggregates_controller.go b/internal/controller/aggregates_controller.go index 39c68094..0072ce6c 100644 --- a/internal/controller/aggregates_controller.go +++ b/internal/controller/aggregates_controller.go @@ -23,7 +23,6 @@ import ( "fmt" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" @@ -64,7 +63,6 @@ func (ac *AggregatesController) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } - base := hv.DeepCopy() desiredAggregateNames, desiredCondition := ac.determineDesiredState(hv) // Extract current aggregate names for comparison @@ -73,11 +71,13 @@ func (ac *AggregatesController) Reconcile(ctx context.Context, req ctrl.Request) currentAggregateNames[i] = agg.Name } + var newAggregates []kvmv1.Aggregate + aggregatesChanged := false if !slicesEqualUnordered(desiredAggregateNames, currentAggregateNames) { // Apply aggregates to OpenStack and update status aggregates, err := openstack.ApplyAggregates(ctx, ac.computeClient, hv.Name, desiredAggregateNames) if err != nil { - // Set error condition + // Set error condition with retry on conflict condition := metav1.Condition{ Type: kvmv1.ConditionTypeAggregatesUpdated, Status: metav1.ConditionFalse, @@ -85,27 +85,37 @@ func (ac *AggregatesController) Reconcile(ctx context.Context, req ctrl.Request) Message: fmt.Errorf("failed to apply aggregates: %w", err).Error(), } - if meta.SetStatusCondition(&hv.Status.Conditions, condition) { - if err2 := ac.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(AggregatesControllerName)); err2 != nil { - return ctrl.Result{}, errors.Join(err, err2) - } + if err2 := utils.PatchHypervisorStatusWithRetry(ctx, ac.Client, hv.Name, AggregatesControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }); err2 != nil { + return ctrl.Result{}, errors.Join(err, err2) } return ctrl.Result{}, err } - hv.Status.Aggregates = aggregates + newAggregates = aggregates + aggregatesChanged = true } - // Set the condition based on the determined desired state - meta.SetStatusCondition(&hv.Status.Conditions, desiredCondition) - - if equality.Semantic.DeepEqual(base, hv) { + // Skip the round-trip when nothing would change + existing := meta.FindStatusCondition(hv.Status.Conditions, desiredCondition.Type) + conditionUnchanged := existing != nil && + existing.Status == desiredCondition.Status && + existing.Reason == desiredCondition.Reason && + existing.Message == desiredCondition.Message + if !aggregatesChanged && conditionUnchanged { return ctrl.Result{}, nil } - return ctrl.Result{}, ac.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(AggregatesControllerName)) + // Patch status with retry on conflict + err := utils.PatchHypervisorStatusWithRetry(ctx, ac.Client, hv.Name, AggregatesControllerName, func(h *kvmv1.Hypervisor) { + if aggregatesChanged { + h.Status.Aggregates = newAggregates + } + meta.SetStatusCondition(&h.Status.Conditions, desiredCondition) + }) + + return ctrl.Result{}, err } // determineDesiredState returns the desired aggregates and the corresponding condition diff --git a/internal/controller/hypervisor_controller.go b/internal/controller/hypervisor_controller.go index 0456959b..41213ad2 100644 --- a/internal/controller/hypervisor_controller.go +++ b/internal/controller/hypervisor_controller.go @@ -38,6 +38,7 @@ import ( kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/global" + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) const ( @@ -121,8 +122,16 @@ func (hv *HypervisorController) Reconcile(ctx context.Context, req ctrl.Request) } if !equality.Semantic.DeepEqual(hypervisor, base) { - return ctrl.Result{}, hv.Status().Patch(ctx, hypervisor, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorControllerName)) + // Capture values to apply - only mutate fields this controller owns + newInternalIP := hypervisor.Status.InternalIP + terminatingCondition := meta.FindStatusCondition(hypervisor.Status.Conditions, kvmv1.ConditionTypeTerminating) + + return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, hv.Client, hypervisor.Name, HypervisorControllerName, func(h *kvmv1.Hypervisor) { + h.Status.InternalIP = newInternalIP + if terminatingCondition != nil { + meta.SetStatusCondition(&h.Status.Conditions, *terminatingCondition) + } + }) } syncLabelsAndAnnotations(nodeLabels, hypervisor, node) diff --git a/internal/controller/hypervisor_instance_ha_controller.go b/internal/controller/hypervisor_instance_ha_controller.go index 33597785..831f7134 100644 --- a/internal/controller/hypervisor_instance_ha_controller.go +++ b/internal/controller/hypervisor_instance_ha_controller.go @@ -30,6 +30,7 @@ import ( logger "sigs.k8s.io/controller-runtime/pkg/log" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) const ( @@ -99,17 +100,17 @@ func (r *HypervisorInstanceHaController) Reconcile(ctx context.Context, req ctrl } if err := disableInstanceHA(hv); err != nil { - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeHaEnabled, Status: metav1.ConditionUnknown, Message: err.Error(), Reason: kvmv1.ConditionReasonFailed, - }) - - if patchErr := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(old, k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorInstanceHaControllerName)); patchErr != nil { - return ctrl.Result{}, errors.Join(err, patchErr) } - return ctrl.Result{}, err + + patchErr := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, HypervisorInstanceHaControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }) + return ctrl.Result{}, errors.Join(err, patchErr) } } else { if !meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ @@ -123,17 +124,17 @@ func (r *HypervisorInstanceHaController) Reconcile(ctx context.Context, req ctrl } if err := enableInstanceHA(hv); err != nil { - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeHaEnabled, Status: metav1.ConditionUnknown, Message: err.Error(), Reason: kvmv1.ConditionReasonFailed, - }) - - if patchErr := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(old, k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorInstanceHaControllerName)); patchErr != nil { - return ctrl.Result{}, errors.Join(err, patchErr) } - return ctrl.Result{}, err + + patchErr := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, HypervisorInstanceHaControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }) + return ctrl.Result{}, errors.Join(err, patchErr) } } @@ -141,7 +142,13 @@ func (r *HypervisorInstanceHaController) Reconcile(ctx context.Context, req ctrl return ctrl.Result{}, nil } - return ctrl.Result{}, r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(old, k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorInstanceHaControllerName)) + // Only set the HaEnabled condition this controller owns + haCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHaEnabled) + return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, HypervisorInstanceHaControllerName, func(h *kvmv1.Hypervisor) { + if haCondition != nil { + meta.SetStatusCondition(&h.Status.Conditions, *haCondition) + } + }) } // SetupWithManager sets up the controller with the Manager. diff --git a/internal/controller/hypervisor_maintenance_controller.go b/internal/controller/hypervisor_maintenance_controller.go index f4662ee6..5a4e55f3 100644 --- a/internal/controller/hypervisor_maintenance_controller.go +++ b/internal/controller/hypervisor_maintenance_controller.go @@ -39,6 +39,7 @@ import ( kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) const ( @@ -82,8 +83,22 @@ func (hec *HypervisorMaintenanceController) Reconcile(ctx context.Context, req c return ctrl.Result{}, nil } - return ctrl.Result{}, hec.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(old, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(MaintenanceControllerName)) + // Capture only the fields this controller owns + disabledCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) + evictingCondition := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) + evicted := hv.Status.Evicted + + return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, hec.Client, hv.Name, HypervisorMaintenanceControllerName, func(h *kvmv1.Hypervisor) { + if disabledCondition != nil { + meta.SetStatusCondition(&h.Status.Conditions, *disabledCondition) + } + if evictingCondition != nil { + meta.SetStatusCondition(&h.Status.Conditions, *evictingCondition) + } else { + meta.RemoveStatusCondition(&h.Status.Conditions, kvmv1.ConditionTypeEvicting) + } + h.Status.Evicted = evicted + }) } func (hec *HypervisorMaintenanceController) reconcileComputeService(ctx context.Context, hv *kvmv1.Hypervisor) error { diff --git a/internal/controller/hypervisor_taint_controller.go b/internal/controller/hypervisor_taint_controller.go index 64a7e5eb..ad15da4a 100644 --- a/internal/controller/hypervisor_taint_controller.go +++ b/internal/controller/hypervisor_taint_controller.go @@ -30,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) const ( @@ -84,8 +85,13 @@ func (r *HypervisorTaintController) Reconcile(ctx context.Context, req ctrl.Requ return ctrl.Result{}, nil } - return ctrl.Result{}, r.Status().Patch(ctx, hypervisor, k8sclient.MergeFromWithOptions(before, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(HypervisorTaintControllerName)) + // Only set the Tainted condition this controller owns + taintedCondition := meta.FindStatusCondition(hypervisor.Status.Conditions, kvmv1.ConditionTypeTainted) + return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hypervisor.Name, HypervisorTaintControllerName, func(h *kvmv1.Hypervisor) { + if taintedCondition != nil { + meta.SetStatusCondition(&h.Status.Conditions, *taintedCondition) + } + }) } func (r *HypervisorTaintController) SetupWithManager(mgr ctrl.Manager) error { diff --git a/internal/controller/offboarding_controller.go b/internal/controller/offboarding_controller.go index 14336b43..d838471b 100644 --- a/internal/controller/offboarding_controller.go +++ b/internal/controller/offboarding_controller.go @@ -35,6 +35,7 @@ import ( kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) const ( @@ -146,30 +147,30 @@ func (r *HypervisorOffboardingReconciler) Reconcile(ctx context.Context, req ctr } func (r *HypervisorOffboardingReconciler) setOffboardingCondition(ctx context.Context, hv *kvmv1.Hypervisor, message string) (ctrl.Result, error) { - base := hv.DeepCopy() - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeOffboarded, - Status: metav1.ConditionFalse, - Reason: "Offboarding", - Message: message, + err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OffboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeOffboarded, + Status: metav1.ConditionFalse, + Reason: "Offboarding", + Message: message, + }) }) - if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OffboardingControllerName)); err != nil { + if err != nil { return ctrl.Result{}, fmt.Errorf("cannot update hypervisor status due to %w", err) } return ctrl.Result{}, nil } func (r *HypervisorOffboardingReconciler) markOffboarded(ctx context.Context, hv *kvmv1.Hypervisor) error { - base := hv.DeepCopy() - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeOffboarded, - Status: metav1.ConditionTrue, - Reason: "Offboarded", - Message: "Offboarding successful", + err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OffboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeOffboarded, + Status: metav1.ConditionTrue, + Reason: "Offboarded", + Message: "Offboarding successful", + }) }) - if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OffboardingControllerName)); err != nil { + if err != nil { return fmt.Errorf("cannot update hypervisor status due to %w", err) } return nil diff --git a/internal/controller/onboarding_controller.go b/internal/controller/onboarding_controller.go index 6dc66d01..0e466e66 100644 --- a/internal/controller/onboarding_controller.go +++ b/internal/controller/onboarding_controller.go @@ -46,6 +46,7 @@ import ( kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) var errRequeue = errors.New("requeue requested") @@ -109,15 +110,15 @@ func (r *OnboardingController) Reconcile(ctx context.Context, req ctrl.Request) // check condition reason status := meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) if status == nil { - base := hv.DeepCopy() - - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, Reason: kvmv1.ConditionReasonInitial, Message: "Initial onboarding", + } + return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) }) - return ctrl.Result{}, r.patchStatus(ctx, hv, base) } switch status.Reason { @@ -158,22 +159,28 @@ func (r *OnboardingController) abortOnboarding(ctx context.Context, hv *kvmv1.Hy return nil } + condition := *meta.FindStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) if err := r.deleteTestServers(ctx, computeHost); err != nil { - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition = metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, // No cleanup, so we are still "onboarding" Reason: kvmv1.ConditionReasonAborted, Message: err.Error(), - }) + } + meta.SetStatusCondition(&hv.Status.Conditions, condition) if equality.Semantic.DeepEqual(hv, base) { return err } - return errors.Join(err, r.patchStatus(ctx, hv, base)) + return errors.Join(err, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + })) } - return r.patchStatus(ctx, hv, base) + return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }) } func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1.Hypervisor) error { @@ -205,14 +212,15 @@ func (r *OnboardingController) initialOnboarding(ctx context.Context, hv *kvmv1. return result.Err } - base := hv.DeepCopy() - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, Reason: kvmv1.ConditionReasonTesting, Message: "Running onboarding tests", + } + return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) }) - return r.patchStatus(ctx, hv, base) } func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervisor, host string) (ctrl.Result, error) { @@ -234,15 +242,16 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis return ctrl.Result{RequeueAfter: defaultWaitTime}, nil } - base := hv.DeepCopy() // Set condition back to testing - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, Reason: kvmv1.ConditionReasonTesting, Message: "Server ended up in error state: " + server.Fault.Message, - }) - if err = r.patchStatus(ctx, hv, base); err != nil { + } + if err = utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }); err != nil { return ctrl.Result{}, err } @@ -257,14 +266,15 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis ShowConsoleOutput(ctx, r.testComputeClient, server.ID, servers.ShowConsoleOutputOpts{Length: 11}). Extract() if err != nil { - base := hv.DeepCopy() - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, Reason: kvmv1.ConditionReasonTesting, Message: fmt.Sprintf("could not get console output %v", err), - }) - if err := r.patchStatus(ctx, hv, base); err != nil { + } + if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }); err != nil { return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil @@ -272,14 +282,15 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis if !strings.Contains(consoleOutput, server.Name) { if !server.LaunchedAt.IsZero() && r.Clock.Now().After(server.LaunchedAt.Add(smokeTestTimeout)) { - base := hv.DeepCopy() - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, Reason: kvmv1.ConditionReasonTesting, Message: fmt.Sprintf("timeout waiting for console output since %v", server.LaunchedAt), - }) - if err := r.patchStatus(ctx, hv, base); err != nil { + } + if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }); err != nil { return ctrl.Result{}, err } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { @@ -292,14 +303,15 @@ func (r *OnboardingController) smokeTest(ctx context.Context, hv *kvmv1.Hypervis } if err = servers.Delete(ctx, r.testComputeClient, server.ID).ExtractErr(); err != nil { - base := hv.DeepCopy() - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, Reason: kvmv1.ConditionReasonTesting, Message: fmt.Sprintf("failed to terminate instance %v", err), - }) - if err := r.patchStatus(ctx, hv, base); err != nil { + } + if err := utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }); err != nil { return ctrl.Result{}, err } return ctrl.Result{RequeueAfter: defaultWaitTime}, nil @@ -328,44 +340,44 @@ func (r *OnboardingController) completeOnboarding(ctx context.Context, host stri return ctrl.Result{}, nil } - base := hv.DeepCopy() - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionFalse, Reason: kvmv1.ConditionReasonSucceeded, Message: "Onboarding completed", - }) + } - return ctrl.Result{}, r.patchStatus(ctx, hv, base) + return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }) } // First time in completeOnboarding - clean up and prepare for aggregate sync err := r.deleteTestServers(ctx, host) if err != nil { - base := hv.DeepCopy() - if meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, // No cleanup, so we are still "onboarding" Reason: kvmv1.ConditionReasonAborted, Message: err.Error(), - }) { - return ctrl.Result{}, errors.Join(err, r.patchStatus(ctx, hv, base)) } - - return ctrl.Result{}, err + return ctrl.Result{}, errors.Join(err, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + })) } - base := hv.DeepCopy() // Mark onboarding as almost complete, triggers other controllers to do their part - meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{ + condition := metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, Reason: kvmv1.ConditionReasonHandover, Message: "Waiting for other controllers to take over", - }) + } // Patch status to signal aggregates controller - return ctrl.Result{}, r.patchStatus(ctx, hv, base) + return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }) } func (r *OnboardingController) deleteTestServers(ctx context.Context, host string) error { @@ -438,10 +450,12 @@ func (r *OnboardingController) ensureNovaProperties(ctx context.Context, hv *kvm return fmt.Errorf("could not find exact match for %v", shortHypervisorAddress) } - base := hv.DeepCopy() - hv.Status.HypervisorID = myHypervisor.ID - hv.Status.ServiceID = myHypervisor.Service.ID - return r.patchStatus(ctx, hv, base) + hypervisorID := myHypervisor.ID + serviceID := myHypervisor.Service.ID + return utils.PatchHypervisorStatusWithRetry(ctx, r.Client, hv.Name, OnboardingControllerName, func(h *kvmv1.Hypervisor) { + h.Status.HypervisorID = hypervisorID + h.Status.ServiceID = serviceID + }) } func (r *OnboardingController) createOrGetTestServer(ctx context.Context, zone, computeHost string, nodeUid types.UID) (*servers.Server, error) { @@ -568,11 +582,6 @@ func (r *OnboardingController) findTestImage(ctx context.Context) (string, error return "", fmt.Errorf("couldn't find image with name %v", testImageName) } -func (r *OnboardingController) patchStatus(ctx context.Context, hv, base *kvmv1.Hypervisor) error { - return r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(OnboardingControllerName)) -} - // SetupWithManager sets up the controller with the Manager. func (r *OnboardingController) SetupWithManager(mgr ctrl.Manager) error { if r.TestFlavorID == "" { diff --git a/internal/controller/ready/controller.go b/internal/controller/ready/controller.go index 878e8caa..b7ba7b55 100644 --- a/internal/controller/ready/controller.go +++ b/internal/controller/ready/controller.go @@ -32,6 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/predicate" kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) const ( @@ -114,8 +115,9 @@ func (r *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu } log.Info("Updating Ready condition", "status", readyCondition.Status, "reason", readyCondition.Reason) - return ctrl.Result{}, r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(ControllerName)) + return ctrl.Result{}, utils.PatchHypervisorStatusWithRetry(ctx, r.Client, req.Name, ControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, readyCondition) + }) } // ComputeReadyCondition determines the Ready condition based on other conditions diff --git a/internal/controller/traits_controller.go b/internal/controller/traits_controller.go index 32ee2dd1..11916361 100644 --- a/internal/controller/traits_controller.go +++ b/internal/controller/traits_controller.go @@ -36,6 +36,7 @@ import ( kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack" + "github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/utils" ) const ( @@ -103,13 +104,11 @@ func (tc *TraitsController) Reconcile(ctx context.Context, req ctrl.Request) (ct // fetch current traits, to ensure we don't add duplicates current, err := resourceproviders.GetTraits(ctx, tc.serviceClient, hv.Status.HypervisorID).Extract() if err != nil { - base := hv.DeepCopy() - if meta.SetStatusCondition(&hv.Status.Conditions, - getTraitCondition(err, "Failed to get current traits from placement")) { - err = errors.Join(err, tc.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(TraitsControllerName))) - } - return ctrl.Result{}, err + condition := getTraitCondition(err, "Failed to get current traits from placement") + patchErr := utils.PatchHypervisorStatusWithRetry(ctx, tc.Client, hv.Name, TraitsControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }) + return ctrl.Result{}, errors.Join(err, patchErr) } var targetTraits []string @@ -135,24 +134,20 @@ func (tc *TraitsController) Reconcile(ctx context.Context, req ctrl.Request) (ct }) err = result.Err if err != nil { - // set status condition - base := hv.DeepCopy() - if meta.SetStatusCondition(&hv.Status.Conditions, - getTraitCondition(err, "Failed to update traits in placement")) { - err = errors.Join(err, - tc.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(TraitsControllerName))) - } - return ctrl.Result{}, err + condition := getTraitCondition(err, "Failed to update traits in placement") + patchErr := utils.PatchHypervisorStatusWithRetry(ctx, tc.Client, hv.Name, TraitsControllerName, func(h *kvmv1.Hypervisor) { + meta.SetStatusCondition(&h.Status.Conditions, condition) + }) + return ctrl.Result{}, errors.Join(err, patchErr) } } - base := hv.DeepCopy() // update status unconditionally, since we want always to propagate the current traits - hv.Status.Traits = targetTraits - meta.SetStatusCondition(&hv.Status.Conditions, getTraitCondition(nil, "Traits successfully updated")) - return ctrl.Result{}, tc.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, - k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(TraitsControllerName)) + err = utils.PatchHypervisorStatusWithRetry(ctx, tc.Client, hv.Name, TraitsControllerName, func(h *kvmv1.Hypervisor) { + h.Status.Traits = targetTraits + meta.SetStatusCondition(&h.Status.Conditions, getTraitCondition(nil, "Traits successfully updated")) + }) + return ctrl.Result{}, err } // getTraitCondition creates a Condition object for trait updates diff --git a/internal/utils/status_patch.go b/internal/utils/status_patch.go new file mode 100644 index 00000000..e353f876 --- /dev/null +++ b/internal/utils/status_patch.go @@ -0,0 +1,55 @@ +/* +SPDX-FileCopyrightText: Copyright 2025 SAP SE or an SAP affiliate company and cobaltcore-dev contributors +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "context" + "time" + + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" + k8sclient "sigs.k8s.io/controller-runtime/pkg/client" + + kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" +) + +// StatusPatchBackoff is a retry backoff for status patches that may conflict +// with other controllers. Uses exponential backoff with more retries than the +// default to handle high contention scenarios. +var StatusPatchBackoff = wait.Backoff{ + Steps: 10, + Duration: 50 * time.Millisecond, + Factor: 2.0, + Jitter: 0.2, +} + +// PatchHypervisorStatusWithRetry patches hypervisor status with retry on conflict. +// The updateFn receives a fresh copy of the hypervisor and should apply status changes to it. +// It re-fetches the resource before each retry attempt to get the latest resourceVersion. +func PatchHypervisorStatusWithRetry(ctx context.Context, c k8sclient.Client, name, fieldOwner string, updateFn func(*kvmv1.Hypervisor)) error { + return retry.RetryOnConflict(StatusPatchBackoff, func() error { + hv := &kvmv1.Hypervisor{} + if err := c.Get(ctx, k8sclient.ObjectKey{Name: name}, hv); err != nil { + return err + } + base := hv.DeepCopy() + updateFn(hv) + return c.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, + k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(fieldOwner)) + }) +}