diff --git a/artifacts/flagger/crd.yaml b/artifacts/flagger/crd.yaml index 19b24319e..3d7eba9d8 100644 --- a/artifacts/flagger/crd.yaml +++ b/artifacts/flagger/crd.yaml @@ -992,6 +992,7 @@ spec: - pre-rollout - rollout - confirm-promotion + - confirm-finalizing - post-rollout - event - rollback @@ -1027,6 +1028,7 @@ spec: - Progressing - WaitingPromotion - Promoting + - WaitingFinalising - Finalising - Succeeded - Failed diff --git a/charts/flagger/crds/crd.yaml b/charts/flagger/crds/crd.yaml index 19b24319e..b99b9d823 100644 --- a/charts/flagger/crds/crd.yaml +++ b/charts/flagger/crds/crd.yaml @@ -992,6 +992,7 @@ spec: - pre-rollout - rollout - confirm-promotion + - confirm-finalizing - post-rollout - event - rollback @@ -1028,6 +1029,7 @@ spec: - WaitingPromotion - Promoting - Finalising + - WaitingFinalising - Succeeded - Failed - Terminating diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go index 13a45a100..2ca2c4367 100644 --- a/pkg/apis/flagger/v1beta1/canary.go +++ b/pkg/apis/flagger/v1beta1/canary.go @@ -319,22 +319,24 @@ type CanaryAlert struct { type HookType string const ( - // RolloutHook execute webhook during the canary analysis + // RolloutHook executes webhook during the canary analysis RolloutHook HookType = "rollout" - // PreRolloutHook execute webhook before routing traffic to canary + // PreRolloutHook executes webhook before routing traffic to canary PreRolloutHook HookType = "pre-rollout" - // PostRolloutHook execute webhook after the canary analysis + // PostRolloutHook executes webhook after the canary analysis PostRolloutHook HookType = "post-rollout" - // ConfirmRolloutHook halt canary analysis until webhook returns HTTP 200 + // ConfirmRolloutHook halts canary analysis until webhook returns HTTP 200 ConfirmRolloutHook HookType = "confirm-rollout" - // ConfirmPromotionHook halt canary promotion until webhook returns HTTP 200 + // ConfirmPromotionHook halts canary promotion until webhook returns HTTP 200 
ConfirmPromotionHook HookType = "confirm-promotion" + // ConfirmFinalizingHook halts canary finalizing until webhook returns HTTP 200 + ConfirmFinalizingHook HookType = "confirm-finalizing" // EventHook dispatches Flagger events to the specified endpoint EventHook HookType = "event" - // RollbackHook rollback canary analysis if webhook returns HTTP 200 + // RollbackHook rolls back canary analysis if webhook returns HTTP 200 RollbackHook HookType = "rollback" // ConfirmTrafficIncreaseHook increases traffic weight if webhook returns HTTP 200 - ConfirmTrafficIncreaseHook = "confirm-traffic-increase" + ConfirmTrafficIncreaseHook HookType = "confirm-traffic-increase" ) // CanaryWebhook holds the reference to external checks used for canary analysis diff --git a/pkg/apis/flagger/v1beta1/status.go b/pkg/apis/flagger/v1beta1/status.go index 2a487fb56..ae60fe767 100644 --- a/pkg/apis/flagger/v1beta1/status.go +++ b/pkg/apis/flagger/v1beta1/status.go @@ -51,6 +51,8 @@ const ( CanaryPhaseWaitingPromotion CanaryPhase = "WaitingPromotion" // CanaryPhasePromoting means the canary analysis is finished and the primary spec has been updated CanaryPhasePromoting CanaryPhase = "Promoting" + // CanaryPhaseWaitingFinalising means the canary finalising is paused (waiting for confirmation to proceed) + CanaryPhaseWaitingFinalising CanaryPhase = "WaitingFinalising" // CanaryPhaseFinalising means the canary promotion is finished and traffic has been routed back to primary CanaryPhaseFinalising CanaryPhase = "Finalising" // CanaryPhaseSucceeded means the canary analysis has been successful diff --git a/pkg/canary/status.go b/pkg/canary/status.go index a8917e8c7..d430f0162 100644 --- a/pkg/canary/status.go +++ b/pkg/canary/status.go @@ -212,22 +212,25 @@ func MakeStatusConditions(cd *flaggerv1.Canary, message = fmt.Sprintf("%s initialization completed.", cd.Spec.TargetRef.Kind) case flaggerv1.CanaryPhaseWaiting: status = corev1.ConditionUnknown - message = "Waiting for approval." 
- case flaggerv1.CanaryPhaseWaitingPromotion: - status = corev1.ConditionUnknown - message = "Waiting for approval." + message = "Waiting for approval to start canary handling." case flaggerv1.CanaryPhaseProgressing: status = corev1.ConditionUnknown message = "New revision detected, progressing canary analysis." + case flaggerv1.CanaryPhaseWaitingPromotion: + status = corev1.ConditionUnknown + message = "Waiting for approval to start primary rolling update" case flaggerv1.CanaryPhasePromoting: status = corev1.ConditionUnknown message = "Canary analysis completed, starting primary rolling update." + case flaggerv1.CanaryPhaseWaitingFinalising: + status = corev1.ConditionUnknown + message = "Waiting for approval to start finalizing" case flaggerv1.CanaryPhaseFinalising: status = corev1.ConditionUnknown - message = "Canary analysis completed, routing all traffic to primary." + message = "Primary rolling update completed, routed all traffic to primary." case flaggerv1.CanaryPhaseSucceeded: status = corev1.ConditionTrue - message = "Canary analysis completed successfully, promotion finished." + message = "Canary analysis completed successfully, promotion & finalization finished." 
case flaggerv1.CanaryPhaseFailed: status = corev1.ConditionFalse message = fmt.Sprintf("Canary analysis failed, %s scaled to zero.", cd.Spec.TargetRef.Kind) diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index 54f142d6a..1032c70eb 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -241,9 +241,11 @@ func (c *Controller) advanceCanary(name string, namespace string) { } // check gates + //if cd.Status.Phase != flaggerv1.CanaryPhaseWaitingFinalising && cd.Status.Phase != flaggerv1.CanaryPhaseFinalising { if isApproved := c.runConfirmRolloutHooks(cd, canaryController); !isApproved { return } + //} maxWeight := c.maxWeight(cd) @@ -311,7 +313,8 @@ func (c *Controller) advanceCanary(name string, namespace string) { // check if we should rollback if cd.Status.Phase == flaggerv1.CanaryPhaseProgressing || cd.Status.Phase == flaggerv1.CanaryPhaseWaiting || - cd.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion { + cd.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion || + cd.Status.Phase == flaggerv1.CanaryPhaseWaitingFinalising { if ok := c.runRollbackHooks(cd, cd.Status.Phase); ok { c.recordEventWarningf(cd, "Rolling back %s.%s manual webhook invoked", cd.Name, cd.Namespace) c.alert(cd, "Rolling back manual webhook invoked", false, flaggerv1.SeverityWarn) @@ -322,32 +325,29 @@ func (c *Controller) advanceCanary(name string, namespace string) { // route traffic back to primary if analysis has succeeded if cd.Status.Phase == flaggerv1.CanaryPhasePromoting { + //start traffic shift only when the primary is ready + if err := canaryController.IsPrimaryReady(cd); err != nil { + c.recordEventWarningf(cd, "%v", err) + return + } + c.runPromotionTrafficShift(cd, canaryController, meshRouter, provider, canaryWeight, primaryWeight) return } // scale canary to zero if promotion has finished - if cd.Status.Phase == flaggerv1.CanaryPhaseFinalising { - if err := canaryController.ScaleToZero(cd); err != nil { - 
c.recordEventWarningf(cd, "%v", err) - return - } + if (cd.Status.Phase == flaggerv1.CanaryPhaseFinalising || + cd.Status.Phase == flaggerv1.CanaryPhaseWaitingFinalising) && + cd.Status.FailedChecks < cd.GetAnalysisThreshold() { - // set status to succeeded - if err := canaryController.SetStatusPhase(cd, flaggerv1.CanaryPhaseSucceeded); err != nil { - c.recordEventWarningf(cd, "%v", err) - return - } - c.recorder.SetStatus(cd, flaggerv1.CanaryPhaseSucceeded) - c.runPostRolloutHooks(cd, flaggerv1.CanaryPhaseSucceeded) - c.recordEventInfof(cd, "Promotion completed! Scaling down %s.%s", cd.Spec.TargetRef.Name, cd.Namespace) - c.alert(cd, "Canary analysis completed successfully, promotion finished.", - false, flaggerv1.SeverityInfo) + c.runFinalizing(cd, canaryController) return } - // check if the number of failed checks reached the threshold - if (cd.Status.Phase == flaggerv1.CanaryPhaseProgressing || cd.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion) && + // check if the number of failed checks reached the threshold for rollback + if (cd.Status.Phase == flaggerv1.CanaryPhaseProgressing || + cd.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion || + cd.Status.Phase == flaggerv1.CanaryPhaseWaitingFinalising) && (!retriable || cd.Status.FailedChecks >= cd.GetAnalysisThreshold()) { if !retriable { c.recordEventWarningf(cd, "Rolling back %s.%s progress deadline exceeded %v", @@ -428,7 +428,7 @@ func (c *Controller) runPromotionTrafficShift(canary *flaggerv1.Canary, canaryCo meshRouter router.Interface, provider string, canaryWeight int, primaryWeight int) { // finalize promotion since no traffic shifting is possible for Kubernetes CNI if provider == flaggerv1.KubernetesProvider { - if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseFinalising); err != nil { + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseWaitingFinalising); err != nil { c.recordEventWarningf(canary, "%v", err) } return @@ -442,7 +442,7 @@ func (c 
*Controller) runPromotionTrafficShift(canary *flaggerv1.Canary, canaryCo return } c.recorder.SetWeight(canary, c.totalWeight(canary), 0) - if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseFinalising); err != nil { + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseWaitingFinalising); err != nil { c.recordEventWarningf(canary, "%v", err) } return @@ -467,7 +467,7 @@ func (c *Controller) runPromotionTrafficShift(canary *flaggerv1.Canary, canaryCo // finalize promotion if primaryWeight == c.totalWeight(canary) { - if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseFinalising); err != nil { + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseWaitingFinalising); err != nil { c.recordEventWarningf(canary, "%v", err) } } else { @@ -481,6 +481,34 @@ func (c *Controller) runPromotionTrafficShift(canary *flaggerv1.Canary, canaryCo } +func (c *Controller) runFinalizing(cd *flaggerv1.Canary, canaryController canary.Controller) { + if ok := c.runConfirmFinalizingHook(cd, flaggerv1.CanaryPhaseFinalising, canaryController); !ok { + return + } + + if err := canaryController.ScaleToZero(cd); err != nil { + c.recordEventWarningf(cd, "%v", err) + return + } + // set status to succeeded + if err := canaryController.SetStatusPhase(cd, flaggerv1.CanaryPhaseSucceeded); err != nil { + c.recordEventWarningf(cd, "%v", err) + return + } + c.recorder.SetStatus(cd, flaggerv1.CanaryPhaseSucceeded) + c.runPostRolloutHooks(cd, flaggerv1.CanaryPhaseSucceeded) + + if cd.SkipAnalysis() { + c.recordEventInfof(cd, "Promotion completed! Canary analysis was skipped for %s.%s", cd.Spec.TargetRef.Name, cd.Namespace) + c.alert(cd, "Canary analysis was skipped, promotion finished.", + false, flaggerv1.SeverityInfo) + } else { + c.recordEventInfof(cd, "Promotion completed! 
Scaling down %s.%s", cd.Spec.TargetRef.Name, cd.Namespace) + c.alert(cd, "Canary analysis completed successfully, promotion finished.", + false, flaggerv1.SeverityInfo) + } +} + func (c *Controller) runCanary(canary *flaggerv1.Canary, canaryController canary.Controller, meshRouter router.Interface, mirrored bool, canaryWeight int, primaryWeight int, maxWeight int) { primaryName := fmt.Sprintf("%s-primary", canary.Spec.TargetRef.Name) @@ -540,17 +568,19 @@ func (c *Controller) runCanary(canary *flaggerv1.Canary, canaryController canary } // update primary spec - c.recordEventInfof(canary, "Copying %s.%s template spec to %s.%s", - canary.Spec.TargetRef.Name, canary.Namespace, primaryName, canary.Namespace) - if err := canaryController.Promote(canary); err != nil { - c.recordEventWarningf(canary, "%v", err) - return - } + if canary.Status.Phase != flaggerv1.CanaryPhasePromoting { + c.recordEventInfof(canary, "Copying %s.%s template spec to %s.%s", + canary.Spec.TargetRef.Name, canary.Namespace, primaryName, canary.Namespace) + if err := canaryController.Promote(canary); err != nil { + c.recordEventWarningf(canary, "%v", err) + return + } - // update status phase - if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhasePromoting); err != nil { - c.recordEventWarningf(canary, "%v", err) - return + // update status phase + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhasePromoting); err != nil { + c.recordEventWarningf(canary, "%v", err) + return + } } } } @@ -583,17 +613,19 @@ func (c *Controller) runAB(canary *flaggerv1.Canary, canaryController canary.Con // promote canary - max iterations reached if canary.GetAnalysis().Iterations == canary.Status.Iterations { - c.recordEventInfof(canary, "Copying %s.%s template spec to %s.%s", - canary.Spec.TargetRef.Name, canary.Namespace, primaryName, canary.Namespace) - if err := canaryController.Promote(canary); err != nil { - c.recordEventWarningf(canary, "%v", err) - return - } + if 
canary.Status.Phase != flaggerv1.CanaryPhasePromoting { + c.recordEventInfof(canary, "Copying %s.%s template spec to %s.%s", + canary.Spec.TargetRef.Name, canary.Namespace, primaryName, canary.Namespace) + if err := canaryController.Promote(canary); err != nil { + c.recordEventWarningf(canary, "%v", err) + return + } - // update status phase - if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhasePromoting); err != nil { - c.recordEventWarningf(canary, "%v", err) - return + // update status phase + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhasePromoting); err != nil { + c.recordEventWarningf(canary, "%v", err) + return + } } } } @@ -652,17 +684,19 @@ func (c *Controller) runBlueGreen(canary *flaggerv1.Canary, canaryController can // promote canary - max iterations reached if canary.GetAnalysis().Iterations < canary.Status.Iterations { - c.recordEventInfof(canary, "Copying %s.%s template spec to %s.%s", - canary.Spec.TargetRef.Name, canary.Namespace, primaryName, canary.Namespace) - if err := canaryController.Promote(canary); err != nil { - c.recordEventWarningf(canary, "%v", err) - return - } + if canary.Status.Phase != flaggerv1.CanaryPhasePromoting { + c.recordEventInfof(canary, "Copying %s.%s template spec to %s.%s", + canary.Spec.TargetRef.Name, canary.Namespace, primaryName, canary.Namespace) + if err := canaryController.Promote(canary); err != nil { + c.recordEventWarningf(canary, "%v", err) + return + } - // update status phase - if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhasePromoting); err != nil { - c.recordEventWarningf(canary, "%v", err) - return + // update status phase + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhasePromoting); err != nil { + c.recordEventWarningf(canary, "%v", err) + return + } } } @@ -700,6 +734,7 @@ func (c *Controller) shouldSkipAnalysis(canary *flaggerv1.Canary, canaryControll } // regardless if analysis is being skipped, rollback if canary 
failed to progress + //if !retriable || canary.Status.FailedChecks >= canary.GetAnalysisThreshold() { if !retriable { c.recordEventWarningf(canary, "Rolling back %s.%s progress deadline exceeded %v", canary.Name, canary.Namespace, err) c.alert(canary, fmt.Sprintf("Progress deadline exceeded %v", err), false, flaggerv1.SeverityError) @@ -718,32 +753,43 @@ func (c *Controller) shouldSkipAnalysis(canary *flaggerv1.Canary, canaryControll c.recorder.SetWeight(canary, primaryWeight, canaryWeight) // copy spec and configs from canary to primary - c.recordEventInfof(canary, "Copying %s.%s template spec to %s-primary.%s", - canary.Spec.TargetRef.Name, canary.Namespace, canary.Spec.TargetRef.Name, canary.Namespace) - if err := canaryController.Promote(canary); err != nil { - c.recordEventWarningf(canary, "%v", err) - return true + if canary.Status.Phase != flaggerv1.CanaryPhasePromoting { + c.recordEventInfof(canary, "Copying %s.%s template spec to %s-primary.%s", + canary.Spec.TargetRef.Name, canary.Namespace, canary.Spec.TargetRef.Name, canary.Namespace) + if err := canaryController.Promote(canary); err != nil { + c.recordEventWarningf(canary, "%v", err) + return true + } + + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhasePromoting); err != nil { + c.recordEventWarningf(canary, "%v", err) + return true + } } - // shutdown canary - if err := canaryController.ScaleToZero(canary); err != nil { - c.recordEventWarningf(canary, "%v", err) - return true + // route traffic back to primary if analysis has succeeded + if canary.Status.Phase == flaggerv1.CanaryPhasePromoting { + //start traffic shift only when the primary is ready + if err := canaryController.IsPrimaryReady(canary); err != nil { + c.recordEventWarningf(canary, "%v", err) + return true + } + + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseWaitingFinalising); err != nil { + c.recordEventWarningf(canary, "%v", err) + return true + } + } - // update status phase - if err 
:= canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseSucceeded); err != nil { - c.recordEventWarningf(canary, "%v", err) + // scale canary to zero if promotion has finished + if (canary.Status.Phase == flaggerv1.CanaryPhaseFinalising || + canary.Status.Phase == flaggerv1.CanaryPhaseWaitingFinalising) && + canary.Status.FailedChecks < canary.GetAnalysisThreshold() { + c.runFinalizing(canary, canaryController) return true } - // notify - c.recorder.SetStatus(canary, flaggerv1.CanaryPhaseSucceeded) - c.recordEventInfof(canary, "Promotion completed! Canary analysis was skipped for %s.%s", - canary.Spec.TargetRef.Name, canary.Namespace) - c.alert(canary, "Canary analysis was skipped, promotion finished.", - false, flaggerv1.SeverityInfo) - return true } @@ -754,6 +800,7 @@ func (c *Controller) shouldAdvance(canary *flaggerv1.Canary, canaryController ca canary.Status.Phase == flaggerv1.CanaryPhaseWaiting || canary.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion || canary.Status.Phase == flaggerv1.CanaryPhasePromoting || + canary.Status.Phase == flaggerv1.CanaryPhaseWaitingFinalising || canary.Status.Phase == flaggerv1.CanaryPhaseFinalising { return true, nil } @@ -788,6 +835,7 @@ func (c *Controller) checkCanaryStatus(canary *flaggerv1.Canary, canaryControlle if canary.Status.Phase == flaggerv1.CanaryPhaseProgressing || canary.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion || canary.Status.Phase == flaggerv1.CanaryPhasePromoting || + canary.Status.Phase == flaggerv1.CanaryPhaseWaitingFinalising || canary.Status.Phase == flaggerv1.CanaryPhaseFinalising { return true } @@ -868,6 +916,10 @@ func (c *Controller) rollback(canary *flaggerv1.Canary, canaryController canary. 
c.recorder.SetWeight(canary, primaryWeight, canaryWeight) + if ok := c.runConfirmFinalizingHook(canary, flaggerv1.CanaryPhaseFailed, canaryController); !ok { + return + } + // shutdown canary if err := canaryController.ScaleToZero(canary); err != nil { c.recordEventWarningf(canary, "%v", err) diff --git a/pkg/controller/scheduler_hooks.go b/pkg/controller/scheduler_hooks.go index 7d2349be2..808c60180 100644 --- a/pkg/controller/scheduler_hooks.go +++ b/pkg/controller/scheduler_hooks.go @@ -104,6 +104,33 @@ func (c *Controller) runConfirmPromotionHooks(canary *flaggerv1.Canary, canaryCo return true } +func (c *Controller) runConfirmFinalizingHook(canary *flaggerv1.Canary, nextPhase flaggerv1.CanaryPhase, canaryController canary.Controller) bool { + for _, webhook := range canary.GetAnalysis().Webhooks { + if webhook.Type == flaggerv1.ConfirmFinalizingHook { + err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseWaitingFinalising, webhook) + if err != nil { + if canary.Status.Phase != flaggerv1.CanaryPhaseWaitingFinalising { + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseWaitingFinalising); err != nil { + c.logger.With("canary", fmt.Sprintf("%s.%s", canary.Name, canary.Namespace)).Errorf("%v", err) + } + } + c.recordEventWarningf(canary, "Halt finalizing %s.%s waiting for finalizing approval %s", canary.Name, canary.Namespace, webhook.Name) + if !webhook.MuteAlert { + c.alert(canary, "Canary finalizing is waiting for approval.", false, flaggerv1.SeverityWarn) + } + return false + } else { + if err := canaryController.SetStatusPhase(canary, nextPhase); err != nil { + c.logger.With("canary", fmt.Sprintf("%s.%s", canary.Name, canary.Namespace)).Errorf("%v", err) + return false + } + c.recordEventInfof(canary, "Confirm-finalizing check %s passed. 
Next phase is %s", webhook.Name, nextPhase) + } + } + } + return true +} + func (c *Controller) runPreRolloutHooks(canary *flaggerv1.Canary) bool { for _, webhook := range canary.GetAnalysis().Webhooks { if webhook.Type == flaggerv1.PreRolloutHook {