From eb276db1a98cee50c46b58d34515aea5d2dd6fcf Mon Sep 17 00:00:00 2001 From: Andy Librian Date: Fri, 5 Aug 2022 11:18:16 +0700 Subject: [PATCH] add dedicated phase waiting traffic increase This is to fix #1248 where canary is restarted to "Starting canary analysis" on confirm traffic increase failure. Signed-off-by: Andy Librian --- artifacts/flagger/crd.yaml | 1 + charts/flagger/crds/crd.yaml | 1 + docs/gitbook/usage/how-it-works.md | 2 +- kustomize/base/flagger/crd.yaml | 1 + pkg/apis/flagger/v1beta1/status.go | 2 ++ pkg/canary/status.go | 3 +++ pkg/controller/scheduler.go | 12 +++++++++--- pkg/controller/scheduler_hooks.go | 24 ++++++++++++++++++------ 8 files changed, 36 insertions(+), 10 deletions(-) diff --git a/artifacts/flagger/crd.yaml b/artifacts/flagger/crd.yaml index ed2ce257d..0873edb68 100644 --- a/artifacts/flagger/crd.yaml +++ b/artifacts/flagger/crd.yaml @@ -1033,6 +1033,7 @@ spec: - Initialized - Waiting - Progressing + - WaitingTrafficIncrease - WaitingPromotion - Promoting - Finalising diff --git a/charts/flagger/crds/crd.yaml b/charts/flagger/crds/crd.yaml index ed2ce257d..0873edb68 100644 --- a/charts/flagger/crds/crd.yaml +++ b/charts/flagger/crds/crd.yaml @@ -1033,6 +1033,7 @@ spec: - Initialized - Waiting - Progressing + - WaitingTrafficIncrease - WaitingPromotion - Promoting - Finalising diff --git a/docs/gitbook/usage/how-it-works.md b/docs/gitbook/usage/how-it-works.md index dbe09b748..3805ab431 100644 --- a/docs/gitbook/usage/how-it-works.md +++ b/docs/gitbook/usage/how-it-works.md @@ -256,7 +256,7 @@ status: ``` The `Promoted` status condition can have one of the following reasons: -Initialized, Waiting, Progressing, WaitingPromotion, Promoting, Finalising, Succeeded or Failed. +Initialized, Waiting, Progressing, WaitingTrafficIncrease, WaitingPromotion, Promoting, Finalising, Succeeded or Failed. A failed canary will have the promoted status set to `false`, the reason to `failed` and the last applied spec will be different to the last promoted one. diff --git a/kustomize/base/flagger/crd.yaml b/kustomize/base/flagger/crd.yaml index ed2ce257d..0873edb68 100644 --- a/kustomize/base/flagger/crd.yaml +++ b/kustomize/base/flagger/crd.yaml @@ -1033,6 +1033,7 @@ spec: - Initialized - Waiting - Progressing + - WaitingTrafficIncrease - WaitingPromotion - Promoting - Finalising diff --git a/pkg/apis/flagger/v1beta1/status.go b/pkg/apis/flagger/v1beta1/status.go index 2a487fb56..2fc576cbf 100644 --- a/pkg/apis/flagger/v1beta1/status.go +++ b/pkg/apis/flagger/v1beta1/status.go @@ -47,6 +47,8 @@ const ( CanaryPhaseWaiting CanaryPhase = "Waiting" // CanaryPhaseProgressing means the canary analysis is underway CanaryPhaseProgressing CanaryPhase = "Progressing" + // CanaryPhaseWaitingTrafficIncrease means the canary advancement is paused and is waiting for confirmation to increase traffic to the canary. + CanaryPhaseWaitingTrafficIncrease CanaryPhase = "WaitingTrafficIncrease" // CanaryPhaseWaitingPromotion means the canary promotion is paused (waiting for confirmation to proceed) CanaryPhaseWaitingPromotion CanaryPhase = "WaitingPromotion" // CanaryPhasePromoting means the canary analysis is finished and the primary spec has been updated diff --git a/pkg/canary/status.go b/pkg/canary/status.go index a8917e8c7..b30bb7906 100644 --- a/pkg/canary/status.go +++ b/pkg/canary/status.go @@ -213,6 +213,9 @@ func MakeStatusConditions(cd *flaggerv1.Canary, case flaggerv1.CanaryPhaseWaiting: status = corev1.ConditionUnknown message = "Waiting for approval." + case flaggerv1.CanaryPhaseWaitingTrafficIncrease: + status = corev1.ConditionUnknown + message = "Waiting for approval." case flaggerv1.CanaryPhaseWaitingPromotion: status = corev1.ConditionUnknown message = "Waiting for approval." diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index ba45aca44..549aa9870 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -331,6 +331,7 @@ func (c *Controller) advanceCanary(name string, namespace string) { // check if we should rollback if cd.Status.Phase == flaggerv1.CanaryPhaseProgressing || cd.Status.Phase == flaggerv1.CanaryPhaseWaiting || + cd.Status.Phase == flaggerv1.CanaryPhaseWaitingTrafficIncrease || cd.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion { if ok := c.runRollbackHooks(cd, cd.Status.Phase); ok { c.recordEventWarningf(cd, "Rolling back %s.%s manual webhook invoked", cd.Name, cd.Namespace) @@ -379,7 +380,9 @@ func (c *Controller) advanceCanary(name string, namespace string) { } // check if the number of failed checks reached the threshold - if (cd.Status.Phase == flaggerv1.CanaryPhaseProgressing || cd.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion) && + if (cd.Status.Phase == flaggerv1.CanaryPhaseProgressing || + cd.Status.Phase == flaggerv1.CanaryPhaseWaitingTrafficIncrease || + cd.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion) && (!retriable || cd.Status.FailedChecks >= cd.GetAnalysisThreshold()) { if !retriable { c.recordEventWarningf(cd, "Rolling back %s.%s progress deadline exceeded %v", @@ -398,7 +401,7 @@ func (c *Controller) advanceCanary(name string, namespace string) { // check if the canary success rate is above the threshold // skip check if no traffic is routed or mirrored to canary - if canaryWeight == 0 && cd.Status.Iterations == 0 && + if cd.Status.Phase == flaggerv1.CanaryPhaseProgressing && canaryWeight == 0 && cd.Status.Iterations == 0 && !(cd.GetAnalysis().Mirror && mirrored) { c.recordEventInfof(cd, "Starting canary analysis for %s.%s", cd.Spec.TargetRef.Name, cd.Namespace) @@ -447,7 +450,7 @@ func (c *Controller) advanceCanary(name string, namespace string) { if c.nextStepWeight(cd, canaryWeight) > 0 { // run hook only if traffic is not mirrored if !mirrored { - if promote := c.runConfirmTrafficIncreaseHooks(cd); !promote { + if promote := c.runConfirmTrafficIncreaseHooks(cd, canaryController); !promote { return } } @@ -795,6 +798,7 @@ func (c *Controller) shouldAdvance(canary *flaggerv1.Canary, canaryController ca canary.Status.Phase == flaggerv1.CanaryPhaseInitializing || canary.Status.Phase == flaggerv1.CanaryPhaseProgressing || canary.Status.Phase == flaggerv1.CanaryPhaseWaiting || + canary.Status.Phase == flaggerv1.CanaryPhaseWaitingTrafficIncrease || canary.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion || canary.Status.Phase == flaggerv1.CanaryPhasePromoting || canary.Status.Phase == flaggerv1.CanaryPhaseFinalising { @@ -829,6 +833,7 @@ func (c *Controller) shouldAdvance(canary *flaggerv1.Canary, canaryController ca func (c *Controller) checkCanaryStatus(canary *flaggerv1.Canary, canaryController canary.Controller, scalerReconciler canary.ScalerReconciler, shouldAdvance bool) bool { c.recorder.SetStatus(canary, canary.Status.Phase) if canary.Status.Phase == flaggerv1.CanaryPhaseProgressing || + canary.Status.Phase == flaggerv1.CanaryPhaseWaitingTrafficIncrease || canary.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion || canary.Status.Phase == flaggerv1.CanaryPhasePromoting || canary.Status.Phase == flaggerv1.CanaryPhaseFinalising { @@ -884,6 +889,7 @@ func (c *Controller) checkCanaryStatus(canary *flaggerv1.Canary, canaryControlle func (c *Controller) hasCanaryRevisionChanged(canary *flaggerv1.Canary, canaryController canary.Controller) bool { if canary.Status.Phase == flaggerv1.CanaryPhaseProgressing || + canary.Status.Phase == flaggerv1.CanaryPhaseWaitingTrafficIncrease || canary.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion { if diff, _ := canaryController.HasTargetChanged(canary); diff { return true diff --git a/pkg/controller/scheduler_hooks.go b/pkg/controller/scheduler_hooks.go index 7d2349be2..a22fa8c6c 100644 --- a/pkg/controller/scheduler_hooks.go +++ b/pkg/controller/scheduler_hooks.go @@ -23,17 +23,29 @@ import ( "github.com/fluxcd/flagger/pkg/canary" ) -func (c *Controller) runConfirmTrafficIncreaseHooks(canary *flaggerv1.Canary) bool { +func (c *Controller) runConfirmTrafficIncreaseHooks(canary *flaggerv1.Canary, canaryController canary.Controller) bool { for _, webhook := range canary.GetAnalysis().Webhooks { if webhook.Type == flaggerv1.ConfirmTrafficIncreaseHook { - err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook) + err := CallWebhook(canary.Name, canary.Namespace, canary.Status.Phase, webhook) if err != nil { - c.recordEventWarningf(canary, "Halt %s.%s advancement waiting for traffic increase approval %s", - canary.Name, canary.Namespace, webhook.Name) - if !webhook.MuteAlert { - c.alert(canary, "Canary traffic increase is waiting for approval.", false, flaggerv1.SeverityWarn) + if canary.Status.Phase != flaggerv1.CanaryPhaseWaitingTrafficIncrease { + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseWaitingTrafficIncrease); err != nil { + c.logger.With("canary", fmt.Sprintf("%s.%s", canary.Name, canary.Namespace)).Errorf("%v", err) + } + c.recordEventWarningf(canary, "Halt %s.%s advancement waiting for traffic increase approval %s", + canary.Name, canary.Namespace, webhook.Name) + if !webhook.MuteAlert { + c.alert(canary, "Canary traffic increase is waiting for approval.", false, flaggerv1.SeverityWarn) + } } return false + } else { + if canary.Status.Phase == flaggerv1.CanaryPhaseWaitingTrafficIncrease { + if err := canaryController.SetStatusPhase(canary, flaggerv1.CanaryPhaseProgressing); err != nil { + c.logger.With("canary", fmt.Sprintf("%s.%s", canary.Name, canary.Namespace)).Errorf("%v", err) + return false + } + } } c.recordEventInfof(canary, "Confirm-traffic-increase check %s passed", webhook.Name) }