From 61e128c0e44e1402d42e7bae5f28e83c742f3b39 Mon Sep 17 00:00:00 2001 From: tyagian Date: Thu, 7 Sep 2023 23:04:46 -0400 Subject: [PATCH 1/5] retry limit on rolloutHook --- charts/flagger/crds/crd.yaml | 3 +++ pkg/apis/flagger/v1beta1/canary.go | 5 +++++ pkg/controller/scheduler.go | 21 ++++++++++++++++----- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/charts/flagger/crds/crd.yaml b/charts/flagger/crds/crd.yaml index ad4686100..4382929cf 100644 --- a/charts/flagger/crds/crd.yaml +++ b/charts/flagger/crds/crd.yaml @@ -1077,6 +1077,9 @@ spec: description: URL address of this webhook type: string format: url + retrylimit: + type: integer + description: Number of retry if Rollout hook failed timeout: description: Request timeout for this webhook type: string diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go index 40d162d1c..ec475993b 100644 --- a/pkg/apis/flagger/v1beta1/canary.go +++ b/pkg/apis/flagger/v1beta1/canary.go @@ -390,6 +390,11 @@ type CanaryWebhook struct { // Metadata (key-value pairs) for this webhook // +optional Metadata *map[string]string `json:"metadata,omitempty"` + + // FailureThreshold for rollout hook + // +optional + RetryLimit int `json:"retrylimit,omitempty"` + } // CanaryWebhookPayload holds the deployment info and metadata sent to webhooks diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index d470d993d..425a720c9 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -727,11 +727,22 @@ func (c *Controller) runAnalysis(canary *flaggerv1.Canary) bool { // run external checks for _, webhook := range canary.GetAnalysis().Webhooks { if webhook.Type == "" || webhook.Type == flaggerv1.RolloutHook { - err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook) - if err != nil { - c.recordEventWarningf(canary, "Halt %s.%s advancement external check %s failed %v", - canary.Name, canary.Namespace, webhook.Name, err) - return false + retries := 0 + for { + err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook) + if err != nil { + c.recordEventWarningf(canary, "Retrying %s.%s advancement external check %s failed %v", + canary.Name, canary.Namespace, webhook.Name, err) + retries++ + if retries >= retrylimit { + c.recordEventWarningf(canary, "Halt %s.%s advancement external check %s failed %v", + canary.Name, canary.Namespace, webhook.Name, err) + return false // retry limit crossed retrylimit + } + } else { + break // Success, exit the retry loop + } + } } } From b89c267d6dbef05748ac3ec87eb628e98e123d30 Mon Sep 17 00:00:00 2001 From: tyagian Date: Fri, 8 Sep 2023 18:04:10 -0400 Subject: [PATCH 2/5] update crds --- artifacts/flagger/crd.yaml | 3 +++ kustomize/base/flagger/crd.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/artifacts/flagger/crd.yaml b/artifacts/flagger/crd.yaml index ad4686100..4382929cf 100644 --- a/artifacts/flagger/crd.yaml +++ b/artifacts/flagger/crd.yaml @@ -1077,6 +1077,9 @@ spec: description: URL address of this webhook type: string format: url + retrylimit: + type: integer + description: Number of retry if Rollout hook failed timeout: description: Request timeout for this webhook type: string diff --git a/kustomize/base/flagger/crd.yaml b/kustomize/base/flagger/crd.yaml index ad4686100..4382929cf 100644 --- a/kustomize/base/flagger/crd.yaml +++ b/kustomize/base/flagger/crd.yaml @@ -1077,6 +1077,9 @@ spec: description: URL address of this webhook type: string format: url + retrylimit: + type: integer + description: Number of retry if Rollout hook failed timeout: description: Request timeout for this webhook type: string From 3d7f52391ba4cb834083aad124cd5efeb851682c Mon Sep 17 00:00:00 2001 From: tyagian Date: Sun, 10 Sep 2023 09:51:10 -0400 Subject: [PATCH 3/5] added runRolloutWebhooks method --- pkg/apis/flagger/v1beta1/canary.go | 2 +- pkg/controller/scheduler.go | 39 ++++++++++++++++++------------ 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go index ec475993b..d21f70d0d 100644 --- a/pkg/apis/flagger/v1beta1/canary.go +++ b/pkg/apis/flagger/v1beta1/canary.go @@ -393,7 +393,7 @@ type CanaryWebhook struct { // FailureThreshold for rollout hook // +optional - RetryLimit int `json:"retrylimit,omitempty"` + WebhookRetries int `json:"webhookRetries,omitempty"` } diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index 425a720c9..9ead2e018 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -727,22 +727,11 @@ func (c *Controller) runAnalysis(canary *flaggerv1.Canary) bool { // run external checks for _, webhook := range canary.GetAnalysis().Webhooks { if webhook.Type == "" || webhook.Type == flaggerv1.RolloutHook { - retries := 0 - for { - err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook) - if err != nil { - c.recordEventWarningf(canary, "Retrying %s.%s advancement external check %s failed %v", - canary.Name, canary.Namespace, webhook.Name, err) - retries++ - if retries >= retrylimit { - c.recordEventWarningf(canary, "Halt %s.%s advancement external check %s failed %v", - canary.Name, canary.Namespace, webhook.Name, err) - return false // retry limit crossed retrylimit - } - } else { - break // Success, exit the retry loop - } - + err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook) + if err != nil { + c.recordEventWarningf(canary, "Halt %s.%s advancement external check %s failed %v", + canary.Name, canary.Namespace, webhook.Name, err) + return false } } } @@ -760,6 +749,24 @@ func (c *Controller) runAnalysis(canary *flaggerv1.Canary) bool { return true } +func (c *Controller) runRolloutWebhooks(canary *flaggerv1.Canary,retryLimit int) bool { + for _, webhook := range canary.GetAnalysis().Webhooks { + if webhook.Type == "" || webhook.Type == flaggerv1.RolloutHook { + if canary.Status.WebhookRetries >= retryLimit { + c.recordEventWarningf(canary, "Retries exceeded limit for webhook %s", webhook.Name) + c.recordEventWarningf(canary, "Halt %s.%s advancement external check %s failed %v",canary.Name, canary.Namespace, webhook.Name, err) + return false + } + if err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook); if err != nil { + c.recordEventWarningf(canary, "Retrying: %s.%s advancement external check %s failed %v",canary.Name, canary.Namespace, webhook.Name, err) + canary.Status.WebhookRetries++ + return false + } + } + } + return true +} + func (c *Controller) shouldSkipAnalysis(canary *flaggerv1.Canary, canaryController canary.Controller, meshRouter router.Interface, scalerReconciler canary.ScalerReconciler, err error, retriable bool) bool { if !canary.SkipAnalysis() { return false From 4c34c09bc8a4770d96bb4e74fbb4cdef10c54fc4 Mon Sep 17 00:00:00 2001 From: tyagian Date: Sun, 10 Sep 2023 10:16:59 -0400 Subject: [PATCH 4/5] update runRolloutWebhooks --- pkg/controller/scheduler.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index 9ead2e018..276d67f31 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -430,6 +430,7 @@ func (c *Controller) advanceCanary(name string, namespace string) { return } } else { + c.runRolloutWebhooks(cd); if ok := c.runAnalysis(cd); !ok { if err := canaryController.SetStatusFailedChecks(cd, cd.Status.FailedChecks+1); err != nil { c.recordEventWarningf(cd, "%v", err) @@ -754,11 +755,9 @@ func (c *Controller) runRolloutWebhooks(canary *flaggerv1.Canary,retryLimit int) if webhook.Type == "" || webhook.Type == flaggerv1.RolloutHook { if canary.Status.WebhookRetries >= retryLimit { c.recordEventWarningf(canary, "Retries exceeded limit for webhook %s", webhook.Name) - c.recordEventWarningf(canary, "Halt %s.%s advancement external check %s failed %v",canary.Name, canary.Namespace, webhook.Name, err) return false } if err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook); if err != nil { - c.recordEventWarningf(canary, "Retrying: %s.%s advancement external check %s failed %v",canary.Name, canary.Namespace, webhook.Name, err) canary.Status.WebhookRetries++ return false } From 4a258b88729659113ed9091aac8d01f977adb024 Mon Sep 17 00:00:00 2001 From: Anuj Tyagi Date: Sun, 10 Sep 2023 19:43:27 -0400 Subject: [PATCH 5/5] Update scheduler.go Signed-off-by: Anuj Tyagi --- pkg/controller/scheduler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index 276d67f31..096b9d469 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -757,7 +757,7 @@ func (c *Controller) runRolloutWebhooks(canary *flaggerv1.Canary,retryLimit int) c.recordEventWarningf(canary, "Retries exceeded limit for webhook %s", webhook.Name) return false } - if err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook); if err != nil { + if err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook); err != nil { canary.Status.WebhookRetries++ return false }