Skip to content

Commit

Permalink
Analytics Webhook retries
Browse files Browse the repository at this point in the history
  • Loading branch information
Kwasniewski committed Sep 18, 2023
1 parent 7ab0eb1 commit 3d8dd05
Show file tree
Hide file tree
Showing 14 changed files with 212 additions and 3 deletions.
10 changes: 10 additions & 0 deletions artifacts/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1086,6 +1086,16 @@ spec:
type: object
additionalProperties:
type: string
status:
description: CanaryStatus defines the observed state of a canary.
type: object
properties:
retries:
description: Current number of retries the webhook has performed
type: number
retries:
description: Number of retries before incrementing canary failure count
type: number
sessionAffinity:
description: SessionAffinity represents the session affinity settings for a canary run.
type: object
Expand Down
10 changes: 10 additions & 0 deletions charts/flagger/crds/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1086,6 +1086,16 @@ spec:
type: object
additionalProperties:
type: string
status:
description: CanaryStatus defines the observed state of a canary.
type: object
properties:
retries:
description: Current number of retries the webhook has performed
type: number
retries:
description: Number of retries before incrementing canary failure count
type: number
sessionAffinity:
description: SessionAffinity represents the session affinity settings for a canary run.
type: object
Expand Down
10 changes: 10 additions & 0 deletions kustomize/base/flagger/crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1086,6 +1086,16 @@ spec:
type: object
additionalProperties:
type: string
status:
description: CanaryStatus defines the observed state of a canary.
type: object
properties:
retries:
description: Current number of retries the webhook has performed
type: number
retries:
description: Number of retries before incrementing canary failure count
type: number
sessionAffinity:
description: SessionAffinity represents the session affinity settings for a canary run.
type: object
Expand Down
7 changes: 7 additions & 0 deletions pkg/apis/flagger/v1beta1/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,13 @@ type CanaryWebhook struct {
// Metadata (key-value pairs) for this webhook
// +optional
Metadata *map[string]string `json:"metadata,omitempty"`

// Canary Webhook Status
Status CanaryWebhookStatus `json:"status"`

// Retries for failing webhook before incrementing rollout failure threshold
// +optional
Retries int `json:"retries,omitempty"`
}

// CanaryWebhookPayload holds the deployment info and metadata sent to webhooks
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/flagger/v1beta1/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,8 @@ type CanaryStatus struct {
// +optional
Conditions []CanaryCondition `json:"conditions,omitempty"`
}

// CanaryStatus is used for state persistence (read-only)
type CanaryWebhookStatus struct {
Retries int `json:"retries"`
}
17 changes: 17 additions & 0 deletions pkg/apis/flagger/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pkg/canary/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type Controller interface {
SetStatusWeight(canary *flaggerv1.Canary, val int) error
SetStatusIterations(canary *flaggerv1.Canary, val int) error
SetStatusPhase(canary *flaggerv1.Canary, phase flaggerv1.CanaryPhase) error
SetWebhookStatusRetries(canary *flaggerv1.Canary, webhook int, val int) error
Initialize(canary *flaggerv1.Canary) error
Promote(canary *flaggerv1.Canary) error
HasTargetChanged(canary *flaggerv1.Canary) (bool, error)
Expand Down
5 changes: 5 additions & 0 deletions pkg/canary/daemonset_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,8 @@ func (c *DaemonSetController) SetStatusIterations(cd *flaggerv1.Canary, val int)
func (c *DaemonSetController) SetStatusPhase(cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase) error {
return setStatusPhase(c.flaggerClient, cd, phase)
}

// SetWebhookStatusRetries updates the webhook retries counter
func (c *DaemonSetController) SetWebhookStatusRetries(cd *flaggerv1.Canary, webhook int, val int) error {
return SetWebhookStatusRetries(c.flaggerClient, cd, webhook, val)
}
5 changes: 5 additions & 0 deletions pkg/canary/deployment_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,8 @@ func (c *DeploymentController) SetStatusIterations(cd *flaggerv1.Canary, val int
func (c *DeploymentController) SetStatusPhase(cd *flaggerv1.Canary, phase flaggerv1.CanaryPhase) error {
return setStatusPhase(c.flaggerClient, cd, phase)
}

// SetWebhookStatusRetries updates the webhook retries counter
func (c *DeploymentController) SetWebhookStatusRetries(cd *flaggerv1.Canary, webhook int, val int) error {
return SetWebhookStatusRetries(c.flaggerClient, cd, webhook, val)
}
5 changes: 5 additions & 0 deletions pkg/canary/service_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ func (c *ServiceController) SetStatusPhase(cd *flaggerv1.Canary, phase flaggerv1
return setStatusPhase(c.flaggerClient, cd, phase)
}

// SetWebhookStatusRetries updates the webhook retries counter
func (c *ServiceController) SetWebhookStatusRetries(cd *flaggerv1.Canary, webhook int, val int) error {
return SetWebhookStatusRetries(c.flaggerClient, cd, webhook, val)
}

// GetMetadata returns the pod label selector, label value and svc ports
func (c *ServiceController) GetMetadata(_ *flaggerv1.Canary) (string, string, map[string]int32, error) {
return "", "", nil, nil
Expand Down
25 changes: 25 additions & 0 deletions pkg/canary/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,31 @@ func setStatusPhase(flaggerClient clientset.Interface, cd *flaggerv1.Canary, pha
return nil
}

func SetWebhookStatusRetries(flaggerClient clientset.Interface, cd *flaggerv1.Canary, webhook int, val int) error {
firstTry := true
name, ns := cd.GetName(), cd.GetNamespace()
err := retry.RetryOnConflict(retry.DefaultBackoff, func() (err error) {
if !firstTry {
cd, err = flaggerClient.FlaggerV1beta1().Canaries(ns).Get(context.TODO(), name, metav1.GetOptions{})
if err != nil {
return fmt.Errorf("canary %s.%s get query failed: %w", name, ns, err)
}
}

cdCopy := cd.DeepCopy()
cdCopy.GetAnalysis().Webhooks[webhook].Status.Retries = val
cdCopy.Status.LastTransitionTime = metav1.Now()

err = updateStatusWithUpgrade(flaggerClient, cdCopy)
firstTry = false
return
})
if err != nil {
return fmt.Errorf("failed after retries: %w", err)
}
return nil
}

// getStatusCondition returns a condition based on type
func getStatusCondition(status flaggerv1.CanaryStatus, conditionType flaggerv1.CanaryConditionType) *flaggerv1.CanaryCondition {
for i := range status.Conditions {
Expand Down
28 changes: 25 additions & 3 deletions pkg/controller/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,16 @@ func (c *Controller) advanceCanary(name string, namespace string) {
return
}
} else {
webhookStatus := c.runAnalysisWebhooks(cd, canaryController)
if webhookStatus == "failure" {
if err := canaryController.SetStatusFailedChecks(cd, cd.Status.FailedChecks+1); err != nil {
c.recordEventWarningf(cd, "%v", err)
}
return
} else if webhookStatus == "retry" {
return
}

if ok := c.runAnalysis(cd); !ok {
if err := canaryController.SetStatusFailedChecks(cd, cd.Status.FailedChecks+1); err != nil {
c.recordEventWarningf(cd, "%v", err)
Expand Down Expand Up @@ -723,19 +733,31 @@ func (c *Controller) runBlueGreen(canary *flaggerv1.Canary, canaryController can

}

func (c *Controller) runAnalysis(canary *flaggerv1.Canary) bool {
func (c *Controller) runAnalysisWebhooks(canary *flaggerv1.Canary, canaryController canary.Controller) string {
// run external checks
for _, webhook := range canary.GetAnalysis().Webhooks {
for id, webhook := range canary.GetAnalysis().Webhooks {
if webhook.Type == "" || webhook.Type == flaggerv1.RolloutHook {
err := CallWebhook(canary.Name, canary.Namespace, flaggerv1.CanaryPhaseProgressing, webhook)
if err != nil {
if webhook.Retries > 0 && webhook.Status.Retries < webhook.Retries {
if err := canaryController.SetWebhookStatusRetries(canary, id, webhook.Status.Retries+1); err != nil {
c.recordEventWarningf(canary, "%v", err)
}
c.recordEventWarningf(canary, "Halt %s.%s advancement external check %s failed %v retrying %d more times",
canary.Name, canary.Namespace, webhook.Name, err, webhook.Retries-webhook.Status.Retries-1)
return "retry"
}
c.recordEventWarningf(canary, "Halt %s.%s advancement external check %s failed %v",
canary.Name, canary.Namespace, webhook.Name, err)
return false
return "failure"
}
}
}

return "success"
}

func (c *Controller) runAnalysis(canary *flaggerv1.Canary) bool {
ok := c.runBuiltinMetricChecks(canary)
if !ok {
return ok
Expand Down
74 changes: 74 additions & 0 deletions pkg/controller/scheduler_deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -646,3 +646,77 @@ func TestScheduler_DeploymentAlerts(t *testing.T) {
// initialization done - now send alert
mocks.ctrl.advanceCanary("podinfo", "default")
}

func TestScheduler_DeploymentWebhookRetry(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
}))
defer ts.Close()

tsOk := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusFound)
}))
defer ts.Close()

canary := newDeploymentTestCanary()
canary.Spec.Analysis.Webhooks = []flaggerv1.CanaryWebhook{
{
Name: "failing-url",
Type: "rollout",
URL: ts.URL,
Retries: 3,
},
{
Name: "success-url",
Type: "rollout",
URL: tsOk.URL,
Retries: 5,
},
}
mocks := newDeploymentFixture(canary)

// initializing
mocks.ctrl.advanceCanary("podinfo", "default")

// make primary ready
mocks.makePrimaryReady(t)

// initialized
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhaseInitialized))

// update
dep2 := newDeploymentTestDeploymentV2()
_, err := mocks.kubeClient.AppsV1().Deployments("default").Update(context.TODO(), dep2, metav1.UpdateOptions{})
require.NoError(t, err)

// detect changes
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhaseProgressing))
mocks.makeCanaryReady(t)

// progressing
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhaseProgressing))

// verify webhook retry counter increases
mocks.ctrl.advanceCanary("podinfo", "default")
// Failing URL 1, Success URL 0
require.NoError(t, assertWebhookRetries(mocks.flaggerClient, "podinfo", 0, 1))
require.NoError(t, assertWebhookRetries(mocks.flaggerClient, "podinfo", 1, 0))
// Failing URL 2, Success URL 0
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertWebhookRetries(mocks.flaggerClient, "podinfo", 0, 2))
require.NoError(t, assertWebhookRetries(mocks.flaggerClient, "podinfo", 1, 0))
// Failing URL 3, Success URL 0
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertWebhookRetries(mocks.flaggerClient, "podinfo", 0, 3))
require.NoError(t, assertWebhookRetries(mocks.flaggerClient, "podinfo", 1, 0))

// update failed checks to max
mocks.deployer.SyncStatus(mocks.canary, flaggerv1.CanaryStatus{Phase: flaggerv1.CanaryPhaseProgressing, FailedChecks: 10})

// canary failed due to failed webhook
mocks.ctrl.advanceCanary("podinfo", "default")
require.NoError(t, assertPhase(mocks.flaggerClient, "podinfo", flaggerv1.CanaryPhaseFailed))
}
13 changes: 13 additions & 0 deletions pkg/controller/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,19 @@ func assertPhase(flaggerClient clientset.Interface, canary string, phase flagger
return nil
}

func assertWebhookRetries(flaggerClient clientset.Interface, canary string, webhook int, retries int) error {
c, err := flaggerClient.FlaggerV1beta1().Canaries("default").Get(context.TODO(), canary, metav1.GetOptions{})
if err != nil {
return err
}

if c.GetAnalysis().Webhooks[webhook].Status.Retries != retries {
return fmt.Errorf("got webhook status retries %d wanted %d", c.GetAnalysis().Webhooks[webhook].Status.Retries, retries)
}

return nil
}

func alwaysReady() bool {
return true
}
Expand Down

0 comments on commit 3d8dd05

Please sign in to comment.