Prototype for frozen canaries. #1196

Draft
wants to merge 1 commit into base: main

4 changes: 4 additions & 0 deletions artifacts/flagger/crd.yaml
@@ -1012,6 +1012,9 @@ spec:
type: object
additionalProperties:
type: string
freezeOnFailure:
description: Freeze canary on failure
type: boolean
status:
description: CanaryStatus defines the observed state of a canary.
type: object
@@ -1032,6 +1035,7 @@ spec:
- Failed
- Terminating
- Terminated
- Frozen
trackedConfigs:
description: TrackedConfig of this canary
additionalProperties:
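
The new freezeOnFailure flag lives under the canary's analysis spec. For context, a minimal Canary manifest using it might look like the sketch below; the target, service, and analysis values are illustrative and not taken from this PR:

apiVersion: flagger.app/v1beta1
kind: Canary
metadata:
  name: podinfo
  namespace: test
spec:
  targetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: podinfo
  service:
    port: 9898
  analysis:
    interval: 1m
    threshold: 5
    maxWeight: 50
    stepWeight: 10
    # proposed field: on a failed analysis, keep the current traffic split and
    # move the canary to the Frozen phase instead of rolling back
    freezeOnFailure: true

With freezeOnFailure left unset (it defaults to false via omitempty), the existing rollback behaviour is unchanged.
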
6 changes: 6 additions & 0 deletions pkg/apis/flagger/v1beta1/canary.go
@@ -257,6 +257,8 @@ type CanaryAnalysis struct {
// A/B testing HTTP header match conditions
// +optional
Match []istiov1alpha3.HTTPMatchRequest `json:"match,omitempty"`

FreezeOnFailure bool `json:"freezeOnFailure,omitempty"`
}

// CanaryMetric holds the reference to metrics used for canary analysis
@@ -485,6 +487,10 @@ func (c *Canary) GetAnalysisCanaryReadyThreshold() int {
return CanaryReadyThreshold
}

func (c *Canary) GetAnalysisFreezeOnFailure() bool {
return c.GetAnalysis().FreezeOnFailure
}

// GetMetricInterval returns the metric interval default value (1m)
func (c *Canary) GetMetricInterval() string {
return MetricInterval
1 change: 1 addition & 0 deletions pkg/apis/flagger/v1beta1/status.go
@@ -65,6 +65,7 @@ const (
// CanaryPhaseTerminated means the canary has been finalized
// and successfully deleted
CanaryPhaseTerminated CanaryPhase = "Terminated"
CanaryPhaseFrozen CanaryPhase = "Frozen"
)

// CanaryStatus is used for state persistence (read-only)
30 changes: 22 additions & 8 deletions pkg/controller/scheduler.go
@@ -274,18 +274,22 @@ func (c *Controller) advanceCanary(name string, namespace string) {
c.recordEventInfof(cd, "New revision detected! Restarting analysis for %s.%s",
cd.Spec.TargetRef.Name, cd.Namespace)

// route all traffic back to primary
primaryWeight = c.totalWeight(cd)
canaryWeight = 0
if err := meshRouter.SetRoutes(cd, primaryWeight, canaryWeight, false); err != nil {
c.recordEventWarningf(cd, "%v", err)
return
if !cd.Spec.Analysis.FreezeOnFailure {
Review comment (Member): Why restart only when FreezeOnFailure is disabled? Even if FreezeOnFailure is true and the canary deployment changed, I think we'd want the canary analysis to restart.

// route all traffic back to primary
primaryWeight = c.totalWeight(cd)
if err := meshRouter.SetRoutes(cd, primaryWeight, canaryWeight, false); err != nil {
c.recordEventWarningf(cd, "%v", err)
return
}
} else {
canaryWeight = c.totalWeight(cd)
Review comment (Member): I don't understand the reasoning behind this. Why would we want to set .status.canaryWeight to 100, when it's not actually receiving 100% of the traffic?

}

// reset status
status := flaggerv1.CanaryStatus{
Phase: flaggerv1.CanaryPhaseProgressing,
CanaryWeight: 0,
CanaryWeight: canaryWeight,
FailedChecks: 0,
Iterations: 0,
}
@@ -355,6 +359,14 @@ func (c *Controller) advanceCanary(name string, namespace string) {
c.alert(cd, fmt.Sprintf("Progress deadline exceeded %v", err),
false, flaggerv1.SeverityError)
}
if cd.Spec.Analysis.FreezeOnFailure {
c.recordEventInfof(cd, "Freezing failed canary: %s.%s", cd.Spec.TargetRef.Name, cd.Namespace)
cd.Status.Phase = flaggerv1.CanaryPhaseFrozen
if err := canaryController.SyncStatus(cd, cd.Status); err != nil {
c.recordEventWarningf(cd, "%v", err)
}
return
}
c.rollback(cd, canaryController, meshRouter)
return
}
@@ -788,7 +800,8 @@ func (c *Controller) checkCanaryStatus(canary *flaggerv1.Canary, canaryControlle
if canary.Status.Phase == flaggerv1.CanaryPhaseProgressing ||
canary.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion ||
canary.Status.Phase == flaggerv1.CanaryPhasePromoting ||
canary.Status.Phase == flaggerv1.CanaryPhaseFinalising {
canary.Status.Phase == flaggerv1.CanaryPhaseFinalising ||
canary.Status.Phase == flaggerv1.CanaryPhaseFrozen {
return true
}

@@ -834,7 +847,8 @@ func (c *Controller) checkCanaryStatus(canary *flaggerv1.Canary, canaryControlle

func (c *Controller) hasCanaryRevisionChanged(canary *flaggerv1.Canary, canaryController canary.Controller) bool {
if canary.Status.Phase == flaggerv1.CanaryPhaseProgressing ||
canary.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion {
canary.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion ||
Review comment (Member): I don't think we need to check for CanaryPhaseFrozen, because of the concern described in the above comment.

canary.Status.Phase == flaggerv1.CanaryPhaseFrozen {
if diff, _ := canaryController.HasTargetChanged(canary); diff {
return true
}