-
Notifications
You must be signed in to change notification settings - Fork 819
/
bluegreen.go
347 lines (311 loc) · 12.8 KB
/
bluegreen.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
package rollout
import (
"fmt"
"math"
"sort"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1"
"github.com/argoproj/argo-rollouts/utils/defaults"
replicasetutil "github.com/argoproj/argo-rollouts/utils/replicaset"
serviceutil "github.com/argoproj/argo-rollouts/utils/service"
)
// rolloutBlueGreen runs one reconciliation pass for a rollout that uses the
// blue-green strategy. It looks up the preview and active Services, syncs the
// new ReplicaSet, and then reconciles the preview Service, pod restarts,
// ReplicaSets, pause state, the active Service, AWS target groups, analysis
// runs and ephemeral metadata, in that order, before syncing the rollout
// status. The first step that fails ends the pass and its error is returned.
func (c *rolloutContext) rolloutBlueGreen() error {
	previewSvc, activeSvc, err := c.getPreviewAndActiveServices()
	if err != nil {
		return err
	}

	if c.newRS, err = c.getAllReplicaSetsAndSyncRevision(true); err != nil {
		return fmt.Errorf("failed to getAllReplicaSetsAndSyncRevision in rolloutBlueGreen create true: %w", err)
	}

	// The preview Service must be reconciled immediately after the new
	// ReplicaSet has been created.
	if err = c.reconcilePreviewService(previewSvc); err != nil {
		return err
	}

	// When CheckPodSpecChange reports a change, skip the remaining steps and
	// go straight to the status sync.
	if replicasetutil.CheckPodSpecChange(c.rollout, c.newRS) {
		return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc)
	}

	if err = c.podRestarter.Reconcile(c); err != nil {
		return err
	}

	if err = c.reconcileBlueGreenReplicaSets(activeSvc); err != nil {
		return err
	}

	// Pause reconciliation reports no error; it only updates the pause context.
	c.reconcileBlueGreenPause(activeSvc, previewSvc)

	if err = c.reconcileActiveService(activeSvc); err != nil {
		return err
	}

	if err = c.awsVerifyTargetGroups(activeSvc); err != nil {
		return err
	}

	if err = c.reconcileAnalysisRuns(); err != nil {
		return err
	}

	if err = c.reconcileEphemeralMetadata(); err != nil {
		return err
	}

	return c.syncRolloutStatusBlueGreen(previewSvc, activeSvc)
}
// reconcileBlueGreenStableReplicaSet scales the ReplicaSet selected by the
// active Service to defaults.GetReplicasOrDefault(spec.replicas).
//
// It returns nil without scaling when the active Service selector has no
// rollout pod-hash label, or when no ReplicaSet is found for that hash (a
// warning is logged in the latter case). A scaling failure is returned wrapped.
func (c *rolloutContext) reconcileBlueGreenStableReplicaSet(activeSvc *corev1.Service) error {
	activeHash, hasHash := activeSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]
	if !hasHash {
		return nil
	}

	// The second return value is deliberately discarded: a nil ReplicaSet is
	// what signals "not found" here.
	activeRS, _ := replicasetutil.GetReplicaSetByTemplateHash(c.allRSs, activeHash)
	if activeRS == nil {
		c.log.Warn("There shouldn't be a nil active replicaset if the active Service selector is set")
		return nil
	}

	c.log.Infof("Reconciling stable ReplicaSet '%s'", activeRS.Name)
	desiredReplicas := defaults.GetReplicasOrDefault(c.rollout.Spec.Replicas)
	if _, _, err := c.scaleReplicaSetAndRecordEvent(activeRS, desiredReplicas); err != nil {
		return fmt.Errorf("failed to scaleReplicaSetAndRecordEvent in reconcileBlueGreenStableReplicaSet: %w", err)
	}
	return nil
}
// reconcileBlueGreenReplicaSets reconciles the rollout's ReplicaSets for the
// blue-green strategy. The steps run in a fixed order and the first error
// stops the sequence:
//
//  1. removeScaleDownDeadlines
//  2. reconcileBlueGreenStableReplicaSet (the ReplicaSet behind activeSvc)
//  3. reconcileNewReplicaSet
//  4. reconcileOtherReplicaSets
//  5. reconcileRevisionHistoryLimit over c.otherRSs
func (c *rolloutContext) reconcileBlueGreenReplicaSets(activeSvc *corev1.Service) error {
	if err := c.removeScaleDownDeadlines(); err != nil {
		return err
	}
	if err := c.reconcileBlueGreenStableReplicaSet(activeSvc); err != nil {
		return err
	}
	if _, err := c.reconcileNewReplicaSet(); err != nil {
		return err
	}
	// Scale down old ReplicaSets that are neither active nor stable, where possible.
	if _, err := c.reconcileOtherReplicaSets(); err != nil {
		return err
	}
	return c.reconcileRevisionHistoryLimit(c.otherRSs)
}
// isBlueGreenFastTracked reports whether the blue-green pause step should be
// skipped for this update. It returns true when any of the following holds,
// checked in this order:
//   - the new ReplicaSet has a scale-down deadline (this case is logged),
//   - status.promoteFull is set,
//   - the active Service selector has no rollout pod-hash label,
//   - the active Service already selects the new ReplicaSet's pod hash.
func (c *rolloutContext) isBlueGreenFastTracked(activeSvc *corev1.Service) bool {
	if replicasetutil.HasScaleDownDeadline(c.newRS) {
		c.log.Infof("Detected scale down annotation for ReplicaSet '%s' and will skip pause", c.newRS.Name)
		return true
	}
	if c.rollout.Status.PromoteFull {
		return true
	}
	activeHash, hasHash := activeSvc.Spec.Selector[v1alpha1.DefaultRolloutUniqueLabelKey]
	return !hasHash || activeHash == c.newRS.Labels[v1alpha1.DefaultRolloutUniqueLabelKey]
}
// reconcileBlueGreenPause adds or removes the controller-managed blue-green
// pause condition for the current reconciliation pass.
//
// Nothing is changed when the rollout is aborted, when the new ReplicaSet is
// not ready to pause, or when haltProgress reports a reason. The pause
// condition is removed when the update is fast-tracked, the active Service
// already selects the new ReplicaSet, the scaleUpPreviewCheckPoint has been
// passed, or needsBlueGreenControllerPause says the controller does not manage
// the pause. Otherwise, once pre-promotion analysis has completed, the
// controller either adds the pause condition, keeps waiting for the pause to
// complete, or removes the condition after it has completed.
//
// previewSvc is accepted but not read by this function.
func (c *rolloutContext) reconcileBlueGreenPause(activeSvc, previewSvc *corev1.Service) {
	if c.rollout.Status.Abort {
		return
	}
	if !replicasetutil.ReadyForPause(c.rollout, c.newRS, c.allRSs) {
		c.log.Infof("New RS '%s' is not ready to pause", c.newRS.Name)
		return
	}
	if haltReason := c.haltProgress(); haltReason != "" {
		c.log.Infof("skipping pause reconciliation: %s", haltReason)
		return
	}

	// Every case except the default means no controller pause is wanted, so
	// any existing pause condition is dropped.
	hashKey := v1alpha1.DefaultRolloutUniqueLabelKey
	controllerManaged := false
	switch {
	case c.isBlueGreenFastTracked(activeSvc):
		c.log.Debug("skipping pause: fast-tracked update")
	case activeSvc.Spec.Selector[hashKey] == c.newRS.Labels[hashKey]:
		// NOTE(review): isBlueGreenFastTracked already returns true for this
		// condition, so this case looks unreachable. It is kept as a
		// defensive check; confirm before removing.
		c.log.Debug("skipping pause: desired ReplicaSet already active")
	case c.rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint:
		c.log.Debug("skipping pause: scaleUpPreviewCheckPoint passed")
	case !needsBlueGreenControllerPause(c.rollout):
		// The controller does not manage the pause; nothing is logged here.
	default:
		controllerManaged = true
	}
	if !controllerManaged {
		c.pauseContext.RemovePauseCondition(v1alpha1.PauseReasonBlueGreenPause)
		return
	}

	// From here on the controller is responsible for pausing and resuming.
	autoPromotionSeconds := c.rollout.Spec.Strategy.BlueGreen.AutoPromotionSeconds
	c.log.Infof("reconciling pause (autoPromotionSeconds: %d)", autoPromotionSeconds)
	if !c.completedPrePromotionAnalysis() {
		c.log.Infof("not ready for pause: prePromotionAnalysis incomplete")
		return
	}

	pauseCond := getPauseCondition(c.rollout, v1alpha1.PauseReasonBlueGreenPause)
	if pauseCond == nil {
		// No pause condition exists. If Status.ControllerPause is true, the
		// user resumed the rollout manually,
		// e.g. `kubectl argo rollouts promote ROLLOUT`, so do not pause again.
		if !c.rollout.Status.ControllerPause {
			c.log.Info("pausing")
			c.pauseContext.AddPauseCondition(v1alpha1.PauseReasonBlueGreenPause)
		}
		return
	}

	// Currently paused: remove the condition once the pause has run its course.
	if c.pauseContext.CompletedBlueGreenPause() {
		c.log.Infof("pause completed")
		c.pauseContext.RemovePauseCondition(v1alpha1.PauseReasonBlueGreenPause)
		return
	}

	c.log.Info("pause incomplete")
	if autoPromotionSeconds > 0 {
		// A timed pause: make sure the rollout is looked at again during the wait.
		c.checkEnqueueRolloutDuringWait(pauseCond.StartTime, autoPromotionSeconds)
	}
}
// needsBlueGreenControllerPause reports whether the controller should manage
// the pause status of the blue-green rollout. That is the case when
// autoPromotionEnabled is explicitly set to false, or when
// autoPromotionSeconds is greater than zero.
func needsBlueGreenControllerPause(ro *v1alpha1.Rollout) bool {
	blueGreen := ro.Spec.Strategy.BlueGreen
	if enabled := blueGreen.AutoPromotionEnabled; enabled != nil && !*enabled {
		return true
	}
	return blueGreen.AutoPromotionSeconds > 0
}
// scaleDownOldReplicaSetsForBlueGreen scales down old ReplicaSets for a rollout
// using the blue-green strategy.
//
// No scaling is attempted (false, nil) while the rollout is paused for an
// inconclusive analysis, or while a post-promotion analysis is configured
// without scaleDownDelaySeconds and has not yet finished successfully.
// Otherwise oldRSs is sorted in the reverse of the ReplicaSetsByRevisionNumber
// ordering and each ReplicaSet that is unreferenced and not already at zero is
// scaled to the count chosen by scaleDownDelayHelper.
//
// It returns true if at least one ReplicaSet was scaled, and the first error
// hit while computing or applying a scale.
func (c *rolloutContext) scaleDownOldReplicaSetsForBlueGreen(oldRSs []*appsv1.ReplicaSet) (bool, error) {
	if getPauseCondition(c.rollout, v1alpha1.PauseReasonInconclusiveAnalysis) != nil {
		c.log.Infof("Cannot scale down old ReplicaSets while paused with inconclusive Analysis ")
		return false, nil
	}

	blueGreen := c.rollout.Spec.Strategy.BlueGreen
	mustAwaitPostPromotion := blueGreen != nil &&
		blueGreen.PostPromotionAnalysis != nil &&
		blueGreen.ScaleDownDelaySeconds == nil &&
		!skipPostPromotionAnalysisRun(c.rollout, c.newRS)
	if mustAwaitPostPromotion {
		postAr := c.currentArs.BlueGreenPostPromotion
		if postAr == nil || postAr.Status.Phase != v1alpha1.AnalysisPhaseSuccessful {
			c.log.Infof("Cannot scale down old ReplicaSets while Analysis is running and no ScaleDownDelaySeconds")
			return false, nil
		}
	}

	sort.Sort(sort.Reverse(replicasetutil.ReplicaSetsByRevisionNumber(oldRSs)))

	var (
		scaledAny      bool
		annotatedCount int32
		err            error
	)
	rolloutReplicas := defaults.GetReplicasOrDefault(c.rollout.Spec.Replicas)
	for _, rs := range oldRSs {
		if c.isReplicaSetReferenced(rs) {
			// We might get here if the user interrupted an update in order to move back to stable.
			c.log.Infof("Skip scale down of older RS '%s': still referenced", rs.Name)
			continue
		}
		if *rs.Spec.Replicas == 0 {
			// Already at zero; there is nothing left to scale down.
			continue
		}

		var desired int32
		annotatedCount, desired, err = c.scaleDownDelayHelper(rs, annotatedCount, rolloutReplicas)
		if err != nil {
			return false, err
		}
		if *rs.Spec.Replicas == desired {
			// Already at the desired count, nothing to do.
			continue
		}

		if _, _, err = c.scaleReplicaSetAndRecordEvent(rs, desired); err != nil {
			return false, fmt.Errorf("failed to scaleReplicaSetAndRecordEvent in scaleDownOldReplicaSetsForBlueGreen: %w", err)
		}
		scaledAny = true
	}

	return scaledAny, nil
}
// GetScaleDownRevisionLimit returns the scaleDownDelayRevisionLimit configured
// on the rollout's strategy. The blue-green setting is consulted first, then
// the canary setting; if neither is set, math.MaxInt32 is returned.
func GetScaleDownRevisionLimit(ro *v1alpha1.Rollout) int32 {
	if blueGreen := ro.Spec.Strategy.BlueGreen; blueGreen != nil && blueGreen.ScaleDownDelayRevisionLimit != nil {
		return *blueGreen.ScaleDownDelayRevisionLimit
	}
	if canary := ro.Spec.Strategy.Canary; canary != nil && canary.ScaleDownDelayRevisionLimit != nil {
		return *canary.ScaleDownDelayRevisionLimit
	}
	return math.MaxInt32
}
// syncRolloutStatusBlueGreen computes the rollout status for the blue-green
// strategy and persists it.
//
// Starting from calculateBaseStatus, it carries over the stable ReplicaSet,
// resets the status when CheckPodSpecChange reports a change, clears pause
// conditions and abort on a full promotion or a rollback within the window,
// records the preview and active selectors read from the two Services, and
// then either promotes the stable ReplicaSet or recalculates
// scaleUpPreviewCheckPoint. Replica counts and the selector come from the
// active ReplicaSet when one is found, otherwise from all ReplicaSets and the
// rollout's own selector. The error from persistRolloutStatus is returned.
func (c *rolloutContext) syncRolloutStatusBlueGreen(previewSvc *corev1.Service, activeSvc *corev1.Service) error {
	newStatus := c.calculateBaseStatus()
	newStatus.StableRS = c.rollout.Status.StableRS

	if replicasetutil.CheckPodSpecChange(c.rollout, c.newRS) {
		c.resetRolloutStatus(&newStatus)
	}
	if c.rollout.Status.PromoteFull || c.isRollbackWithinWindow() {
		c.pauseContext.ClearPauseConditions()
		c.pauseContext.RemoveAbort()
	}

	previewSelector := serviceutil.GetRolloutSelectorLabel(previewSvc)
	if recorded := c.rollout.Status.BlueGreen.PreviewSelector; recorded != previewSelector {
		c.log.Infof("Updating preview selector (%s -> %s)", recorded, previewSelector)
	}
	newStatus.BlueGreen.PreviewSelector = previewSelector

	activeSelector := serviceutil.GetRolloutSelectorLabel(activeSvc)
	if recorded := c.rollout.Status.BlueGreen.ActiveSelector; recorded != activeSelector {
		c.log.Infof("Updating active selector (%s -> %s)", recorded, activeSelector)
	}
	newStatus.BlueGreen.ActiveSelector = activeSelector

	promoteReason := c.shouldFullPromote(newStatus)
	if promoteReason == "" {
		newStatus.BlueGreen.ScaleUpPreviewCheckPoint = c.calculateScaleUpPreviewCheckPoint(newStatus)
	} else {
		c.promoteStable(&newStatus, promoteReason)
	}

	activeRS, _ := replicasetutil.GetReplicaSetByTemplateHash(c.allRSs, newStatus.BlueGreen.ActiveSelector)
	if activeRS == nil {
		// Without an active ReplicaSet, accounting is done on the default rollout selector.
		newStatus.HPAReplicas = replicasetutil.GetActualReplicaCountForReplicaSets(c.allRSs)
		newStatus.Selector = metav1.FormatLabelSelector(c.rollout.Spec.Selector)
		newStatus.AvailableReplicas = replicasetutil.GetAvailableReplicaCountForReplicaSets(c.allRSs)
		// NOTE: ReadyReplicas is not set here because c.calculateBaseStatus() already did it:
		// newStatus.ReadyReplicas = replicasetutil.GetReadyReplicaCountForReplicaSets(c.allRSs)
	} else {
		newStatus.HPAReplicas = activeRS.Status.Replicas
		newStatus.Selector = metav1.FormatLabelSelector(activeRS.Spec.Selector)
		newStatus.AvailableReplicas = activeRS.Status.AvailableReplicas
		newStatus.ReadyReplicas = activeRS.Status.ReadyReplicas
	}

	newStatus = c.calculateRolloutConditions(newStatus)
	return c.persistRolloutStatus(&newStatus)
}
// calculateScaleUpPreviewCheckPoint computes the value of
// status.blueGreen.scaleUpPreviewCheckPoint, which is used by the
// blueGreen.previewReplicaCount feature.
//
// The checkpoint is a one-way trip-wire. It is false while previewReplicaCount
// is unset. Once the stored status value is true it stays true here. Otherwise
// it becomes true only after prePromotionAnalysis and the blue-green pause
// have both completed and the new ReplicaSet's available replica count equals
// previewReplicaCount.
//
// newStatus is accepted but not read by this function.
func (c *rolloutContext) calculateScaleUpPreviewCheckPoint(newStatus v1alpha1.RolloutStatus) bool {
	previewReplicaCount := c.rollout.Spec.Strategy.BlueGreen.PreviewReplicaCount
	if previewReplicaCount == nil {
		// The previewReplicaCount feature is not in use.
		return false
	}

	// A checkpoint that is already set is never cleared here.
	if c.rollout.Status.BlueGreen.ScaleUpPreviewCheckPoint {
		return true
	}

	// Do not set the checkpoint until prePromotionAnalysis has completed and
	// the pause is over.
	if !(c.completedPrePromotionAnalysis() && c.pauseContext.CompletedBlueGreenPause()) {
		return false
	}

	availableInNewRS := replicasetutil.GetAvailableReplicaCountForReplicaSets([]*appsv1.ReplicaSet{c.newRS})
	reached := *previewReplicaCount == availableInNewRS
	if reached {
		// The previous value is known to be false at this point, so this logs
		// only the false -> true transition.
		c.log.Infof("setting scaleUpPreviewCheckPoint to %v: preview replica count availability is %v", reached, reached)
	}
	return reached
}