drain_old_workers.go
package nodepool

import (
	"context"
	"strings"
	"time"

	"github.com/giantswarm/microerror"
	"sigs.k8s.io/cluster-api/util"

	"github.com/giantswarm/azure-operator/v6/pkg/drainer"
	"github.com/giantswarm/azure-operator/v6/pkg/handler/nodes/state"
	"github.com/giantswarm/azure-operator/v6/pkg/tenantcluster"
	"github.com/giantswarm/azure-operator/v6/service/controller/key"
)
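
// drainOldWorkerInstances drains the Kubernetes nodes backing worker
// instances from the previous release so they can be terminated in the
// TerminateOldWorkerInstances state.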
func (r *Resource) drainOldWorkerInstances(ctx context.Context, obj interface{}, currentState state.State) (state.State, error) {
	azureMachinePool, err := key.ToAzureMachinePool(obj)
	if err != nil {
		return DeploymentUninitialized, microerror.Mask(err)
	}

	cluster, err := util.GetClusterFromMetadata(ctx, r.CtrlClient, azureMachinePool.ObjectMeta)
	if err != nil {
		return DeploymentUninitialized, microerror.Mask(err)
	}

	if !cluster.GetDeletionTimestamp().IsZero() {
		r.Logger.Debugf(ctx, "Cluster is being deleted, skipping reconciling node pool")
		return currentState, nil
	}
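
	// Spot instance node pools are not drained; they go straight to
	// terminating the old instances.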
	if azureMachinePool.Spec.Template.SpotVMOptions != nil {
		r.Logger.Debugf(ctx, "Skipping state %s because node pool is using Spot Instances.", currentState)
		return TerminateOldWorkerInstances, nil
	}
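
	// Split the scale set instances into those still running the previous
	// release and those that are already updated.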
	oldInstances, _, err := r.splitInstancesByUpdatedStatus(ctx, azureMachinePool)
	if err != nil {
		return currentState, microerror.Mask(err)
	}

	if len(oldInstances) > 0 {
		r.Logger.Debugf(ctx, "There are still %d workers from the previous release running", len(oldInstances))
		tenantClusterK8sClients, err := r.tenantClientFactory.GetAllClients(ctx, cluster)
		if tenantcluster.IsAPINotAvailableError(err) {
			r.Logger.Debugf(ctx, "tenant API not available yet")
			r.Logger.Debugf(ctx, "canceling resource")
			return currentState, nil
		} else if err != nil {
			return currentState, microerror.Mask(err)
		}
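
		// Build a node drainer backed by the workload cluster clients.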
		nodeDrainer, err := drainer.New(drainer.Config{
			Logger:    r.Logger,
			WCClients: tenantClusterK8sClients,
		})
		if err != nil {
			return currentState, microerror.Mask(err)
		}
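
		// Drain each old node, remembering whether every drain has finished.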
		completed := true
		for _, instance := range oldInstances {
			nodeName := strings.ToLower(*instance.OsProfile.ComputerName)
			r.Logger.Debugf(ctx, "Draining node %q (instance name %q)", nodeName, *instance.Name)

			err = nodeDrainer.DrainNode(ctx, nodeName, 15*time.Minute)
			if drainer.IsEvictionInProgress(err) {
				// Node still draining.
				r.Logger.Debugf(ctx, "Node %q is still draining: %s", nodeName, err)
				completed = false
			} else if drainer.IsDrainTimeout(err) {
				// Node drain timed out.
				r.Logger.Debugf(ctx, "Timeout while draining node %q", nodeName)
			} else if err != nil {
				r.Logger.Debugf(ctx, "Error draining node %q: %s", nodeName, err)
				return currentState, microerror.Mask(err)
			} else {
				r.Logger.Debugf(ctx, "Node %q drained successfully", nodeName)
			}
		}

		if completed {
			return TerminateOldWorkerInstances, nil
		}

		// Nodes still to be drained.
		return currentState, nil
	}

	// All old nodes are terminated.
	r.Logger.Debugf(ctx, "no old workers were found")
	return TerminateOldWorkerInstances, nil
}