package clusters

import (
	"context"

	"github.com/pkg/errors"
	v1 "k8s.io/api/core/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/controller-runtime/pkg/client"

	anywherev1 "github.com/aws/eks-anywhere/pkg/api/v1alpha1"
	"github.com/aws/eks-anywhere/pkg/controller"
)

// UpdateClusterStatusForControlPlane checks the current state of the Cluster's control plane and updates the
// Cluster status information.
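//
// A minimal usage sketch from a reconciler (hypothetical caller, shown for illustration only;
// r.Client and ctrl.Result are assumed controller-runtime names, not part of this file):
//
//	if err := clusters.UpdateClusterStatusForControlPlane(ctx, r.Client, cluster); err != nil {
//		return ctrl.Result{}, err
//	}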
func UpdateClusterStatusForControlPlane(ctx context.Context, client client.Client, cluster *anywherev1.Cluster) error {
	kcp, err := controller.GetKubeadmControlPlane(ctx, client, cluster)
	if err != nil {
		return errors.Wrapf(err, "getting kubeadmcontrolplane")
	}
	updateControlPlaneInitializedCondition(cluster, kcp)
	updateControlPlaneReadyCondition(cluster, kcp)
	return nil
}

// UpdateClusterStatusForWorkers checks the current state of the Cluster's workers and updates the
// Cluster status information.
func UpdateClusterStatusForWorkers(ctx context.Context, client client.Client, cluster *anywherev1.Cluster) error {
	machineDeployments, err := controller.GetMachineDeployments(ctx, client, cluster)
	if err != nil {
		return errors.Wrap(err, "getting machine deployments")
	}
	updateWorkersReadyCondition(cluster, machineDeployments)
	return nil
}

// UpdateClusterStatusForCNI updates the Cluster status for the default CNI before the control plane is ready. The CNI reconciler
// handles the rest of the logic for determining the condition and updating the status based on the current state of the cluster.
func UpdateClusterStatusForCNI(ctx context.Context, cluster *anywherev1.Cluster) {
	// Here, we want to initialize the DefaultCNIConfigured condition only when the condition does not exist,
	// such as in the event of cluster creation. In this case, when the control plane is not ready, we can assume
	// the CNI is not ready yet.
	if !conditions.IsTrue(cluster, anywherev1.ControlPlaneReadyCondition) &&
		conditions.Get(cluster, anywherev1.DefaultCNIConfiguredCondition) == nil {
		conditions.MarkFalse(cluster, anywherev1.DefaultCNIConfiguredCondition, anywherev1.ControlPlaneNotReadyReason, clusterv1.ConditionSeverityInfo, "")
		return
	}
	// Self-managed clusters do not use the CNI reconciler, so this status would never get resolved.
	// TODO: Remove after self-managed clusters are created with the controller in the CLI
	if cluster.IsSelfManaged() {
		ciliumCfg := cluster.Spec.ClusterNetwork.CNIConfig.Cilium
		// Though the default CNI may be installed initially to successfully create the cluster,
		// if it is configured to skip upgrades, we mark the condition as "False".
		if ciliumCfg != nil && !ciliumCfg.IsManaged() {
			conditions.MarkFalse(cluster, anywherev1.DefaultCNIConfiguredCondition, anywherev1.SkipUpgradesForDefaultCNIConfiguredReason, clusterv1.ConditionSeverityWarning, "Configured to skip default Cilium CNI upgrades")
			return
		}
		// Otherwise, since the control plane is fully ready, we can assume the CNI has been configured.
		conditions.MarkTrue(cluster, anywherev1.DefaultCNIConfiguredCondition)
	}
}

// updateControlPlaneReadyCondition updates the ControlPlaneReady condition, after checking the state of the control plane
// in the cluster.
func updateControlPlaneReadyCondition(cluster *anywherev1.Cluster, kcp *controlplanev1.KubeadmControlPlane) {
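	// The control plane cannot be ready before it has been initialized, so propagate the
	// ControlPlaneInitialized condition's reason while initialization is still in progress.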
	initializedCondition := conditions.Get(cluster, anywherev1.ControlPlaneInitializedCondition)
	if initializedCondition.Status != "True" {
		conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, initializedCondition.Reason, initializedCondition.Severity, initializedCondition.Message)
		return
	}
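	// The KubeadmControlPlane object may not exist yet, in which case there is nothing to evaluate.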
	if kcp == nil {
		return
	}
	// We make sure to check that the status is up to date before using it
	if kcp.Status.ObservedGeneration != kcp.ObjectMeta.Generation {
		conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, anywherev1.OutdatedInformationReason, clusterv1.ConditionSeverityInfo, "")
		return
	}
	// The control plane should be marked ready when the count specified in the spec is
	// equal to the ready number of nodes in the cluster and they're all of the right version specified.
	expected := cluster.Spec.ControlPlaneConfiguration.Count
	totalReplicas := int(kcp.Status.Replicas)
	// First, in the case of a rolling upgrade, we get the number of outdated nodes, and as long as there are some,
	// we want to reflect in the message that the Cluster is in progress updating the old nodes to the
	// new machine spec.
	updatedReplicas := int(kcp.Status.UpdatedReplicas)
	totalOutdated := totalReplicas - updatedReplicas
	if totalOutdated > 0 {
		conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, anywherev1.RollingUpgradeInProgress, clusterv1.ConditionSeverityInfo, "Control plane nodes not up-to-date yet, %d rolling (%d up to date)", totalReplicas, updatedReplicas)
		return
	}
	// Then, we check that the number of nodes in the cluster matches the expected amount. If not, we
	// mark that the Cluster is scaling up or scaling down the control plane replicas to the expected amount.
	if totalReplicas < expected {
		conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, anywherev1.ScalingUpReason, clusterv1.ConditionSeverityInfo, "Scaling up control plane nodes, %d expected (%d actual)", expected, totalReplicas)
		return
	}
	if totalReplicas > expected {
		conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, anywherev1.ScalingDownReason, clusterv1.ConditionSeverityInfo, "Scaling down control plane nodes, %d expected (%d actual)", expected, totalReplicas)
		return
	}
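	// Once all replicas are up to date and the counts match, every expected node must also report ready.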
	readyReplicas := int(kcp.Status.ReadyReplicas)
	if readyReplicas != expected {
		conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, anywherev1.NodesNotReadyReason, clusterv1.ConditionSeverityInfo, "Control plane nodes not ready yet, %d expected (%d ready)", expected, readyReplicas)
		return
	}
	// We check the condition signifying the overall health of the control plane components. Usually, the control plane should be healthy
	// at this point but if that is not the case, we report it as an error.
	kcpControlPlaneHealthyCondition := conditions.Get(kcp, controlplanev1.ControlPlaneComponentsHealthyCondition)
	if kcpControlPlaneHealthyCondition != nil && kcpControlPlaneHealthyCondition.Status == v1.ConditionFalse {
		conditions.MarkFalse(cluster, anywherev1.ControlPlaneReadyCondition, anywherev1.ControlPlaneComponentsUnhealthyReason, clusterv1.ConditionSeverityError, kcpControlPlaneHealthyCondition.Message)
		return
	}
	conditions.MarkTrue(cluster, anywherev1.ControlPlaneReadyCondition)
}

// updateControlPlaneInitializedCondition updates the ControlPlaneInitialized condition if it hasn't already been set.
// This condition should be set only once.
func updateControlPlaneInitializedCondition(cluster *anywherev1.Cluster, kcp *controlplanev1.KubeadmControlPlane) {
	// Return early if the ControlPlaneInitializedCondition is already "True"
	if conditions.IsTrue(cluster, anywherev1.ControlPlaneInitializedCondition) {
		return
	}
	if kcp == nil {
		conditions.Set(cluster, controlPlaneInitializationInProgressCondition())
		return
	}
	// We make sure to check that the status is up to date before using it
	if kcp.Status.ObservedGeneration != kcp.ObjectMeta.Generation {
		conditions.MarkFalse(cluster, anywherev1.ControlPlaneInitializedCondition, anywherev1.OutdatedInformationReason, clusterv1.ConditionSeverityInfo, "")
		return
	}
	// Then, we check explicitly that the control plane is available. This way, we do not rely on CAPI
	// to implicitly fill out our condition reasons, and we can have custom messages.
	available := conditions.IsTrue(kcp, controlplanev1.AvailableCondition)
	if !available {
		conditions.Set(cluster, controlPlaneInitializationInProgressCondition())
		return
	}
	conditions.MarkTrue(cluster, anywherev1.ControlPlaneInitializedCondition)
}

// updateWorkersReadyCondition updates the WorkersReadyConditon condition after checking the state of the worker node groups
// in the cluster.
func updateWorkersReadyCondition(cluster *anywherev1.Cluster, machineDeployments []clusterv1.MachineDeployment) {
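	// Workers cannot be considered ready before the control plane is initialized.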
	initializedCondition := conditions.Get(cluster, anywherev1.ControlPlaneInitializedCondition)
	if initializedCondition.Status != "True" {
		conditions.MarkFalse(cluster, anywherev1.WorkersReadyConditon, anywherev1.ControlPlaneNotInitializedReason, clusterv1.ConditionSeverityInfo, "")
		return
	}
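	// Add up the expected worker count across all worker node group configurations in the spec.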
	totalExpected := 0
	for _, wng := range cluster.Spec.WorkerNodeGroupConfigurations {
		totalExpected += *wng.Count
	}
	// First, we need to aggregate the number of nodes across worker node groups to be able to assess the condition of the workers
	// as a whole.
	totalReadyReplicas := 0
	totalUpdatedReplicas := 0
	totalReplicas := 0
	for _, md := range machineDeployments {
		// We make sure to check that the status is up to date before using the information from the machine deployment status.
		if md.Status.ObservedGeneration != md.ObjectMeta.Generation {
			conditions.MarkFalse(cluster, anywherev1.WorkersReadyConditon, anywherev1.OutdatedInformationReason, clusterv1.ConditionSeverityInfo, "Worker node group %s status not up to date yet", md.Name)
			return
		}
		totalReadyReplicas += int(md.Status.ReadyReplicas)
		totalUpdatedReplicas += int(md.Status.UpdatedReplicas)
		totalReplicas += int(md.Status.Replicas)
	}
	// There may be worker nodes that are not up to date yet in the case of a rolling upgrade,
	// so reflect that on the condition with an appropriate message.
	totalOutdated := totalReplicas - totalUpdatedReplicas
	if totalOutdated > 0 {
		conditions.MarkFalse(cluster, anywherev1.WorkersReadyConditon, anywherev1.RollingUpgradeInProgress, clusterv1.ConditionSeverityInfo, "Worker nodes not up-to-date yet, %d rolling (%d up to date)", totalReplicas, totalUpdatedReplicas)
		return
	}
	// If the number of worker node replicas needs to be scaled up.
	if totalReplicas < totalExpected {
		conditions.MarkFalse(cluster, anywherev1.WorkersReadyConditon, anywherev1.ScalingUpReason, clusterv1.ConditionSeverityInfo, "Scaling up worker nodes, %d expected (%d actual)", totalExpected, totalReplicas)
		return
	}
	// If the number of worker node replicas needs to be scaled down.
	if totalReplicas > totalExpected {
		conditions.MarkFalse(cluster, anywherev1.WorkersReadyConditon, anywherev1.ScalingDownReason, clusterv1.ConditionSeverityInfo, "Scaling down worker nodes, %d expected (%d actual)", totalExpected, totalReplicas)
		return
	}
	if totalReadyReplicas != totalExpected {
		conditions.MarkFalse(cluster, anywherev1.WorkersReadyConditon, anywherev1.NodesNotReadyReason, clusterv1.ConditionSeverityInfo, "Worker nodes not ready yet, %d expected (%d ready)", totalExpected, totalReadyReplicas)
		return
	}
	conditions.MarkTrue(cluster, anywherev1.WorkersReadyConditon)
}

// controlPlaneInitializationInProgressCondition returns a new "False" condition for the ControlPlaneInitializationInProgress reason.
func controlPlaneInitializationInProgressCondition() *anywherev1.Condition {
	return conditions.FalseCondition(anywherev1.ControlPlaneInitializedCondition, anywherev1.ControlPlaneInitializationInProgressReason, clusterv1.ConditionSeverityInfo, "The first control plane instance is not available yet")
}