/
actuator_delete.go
306 lines (263 loc) · 12.9 KB
/
actuator_delete.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
// Copyright (c) 2019 SAP SE or an SAP affiliate company. All rights reserved. This file is licensed under the Apache Software License, v. 2 except as noted otherwise in the LICENSE file
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package genericactuator
import (
"context"
"errors"
"fmt"
"strings"
"time"
extensionscontroller "github.com/gardener/gardener/extensions/pkg/controller"
extensionsv1alpha1 "github.com/gardener/gardener/pkg/apis/extensions/v1alpha1"
"github.com/gardener/gardener/pkg/utils/flow"
kutil "github.com/gardener/gardener/pkg/utils/kubernetes"
retryutils "github.com/gardener/gardener/pkg/utils/retry"
machinev1alpha1 "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"github.com/go-logr/logr"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)
const (
// forceDeletionLabelKey is the label key that markMachineForcefulDeletion sets on Machine objects
// before the machine deployments are deleted.
forceDeletionLabelKey = "force-deletion"
// forceDeletionLabelValue is the value stored under forceDeletionLabelKey. A machine that already
// carries this exact value is not updated again by markMachineForcefulDeletion.
forceDeletionLabelValue = "True"
)
// Delete removes all machine resources that belong to the given Worker. It runs the following steps in
// order and aborts with a wrapped error as soon as one of them fails:
//
//  1. instantiate the worker delegate and call its PreDeleteHook,
//  2. deploy the machine-controller-manager with a replica count of 1,
//  3. redeploy the machine classes and wait until the credentials secret has been acquired,
//  4. if the delegate implements WorkerCredentialsDelegate, refresh the cloud credentials in all machine
//     class secrets,
//  5. label all machines for forceful deletion,
//  6. delete all machine deployments, machine classes and machine class secrets in the worker namespace,
//  7. wait until all machine resources are gone and the credentials secret has been released,
//  8. delete the machine-controller-manager, clean up machine dependencies and call the PostDeleteHook.
func (a *genericActuator) Delete(ctx context.Context, log logr.Logger, worker *extensionsv1alpha1.Worker, cluster *extensionscontroller.Cluster) error {
	log = log.WithValues("operation", "delete")

	workerDelegate, err := a.delegateFactory.WorkerDelegate(ctx, worker, cluster)
	if err != nil {
		return fmt.Errorf("could not instantiate actuator context: %w", err)
	}

	// Call pre deletion hook to prepare Worker deletion.
	if err := workerDelegate.PreDeleteHook(ctx); err != nil {
		return fmt.Errorf("pre worker deletion hook failed: %w", err)
	}

	// Make sure machine-controller-manager is awake before deleting the machines.
	var replicaFunc = func() int32 {
		return 1
	}

	// Deploy the machine-controller-manager into the cluster to make sure worker nodes can be removed.
	if err := a.deployMachineControllerManager(ctx, log, worker, cluster, workerDelegate, replicaFunc); err != nil {
		return err
	}

	// Redeploy generated machine classes to update credentials machine-controller-manager used.
	log.Info("Deploying the machine classes")
	if err := workerDelegate.DeployMachineClasses(ctx); err != nil {
		return fmt.Errorf("failed to deploy the machine classes: %w", err)
	}

	// Wait until the machine class credentials secret has been acquired.
	log.Info("Waiting until the machine class credentials secret has been acquired")
	if err := a.waitUntilCredentialsSecretAcquiredOrReleased(ctx, true, worker, workerDelegate); err != nil {
		return fmt.Errorf("failed while waiting for the machine class credentials secret to be acquired: %w", err)
	}

	if workerCredentialsDelegate, ok := workerDelegate.(WorkerCredentialsDelegate); ok {
		// Update cloud credentials for all existing machine class secrets
		cloudCredentials, err := workerCredentialsDelegate.GetMachineControllerManagerCloudCredentials(ctx)
		if err != nil {
			return fmt.Errorf("failed to get the cloud credentials in namespace %s: %w", worker.Namespace, err)
		}
		if err = a.updateCloudCredentialsInAllMachineClassSecrets(ctx, log, cloudCredentials, worker.Namespace); err != nil {
			return fmt.Errorf("failed to update cloud credentials in machine class secrets for namespace %s: %w", worker.Namespace, err)
		}
	}

	// Mark all existing machines to become forcefully deleted.
	log.Info("Marking all machines to become forcefully deleted")
	if err := a.markAllMachinesForcefulDeletion(ctx, log, worker.Namespace); err != nil {
		return fmt.Errorf("marking all machines for forceful deletion failed: %w", err)
	}

	// Delete all machine deployments.
	log.Info("Deleting all machine deployments")
	if err := a.client.DeleteAllOf(ctx, &machinev1alpha1.MachineDeployment{}, client.InNamespace(worker.Namespace)); err != nil {
		return fmt.Errorf("cleaning up all machine deployments failed: %w", err)
	}

	// Delete all machine classes.
	log.Info("Deleting all machine classes")
	if err := a.client.DeleteAllOf(ctx, workerDelegate.MachineClass(), client.InNamespace(worker.Namespace)); err != nil {
		return fmt.Errorf("cleaning up all machine classes failed: %w", err)
	}

	// Delete all machine class secrets.
	log.Info("Deleting all machine class secrets")
	if err := a.client.DeleteAllOf(ctx, &corev1.Secret{}, client.InNamespace(worker.Namespace), client.MatchingLabels(getMachineClassSecretLabels())); err != nil {
		return fmt.Errorf("cleaning up all machine class secrets failed: %w", err)
	}

	// Wait until all machine resources have been properly deleted.
	// The error string is lowercase like every other error in this function, because it ends up in the
	// middle of a wrapped error chain.
	if err := a.waitUntilMachineResourcesDeleted(ctx, log, worker, workerDelegate); err != nil {
		return fmt.Errorf("failed while waiting for all machine resources to be deleted: %w", err)
	}

	// Wait until the machine class credentials secret has been released.
	log.Info("Waiting until the machine class credentials secret has been released")
	if err := a.waitUntilCredentialsSecretAcquiredOrReleased(ctx, false, worker, workerDelegate); err != nil {
		return fmt.Errorf("failed while waiting for the machine class credentials secret to be released: %w", err)
	}

	// Delete the machine-controller-manager.
	if err := a.deleteMachineControllerManager(ctx, log, worker); err != nil {
		return fmt.Errorf("failed deleting machine-controller-manager: %w", err)
	}

	// Cleanup machine dependencies.
	// TODO(dkistner): Remove in a future release.
	if err := workerDelegate.CleanupMachineDependencies(ctx); err != nil {
		return fmt.Errorf("failed to cleanup machine dependencies: %w", err)
	}

	// Call post deletion hook after Worker deletion has happened.
	if err := workerDelegate.PostDeleteHook(ctx); err != nil {
		return fmt.Errorf("post worker deletion hook failed: %w", err)
	}

	return nil
}
// markAllMachinesForcefulDeletion lists every Machine in the given namespace and labels each of them for
// forceful deletion. One labelling task is created per machine and all tasks are executed through
// flow.Parallel; a failure of the list call is returned as is, a failure of the tasks is wrapped.
func (a *genericActuator) markAllMachinesForcefulDeletion(ctx context.Context, log logr.Logger, namespace string) error {
	log.Info("Marking all machines for forceful deletion")

	machineList := &machinev1alpha1.MachineList{}
	if err := a.client.List(ctx, machineList, client.InNamespace(namespace)); err != nil {
		return err
	}

	labelTasks := make([]flow.TaskFn, 0, len(machineList.Items))
	for i := range machineList.Items {
		// Copy the item so that every closure owns its own Machine object.
		current := machineList.Items[i]
		labelTasks = append(labelTasks, func(ctx context.Context) error {
			return a.markMachineForcefulDeletion(ctx, &current)
		})
	}

	if err := flow.Parallel(labelTasks...)(ctx); err != nil {
		return fmt.Errorf("failed labelling machines for forceful deletion: %w", err)
	}
	return nil
}
// markMachineForcefulDeletion sets the force-deletion label on the given machine and persists the change
// with an Update call. Machines that already carry the label with the expected value are left alone and
// no request is sent for them.
func (a *genericActuator) markMachineForcefulDeletion(ctx context.Context, machine *machinev1alpha1.Machine) error {
	// Reading from a nil map is safe and yields "", which never equals the expected value.
	if machine.Labels[forceDeletionLabelKey] == forceDeletionLabelValue {
		return nil
	}

	if machine.Labels == nil {
		machine.Labels = map[string]string{}
	}
	machine.Labels[forceDeletionLabelKey] = forceDeletionLabelValue

	return a.client.Update(ctx, machine)
}
// waitUntilMachineResourcesDeleted waits for a maximum of 5 minutes until all machine resources in the
// worker's namespace have been deleted. It polls the status every 5 seconds.
//
// The following resources are checked: machines, machine sets, machine deployments, machine classes (as
// provided by the worker delegate) and machine class secrets. A machine class secret only counts as
// remaining while it still has at least one finalizer. Once the count of a resource kind has reached
// zero, that kind is not listed again in later polls.
//
// Any list error, and any failed machine reported in a machine deployment's status, aborts the wait
// immediately with a severe error.
// TODO: Parallelise this?
func (a *genericActuator) waitUntilMachineResourcesDeleted(ctx context.Context, log logr.Logger, worker *extensionsv1alpha1.Worker, workerDelegate WorkerDelegate) error {
	// -1 means "not checked yet"; the counters live outside the poll function so that they survive
	// between polls.
	var (
		countMachines            = -1
		countMachineSets         = -1
		countMachineDeployments  = -1
		countMachineClasses      = -1
		countMachineClassSecrets = -1
	)

	log.Info("Waiting until all machine resources have been deleted")
	return retryutils.UntilTimeout(ctx, 5*time.Second, 5*time.Minute, func(ctx context.Context) (bool, error) {
		// remaining collects one entry per resource kind that was checked in this poll.
		var remaining []string

		// Check whether all machines have been deleted.
		if countMachines != 0 {
			existingMachines := &machinev1alpha1.MachineList{}
			if err := a.reader.List(ctx, existingMachines, client.InNamespace(worker.Namespace)); err != nil {
				return retryutils.SevereError(err)
			}
			countMachines = len(existingMachines.Items)
			remaining = append(remaining, fmt.Sprintf("%d machines", countMachines))
		}

		// Check whether all machine sets have been deleted.
		if countMachineSets != 0 {
			existingMachineSets := &machinev1alpha1.MachineSetList{}
			if err := a.reader.List(ctx, existingMachineSets, client.InNamespace(worker.Namespace)); err != nil {
				return retryutils.SevereError(err)
			}
			countMachineSets = len(existingMachineSets.Items)
			remaining = append(remaining, fmt.Sprintf("%d machine sets", countMachineSets))
		}

		// Check whether all machine deployments have been deleted.
		if countMachineDeployments != 0 {
			existingMachineDeployments := &machinev1alpha1.MachineDeploymentList{}
			if err := a.reader.List(ctx, existingMachineDeployments, client.InNamespace(worker.Namespace)); err != nil {
				return retryutils.SevereError(err)
			}
			countMachineDeployments = len(existingMachineDeployments.Items)
			remaining = append(remaining, fmt.Sprintf("%d machine deployments", countMachineDeployments))

			// Check whether an operation failed during the deletion process. The first failed machine
			// that is found aborts the wait.
			for _, existingMachineDeployment := range existingMachineDeployments.Items {
				if failedMachines := existingMachineDeployment.Status.FailedMachines; len(failedMachines) > 0 {
					failedMachine := failedMachines[0]
					return retryutils.SevereError(fmt.Errorf("machine %s failed: %s", failedMachine.Name, failedMachine.LastOperation.Description))
				}
			}
		}

		// Check whether all machine classes have been deleted.
		if countMachineClasses != 0 {
			machineClassList := workerDelegate.MachineClassList()
			if err := a.reader.List(ctx, machineClassList, client.InNamespace(worker.Namespace)); err != nil {
				return retryutils.SevereError(err)
			}
			machineClasses, err := meta.ExtractList(machineClassList)
			if err != nil {
				return retryutils.SevereError(err)
			}
			countMachineClasses = len(machineClasses)
			remaining = append(remaining, fmt.Sprintf("%d machine classes", countMachineClasses))
		}

		// Check whether all machine class secrets have been deleted. Only secrets that still carry a
		// finalizer are counted.
		if countMachineClassSecrets != 0 {
			existingMachineClassSecrets, err := a.listMachineClassSecrets(ctx, worker.Namespace)
			if err != nil {
				return retryutils.SevereError(err)
			}
			count := 0
			for _, machineClassSecret := range existingMachineClassSecrets.Items {
				if len(machineClassSecret.Finalizers) != 0 {
					count++
				}
			}
			countMachineClassSecrets = count
			remaining = append(remaining, fmt.Sprintf("%d machine class secrets", countMachineClassSecrets))
		}

		if countMachines != 0 || countMachineSets != 0 || countMachineDeployments != 0 || countMachineClasses != 0 || countMachineClassSecrets != 0 {
			log.Info("Waiting until machine resources have been deleted",
				"machines", countMachines, "machineSets", countMachineSets, "machineDeployments", countMachineDeployments,
				"machineClasses", countMachineClasses, "machineClassSecrets", countMachineClassSecrets)
			return retryutils.MinorError(fmt.Errorf("waiting until the following machine resources have been deleted: %s", strings.Join(remaining, ", ")))
		}

		return retryutils.Ok()
	})
}
// waitUntilCredentialsSecretAcquiredOrReleased polls every 5 seconds, for at most 5 minutes, until the
// secret referenced by worker.Spec.SecretRef is in the requested finalizer state:
//
//   - acquired == true:  the secret carries mcmFinalizer or mcmProviderFinalizer,
//   - acquired == false: the secret carries neither of the two finalizers.
//
// The check is skipped (the wait succeeds on the first poll) when workerDelegate implements the
// deprecated WorkerCredentialsDelegate interface, i.e. when machine classes do not reference a separate
// Secret for cloud provider credentials. A failure to read the secret aborts the wait with a severe
// error that wraps the underlying error.
func (a *genericActuator) waitUntilCredentialsSecretAcquiredOrReleased(ctx context.Context, acquired bool, worker *extensionsv1alpha1.Worker, workerDelegate WorkerDelegate) error {
	// The delegate's type does not change between polls, so it is checked only once.
	_, usesCredentialsDelegate := workerDelegate.(WorkerCredentialsDelegate)

	return retryutils.UntilTimeout(ctx, 5*time.Second, 5*time.Minute, func(ctx context.Context) (bool, error) {
		if usesCredentialsDelegate {
			return retryutils.Ok()
		}

		secret, err := kutil.GetSecretByReference(ctx, a.client, &worker.Spec.SecretRef)
		if err != nil {
			return retryutils.SevereError(fmt.Errorf("could not get the secret referenced by worker: %w", err))
		}

		// We need to check for both mcmFinalizer and mcmProviderFinalizer:
		// - mcmFinalizer is the finalizer used by machine controller manager and its in-tree providers
		// - mcmProviderFinalizer is the finalizer used by out-of-tree machine controller providers
		hasFinalizer := controllerutil.ContainsFinalizer(secret, mcmFinalizer) || controllerutil.ContainsFinalizer(secret, mcmProviderFinalizer)
		if hasFinalizer != acquired {
			return retryutils.MinorError(errors.New("machine class credentials secret has not yet been acquired or released"))
		}

		return retryutils.Ok()
	})
}