/
garbage_collector.go
298 lines (259 loc) · 10.1 KB
/
garbage_collector.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
// SPDX-FileCopyrightText: 2021 SAP SE or an SAP affiliate company and Gardener contributors.
//
// SPDX-License-Identifier: Apache-2.0
package container
import (
"context"
"fmt"
"time"
"k8s.io/apimachinery/pkg/util/wait"
"github.com/gardener/landscaper/pkg/utils/read_write_layer"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
containerv1alpha1 "github.com/gardener/landscaper/apis/deployer/container/v1alpha1"
lc "github.com/gardener/landscaper/controller-utils/pkg/logging/constants"
"github.com/gardener/landscaper/apis/deployer/container"
kutil "github.com/gardener/landscaper/controller-utils/pkg/kubernetes"
"github.com/gardener/landscaper/controller-utils/pkg/logging"
lsv1alpha1 "github.com/gardener/landscaper/apis/core/v1alpha1"
)
// GarbageCollector removes leftover container deployer resources (service
// accounts, roles, role bindings, secrets and pods) from the host namespace.
type GarbageCollector struct {
	// Clients used to look up deploy items. In the code visible here only the
	// uncached client is read; the cached one is merely stored.
	lsUncachedClient, lsCachedClient client.Client
	// Clients used to list and delete the resources in the host namespace.
	// In the code visible here only the uncached client is read.
	hostUncachedClient, hostCachedClient client.Client

	log logging.Logger
	// deployerID, when non-empty, narrows every list call to objects that
	// carry the matching deployer ID label.
	deployerID string
	// hostNamespace is the namespace in which resources are listed.
	hostNamespace string
	config        containerv1alpha1.GarbageCollection
	// requeueAfter is the period between two cleanup runs; the constructor
	// derives it from config.RequeueTimeSeconds.
	requeueAfter time.Duration
	// keepPods disables the pod cleanup step when set.
	keepPods bool
}
// NewGarbageCollector returns a garbage collector that cleans up leaked
// service accounts, RBAC rules, secrets and pods.
//
// The period between two cleanup runs is taken from config.RequeueTimeSeconds.
// All other arguments are stored unchanged on the returned collector.
func NewGarbageCollector(
	lsUncachedClient, lsCachedClient, hostUncachedClient, hostCachedClient client.Client,
	log logging.Logger,
	deployerID,
	hostNamespace string,
	config containerv1alpha1.GarbageCollection,
	keepPods bool) *GarbageCollector {

	gc := new(GarbageCollector)

	gc.lsUncachedClient = lsUncachedClient
	gc.lsCachedClient = lsCachedClient
	gc.hostUncachedClient = hostUncachedClient
	gc.hostCachedClient = hostCachedClient

	gc.log = log
	gc.deployerID = deployerID
	gc.hostNamespace = hostNamespace
	gc.keepPods = keepPods

	gc.config = config
	gc.requeueAfter = time.Duration(config.RequeueTimeSeconds) * time.Second

	return gc
}
// StartDeployerJob logs the start of the garbage collection and then hands
// Cleanup to wait.UntilWithContext with requeueAfter as the period.
// It returns nil once that call returns.
func (gc *GarbageCollector) StartDeployerJob(ctx context.Context) error {
	gc.log.Info("GarbageCollector: starting garbage collection")

	period := gc.requeueAfter
	wait.UntilWithContext(ctx, gc.Cleanup, period)

	return nil
}
// Cleanup performs a single garbage collection pass over the host namespace.
//
// It lists service accounts, roles, role bindings, secrets and - unless
// keepPods is set - pods that carry both the deploy item name and the deploy
// item namespace label. If a deployer ID is configured, the objects must also
// carry the matching deployer ID label. Every listed object is handed to the
// corresponding cleanup helper.
//
// The pass is best effort: list and cleanup errors are only logged, and the
// pass continues with the next object or resource kind.
func (gc *GarbageCollector) Cleanup(ctx context.Context) {
	ctx = logging.NewContext(ctx, gc.log)
	logger, ctx := logging.FromContextOrNew(ctx, nil)

	// Selector shared by all list calls below.
	listOptions := []client.ListOption{
		client.InNamespace(gc.hostNamespace),
		client.HasLabels{container.ContainerDeployerDeployItemNameLabel, container.ContainerDeployerDeployItemNamespaceLabel},
	}
	if len(gc.deployerID) != 0 {
		// only consider objects that were created by this deployer instance
		listOptions = append(listOptions, client.MatchingLabels{container.ContainerDeployerIDLabel: gc.deployerID})
	}

	// cleanup service accounts
	saList := &corev1.ServiceAccountList{}
	if err := gc.hostUncachedClient.List(ctx, saList, listOptions...); err != nil {
		logger.Error(err, "unable to list service accounts")
	}
	for i := range saList.Items {
		next := &saList.Items[i]
		if err := gc.cleanupRBACResources(ctx, next); err != nil {
			logger.Error(err, "cleanup service account", lc.KeyResource, kutil.ObjectKeyFromObject(next).String())
		}
	}

	// cleanup roles
	roleList := &rbacv1.RoleList{}
	if err := gc.hostUncachedClient.List(ctx, roleList, listOptions...); err != nil {
		logger.Error(err, "unable to list roles")
	}
	for i := range roleList.Items {
		next := &roleList.Items[i]
		if err := gc.cleanupRBACResources(ctx, next); err != nil {
			logger.Error(err, "cleanup role", lc.KeyResource, kutil.ObjectKeyFromObject(next).String())
		}
	}

	// cleanup role bindings
	roleBindingList := &rbacv1.RoleBindingList{}
	if err := gc.hostUncachedClient.List(ctx, roleBindingList, listOptions...); err != nil {
		logger.Error(err, "unable to list role bindings")
	}
	for i := range roleBindingList.Items {
		next := &roleBindingList.Items[i]
		if err := gc.cleanupRBACResources(ctx, next); err != nil {
			logger.Error(err, "cleanup rolebinding", lc.KeyResource, kutil.ObjectKeyFromObject(next).String())
		}
	}

	// cleanup secrets
	secretList := &corev1.SecretList{}
	if err := read_write_layer.ListSecrets(ctx, gc.hostUncachedClient, secretList, read_write_layer.R000074, listOptions...); err != nil {
		logger.Error(err, "unable to list secrets")
	}
	for i := range secretList.Items {
		next := &secretList.Items[i]
		if err := gc.cleanupSecret(ctx, next); err != nil {
			logger.Error(err, "cleanup secret", lc.KeyResource, kutil.ObjectKeyFromObject(next).String())
		}
	}

	if gc.keepPods {
		// pods are kept on request, nothing left to do
		return
	}

	// cleanup pods
	podList := &corev1.PodList{}
	if err := read_write_layer.ListPods(ctx, gc.hostUncachedClient, podList, read_write_layer.R000075, listOptions...); err != nil {
		logger.Error(err, "unable to list pods")
	}
	for i := range podList.Items {
		next := &podList.Items[i]
		if err := gc.cleanupPod(ctx, next); err != nil {
			logger.Error(err, "cleanup pod", lc.KeyResource, kutil.ObjectKeyFromObject(next).String())
		}
	}
}
// cleanupRBACResources calls CleanupRBAC for the deploy item referenced by the
// labels of obj, but only if shouldGarbageCollect reports that obj is to be
// collected. Otherwise it returns nil, or the error of that check.
func (gc *GarbageCollector) cleanupRBACResources(ctx context.Context, obj client.Object) error {
	collect, err := gc.shouldGarbageCollect(ctx, obj)
	if err != nil || !collect {
		// err is nil when the object simply must not be collected
		return err
	}

	// CleanupRBAC takes a deploy item; rebuild its identity from the labels.
	labels := obj.GetLabels()
	di := &lsv1alpha1.DeployItem{}
	di.Name = labels[container.ContainerDeployerDeployItemNameLabel]
	di.Namespace = labels[container.ContainerDeployerDeployItemNamespaceLabel]

	return CleanupRBAC(ctx, di, gc.hostUncachedClient, obj.GetNamespace())
}
// cleanupSecret deletes the given secret if shouldGarbageCollect reports that
// it is to be collected, i.e. its parent deploy item no longer exists.
// It returns the error of that check or of the delete call.
func (gc *GarbageCollector) cleanupSecret(ctx context.Context, obj *corev1.Secret) error {
	orphaned, err := gc.shouldGarbageCollect(ctx, obj)
	switch {
	case err != nil:
		return err
	case !orphaned:
		return nil
	}

	return gc.hostUncachedClient.Delete(ctx, obj)
}
// cleanupPod decides whether the given pod is garbage and removes it if so.
//
// The rules are applied in this order:
//   - pods in phase Pending, Running or Unknown are never touched;
//   - pods whose deploy item no longer exists are removed via CleanupPod;
//   - pods without the container deployer finalizer are deleted directly;
//   - of the remaining pods, every pod except the latest one (see isLatestPod)
//     is removed via CleanupPod.
func (gc *GarbageCollector) cleanupPod(ctx context.Context, obj *corev1.Pod) error {
	logger, _ := logging.FromContextOrNew(ctx, nil)

	// remove runs CleanupPod for the pod and adds the pod key to any error.
	remove := func() error {
		if err := CleanupPod(ctx, gc.hostUncachedClient, obj, false); err != nil {
			return fmt.Errorf("unable to garbage collect pod %s: %w", kutil.ObjectKeyFromObject(obj).String(), err)
		}
		return nil
	}

	switch obj.Status.Phase {
	case corev1.PodPending, corev1.PodRunning, corev1.PodUnknown:
		logger.Debug("Not garbage collected", lc.KeyReason, "pod is still running", lc.KeyPhase, obj.Status.Phase)
		return nil
	}

	orphaned, err := gc.shouldGarbageCollect(ctx, obj)
	if err != nil {
		return err
	}
	if orphaned {
		// a pod without a corresponding deploy item is always collected
		logger.Debug("Garbage collected", lc.KeyReason, "deploy item does not exist anymore")
		return remove()
	}

	hasFinalizer := controllerutil.ContainsFinalizer(obj, container.ContainerDeployerFinalizer)
	if !hasFinalizer {
		logger.Debug("Garbage collected", lc.KeyReason, "pod has no finalizer")
		return gc.hostUncachedClient.Delete(ctx, obj)
	}

	latest, err := gc.isLatestPod(ctx, obj)
	if err != nil {
		return err
	}
	if latest {
		logger.Debug("Not garbage collected", lc.KeyReason, "latest pod")
		return nil
	}

	if err := remove(); err != nil {
		return err
	}
	logger.Debug("Garbage collected")
	return nil
}
// isLatestPod reports whether the given pod is the most recently created pod
// of its deploy item that still carries the container deployer finalizer.
//
// All pods in the host namespace with the same deploy item name and namespace
// labels are considered. Pods without the finalizer are skipped. If no pod
// with a finalizer is left, the result is false. When several pods share the
// newest creation timestamp, the one listed first wins.
//
// An error is returned if the pods cannot be listed or if the list is empty.
func (gc *GarbageCollector) isLatestPod(ctx context.Context, pod *corev1.Pod) (bool, error) {
	var (
		diName      = pod.Labels[container.ContainerDeployerDeployItemNameLabel]
		diNamespace = pod.Labels[container.ContainerDeployerDeployItemNamespaceLabel]
	)

	podList := &corev1.PodList{}
	if err := read_write_layer.ListPods(ctx, gc.hostUncachedClient, podList, read_write_layer.R000076,
		client.InNamespace(gc.hostNamespace),
		client.MatchingLabels{
			container.ContainerDeployerDeployItemNameLabel:      diName,
			container.ContainerDeployerDeployItemNamespaceLabel: diNamespace,
		}); err != nil {
		return false, err
	}

	if len(podList.Items) == 0 {
		return false, fmt.Errorf("no pods found in the host namespace %s", gc.hostNamespace)
	}

	// only return latest pod and ignore previous runs
	//
	// Iterate by index and keep a pointer into the list: the pods are only
	// read, so neither a per-iteration copy nor a deep copy is needed.
	var latest *corev1.Pod
	for i := range podList.Items {
		p := &podList.Items[i]

		// ignore pods with no finalizer as they are already reconciled and their state was persisted.
		if !controllerutil.ContainsFinalizer(p, container.ContainerDeployerFinalizer) {
			continue
		}
		if latest == nil {
			latest = p
			continue
		}
		if p.CreationTimestamp.Equal(&latest.CreationTimestamp) {
			// currently only for test debugging.
			// remove as soon as the test is stable.
			gc.log.Debug("Creation time equals", "currentPod", p.Name, "latest", latest.Name)
		}
		if p.CreationTimestamp.After(latest.CreationTimestamp.Time) {
			latest = p
		}
	}

	if latest == nil {
		return false, nil
	}
	return latest.Name == pod.Name, nil // namespace is irrelevant
}
// shouldGarbageCollect reports whether the given object is to be garbage
// collected. The deploy item is identified by the deploy item name and
// namespace labels of the object and looked up with the uncached client.
//
// The result is true only if the lookup fails with a not-found error. If the
// deploy item exists the result is false. Any other lookup error is returned
// together with false, because the state of the deploy item is unknown.
func (gc *GarbageCollector) shouldGarbageCollect(ctx context.Context, obj client.Object) (bool, error) {
	labels := obj.GetLabels()
	key := types.NamespacedName{
		Namespace: labels[container.ContainerDeployerDeployItemNamespaceLabel],
		Name:      labels[container.ContainerDeployerDeployItemNameLabel],
	}

	di := &lsv1alpha1.DeployItem{}
	err := read_write_layer.GetDeployItem(ctx, gc.lsUncachedClient, key, di, read_write_layer.R000036)

	switch {
	case err == nil:
		logger := gc.log.WithValues("deployItem", key.String(), lc.KeyResource, kutil.ObjectKeyFromObject(obj).String())
		logger.Debug("DeployItem still exists, resource should not be garbage collected")
		return false, nil
	case apierrors.IsNotFound(err):
		return true, nil
	default:
		// do not cleanup as we are unsure about the state of the deploy item.
		return false, err
	}
}