-
Notifications
You must be signed in to change notification settings - Fork 833
/
liveness.go
57 lines (49 loc) · 2.1 KB
/
liveness.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package node
import (
"context"
"fmt"
"time"
"github.com/awslabs/karpenter/pkg/apis/provisioning/v1alpha4"
"github.com/awslabs/karpenter/pkg/utils/injectabletime"
"github.com/awslabs/karpenter/pkg/utils/node"
v1 "k8s.io/api/core/v1"
"knative.dev/pkg/logging"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)
// LivenessTimeout is the minimum age, measured from a node's creation
// timestamp, that a node must reach before the Liveness reconciler will
// consider deleting it.
const LivenessTimeout time.Duration = 15 * time.Minute
// Liveness is a subreconciler that deletes a node once it is determined to be
// unrecoverable, i.e. the node has existed for longer than LivenessTimeout and
// its Ready condition has either never been set or still reports
// NodeStatusNeverUpdated.
type Liveness struct {
	// kubeClient is the client used to issue the node delete request.
	kubeClient client.Client
}
// Reconcile deletes the node if it has existed for at least LivenessTimeout
// and its Ready condition shows that the kubelet never reported status.
//
// While the node is younger than LivenessTimeout the returned result requests
// a requeue after the remaining time, so the liveness check still runs even if
// no further events are observed for the node. The provisioner argument is not
// used by this subreconciler. A non-nil error is returned only when the delete
// request fails.
func (r *Liveness) Reconcile(ctx context.Context, provisioner *v1alpha4.Provisioner, n *v1.Node) (reconcile.Result, error) {
	// Too young to judge: come back exactly when the timeout elapses instead
	// of relying on an unrelated event to trigger another reconcile.
	if age := injectabletime.Now().Sub(n.GetCreationTimestamp().Time); age < LivenessTimeout {
		return reconcile.Result{RequeueAfter: LivenessTimeout - age}, nil
	}
	condition := node.GetCondition(n.Status.Conditions, v1.NodeReady)
	// If the reason is "", then the condition has never been set. We expect
	// either the kubelet to set this reason, or the kcm's
	// node-lifecycle-controller to set the status to NodeStatusNeverUpdated if
	// the kubelet cannot connect. Once the value is NodeStatusNeverUpdated and
	// the node is beyond the liveness timeout, we will delete the node.
	if condition.Reason != "" && condition.Reason != "NodeStatusNeverUpdated" {
		return reconcile.Result{}, nil
	}
	logging.FromContext(ctx).Infof("Triggering termination for node that failed to join %s", n.Name)
	if err := r.kubeClient.Delete(ctx, n); err != nil {
		return reconcile.Result{}, fmt.Errorf("deleting node %s, %w", n.Name, err)
	}
	return reconcile.Result{}, nil
}