Feat: Nodes are now owned by Provisioners, and will cascade on delete by default #1934

Merged · 2 commits · Jun 16, 2022
9 changes: 3 additions & 6 deletions charts/karpenter/templates/clusterrole.yaml
```diff
@@ -25,16 +25,13 @@ rules:
     resources: ["storageclasses"]
     verbs: ["get", "list", "watch"]
   - apiGroups: [""]
-    resources: ["nodes", "pods"]
-    verbs: ["get", "list", "watch", "patch", "delete"]
-  - apiGroups: [""]
-    resources: ["configmaps"]
+    resources: ["pods", "nodes", "configmaps"]
    verbs: ["get", "list", "watch"]
   - apiGroups: [""]
     resources: ["nodes"]
-    verbs: ["create"]
+    verbs: ["create", "patch", "delete"]
   - apiGroups: [""]
-    resources: ["pods/binding", "pods/eviction"]
+    resources: ["pods/eviction"]
     verbs: ["create"]
   - apiGroups: ["apps"]
     resources: ["daemonsets"]
```

> **Contributor** (on the removed `resources: ["pods/binding", "pods/eviction"]` line): nice tidying!
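For readability, the rule set this hunk produces (derived directly from the diff above): read access to `pods`, `nodes`, and `configmaps` is consolidated into one rule, the `patch` and `delete` verbs move next to `create` on `nodes`, and the unused `pods/binding` permission is dropped.

```yaml
- apiGroups: [""]
  resources: ["pods", "nodes", "configmaps"]
  verbs: ["get", "list", "watch"]
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["create", "patch", "delete"]
- apiGroups: [""]
  resources: ["pods/eviction"]
  verbs: ["create"]
```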
19 changes: 13 additions & 6 deletions pkg/controllers/node/finalizer.go
```diff
@@ -18,10 +18,12 @@ import (
     "context"
 
     v1 "k8s.io/api/core/v1"
+    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+    "knative.dev/pkg/ptr"
+    "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
     "sigs.k8s.io/controller-runtime/pkg/reconcile"
 
     "github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"
-    "github.com/aws/karpenter/pkg/utils/functional"
 )
 
 // Finalizer is a subreconciler that ensures nodes have the termination
@@ -31,12 +33,17 @@ import (
 type Finalizer struct{}
 
 // Reconcile reconciles the node
-func (r *Finalizer) Reconcile(_ context.Context, _ *v1alpha5.Provisioner, n *v1.Node) (reconcile.Result, error) {
-    if !n.DeletionTimestamp.IsZero() {
+func (r *Finalizer) Reconcile(_ context.Context, provisioner *v1alpha5.Provisioner, node *v1.Node) (reconcile.Result, error) {
+    if !node.DeletionTimestamp.IsZero() {
         return reconcile.Result{}, nil
     }
-    if !functional.ContainsString(n.Finalizers, v1alpha5.TerminationFinalizer) {
-        n.Finalizers = append(n.Finalizers, v1alpha5.TerminationFinalizer)
-    }
+    node.OwnerReferences = []metav1.OwnerReference{{
+        APIVersion:         v1alpha5.SchemeGroupVersion.String(),
+        Kind:               "Provisioner",
+        Name:               provisioner.Name,
+        UID:                provisioner.UID,
+        BlockOwnerDeletion: ptr.Bool(true),
+    }}
+    controllerutil.AddFinalizer(node, v1alpha5.TerminationFinalizer)
     return reconcile.Result{}, nil
 }
```
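The rewritten `Reconcile` now does two things on every live node: it stamps a single owner reference pointing at the launching Provisioner (with `BlockOwnerDeletion` set, so foreground deletion of the Provisioner waits for the node), and it ensures the termination finalizer via `controllerutil.AddFinalizer`, which is idempotent and replaces the manual `ContainsString`/`append` dance. A minimal sketch — not part of this PR, helper name illustrative — of how a client could check whether a node would be swept up by that cascade:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// ownedByProvisioner reports whether node carries an owner reference of the
// shape Finalizer.Reconcile writes above, i.e. whether deleting the named
// Provisioner would cascade to this node via Kubernetes garbage collection.
func ownedByProvisioner(node *v1.Node, provisionerName string) bool {
	for _, ref := range node.OwnerReferences {
		if ref.Kind == "Provisioner" && ref.Name == provisionerName {
			return true
		}
	}
	return false
}

func main() {
	node := &v1.Node{} // a node with no owner references
	fmt.Println(ownedByProvisioner(node, "default")) // prints: false
}
```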
15 changes: 15 additions & 0 deletions pkg/controllers/node/suite_test.go
```diff
@@ -289,5 +289,20 @@ var _ = Describe("Controller", func() {
             n = ExpectNodeExists(ctx, env.Client, n.Name)
             Expect(n.Finalizers).To(Equal(n.Finalizers))
         })
+        It("should add an owner reference to the node", func() {
+            n := test.Node(test.NodeOptions{ObjectMeta: metav1.ObjectMeta{
+                Labels: map[string]string{v1alpha5.ProvisionerNameLabelKey: provisioner.Name},
+            }})
+            ExpectApplied(ctx, env.Client, provisioner, n)
+            ExpectReconcileSucceeded(ctx, controller, client.ObjectKeyFromObject(n))
+            n = ExpectNodeExists(ctx, env.Client, n.Name)
+            Expect(n.OwnerReferences).To(Equal([]metav1.OwnerReference{{
+                APIVersion:         v1alpha5.SchemeGroupVersion.String(),
+                Kind:               "Provisioner",
+                Name:               provisioner.Name,
+                UID:                provisioner.UID,
+                BlockOwnerDeletion: ptr.Bool(true),
+            }}))
+        })
     })
 })
```
4 changes: 4 additions & 0 deletions pkg/controllers/provisioning/provisioner.go
```diff
@@ -169,6 +169,7 @@ func (p *Provisioner) getPods(ctx context.Context) ([]*v1.Pod, error) {
     return pods, nil
 }
 
+// nolint: gocyclo
 func (p *Provisioner) schedule(ctx context.Context, pods []*v1.Pod) ([]*scheduler.Node, error) {
     defer metrics.Measure(schedulingDuration.WithLabelValues(injection.GetNamespacedName(ctx).Name))()
 
@@ -182,6 +183,9 @@ func (p *Provisioner) schedule(ctx context.Context, pods []*v1.Pod) ([]*schedule
     }
     for i := range provisionerList.Items {
         provisioner := &provisionerList.Items[i]
+        if !provisioner.DeletionTimestamp.IsZero() {
+            continue
+        }
         // Create node template
         nodeTemplates = append(nodeTemplates, scheduling.NewNodeTemplate(provisioner))
         // Get instance type options
```

> **Contributor** (on `// nolint: gocyclo`): 😮
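The new guard in `schedule` skips provisioners that are mid-deletion, so Karpenter never launches a node against an owner that is about to disappear (which would cascade-delete the fresh node immediately). The same logic as a standalone hedged sketch — helper name illustrative, types as in this repo:

```go
package provisioning

import "github.com/aws/karpenter/pkg/apis/provisioning/v1alpha5"

// activeProvisioners mirrors the check added in schedule above: provisioners
// whose DeletionTimestamp is set are filtered out, so no node templates are
// built for a provisioner that is already terminating.
func activeProvisioners(items []v1alpha5.Provisioner) []*v1alpha5.Provisioner {
	var active []*v1alpha5.Provisioner
	for i := range items {
		if !items[i].DeletionTimestamp.IsZero() {
			continue // being deleted; its nodes are already on their way out
		}
		active = append(active, &items[i])
	}
	return active
}
```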
11 changes: 11 additions & 0 deletions pkg/controllers/provisioning/suite_test.go
```diff
@@ -17,6 +17,7 @@ package provisioning_test
 import (
     "context"
     "testing"
+    "time"
 
     "github.com/aws/karpenter/pkg/cloudprovider"
 
@@ -90,6 +91,16 @@ var _ = Describe("Provisioning", func() {
             ExpectScheduled(ctx, env.Client, pod)
         }
     })
+    It("should ignore provisioners that are deleting", func() {
+        ExpectApplied(ctx, env.Client, test.Provisioner(test.ProvisionerOptions{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &metav1.Time{Time: time.Now()}}}))
+        pods := ExpectProvisioned(ctx, env.Client, controller, test.UnschedulablePod())
+        nodes := &v1.NodeList{}
+        Expect(env.Client.List(ctx, nodes)).To(Succeed())
+        Expect(len(nodes.Items)).To(Equal(0))
+        for _, pod := range pods {
+            ExpectNotScheduled(ctx, env.Client, pod)
+        }
+    })
     It("should provision nodes for pods with supported node selectors", func() {
         provisioner := test.Provisioner()
         schedulable := []*v1.Pod{
```
1 change: 1 addition & 0 deletions website/content/en/preview/tasks/deprovisioning.md
```diff
@@ -19,6 +19,7 @@ These include:
 
 There are both automated and manual ways of deprovisioning nodes provisioned by Karpenter:
 
+* **Provisioner Deletion**: Nodes are considered to be "owned" by the Provisioner that launched them. Karpenter will gracefully terminate nodes when a provisioner is deleted. Nodes may be reparented to another provisioner by modifying their labels. For example: `kubectl label node -l karpenter.sh/provisioner-name=source-provisioner-name karpenter.sh/provisioner-name=destination-provisioner-name --overwrite`.
 * **Node empty**: Karpenter notes when the last workload (non-daemonset) pod stops running on a node. From that point, Karpenter waits the number of seconds set by `ttlSecondsAfterEmpty` in the provisioner, then Karpenter requests to delete the node. This feature can keep costs down by removing nodes that are no longer being used for workloads.
 * **Node expired**: Karpenter requests to delete the node after a set number of seconds, based on the provisioner `ttlSecondsUntilExpired` value, from the time the node was provisioned. One use case for node expiry is to handle node upgrades. Old nodes (with a potentially outdated Kubernetes version or operating system) are deleted, and replaced with nodes on the current version (assuming that you requested the latest version, rather than a specific version).
```
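A quick sketch of where those TTLs live — a minimal Provisioner manifest with illustrative values, field names per the `v1alpha5` API referenced throughout this PR:

```yaml
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
  name: default
spec:
  ttlSecondsAfterEmpty: 30        # scale-in: delete a node 30s after its last workload pod stops
  ttlSecondsUntilExpired: 2592000 # rotation: delete (and replace) nodes after 30 days
```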
7 changes: 5 additions & 2 deletions website/content/en/preview/upgrade-guide/_index.md
```diff
@@ -46,7 +46,7 @@ When there is a breaking change we will:
 
 Besides the peer review process for all changes to the code base we also do the followings in order to find
 incompatibilities:
-* (To be implemented) To check the compatibility of the application, we will automate tests for installing, uninstalling, upgrading from an older version, and downgrading to an older version
+* (To be implemented) To check the compatibility of the application, we will automate tests for installing, uninstalling, upgrading from an older version, and downgrading to an older version
 * (To be implemented) To check the compatibility of the documentation with the application, we will turn the commands in our documentation into scripts that we can automatically run
 
 ## Nightly Builds
@@ -69,9 +69,12 @@ for a subset of older versions and deprecate the others.
 
 # Released Upgrade Notes
 
+## Upgrading to v0.12.0+
+v0.12.0 adds an OwnerReference to each Node created by a provisioner. Previously, deleting a provisioner would orphan nodes. Now, deleting a provisioner will cause Kubernetes [cascading delete](https://kubernetes.io/docs/concepts/architecture/garbage-collection/#cascading-deletion) logic to gracefully terminate the nodes using the Karpenter node finalizer. You may still orphan nodes by removing the owner reference.
+
 ## Upgrading to v0.11.0+
 
-v0.11.0 changes the way that the `vpc.amazonaws.com/pod-eni` resource is reported. Instead of being reported for all nodes that could support the resources regardless of if the cluster is configured to support it, it is now controlled by a command line flag or environment variable. The parameter defaults to false and must be set if your cluster uses [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html). This can be enabled by setting the environment variable `AWS_ENABLE_POD_ENI` to true via the helm value `controller.env`.
+v0.11.0 changes the way that the `vpc.amazonaws.com/pod-eni` resource is reported. Instead of being reported for all nodes that could support the resources regardless of if the cluster is configured to support it, it is now controlled by a command line flag or environment variable. The parameter defaults to false and must be set if your cluster uses [security groups for pods](https://docs.aws.amazon.com/eks/latest/userguide/security-groups-for-pods.html). This can be enabled by setting the environment variable `AWS_ENABLE_POD_ENI` to true via the helm value `controller.env`.
 
 Other extended resources must be registered on nodes by their respective device plugins which are typically installed as DaemonSets (e.g. the `nvidia.com/gpu` resource will be registered by the [NVIDIA device plugin](https://github.com/NVIDIA/k8s-device-plugin). Previously, Karpenter would register these resources on nodes at creation and they would be zeroed out by `kubelet` at startup. By allowing the device plugins to register the resources, pods will not bind to the nodes before any device plugin initialization has occurred.
```
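Two hedged sketches for the upgrade notes above. First, the v0.12.0 escape hatch — orphaning a node by clearing its owner references before the provisioner is deleted (assuming a controller-runtime client; function and variable names are illustrative):

```go
package main

import (
	"context"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/types"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// orphanNode strips a node's owner references so that deleting its
// Provisioner no longer cascades to it, restoring the pre-v0.12.0
// behavior for that one node.
func orphanNode(ctx context.Context, c client.Client, nodeName string) error {
	node := &v1.Node{}
	if err := c.Get(ctx, types.NamespacedName{Name: nodeName}, node); err != nil {
		return err
	}
	stored := node.DeepCopy()
	node.OwnerReferences = nil // drop the Provisioner owner reference
	return c.Patch(ctx, node, client.MergeFrom(stored))
}
```

Second, for the v0.11.0 note, one way to set `AWS_ENABLE_POD_ENI` through the helm value `controller.env` — a sketch assuming the chart accepts standard Kubernetes env entries at that key:

```yaml
controller:
  env:
    - name: AWS_ENABLE_POD_ENI
      value: "true"
```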