test: expanded storage integration tests (#5229)
jmdeal committed Dec 8, 2023
1 parent d0c0745 commit a3c0dfc
Showing 4 changed files with 236 additions and 81 deletions.
3 changes: 2 additions & 1 deletion go.mod
@@ -27,7 +27,7 @@ require (
k8s.io/utils v0.0.0-20230726121419-3b25d923346b
knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd
sigs.k8s.io/controller-runtime v0.16.3
sigs.k8s.io/karpenter v0.33.0
sigs.k8s.io/karpenter v0.33.1-0.20231207192330-58e1d7552081
)

require (
@@ -84,6 +84,7 @@ require (
github.com/prometheus/procfs v0.12.0 // indirect
github.com/prometheus/statsd_exporter v0.24.0 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/spf13/cobra v1.7.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
go.opencensus.io v0.24.0 // indirect
6 changes: 4 additions & 2 deletions go.sum
@@ -326,6 +326,8 @@ github.com/prometheus/statsd_exporter v0.24.0/go.mod h1:+dQiRTqn9DnPmN5mI5Xond+k
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs=
github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
@@ -761,8 +763,8 @@ sigs.k8s.io/controller-runtime v0.16.3 h1:2TuvuokmfXvDUamSx1SuAOO3eTyye+47mJCigw
sigs.k8s.io/controller-runtime v0.16.3/go.mod h1:j7bialYoSn142nv9sCOJmQgDXQXxnroFU4VnX/brVJ0=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/karpenter v0.33.0 h1:FvFzhYMwFackafImFeFz/ti/z/55NiZtr8HXiS+z9lU=
sigs.k8s.io/karpenter v0.33.0/go.mod h1:7hPB7kdImFHAFp7pqPUqgBDOrh8GEcRnHT5uIlsXMKA=
sigs.k8s.io/karpenter v0.33.1-0.20231207192330-58e1d7552081 h1:/axynNIcyHkij4uXbyi3EN1ZiPTwJOU0iiCKpss4mso=
sigs.k8s.io/karpenter v0.33.1-0.20231207192330-58e1d7552081/go.mod h1:J/nUafEcmZrz34hS0B8H51hqgoAd5LhGeS63kMauOjs=
sigs.k8s.io/structured-merge-diff/v4 v4.3.0 h1:UZbZAZfX0wV2zr7YZorDz6GXROfDFj6LvqCRm4VUVKk=
sigs.k8s.io/structured-merge-diff/v4 v4.3.0/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo=
28 changes: 28 additions & 0 deletions pkg/apis/crds/karpenter.sh_nodepools.yaml
@@ -46,6 +46,34 @@ spec:
expireAfter: 720h
description: Disruption contains the parameters that relate to Karpenter's disruption logic
properties:
budgets:
default:
- maxUnavailable: 10%
description: Budgets is a list of Budgets. If there are multiple active budgets, Karpenter uses the most restrictive maxUnavailable. If left undefined, this will default to one budget with a maxUnavailable to 10%.
items:
description: Budget defines when Karpenter will restrict the number of Node Claims that can be terminating simultaneously.
properties:
crontab:
description: Crontab specifies when a budget begins being active, using the upstream cronjob syntax. If omitted, the budget is always active. Currently timezones are not supported. This is required if Duration is set.
pattern: ^(@(annually|yearly|monthly|weekly|daily|midnight|hourly))|((.+)\s(.+)\s(.+)\s(.+)\s(.+))$
type: string
duration:
description: Duration determines how long a Budget is active since each Crontab hit. Only minutes and hours are accepted, as cron does not work in seconds. If omitted, the budget is always active. This is required if Crontab is set. This regex has an optional 0s at the end since the duration.String() always adds a 0s at the end.
pattern: ^([0-9]+(m|h)+(0s)?)$
type: string
maxUnavailable:
default: 10%
description: 'MaxUnavailable dictates how many NodeClaims owned by this NodePool can be terminating at once. It must be set. This only considers NodeClaims with the karpenter.sh/disruption taint. We can''t use an intstr.IntOrString since kubebuilder doesn''t support pattern checking for int values for IntOrString values. Ref: https://github.com/kubernetes-sigs/controller-tools/blob/55efe4be40394a288216dab63156b0a64fb82929/pkg/crd/markers/validation.go#L379-L388'
pattern: ^((100|[0-9]{1,2})%|[0-9]+)$
type: string
required:
- maxUnavailable
type: object
maxItems: 50
type: array
x-kubernetes-validations:
- message: '''crontab'' must be set with ''duration'''
rule: '!self.all(x, (has(x.crontab) && !has(x.duration)) || (!has(x.crontab) && has(x.duration)))'
consolidateAfter:
description: ConsolidateAfter is the duration the controller will wait before attempting to terminate nodes that are underutilized. Refer to ConsolidationPolicy for how underutilization is considered.
pattern: ^(([0-9]+(s|m|h))+)|(Never)$
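
For reference, a minimal sketch of a NodePool manifest exercising the new disruption budget fields defined in the schema above. The field names (budgets, crontab, duration, maxUnavailable) come from this CRD; the apiVersion, metadata, and schedule values are illustrative assumptions rather than part of this commit:

apiVersion: karpenter.sh/v1beta1
kind: NodePool
metadata:
  name: default
spec:
  disruption:
    budgets:
      # Baseline budget: at most 10% of this NodePool's NodeClaims may be terminating at once.
      - maxUnavailable: 10%
      # Block all disruption during an active window. Per the CEL rule above, crontab and duration
      # must be set together: the crontab marks when the window starts, duration how long it lasts.
      - crontab: "0 9 * * 1-5"
        duration: 8h
        maxUnavailable: "0"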
280 changes: 202 additions & 78 deletions test/suites/integration/storage_test.go
@@ -16,104 +16,228 @@ package integration_test

import (
"fmt"
"strings"

"github.com/aws/aws-sdk-go/aws"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/aws/aws-sdk-go/aws"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/samber/lo"

"github.com/aws/karpenter-provider-aws/pkg/errors"

"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/karpenter/pkg/test"
)

// This test requires the EBS CSI driver to be installed
var _ = Describe("Dynamic PVC", func() {
It("should run a pod with a dynamic persistent volume", func() {
// Ensure that the EBS driver is installed, or we can't run the test.
var ds appsv1.DaemonSet
if err := env.Client.Get(env.Context, client.ObjectKey{
Namespace: "kube-system",
Name: "ebs-csi-node",
}, &ds); err != nil {
if errors.IsNotFound(err) {
Skip(fmt.Sprintf("skipping dynamic PVC test due to missing EBS driver %s", err))
} else {
Fail(fmt.Sprintf("determining EBS driver status, %s", err))
}
}
storageClassName := "ebs-sc-test"
bindMode := storagev1.VolumeBindingWaitForFirstConsumer
sc := test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: storageClassName,
},
Provisioner: aws.String("ebs.csi.aws.com"),
VolumeBindingMode: &bindMode,
var _ = Describe("Persistent Volumes", func() {
Context("Static", func() {
It("should run a pod with a pre-bound persistent volume (empty storage class)", func() {
pvc := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
VolumeName: "test-volume",
StorageClassName: lo.ToPtr(""),
})
pv := test.PersistentVolume(test.PersistentVolumeOptions{
ObjectMeta: metav1.ObjectMeta{
Name: pvc.Spec.VolumeName,
},
})
pod := test.Pod(test.PodOptions{
PersistentVolumeClaims: []string{pvc.Name},
})

env.ExpectCreated(nodeClass, nodePool, pv, pvc, pod)
env.EventuallyExpectHealthy(pod)
env.ExpectCreatedNodeCount("==", 1)
})

pvc := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "ebs-claim",
},
StorageClassName: aws.String(storageClassName),
Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceStorage: resource.MustParse("5Gi")}},
It("should run a pod with a pre-bound persistent volume (non-existent storage class)", func() {
pvc := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
VolumeName: "test-volume",
StorageClassName: lo.ToPtr("non-existent-storage-class"),
})
pv := test.PersistentVolume(test.PersistentVolumeOptions{
ObjectMeta: metav1.ObjectMeta{
Name: pvc.Spec.VolumeName,
},
StorageClassName: "non-existent-storage-class",
})
pod := test.Pod(test.PodOptions{
PersistentVolumeClaims: []string{pvc.Name},
})
env.ExpectCreated(nodeClass, nodePool, pv, pvc, pod)
env.EventuallyExpectHealthy(pod)
env.ExpectCreatedNodeCount("==", 1)
})

pod := test.Pod(test.PodOptions{
PersistentVolumeClaims: []string{pvc.Name},
It("should run a pod with a pre-bound persistent volume while respecting topology constraints", func() {
subnets := env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName})
shuffledAZs := lo.Shuffle(lo.Keys(subnets))

pvc := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
StorageClassName: lo.ToPtr("non-existent-storage-class"),
})
pv := test.PersistentVolume(test.PersistentVolumeOptions{
StorageClassName: "non-existent-storage-class",
Zones: []string{shuffledAZs[0]},
})
pod := test.Pod(test.PodOptions{
PersistentVolumeClaims: []string{pvc.Name},
})
env.ExpectCreated(nodeClass, nodePool, pv, pvc, pod)
env.EventuallyExpectHealthy(pod)
env.ExpectCreatedNodeCount("==", 1)
})
It("should run a pod with a generic ephemeral volume", func() {
pv := test.PersistentVolume(test.PersistentVolumeOptions{
StorageClassName: "non-existent-storage-class",
})
pod := test.Pod(test.PodOptions{
EphemeralVolumeTemplates: []test.EphemeralVolumeTemplateOptions{{
StorageClassName: lo.ToPtr("non-existent-storage-class"),
}},
})

env.ExpectCreated(nodeClass, nodePool, pv, pod)
env.EventuallyExpectHealthy(pod)
env.ExpectCreatedNodeCount("==", 1)
})

env.ExpectCreated(nodeClass, nodePool, sc, pvc, pod)
env.EventuallyExpectHealthy(pod)
env.ExpectCreatedNodeCount("==", 1)
env.ExpectDeleted(pod)
})
})

var _ = Describe("Static PVC", func() {
It("should run a pod with a static persistent volume", func() {
storageClassName := "nfs-test"
bindMode := storagev1.VolumeBindingWaitForFirstConsumer
sc := test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: storageClassName,
},
VolumeBindingMode: &bindMode,
Context("Dynamic", func() {
var storageClass *storagev1.StorageClass
BeforeEach(func() {
// Ensure that the EBS driver is installed, or we can't run the test.
var ds appsv1.DaemonSet
if err := env.Client.Get(env.Context, client.ObjectKey{
Namespace: "kube-system",
Name: "ebs-csi-node",
}, &ds); err != nil {
if errors.IsNotFound(err) {
Skip(fmt.Sprintf("skipping dynamic PVC test due to missing EBS driver %s", err))
} else {
Fail(fmt.Sprintf("determining EBS driver status, %s", err))
}
}
storageClass = test.StorageClass(test.StorageClassOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "test-storage-class",
},
Provisioner: aws.String("ebs.csi.aws.com"),
VolumeBindingMode: lo.ToPtr(storagev1.VolumeBindingWaitForFirstConsumer),
})
})

pv := test.PersistentVolume(test.PersistentVolumeOptions{
ObjectMeta: metav1.ObjectMeta{Name: "nfs-test-volume"},
StorageClassName: "nfs-test",
It("should run a pod with a dynamic persistent volume", func() {
pvc := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
StorageClassName: &storageClass.Name,
})
pod := test.Pod(test.PodOptions{
PersistentVolumeClaims: []string{pvc.Name},
})

env.ExpectCreated(nodeClass, nodePool, storageClass, pvc, pod)
env.EventuallyExpectHealthy(pod)
env.ExpectCreatedNodeCount("==", 1)
})

// the server here doesn't need to actually exist for the pod to start running
pv.Spec.NFS = &v1.NFSVolumeSource{
Server: "fake.server",
Path: "/some/path",
}
pv.Spec.CSI = nil

pvc := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
ObjectMeta: metav1.ObjectMeta{
Name: "nfs-claim",
},
StorageClassName: aws.String(storageClassName),
VolumeName: pv.Name,
Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceStorage: resource.MustParse("5Gi")}},
It("should run a pod with a dynamic persistent volume while respecting allowed topologies", func() {
subnets := env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName})
shuffledAZs := lo.Shuffle(lo.Keys(subnets))

storageClass.AllowedTopologies = []v1.TopologySelectorTerm{{
MatchLabelExpressions: []v1.TopologySelectorLabelRequirement{{
Key: "topology.ebs.csi.aws.com/zone",
Values: []string{shuffledAZs[0]},
}},
}}

pvc := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
StorageClassName: &storageClass.Name,
})
pod := test.Pod(test.PodOptions{
PersistentVolumeClaims: []string{pvc.Name},
})

env.ExpectCreated(nodeClass, nodePool, storageClass, pvc, pod)
env.EventuallyExpectHealthy(pod)
env.ExpectCreatedNodeCount("==", 1)
})

pod := test.Pod(test.PodOptions{
PersistentVolumeClaims: []string{pvc.Name},
It("should run a pod with a dynamic persistent volume while respecting volume limits", func() {
ExpectSetEBSDriverLimit(1)
DeferCleanup(func() {
ExpectRemoveEBSDriverLimit()
})

count := 2
pvcs := lo.Times(count, func(_ int) *v1.PersistentVolumeClaim {
return test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{
StorageClassName: &storageClass.Name,
})
})
pods := lo.Map(pvcs, func(pvc *v1.PersistentVolumeClaim, _ int) *v1.Pod {
return test.Pod(test.PodOptions{
PersistentVolumeClaims: []string{pvc.Name},
})
})

// Pod creation is spread out here to address an upstream issue where pods can schedule to the same node
// and exceed its volume limits before the csi driver reports limits. This is not something Karpenter
// currently compensates for.
env.ExpectCreated(nodeClass, nodePool, storageClass, pvcs[0], pods[0])
env.EventuallyExpectHealthy(pods[0])
env.ExpectCreatedNodeCount("==", 1)
env.ExpectCreated(pvcs[1], pods[1])
env.EventuallyExpectHealthy(pods[1])
env.ExpectCreatedNodeCount("==", 2)
})
It("should run a pod with a generic ephemeral volume", func() {
pod := test.Pod(test.PodOptions{
EphemeralVolumeTemplates: []test.EphemeralVolumeTemplateOptions{{
StorageClassName: &storageClass.Name,
}},
})

env.ExpectCreated(nodeClass, nodePool, storageClass, pod)
env.EventuallyExpectHealthy(pod)
env.ExpectCreatedNodeCount("==", 1)
})

env.ExpectCreated(nodeClass, nodePool, sc, pv, pvc, pod)
env.EventuallyExpectHealthy(pod)
env.ExpectCreatedNodeCount("==", 1)
env.ExpectDeleted(pod)
})
})

func ExpectSetEBSDriverLimit(limit int) {
GinkgoHelper()
ds := &appsv1.DaemonSet{}
Expect(env.Client.Get(env.Context, client.ObjectKey{Namespace: "kube-system", Name: "ebs-csi-node"}, ds)).To(Succeed())
stored := ds.DeepCopy()

containers := ds.Spec.Template.Spec.Containers
for i := range containers {
if containers[i].Name != "ebs-plugin" {
continue
}
containers[i].Args = append(containers[i].Args, fmt.Sprintf("--volume-attach-limit=%d", limit))
break
}
Expect(env.Client.Patch(env.Context, ds, client.MergeFrom(stored))).To(Succeed())
}

func ExpectRemoveEBSDriverLimit() {
GinkgoHelper()
ds := &appsv1.DaemonSet{}
Expect(env.Client.Get(env.Context, client.ObjectKey{Namespace: "kube-system", Name: "ebs-csi-node"}, ds)).To(Succeed())
stored := ds.DeepCopy()

containers := ds.Spec.Template.Spec.Containers
for i := range containers {
if containers[i].Name != "ebs-plugin" {
continue
}
containers[i].Args = lo.Reject(containers[i].Args, func(arg string, _ int) bool {
return strings.Contains(arg, "--volume-attach-limit")
})
break
}
Expect(env.Client.Patch(env.Context, ds, client.MergeFrom(stored))).To(Succeed())
}
