From 4b3a2d26978f90335b98a34c180d51b297450c4e Mon Sep 17 00:00:00 2001 From: Jonathan Innis Date: Fri, 20 Oct 2023 16:19:53 -0700 Subject: [PATCH] Add Integration E2E testing for v1beta1 --- .github/workflows/e2e-matrix.yaml | 2 +- .github/workflows/e2e.yaml | 1 + test/pkg/environment/common/expectations.go | 25 +- .../alpha/integration/kubelet_config_test.go | 4 +- .../suites/alpha/scale/deprovisioning_test.go | 2 +- test/suites/alpha/scale/provisioning_test.go | 2 +- test/suites/beta/integration/ami_test.go | 345 ++++++++++++++ .../beta/integration/aws_metadata_test.go | 49 ++ .../integration/block_device_mappings_test.go | 57 +++ test/suites/beta/integration/cni_test.go | 90 ++++ .../suites/beta/integration/daemonset_test.go | 136 ++++++ .../suites/beta/integration/emptiness_test.go | 65 +++ .../integration/extended_resources_test.go | 423 ++++++++++++++++++ test/suites/beta/integration/hash_test.go | 53 +++ .../beta/integration/instance_profile_test.go | 58 +++ .../beta/integration/kubelet_config_test.go | 282 ++++++++++++ .../lease_garbagecollection_test.go | 44 ++ .../beta/integration/scheduling_test.go | 388 ++++++++++++++++ .../beta/integration/security_group_test.go | 93 ++++ test/suites/beta/integration/storage_test.go | 119 +++++ test/suites/beta/integration/subnet_test.go | 183 ++++++++ test/suites/beta/integration/suite_test.go | 77 ++++ test/suites/beta/integration/tags_test.go | 110 +++++ .../beta/integration/termination_test.go | 50 +++ .../testdata/al2_no_mime_userdata_input.sh | 2 + .../testdata/al2_userdata_input.sh | 10 + .../integration/testdata/amd_driver_input.sh | 46 ++ .../integration/testdata/br_userdata_input.sh | 4 + .../testdata/windows_userdata_input.ps1 | 1 + .../beta/integration/validation_test.go | 185 ++++++++ 30 files changed, 2900 insertions(+), 6 deletions(-) create mode 100644 test/suites/beta/integration/ami_test.go create mode 100644 test/suites/beta/integration/aws_metadata_test.go create mode 100644 
test/suites/beta/integration/block_device_mappings_test.go create mode 100644 test/suites/beta/integration/cni_test.go create mode 100644 test/suites/beta/integration/daemonset_test.go create mode 100644 test/suites/beta/integration/emptiness_test.go create mode 100644 test/suites/beta/integration/extended_resources_test.go create mode 100644 test/suites/beta/integration/hash_test.go create mode 100644 test/suites/beta/integration/instance_profile_test.go create mode 100644 test/suites/beta/integration/kubelet_config_test.go create mode 100644 test/suites/beta/integration/lease_garbagecollection_test.go create mode 100644 test/suites/beta/integration/scheduling_test.go create mode 100644 test/suites/beta/integration/security_group_test.go create mode 100644 test/suites/beta/integration/storage_test.go create mode 100644 test/suites/beta/integration/subnet_test.go create mode 100644 test/suites/beta/integration/suite_test.go create mode 100644 test/suites/beta/integration/tags_test.go create mode 100644 test/suites/beta/integration/termination_test.go create mode 100644 test/suites/beta/integration/testdata/al2_no_mime_userdata_input.sh create mode 100644 test/suites/beta/integration/testdata/al2_userdata_input.sh create mode 100644 test/suites/beta/integration/testdata/amd_driver_input.sh create mode 100644 test/suites/beta/integration/testdata/br_userdata_input.sh create mode 100644 test/suites/beta/integration/testdata/windows_userdata_input.ps1 create mode 100644 test/suites/beta/integration/validation_test.go diff --git a/.github/workflows/e2e-matrix.yaml b/.github/workflows/e2e-matrix.yaml index da5f528af31f..4d2b1bda843b 100644 --- a/.github/workflows/e2e-matrix.yaml +++ b/.github/workflows/e2e-matrix.yaml @@ -49,7 +49,7 @@ jobs: strategy: fail-fast: false matrix: - suite: [Beta/Drift, Alpha/Integration, Alpha/Machine, Alpha/Consolidation, Alpha/Utilization, Alpha/Interruption, Alpha/Drift, Alpha/Expiration, Alpha/Chaos, Alpha/IPv6] + suite: 
[Beta/Integration, Beta/Drift, Alpha/Integration, Alpha/Machine, Alpha/Consolidation, Alpha/Utilization, Alpha/Interruption, Alpha/Drift, Alpha/Expiration, Alpha/Chaos, Alpha/IPv6] uses: ./.github/workflows/e2e.yaml with: suite: ${{ matrix.suite }} diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index ffe5bdde5ee2..689786ca9250 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -15,6 +15,7 @@ on: type: choice required: true options: + - Beta/Integration - Beta/Drift - Alpha/Integration - Alpha/Machine diff --git a/test/pkg/environment/common/expectations.go b/test/pkg/environment/common/expectations.go index b62476485e99..8f3844da19b8 100644 --- a/test/pkg/environment/common/expectations.go +++ b/test/pkg/environment/common/expectations.go @@ -710,7 +710,9 @@ func (env *Environment) ExpectCABundle() string { return base64.StdEncoding.EncodeToString(transportConfig.TLS.CAData) } -func (env *Environment) GetDaemonSetCount(prov *v1alpha5.Provisioner) int { +func (env *Environment) GetDaemonSetCountLegacy(prov *v1alpha5.Provisioner) int { + GinkgoHelper() + // Performs the same logic as the scheduler to get the number of daemonset // pods that we estimate we will need to schedule as overhead to each node daemonSetList := &appsv1.DaemonSetList{} @@ -728,3 +730,24 @@ func (env *Environment) GetDaemonSetCount(prov *v1alpha5.Provisioner) int { return true }) } + +func (env *Environment) GetDaemonSetCount(np *corev1beta1.NodePool) int { + GinkgoHelper() + + // Performs the same logic as the scheduler to get the number of daemonset + // pods that we estimate we will need to schedule as overhead to each node + daemonSetList := &appsv1.DaemonSetList{} + Expect(env.Client.List(env.Context, daemonSetList)).To(Succeed()) + + return lo.CountBy(daemonSetList.Items, func(d appsv1.DaemonSet) bool { + p := &v1.Pod{Spec: d.Spec.Template.Spec} + nodeTemplate := pscheduling.NewNodeClaimTemplate(np) + if err := 
scheduling.Taints(nodeTemplate.Spec.Taints).Tolerates(p); err != nil { + return false + } + if err := nodeTemplate.Requirements.Compatible(scheduling.NewPodRequirements(p), scheduling.AllowUndefinedWellKnownLabelsV1Beta1); err != nil { + return false + } + return true + }) +} diff --git a/test/suites/alpha/integration/kubelet_config_test.go b/test/suites/alpha/integration/kubelet_config_test.go index aec27cb931c9..5f9afec6a7bc 100644 --- a/test/suites/alpha/integration/kubelet_config_test.go +++ b/test/suites/alpha/integration/kubelet_config_test.go @@ -192,7 +192,7 @@ var _ = Describe("KubeletConfiguration Overrides", func() { }) // Get the DS pod count and use it to calculate the DS pod overhead - dsCount := env.GetDaemonSetCount(provisioner) + dsCount := env.GetDaemonSetCountLegacy(provisioner) provisioner.Spec.KubeletConfiguration = &v1alpha5.KubeletConfiguration{ MaxPods: ptr.Int32(1 + int32(dsCount)), } @@ -260,7 +260,7 @@ var _ = Describe("KubeletConfiguration Overrides", func() { // 2. If # of DS pods is even, we will have i.e. ceil((4+2)/2) = 3 // Since we restrict node to two cores, we will allow 6 pods. Both nodes will have // 4 DS pods and 2 test pods. 
- dsCount := env.GetDaemonSetCount(provisioner) + dsCount := env.GetDaemonSetCountLegacy(provisioner) provisioner.Spec.KubeletConfiguration = &v1alpha5.KubeletConfiguration{ PodsPerCore: ptr.Int32(int32(math.Ceil(float64(2+dsCount) / 2))), } diff --git a/test/suites/alpha/scale/deprovisioning_test.go b/test/suites/alpha/scale/deprovisioning_test.go index 473a82621bdf..f80ebcb6b9bd 100644 --- a/test/suites/alpha/scale/deprovisioning_test.go +++ b/test/suites/alpha/scale/deprovisioning_test.go @@ -126,7 +126,7 @@ var _ = Describe("Deprovisioning", Label(debug.NoWatch), Label(debug.NoEvents), } deployment = test.Deployment(deploymentOptions) selector = labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels) - dsCount = env.GetDaemonSetCount(provisioner) + dsCount = env.GetDaemonSetCountLegacy(provisioner) }) AfterEach(func() { diff --git a/test/suites/alpha/scale/provisioning_test.go b/test/suites/alpha/scale/provisioning_test.go index 0ebf0268815b..4c73c6115045 100644 --- a/test/suites/alpha/scale/provisioning_test.go +++ b/test/suites/alpha/scale/provisioning_test.go @@ -86,7 +86,7 @@ var _ = Describe("Provisioning", Label(debug.NoWatch), Label(debug.NoEvents), fu }) selector = labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels) // Get the DS pod count and use it to calculate the DS pod overhead - dsCount = env.GetDaemonSetCount(provisioner) + dsCount = env.GetDaemonSetCountLegacy(provisioner) }) It("should scale successfully on a node-dense scale-up", Label(debug.NoEvents), func(_ context.Context) { // Disable Prefix Delegation for the node-dense scale-up to not exhaust the IPs diff --git a/test/suites/beta/integration/ami_test.go b/test/suites/beta/integration/ami_test.go new file mode 100644 index 000000000000..2160f0d486de --- /dev/null +++ b/test/suites/beta/integration/ami_test.go @@ -0,0 +1,345 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "encoding/base64" + "fmt" + "os" + "strings" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/ssm" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + coretest "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter/pkg/apis/v1beta1" + awsenv "github.com/aws/karpenter/test/pkg/environment/aws" +) + +var _ = Describe("AMI", func() { + var customAMI string + BeforeEach(func() { + customAMI = env.GetCustomAMI("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", 1) + }) + + It("should use the AMI defined by the AMI Selector", func() { + pod := coretest.Pod() + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(customAMI)))) + }) + It("should use the most recent AMI when discovering multiple", func() { + // choose an old static image + parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{ + Name: aws.String("/aws/service/eks/optimized-ami/1.23/amazon-linux-2/amazon-eks-node-1.23-v20230322/image_id"), + }) + Expect(err).To(BeNil()) + oldCustomAMI := *parameter.Parameter.Value + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + ID: customAMI, + }, + { + ID: oldCustomAMI, + }, + } + 
nodeClass.Spec.UserData = aws.String(fmt.Sprintf("#!/bin/bash\n/etc/eks/bootstrap.sh '%s'", env.ClusterName)) + pod := coretest.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(customAMI)))) + }) + It("should support ami selector aws::name but fail with incorrect owners", func() { + output, err := env.EC2API.DescribeImages(&ec2.DescribeImagesInput{ + ImageIds: []*string{aws.String(customAMI)}, + }) + Expect(err).To(BeNil()) + Expect(output.Images).To(HaveLen(1)) + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + Name: *output.Images[0].Name, + Owner: "fakeOwnerValue", + }, + } + nodeClass.Spec.UserData = aws.String(fmt.Sprintf("#!/bin/bash\n/etc/eks/bootstrap.sh '%s'", env.ClusterName)) + pod := coretest.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.ExpectCreatedNodeCount("==", 0) + Expect(pod.Spec.NodeName).To(Equal("")) + }) + It("should support ami selector aws::name with default owners", func() { + output, err := env.EC2API.DescribeImages(&ec2.DescribeImagesInput{ + ImageIds: []*string{aws.String(customAMI)}, + }) + Expect(err).To(BeNil()) + Expect(output.Images).To(HaveLen(1)) + + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + Name: *output.Images[0].Name, + }, + } + nodeClass.Spec.UserData = aws.String(fmt.Sprintf("#!/bin/bash\n/etc/eks/bootstrap.sh '%s'", env.ClusterName)) + pod := coretest.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(customAMI)))) + }) + It("should support ami selector aws::ids", func() { + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom + 
nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + ID: customAMI, + }, + } + nodeClass.Spec.UserData = aws.String(fmt.Sprintf("#!/bin/bash\n/etc/eks/bootstrap.sh '%s'", env.ClusterName)) + pod := coretest.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(customAMI)))) + }) + + Context("AMIFamily", func() { + It("should provision a node using the AL2 family", func() { + pod := coretest.Pod() + env.ExpectCreated(nodeClass, nodePool, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should provision a node using the Bottlerocket family", func() { + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyBottlerocket + pod := coretest.Pod() + env.ExpectCreated(nodeClass, nodePool, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should provision a node using the Ubuntu family", func() { + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyUbuntu + // TODO: remove requirements after Ubuntu fixes bootstrap script issue w/ + // new instance types not included in the max-pods.txt file. (https://github.com/aws/karpenter/issues/4472) + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceFamily, + Operator: v1.NodeSelectorOpNotIn, + Values: []string{"m7a", "r7a", "c7a"}, + }, + { + Key: v1beta1.LabelInstanceCategory, + Operator: v1.NodeSelectorOpIn, + Values: []string{"c", "m", "r"}, + }, + }...) 
+ pod := coretest.Pod() + env.ExpectCreated(nodeClass, nodePool, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should support Custom AMIFamily with AMI Selectors", func() { + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + ID: customAMI, + }, + } + nodeClass.Spec.UserData = aws.String(fmt.Sprintf("#!/bin/bash\n/etc/eks/bootstrap.sh '%s'", env.ClusterName)) + pod := coretest.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("ImageId", HaveValue(Equal(customAMI)))) + }) + It("should have the AWSNodeTemplateStatus for AMIs using wildcard", func() { + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + Tags: map[string]string{"aws::name": "*"}, + }, + } + env.ExpectCreated(nodeClass) + nc := EventuallyExpectAMIsToExist(nodeClass) + Expect(len(nc.Status.AMIs)).To(BeNumerically("<", 10)) + }) + It("should have the AWSNodeTemplateStatus for AMIs using tags", func() { + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + ID: customAMI, + }, + } + env.ExpectCreated(nodeClass) + nc := EventuallyExpectAMIsToExist(nodeClass) + Expect(len(nc.Status.AMIs)).To(BeNumerically("==", 1)) + Expect(nc.Status.AMIs[0].ID).To(Equal(customAMI)) + }) + }) + + Context("UserData", func() { + It("should merge UserData contents for AL2 AMIFamily", func() { + content, err := os.ReadFile("testdata/al2_userdata_input.sh") + Expect(err).ToNot(HaveOccurred()) + nodeClass.Spec.UserData = aws.String(string(content)) + nodePool.Spec.Template.Spec.Taints = []v1.Taint{{Key: "example.com", Value: "value", Effect: "NoExecute"}} + nodePool.Spec.Template.Spec.StartupTaints = []v1.Taint{{Key: "example.com", Value: "value", Effect: "NoSchedule"}} + pod := coretest.Pod(coretest.PodOptions{Tolerations: []v1.Toleration{{Key: "example.com", 
Operator: v1.TolerationOpExists}}}) + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + Expect(env.GetNode(pod.Spec.NodeName).Spec.Taints).To(ContainElements( + v1.Taint{Key: "example.com", Value: "value", Effect: "NoExecute"}, + v1.Taint{Key: "example.com", Value: "value", Effect: "NoSchedule"}, + )) + actualUserData, err := base64.StdEncoding.DecodeString(*getInstanceAttribute(pod.Spec.NodeName, "userData").UserData.Value) + Expect(err).ToNot(HaveOccurred()) + // Since the node has joined the cluster, we know our bootstrapping was correct. + // Just verify if the UserData contains our custom content too, rather than doing a byte-wise comparison. + Expect(string(actualUserData)).To(ContainSubstring("Running custom user data script")) + }) + It("should merge non-MIME UserData contents for AL2 AMIFamily", func() { + content, err := os.ReadFile("testdata/al2_no_mime_userdata_input.sh") + Expect(err).ToNot(HaveOccurred()) + nodeClass.Spec.UserData = aws.String(string(content)) + nodePool.Spec.Template.Spec.Taints = []v1.Taint{{Key: "example.com", Value: "value", Effect: "NoExecute"}} + nodePool.Spec.Template.Spec.StartupTaints = []v1.Taint{{Key: "example.com", Value: "value", Effect: "NoSchedule"}} + pod := coretest.Pod(coretest.PodOptions{Tolerations: []v1.Toleration{{Key: "example.com", Operator: v1.TolerationOpExists}}}) + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + Expect(env.GetNode(pod.Spec.NodeName).Spec.Taints).To(ContainElements( + v1.Taint{Key: "example.com", Value: "value", Effect: "NoExecute"}, + v1.Taint{Key: "example.com", Value: "value", Effect: "NoSchedule"}, + )) + actualUserData, err := base64.StdEncoding.DecodeString(*getInstanceAttribute(pod.Spec.NodeName, "userData").UserData.Value) + Expect(err).ToNot(HaveOccurred()) + // Since the node has joined the cluster, we know our bootstrapping was correct. 
+ // Just verify if the UserData contains our custom content too, rather than doing a byte-wise comparison. + Expect(string(actualUserData)).To(ContainSubstring("Running custom user data script")) + }) + It("should merge UserData contents for Bottlerocket AMIFamily", func() { + content, err := os.ReadFile("testdata/br_userdata_input.sh") + Expect(err).ToNot(HaveOccurred()) + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyBottlerocket + nodeClass.Spec.UserData = aws.String(string(content)) + nodePool.Spec.Template.Spec.Taints = []v1.Taint{{Key: "example.com", Value: "value", Effect: "NoExecute"}} + nodePool.Spec.Template.Spec.StartupTaints = []v1.Taint{{Key: "example.com", Value: "value", Effect: "NoSchedule"}} + pod := coretest.Pod(coretest.PodOptions{Tolerations: []v1.Toleration{{Key: "example.com", Operator: v1.TolerationOpExists}}}) + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + Expect(env.GetNode(pod.Spec.NodeName).Spec.Taints).To(ContainElements( + v1.Taint{Key: "example.com", Value: "value", Effect: "NoExecute"}, + v1.Taint{Key: "example.com", Value: "value", Effect: "NoSchedule"}, + )) + actualUserData, err := base64.StdEncoding.DecodeString(*getInstanceAttribute(pod.Spec.NodeName, "userData").UserData.Value) + Expect(err).ToNot(HaveOccurred()) + Expect(string(actualUserData)).To(ContainSubstring("kube-api-qps = 30")) + }) + It("should merge UserData contents for Windows AMIFamily", func() { + env.ExpectWindowsIPAMEnabled() + DeferCleanup(func() { + env.ExpectWindowsIPAMDisabled() + }) + + content, err := os.ReadFile("testdata/windows_userdata_input.ps1") + Expect(err).ToNot(HaveOccurred()) + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyWindows2022 + nodeClass.Spec.UserData = aws.String(string(content)) + nodePool.Spec.Template.Spec.Taints = []v1.Taint{{Key: "example.com", Value: "value", Effect: "NoExecute"}} + nodePool.Spec.Template.Spec.StartupTaints = []v1.Taint{{Key: "example.com", Value: "value", Effect: 
"NoSchedule"}} + + // TODO: remove this requirement once VPC RC rolls out m7a.*, r7a.* ENI data (https://github.com/aws/karpenter/issues/4472) + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1.LabelOSStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{string(v1.Windows)}, + }, + { + Key: v1beta1.LabelInstanceFamily, + Operator: v1.NodeSelectorOpNotIn, + Values: []string{"m7a", "r7a", "c7a"}, + }, + { + Key: v1beta1.LabelInstanceCategory, + Operator: v1.NodeSelectorOpIn, + Values: []string{"c", "m", "r"}, + }, + }...) + pod := coretest.Pod(coretest.PodOptions{ + Image: awsenv.WindowsDefaultImage, + NodeSelector: map[string]string{ + v1.LabelOSStable: string(v1.Windows), + v1.LabelWindowsBuild: "10.0.20348", + }, + Tolerations: []v1.Toleration{{Key: "example.com", Operator: v1.TolerationOpExists}}, + }) + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthyWithTimeout(time.Minute*15, pod) // Wait 15 minutes because Windows nodes/containers take longer to spin up + Expect(env.GetNode(pod.Spec.NodeName).Spec.Taints).To(ContainElements( + v1.Taint{Key: "example.com", Value: "value", Effect: "NoExecute"}, + v1.Taint{Key: "example.com", Value: "value", Effect: "NoSchedule"}, + )) + actualUserData, err := base64.StdEncoding.DecodeString(*getInstanceAttribute(pod.Spec.NodeName, "userData").UserData.Value) + Expect(err).ToNot(HaveOccurred()) + Expect(string(actualUserData)).To(ContainSubstring("Write-Host \"Running custom user data script\"")) + Expect(string(actualUserData)).To(ContainSubstring("[string]$EKSBootstrapScriptFile = \"$env:ProgramFiles\\Amazon\\EKS\\Start-EKSBootstrap.ps1\"")) + }) + }) +}) + +//nolint:unparam +func getInstanceAttribute(nodeName string, attribute string) *ec2.DescribeInstanceAttributeOutput { + var node v1.Node + Expect(env.Client.Get(env.Context, types.NamespacedName{Name: nodeName}, &node)).To(Succeed()) + providerIDSplit := 
strings.Split(node.Spec.ProviderID, "/") + instanceID := providerIDSplit[len(providerIDSplit)-1] + instanceAttribute, err := env.EC2API.DescribeInstanceAttribute(&ec2.DescribeInstanceAttributeInput{ + InstanceId: aws.String(instanceID), + Attribute: aws.String(attribute), + }) + Expect(err).ToNot(HaveOccurred()) + return instanceAttribute +} + +func EventuallyExpectAMIsToExist(nodeClass *v1beta1.EC2NodeClass) *v1beta1.EC2NodeClass { + nc := &v1beta1.EC2NodeClass{} + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClass), nc)).To(Succeed()) + g.Expect(nc.Status.AMIs).ToNot(BeNil()) + }).WithTimeout(30 * time.Second).Should(Succeed()) + return nc +} diff --git a/test/suites/beta/integration/aws_metadata_test.go b/test/suites/beta/integration/aws_metadata_test.go new file mode 100644 index 000000000000..30b13299a5a2 --- /dev/null +++ b/test/suites/beta/integration/aws_metadata_test.go @@ -0,0 +1,49 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + coretest "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter/pkg/apis/v1beta1" +) + +var _ = Describe("MetadataOptions", func() { + It("should use specified metadata options", func() { + nodeClass.Spec.MetadataOptions = &v1beta1.MetadataOptions{ + HTTPEndpoint: aws.String("enabled"), + HTTPProtocolIPv6: aws.String("enabled"), + HTTPPutResponseHopLimit: aws.Int64(1), + HTTPTokens: aws.String("required"), + } + pod := coretest.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("MetadataOptions", HaveValue(Equal(ec2.InstanceMetadataOptionsResponse{ + State: aws.String(ec2.InstanceMetadataOptionsStateApplied), + HttpEndpoint: aws.String("enabled"), + HttpProtocolIpv6: aws.String("enabled"), + HttpPutResponseHopLimit: aws.Int64(1), + HttpTokens: aws.String("required"), + InstanceMetadataTags: aws.String("disabled"), + })))) + }) +}) diff --git a/test/suites/beta/integration/block_device_mappings_test.go b/test/suites/beta/integration/block_device_mappings_test.go new file mode 100644 index 000000000000..71697f096c71 --- /dev/null +++ b/test/suites/beta/integration/block_device_mappings_test.go @@ -0,0 +1,57 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "github.com/aws/aws-sdk-go/aws" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter-core/pkg/utils/resources" + "github.com/aws/karpenter/pkg/apis/v1beta1" +) + +var _ = Describe("BlockDeviceMappings", func() { + It("should use specified block device mappings", func() { + nodeClass.Spec.BlockDeviceMappings = []*v1beta1.BlockDeviceMapping{ + { + DeviceName: aws.String("/dev/xvda"), + EBS: &v1beta1.BlockDevice{ + VolumeSize: resources.Quantity("10G"), + VolumeType: aws.String("io2"), + IOPS: aws.Int64(1000), + Encrypted: aws.Bool(true), + DeleteOnTermination: aws.Bool(true), + }, + }, + } + pod := test.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + instance := env.GetInstance(pod.Spec.NodeName) + Expect(len(instance.BlockDeviceMappings)).To(Equal(1)) + Expect(instance.BlockDeviceMappings[0]).ToNot(BeNil()) + Expect(instance.BlockDeviceMappings[0]).To(HaveField("DeviceName", HaveValue(Equal("/dev/xvda")))) + Expect(instance.BlockDeviceMappings[0].Ebs).To(HaveField("DeleteOnTermination", HaveValue(BeTrue()))) + volume := env.GetVolume(instance.BlockDeviceMappings[0].Ebs.VolumeId) + Expect(volume).To(HaveField("Encrypted", HaveValue(BeTrue()))) + Expect(volume).To(HaveField("Size", HaveValue(Equal(int64(10))))) // Convert G -> Gib (rounded up) + Expect(volume).To(HaveField("Iops", HaveValue(Equal(int64(1000))))) + Expect(volume).To(HaveField("VolumeType", HaveValue(Equal("io2")))) + }) +}) diff --git a/test/suites/beta/integration/cni_test.go b/test/suites/beta/integration/cni_test.go new file mode 100644 index 000000000000..eb579bd67152 --- /dev/null +++ b/test/suites/beta/integration/cni_test.go @@ -0,0 +1,90 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "strconv" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/samber/lo" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + + "github.com/aws/karpenter-core/pkg/test" +) + +var _ = Describe("CNITests", func() { + It("should set max pods to 110 when AWSENILimited when AWS_ENI_LIMITED_POD_DENSITY is false", func() { + env.ExpectSettingsOverriddenLegacy(map[string]string{"aws.enableENILimitedPodDensity": "false"}) + pod := test.Pod() + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + var node corev1.Node + Expect(env.Client.Get(env.Context, types.NamespacedName{Name: pod.Spec.NodeName}, &node)).To(Succeed()) + allocatablePods, _ := node.Status.Allocatable.Pods().AsInt64() + Expect(allocatablePods).To(Equal(int64(110))) + }) + It("should set eni-limited maxPods when AWSENILimited when AWS_ENI_LIMITED_POD_DENSITY is true", func() { + pod := test.Pod() + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + var node corev1.Node + Expect(env.Client.Get(env.Context, types.NamespacedName{Name: pod.Spec.NodeName}, &node)).To(Succeed()) + allocatablePods, _ := node.Status.Allocatable.Pods().AsInt64() + Expect(allocatablePods).To(Equal(eniLimitedPodsFor(node.Labels["node.kubernetes.io/instance-type"]))) + }) + It("should set maxPods when reservedENIs is set", func() { + 
env.ExpectSettingsOverriddenLegacy(map[string]string{"aws.reservedENIs": "1"}) + env.ExpectSettingsOverridden(corev1.EnvVar{Name: "RESERVED_ENIS", Value: "1"}) + pod := test.Pod() + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + var node corev1.Node + Expect(env.Client.Get(env.Context, types.NamespacedName{Name: pod.Spec.NodeName}, &node)).To(Succeed()) + allocatablePods, _ := node.Status.Allocatable.Pods().AsInt64() + Expect(allocatablePods).To(Equal(reservedENIsFor(node.Labels["node.kubernetes.io/instance-type"]))) + }) +}) + +func eniLimitedPodsFor(instanceType string) int64 { + instance, err := env.EC2API.DescribeInstanceTypes(&ec2.DescribeInstanceTypesInput{ + InstanceTypes: aws.StringSlice([]string{instanceType}), + }) + Expect(err).ToNot(HaveOccurred()) + networkInfo := *instance.InstanceTypes[0].NetworkInfo + return *networkInfo.MaximumNetworkInterfaces*(*networkInfo.Ipv4AddressesPerInterface-1) + 2 +} + +func reservedENIsFor(instanceType string) int64 { + instance, err := env.EC2API.DescribeInstanceTypes(&ec2.DescribeInstanceTypesInput{ + InstanceTypes: aws.StringSlice([]string{instanceType}), + }) + Expect(err).ToNot(HaveOccurred()) + networkInfo := *instance.InstanceTypes[0].NetworkInfo + reservedENIs := 0 + reservedENIsVar, ok := lo.Find(env.ExpectSettings(), func(v corev1.EnvVar) bool { return v.Name == "RESERVED_ENIS" }) + if ok { + reservedENIs, err = strconv.Atoi(reservedENIsVar.Value) + Expect(err).ToNot(HaveOccurred()) + } + return (*networkInfo.MaximumNetworkInterfaces-int64(reservedENIs))*(*networkInfo.Ipv4AddressesPerInterface-1) + 2 +} diff --git a/test/suites/beta/integration/daemonset_test.go b/test/suites/beta/integration/daemonset_test.go new file mode 100644 index 000000000000..3e7206814de0 --- /dev/null +++ b/test/suites/beta/integration/daemonset_test.go @@ -0,0 +1,136 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file 
except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + schedulingv1 "k8s.io/api/scheduling/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "sigs.k8s.io/controller-runtime/pkg/client" + + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" + "github.com/aws/karpenter-core/pkg/test" +) + +var _ = Describe("DaemonSet", func() { + var limitrange *v1.LimitRange + var priorityclass *schedulingv1.PriorityClass + var daemonset *appsv1.DaemonSet + var dep *appsv1.Deployment + + BeforeEach(func() { + nodePool.Spec.Disruption.ConsolidationPolicy = corev1beta1.ConsolidationPolicyWhenUnderutilized + priorityclass = &schedulingv1.PriorityClass{ + ObjectMeta: metav1.ObjectMeta{ + Name: "high-priority-daemonsets", + }, + Value: int32(10000000), + GlobalDefault: false, + Description: "This priority class should be used for daemonsets.", + } + limitrange = &v1.LimitRange{ + ObjectMeta: metav1.ObjectMeta{ + Name: "limitrange", + Namespace: "default", + }, + } + daemonset = test.DaemonSet(test.DaemonSetOptions{ + PodOptions: test.PodOptions{ + ResourceRequirements: v1.ResourceRequirements{Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("1Gi")}}, + PriorityClassName: "high-priority-daemonsets", + }, + }) + numPods := 1 + dep = test.Deployment(test.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: test.PodOptions{ + ObjectMeta: 
metav1.ObjectMeta{ + Labels: map[string]string{"app": "large-app"}, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{v1.ResourceMemory: resource.MustParse("4")}, + }, + }, + }) + }) + It("should account for LimitRange Default on daemonSet pods for resources", func() { + limitrange.Spec.Limits = []v1.LimitRangeItem{ + { + Type: v1.LimitTypeContainer, + Default: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + } + + podSelector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + daemonSetSelector := labels.SelectorFromSet(daemonset.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, limitrange, priorityclass, daemonset, dep) + + // Eventually expect a single node to exist and both the deployment pod and the daemonset pod to schedule to it + Eventually(func(g Gomega) { + nodeList := &v1.NodeList{} + g.Expect(env.Client.List(env, nodeList, client.HasLabels{"testing/cluster"})).To(Succeed()) + g.Expect(nodeList.Items).To(HaveLen(1)) + + deploymentPods := env.Monitor.RunningPods(podSelector) + g.Expect(deploymentPods).To(HaveLen(1)) + + daemonSetPods := env.Monitor.RunningPods(daemonSetSelector) + g.Expect(daemonSetPods).To(HaveLen(1)) + + g.Expect(deploymentPods[0].Spec.NodeName).To(Equal(nodeList.Items[0].Name)) + g.Expect(daemonSetPods[0].Spec.NodeName).To(Equal(nodeList.Items[0].Name)) + }).Should(Succeed()) + }) + It("should account for LimitRange DefaultRequest on daemonSet pods for resources", func() { + limitrange.Spec.Limits = []v1.LimitRangeItem{ + { + Type: v1.LimitTypeContainer, + DefaultRequest: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("2"), + v1.ResourceMemory: resource.MustParse("1Gi"), + }, + }, + } + + podSelector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + daemonSetSelector := labels.SelectorFromSet(daemonset.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, limitrange, 
priorityclass, daemonset, dep) + + // Eventually expect a single node to exist and both the deployment pod and the daemonset pod to schedule to it + Eventually(func(g Gomega) { + nodeList := &v1.NodeList{} + g.Expect(env.Client.List(env, nodeList, client.HasLabels{"testing/cluster"})).To(Succeed()) + g.Expect(nodeList.Items).To(HaveLen(1)) + + deploymentPods := env.Monitor.RunningPods(podSelector) + g.Expect(deploymentPods).To(HaveLen(1)) + + daemonSetPods := env.Monitor.RunningPods(daemonSetSelector) + g.Expect(daemonSetPods).To(HaveLen(1)) + + g.Expect(deploymentPods[0].Spec.NodeName).To(Equal(nodeList.Items[0].Name)) + g.Expect(daemonSetPods[0].Spec.NodeName).To(Equal(nodeList.Items[0].Name)) + }).Should(Succeed()) + }) +}) diff --git a/test/suites/beta/integration/emptiness_test.go b/test/suites/beta/integration/emptiness_test.go new file mode 100644 index 000000000000..9c48e694a9e8 --- /dev/null +++ b/test/suites/beta/integration/emptiness_test.go @@ -0,0 +1,65 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "time" + + "github.com/samber/lo" + "k8s.io/apimachinery/pkg/labels" + "knative.dev/pkg/ptr" + + "sigs.k8s.io/controller-runtime/pkg/client" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" + "github.com/aws/karpenter-core/pkg/test" +) + +var _ = Describe("Emptiness", func() { + It("should terminate an empty node", func() { + nodePool.Spec.Disruption.ConsolidationPolicy = corev1beta1.ConsolidationPolicyWhenEmpty + nodePool.Spec.Disruption.ConsolidateAfter = &corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Hour * 300)} + + const numPods = 1 + deployment := test.Deployment(test.DeploymentOptions{Replicas: numPods}) + + By("kicking off provisioning for a deployment") + env.ExpectCreated(nodeClass, nodePool, deployment) + nodeClaim := env.EventuallyExpectCreatedNodeClaimCount("==", 1)[0] + node := env.EventuallyExpectCreatedNodeCount("==", 1)[0] + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), numPods) + + By("making the nodeclaim empty") + persisted := deployment.DeepCopy() + deployment.Spec.Replicas = ptr.Int32(0) + Expect(env.Client.Patch(env, deployment, client.MergeFrom(persisted))).To(Succeed()) + + By("waiting for the nodeclaim emptiness status condition to propagate") + Eventually(func(g Gomega) { + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClaim), nodeClaim)).To(Succeed()) + g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Empty)).ToNot(BeNil()) + g.Expect(nodeClaim.StatusConditions().GetCondition(corev1beta1.Empty).IsTrue()).To(BeTrue()) + }).Should(Succeed()) + + By("waiting for the nodeclaim to deprovision when past its ConsolidateAfter timeout of 0") + nodePool.Spec.Disruption.ConsolidateAfter = &corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Duration(0))} + env.ExpectUpdated(nodePool) + + env.EventuallyExpectNotFound(nodeClaim, node) + }) +}) diff --git a/test/suites/beta/integration/extended_resources_test.go b/test/suites/beta/integration/extended_resources_test.go new file mode 100644 index 000000000000..ba4681e4ee64 --- /dev/null +++ 
b/test/suites/beta/integration/extended_resources_test.go @@ -0,0 +1,423 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "fmt" + "os" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/samber/lo" + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + + "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter/pkg/apis/v1beta1" +) + +var _ = Describe("Extended Resources", func() { + It("should provision nodes for a deployment that requests nvidia.com/gpu", func() { + ExpectNvidiaDevicePluginCreated() + + numPods := 1 + dep := test.Deployment(test.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "large-app"}, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("1"), + }, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, dep) + env.EventuallyExpectHealthyPodCount(selector, numPods) + env.ExpectCreatedNodeCount("==", 1) + env.EventuallyExpectInitializedNodeCount("==", 1) + }) + It("should provision nodes for a deployment that requests nvidia.com/gpu 
(Bottlerocket)", func() { + // For Bottlerocket, we are testing that resources are initialized without needing a device plugin + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyBottlerocket + numPods := 1 + dep := test.Deployment(test.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "large-app"}, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + "nvidia.com/gpu": resource.MustParse("1"), + }, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, dep) + env.EventuallyExpectHealthyPodCount(selector, numPods) + env.ExpectCreatedNodeCount("==", 1) + env.EventuallyExpectInitializedNodeCount("==", 1) + }) + It("should provision nodes for a deployment that requests vpc.amazonaws.com/pod-eni (security groups for pods)", func() { + env.ExpectPodENIEnabled() + DeferCleanup(func() { + env.ExpectPodENIDisabled() + }) + env.ExpectSettingsOverriddenLegacy(map[string]string{"aws.enablePodENI": "true"}) + // TODO: remove this requirement once VPC RC rolls out m7a.*, r7a.* ENI data (https://github.com/aws/karpenter/issues/4472) + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceFamily, + Operator: v1.NodeSelectorOpNotIn, + Values: []string{"m7a", "r7a", "c7a"}, + }, + { + Key: v1beta1.LabelInstanceCategory, + Operator: v1.NodeSelectorOpIn, + Values: []string{"c", "m", "r"}, + }, + }...) 
+ numPods := 1 + dep := test.Deployment(test.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "large-app"}, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + "vpc.amazonaws.com/pod-eni": resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + "vpc.amazonaws.com/pod-eni": resource.MustParse("1"), + }, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, dep) + env.EventuallyExpectHealthyPodCount(selector, numPods) + env.ExpectCreatedNodeCount("==", 1) + env.EventuallyExpectInitializedNodeCount("==", 1) + }) + It("should provision nodes for a deployment that requests amd.com/gpu", func() { + Skip("skipping test on AMD instance types") + ExpectAMDDevicePluginCreated() + + customAMI := env.GetCustomAMI("/aws/service/eks/optimized-ami/%s/amazon-linux-2/recommended/image_id", 0) + + // We create custom userData that installs the AMD Radeon driver and then performs the EKS bootstrap script + // We use a Custom AMI so that we can reboot after we start the kubelet service + rawContent, err := os.ReadFile("testdata/amd_driver_input.sh") + Expect(err).ToNot(HaveOccurred()) + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + ID: customAMI, + }, + } + nodeClass.Spec.UserData = lo.ToPtr(fmt.Sprintf(string(rawContent), env.ClusterName, + env.ClusterEndpoint, env.ExpectCABundle(), nodePool.Name)) + + numPods := 1 + dep := test.Deployment(test.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "large-app"}, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + "amd.com/gpu": resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + "amd.com/gpu": resource.MustParse("1"), + }, + }, + }, 
+ }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, dep) + Eventually(func(g Gomega) { + g.Expect(env.Monitor.RunningPodsCount(selector)).To(Equal(numPods)) + }).WithTimeout(15 * time.Minute).Should(Succeed()) // The node needs additional time to install the AMD GPU driver + env.ExpectCreatedNodeCount("==", 1) + env.EventuallyExpectInitializedNodeCount("==", 1) + }) + // Need to subscribe to the AMI to run the test successfully + // https://aws.amazon.com/marketplace/pp/prodview-st5jc2rk3phr2?sr=0-2&ref_=beagle&applicationId=AWSMPContessa + It("should provision nodes for a deployment that requests habana.ai/gaudi", func() { + Skip("skipping test on an exotic instance type") + ExpectHabanaDevicePluginCreated() + + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + ID: "ami-0fae925f94979981f", + }, + } + numPods := 1 + dep := test.Deployment(test.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "large-app"}, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + "habana.ai/gaudi": resource.MustParse("1"), + }, + Limits: v1.ResourceList{ + "habana.ai/gaudi": resource.MustParse("1"), + }, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, dep) + env.EventuallyExpectHealthyPodCount(selector, numPods) + env.ExpectCreatedNodeCount("==", 1) + env.EventuallyExpectInitializedNodeCount("==", 1) + }) +}) + +func ExpectNvidiaDevicePluginCreated() { + GinkgoHelper() + env.ExpectCreated(&appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nvidia-device-plugin-daemonset", + Namespace: "kube-system", + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "nvidia-device-plugin-ds", + }, + }, + UpdateStrategy: appsv1.DaemonSetUpdateStrategy{ + 
Type: appsv1.RollingUpdateDaemonSetStrategyType, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "name": "nvidia-device-plugin-ds", + }, + }, + Spec: v1.PodSpec{ + Tolerations: []v1.Toleration{ + { + Key: "nvidia.com/gpu", + Operator: v1.TolerationOpExists, + Effect: v1.TaintEffectNoSchedule, + }, + }, + PriorityClassName: "system-node-critical", + Containers: []v1.Container{ + { + Name: "nvidia-device-plugin-ctr", + Image: "nvcr.io/nvidia/k8s-device-plugin:v0.12.3", + Env: []v1.EnvVar{ + { + Name: "FAIL_ON_INIT_ERROR", + Value: "false", + }, + }, + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: lo.ToPtr(false), + Capabilities: &v1.Capabilities{ + Drop: []v1.Capability{"ALL"}, + }, + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "device-plugin", + MountPath: "/var/lib/kubelet/device-plugins", + }, + }, + }, + }, + Volumes: []v1.Volume{ + { + Name: "device-plugin", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/var/lib/kubelet/device-plugins", + }, + }, + }, + }, + }, + }, + }, + }) +} + +func ExpectAMDDevicePluginCreated() { + GinkgoHelper() + env.ExpectCreated(&appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "amdgpu-device-plugin-daemonset", + Namespace: "kube-system", + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "amdgpu-dp-ds", + }, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "name": "amdgpu-dp-ds", + }, + }, + Spec: v1.PodSpec{ + PriorityClassName: "system-node-critical", + Tolerations: []v1.Toleration{ + { + Key: "amd.com/gpu", + Operator: v1.TolerationOpExists, + Effect: v1.TaintEffectNoSchedule, + }, + }, + Containers: []v1.Container{ + { + Name: "amdgpu-dp-cntr", + Image: "rocm/k8s-device-plugin", + SecurityContext: &v1.SecurityContext{ + AllowPrivilegeEscalation: lo.ToPtr(false), + Capabilities: &v1.Capabilities{ + 
Drop: []v1.Capability{"ALL"}, + }, + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "dp", + MountPath: "/var/lib/kubelet/device-plugins", + }, + { + Name: "sys", + MountPath: "/sys", + }, + }, + }, + }, + Volumes: []v1.Volume{ + { + Name: "dp", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/var/lib/kubelet/device-plugins", + }, + }, + }, + { + Name: "sys", + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: "/sys", + }, + }, + }, + }, + }, + }, + }, + }) +} + +func ExpectHabanaDevicePluginCreated() { + GinkgoHelper() + env.ExpectCreated(&v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "habana-system", + }, + }) + env.ExpectCreated(&appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "habanalabs-device-plugin-daemonset", + Namespace: "habana-system", + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "name": "habanalabs-device-plugin-ds", + }, + }, + UpdateStrategy: appsv1.DaemonSetUpdateStrategy{ + Type: appsv1.RollingUpdateDaemonSetStrategyType, + }, + Template: v1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "scheduler.alpha.kubernetes.io/critical-pod": "", + }, + Labels: map[string]string{ + "name": "habanalabs-device-plugin-ds", + }, + }, + Spec: v1.PodSpec{ + Tolerations: []v1.Toleration{ + { + Key: "habana.ai/gaudi", + Operator: v1.TolerationOpExists, + Effect: v1.TaintEffectNoSchedule, + }, + }, + PriorityClassName: "system-node-critical", + Containers: []v1.Container{ + { + Name: "habanalabs-device-plugin-ctr", + Image: "vault.habana.ai/docker-k8s-device-plugin/docker-k8s-device-plugin:latest", + SecurityContext: &v1.SecurityContext{ + Privileged: lo.ToPtr(true), + }, + VolumeMounts: []v1.VolumeMount{ + { + Name: "device-plugin", + MountPath: "/var/lib/kubelet/device-plugins", + }, + }, + }, + }, + Volumes: []v1.Volume{ + { + Name: "device-plugin", + VolumeSource: v1.VolumeSource{ + 
HostPath: &v1.HostPathVolumeSource{ + Path: "/var/lib/kubelet/device-plugins", + }, + }, + }, + }, + }, + }, + }, + }) +} diff --git a/test/suites/beta/integration/hash_test.go b/test/suites/beta/integration/hash_test.go new file mode 100644 index 000000000000..552abb3ffffe --- /dev/null +++ b/test/suites/beta/integration/hash_test.go @@ -0,0 +1,53 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "sigs.k8s.io/controller-runtime/pkg/client" + + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" + "github.com/aws/karpenter/pkg/apis/v1beta1" +) + +var _ = Describe("CRD Hash", func() { + It("should have NodePool hash", func() { + env.ExpectCreated(nodeClass, nodePool) + + Eventually(func(g Gomega) { + np := &corev1beta1.NodePool{} + err := env.Client.Get(env, client.ObjectKeyFromObject(nodePool), np) + g.Expect(err).ToNot(HaveOccurred()) + + hash, found := np.Annotations[corev1beta1.NodePoolHashAnnotationKey] + g.Expect(found).To(BeTrue()) + g.Expect(hash).To(Equal(np.Hash())) + }) + }) + It("should have EC2NodeClass hash", func() { + env.ExpectCreated(nodeClass) + + Eventually(func(g Gomega) { + nc := &v1beta1.EC2NodeClass{} + err := env.Client.Get(env, client.ObjectKeyFromObject(nodeClass), nc) + g.Expect(err).ToNot(HaveOccurred()) + + hash, found := nc.Annotations[v1beta1.AnnotationNodeClassHash] + g.Expect(found).To(BeTrue()) + g.Expect(hash).To(Equal(nc.Hash())) + }) + 
}) +}) diff --git a/test/suites/beta/integration/instance_profile_test.go b/test/suites/beta/integration/instance_profile_test.go new file mode 100644 index 000000000000..b40432aae5eb --- /dev/null +++ b/test/suites/beta/integration/instance_profile_test.go @@ -0,0 +1,58 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/iam" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/samber/lo" + + coretest "github.com/aws/karpenter-core/pkg/test" + awserrors "github.com/aws/karpenter/pkg/errors" + "github.com/aws/karpenter/pkg/providers/instanceprofile" +) + +var _ = Describe("InstanceProfile Generation", func() { + It("should generate the InstanceProfile when setting the role", func() { + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + env.EventuallyExpectHealthy(pod) + node := env.ExpectCreatedNodeCount("==", 1)[0] + + instance := env.GetInstance(node.Name) + Expect(instance.IamInstanceProfile).ToNot(BeNil()) + Expect(instance.IamInstanceProfile.Arn).To(ContainSubstring(nodeClass.Spec.Role)) + + instanceProfile := env.ExpectInstanceProfileExists(instanceprofile.GetProfileName(env.Context, env.Region, nodeClass)) + Expect(instanceProfile.Roles).To(HaveLen(1)) + Expect(lo.FromPtr(instanceProfile.Roles[0].RoleName)).To(Equal(nodeClass.Spec.Role)) + }) + It("should remove the generated InstanceProfile when deleting the NodeClass", func() 
{ + pod := coretest.Pod() + env.ExpectCreated(nodePool, nodeClass, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectDeleted(nodePool, nodeClass) + Eventually(func(g Gomega) { + _, err := env.IAMAPI.GetInstanceProfileWithContext(env.Context, &iam.GetInstanceProfileInput{ + InstanceProfileName: aws.String(instanceprofile.GetProfileName(env.Context, env.Region, nodeClass)), + }) + g.Expect(awserrors.IsNotFound(err)).To(BeTrue()) + }).Should(Succeed()) + }) +}) diff --git a/test/suites/beta/integration/kubelet_config_test.go b/test/suites/beta/integration/kubelet_config_test.go new file mode 100644 index 000000000000..729c2b1c4576 --- /dev/null +++ b/test/suites/beta/integration/kubelet_config_test.go @@ -0,0 +1,282 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "math" + "time" + + . 
"github.com/onsi/ginkgo/v2" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "knative.dev/pkg/ptr" + + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" + "github.com/aws/karpenter/test/pkg/environment/aws" + + "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter/pkg/apis/v1beta1" +) + +var _ = Describe("KubeletConfiguration Overrides", func() { + Context("All kubelet configuration set", func() { + BeforeEach(func() { + // MaxPods needs to account for the daemonsets that will run on the nodes + nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ + MaxPods: ptr.Int32(110), + PodsPerCore: ptr.Int32(10), + SystemReserved: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("200m"), + v1.ResourceMemory: resource.MustParse("200Mi"), + v1.ResourceEphemeralStorage: resource.MustParse("1Gi"), + }, + KubeReserved: v1.ResourceList{ + v1.ResourceCPU: resource.MustParse("200m"), + v1.ResourceMemory: resource.MustParse("200Mi"), + v1.ResourceEphemeralStorage: resource.MustParse("1Gi"), + }, + EvictionHard: map[string]string{ + "memory.available": "5%", + "nodefs.available": "5%", + "nodefs.inodesFree": "5%", + "imagefs.available": "5%", + "imagefs.inodesFree": "5%", + "pid.available": "3%", + }, + EvictionSoft: map[string]string{ + "memory.available": "10%", + "nodefs.available": "10%", + "nodefs.inodesFree": "10%", + "imagefs.available": "10%", + "imagefs.inodesFree": "10%", + "pid.available": "6%", + }, + EvictionSoftGracePeriod: map[string]metav1.Duration{ + "memory.available": {Duration: time.Minute * 2}, + "nodefs.available": {Duration: time.Minute * 2}, + "nodefs.inodesFree": {Duration: time.Minute * 2}, + "imagefs.available": {Duration: time.Minute * 2}, + "imagefs.inodesFree": {Duration: time.Minute * 2}, + "pid.available": {Duration: time.Minute * 2}, + }, + EvictionMaxPodGracePeriod: ptr.Int32(120), + 
ImageGCHighThresholdPercent: ptr.Int32(50), + ImageGCLowThresholdPercent: ptr.Int32(10), + CPUCFSQuota: ptr.Bool(false), + } + }) + DescribeTable("Linux AMIFamilies", + func(amiFamily *string) { + nodeClass.Spec.AMIFamily = amiFamily + // Need to enable nodepool-level OS-scoping for now since DS evaluation is done off of the nodepool + // requirements, not off of the instance type options so scheduling can fail if nodepools aren't + // properly scoped + // TODO: remove this requirement once VPC RC rolls out m7a.*, r7a.* ENI data (https://github.com/aws/karpenter/issues/4472) + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceFamily, + Operator: v1.NodeSelectorOpNotIn, + Values: []string{"m7a", "r7a", "c7a"}, + }, + { + Key: v1beta1.LabelInstanceCategory, + Operator: v1.NodeSelectorOpIn, + Values: []string{"c", "m", "r"}, + }, + { + Key: v1.LabelOSStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{string(v1.Linux)}, + }, + }...) 
+ pod := test.Pod(test.PodOptions{ + NodeSelector: map[string]string{ + v1.LabelOSStable: string(v1.Linux), + v1.LabelArchStable: "amd64", + }, + }) + env.ExpectCreated(nodeClass, nodePool, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + }, + Entry("when the AMIFamily is AL2", &v1beta1.AMIFamilyAL2), + Entry("when the AMIFamily is Ubuntu", &v1beta1.AMIFamilyUbuntu), + Entry("when the AMIFamily is Bottlerocket", &v1beta1.AMIFamilyBottlerocket), + ) + DescribeTable("Windows AMIFamilies", + func(amiFamily *string) { + env.ExpectWindowsIPAMEnabled() + DeferCleanup(func() { + env.ExpectWindowsIPAMDisabled() + }) + + nodeClass.Spec.AMIFamily = amiFamily + // Need to enable nodepool-level OS-scoping for now since DS evaluation is done off of the nodepool + // requirements, not off of the instance type options so scheduling can fail if nodepool aren't + // properly scoped + // TODO: remove this requirement once VPC RC rolls out m7a.*, r7a.*, c7a.* ENI data (https://github.com/aws/karpenter/issues/4472) + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceFamily, + Operator: v1.NodeSelectorOpNotIn, + Values: []string{"m7a", "r7a", "c7a"}, + }, + { + Key: v1beta1.LabelInstanceCategory, + Operator: v1.NodeSelectorOpIn, + Values: []string{"c", "m", "r"}, + }, + { + Key: v1.LabelOSStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{string(v1.Windows)}, + }, + }...) 
+ pod := test.Pod(test.PodOptions{ + Image: aws.WindowsDefaultImage, + NodeSelector: map[string]string{ + v1.LabelOSStable: string(v1.Windows), + v1.LabelArchStable: "amd64", + }, + }) + env.ExpectCreated(nodeClass, nodePool, pod) + env.EventuallyExpectHealthyWithTimeout(time.Minute*15, pod) + env.ExpectCreatedNodeCount("==", 1) + }, + Entry("when the AMIFamily is Windows2019", &v1beta1.AMIFamilyWindows2019), + Entry("when the AMIFamily is Windows2022", &v1beta1.AMIFamilyWindows2022), + ) + }) + It("should schedule pods onto separate nodes when maxPods is set", func() { + // MaxPods needs to account for the daemonsets that will run on the nodes + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1.LabelOSStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{string(v1.Linux)}, + }, + }...) + + // Get the DS pod count and use it to calculate the DS pod overhead + dsCount := env.GetDaemonSetCount(nodePool) + nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ + MaxPods: ptr.Int32(1 + int32(dsCount)), + } + + numPods := 3 + dep := test.Deployment(test.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "large-app"}, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")}, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + env.ExpectCreated(nodeClass, nodePool, dep) + + env.EventuallyExpectHealthyPodCount(selector, numPods) + env.ExpectCreatedNodeCount("==", 3) + env.ExpectUniqueNodeNames(selector, 3) + }) + It("should schedule pods onto separate nodes when podsPerCore is set", func() { + // PodsPerCore needs to account for the daemonsets that will run on the nodes + // This will have 4 pods available on each node (2 taken by daemonset pods) + 
nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceCPU, + Operator: v1.NodeSelectorOpIn, + Values: []string{"2"}, + }, + { + Key: v1.LabelOSStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{string(v1.Linux)}, + }, + }...) + numPods := 4 + dep := test.Deployment(test.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "large-app"}, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")}, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + + // Get the DS pod count and use it to calculate the DS pod overhead + // We calculate podsPerCore to split the test pods and the DS pods between two nodes: + // 1. If # of DS pods is odd, we will have i.e. ceil((3+2)/2) = 3 + // Since we restrict node to two cores, we will allow 6 pods. One node will have 3 + // DS pods and 3 test pods. Other node will have 1 test pod and 3 DS pods + // 2. If # of DS pods is even, we will have i.e. ceil((4+2)/2) = 3 + // Since we restrict node to two cores, we will allow 6 pods. Both nodes will have + // 4 DS pods and 2 test pods. 
+ dsCount := env.GetDaemonSetCount(nodePool) + nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ + PodsPerCore: ptr.Int32(int32(math.Ceil(float64(2+dsCount) / 2))), + } + + env.ExpectCreated(nodeClass, nodePool, dep) + env.EventuallyExpectHealthyPodCount(selector, numPods) + env.ExpectCreatedNodeCount("==", 2) + env.ExpectUniqueNodeNames(selector, 2) + }) + It("should ignore podsPerCore value when Bottlerocket is used", func() { + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyBottlerocket + // All pods should schedule to a single node since we are ignoring podsPerCore value + // This would normally schedule to 3 nodes if not using Bottlerocket + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceCPU, + Operator: v1.NodeSelectorOpIn, + Values: []string{"2"}, + }, + { + Key: v1.LabelOSStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{string(v1.Linux)}, + }, + }...) 
+ nodePool.Spec.Template.Spec.Kubelet.PodsPerCore = ptr.Int32(1) + numPods := 6 + dep := test.Deployment(test.DeploymentOptions{ + Replicas: int32(numPods), + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{"app": "large-app"}, + }, + ResourceRequirements: v1.ResourceRequirements{ + Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")}, + }, + }, + }) + selector := labels.SelectorFromSet(dep.Spec.Selector.MatchLabels) + + env.ExpectCreated(nodeClass, nodePool, dep) + env.EventuallyExpectHealthyPodCount(selector, numPods) + env.ExpectCreatedNodeCount("==", 1) + env.ExpectUniqueNodeNames(selector, 1) + }) +}) diff --git a/test/suites/beta/integration/lease_garbagecollection_test.go b/test/suites/beta/integration/lease_garbagecollection_test.go new file mode 100644 index 000000000000..e20cf8528983 --- /dev/null +++ b/test/suites/beta/integration/lease_garbagecollection_test.go @@ -0,0 +1,44 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "time" + + coordinationsv1 "k8s.io/api/coordination/v1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/aws/karpenter-core/pkg/test" + + . 
"github.com/onsi/ginkgo/v2" +) + +var _ = Describe("Lease Garbage Collection", func() { + var badLease *coordinationsv1.Lease + BeforeEach(func() { + badLease = &coordinationsv1.Lease{ + ObjectMeta: v1.ObjectMeta{ + CreationTimestamp: v1.Time{Time: time.Now().Add(-time.Hour * 2)}, + Name: "new-lease", + Namespace: "kube-node-lease", + Labels: map[string]string{test.DiscoveryLabel: "unspecified"}, + }, + } + }) + It("should delete node lease that does not contain an OwnerReference", func() { + env.ExpectCreated(badLease) + env.EventuallyExpectNotFound(badLease) + }) +}) diff --git a/test/suites/beta/integration/scheduling_test.go b/test/suites/beta/integration/scheduling_test.go new file mode 100644 index 000000000000..985cc06484b5 --- /dev/null +++ b/test/suites/beta/integration/scheduling_test.go @@ -0,0 +1,388 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "fmt" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/samber/lo" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/util/sets" + "knative.dev/pkg/ptr" + + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" + "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter/pkg/apis/v1beta1" + "github.com/aws/karpenter/test/pkg/debug" + "github.com/aws/karpenter/test/pkg/environment/aws" +) + +var _ = Describe("Scheduling", Ordered, ContinueOnFailure, func() { + var selectors sets.Set[string] + + BeforeAll(func() { + selectors = sets.New[string]() + }) + AfterAll(func() { + // Ensure that we're exercising all well known labels + Expect(lo.Keys(selectors)).To(ContainElements(append(corev1beta1.WellKnownLabels.UnsortedList(), lo.Keys(corev1beta1.NormalizedLabels)...))) + }) + It("should apply annotations to the node", func() { + nodePool.Spec.Template.Annotations = map[string]string{ + "foo": "bar", + corev1beta1.DoNotDisruptAnnotationKey: "true", + } + pod := test.Pod() + env.ExpectCreated(nodeClass, nodePool, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + Expect(env.GetNode(pod.Spec.NodeName).Annotations).To(And(HaveKeyWithValue("foo", "bar"), HaveKeyWithValue(corev1beta1.DoNotDisruptAnnotationKey, "true"))) + }) + It("should support well-known labels for instance type selection", func() { + nodeSelector := map[string]string{ + // Well Known + corev1beta1.NodePoolLabelKey: nodePool.Name, + v1.LabelInstanceTypeStable: "c5.large", + // Well Known to AWS + v1beta1.LabelInstanceHypervisor: "nitro", + v1beta1.LabelInstanceCategory: "c", + v1beta1.LabelInstanceGeneration: "5", + v1beta1.LabelInstanceFamily: "c5", + v1beta1.LabelInstanceSize: "large", + v1beta1.LabelInstanceCPU: "2", + v1beta1.LabelInstanceMemory: "4096", + v1beta1.LabelInstanceNetworkBandwidth: "750", + v1beta1.LabelInstancePods: "29", + } + selectors.Insert(lo.Keys(nodeSelector)...) 
// Add node selector keys to selectors used in testing to ensure we test all labels + requirements := lo.MapToSlice(nodeSelector, func(key string, value string) v1.NodeSelectorRequirement { + return v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}} + }) + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodeSelector: nodeSelector, + NodePreferences: requirements, + NodeRequirements: requirements, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should support well-known labels for local NVME storage", func() { + selectors.Insert(v1beta1.LabelInstanceLocalNVME) // Add node selector keys to selectors used in testing to ensure we test all labels + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodePreferences: []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceLocalNVME, + Operator: v1.NodeSelectorOpGt, + Values: []string{"0"}, + }, + }, + NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceLocalNVME, + Operator: v1.NodeSelectorOpGt, + Values: []string{"0"}, + }, + }, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should support well-known labels for encryption in transit", func() { + selectors.Insert(v1beta1.LabelInstanceEncryptionInTransitSupported) // Add node selector keys to selectors used in testing to ensure we test all labels + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodePreferences: []v1.NodeSelectorRequirement{ + { + Key: 
v1beta1.LabelInstanceEncryptionInTransitSupported, + Operator: v1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + NodeRequirements: []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceEncryptionInTransitSupported, + Operator: v1.NodeSelectorOpIn, + Values: []string{"true"}, + }, + }, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should support well-known deprecated labels", func() { + nodeSelector := map[string]string{ + // Deprecated Labels + v1.LabelFailureDomainBetaRegion: env.Region, + v1.LabelFailureDomainBetaZone: fmt.Sprintf("%sa", env.Region), + "beta.kubernetes.io/arch": "amd64", + "beta.kubernetes.io/os": "linux", + v1.LabelInstanceType: "c5.large", + } + selectors.Insert(lo.Keys(nodeSelector)...) // Add node selector keys to selectors used in testing to ensure we test all labels + requirements := lo.MapToSlice(nodeSelector, func(key string, value string) v1.NodeSelectorRequirement { + return v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}} + }) + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodeSelector: nodeSelector, + NodePreferences: requirements, + NodeRequirements: requirements, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should support well-known labels for topology and architecture", func() { + nodeSelector := map[string]string{ + // Well Known + corev1beta1.NodePoolLabelKey: nodePool.Name, + v1.LabelTopologyRegion: env.Region, + v1.LabelTopologyZone: fmt.Sprintf("%sa", env.Region), + v1.LabelOSStable: "linux", + v1.LabelArchStable: "amd64", + 
corev1beta1.CapacityTypeLabelKey: corev1beta1.CapacityTypeOnDemand, + } + selectors.Insert(lo.Keys(nodeSelector)...) // Add node selector keys to selectors used in testing to ensure we test all labels + requirements := lo.MapToSlice(nodeSelector, func(key string, value string) v1.NodeSelectorRequirement { + return v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}} + }) + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodeSelector: nodeSelector, + NodePreferences: requirements, + NodeRequirements: requirements, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should support well-known labels for a gpu (nvidia)", func() { + nodeSelector := map[string]string{ + v1beta1.LabelInstanceGPUName: "t4", + v1beta1.LabelInstanceGPUMemory: "16384", + v1beta1.LabelInstanceGPUManufacturer: "nvidia", + v1beta1.LabelInstanceGPUCount: "1", + } + selectors.Insert(lo.Keys(nodeSelector)...) 
// Add node selector keys to selectors used in testing to ensure we test all labels + requirements := lo.MapToSlice(nodeSelector, func(key string, value string) v1.NodeSelectorRequirement { + return v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}} + }) + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodeSelector: nodeSelector, + NodePreferences: requirements, + NodeRequirements: requirements, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should support well-known labels for an accelerator (inferentia)", func() { + nodeSelector := map[string]string{ + v1beta1.LabelInstanceAcceleratorName: "inferentia", + v1beta1.LabelInstanceAcceleratorManufacturer: "aws", + v1beta1.LabelInstanceAcceleratorCount: "1", + } + selectors.Insert(lo.Keys(nodeSelector)...) 
// Add node selector keys to selectors used in testing to ensure we test all labels + requirements := lo.MapToSlice(nodeSelector, func(key string, value string) v1.NodeSelectorRequirement { + return v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}} + }) + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodeSelector: nodeSelector, + NodePreferences: requirements, + NodeRequirements: requirements, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should support well-known labels for windows-build version", func() { + env.ExpectWindowsIPAMEnabled() + DeferCleanup(func() { + env.ExpectWindowsIPAMDisabled() + }) + + nodeSelector := map[string]string{ + // Well Known + v1.LabelWindowsBuild: v1beta1.Windows2022Build, + v1.LabelOSStable: string(v1.Windows), // Specify the OS to enable vpc-resource-controller to inject the PrivateIPv4Address resource + } + selectors.Insert(lo.Keys(nodeSelector)...) 
// Add node selector keys to selectors used in testing to ensure we test all labels + requirements := lo.MapToSlice(nodeSelector, func(key string, value string) v1.NodeSelectorRequirement { + return v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}} + }) + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodeSelector: nodeSelector, + NodePreferences: requirements, + NodeRequirements: requirements, + Image: aws.WindowsDefaultImage, + }}) + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyWindows2022 + // TODO: remove this requirement once VPC RC rolls out m7a.*, r7a.* ENI data (https://github.com/aws/karpenter/issues/4472) + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceFamily, + Operator: v1.NodeSelectorOpNotIn, + Values: []string{"m7a", "r7a", "c7a"}, + }, + { + Key: v1beta1.LabelInstanceCategory, + Operator: v1.NodeSelectorOpIn, + Values: []string{"c", "m", "r"}, + }, + }...) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCountWithTimeout(time.Minute*15, labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should support the node-restriction.kubernetes.io label domain", func() { + // Assign labels to the nodepool so that it has known values + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1.LabelNamespaceNodeRestriction + "/team", + Operator: v1.NodeSelectorOpExists, + }, + { + Key: v1.LabelNamespaceNodeRestriction + "/custom-label", + Operator: v1.NodeSelectorOpExists, + }, + }...) 
+ nodeSelector := map[string]string{ + v1.LabelNamespaceNodeRestriction + "/team": "team-1", + v1.LabelNamespaceNodeRestriction + "/custom-label": "custom-value", + } + selectors.Insert(lo.Keys(nodeSelector)...) // Add node selector keys to selectors used in testing to ensure we test all labels + requirements := lo.MapToSlice(nodeSelector, func(key string, value string) v1.NodeSelectorRequirement { + return v1.NodeSelectorRequirement{Key: key, Operator: v1.NodeSelectorOpIn, Values: []string{value}} + }) + deployment := test.Deployment(test.DeploymentOptions{Replicas: 1, PodOptions: test.PodOptions{ + NodeSelector: nodeSelector, + NodePreferences: requirements, + NodeRequirements: requirements, + }}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should provision a node for naked pods", func() { + pod := test.Pod() + + env.ExpectCreated(nodeClass, nodePool, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should provision a node for a deployment", Label(debug.NoWatch), Label(debug.NoEvents), func() { + deployment := test.Deployment(test.DeploymentOptions{Replicas: 50}) + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), int(*deployment.Spec.Replicas)) + env.ExpectCreatedNodeCount("<=", 2) // should probably all land on a single node, but at worst two depending on batching + }) + It("should provision a node for a self-affinity deployment", func() { + // just two pods as they all need to land on the same node + podLabels := map[string]string{"test": "self-affinity"} + deployment := test.Deployment(test.DeploymentOptions{ + Replicas: 2, + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: podLabels, + }, + PodRequirements: 
[]v1.PodAffinityTerm{ + { + LabelSelector: &metav1.LabelSelector{MatchLabels: podLabels}, + TopologyKey: v1.LabelHostname, + }, + }, + }, + }) + + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(deployment.Spec.Selector.MatchLabels), 2) + env.ExpectCreatedNodeCount("==", 1) + }) + It("should provision three nodes for a zonal topology spread", func() { + // one pod per zone + podLabels := map[string]string{"test": "zonal-spread"} + deployment := test.Deployment(test.DeploymentOptions{ + Replicas: 3, + PodOptions: test.PodOptions{ + ObjectMeta: metav1.ObjectMeta{ + Labels: podLabels, + }, + TopologySpreadConstraints: []v1.TopologySpreadConstraint{ + { + MaxSkew: 1, + TopologyKey: v1.LabelTopologyZone, + WhenUnsatisfiable: v1.DoNotSchedule, + LabelSelector: &metav1.LabelSelector{MatchLabels: podLabels}, + }, + }, + }, + }) + + env.ExpectCreated(nodeClass, nodePool, deployment) + env.EventuallyExpectHealthyPodCount(labels.SelectorFromSet(podLabels), 3) + env.ExpectCreatedNodeCount("==", 3) + }) + It("should provision a node using a NodePool with higher priority", func() { + nodePoolLowPri := test.NodePool(corev1beta1.NodePool{ + Spec: corev1beta1.NodePoolSpec{ + Weight: ptr.Int32(10), + Template: corev1beta1.NodeClaimTemplate{ + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + Requirements: []v1.NodeSelectorRequirement{ + { + Key: v1.LabelInstanceTypeStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{"t3.nano"}, + }, + }, + }, + }, + }, + }) + nodePoolHighPri := test.NodePool(corev1beta1.NodePool{ + Spec: corev1beta1.NodePoolSpec{ + Weight: ptr.Int32(100), + Template: corev1beta1.NodeClaimTemplate{ + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + Requirements: []v1.NodeSelectorRequirement{ + { + Key: v1.LabelInstanceTypeStable, + Operator: v1.NodeSelectorOpIn, + Values: 
[]string{"c4.large"}, + }, + }, + }, + }, + }, + }) + pod := test.Pod() + env.ExpectCreated(pod, nodeClass, nodePoolLowPri, nodePoolHighPri) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + Expect(ptr.StringValue(env.GetInstance(pod.Spec.NodeName).InstanceType)).To(Equal("c4.large")) + Expect(env.GetNode(pod.Spec.NodeName).Labels[corev1beta1.NodePoolLabelKey]).To(Equal(nodePoolHighPri.Name)) + }) +}) diff --git a/test/suites/beta/integration/security_group_test.go b/test/suites/beta/integration/security_group_test.go new file mode 100644 index 000000000000..d89aa53e28b0 --- /dev/null +++ b/test/suites/beta/integration/security_group_test.go @@ -0,0 +1,93 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "time" + + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + "github.com/samber/lo" + "k8s.io/apimachinery/pkg/util/sets" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter/pkg/apis/v1beta1" + "github.com/aws/karpenter/test/pkg/environment/aws" +) + +var _ = Describe("SecurityGroups", func() { + It("should use the security-group-id selector", func() { + securityGroups := env.GetSecurityGroups(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(len(securityGroups)).To(BeNumerically(">", 1)) + nodeClass.Spec.SecurityGroupSelectorTerms = lo.Map(securityGroups, func(sg aws.SecurityGroup, _ int) v1beta1.SecurityGroupSelectorTerm { + return v1beta1.SecurityGroupSelectorTerm{ + ID: lo.FromPtr(sg.GroupId), + } + }) + pod := test.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("SecurityGroups", ConsistOf(&securityGroups[0].GroupIdentifier, &securityGroups[1].GroupIdentifier))) + }) + + It("should use the security group selector with multiple tag values", func() { + securityGroups := env.GetSecurityGroups(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(len(securityGroups)).To(BeNumerically(">", 1)) + first := securityGroups[0] + last := securityGroups[len(securityGroups)-1] + + nodeClass.Spec.SecurityGroupSelectorTerms = []v1beta1.SecurityGroupSelectorTerm{ + { + Tags: map[string]string{"Name": lo.FromPtr(lo.FindOrElse(first.Tags, &ec2.Tag{}, func(tag *ec2.Tag) bool { return lo.FromPtr(tag.Key) == "Name" }).Value)}, + }, + { + Tags: map[string]string{"Name": lo.FromPtr(lo.FindOrElse(last.Tags, &ec2.Tag{}, func(tag *ec2.Tag) bool { return lo.FromPtr(tag.Key) == "Name" }).Value)}, + }, + } + pod := test.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + 
env.ExpectInstance(pod.Spec.NodeName).To(HaveField("SecurityGroups", ConsistOf(&first.GroupIdentifier, &last.GroupIdentifier))) + }) + + It("should update the AWSNodeTemplateStatus for security groups", func() { + env.ExpectCreated(nodeClass) + EventuallyExpectSecurityGroups(env, nodeClass) + }) +}) + +func EventuallyExpectSecurityGroups(env *aws.Environment, nodeClass *v1beta1.EC2NodeClass) { + securityGroups := env.GetSecurityGroups(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(securityGroups).ToNot(HaveLen(0)) + + ids := sets.New(lo.Map(securityGroups, func(s aws.SecurityGroup, _ int) string { + return lo.FromPtr(s.GroupId) + })...) + Eventually(func(g Gomega) { + temp := &v1beta1.EC2NodeClass{} + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClass), temp)).To(Succeed()) + g.Expect(sets.New(lo.Map(temp.Status.SecurityGroups, func(s v1beta1.SecurityGroup, _ int) string { + return s.ID + })...).Equal(ids)) + }).WithTimeout(10 * time.Second).Should(Succeed()) +} diff --git a/test/suites/beta/integration/storage_test.go b/test/suites/beta/integration/storage_test.go new file mode 100644 index 000000000000..af9c38b658cc --- /dev/null +++ b/test/suites/beta/integration/storage_test.go @@ -0,0 +1,119 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package integration_test + +import ( + "fmt" + + "github.com/aws/aws-sdk-go/aws" + appsv1 "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + storagev1 "k8s.io/api/storage/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + . "github.com/onsi/ginkgo/v2" + + "github.com/aws/karpenter-core/pkg/test" +) + +// This test requires the EBS CSI driver to be installed +var _ = Describe("Dynamic PVC", func() { + It("should run a pod with a dynamic persistent volume", func() { + // Ensure that the EBS driver is installed, or we can't run the test. + var ds appsv1.DaemonSet + if err := env.Client.Get(env.Context, client.ObjectKey{ + Namespace: "kube-system", + Name: "ebs-csi-node", + }, &ds); err != nil { + if errors.IsNotFound(err) { + Skip(fmt.Sprintf("skipping dynamic PVC test due to missing EBS driver %s", err)) + } else { + Fail(fmt.Sprintf("determining EBS driver status, %s", err)) + } + } + storageClassName := "ebs-sc-test" + bindMode := storagev1.VolumeBindingWaitForFirstConsumer + sc := test.StorageClass(test.StorageClassOptions{ + ObjectMeta: metav1.ObjectMeta{ + Name: storageClassName, + }, + Provisioner: aws.String("ebs.csi.aws.com"), + VolumeBindingMode: &bindMode, + }) + + pvc := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ebs-claim", + }, + StorageClassName: aws.String(storageClassName), + Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceStorage: resource.MustParse("5Gi")}}, + }) + + pod := test.Pod(test.PodOptions{ + PersistentVolumeClaims: []string{pvc.Name}, + }) + + env.ExpectCreated(nodeClass, nodePool, sc, pvc, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + env.ExpectDeleted(pod) + }) +}) + +var _ = Describe("Static PVC", func() { + It("should run a pod with a static persistent volume", func() { + 
storageClassName := "nfs-test" + bindMode := storagev1.VolumeBindingWaitForFirstConsumer + sc := test.StorageClass(test.StorageClassOptions{ + ObjectMeta: metav1.ObjectMeta{ + Name: storageClassName, + }, + VolumeBindingMode: &bindMode, + }) + + pv := test.PersistentVolume(test.PersistentVolumeOptions{ + ObjectMeta: metav1.ObjectMeta{Name: "nfs-test-volume"}, + StorageClassName: "nfs-test", + }) + + // the server here doesn't need to actually exist for the pod to start running + pv.Spec.NFS = &v1.NFSVolumeSource{ + Server: "fake.server", + Path: "/some/path", + } + pv.Spec.CSI = nil + + pvc := test.PersistentVolumeClaim(test.PersistentVolumeClaimOptions{ + ObjectMeta: metav1.ObjectMeta{ + Name: "nfs-claim", + }, + StorageClassName: aws.String(storageClassName), + VolumeName: pv.Name, + Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceStorage: resource.MustParse("5Gi")}}, + }) + + pod := test.Pod(test.PodOptions{ + PersistentVolumeClaims: []string{pvc.Name}, + }) + + env.ExpectCreated(nodeClass, nodePool, sc, pv, pvc, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + env.ExpectDeleted(pod) + }) +}) diff --git a/test/suites/beta/integration/subnet_test.go b/test/suites/beta/integration/subnet_test.go new file mode 100644 index 000000000000..af5eb321bea2 --- /dev/null +++ b/test/suites/beta/integration/subnet_test.go @@ -0,0 +1,183 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package integration_test + +import ( + "time" + + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/onsi/gomega/types" + "github.com/samber/lo" + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter/pkg/apis/v1beta1" + "github.com/aws/karpenter/test/pkg/environment/aws" +) + +var _ = Describe("Subnets", func() { + It("should use the subnet-id selector", func() { + subnets := env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(len(subnets)).ToNot(Equal(0)) + shuffledAZs := lo.Shuffle(lo.Keys(subnets)) + firstSubnet := subnets[shuffledAZs[0]][0] + + nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{ + { + ID: firstSubnet, + }, + } + pod := test.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("SubnetId", HaveValue(Equal(firstSubnet)))) + }) + It("should use resource based naming as node names", func() { + subnets := env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(len(subnets)).ToNot(Equal(0)) + + allSubnets := lo.Flatten(lo.Values(subnets)) + + ExpectResourceBasedNamingEnabled(allSubnets...) + DeferCleanup(func() { + ExpectResourceBasedNamingDisabled(allSubnets...) 
+ }) + pod := test.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + ExceptNodeNameToContainInstanceID(pod.Spec.NodeName) + }) + It("should use the subnet tag selector with multiple tag values", func() { + // Get all the subnets for the cluster + subnets := env.GetSubnetNameAndIds(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(len(subnets)).To(BeNumerically(">", 1)) + firstSubnet := subnets[0] + lastSubnet := subnets[len(subnets)-1] + + nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{ + { + Tags: map[string]string{"Name": firstSubnet.Name}, + }, + { + Tags: map[string]string{"Name": lastSubnet.Name}, + }, + } + pod := test.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("SubnetId", HaveValue(BeElementOf(firstSubnet.ID, lastSubnet.ID)))) + }) + + It("should use a subnet within the AZ requested", func() { + subnets := env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(len(subnets)).ToNot(Equal(0)) + shuffledAZs := lo.Shuffle(lo.Keys(subnets)) + + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1.LabelZoneFailureDomainStable, + Operator: "In", + Values: []string{shuffledAZs[0]}, + }, + }...) 
+ pod := test.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + env.ExpectInstance(pod.Spec.NodeName).To(HaveField("SubnetId", Or( + lo.Map(subnets[shuffledAZs[0]], func(subnetID string, _ int) types.GomegaMatcher { return HaveValue(Equal(subnetID)) })..., + ))) + }) + + It("should have the NodeClass status for subnets", func() { + env.ExpectCreated(nodeClass) + EventuallyExpectSubnets(env, nodeClass) + }) +}) + +func ExpectResourceBasedNamingEnabled(subnetIDs ...string) { + for subnetID := range subnetIDs { + _, err := env.EC2API.ModifySubnetAttribute(&ec2.ModifySubnetAttributeInput{ + EnableResourceNameDnsARecordOnLaunch: &ec2.AttributeBooleanValue{ + Value: lo.ToPtr(true), + }, + SubnetId: lo.ToPtr(subnetIDs[subnetID]), + }) + Expect(err).To(BeNil()) + _, err = env.EC2API.ModifySubnetAttribute(&ec2.ModifySubnetAttributeInput{ + PrivateDnsHostnameTypeOnLaunch: lo.ToPtr("resource-name"), + SubnetId: lo.ToPtr(subnetIDs[subnetID]), + }) + Expect(err).To(BeNil()) + } +} + +func ExpectResourceBasedNamingDisabled(subnetIDs ...string) { + for subnetID := range subnetIDs { + _, err := env.EC2API.ModifySubnetAttribute(&ec2.ModifySubnetAttributeInput{ + EnableResourceNameDnsARecordOnLaunch: &ec2.AttributeBooleanValue{ + Value: lo.ToPtr(false), + }, + SubnetId: lo.ToPtr(subnetIDs[subnetID]), + }) + Expect(err).To(BeNil()) + _, err = env.EC2API.ModifySubnetAttribute(&ec2.ModifySubnetAttributeInput{ + PrivateDnsHostnameTypeOnLaunch: lo.ToPtr("ip-name"), + SubnetId: lo.ToPtr(subnetIDs[subnetID]), + }) + Expect(err).To(BeNil()) + } +} + +func ExceptNodeNameToContainInstanceID(nodeName string) { + instance := env.GetInstance(nodeName) + Expect(nodeName).To(Not(Equal(lo.FromPtr(instance.InstanceId)))) + ContainSubstring(nodeName, lo.FromPtr(instance.InstanceId)) +} + +// SubnetInfo is a simple struct for testing +type SubnetInfo struct { + Name string + ID string +} + +func 
EventuallyExpectSubnets(env *aws.Environment, nodeClass *v1beta1.EC2NodeClass) { + subnets := env.GetSubnets(map[string]string{"karpenter.sh/discovery": env.ClusterName}) + Expect(subnets).ToNot(HaveLen(0)) + ids := sets.New(lo.Flatten(lo.Values(subnets))...) + + Eventually(func(g Gomega) { + temp := &v1beta1.EC2NodeClass{} + g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(nodeClass), temp)).To(Succeed()) + g.Expect(sets.New(lo.Map(temp.Status.Subnets, func(s v1beta1.Subnet, _ int) string { + return s.ID + })...).Equal(ids)).To(BeTrue()) + }).WithTimeout(10 * time.Second).Should(Succeed()) +} diff --git a/test/suites/beta/integration/suite_test.go b/test/suites/beta/integration/suite_test.go new file mode 100644 index 000000000000..26d7a8c62fc1 --- /dev/null +++ b/test/suites/beta/integration/suite_test.go @@ -0,0 +1,77 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "fmt" + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" + coretest "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter/pkg/apis/v1beta1" + "github.com/aws/karpenter/pkg/test" + "github.com/aws/karpenter/test/pkg/environment/aws" +) + +var env *aws.Environment +var nodeClass *v1beta1.EC2NodeClass +var nodePool *corev1beta1.NodePool + +func TestIntegration(t *testing.T) { + RegisterFailHandler(Fail) + BeforeSuite(func() { + env = aws.NewEnvironment(t) + }) + AfterSuite(func() { + env.Stop() + }) + RunSpecs(t, "Integration") +} + +var _ = BeforeEach(func() { + env.BeforeEach() + nodeClass = test.EC2NodeClass(v1beta1.EC2NodeClass{ + Spec: v1beta1.EC2NodeClassSpec{ + AMIFamily: &v1beta1.AMIFamilyAL2, + SecurityGroupSelectorTerms: []v1beta1.SecurityGroupSelectorTerm{ + { + Tags: map[string]string{"karpenter.sh/discovery": env.ClusterName}, + }, + }, + SubnetSelectorTerms: []v1beta1.SubnetSelectorTerm{ + { + Tags: map[string]string{"karpenter.sh/discovery": env.ClusterName}, + }, + }, + Role: fmt.Sprintf("KarpenterNodeRole-%s", env.ClusterName), + }, + }) + nodePool = coretest.NodePool(corev1beta1.NodePool{ + Spec: corev1beta1.NodePoolSpec{ + Template: corev1beta1.NodeClaimTemplate{ + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{ + Name: nodeClass.Name, + }, + }, + }, + }, + }) +}) +var _ = AfterEach(func() { env.Cleanup() }) +var _ = AfterEach(func() { env.AfterEach() }) diff --git a/test/suites/beta/integration/tags_test.go b/test/suites/beta/integration/tags_test.go new file mode 100644 index 000000000000..afd6024ca5df --- /dev/null +++ b/test/suites/beta/integration/tags_test.go @@ -0,0 +1,110 @@ +/* +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "github.com/aws/aws-sdk-go/service/ec2" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + "time" + + "github.com/samber/lo" + v1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" + coretest "github.com/aws/karpenter-core/pkg/test" + "github.com/aws/karpenter/pkg/apis/v1beta1" + "github.com/aws/karpenter/pkg/providers/instance" + "github.com/aws/karpenter/pkg/test" +) + +var _ = Describe("Tags", func() { + Context("Static Tags", func() { + It("should tag all associated resources", func() { + nodeClass.Spec.Tags = map[string]string{"TestTag": "TestVal"} + pod := coretest.Pod() + + env.ExpectCreated(pod, nodeClass, nodePool) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + instance := env.GetInstance(pod.Spec.NodeName) + volumeTags := tagMap(env.GetVolume(instance.BlockDeviceMappings[0].Ebs.VolumeId).Tags) + instanceTags := tagMap(instance.Tags) + + Expect(instanceTags).To(HaveKeyWithValue("TestTag", "TestVal")) + Expect(volumeTags).To(HaveKeyWithValue("TestTag", "TestVal")) + }) + }) + + Context("Tagging Controller", func() { + It("should tag with karpenter.sh/nodeclaim and Name tag", func() { + pod := coretest.Pod() + + env.ExpectCreated(nodePool, nodeClass, pod) + env.EventuallyExpectCreatedNodeCount("==", 1) + node := env.EventuallyExpectInitializedNodeCount("==", 1)[0] + nodeName := client.ObjectKeyFromObject(node) + + Eventually(func(g Gomega) { + node = &v1.Node{} + g.Expect(env.Client.Get(env.Context, 
nodeName, node)).To(Succeed()) + g.Expect(node.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationInstanceTagged, "true")) + }, time.Minute).Should(Succeed()) + + nodeInstance := instance.NewInstance(lo.ToPtr(env.GetInstance(node.Name))) + Expect(nodeInstance.Tags).To(HaveKeyWithValue("Name", node.Name)) + Expect(nodeInstance.Tags).To(HaveKey("karpenter.sh/nodeclaim")) + }) + + It("shouldn't overwrite custom Name tags", func() { + nodeClass = test.EC2NodeClass(*nodeClass, v1beta1.EC2NodeClass{Spec: v1beta1.EC2NodeClassSpec{ + Tags: map[string]string{"Name": "custom-name"}, + }}) + nodePool = coretest.NodePool(*nodePool, corev1beta1.NodePool{ + Spec: corev1beta1.NodePoolSpec{ + Template: corev1beta1.NodeClaimTemplate{ + Spec: corev1beta1.NodeClaimSpec{ + NodeClassRef: &corev1beta1.NodeClassReference{Name: nodeClass.Name}, + }, + }, + }, + }) + pod := coretest.Pod() + + env.ExpectCreated(nodePool, nodeClass, pod) + env.EventuallyExpectCreatedNodeCount("==", 1) + node := env.EventuallyExpectInitializedNodeCount("==", 1)[0] + nodeName := client.ObjectKeyFromObject(node) + + Eventually(func(g Gomega) { + node = &v1.Node{} + g.Expect(env.Client.Get(env.Context, nodeName, node)).To(Succeed()) + g.Expect(node.Annotations).To(HaveKeyWithValue(v1beta1.AnnotationInstanceTagged, "true")) + }, time.Minute).Should(Succeed()) + + nodeInstance := instance.NewInstance(lo.ToPtr(env.GetInstance(node.Name))) + Expect(nodeInstance.Tags).To(HaveKeyWithValue("Name", "custom-name")) + Expect(nodeInstance.Tags).To(HaveKey("karpenter.sh/nodeclaim")) + }) + }) +}) + +func tagMap(tags []*ec2.Tag) map[string]string { + return lo.SliceToMap(tags, func(tag *ec2.Tag) (string, string) { + return *tag.Key, *tag.Value + }) +} diff --git a/test/suites/beta/integration/termination_test.go b/test/suites/beta/integration/termination_test.go new file mode 100644 index 000000000000..246470f9b1ec --- /dev/null +++ b/test/suites/beta/integration/termination_test.go @@ -0,0 +1,50 @@ +/* +Licensed under the Apache License, Version 2.0 (the 
"License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/samber/lo" + + "github.com/aws/karpenter-core/pkg/test" +) + +var _ = Describe("Termination", func() { + It("should terminate the node and the instance on deletion", func() { + pod := test.Pod() + env.ExpectCreated(nodeClass, nodePool, pod) + env.EventuallyExpectHealthy(pod) + env.ExpectCreatedNodeCount("==", 1) + + nodes := env.Monitor.CreatedNodes() + instanceID := env.ExpectParsedProviderID(nodes[0].Spec.ProviderID) + env.GetInstance(nodes[0].Name) + + // Pod is deleted so that we don't re-provision after node deletion + // NOTE: We have to do this right now to deal with a race condition in nodepool ownership + // This can be removed once this race is resolved with the NodePool + env.ExpectDeleted(pod) + + // Node is deleted and now should be not found + env.ExpectDeleted(nodes[0]) + env.EventuallyExpectNotFound(nodes[0]) + Eventually(func(g Gomega) { + g.Expect(lo.FromPtr(env.GetInstanceByID(instanceID).State.Name)).To(Equal("shutting-down")) + }, time.Second*10).Should(Succeed()) + }) +}) diff --git a/test/suites/beta/integration/testdata/al2_no_mime_userdata_input.sh b/test/suites/beta/integration/testdata/al2_no_mime_userdata_input.sh new file mode 100644 index 000000000000..37058c604012 --- /dev/null +++ b/test/suites/beta/integration/testdata/al2_no_mime_userdata_input.sh @@ -0,0 +1,2 @@ +#!/bin/bash +echo "Running custom user data script" diff --git 
a/test/suites/beta/integration/testdata/al2_userdata_input.sh b/test/suites/beta/integration/testdata/al2_userdata_input.sh new file mode 100644 index 000000000000..afc1580817ae --- /dev/null +++ b/test/suites/beta/integration/testdata/al2_userdata_input.sh @@ -0,0 +1,10 @@ +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-Type: text/x-shellscript; charset="us-ascii" + +#!/bin/bash +echo "Running custom user data script" + +--BOUNDARY-- diff --git a/test/suites/beta/integration/testdata/amd_driver_input.sh b/test/suites/beta/integration/testdata/amd_driver_input.sh new file mode 100644 index 000000000000..58a10e211987 --- /dev/null +++ b/test/suites/beta/integration/testdata/amd_driver_input.sh @@ -0,0 +1,46 @@ +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary="BOUNDARY" + +--BOUNDARY +Content-Type: text/x-shellscript; charset="us-ascii" + +#!/bin/bash +cd +sudo amazon-linux-extras install epel -y +sudo yum update -y + +# Create a script to install the AMD Radeon GPU +cat << EOF > /tmp/amd-install.sh +#!/bin/bash +export echo PATH=/usr/local/bin:$PATH +aws s3 cp --recursive s3://ec2-amd-linux-drivers/latest/ . 
--no-sign-request +tar -xf amdgpu-pro-*rhel*.tar.xz +cd amdgpu-pro-20.20-1184451-rhel-7.8 +./amdgpu-pro-install -y --opencl=pal,legacy +systemctl disable amd-install.service +reboot +EOF +sudo chmod +x /tmp/amd-install.sh + +# Create a service that will run on system reboot +cat << EOF > /etc/systemd/system/amd-install.service +[Unit] +Description=install amd drivers + +[Service] +ExecStart=/bin/bash /tmp/amd-install.sh + +[Install] +WantedBy=multi-user.target +EOF +sudo systemctl enable amd-install.service + +# Run the EKS bootstrap script and then reboot +exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 +/etc/eks/bootstrap.sh '%s' --apiserver-endpoint '%s' --b64-cluster-ca '%s' \ +--use-max-pods false \ +--container-runtime containerd \ +--kubelet-extra-args '--node-labels=karpenter.sh/nodepool=%s,testing/cluster=unspecified' + +reboot +--BOUNDARY-- \ No newline at end of file diff --git a/test/suites/beta/integration/testdata/br_userdata_input.sh b/test/suites/beta/integration/testdata/br_userdata_input.sh new file mode 100644 index 000000000000..c157a6769852 --- /dev/null +++ b/test/suites/beta/integration/testdata/br_userdata_input.sh @@ -0,0 +1,4 @@ +[settings.kubernetes] +kube-api-qps = 30 +[settings.kubernetes.node-taints] +"node.cilium.io/agent-not-ready" = ["true:NoExecute"] diff --git a/test/suites/beta/integration/testdata/windows_userdata_input.ps1 b/test/suites/beta/integration/testdata/windows_userdata_input.ps1 new file mode 100644 index 000000000000..fcdb159f7ce7 --- /dev/null +++ b/test/suites/beta/integration/testdata/windows_userdata_input.ps1 @@ -0,0 +1 @@ +Write-Host "Running custom user data script" \ No newline at end of file diff --git a/test/suites/beta/integration/validation_test.go b/test/suites/beta/integration/validation_test.go new file mode 100644 index 000000000000..b04668550f21 --- /dev/null +++ b/test/suites/beta/integration/validation_test.go @@ -0,0 +1,185 @@ +/* +Licensed under the Apache License, 
Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package integration_test + +import ( + "fmt" + "time" + + "github.com/samber/lo" + v1 "k8s.io/api/core/v1" + "knative.dev/pkg/ptr" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1beta1 "github.com/aws/karpenter-core/pkg/apis/v1beta1" + "github.com/aws/karpenter/pkg/apis/v1beta1" +) + +var _ = Describe("Validation", func() { + Context("NodePool", func() { + It("should error when a restricted label is used in labels (karpenter.sh/nodepool)", func() { + nodePool.Spec.Template.Labels = map[string]string{ + corev1beta1.NodePoolLabelKey: "my-custom-nodepool", + } + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should error when a restricted label is used in labels (kubernetes.io/custom-label)", func() { + nodePool.Spec.Template.Labels = map[string]string{ + "kubernetes.io/custom-label": "custom-value", + } + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should allow a restricted label exception to be used in labels (node-restriction.kubernetes.io/custom-label)", func() { + nodePool.Spec.Template.Labels = map[string]string{ + v1.LabelNamespaceNodeRestriction + "/custom-label": "custom-value", + } + Expect(env.Client.Create(env.Context, nodePool)).To(Succeed()) + }) + It("should error when a requirement references a restricted label (karpenter.sh/nodepool)", func() { + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + 
Key: corev1beta1.NodePoolLabelKey, + Operator: v1.NodeSelectorOpIn, + Values: []string{"default"}, + }, + }...) + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should error when a requirement uses In but has no values", func() { + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1.LabelInstanceTypeStable, + Operator: v1.NodeSelectorOpIn, + Values: []string{}, + }, + }...) + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should error when a requirement uses an unknown operator", func() { + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: corev1beta1.CapacityTypeLabelKey, + Operator: "within", + Values: []string{corev1beta1.CapacityTypeSpot}, + }, + }...) + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should error when Gt is used with multiple integer values", func() { + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceMemory, + Operator: v1.NodeSelectorOpGt, + Values: []string{"1000000", "2000000"}, + }, + }...) + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should error when Lt is used with multiple integer values", func() { + nodePool.Spec.Template.Spec.Requirements = append(nodePool.Spec.Template.Spec.Requirements, []v1.NodeSelectorRequirement{ + { + Key: v1beta1.LabelInstanceMemory, + Operator: v1.NodeSelectorOpLt, + Values: []string{"1000000", "2000000"}, + }, + }...) 
+ Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should error when ttlSecondAfterEmpty is negative", func() { + nodePool.Spec.Disruption.ConsolidationPolicy = corev1beta1.ConsolidationPolicyWhenEmpty + nodePool.Spec.Disruption.ConsolidateAfter = &corev1beta1.NillableDuration{Duration: lo.ToPtr(-time.Second)} + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should error when ConsolidationPolicy=WhenUnderutilized is used with consolidateAfter", func() { + nodePool.Spec.Disruption.ConsolidationPolicy = corev1beta1.ConsolidationPolicyWhenUnderutilized + nodePool.Spec.Disruption.ConsolidateAfter = &corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Minute)} + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should error if imageGCHighThresholdPercent is less than imageGCLowThresholdPercent", func() { + nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ + ImageGCHighThresholdPercent: ptr.Int32(10), + ImageGCLowThresholdPercent: ptr.Int32(60), + } + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + It("should error if imageGCHighThresholdPercent or imageGCLowThresholdPercent is negative", func() { + nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ + ImageGCHighThresholdPercent: ptr.Int32(-10), + } + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + nodePool.Spec.Template.Spec.Kubelet = &corev1beta1.KubeletConfiguration{ + ImageGCLowThresholdPercent: ptr.Int32(-10), + } + Expect(env.Client.Create(env.Context, nodePool)).ToNot(Succeed()) + }) + }) + Context("EC2NodeClass", func() { + It("should error when amiSelectorTerms are not defined for amiFamily Custom", func() { + nodeClass.Spec.AMIFamily = &v1beta1.AMIFamilyCustom + Expect(env.Client.Create(env.Context, nodeClass)).ToNot(Succeed()) + }) + It("should fail for poorly formatted AMI ids", func() { + nodeClass.Spec.AMISelectorTerms = 
[]v1beta1.AMISelectorTerm{ + { + ID: "must-start-with-ami", + }, + } + Expect(env.Client.Create(env.Context, nodeClass)).ToNot(Succeed()) + }) + It("should succeed when tags don't contain restricted keys", func() { + nodeClass.Spec.Tags = map[string]string{"karpenter.sh/custom-key": "custom-value", "kubernetes.io/role/key": "custom-value"} + Expect(env.Client.Create(env.Context, nodeClass)).To(Succeed()) + }) + It("should error when tags contains a restricted key", func() { + nodeClass.Spec.Tags = map[string]string{"karpenter.sh/nodepool": "custom-value"} + Expect(env.Client.Create(env.Context, nodeClass)).ToNot(Succeed()) + + nodeClass.Spec.Tags = map[string]string{"karpenter.sh/managed-by": env.ClusterName} + Expect(env.Client.Create(env.Context, nodeClass)).ToNot(Succeed()) + + nodeClass.Spec.Tags = map[string]string{fmt.Sprintf("kubernetes.io/cluster/%s", env.ClusterName): "owned"} + Expect(env.Client.Create(env.Context, nodeClass)).ToNot(Succeed()) + }) + It("should fail when securityGroupSelector has id and other filters", func() { + nodeClass.Spec.SecurityGroupSelectorTerms = []v1beta1.SecurityGroupSelectorTerm{ + { + Tags: map[string]string{"karpenter.sh/discovery": env.ClusterName}, + ID: "sg-12345", + }, + } + Expect(env.Client.Create(env.Context, nodeClass)).ToNot(Succeed()) + }) + It("should fail when subnetSelector has id and other filters", func() { + nodeClass.Spec.SubnetSelectorTerms = []v1beta1.SubnetSelectorTerm{ + { + Tags: map[string]string{"karpenter.sh/discovery": env.ClusterName}, + ID: "subnet-12345", + }, + } + Expect(env.Client.Create(env.Context, nodeClass)).ToNot(Succeed()) + }) + It("should fail when amiSelector has id and other filters", func() { + nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{ + { + Tags: map[string]string{"karpenter.sh/discovery": env.ClusterName}, + ID: "ami-12345", + }, + } + Expect(env.Client.Create(env.Context, nodeClass)).ToNot(Succeed()) + }) + }) +})