Skip to content

Commit

Permalink
feat: Add instance profile generation for v1beta1/NodeClass (#4575)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed Sep 22, 2023
1 parent 496b40f commit a2e85b5
Show file tree
Hide file tree
Showing 40 changed files with 1,008 additions and 183 deletions.
1 change: 1 addition & 0 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func main() {
awsCloudProvider,
op.SubnetProvider,
op.SecurityGroupProvider,
op.InstanceProfileProvider,
op.PricingProvider,
op.AMIProvider,
)...).
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require (
github.com/PuerkitoBio/goquery v1.8.1
github.com/avast/retry-go v3.0.0+incompatible
github.com/aws/aws-sdk-go v1.45.12
github.com/aws/karpenter-core v0.30.1-0.20230919163305-847fa5b2e74e
github.com/aws/karpenter-core v0.30.1-0.20230921184727-4dbd382251d3
github.com/aws/karpenter/tools/kompat v0.0.0-20230915222222-abfbf5fa3644
github.com/imdario/mergo v0.3.16
github.com/mitchellh/hashstructure/v2 v2.0.2
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHS
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/aws/aws-sdk-go v1.45.12 h1:+bKbbesGNPp+TeGrcqfrWuZoqcIEhjwKyBMHQPp80Jo=
github.com/aws/aws-sdk-go v1.45.12/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/aws/karpenter-core v0.30.1-0.20230919163305-847fa5b2e74e h1:KkjLabXp+syE0ax63shGVQTtUsgLBi/30HgjEks1hUo=
github.com/aws/karpenter-core v0.30.1-0.20230919163305-847fa5b2e74e/go.mod h1:SlwFUtchMljQlD5oWFBo3B7voGIa1q4XTwDyTJqJ2h0=
github.com/aws/karpenter-core v0.30.1-0.20230921184727-4dbd382251d3 h1:ObVEjXuIOLYyaDzGtrm6+AGVM4nA91K5oD6a1ByfhXk=
github.com/aws/karpenter-core v0.30.1-0.20230921184727-4dbd382251d3/go.mod h1:SlwFUtchMljQlD5oWFBo3B7voGIa1q4XTwDyTJqJ2h0=
github.com/aws/karpenter/tools/kompat v0.0.0-20230915222222-abfbf5fa3644 h1:M1fxGlOfvSqFYI01HL2zzvomy8e7LiTHk77KDuChWZQ=
github.com/aws/karpenter/tools/kompat v0.0.0-20230915222222-abfbf5fa3644/go.mod h1:l/TIBsaCx/IrOr0Xvlj/cHLOf05QzuQKEZ1hx2XWmfU=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
Expand Down
11 changes: 10 additions & 1 deletion pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,12 @@ spec:
type: string
type: object
role:
description: Role is the AWS identity that nodes use.
description: Role is the AWS identity that nodes use. This field is
immutable. Marking this field as immutable avoids concerns around
terminating managed instance profiles from running instances. This
field may be made mutable in the future, assuming the correct garbage
collection and drift handling is implemented for the old instance
profiles on an update.
type: string
securityGroupSelectorTerms:
description: SecurityGroupSelectorTerms is a list of or security group
Expand Down Expand Up @@ -340,6 +345,10 @@ spec:
- requirements
type: object
type: array
instanceProfile:
description: InstanceProfile contains the resolved instance profile
for the role
type: string
securityGroups:
description: SecurityGroups contains the current Security Groups values
that are available to the cluster under the SecurityGroups selectors.
Expand Down
7 changes: 0 additions & 7 deletions pkg/apis/crds/karpenter.sh_nodeclaims.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ spec:
kind: NodeClaim
listKind: NodeClaimList
plural: nodeclaims
shortNames:
- nc
- ncs
singular: nodeclaim
scope: Cluster
versions:
Expand Down Expand Up @@ -79,10 +76,6 @@ spec:
items:
type: string
type: array
containerRuntime:
description: ContainerRuntime is the container runtime to be used
with your worker nodes.
type: string
cpuCFSQuota:
description: CPUCFSQuota enables CPU CFS quota enforcement for
containers that specify CPU limits.
Expand Down
9 changes: 1 addition & 8 deletions pkg/apis/crds/karpenter.sh_nodepools.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,6 @@ spec:
kind: NodePool
listKind: NodePoolList
plural: nodepools
shortNames:
- np
- nps
singular: nodepool
scope: Cluster
versions:
Expand All @@ -30,7 +27,7 @@ spec:
name: v1beta1
schema:
openAPIV3Schema:
description: NodePool is the Schema for the Provisioners API
description: NodePool is the Schema for the NodePools API
properties:
apiVersion:
description: 'APIVersion defines the versioned schema of this representation
Expand Down Expand Up @@ -134,10 +131,6 @@ spec:
items:
type: string
type: array
containerRuntime:
description: ContainerRuntime is the container runtime
to be used with your worker nodes.
type: string
cpuCFSQuota:
description: CPUCFSQuota enables CPU CFS quota enforcement
for containers that specify CPU limits.
Expand Down
9 changes: 6 additions & 3 deletions pkg/apis/v1beta1/ec2nodeclass.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ type EC2NodeClassSpec struct {
// this UserData to ensure nodes are being provisioned with the correct configuration.
// +optional
UserData *string `json:"userData,omitempty"`
// Role is the AWS identity that nodes use.
// Role is the AWS identity that nodes use. This field is immutable.
// Marking this field as immutable avoids concerns around terminating managed instance profiles from running instances.
// This field may be made mutable in the future, assuming the correct garbage collection and drift handling is implemented
// for the old instance profiles on an update.
// +required
Role string `json:"role"`
// Tags to be applied on ec2 resources like instances and launch templates.
Expand Down Expand Up @@ -288,8 +291,8 @@ type EC2NodeClass struct {
IsNodeTemplate bool `json:"-" hash:"ignore"`
}

func (a *EC2NodeClass) Hash() string {
return fmt.Sprint(lo.Must(hashstructure.Hash(a.Spec, hashstructure.FormatV2, &hashstructure.HashOptions{
func (in *EC2NodeClass) Hash() string {
return fmt.Sprint(lo.Must(hashstructure.Hash(in.Spec, hashstructure.FormatV2, &hashstructure.HashOptions{
SlicesAsSets: true,
IgnoreZeroValue: true,
ZeroNil: true,
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/v1beta1/ec2nodeclass_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ import (
)

// SetDefaults for the EC2NodeClass
func (a *EC2NodeClass) SetDefaults(_ context.Context) {}
func (in *EC2NodeClass) SetDefaults(_ context.Context) {}
3 changes: 3 additions & 0 deletions pkg/apis/v1beta1/ec2nodeclass_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,7 @@ type EC2NodeClassStatus struct {
// cluster under the AMI selectors.
// +optional
AMIs []AMI `json:"amis,omitempty"`
// InstanceProfile contains the resolved instance profile for the role
// +optional
InstanceProfile string `json:"instanceProfile,omitempty"`
}
29 changes: 24 additions & 5 deletions pkg/apis/v1beta1/ec2nodeclass_validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
)

const (
userDataPath = "userData"
subnetSelectorTermsPath = "subnetSelectorTerms"
securityGroupSelectorTermsPath = "securityGroupSelectorTerms"
amiSelectorTermsPath = "amiSelectorTerms"
Expand All @@ -41,17 +40,27 @@ var (
maxVolumeSize = *resource.NewScaledQuantity(64, resource.Tera)
)

func (a *EC2NodeClass) SupportedVerbs() []admissionregistrationv1.OperationType {
// SupportedVerbs lists the admission operation types that this EC2NodeClass
// reports as supported: Create and Update.
func (in *EC2NodeClass) SupportedVerbs() []admissionregistrationv1.OperationType {
	verbs := make([]admissionregistrationv1.OperationType, 0, 2)
	verbs = append(verbs, admissionregistrationv1.Create, admissionregistrationv1.Update)
	return verbs
}

func (a *EC2NodeClass) Validate(ctx context.Context) (errs *apis.FieldError) {
// Validate checks the EC2NodeClass and returns the accumulated field errors.
//
// When ctx marks the call as an update, the baseline object stored in ctx is
// compared against the receiver first so that changes to immutable fields are
// reported. Object metadata and the spec are then validated in every case, and
// their errors are merged with any immutability errors.
func (in *EC2NodeClass) Validate(ctx context.Context) (errs *apis.FieldError) {
	if apis.IsInUpdate(ctx) {
		// The baseline is asserted to be an *EC2NodeClass; any other type panics here.
		errs = in.validateImmutableFields(apis.GetBaseline(ctx).(*EC2NodeClass))
	}
	metadataErrs := apis.ValidateObjectMetadata(in).ViaField("metadata")
	specErrs := in.Spec.validate(ctx).ViaField("spec")
	return errs.Also(metadataErrs, specErrs)
}

func (in *EC2NodeClass) validateImmutableFields(original *EC2NodeClass) (errs *apis.FieldError) {
return errs.Also(
apis.ValidateObjectMetadata(a).ViaField("metadata"),
a.Spec.validate(ctx).ViaField("spec"),
in.Spec.validateRoleImmutability(&original.Spec).ViaField("spec"),
)
}

Expand Down Expand Up @@ -278,3 +287,13 @@ func (in *EC2NodeClassSpec) validateTags() (errs *apis.FieldError) {
}
return errs
}

// validateRoleImmutability compares the receiver's Role with the Role of
// originalSpec. It returns nil when they match, and a FieldError on the "role"
// path when they differ.
func (in *EC2NodeClassSpec) validateRoleImmutability(originalSpec *EC2NodeClassSpec) *apis.FieldError {
	if in.Role == originalSpec.Role {
		return nil
	}
	return &apis.FieldError{
		Message: "Immutable field changed",
		Paths:   []string{"role"},
	}
}
6 changes: 6 additions & 0 deletions pkg/apis/v1beta1/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ func init() {
)
}

const (
// TerminationFinalizer is the finalizer name built from the API Group plus "/termination".
// NOTE(review): presumably added to EC2NodeClass objects so deletion waits for cleanup — confirm against the nodeclass controller.
TerminationFinalizer = Group + "/termination"
)

var (
CapacityTypeSpot = ec2.DefaultTargetCapacityTypeSpot
CapacityTypeOnDemand = ec2.DefaultTargetCapacityTypeOnDemand
Expand Down Expand Up @@ -100,6 +104,8 @@ var (
ResourceAWSPodENI v1.ResourceName = "vpc.amazonaws.com/pod-eni"
ResourcePrivateIPv4Address v1.ResourceName = "vpc.amazonaws.com/PrivateIPv4Address"

LabelNodeClass = Group + "/nodeclass"

LabelInstanceHypervisor = Group + "/instance-hypervisor"
LabelInstanceEncryptionInTransitSupported = Group + "/instance-encryption-in-transit-supported"
LabelInstanceCategory = Group + "/instance-category"
Expand Down
13 changes: 12 additions & 1 deletion pkg/apis/v1beta1/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
. "github.com/onsi/gomega"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"knative.dev/pkg/apis"
. "knative.dev/pkg/logging/testing"

"github.com/aws/aws-sdk-go/aws"
Expand Down Expand Up @@ -535,7 +536,7 @@ var _ = Describe("Validation", func() {
Expect(nodeClass.Hash()).To(Equal(otherNodeClass.Hash()))
})
})
Context("EC2NodeClass BlockDeviceMapping", func() {
Context("BlockDeviceMappings", func() {
It("should fail if more than one root volume is specified", func() {
nodeClass := test.EC2NodeClass(v1beta1.EC2NodeClass{
Spec: v1beta1.EC2NodeClassSpec{
Expand All @@ -562,4 +563,14 @@ var _ = Describe("Validation", func() {
Expect(nodeClass.Validate(ctx)).To(Not(Succeed()))
})
})
// Role Immutability: changing spec.role must be rejected when validating in an update context.
Context("Role Immutability", func() {
It("should fail when updating the role", func() {
// Outside an update context, a role value passes validation.
nc.Spec.Role = "test-role"
Expect(nc.Validate(ctx)).To(Succeed())

// Snapshot the current object as the update baseline, then change the role
// so that validation sees a difference between the baseline and the new object.
updateCtx := apis.WithinUpdate(ctx, nc.DeepCopy())
nc.Spec.Role = "test-role2"
Expect(nc.Validate(updateCtx)).ToNot(Succeed())
})
})
})
2 changes: 2 additions & 0 deletions pkg/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ const (
UnavailableOfferingsTTL = 3 * time.Minute
// InstanceTypesAndZonesTTL is the time before we refresh instance types and zones at EC2
InstanceTypesAndZonesTTL = 5 * time.Minute
// InstanceProfileTTL is the time before we re-check instance profile existence at IAM
InstanceProfileTTL = 15 * time.Minute
)

const (
Expand Down
11 changes: 10 additions & 1 deletion pkg/cloudprovider/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ func (c *CloudProvider) Create(ctx context.Context, nodeClaim *corev1beta1.NodeC
if errors.IsNotFound(err) {
c.recorder.Publish(cloudproviderevents.NodeClaimFailedToResolveNodeClass(nodeClaim))
}
return nil, fmt.Errorf("resolving node class, %w", err)
// We treat a failure to resolve the NodeClass as an ICE (insufficient capacity error) since this means there are no capacity possibilities for this NodeClaim
return nil, cloudprovider.NewInsufficientCapacityError(fmt.Errorf("resolving node class, %w", err))
}
instanceTypes, err := c.resolveInstanceTypes(ctx, nodeClaim, nodeClass)
if err != nil {
Expand Down Expand Up @@ -241,6 +242,10 @@ func (c *CloudProvider) resolveNodeClassFromNodeClaim(ctx context.Context, nodeC
if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: nodeClaim.Spec.NodeClass.Name}, nodeClass); err != nil {
return nil, err
}
// For the purposes of NodeClass CloudProvider resolution, we treat NodeClasses that are being deleted as NotFound
if !nodeClass.DeletionTimestamp.IsZero() {
return nil, errors.NewNotFound(v1beta1.SchemeGroupVersion.WithResource("ec2nodeclasses").GroupResource(), nodeClass.Name)
}
return nodeClass, nil
}

Expand All @@ -261,6 +266,10 @@ func (c *CloudProvider) resolveNodeClassFromNodePool(ctx context.Context, nodePo
if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: nodePool.Spec.Template.Spec.NodeClass.Name}, nodeClass); err != nil {
return nil, err
}
// For the purposes of NodeClass CloudProvider resolution, we treat NodeClasses that are being deleted as NotFound
if !nodeClass.DeletionTimestamp.IsZero() {
return nil, errors.NewNotFound(v1beta1.SchemeGroupVersion.WithResource("ec2nodeclasses").GroupResource(), nodeClass.Name)
}
return nodeClass, nil
}

Expand Down
6 changes: 4 additions & 2 deletions pkg/controllers/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
nodeclaimlink "github.com/aws/karpenter/pkg/controllers/nodeclaim/link"
"github.com/aws/karpenter/pkg/controllers/nodeclass"
"github.com/aws/karpenter/pkg/providers/amifamily"
"github.com/aws/karpenter/pkg/providers/instanceprofile"
"github.com/aws/karpenter/pkg/providers/pricing"
"github.com/aws/karpenter/pkg/providers/securitygroup"
"github.com/aws/karpenter/pkg/providers/subnet"
Expand All @@ -42,13 +43,14 @@ import (

func NewControllers(ctx context.Context, sess *session.Session, clk clock.Clock, kubeClient client.Client, recorder events.Recorder,
unavailableOfferings *cache.UnavailableOfferings, cloudProvider *cloudprovider.CloudProvider, subnetProvider *subnet.Provider,
securityGroupProvider *securitygroup.Provider, pricingProvider *pricing.Provider, amiProvider *amifamily.Provider) []controller.Controller {
securityGroupProvider *securitygroup.Provider, instanceProfileProvider *instanceprofile.Provider, pricingProvider *pricing.Provider,
amiProvider *amifamily.Provider) []controller.Controller {

logging.FromContext(ctx).With("version", project.Version).Debugf("discovered version")

linkController := nodeclaimlink.NewController(kubeClient, cloudProvider)
controllers := []controller.Controller{
nodeclass.NewNodeTemplateController(kubeClient, subnetProvider, securityGroupProvider, amiProvider),
nodeclass.NewNodeTemplateController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider),
linkController,
nodeclaimgarbagecollection.NewController(kubeClient, cloudProvider, linkController),
}
Expand Down

0 comments on commit a2e85b5

Please sign in to comment.