Skip to content

Commit

Permalink
chore: Use Security Group Status Controller to Asynchronously Hydrate…
Browse files Browse the repository at this point in the history
… Security Group Data (#6069)
  • Loading branch information
engedaam committed Apr 25, 2024
1 parent 81175f2 commit e6fa442
Show file tree
Hide file tree
Showing 6 changed files with 119 additions and 34 deletions.
12 changes: 4 additions & 8 deletions pkg/cloudprovider/drift.go
Expand Up @@ -54,7 +54,7 @@ func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim *corev
if err != nil {
return "", fmt.Errorf("calculating ami drift, %w", err)
}
securitygroupDrifted, err := c.areSecurityGroupsDrifted(ctx, instance, nodeClass)
securitygroupDrifted, err := c.areSecurityGroupsDrifted(instance, nodeClass)
if err != nil {
return "", fmt.Errorf("calculating securitygroup drift, %w", err)
}
Expand Down Expand Up @@ -118,14 +118,10 @@ func (c *CloudProvider) isSubnetDrifted(ctx context.Context, instance *instance.

// Checks if the security groups are drifted, by comparing the security groups returned from the SecurityGroupProvider
// to the ec2 instance security groups
func (c *CloudProvider) areSecurityGroupsDrifted(ctx context.Context, ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) {
securitygroup, err := c.securityGroupProvider.List(ctx, nodeClass)
if err != nil {
return "", err
}
securityGroupIds := sets.New(lo.Map(securitygroup, func(sg *ec2.SecurityGroup, _ int) string { return aws.StringValue(sg.GroupId) })...)
func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) {
securityGroupIds := sets.New(lo.Map(nodeClass.Status.SecurityGroups, func(sg v1beta1.SecurityGroup, _ int) string { return sg.ID })...)
if len(securityGroupIds) == 0 {
return "", fmt.Errorf("no security groups are discovered")
return "", fmt.Errorf("no security groups are present in the status")
}

if !securityGroupIds.Equal(sets.New(ec2Instance.SecurityGroupIDs...)) {
Expand Down
57 changes: 45 additions & 12 deletions pkg/cloudprovider/suite_test.go
Expand Up @@ -138,6 +138,20 @@ var _ = Describe("CloudProvider", func() {
},
},
})
nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{
{
ID: "sg-test1",
Name: "securityGroup-test1",
},
{
ID: "sg-test2",
Name: "securityGroup-test2",
},
{
ID: "sg-test3",
Name: "securityGroup-test3",
},
}
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed())
})
Expand Down Expand Up @@ -586,6 +600,11 @@ var _ = Describe("CloudProvider", func() {
},
},
})
nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{
{
ID: validSecurityGroup,
},
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
instanceTypes, err := cloudProvider.GetInstanceTypes(ctx, nodePool)
Expect(err).ToNot(HaveOccurred())
Expand Down Expand Up @@ -671,7 +690,8 @@ var _ = Describe("CloudProvider", func() {
Expect(isDrifted).To(BeEmpty())
})
It("should return an error if the security groups are empty", func() {
awsEnv.EC2API.DescribeSecurityGroupsOutput.Set(&ec2.DescribeSecurityGroupsOutput{SecurityGroups: []*ec2.SecurityGroup{}})
nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{}
ExpectApplied(ctx, env.Client, nodeClass)
// Instance is a reference to what we return in the GetInstances call
instance.SecurityGroups = []*ec2.GroupIdentifier{{GroupId: aws.String(fake.SecurityGroupID())}}
_, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expand All @@ -692,18 +712,17 @@ var _ = Describe("CloudProvider", func() {
Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift))
})
It("should return drifted if more security groups are present than instance security groups then discovered from nodeclass", func() {
awsEnv.EC2API.DescribeSecurityGroupsOutput.Set(&ec2.DescribeSecurityGroupsOutput{
SecurityGroups: []*ec2.SecurityGroup{
{
GroupId: aws.String(validSecurityGroup),
GroupName: aws.String("test-securitygroup"),
},
{
GroupId: aws.String(fake.SecurityGroupID()),
GroupName: aws.String("test-securitygroup"),
},
nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{
{
ID: validSecurityGroup,
Name: "test-securitygroup",
},
})
{
ID: fake.SecurityGroupID(),
Name: "test-securitygroup",
},
}
ExpectApplied(ctx, env.Client, nodeClass)
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift))
Expand Down Expand Up @@ -777,6 +796,13 @@ var _ = Describe("CloudProvider", func() {
},
},
},
Status: v1beta1.EC2NodeClassStatus{
SecurityGroups: []v1beta1.SecurityGroup{
{
ID: validSecurityGroup,
},
},
},
}
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})
nodeClaim.Annotations = lo.Assign(nodeClaim.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})
Expand Down Expand Up @@ -1013,6 +1039,13 @@ var _ = Describe("CloudProvider", func() {
},
},
},
Status: v1beta1.EC2NodeClassStatus{
SecurityGroups: []v1beta1.SecurityGroup{
{
ID: "sg-test1",
},
},
},
})
nodePool2 := coretest.NodePool(corev1beta1.NodePool{
Spec: corev1beta1.NodePoolSpec{
Expand Down
12 changes: 12 additions & 0 deletions pkg/providers/instance/suite_test.go
Expand Up @@ -106,11 +106,23 @@ var _ = Describe("InstanceProvider", func() {
},
},
})
nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{
{
ID: "sg-test1",
},
{
ID: "sg-test2",
},
{
ID: "sg-test3",
},
}
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed())
})
It("should return an ICE error when all attempted instance types return an ICE error", func() {
ExpectApplied(ctx, env.Client, nodeClaim, nodePool, nodeClass)
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
awsEnv.EC2API.InsufficientCapacityPools.Set([]fake.CapacityPool{
{CapacityType: corev1beta1.CapacityTypeOnDemand, InstanceType: "m5.xlarge", Zone: "test-zone-1a"},
{CapacityType: corev1beta1.CapacityTypeOnDemand, InstanceType: "m5.xlarge", Zone: "test-zone-1b"},
Expand Down
22 changes: 22 additions & 0 deletions pkg/providers/instancetype/suite_test.go
Expand Up @@ -155,6 +155,28 @@ var _ = Describe("InstanceTypeProvider", func() {
},
},
})
nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{
{
ID: "sg-test1",
},
{
ID: "sg-test2",
},
{
ID: "sg-test3",
},
}
windowsNodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{
{
ID: "sg-test1",
},
{
ID: "sg-test2",
},
{
ID: "sg-test3",
},
}
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed())
})
Expand Down
28 changes: 14 additions & 14 deletions pkg/providers/launchtemplate/launchtemplate.go
Expand Up @@ -163,28 +163,28 @@ func (p *DefaultProvider) createAMIOptions(ctx context.Context, nodeClass *v1bet
if err != nil {
return nil, err
}
// Relying on the status rather than an API call means that Karpenter is subject to a race
// condition where EC2NodeClass spec changes haven't propagated to the status once a node
// has launched.
// If a user changes their EC2NodeClass and shortly after Karpenter launches a node,
// in the worst case, the node could be drifted and re-created.
// TODO @aengeda: add status generation fields to gate node creation until the status is updated from a spec change
// Get constrained security groups
securityGroups, err := p.securityGroupProvider.List(ctx, nodeClass)
if err != nil {
return nil, err
}
if len(securityGroups) == 0 {
return nil, fmt.Errorf("no security groups exist given constraints")
if len(nodeClass.Status.SecurityGroups) == 0 {
return nil, fmt.Errorf("no security groups are present in the status")
}
options := &amifamily.Options{
ClusterName: options.FromContext(ctx).ClusterName,
ClusterEndpoint: p.ClusterEndpoint,
ClusterCIDR: p.ClusterCIDR.Load(),
InstanceProfile: instanceProfile,
InstanceStorePolicy: nodeClass.Spec.InstanceStorePolicy,
SecurityGroups: lo.Map(securityGroups, func(s *ec2.SecurityGroup, _ int) v1beta1.SecurityGroup {
return v1beta1.SecurityGroup{ID: aws.StringValue(s.GroupId), Name: aws.StringValue(s.GroupName)}
}),
Tags: tags,
Labels: labels,
CABundle: p.CABundle,
KubeDNSIP: p.KubeDNSIP,
NodeClassName: nodeClass.Name,
SecurityGroups: nodeClass.Status.SecurityGroups,
Tags: tags,
Labels: labels,
CABundle: p.CABundle,
KubeDNSIP: p.KubeDNSIP,
NodeClassName: nodeClass.Name,
}
if nodeClass.Spec.AssociatePublicIPAddress != nil {
options.AssociatePublicIPAddress = nodeClass.Spec.AssociatePublicIPAddress
Expand Down
22 changes: 22 additions & 0 deletions pkg/providers/launchtemplate/suite_test.go
Expand Up @@ -146,6 +146,17 @@ var _ = Describe("LaunchTemplate Provider", func() {
},
},
})
nodeClass.Status.SecurityGroups = []v1beta1.SecurityGroup{
{
ID: "sg-test1",
},
{
ID: "sg-test2",
},
{
ID: "sg-test3",
},
}
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypes(ctx)).To(Succeed())
Expect(awsEnv.InstanceTypesProvider.UpdateInstanceTypeOfferings(ctx)).To(Succeed())
})
Expand All @@ -171,6 +182,17 @@ var _ = Describe("LaunchTemplate Provider", func() {
},
},
})
nodeClass2.Status.SecurityGroups = []v1beta1.SecurityGroup{
{
ID: "sg-test1",
},
{
ID: "sg-test2",
},
{
ID: "sg-test3",
},
}
pods := []*v1.Pod{
coretest.UnschedulablePod(coretest.PodOptions{NodeRequirements: []v1.NodeSelectorRequirement{
{
Expand Down

0 comments on commit e6fa442

Please sign in to comment.