Skip to content

Commit

Permalink
test: Fix cleanup script to improve flexibility (#4106)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed Jun 22, 2023
1 parent 52db004 commit a44c2dd
Show file tree
Hide file tree
Showing 7 changed files with 318 additions and 8 deletions.
6 changes: 4 additions & 2 deletions .github/actions/e2e/create-cluster/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ runs:
--stack-name iam-${{ inputs.cluster_name }} \
--template-file $CLOUDFORMATION_PATH \
--capabilities CAPABILITY_NAMED_IAM \
--parameter-overrides "ClusterName=${{ inputs.cluster_name }}"
--parameter-overrides "ClusterName=${{ inputs.cluster_name }}" \
--tags "testing.karpenter.sh/type=e2e" "github.com/run-url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
- name: create or upgrade cluster
shell: bash
run: |
Expand All @@ -72,7 +73,8 @@ runs:
version: "${{ inputs.kubernetes_version }}"
tags:
karpenter.sh/discovery: ${{ inputs.cluster_name }}
github.com/run-url: "https://github.com/${{ inputs.git_repo }}/actions/runs/${{ github.run_id }}"
github.com/run-url: "https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"
testing.karpenter.sh/type: "e2e"
kubernetesNetworkConfig:
ipFamily: ${{ inputs.ip_family }}
managedNodeGroups:
Expand Down
11 changes: 9 additions & 2 deletions .github/workflows/sweeper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,17 @@ jobs:
if: github.repository == 'aws/karpenter' || github.event_name == 'workflow_dispatch'
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: configure aws credentials
uses: aws-actions/configure-aws-credentials@v2
with:
role-to-assume: arn:aws:iam::${{ vars.ACCOUNT_ID }}:role/${{ vars.ROLE_NAME }}
aws-region: ${{ vars.AWS_REGION }}
- uses: actions/checkout@v3
- run: ./test/hack/cleanup/cleanup.sh
- uses: actions/setup-go@v4
with:
go-version-file: test/hack/cleanup/go.mod
check-latest: true
cache-dependency-path: "test/hack/cleanup/go.sum"
- run: go run main.go
working-directory: ./test/hack/cleanup
name: "Run cleanup script"
7 changes: 4 additions & 3 deletions test/gha/cloudformation/iam_cloudformation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ Resources:
- cloudformation:CreateStack
- cloudformation:DeleteStack
- cloudformation:DescribeChangeSet
- cloudformation:DescribeStacks
- cloudformation:DescribeStackEvents
- cloudformation:ExecuteChangeSet
- cloudformation:GetTemplate
Expand All @@ -104,8 +103,10 @@ Resources:
- !Sub "arn:${AWS::Partition}:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/iam-*"
- !Sub "arn:${AWS::Partition}:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/eksctl-*"
- Effect: Allow
Action: cloudformation:ListStacks
Resource: !Sub "arn:${AWS::Partition}:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/*"
Action:
- cloudformation:ListStacks
- cloudformation:DescribeStacks
Resource: "*"
- Effect: Allow
Action:
- eks:CreateCluster
Expand Down
1 change: 0 additions & 1 deletion test/hack/cleanup/cleanup.sh

This file was deleted.

30 changes: 30 additions & 0 deletions test/hack/cleanup/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
module github.com/aws/karpenter/test/hack/cleanup

go 1.20

require (
github.com/aws/aws-sdk-go-v2/config v1.18.27
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0
github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2
github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0
github.com/samber/lo v1.38.1
go.uber.org/zap v1.24.0
)

require (
github.com/aws/aws-sdk-go-v2 v1.18.1 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.13.26 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.34 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.28 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 // indirect
github.com/aws/smithy-go v1.13.5 // indirect
github.com/jmespath/go-jmespath v0.4.0 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 // indirect
)
61 changes: 61 additions & 0 deletions test/hack/cleanup/go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
github.com/aws/aws-sdk-go-v2 v1.18.1 h1:+tefE750oAb7ZQGzla6bLkOwfcQCEtC5y2RqoqCeqKo=
github.com/aws/aws-sdk-go-v2 v1.18.1/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
github.com/aws/aws-sdk-go-v2/config v1.18.27 h1:Az9uLwmssTE6OGTpsFqOnaGpLnKDqNYOJzWuC6UAYzA=
github.com/aws/aws-sdk-go-v2/config v1.18.27/go.mod h1:0My+YgmkGxeqjXZb5BYme5pc4drjTnM+x1GJ3zv42Nw=
github.com/aws/aws-sdk-go-v2/credentials v1.13.26 h1:qmU+yhKmOCyujmuPY7tf5MxR/RKyZrOPO3V4DobiTUk=
github.com/aws/aws-sdk-go-v2/credentials v1.13.26/go.mod h1:GoXt2YC8jHUBbA4jr+W3JiemnIbkXOfxSXcisUsZ3os=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4 h1:LxK/bitrAr4lnh9LnIS6i7zWbCOdMsfzKFBI6LUCS0I=
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.13.4/go.mod h1:E1hLXN/BL2e6YizK1zFlYd8vsfi2GTjbjBazinMmeaM=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.34 h1:A5UqQEmPaCFpedKouS4v+dHCTUo2sKqhoKO9U5kxyWo=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.34/go.mod h1:wZpTEecJe0Btj3IYnDx/VlUzor9wm3fJHyvLpQF0VwY=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.28 h1:srIVS45eQuewqz6fKKu6ZGXaq6FuFg5NzgQBAM6g8Y4=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.28/go.mod h1:7VRpKQQedkfIEXb4k52I7swUnZP0wohVajJMRn3vsUw=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35 h1:LWA+3kDM8ly001vJ1X1waCuLJdtTl48gwkPKWy9sosI=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.35/go.mod h1:0Eg1YjxE0Bhn56lx+SHJwCzhW+2JGtizsrx+lCqrfm0=
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0 h1:XbDkc4FLeg1RfnqeblfbJvaEabqq9ByZl4zqyPFkfSc=
github.com/aws/aws-sdk-go-v2/service/cloudformation v1.30.0/go.mod h1:SwQFcCs9Rog8hSHm+81KBkAK+UKLXErA/1ChaEI8mLE=
github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2 h1:PWGu2JhCb/XJlJ7SSFJq76pxk4xWsN76nZxh7TzMHx0=
github.com/aws/aws-sdk-go-v2/service/cloudwatch v1.26.2/go.mod h1:2KOZkkzMDZCo/aLzPhys06mHNkiU74u85aMJA3PLRvg=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0 h1:P4dyjm49F2kKws0FpouBC6fjVImACXKt752+CWa01lM=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.102.0/go.mod h1:tIctCeX9IbzsUTKHt53SVEcgyfxV2ElxJeEB+QUbc4M=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28 h1:bkRyG4a929RCnpVSTvLM2j/T4ls015ZhhYApbmYs15s=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.28/go.mod h1:jj7znCIg05jXlaGBlFMGP8+7UN3VtCkRBG2spnmRQkU=
github.com/aws/aws-sdk-go-v2/service/sso v1.12.12 h1:nneMBM2p79PGWBQovYO/6Xnc2ryRMw3InnDJq1FHkSY=
github.com/aws/aws-sdk-go-v2/service/sso v1.12.12/go.mod h1:HuCOxYsF21eKrerARYO6HapNeh9GBNq7fius2AcwodY=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12 h1:2qTR7IFk7/0IN/adSFhYu9Xthr0zVFTgBrmPldILn80=
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.14.12/go.mod h1:E4VrHCPzmVB/KFXtqBGKb3c8zpbNBgKe3fisDNLAW5w=
github.com/aws/aws-sdk-go-v2/service/sts v1.19.2 h1:XFJ2Z6sNUUcAz9poj+245DMkrHE4h2j5I9/xD50RHfE=
github.com/aws/aws-sdk-go-v2/service/sts v1.19.2/go.mod h1:dp0yLPsLBOi++WTxzCjA/oZqi6NPIhoR+uF7GeMU9eg=
github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8=
github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo=
github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8=
github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/samber/lo v1.38.1 h1:j2XEAqXKb09Am4ebOg31SpvzUTTs6EN3VfgeLUhPdXM=
github.com/samber/lo v1.38.1/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw=
go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc=
go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
go.uber.org/zap v1.24.0 h1:FiJd5l1UOLj0wCgbSE0rwwXHzEdAZS6hiiSnxJN/D60=
go.uber.org/zap v1.24.0/go.mod h1:2kMP+WWQ8aoFoedH3T2sq6iJ2yDWpHbP0f6MQbS9Gkg=
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17 h1:3MTrJm4PyNL9NBqvYDSj3DHl46qQakyfqfWo4jgfaEM=
golang.org/x/exp v0.0.0-20220303212507-bbda1eaf7a17/go.mod h1:lgLbSvA5ygNOMpwM/9anMpWVlVJ7Z+cHWq/eFuinpGE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10=
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
210 changes: 210 additions & 0 deletions test/hack/cleanup/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"context"
"time"

"github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/service/cloudformation"
cloudformationtypes "github.com/aws/aws-sdk-go-v2/service/cloudformation/types"
"github.com/aws/aws-sdk-go-v2/service/cloudwatch"
cloudwatchtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types"
"github.com/aws/aws-sdk-go-v2/service/ec2"
ec2types "github.com/aws/aws-sdk-go-v2/service/ec2/types"
"github.com/samber/lo"
"go.uber.org/zap"
)

const (
// expirationTTL is how far before "now" the expiration cutoff is placed;
// main computes the cutoff as time.Now().Add(-expirationTTL).
expirationTTL = time.Hour * 12
// karpenterMetricNamespace is the CloudWatch namespace that fireMetric
// publishes its data points under.
karpenterMetricNamespace = "testing.karpenter.sh/cleanup"

// karpenterProvisionerNameTag is the tag key getOldInstances filters EC2
// instances on.
karpenterProvisionerNameTag = "karpenter.sh/provisioner-name"
// karpenterLaunchTemplateTag is the tag key getOldLaunchTemplates filters
// launch templates on.
karpenterLaunchTemplateTag = "karpenter.k8s.aws/cluster"
// githubRunURLTag is the tag key a CloudFormation stack must carry for
// getOldStacks to select it.
githubRunURLTag = "github.com/run-url"
)

// main sweeps AWS resources left behind by Karpenter test runs. It resolves an
// expiration cutoff of now minus expirationTTL and then, in order, terminates
// expired test instances, deletes expired test CloudFormation stacks, and
// deletes expired test launch templates. After each step it publishes a
// CloudWatch metric with the number of resources removed.
//
// Failures to load the AWS config, build the logger, or list resources panic
// (via lo.Must). Failures to delete a resource or publish a metric are logged
// and the sweep continues with the next step.
func main() {
	ctx := context.Background()
	cfg := lo.Must(config.LoadDefaultConfig(ctx))

	logger := lo.Must(zap.NewProduction()).Sugar()
	// Flush buffered log entries on exit. The error is deliberately ignored:
	// there is nothing useful left to do with a failed flush at this point.
	defer func() { _ = logger.Sync() }()

	expirationTime := time.Now().Add(-expirationTTL)

	logger.With("expiration-time", expirationTime.String()).Infof("resolved expiration time for all resources")

	ec2Client := ec2.NewFromConfig(cfg)
	cloudFormationClient := cloudformation.NewFromConfig(cfg)
	cloudWatchClient := cloudwatch.NewFromConfig(cfg)

	// Terminate any old instances that were provisioned by Karpenter as part of testing.
	// All discovered instances are terminated with a single batched API call.
	ids := getOldInstances(ctx, ec2Client, expirationTime)
	instanceLogger := logger.With("ids", ids, "count", len(ids))
	instanceLogger.Infof("discovered test instances to delete")
	terminated := 0
	if len(ids) > 0 {
		if _, err := ec2Client.TerminateInstances(ctx, &ec2.TerminateInstancesInput{
			InstanceIds: ids,
		}); err != nil {
			instanceLogger.Errorf("terminating test instances, %v", err)
		} else {
			instanceLogger.Infof("terminated test instances")
			terminated = len(ids)
		}
	}
	// Publish the count unconditionally (0 when nothing was terminated) so this
	// metric is emitted on every run, like the stack and launch template
	// metrics below.
	if err := fireMetric(ctx, cloudWatchClient, "InstancesDeleted", float64(terminated)); err != nil {
		logger.With("name", "InstancesDeleted").Errorf("firing metric, %v", err)
	}

	// Terminate any old stacks that were provisioned as part of testing
	// We execute these in serial since we will most likely get rate limited if we try to delete these too aggressively
	names := getOldStacks(ctx, cloudFormationClient, expirationTime)
	logger.With("names", names, "count", len(names)).Infof("discovered test stacks to delete")
	deleted := 0
	for i := range names {
		if _, err := cloudFormationClient.DeleteStack(ctx, &cloudformation.DeleteStackInput{
			StackName: lo.ToPtr(names[i]),
		}); err != nil {
			logger.With("name", names[i]).Errorf("deleting test stack, %v", err)
		} else {
			logger.With("name", names[i]).Infof("deleted test stack")
			deleted++
		}
	}
	if err := fireMetric(ctx, cloudWatchClient, "StacksDeleted", float64(deleted)); err != nil {
		logger.With("name", "StacksDeleted").Errorf("firing metric, %v", err)
	}

	// Terminate any old launch templates that were managed by Karpenter and were provisioned as part of testing
	names = getOldLaunchTemplates(ctx, ec2Client, expirationTime)
	logger.With("names", names, "count", len(names)).Infof("discovered test launch templates to delete")
	deleted = 0
	for i := range names {
		if _, err := ec2Client.DeleteLaunchTemplate(ctx, &ec2.DeleteLaunchTemplateInput{
			LaunchTemplateName: lo.ToPtr(names[i]),
		}); err != nil {
			logger.With("name", names[i]).Errorf("deleting test launch template, %v", err)
		} else {
			logger.With("name", names[i]).Infof("deleted test launch template")
			deleted++
		}
	}
	if err := fireMetric(ctx, cloudWatchClient, "LaunchTemplatesDeleted", float64(deleted)); err != nil {
		logger.With("name", "LaunchTemplatesDeleted").Errorf("firing metric, %v", err)
	}
}

// fireMetric publishes a single data point called name with the given value
// to CloudWatch under karpenterMetricNamespace. It returns whatever error the
// PutMetricData call produced, or nil on success.
func fireMetric(ctx context.Context, cloudWatchClient *cloudwatch.Client, name string, value float64) error {
	datum := cloudwatchtypes.MetricDatum{
		MetricName: &name,
		Value:      &value,
	}
	input := &cloudwatch.PutMetricDataInput{
		Namespace:  lo.ToPtr(karpenterMetricNamespace),
		MetricData: []cloudwatchtypes.MetricDatum{datum},
	}
	if _, err := cloudWatchClient.PutMetricData(ctx, input); err != nil {
		return err
	}
	return nil
}

// getOldInstances returns the IDs of running EC2 instances that carry the
// karpenterProvisionerNameTag tag key, were launched before expirationTime,
// and are not tagged with "kubernetes.io/cluster/KITInfrastructure". It pages
// through DescribeInstances until no NextToken is returned and panics (via
// lo.Must) if any page fails to load.
//
// Instances that report no launch time are skipped rather than treated as
// expired, because their age cannot be established.
func getOldInstances(ctx context.Context, ec2Client *ec2.Client, expirationTime time.Time) (ids []string) {
	// Instances carrying this tag key are never selected for termination.
	const excludedTagKey = "kubernetes.io/cluster/KITInfrastructure"

	// The filters are identical for every page, so build them once.
	filters := []ec2types.Filter{
		{
			Name:   lo.ToPtr("instance-state-name"),
			Values: []string{string(ec2types.InstanceStateNameRunning)},
		},
		{
			Name:   lo.ToPtr("tag-key"),
			Values: []string{karpenterProvisionerNameTag},
		},
	}

	var nextToken *string
	for {
		out := lo.Must(ec2Client.DescribeInstances(ctx, &ec2.DescribeInstancesInput{
			Filters:   filters,
			NextToken: nextToken,
		}))

		for _, res := range out.Reservations {
			for i := range res.Instances {
				instance := &res.Instances[i]
				// lo.FromPtr would turn a nil launch time into the zero time, which
				// is before every cutoff; skip instead of terminating on missing data.
				if instance.LaunchTime == nil || !instance.LaunchTime.Before(expirationTime) {
					continue
				}
				if _, excluded := lo.Find(instance.Tags, func(t ec2types.Tag) bool {
					return lo.FromPtr(t.Key) == excludedTagKey
				}); excluded {
					continue
				}
				ids = append(ids, lo.FromPtr(instance.InstanceId))
			}
		}

		nextToken = out.NextToken
		if nextToken == nil {
			break
		}
	}
	return ids
}

// getOldStacks returns the names of CloudFormation stacks that carry the
// githubRunURLTag tag key, were created before expirationTime, and are not
// already deleted or being deleted. It pages through DescribeStacks until no
// NextToken is returned and panics (via lo.Must) if any page fails to load.
//
// Stacks that report no creation time are skipped rather than treated as
// expired, because their age cannot be established.
func getOldStacks(ctx context.Context, cloudFormationClient *cloudformation.Client, expirationTime time.Time) (names []string) {
	var nextToken *string
	for {
		out := lo.Must(cloudFormationClient.DescribeStacks(ctx, &cloudformation.DescribeStacksInput{
			NextToken: nextToken,
		}))

		for i := range out.Stacks {
			stack := &out.Stacks[i]
			// Nothing to do for stacks that are gone or already on their way out.
			if stack.StackStatus == cloudformationtypes.StackStatusDeleteComplete ||
				stack.StackStatus == cloudformationtypes.StackStatusDeleteInProgress {
				continue
			}
			// lo.FromPtr would turn a nil creation time into the zero time, which
			// is before every cutoff; skip instead of deleting on missing data.
			if stack.CreationTime == nil || !stack.CreationTime.Before(expirationTime) {
				continue
			}
			if _, found := lo.Find(stack.Tags, func(t cloudformationtypes.Tag) bool {
				return lo.FromPtr(t.Key) == githubRunURLTag
			}); !found {
				continue
			}
			names = append(names, lo.FromPtr(stack.StackName))
		}

		nextToken = out.NextToken
		if nextToken == nil {
			break
		}
	}
	return names
}

// getOldLaunchTemplates returns the names of EC2 launch templates that carry
// the karpenterLaunchTemplateTag tag key and were created before
// expirationTime. It pages through DescribeLaunchTemplates until no NextToken
// is returned and panics (via lo.Must) if any page fails to load.
//
// Launch templates that report no creation time are skipped rather than
// treated as expired, because their age cannot be established.
func getOldLaunchTemplates(ctx context.Context, ec2Client *ec2.Client, expirationTime time.Time) (names []string) {
	// The filters are identical for every page, so build them once.
	filters := []ec2types.Filter{
		{
			Name:   lo.ToPtr("tag-key"),
			Values: []string{karpenterLaunchTemplateTag},
		},
	}

	var nextToken *string
	for {
		out := lo.Must(ec2Client.DescribeLaunchTemplates(ctx, &ec2.DescribeLaunchTemplatesInput{
			Filters:   filters,
			NextToken: nextToken,
		}))

		for i := range out.LaunchTemplates {
			launchTemplate := &out.LaunchTemplates[i]
			// lo.FromPtr would turn a nil creation time into the zero time, which
			// is before every cutoff; skip instead of deleting on missing data.
			if launchTemplate.CreateTime == nil || !launchTemplate.CreateTime.Before(expirationTime) {
				continue
			}
			names = append(names, lo.FromPtr(launchTemplate.LaunchTemplateName))
		}

		nextToken = out.NextToken
		if nextToken == nil {
			break
		}
	}
	return names
}

0 comments on commit a44c2dd

Please sign in to comment.