Skip to content

Commit

Permalink
chore: sync v1 staging branch with main (#6335)
Browse files Browse the repository at this point in the history
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Jonathan Innis <jonathan.innis.ji@gmail.com>
Co-authored-by: Nick Tran <10810510+njtran@users.noreply.github.com>
Co-authored-by: Jason Deal <jmdeal@amazon.com>
Co-authored-by: APICodeGen <APICodeGen@users.noreply.github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Calvin Huang <c@lvin.me>
Co-authored-by: Nathaniel Emerson <arkan@drakon.io>
Co-authored-by: njtran <njtran@amazon.com>
Co-authored-by: oddy <56793934+EigoOda@users.noreply.github.com>
Co-authored-by: Jonathan Innis <joinnis@amazon.com>
Co-authored-by: Reed Schalo <reed0schalo@gmail.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: StableRelease <StableRelease@users.noreply.github.com>
Co-authored-by: afreyermuth98 <56300858+afreyermuth98@users.noreply.github.com>
Co-authored-by: Justin Reasoner <gjreasoner@gmail.com>
Co-authored-by: Cameron McAvoy <cmcavoy@indeed.com>
  • Loading branch information
17 people committed Jun 7, 2024
1 parent c423767 commit 331f1ac
Show file tree
Hide file tree
Showing 226 changed files with 6,152 additions and 1,885 deletions.
4 changes: 2 additions & 2 deletions .github/actions/e2e/cleanup/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ inputs:
description: "The git commit, tag, or branch to check out. Requires a corresponding Karpenter snapshot release"
eksctl_version:
description: "Version of eksctl to install"
default: v0.169.0
default: v0.180.0
private_cluster:
description: "Whether the cluster that has to be deleted is private or not. Valid values are 'true' or 'false'"
default: 'false'
runs:
using: "composite"
steps:
- uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
ref: ${{ inputs.git_ref }}
- uses: ./.github/actions/e2e/install-eksctl
Expand Down
6 changes: 3 additions & 3 deletions .github/actions/e2e/install-karpenter/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ inputs:
required: true
k8s_version:
description: 'Version of Kubernetes to use for the launched cluster'
default: "1.29"
default: "1.30"
git_ref:
description: "The git commit, tag, or branch to check out. Requires a corresponding Karpenter snapshot release"
private_cluster:
Expand All @@ -30,7 +30,7 @@ inputs:
runs:
using: "composite"
steps:
- uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
ref: ${{ inputs.git_ref }}
- uses: ./.github/actions/e2e/install-helm
Expand All @@ -44,7 +44,7 @@ runs:
kubectl label ns kube-system scrape=enabled --overwrite=true
kubectl label ns kube-system pod-security.kubernetes.io/warn=restricted --overwrite=true
- name: login to ecr via docker
uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # v3.1.0
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
with:
registry: ${{ inputs.ecr_account_id }}.dkr.ecr.${{ inputs.ecr_region }}.amazonaws.com
logout: true
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/e2e/install-prometheus/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ inputs:
runs:
using: "composite"
steps:
- uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
ref: ${{ inputs.git_ref }}
- uses: ./.github/actions/e2e/install-helm
Expand Down
10 changes: 3 additions & 7 deletions .github/actions/e2e/run-tests-private-cluster/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ inputs:
required: true
k8s_version:
description: 'Version of Kubernetes to use for the launched cluster'
default: "1.29"
default: "1.30"
private_cluster:
description: "Whether to create a private cluster which does not add access to the public internet. Valid values are 'true' or 'false'"
default: 'false'
Expand All @@ -53,7 +53,7 @@ runs:
using: "composite"
steps:
- name: login to ecr via docker
uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # v3.1.0
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
with:
registry: ${{ inputs.account_id }}.dkr.ecr.${{ inputs.region }}.amazonaws.com
logout: true
Expand Down Expand Up @@ -125,10 +125,6 @@ runs:
- kubectl delete ec2nodeclass --all
- kubectl delete deployment --all
- PRIVATE_CLUSTER=$CLUSTER_NAME TEST_SUITE=$SUITE ENABLE_METRICS=$ENABLE_METRICS METRICS_REGION=$METRICS_REGION GIT_REF="$(git rev-parse HEAD)" CLUSTER_NAME=$CLUSTER_NAME CLUSTER_ENDPOINT="$(aws eks describe-cluster --name $CLUSTER_NAME --query "cluster.endpoint" --output text)" INTERRUPTION_QUEUE=$CLUSTER_NAME make e2etests
- aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/application --retention-in-days 30
- aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/dataplane --retention-in-days 30
- aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/host --retention-in-days 30
- aws logs put-retention-policy --log-group-name /aws/containerinsights/$CLUSTER_NAME/performance --retention-in-days 30
post_build:
commands:
# Describe karpenter pods
Expand Down Expand Up @@ -160,4 +156,4 @@ runs:
VPC_CB,
CLUSTER_VPC_ID,
EKS_CLUSTER_SG,
CLEANUP
CLEANUP
35 changes: 7 additions & 28 deletions .github/actions/e2e/setup-cluster/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ inputs:
required: true
k8s_version:
description: 'Version of Kubernetes to use for the launched cluster'
default: "1.29"
default: "1.30"
eksctl_version:
description: "Version of eksctl to install"
default: v0.175.0
default: v0.180.0
ip_family:
description: "IP Family of the cluster. Valid values are IPv4 or IPv6"
default: "IPv4"
Expand All @@ -50,7 +50,7 @@ inputs:
runs:
using: "composite"
steps:
- uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
ref: ${{ inputs.git_ref }}
- uses: ./.github/actions/e2e/install-eksctl
Expand Down Expand Up @@ -78,7 +78,6 @@ runs:
--capabilities CAPABILITY_NAMED_IAM \
--parameter-overrides "ClusterName=$CLUSTER_NAME" \
--tags "testing/type=e2e" "testing/cluster=$CLUSTER_NAME" "github.com/run-url=https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}" "karpenter.sh/discovery=$CLUSTER_NAME"
aws iam attach-role-policy --policy-arn arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy --role-name KarpenterNodeRole-$CLUSTER_NAME
- name: create or upgrade cluster
shell: bash
env:
Expand Down Expand Up @@ -153,9 +152,11 @@ runs:
minSize: 2
maxSize: 2
iam:
withAddonPolicies:
cloudWatch: true
instanceRolePermissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary"
taints:
- key: CriticalAddonsOnly
value: "true"
effect: NoSchedule
cloudWatch:
clusterLogging:
enableTypes: ["*"]
Expand All @@ -174,10 +175,6 @@ runs:
$KARPENTER_IAM
withOIDC: true
addons:
- name: amazon-cloudwatch-observability
# Pin addon version due to undiagnosed e2e failures after 1.6.0 release
version: '1.5.5-eksbuild.1'
permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary"
- name: vpc-cni
permissionsBoundary: "arn:aws:iam::$ACCOUNT_ID:policy/GithubActionsPermissionsBoundary"
- name: coredns
Expand Down Expand Up @@ -214,24 +211,6 @@ runs:
else
eksctl ${cmd} cluster -f clusterconfig.yaml
fi
# Adding taints after all necessary pods have scheduled to the managed node group nodes
# amazon-cloudwatch-observability pods do not tolerate CriticalAddonsOnly=true:NoSchedule and
# the amazon-cloudwatch-observability addon does not allow adding tolerations to the addon pods as part of the advanced configuration
# Overwrite existing taints to ensure we don't fail here on upgrade
kubectl taint nodes CriticalAddonsOnly=true:NoSchedule --all --overwrite
# We delete DaemonSets that we don't care about because it causes inconsistencies in scheduling due to
# dcgm-exporter and neuron-monitor selecting on specific instance types
# See https://github.com/kubernetes-sigs/karpenter/issues/715 for more detail
kubectl delete daemonsets -n amazon-cloudwatch dcgm-exporter neuron-monitor --ignore-not-found
# We patch the priorityClass onto all DaemonSets to ensure that DaemonSets always schedule to nodes so we don't get scheduling inconsistencies
# See https://karpenter.sh/docs/faq/#when-deploying-an-additional-daemonset-to-my-cluster-why-does-karpenter-not-scale-up-my-nodes-to-support-the-extra-daemonset for more detail
# Additionally, we patch an everything toleration onto the daemonsets to prevent them from being included in drain operations.
for DAEMONSET in "cloudwatch-agent" "cloudwatch-agent-windows" "fluent-bit" "fluent-bit-windows"; do
kubectl patch daemonset -n amazon-cloudwatch $DAEMONSET -p '{"spec":{"template":{"spec":{"priorityClassName":"system-node-critical","tolerations": [{"operator": "Exists"}]}}}}' --type=merge
done
- name: tag oidc provider of the cluster
if: always()
shell: bash
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/e2e/slack/notify/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ inputs:
runs:
using: "composite"
steps:
- uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
ref: ${{ inputs.git_ref }}
- id: get-run-name
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/e2e/upgrade-crds/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ runs:
role-to-assume: arn:aws:iam::${{ inputs.account_id }}:role/${{ inputs.role }}
aws-region: ${{ inputs.region }}
role-duration-seconds: 21600
- uses: actions/checkout@44c2b7a8a4ea60a981eaca3cf939b5f4305c123b # v4.1.5
- uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6
with:
ref: ${{ inputs.git_ref }}
- name: install-karpenter
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/install-deps/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ description: 'Installs Go Downloads and installs Karpenter Dependencies'
inputs:
k8sVersion:
description: Kubernetes version to use when installing the toolchain
default: "1.29.x"
default: "1.30.x"
runs:
using: "composite"
steps:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ci-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
if: github.repository == 'aws/karpenter-provider-aws'
strategy:
matrix:
k8sVersion: ["1.23.x", "1.24.x", "1.25.x", "1.26.x", "1.27.x", "1.28.x", "1.29.x"]
k8sVersion: ["1.23.x", "1.24.x", "1.25.x", "1.26.x", "1.27.x", "1.28.x", "1.29.x", "1.30.x"]
steps:
- uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
- uses: ./.github/actions/install-deps
Expand All @@ -22,7 +22,7 @@ jobs:
- run: K8S_VERSION=${{ matrix.k8sVersion }} make ci-test
- name: Send coverage
# should only send coverage once https://docs.coveralls.io/parallel-builds
if: matrix.k8sVersion == '1.29.x'
if: matrix.k8sVersion == '1.30.x'
env:
COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: goveralls -coverprofile=coverage.out -service=github
2 changes: 1 addition & 1 deletion .github/workflows/e2e-cleanup.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,4 @@ jobs:
region: ${{ inputs.region }}
cluster_name: ${{ inputs.cluster_name }}
git_ref: ${{ inputs.git_ref }}
eksctl_version: v0.169.0
eksctl_version: v0.180.0
7 changes: 4 additions & 3 deletions .github/workflows/e2e-matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
default: "us-east-2"
k8s_version:
type: string
default: "1.29"
default: "1.30"
cleanup:
type: boolean
required: true
Expand Down Expand Up @@ -41,7 +41,8 @@ on:
- "1.27"
- "1.28"
- "1.29"
default: "1.29"
- "1.30"
default: "1.30"
cleanup:
type: boolean
required: true
Expand Down Expand Up @@ -95,7 +96,7 @@ jobs:
statuses: write # ./.github/actions/commit-status/start
uses: ./.github/workflows/e2e-upgrade.yaml
with:
from_git_ref: 969530cc8ac4ee8a8c2efed9af823c44813b4ec2
from_git_ref: b3076dca62a81caae2d3c4af4fd378c83a901c48
to_git_ref: ${{ inputs.git_ref }}
region: ${{ inputs.region }}
k8s_version: ${{ inputs.k8s_version }}
Expand Down
20 changes: 6 additions & 14 deletions .github/workflows/e2e-upgrade.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ on:
- "1.27"
- "1.28"
- "1.29"
default: "1.29"
- "1.30"
default: "1.30"
cleanup:
required: true
default: true
Expand All @@ -40,7 +41,7 @@ on:
default: "us-east-2"
k8s_version:
type: string
default: "1.29"
default: "1.30"
cleanup:
required: true
type: boolean
Expand Down Expand Up @@ -90,7 +91,7 @@ jobs:
region: ${{ inputs.region }}
cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
k8s_version: ${{ inputs.k8s_version }}
eksctl_version: v0.175.0
eksctl_version: v0.180.0
ip_family: IPv4 # Set the value to IPv6 if IPv6 suite, else IPv4
git_ref: ${{ inputs.from_git_ref }}
ecr_account_id: ${{ vars.SNAPSHOT_ACCOUNT_ID }}
Expand All @@ -108,7 +109,7 @@ jobs:
region: ${{ inputs.region }}
cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
k8s_version: ${{ inputs.k8s_version }}
eksctl_version: v0.175.0
eksctl_version: v0.180.0
ip_family: IPv4 # Set the value to IPv6 if IPv6 suite, else IPv4
git_ref: ${{ inputs.to_git_ref }}
ecr_account_id: ${{ vars.SNAPSHOT_ACCOUNT_ID }}
Expand All @@ -135,15 +136,6 @@ jobs:
url: ${{ secrets.SLACK_WEBHOOK_URL }}
suite: Upgrade
git_ref: ${{ inputs.to_git_ref }}
- name: add log retention policy
if: always() && inputs.workflow_trigger != 'private_cluster'
env:
CLUSTER_NAME: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
run: |
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/application --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/dataplane --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/host --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/performance --retention-in-days 30
- name: dump logs on failure
uses: ./.github/actions/e2e/dump-logs
if: failure() || cancelled()
Expand All @@ -161,7 +153,7 @@ jobs:
region: ${{ inputs.region }}
cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
git_ref: ${{ inputs.to_git_ref }}
eksctl_version: v0.169.0
eksctl_version: v0.180.0
- if: always() && github.event_name == 'workflow_run'
uses: ./.github/actions/commit-status/end
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e-version-compatibility-trigger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
strategy:
fail-fast: false
matrix:
k8s_version: [ "1.23", "1.24", "1.25", "1.26", "1.27", "1.28", "1.29"]
k8s_version: [ "1.23", "1.24", "1.25", "1.26", "1.27", "1.28", "1.29", "1.30"]
uses: ./.github/workflows/e2e-matrix.yaml
with:
region: ${{ inputs.region || 'eu-west-1' }}
Expand Down
18 changes: 5 additions & 13 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ on:
- "1.27"
- "1.28"
- "1.29"
default: "1.29"
- "1.30"
default: "1.30"
cluster_name:
type: string
cleanup:
Expand All @@ -61,7 +62,7 @@ on:
required: true
k8s_version:
type: string
default: "1.29"
default: "1.30"
enable_metrics:
type: boolean
default: false
Expand Down Expand Up @@ -132,7 +133,7 @@ jobs:
region: ${{ inputs.region }}
cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
k8s_version: ${{ inputs.k8s_version }}
eksctl_version: v0.175.0
eksctl_version: v0.180.0
ip_family: ${{ contains(inputs.suite, 'IPv6') && 'IPv6' || 'IPv4' }} # Set the value to IPv6 if IPv6 suite, else IPv4
private_cluster: ${{ inputs.workflow_trigger == 'private_cluster' }}
git_ref: ${{ inputs.git_ref }}
Expand Down Expand Up @@ -187,15 +188,6 @@ jobs:
suite: ${{ inputs.suite }}
git_ref: ${{ inputs.git_ref }}
workflow_trigger: ${{ inputs.workflow_trigger }}
- name: add log retention policy
if: always() && inputs.workflow_trigger != 'private_cluster'
env:
CLUSTER_NAME: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
run: |
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/application --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/dataplane --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/host --retention-in-days 30
aws logs put-retention-policy --log-group-name /aws/containerinsights/"$CLUSTER_NAME"/performance --retention-in-days 30
- name: dump logs on failure
uses: ./.github/actions/e2e/dump-logs
if: (failure() || cancelled()) && inputs.workflow_trigger != 'private_cluster'
Expand All @@ -213,7 +205,7 @@ jobs:
region: ${{ inputs.region }}
cluster_name: ${{ steps.generate-cluster-name.outputs.CLUSTER_NAME }}
git_ref: ${{ inputs.git_ref }}
eksctl_version: v0.169.0
eksctl_version: v0.180.0
private_cluster: ${{ inputs.workflow_trigger == 'private_cluster' }}
- if: always() && github.event_name == 'workflow_run'
uses: ./.github/actions/commit-status/end
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/image-canary.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
name: ImageCanary
on:
workflow_dispatch:
schedule:
- cron: '0 */1 * * *'
jobs:
Expand All @@ -17,7 +18,7 @@ jobs:
aws-region: ${{ vars.READONLY_REGION }}
role-duration-seconds: 900
# Authenticate to public ECR to prevent rate limiting
- uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # v3.1.0
- uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
with:
registry: public.ecr.aws
logout: true
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/sweeper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ jobs:
if: vars.CI_ACCOUNT_ID != '' || github.event_name == 'workflow_dispatch'
strategy:
fail-fast: false
max-parallel: 1
matrix:
region: [us-east-2, us-west-2, eu-west-1, eu-north-1]
runs-on: ubuntu-latest
Expand Down
Loading

0 comments on commit 331f1ac

Please sign in to comment.