From cb97dd9a5d880aaad32d4906877b80d747d0d1e8 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 02:08:16 -0500 Subject: [PATCH 01/25] Upgrade canary dependencies --- test/canary/canary.buildspec.yaml | 8 ++++---- test/canary/scripts/install_controller_helm.sh | 18 +++++++++++------- test/canary/scripts/run_test.sh | 9 +++++---- test/e2e/tests/test_inference_component.py | 1 + 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/test/canary/canary.buildspec.yaml b/test/canary/canary.buildspec.yaml index 5ffbdb70..23752b86 100644 --- a/test/canary/canary.buildspec.yaml +++ b/test/canary/canary.buildspec.yaml @@ -8,7 +8,7 @@ phases: # Get cached test image - aws ecr get-login-password --region $CLUSTER_REGION | docker login --username AWS --password-stdin $ECR_CACHE_URI || true - - docker pull ${ECR_CACHE_URI}:latest --quiet || true + # - docker pull ${ECR_CACHE_URI}:latest --quiet || true # Build test image - > @@ -19,15 +19,15 @@ phases: commands: # Run tests - docker run --name ack-canary $(env | cut -f1 -d= | sed 's/^/-e /') --mount type=bind,source="$(pwd)/",target="/${SERVICE}-controller/" ${ECR_CACHE_URI}:latest - + post_build: commands: - docker cp ack-canary:/sagemaker-controller/test/canary/integration_tests.xml /tmp/results.xml || true # Push test image to cache ECR repo - docker push ${ECR_CACHE_URI}:latest || true - + reports: IntegrationTestReport: files: - "results.xml" - base-directory: "/tmp" \ No newline at end of file + base-directory: "/tmp" diff --git a/test/canary/scripts/install_controller_helm.sh b/test/canary/scripts/install_controller_helm.sh index 4bf55026..8c839c42 100755 --- a/test/canary/scripts/install_controller_helm.sh +++ b/test/canary/scripts/install_controller_helm.sh @@ -8,13 +8,17 @@ function install_helm_chart() { local region="$3" local namespace="$4" - yq w -i helm/values.yaml "serviceAccount.annotations" "" - yq w -i helm/values.yaml 'serviceAccount.annotations."eks.amazonaws.com/role-arn"' "$oidc_role_arn" - yq w -i helm/values.yaml "aws.region" $region - yq w -i helm/values.yaml "log.level" "debug" - yq w -i helm/values.yaml "log.enable_development_logging" "true" - + yq eval '.serviceAccount.annotations = {}' -i helm/values.yaml + yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = \"$oidc_role_arn\"" -i helm/values.yaml + yq eval ".aws.region = \"$region\"" -i helm/values.yaml + yq eval '.log.level = "debug"' -i helm/values.yaml + yq eval '.log.enable_development_logging = true' -i helm/values.yaml + # yq w -i helm/values.yaml "serviceAccount.annotations" "" + # yq w -i helm/values.yaml 'serviceAccount.annotations."eks.amazonaws.com/role-arn"' "$oidc_role_arn" + # yq w -i helm/values.yaml "aws.region" $region + # yq w -i helm/values.yaml "log.level" "debug" + # yq w -i helm/values.yaml "log.enable_development_logging" "true" kubectl apply -f helm/crds helm install -n $namespace --create-namespace ack-$service-controller --skip-crds helm -} \ No newline at end of file +} diff --git a/test/canary/scripts/run_test.sh b/test/canary/scripts/run_test.sh index 094e7424..25ba9634 100755 --- a/test/canary/scripts/run_test.sh +++ b/test/canary/scripts/run_test.sh @@ -1,6 +1,6 @@ #!/bin/bash -# cleanup on EXIT regardles of error +# cleanup on EXIT regardles of error # Inputs to this file as environment variables # SERVICE @@ -12,7 +12,7 @@ set -euo pipefail export NAMESPACE=${NAMESPACE:-"ack-system"} -export AWS_DEFAULT_REGION=$SERVICE_REGION +export AWS_DEFAULT_REGION=$SERVICE_REGION export E2E_DIR=$SERVICE_REPO_PATH/test/e2e/ SCRIPTS_DIR=${SERVICE_REPO_PATH}/test/canary/scripts @@ -65,7 +65,7 @@ function cleanup { kubectl delete namespace $NAMESPACE cd $E2E_DIR - export PYTHONPATH=.. + export PYTHONPATH=.. python service_cleanup.py } @@ -80,6 +80,7 @@ create_oidc_role "$CLUSTER_NAME" "$CLUSTER_REGION" "$NAMESPACE" # Install service helm chart install_helm_chart $SERVICE $OIDC_ROLE_ARN $SERVICE_REGION $NAMESPACE +cat $SERVICE_REPO_PATH/helm/values.yaml echo "Log helm charts are deployed properly" kubectl -n $NAMESPACE get pods @@ -93,7 +94,7 @@ pushd $E2E_DIR # run tests echo "Run Tests" - pytest_args=( -n 15 --dist loadfile --log-cli-level INFO --junitxml ../canary/integration_tests.xml) + pytest_args=( -rA -n 15 --dist loadfile --log-cli-level INFO --junitxml ../canary/integration_tests.xml) declare pytest_marks if [[ $SERVICE_REGION =~ ^(eu-north-1|eu-west-3)$ ]]; then # If select_regions_1 true we run the notebook_instance test diff --git a/test/e2e/tests/test_inference_component.py b/test/e2e/tests/test_inference_component.py index 157e37b8..8a8d23f3 100644 --- a/test/e2e/tests/test_inference_component.py +++ b/test/e2e/tests/test_inference_component.py @@ -244,6 +244,7 @@ def update_inference_component_failed_test(self, inference_component, faulty_mod (_, faulty_model_resource) = faulty_model faulty_model_name = faulty_model_resource["spec"].get("modelName", None) spec["spec"]["specification"]["modelName"] = faulty_model_name + logging.info(f"Faulty model name: {faulty_model_name}") resource = k8s.patch_custom_resource(reference, spec) resource = k8s.wait_resource_consumed_by_controller(reference) assert resource is not None From 5115ec97d56217ad78a430e4c4dff3589c4940ff Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 02:21:34 -0500 Subject: [PATCH 02/25] update dockerfile --- test/canary/Dockerfile.canary | 64 ++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 12 deletions(-) diff --git a/test/canary/Dockerfile.canary b/test/canary/Dockerfile.canary index 177fb3c9..ef8c6ee1 100644 --- a/test/canary/Dockerfile.canary +++ b/test/canary/Dockerfile.canary @@ -1,23 +1,59 @@ -FROM public.ecr.aws/ubuntu/ubuntu:18.04 +FROM public.ecr.aws/ubuntu/ubuntu:22.04 -# Build time parameters +# Build time parameters ARG SERVICE=sagemaker +# Python parameters +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.12.8 +ARG PYTHON_SHORT_VERSION=3.12 +# Python won’t try to write .pyc or .pyo files on the import of source modules +# Force stdin, stdout and stderr to be totally unbuffered. Good for logging +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV PYTHONIOENCODING=UTF-8 + RUN apt-get update && apt-get install -y curl \ wget \ git \ - python3.8 \ - python3-pip \ - python3.8-dev \ vim \ sudo \ jq \ - unzip + unzip \ + zlib1g-dev \ + cmake \ + libssl-dev + +# Install python +RUN cd /tmp/ \ +&& wget https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \ +&& tar xzf Python-${PYTHON_VERSION}.tgz \ +&& cd Python-${PYTHON_VERSION} \ +&& ./configure --enable-optimizations --with-lto --with-computed-gotos --with-system-ffi \ +&& make -j "$(nproc)" \ +&& make altinstall \ +&& cd .. \ +&& rm -rf Python-${PYTHON_VERSION} \ +&& rm Python-${PYTHON_VERSION}.tgz \ +&& ln -s /usr/local/bin/python${PYTHON_SHORT_VERSION} /usr/local/bin/python \ +&& ln -s /usr/local/bin/python${PYTHON_SHORT_VERSION} /usr/local/bin/python3 \ +# This installation generate a .python_history file in the root directory leads sanity check to fail +&& rm -f /root/.python_history + +# Python Path +ENV PATH="/usr/local/bin:${PATH}" + +RUN python -m pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org +RUN pip install --no-cache-dir \ + setuptools # Install awscli RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.6.3.zip" -o "awscliv2.zip" \ && unzip -qq awscliv2.zip \ && ./aws/install +# RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ +# && unzip -qq awscliv2.zip \ +# && ./aws/install # Add yq repository and install yq RUN apt-get update && apt install -y software-properties-common \ @@ -28,23 +64,27 @@ RUN apt-get update && apt install -y software-properties-common \ RUN curl -LO "https://dl.k8s.io/release/v1.24.0/bin/linux/amd64/kubectl" \ && chmod +x ./kubectl \ && cp ./kubectl /bin +# RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \ +# && chmod +x ./kubectl \ +# && cp ./kubectl /bin # Install eksctl RUN curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && mv /tmp/eksctl /bin -# Install Helm +# Install Helm RUN curl -q -L "https://get.helm.sh/helm-v3.7.0-linux-amd64.tar.gz" | tar zxf - -C /usr/local/bin/ \ && mv /usr/local/bin/linux-amd64/helm /usr/local/bin/helm \ && rm -r /usr/local/bin/linux-amd64 \ - && chmod +x /usr/local/bin/helm + && chmod +x /usr/local/bin/helm +# RUN curl -q -L "https://get.helm.sh/helm-v3.19.2-linux-amd64.tar.gz" | tar zxf - -C /usr/local/bin/ \ +# && mv /usr/local/bin/linux-amd64/helm /usr/local/bin/helm \ +# && rm -r /usr/local/bin/linux-amd64 \ +# && chmod +x /usr/local/bin/helm ENV SERVICE_REPO_PATH=/$SERVICE-controller COPY ./test/e2e/requirements.txt requirements.txt -RUN ln -s /usr/bin/python3.8 /usr/bin/python \ - && python -m pip install --upgrade pip - RUN python -m pip install -r requirements.txt WORKDIR /$SERVICE_REPO_PATH -CMD ["./test/canary/scripts/run_test.sh"] \ No newline at end of file +CMD ["./test/canary/scripts/run_test.sh"] From eb9e9871383d86d910559d16f3c2a79c784ab475 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 02:48:29 -0500 Subject: [PATCH 03/25] disable buildspec --- test/canary/canary.buildspec.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/canary/canary.buildspec.yaml b/test/canary/canary.buildspec.yaml index 23752b86..5ec5fa26 100644 --- a/test/canary/canary.buildspec.yaml +++ b/test/canary/canary.buildspec.yaml @@ -11,10 +11,10 @@ phases: # - docker pull ${ECR_CACHE_URI}:latest --quiet || true # Build test image - - > - docker build -f ./test/canary/Dockerfile.canary . -t ${ECR_CACHE_URI}:latest - --build-arg SERVICE="${SERVICE##*/}" --quiet - || echo "Docker Build Failed" || true + # - > + # docker build -f ./test/canary/Dockerfile.canary . -t ${ECR_CACHE_URI}:latest + # --build-arg SERVICE="${SERVICE##*/}" --quiet + # || echo "Docker Build Failed" || true build: commands: # Run tests From ce1e24c0def260fabae5b2117a83f5adfb3429dc Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 02:51:13 -0500 Subject: [PATCH 04/25] revert yq --- .../canary/scripts/install_controller_helm.sh | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/canary/scripts/install_controller_helm.sh b/test/canary/scripts/install_controller_helm.sh index 8c839c42..c9e7c046 100755 --- a/test/canary/scripts/install_controller_helm.sh +++ b/test/canary/scripts/install_controller_helm.sh @@ -8,16 +8,16 @@ function install_helm_chart() { local region="$3" local namespace="$4" - yq eval '.serviceAccount.annotations = {}' -i helm/values.yaml - yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = \"$oidc_role_arn\"" -i helm/values.yaml - yq eval ".aws.region = \"$region\"" -i helm/values.yaml - yq eval '.log.level = "debug"' -i helm/values.yaml - yq eval '.log.enable_development_logging = true' -i helm/values.yaml - # yq w -i helm/values.yaml "serviceAccount.annotations" "" - # yq w -i helm/values.yaml 'serviceAccount.annotations."eks.amazonaws.com/role-arn"' "$oidc_role_arn" - # yq w -i helm/values.yaml "aws.region" $region - # yq w -i helm/values.yaml "log.level" "debug" - # yq w -i helm/values.yaml "log.enable_development_logging" "true" + # yq eval '.serviceAccount.annotations = {}' -i helm/values.yaml + # yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = \"$oidc_role_arn\"" -i helm/values.yaml + # yq eval ".aws.region = \"$region\"" -i helm/values.yaml + # yq eval '.log.level = "debug"' -i helm/values.yaml + # yq eval '.log.enable_development_logging = true' -i helm/values.yaml + yq w -i helm/values.yaml "serviceAccount.annotations" "" + yq w -i helm/values.yaml 'serviceAccount.annotations."eks.amazonaws.com/role-arn"' "$oidc_role_arn" + yq w -i helm/values.yaml "aws.region" $region + yq w -i helm/values.yaml "log.level" "debug" + yq w -i helm/values.yaml "log.enable_development_logging" "true" kubectl apply -f helm/crds helm install -n $namespace --create-namespace ack-$service-controller --skip-crds helm From dbd22b971d91ef059055b13c9b930b3f10828b19 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 03:10:22 -0500 Subject: [PATCH 05/25] build with old acktest --- test/canary/canary.buildspec.yaml | 12 ++++++------ test/e2e/requirements.txt | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test/canary/canary.buildspec.yaml b/test/canary/canary.buildspec.yaml index 5ec5fa26..6033a262 100644 --- a/test/canary/canary.buildspec.yaml +++ b/test/canary/canary.buildspec.yaml @@ -11,20 +11,20 @@ phases: # - docker pull ${ECR_CACHE_URI}:latest --quiet || true # Build test image - # - > - # docker build -f ./test/canary/Dockerfile.canary . -t ${ECR_CACHE_URI}:latest - # --build-arg SERVICE="${SERVICE##*/}" --quiet - # || echo "Docker Build Failed" || true + - > + docker build -f ./test/canary/Dockerfile.canary . -t ${ECR_CACHE_URI}:canary-release + --build-arg SERVICE="${SERVICE##*/}" --quiet + || echo "Docker Build Failed" || true build: commands: # Run tests - - docker run --name ack-canary $(env | cut -f1 -d= | sed 's/^/-e /') --mount type=bind,source="$(pwd)/",target="/${SERVICE}-controller/" ${ECR_CACHE_URI}:latest + - docker run --name ack-canary $(env | cut -f1 -d= | sed 's/^/-e /') --mount type=bind,source="$(pwd)/",target="/${SERVICE}-controller/" ${ECR_CACHE_URI}:canary-release post_build: commands: - docker cp ack-canary:/sagemaker-controller/test/canary/integration_tests.xml /tmp/results.xml || true # Push test image to cache ECR repo - - docker push ${ECR_CACHE_URI}:latest || true + - docker push ${ECR_CACHE_URI}:canary-release || true reports: IntegrationTestReport: diff --git a/test/e2e/requirements.txt b/test/e2e/requirements.txt index 3cea3154..15109257 100644 --- a/test/e2e/requirements.txt +++ b/test/e2e/requirements.txt @@ -1,4 +1,4 @@ -acktest @ git+https://github.com/aws-controllers-k8s/test-infra.git@c91073f388b3b42192aac300762fca2542fa39f0 +acktest @ git+https://github.com/aws-controllers-k8s/test-infra.git@66d07f4daa2ce12d92f07cb332d5342a0aea4feb pytest==8.0.2 black==20.8b1 flaky==3.7.0 From 9e8c1aaa84034f201f40e7e99d2671c87bb4f3b7 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 03:29:31 -0500 Subject: [PATCH 06/25] yq eval --- .../canary/scripts/install_controller_helm.sh | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/canary/scripts/install_controller_helm.sh b/test/canary/scripts/install_controller_helm.sh index c9e7c046..8c839c42 100755 --- a/test/canary/scripts/install_controller_helm.sh +++ b/test/canary/scripts/install_controller_helm.sh @@ -8,16 +8,16 @@ function install_helm_chart() { local region="$3" local namespace="$4" - # yq eval '.serviceAccount.annotations = {}' -i helm/values.yaml - # yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = \"$oidc_role_arn\"" -i helm/values.yaml - # yq eval ".aws.region = \"$region\"" -i helm/values.yaml - # yq eval '.log.level = "debug"' -i helm/values.yaml - # yq eval '.log.enable_development_logging = true' -i helm/values.yaml - yq w -i helm/values.yaml "serviceAccount.annotations" "" - yq w -i helm/values.yaml 'serviceAccount.annotations."eks.amazonaws.com/role-arn"' "$oidc_role_arn" - yq w -i helm/values.yaml "aws.region" $region - yq w -i helm/values.yaml "log.level" "debug" - yq w -i helm/values.yaml "log.enable_development_logging" "true" + yq eval '.serviceAccount.annotations = {}' -i helm/values.yaml + yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = \"$oidc_role_arn\"" -i helm/values.yaml + yq eval ".aws.region = \"$region\"" -i helm/values.yaml + yq eval '.log.level = "debug"' -i helm/values.yaml + yq eval '.log.enable_development_logging = true' -i helm/values.yaml + # yq w -i helm/values.yaml "serviceAccount.annotations" "" + # yq w -i helm/values.yaml 'serviceAccount.annotations."eks.amazonaws.com/role-arn"' "$oidc_role_arn" + # yq w -i helm/values.yaml "aws.region" $region + # yq w -i helm/values.yaml "log.level" "debug" + # yq w -i helm/values.yaml "log.enable_development_logging" "true" kubectl apply -f helm/crds helm install -n $namespace --create-namespace ack-$service-controller --skip-crds helm From 98d01cc1eaca4ed6f3b58b4fbceb2eddff1ca15d Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 03:46:44 -0500 Subject: [PATCH 07/25] yq eval fix --- test/canary/scripts/install_controller_helm.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/canary/scripts/install_controller_helm.sh b/test/canary/scripts/install_controller_helm.sh index 8c839c42..b86ac27a 100755 --- a/test/canary/scripts/install_controller_helm.sh +++ b/test/canary/scripts/install_controller_helm.sh @@ -9,8 +9,8 @@ function install_helm_chart() { local namespace="$4" yq eval '.serviceAccount.annotations = {}' -i helm/values.yaml - yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = \"$oidc_role_arn\"" -i helm/values.yaml - yq eval ".aws.region = \"$region\"" -i helm/values.yaml + yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = $oidc_role_arn" -i helm/values.yaml + yq eval ".aws.region = $region" -i helm/values.yaml yq eval '.log.level = "debug"' -i helm/values.yaml yq eval '.log.enable_development_logging = true' -i helm/values.yaml # yq w -i helm/values.yaml "serviceAccount.annotations" "" From 7434cc6388ce70170f6638a33aa11d1f4cf4de77 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 04:00:49 -0500 Subject: [PATCH 08/25] fix eval --- test/canary/scripts/install_controller_helm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/canary/scripts/install_controller_helm.sh b/test/canary/scripts/install_controller_helm.sh index b86ac27a..722ab617 100755 --- a/test/canary/scripts/install_controller_helm.sh +++ b/test/canary/scripts/install_controller_helm.sh @@ -8,9 +8,9 @@ function install_helm_chart() { local region="$3" local namespace="$4" - yq eval '.serviceAccount.annotations = {}' -i helm/values.yaml - yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = $oidc_role_arn" -i helm/values.yaml - yq eval ".aws.region = $region" -i helm/values.yaml + yq eval '.serviceAccount.annotations = ""' -i helm/values.yaml + yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = \"$oidc_role_arn\"" -i helm/values.yaml + yq eval ".aws.region = \"$region\"" -i helm/values.yaml yq eval '.log.level = "debug"' -i helm/values.yaml yq eval '.log.enable_development_logging = true' -i helm/values.yaml # yq w -i helm/values.yaml "serviceAccount.annotations" "" From b0042833181bd9d988b71ad54ac60cba5c385bdc Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 04:22:05 -0500 Subject: [PATCH 09/25] remove annotations --- test/canary/canary.buildspec.yaml | 2 +- test/canary/scripts/install_controller_helm.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/canary/canary.buildspec.yaml b/test/canary/canary.buildspec.yaml index 6033a262..4aaafb29 100644 --- a/test/canary/canary.buildspec.yaml +++ b/test/canary/canary.buildspec.yaml @@ -8,7 +8,7 @@ phases: # Get cached test image - aws ecr get-login-password --region $CLUSTER_REGION | docker login --username AWS --password-stdin $ECR_CACHE_URI || true - # - docker pull ${ECR_CACHE_URI}:latest --quiet || true + - docker pull ${ECR_CACHE_URI}:latest --quiet || true # Build test image - > diff --git a/test/canary/scripts/install_controller_helm.sh b/test/canary/scripts/install_controller_helm.sh index 722ab617..28579bdf 100755 --- a/test/canary/scripts/install_controller_helm.sh +++ b/test/canary/scripts/install_controller_helm.sh @@ -8,7 +8,7 @@ function install_helm_chart() { local region="$3" local namespace="$4" - yq eval '.serviceAccount.annotations = ""' -i helm/values.yaml + # yq eval '.serviceAccount.annotations = ""' -i helm/values.yaml yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = \"$oidc_role_arn\"" -i helm/values.yaml yq eval ".aws.region = \"$region\"" -i helm/values.yaml yq eval '.log.level = "debug"' -i helm/values.yaml From 6dfdd8c71746bfec72a3931402b8b83e57dfe5c4 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 05:11:45 -0500 Subject: [PATCH 10/25] fix role arn --- test/canary/scripts/install_controller_helm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/canary/scripts/install_controller_helm.sh b/test/canary/scripts/install_controller_helm.sh index 28579bdf..7a6efd43 100755 --- a/test/canary/scripts/install_controller_helm.sh +++ b/test/canary/scripts/install_controller_helm.sh @@ -9,7 +9,7 @@ function install_helm_chart() { local namespace="$4" # yq eval '.serviceAccount.annotations = ""' -i helm/values.yaml - yq eval ".serviceAccount.annotations.\"eks.amazonaws.com\" = \"$oidc_role_arn\"" -i helm/values.yaml + yq eval ".serviceAccount.annotations.\"eks.amazonaws.com/role-arn\" = \"$oidc_role_arn\"" -i helm/values.yaml yq eval ".aws.region = \"$region\"" -i helm/values.yaml yq eval '.log.level = "debug"' -i helm/values.yaml yq eval '.log.enable_development_logging = true' -i helm/values.yaml From d55b8ad017e6b36c837dde87ff290034ec0bdb5a Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 06:41:18 -0500 Subject: [PATCH 11/25] merge main --- test/canary/scripts/run_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/canary/scripts/run_test.sh b/test/canary/scripts/run_test.sh index 25ba9634..20c40633 100755 --- a/test/canary/scripts/run_test.sh +++ b/test/canary/scripts/run_test.sh @@ -47,7 +47,7 @@ function cleanup { kubectl delete modelbiasjobdefinitions --all kubectl delete modelexplainabilityjobdefinitions --all kubectl delete modelqualityjobdefinitions --all - kubectl delete adoptedresources --all + # kubectl delete adoptedresources --all kubectl delete featuregroups --all kubectl delete modelpackages --all kubectl delete modelpackagegroups --all From 7140b29bc9d7193df06929ff381e26d9bbd1483f Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 06:42:22 -0500 Subject: [PATCH 12/25] bump black --- test/e2e/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/e2e/requirements.txt b/test/e2e/requirements.txt index 15109257..b97779a2 100644 --- a/test/e2e/requirements.txt +++ b/test/e2e/requirements.txt @@ -1,4 +1,4 @@ -acktest @ git+https://github.com/aws-controllers-k8s/test-infra.git@66d07f4daa2ce12d92f07cb332d5342a0aea4feb +acktest @ git+https://github.com/aws-controllers-k8s/test-infra.git@c91073f388b3b42192aac300762fca2542fa39f0 pytest==8.0.2 -black==20.8b1 +black>=24.3.0 flaky==3.7.0 From 822d9f127a5532ac1b37807837db2498c846fd3b Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 06:58:43 -0500 Subject: [PATCH 13/25] remove old yq commands --- test/canary/scripts/install_controller_helm.sh | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/canary/scripts/install_controller_helm.sh b/test/canary/scripts/install_controller_helm.sh index 7a6efd43..81cbd3b2 100755 --- a/test/canary/scripts/install_controller_helm.sh +++ b/test/canary/scripts/install_controller_helm.sh @@ -8,16 +8,10 @@ function install_helm_chart() { local region="$3" local namespace="$4" - # yq eval '.serviceAccount.annotations = ""' -i helm/values.yaml yq eval ".serviceAccount.annotations.\"eks.amazonaws.com/role-arn\" = \"$oidc_role_arn\"" -i helm/values.yaml yq eval ".aws.region = \"$region\"" -i helm/values.yaml yq eval '.log.level = "debug"' -i helm/values.yaml yq eval '.log.enable_development_logging = true' -i helm/values.yaml - # yq w -i helm/values.yaml "serviceAccount.annotations" "" - # yq w -i helm/values.yaml 'serviceAccount.annotations."eks.amazonaws.com/role-arn"' "$oidc_role_arn" - # yq w -i helm/values.yaml "aws.region" $region - # yq w -i helm/values.yaml "log.level" "debug" - # yq w -i helm/values.yaml "log.enable_development_logging" "true" kubectl apply -f helm/crds helm install -n $namespace --create-namespace ack-$service-controller --skip-crds helm From e2dac701e69958fedbff9f8285f6f3df78900566 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 07:02:03 -0500 Subject: [PATCH 14/25] update requirements --- test/canary/scripts/run_test.sh | 2 +- test/e2e/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/canary/scripts/run_test.sh b/test/canary/scripts/run_test.sh index 20c40633..8501a27b 100755 --- a/test/canary/scripts/run_test.sh +++ b/test/canary/scripts/run_test.sh @@ -94,7 +94,7 @@ pushd $E2E_DIR # run tests echo "Run Tests" - pytest_args=( -rA -n 15 --dist loadfile --log-cli-level INFO --junitxml ../canary/integration_tests.xml) + pytest_args=( -n 15 --dist loadfile --log-cli-level INFO --junitxml ../canary/integration_tests.xml) declare pytest_marks if [[ $SERVICE_REGION =~ ^(eu-north-1|eu-west-3)$ ]]; then # If select_regions_1 true we run the notebook_instance test diff --git a/test/e2e/requirements.txt b/test/e2e/requirements.txt index b97779a2..201d8306 100644 --- a/test/e2e/requirements.txt +++ b/test/e2e/requirements.txt @@ -1,4 +1,4 @@ -acktest @ git+https://github.com/aws-controllers-k8s/test-infra.git@c91073f388b3b42192aac300762fca2542fa39f0 +acktest @ git+https://github.com/aws-controllers-k8s/test-infra.git@b9fd0d31598f7cc9e2952d9513501c6c2464b059 pytest==8.0.2 black>=24.3.0 flaky==3.7.0 From 6b5aa51e4cd3046660480bc8e1eae184ec12f3c2 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 07:04:03 -0500 Subject: [PATCH 15/25] remove cat --- test/canary/scripts/run_test.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/test/canary/scripts/run_test.sh b/test/canary/scripts/run_test.sh index 8501a27b..3035e088 100755 --- a/test/canary/scripts/run_test.sh +++ b/test/canary/scripts/run_test.sh @@ -80,7 +80,6 @@ create_oidc_role "$CLUSTER_NAME" "$CLUSTER_REGION" "$NAMESPACE" # Install service helm chart install_helm_chart $SERVICE $OIDC_ROLE_ARN $SERVICE_REGION $NAMESPACE -cat $SERVICE_REPO_PATH/helm/values.yaml echo "Log helm charts are deployed properly" kubectl -n $NAMESPACE get pods From d83367273998892464fea5b117165d60d5cb1512 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 07:20:33 -0500 Subject: [PATCH 16/25] add yq annotation --- test/canary/scripts/install_controller_helm.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/test/canary/scripts/install_controller_helm.sh b/test/canary/scripts/install_controller_helm.sh index 81cbd3b2..d8315f49 100755 --- a/test/canary/scripts/install_controller_helm.sh +++ b/test/canary/scripts/install_controller_helm.sh @@ -8,6 +8,7 @@ function install_helm_chart() { local region="$3" local namespace="$4" + yq eval ".serviceAccount.annotations = {}" -i helm/values.yaml yq eval ".serviceAccount.annotations.\"eks.amazonaws.com/role-arn\" = \"$oidc_role_arn\"" -i helm/values.yaml yq eval ".aws.region = \"$region\"" -i helm/values.yaml yq eval '.log.level = "debug"' -i helm/values.yaml From 0b75d8e4fcf05b1a81985f63ddfe58c196f4d9b6 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 07:21:57 -0500 Subject: [PATCH 17/25] remove adopted resouorce --- test/canary/scripts/run_test.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/test/canary/scripts/run_test.sh b/test/canary/scripts/run_test.sh index 3035e088..7d7eca5a 100755 --- a/test/canary/scripts/run_test.sh +++ b/test/canary/scripts/run_test.sh @@ -47,7 +47,6 @@ function cleanup { kubectl delete modelbiasjobdefinitions --all kubectl delete modelexplainabilityjobdefinitions --all kubectl delete modelqualityjobdefinitions --all - # kubectl delete adoptedresources --all kubectl delete featuregroups --all kubectl delete modelpackages --all kubectl delete modelpackagegroups --all From 4bd03839e90230b7c82d26775417b400288dbe8e Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 07:22:50 -0500 Subject: [PATCH 18/25] update aws cli --- test/canary/Dockerfile.canary | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/test/canary/Dockerfile.canary b/test/canary/Dockerfile.canary index ef8c6ee1..8eb6ddf1 100644 --- a/test/canary/Dockerfile.canary +++ b/test/canary/Dockerfile.canary @@ -48,12 +48,9 @@ RUN pip install --no-cache-dir \ setuptools # Install awscli -RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64-2.6.3.zip" -o "awscliv2.zip" \ +RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ && unzip -qq awscliv2.zip \ && ./aws/install -# RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ -# && unzip -qq awscliv2.zip \ -# && ./aws/install # Add yq repository and install yq RUN apt-get update && apt install -y software-properties-common \ From 4332e3cd4a0c613779c8e383f3135c16ba8f05a7 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 08:59:52 -0500 Subject: [PATCH 19/25] update kubectl --- test/canary/Dockerfile.canary | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/canary/Dockerfile.canary b/test/canary/Dockerfile.canary index 8eb6ddf1..f916b137 100644 --- a/test/canary/Dockerfile.canary +++ b/test/canary/Dockerfile.canary @@ -37,7 +37,6 @@ RUN cd /tmp/ \ && rm Python-${PYTHON_VERSION}.tgz \ && ln -s /usr/local/bin/python${PYTHON_SHORT_VERSION} /usr/local/bin/python \ && ln -s /usr/local/bin/python${PYTHON_SHORT_VERSION} /usr/local/bin/python3 \ -# This installation generate a .python_history file in the root directory leads sanity check to fail && rm -f /root/.python_history # Python Path @@ -58,12 +57,9 @@ RUN apt-get update && apt install -y software-properties-common \ && apt update && apt install -y yq # Install kubectl -RUN curl -LO "https://dl.k8s.io/release/v1.24.0/bin/linux/amd64/kubectl" \ +RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \ && chmod +x ./kubectl \ && cp ./kubectl /bin -# RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \ -# && chmod +x ./kubectl \ -# && cp ./kubectl /bin # Install eksctl RUN curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && mv /tmp/eksctl /bin From e325947fe4c656a5dcb6355bb4ddfe8d34b0775f Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 09:01:30 -0500 Subject: [PATCH 20/25] update helm --- test/canary/Dockerfile.canary | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/canary/Dockerfile.canary b/test/canary/Dockerfile.canary index f916b137..186464a3 100644 --- a/test/canary/Dockerfile.canary +++ b/test/canary/Dockerfile.canary @@ -65,14 +65,10 @@ RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/s RUN curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp && mv /tmp/eksctl /bin # Install Helm -RUN curl -q -L "https://get.helm.sh/helm-v3.7.0-linux-amd64.tar.gz" | tar zxf - -C /usr/local/bin/ \ +RUN curl -q -L "https://get.helm.sh/helm-v4.0.0-linux-amd64.tar.gz" | tar zxf - -C /usr/local/bin/ \ && mv /usr/local/bin/linux-amd64/helm /usr/local/bin/helm \ && rm -r /usr/local/bin/linux-amd64 \ && chmod +x /usr/local/bin/helm -# RUN curl -q -L "https://get.helm.sh/helm-v3.19.2-linux-amd64.tar.gz" | tar zxf - -C /usr/local/bin/ \ -# && mv /usr/local/bin/linux-amd64/helm /usr/local/bin/helm \ -# && rm -r /usr/local/bin/linux-amd64 \ -# && chmod +x /usr/local/bin/helm ENV SERVICE_REPO_PATH=/$SERVICE-controller COPY ./test/e2e/requirements.txt requirements.txt From 6e3ac666111074fd0ee62e7f76a3584e5de02f4d Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 09:33:59 -0500 Subject: [PATCH 21/25] remove updating status check --- test/e2e/tests/test_inference_component.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test/e2e/tests/test_inference_component.py b/test/e2e/tests/test_inference_component.py index 8a8d23f3..b1eaaadf 100644 --- a/test/e2e/tests/test_inference_component.py +++ b/test/e2e/tests/test_inference_component.py @@ -244,18 +244,10 @@ def update_inference_component_failed_test(self, inference_component, faulty_mod (_, faulty_model_resource) = faulty_model faulty_model_name = faulty_model_resource["spec"].get("modelName", None) spec["spec"]["specification"]["modelName"] = faulty_model_name - logging.info(f"Faulty model name: {faulty_model_name}") resource = k8s.patch_custom_resource(reference, spec) resource = k8s.wait_resource_consumed_by_controller(reference) assert resource is not None - # inference component transitions Updating -> InService state - assert_inference_component_status_in_sync( - reference.name, - reference, - cfg.INFERENCE_COMPONENT_STATUS_UPDATING, - ) - assert k8s.wait_on_condition( reference, ack_condition.CONDITION_TYPE_RESOURCE_SYNCED, "False" ) From d18529d39a6ad69adde8fb477267497d72269863 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 09:39:25 -0500 Subject: [PATCH 22/25] increase wait resource wait time --- test/e2e/__init__.py | 2 +- test/e2e/tests/test_inference_component.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/test/e2e/__init__.py b/test/e2e/__init__.py index 71f6acde..4906c87f 100644 --- a/test/e2e/__init__.py +++ b/test/e2e/__init__.py @@ -191,7 +191,7 @@ def wait_resource_inference_component_status( def assert_inference_component_status_in_sync(inference_component_name, reference, expected_status): assert ( wait_sagemaker_inference_component_status(inference_component_name, expected_status) - == wait_resource_inference_component_status(reference, expected_status, 2) + == wait_resource_inference_component_status(reference, expected_status) == expected_status ) diff --git a/test/e2e/tests/test_inference_component.py b/test/e2e/tests/test_inference_component.py index b1eaaadf..157e37b8 100644 --- a/test/e2e/tests/test_inference_component.py +++ b/test/e2e/tests/test_inference_component.py @@ -248,6 +248,13 @@ def update_inference_component_failed_test(self, inference_component, faulty_mod resource = k8s.wait_resource_consumed_by_controller(reference) assert resource is not None + # inference component transitions Updating -> InService state + assert_inference_component_status_in_sync( + reference.name, + reference, + cfg.INFERENCE_COMPONENT_STATUS_UPDATING, + ) + assert k8s.wait_on_condition( reference, ack_condition.CONDITION_TYPE_RESOURCE_SYNCED, "False" ) From dc34e7737c7b9f635b44cdd6c5df840d8803200f Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 09:40:18 -0500 Subject: [PATCH 23/25] fix cache --- test/canary/canary.buildspec.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/canary/canary.buildspec.yaml b/test/canary/canary.buildspec.yaml index 4aaafb29..075cc3df 100644 --- a/test/canary/canary.buildspec.yaml +++ b/test/canary/canary.buildspec.yaml @@ -8,7 +8,7 @@ phases: # Get cached test image - aws ecr get-login-password --region $CLUSTER_REGION | docker login --username AWS --password-stdin $ECR_CACHE_URI || true - - docker pull ${ECR_CACHE_URI}:latest --quiet || true + - docker pull ${ECR_CACHE_URI}:canary-release --quiet || true # Build test image - > From e0404011352ec744d0878239ccec434db95ca2a9 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 11:51:27 -0500 Subject: [PATCH 24/25] ic remove updating wait --- test/e2e/tests/test_inference_component.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/test/e2e/tests/test_inference_component.py b/test/e2e/tests/test_inference_component.py index 157e37b8..99ccbae3 100644 --- a/test/e2e/tests/test_inference_component.py +++ b/test/e2e/tests/test_inference_component.py @@ -248,17 +248,6 @@ def update_inference_component_failed_test(self, inference_component, faulty_mod resource = k8s.wait_resource_consumed_by_controller(reference) assert resource is not None - # inference component transitions Updating -> InService state - assert_inference_component_status_in_sync( - reference.name, - reference, - cfg.INFERENCE_COMPONENT_STATUS_UPDATING, - ) - - assert k8s.wait_on_condition( - reference, ack_condition.CONDITION_TYPE_RESOURCE_SYNCED, "False" - ) - assert k8s.get_resource_condition(reference, ack_condition.CONDITION_TYPE_TERMINAL) is None resource = k8s.get_resource(reference) assert_inference_component_status_in_sync( From 30fe5abac409317e31e91d2fc819e0db0d121486 Mon Sep 17 00:00:00 2001 From: sirutBuasai Date: Mon, 24 Nov 2025 17:01:01 -0500 Subject: [PATCH 25/25] revert temp changes --- test/canary/canary.buildspec.yaml | 8 ++++---- test/e2e/__init__.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/test/canary/canary.buildspec.yaml b/test/canary/canary.buildspec.yaml index 075cc3df..19fb6972 100644 --- a/test/canary/canary.buildspec.yaml +++ b/test/canary/canary.buildspec.yaml @@ -8,23 +8,23 @@ phases: # Get cached test image - aws ecr get-login-password --region $CLUSTER_REGION | docker login --username AWS --password-stdin $ECR_CACHE_URI || true - - docker pull ${ECR_CACHE_URI}:canary-release --quiet || true + - docker pull ${ECR_CACHE_URI}:latest --quiet || true # Build test image - > - docker build -f ./test/canary/Dockerfile.canary . -t ${ECR_CACHE_URI}:canary-release + docker build -f ./test/canary/Dockerfile.canary . -t ${ECR_CACHE_URI}:latest --build-arg SERVICE="${SERVICE##*/}" --quiet || echo "Docker Build Failed" || true build: commands: # Run tests - - docker run --name ack-canary $(env | cut -f1 -d= | sed 's/^/-e /') --mount type=bind,source="$(pwd)/",target="/${SERVICE}-controller/" ${ECR_CACHE_URI}:canary-release + - docker run --name ack-canary $(env | cut -f1 -d= | sed 's/^/-e /') --mount type=bind,source="$(pwd)/",target="/${SERVICE}-controller/" ${ECR_CACHE_URI}:latest post_build: commands: - docker cp ack-canary:/sagemaker-controller/test/canary/integration_tests.xml /tmp/results.xml || true # Push test image to cache ECR repo - - docker push ${ECR_CACHE_URI}:canary-release || true + - docker push ${ECR_CACHE_URI}:latest || true reports: IntegrationTestReport: diff --git a/test/e2e/__init__.py b/test/e2e/__init__.py index 4906c87f..71f6acde 100644 --- a/test/e2e/__init__.py +++ b/test/e2e/__init__.py @@ -191,7 +191,7 @@ def wait_resource_inference_component_status( def assert_inference_component_status_in_sync(inference_component_name, reference, expected_status): assert ( wait_sagemaker_inference_component_status(inference_component_name, expected_status) - == wait_resource_inference_component_status(reference, expected_status) + == wait_resource_inference_component_status(reference, expected_status, 2) == expected_status )