From 3c50e8c751b103cbc1eea9a32887e8fe3675bc04 Mon Sep 17 00:00:00 2001 From: Mahad Janjua Date: Thu, 7 Dec 2023 13:39:35 -0800 Subject: [PATCH] E2E Testing: Run test on PR --- .github/workflows/appsignals-e2e-eks-test.yml | 274 ++++++++++++++++++ .github/workflows/pr-build.yml | 34 +++ 2 files changed, 308 insertions(+) create mode 100644 .github/workflows/appsignals-e2e-eks-test.yml create mode 100644 .github/workflows/pr-build.yml diff --git a/.github/workflows/appsignals-e2e-eks-test.yml b/.github/workflows/appsignals-e2e-eks-test.yml new file mode 100644 index 0000000..6b32055 --- /dev/null +++ b/.github/workflows/appsignals-e2e-eks-test.yml @@ -0,0 +1,274 @@ +# This is a reusable workflow for running the E2E test for App Signals. +# It is meant to be called from another workflow. +# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview +name: App Signals Enablement E2E Testing - EKS +on: + workflow_call: + inputs: + aws-region: + required: true + type: string + test-cluster-name: + required: true + type: string + caller-workflow-name: + required: true + type: string + +permissions: + id-token: write + contents: read + +env: + AWS_DEFAULT_REGION: ${{ inputs.aws-region }} # Used by terraform and AWS CLI commands + TEST_RESOURCES_FOLDER: e2e-test-resources + TEST_ACCOUNT: ${{ secrets.APP_SIGNALS_E2E_TEST_ACC }} + SAMPLE_APP_NAMESPACE: sample-app-namespace + SAMPLE_APP_FRONTEND_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_FE_SA_IMG }} + SAMPLE_APP_REMOTE_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_RE_SA_IMG }} + METRIC_NAMESPACE: AppSignals + LOG_GROUP_NAME: /aws/appsignals/eks + +jobs: + appsignals-e2e-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + # This step avoids code duplication for terraform templates and the validator + # To simplify, we get the entire repo and put it into a separate folder + - name: Get testing resources from ADOT + uses: actions/checkout@v4 + with: + repository: aws-observability/aws-otel-java-instrumentation + ref: main + path: ${{ env.TEST_RESOURCES_FOLDER }} + + - name: Generate testing id + run: echo TESTING_ID="${{ env.AWS_DEFAULT_REGION }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV + + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + role-to-assume: ${{ secrets.E2E_TEST_ROLE_ARN }} + aws-region: ${{ env.AWS_DEFAULT_REGION }} + + # local directory to store the kubernetes config + - name: Create kubeconfig directory + run: mkdir -p ${{ github.workspace }}/.kube + + - name: Set KUBECONFIG environment variable + run: echo KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV + + - name: Set up kubeconfig + run: aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ env.AWS_DEFAULT_REGION }} + + - name: Install eksctl + run: | + mkdir ${{ github.workspace }}/eksctl + curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz" + tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz + echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH + + - name: Create role for AWS access from the sample app + id: create_service_account + run: | + eksctl create iamserviceaccount \ + --name service-account-${{ env.TESTING_ID }} \ + --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \ + --cluster ${{ inputs.test-cluster-name }} \ + --role-name eks-s3-access-${{ env.TESTING_ID }} \ + --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \ + --region ${{ env.AWS_DEFAULT_REGION }} \ + --approve + + - name: Set up terraform + uses: hashicorp/setup-terraform@v2 + with: + terraform_wrapper: false + + - name: Deploy sample app via terraform + working-directory: ${{ env.TEST_RESOURCES_FOLDER }}/testing/terraform/eks + run: | + terraform init + terraform validate + terraform apply -auto-approve \ + -var="test_id=${{ env.TESTING_ID }}" \ + -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \ + -var="kube_directory_path=${{ github.workspace }}/.kube" \ + -var="eks_cluster_name=${{ inputs.test-cluster-name }}" \ + -var="eks_cluster_context_name=$(kubectl config current-context)" \ + -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \ + -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \ + -var="sample_app_image=${{ env.SAMPLE_APP_FRONTEND_SERVICE_IMAGE }}" \ + -var="sample_remote_app_image=${{ env.SAMPLE_APP_REMOTE_SERVICE_IMAGE }}" + + - name: Enable App Signals + working-directory: scripts/eks/appsignals + run: | + ./enable-app-signals.sh \ + ${{ inputs.test-cluster-name }} \ + ${{ env.AWS_DEFAULT_REGION }} \ + ${{ env.SAMPLE_APP_NAMESPACE }} + + # Application pods need to be restarted for the + # app signals instrumentation to take effect + - name: Restart the app pods + run: kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }} + + - name: Wait for sample app pods to come up + run: | + kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }} + + - name: Get remote service pod name and IP + run: | + echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV + echo "REMOTE_SERVICE_POD_IP=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')" >> $GITHUB_ENV + + - name: Verify pod Adot image + run: | + kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --output json | \ + jq '.items[0].status.initContainerStatuses[0].imageID' + + - name: Verify pod CWAgent image + run: | + kubectl get pods -n amazon-cloudwatch --output json | \ + jq '.items[0].status.containerStatuses[0].imageID' + + - name: Get the sample app endpoint + run: | + echo "APP_ENDPOINT=$(terraform output sample_app_endpoint)" >> $GITHUB_ENV + working-directory: ${{ env.TEST_RESOURCES_FOLDER }}/testing/terraform/eks + + - name: Wait for app endpoint to come online + id: endpoint-check + run: | + attempt_counter=0 + max_attempts=30 + until $(curl --output /dev/null --silent --head --fail http://${{ env.APP_ENDPOINT }}); do + if [ ${attempt_counter} -eq ${max_attempts} ];then + echo "Max attempts reached" + exit 1 + fi + + printf '.' + attempt_counter=$(($attempt_counter+1)) + sleep 10 + done + + # This steps increases the speed of the validation by creating the telemetry data in advance + - name: Call all test APIs + continue-on-error: true + run: | + curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call/ + curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call/ + curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}/ + curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call/ + + # Validation for app signals telemetry data + - name: Validate generated EMF logs + id: log-validation + if: steps.endpoint-check.outcome == 'success' && !cancelled() + working-directory: ${{ env.TEST_RESOURCES_FOLDER }} + run: ./gradlew testing:validator:run --args='-c eks/log-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.APP_ENDPOINT }} + --region ${{ env.AWS_DEFAULT_REGION }} + --account-id ${{ env.TEST_ACCOUNT }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --log-group ${{ env.LOG_GROUP_NAME }} + --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }} + --platform-info ${{ inputs.test-cluster-name }} + --service-name sample-application-${{ env.TESTING_ID }} + --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} + --request-body ip=${{ env.REMOTE_SERVICE_POD_IP }} + --rollup' + + - name: Validate generated metrics + id: metric-validation + if: (success() || steps.log-validation.outcome == 'failure') && !cancelled() + working-directory: ${{ env.TEST_RESOURCES_FOLDER }} + run: ./gradlew testing:validator:run --args='-c eks/metric-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.APP_ENDPOINT }} + --region ${{ env.AWS_DEFAULT_REGION }} + --account-id ${{ env.TEST_ACCOUNT }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --log-group ${{ env.LOG_GROUP_NAME }} + --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }} + --platform-info ${{ inputs.test-cluster-name }} + --service-name sample-application-${{ env.TESTING_ID }} + --remote-service-name sample-remote-application-${{ env.TESTING_ID }} + --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} + --request-body ip=${{ env.REMOTE_SERVICE_POD_IP }} + --rollup' + + - name: Validate generated traces + id: trace-validation + if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled() + working-directory: ${{ env.TEST_RESOURCES_FOLDER }} + run: ./gradlew testing:validator:run --args='-c eks/trace-validation.yml + --testing-id ${{ env.TESTING_ID }} + --endpoint http://${{ env.APP_ENDPOINT }} + --region ${{ env.AWS_DEFAULT_REGION }} + --account-id ${{ env.TEST_ACCOUNT }} + --metric-namespace ${{ env.METRIC_NAMESPACE }} + --log-group ${{ env.LOG_GROUP_NAME }} + --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }} + --platform-info ${{ inputs.test-cluster-name }} + --service-name sample-application-${{ env.TESTING_ID }} + --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }} + --request-body ip=${{ env.REMOTE_SERVICE_POD_IP }} + --rollup' + + # Clean up Procedures + + - name: Remove log group deletion command + if: always() + working-directory: scripts/eks/appsignals + run: | + delete_log_group="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP_NAME }}' --region \$REGION" + sed -i "s#$delete_log_group##g" clean-app-signals.sh + + - name: Clean Up App Signals + if: always() + continue-on-error: true + working-directory: scripts/eks/appsignals + run: | + ./clean-app-signals.sh \ + ${{ inputs.test-cluster-name }} \ + ${{ env.AWS_DEFAULT_REGION }} \ + ${{ env.SAMPLE_APP_NAMESPACE }} + + # This step also deletes lingering resources from previous test runs + - name: Delete all sample app resources + if: always() + continue-on-error: true + timeout-minutes: 10 + run: kubectl delete namespace ${{ env.SAMPLE_APP_NAMESPACE }} + + - name: Terraform destroy + if: always() + continue-on-error: true + working-directory: ${{ env.TEST_RESOURCES_FOLDER }}/testing/terraform/eks + run: | + terraform destroy -auto-approve \ + -var="test_id=${{ env.TESTING_ID }}" \ + -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \ + -var="kube_directory_path=${{ github.workspace }}/.kube" \ + -var="eks_cluster_name=${{ inputs.test-cluster-name }}" \ + -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \ + -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \ + -var="sample_app_image=${{ env.SAMPLE_APP_IMAGE }}" + + - name: Remove aws access service account + if: always() + continue-on-error: true + run: | + eksctl delete iamserviceaccount \ + --name service-account-${{ env.TESTING_ID }} \ + --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \ + --cluster ${{ inputs.test-cluster-name }} \ + --region ${{ env.AWS_DEFAULT_REGION }} \ diff --git a/.github/workflows/pr-build.yml b/.github/workflows/pr-build.yml new file mode 100644 index 0000000..ec7b5d2 --- /dev/null +++ b/.github/workflows/pr-build.yml @@ -0,0 +1,34 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: MIT + +name: PR Build +on: + push: + branches: + - e2e-test + workflow_dispatch: + pull_request: + branches: + - main* + types: + - opened + - synchronize + - reopened + - ready_for_review + +concurrency: + group: ${{ github.workflow }}-${{ github.ref_name }} + cancel-in-progress: true + +permissions: + id-token: write + contents: read + +jobs: + e2e-test: + uses: ./.github/workflows/appsignals-e2e-eks-test.yml + secrets: inherit + with: + aws-region: 'us-east-1' + test-cluster-name: 'e2e-enablement-script-test' + caller-workflow-name: 'appsignals-e2e-eks-canary-test' \ No newline at end of file