From 0d30137aa9e9c9c6a040583ce52414604795a4e7 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Wed, 27 Jan 2021 14:44:44 +0100 Subject: [PATCH 01/12] WIP: nightly e2e tests workflow --- .circleci/config.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 0dc65c2ff4..5ab34a462f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -48,6 +48,7 @@ jobs: - run: name: Python Tests command: make test-python + build-and-deploy: docker: - image: circleci/python:3.6 @@ -68,8 +69,26 @@ jobs: no_output_timeout: 20m - run: make ci-build-and-upload-cli + e2e-tests-aws: + docker: + - image: circleci/python:3.6 + steps: + - checkout + - setup_remote_docker + - run: + name: Install Test Package + command: pip install -e ./test/e2e + - run: + name: Create Cluster + command: cortex cluster up --config --configure-env aws + - run: + name: Run E2E Tests + command: pytest -v test/e2e/tests -k aws --aws-env aws --s3-path s3:// + + workflows: version: 2.1 + build: jobs: - test @@ -90,3 +109,13 @@ workflows: only: - master - /^[0-9]+\.[0-9]+$/ + nightly: + triggers: + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - master + jobs: + - e2e-tests-aws From 15a8a23445eae673871156043fc30f6d5ed60e9b Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Wed, 3 Feb 2021 15:52:03 +0100 Subject: [PATCH 02/12] Nightly workflow for testing on AWS --- .circleci/config.yml | 66 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 5ab34a462f..f7467b7d2e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,5 +1,8 @@ version: 2.1 +orbs: + slack: circleci/slack@4.2.0 + commands: return-if-not-deployed-branch: description: >- @@ -72,19 +75,70 @@ jobs: e2e-tests-aws: docker: - image: circleci/python:3.6 + environment: + CORTEX_TEST_REALTIME_DEPLOY_TIMEOUT: 60 + CORTEX_TEST_BATCH_DEPLOY_TIMEOUT: 30 + CORTEX_TEST_BATCH_JOB_TIMEOUT: 200 + NUM_BUILD_PROCS: 2 steps: - checkout - - setup_remote_docker + - setup_remote_docker: + docker_layer_caching: true + - install-go - run: - name: Install Test Package - command: pip install -e ./test/e2e + name: Generate Cluster Config + command: | + cat \<< EOF > ./cluster-aws.yaml + cluster_name: cortex + provider: aws + region: ${AWS_REGION} + instance_type: t3a.small + min_instances: 1 + max_instances: 5 + bucket: ${AWS_BUCKET} + + image_operator: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/operator:latest + image_manager: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/manager:latest + image_downloader: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/downloader:latest + image_request_monitor: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/request-monitor:latest + image_cluster_autoscaler: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/cluster-autoscaler:latest + image_metrics_server: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/metrics-server:latest + image_inferentia: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/inferentia:latest + image_neuron_rtd: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/neuron-rtd:latest + image_nvidia: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/nvidia:latest + image_fluent_bit: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/fluent-bit:latest + image_istio_proxy: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/istio-proxy:latest + image_istio_pilot: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/istio-pilot:latest + image_prometheus: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus:latest + image_prometheus_config_reloader: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus-config-reloader:latest + image_prometheus_operator: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus-operator:latest + image_prometheus_statsd_exporter: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus-statsd-exporter:latest + image_prometheus_to_cloudwatch: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus-to-cloudwatch:latest + EOF + - run: + name: Install Dependencies + command: | + pip install awscli + pip install -e ./test/e2e + make cli + echo 'export CORTEX_CLI_PATH="$(pwd)/bin/cortex"' >> $BASH_ENV + - run: + name: Build Images + command: make images-all-aws - run: name: Create Cluster - command: cortex cluster up --config --configure-env aws + command: cortex cluster up --config ./cluster-aws.yaml --configure-env aws -y - run: name: Run E2E Tests - command: pytest -v test/e2e/tests -k aws --aws-env aws --s3-path s3:// - + command: pytest -v test/e2e/tests -k aws --aws-env aws + - run: + name: Delete Cluster + command: cortex cluster down --config ./cluster-aws.yaml -y + when: always + - slack/notify: + event: fail + channel: "#eng" + template: basic_fail_1 workflows: version: 2.1 From 83dbd6dd1c6eebf23cafc26e7c954733920e1701 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 4 Feb 2021 15:00:08 +0100 Subject: [PATCH 03/12] Use pre-built master images on nightly --- .circleci/config.yml | 49 +++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 28 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f7467b7d2e..970ea2e774 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -82,49 +82,42 @@ jobs: NUM_BUILD_PROCS: 2 steps: - checkout - - setup_remote_docker: - docker_layer_caching: true - - install-go + - setup_remote_docker - run: name: Generate Cluster Config command: | cat \<< EOF > ./cluster-aws.yaml cluster_name: cortex provider: aws - region: ${AWS_REGION} + region: ${NIGHTLY_AWS_REGION} instance_type: t3a.small min_instances: 1 max_instances: 5 - bucket: ${AWS_BUCKET} + bucket: ${NIGHTLY_AWS_BUCKET} - image_operator: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/operator:latest - image_manager: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/manager:latest - image_downloader: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/downloader:latest - image_request_monitor: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/request-monitor:latest - image_cluster_autoscaler: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/cluster-autoscaler:latest - image_metrics_server: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/metrics-server:latest - image_inferentia: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/inferentia:latest - image_neuron_rtd: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/neuron-rtd:latest - image_nvidia: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/nvidia:latest - image_fluent_bit: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/fluent-bit:latest - image_istio_proxy: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/istio-proxy:latest - image_istio_pilot: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/istio-pilot:latest - image_prometheus: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus:latest - image_prometheus_config_reloader: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus-config-reloader:latest - image_prometheus_operator: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus-operator:latest - image_prometheus_statsd_exporter: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus-statsd-exporter:latest - image_prometheus_to_cloudwatch: ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/cortexlabs/prometheus-to-cloudwatch:latest + image_operator: quay.io/cortexlabs/operator:master + image_manager: quay.io/cortexlabs/manager:master + image_downloader: quay.io/cortexlabs/downloader:master + image_request_monitor: quay.io/cortexlabs/request-monitor:master + image_cluster_autoscaler: quay.io/cortexlabs/cluster-autoscaler:master + image_metrics_server: quay.io/cortexlabs/metrics-server:master + image_inferentia: quay.io/cortexlabs/inferentia:master + image_neuron_rtd: quay.io/cortexlabs/neuron-rtd:master + image_nvidia: quay.io/cortexlabs/nvidia:master + image_fluent_bit: quay.io/cortexlabs/fluent-bit:master + image_istio_proxy: quay.io/cortexlabs/istio-proxy:master + image_istio_pilot: quay.io/cortexlabs/istio-pilot:master + image_prometheus: quay.io/cortexlabs/prometheus:master + image_prometheus_config_reloader: quay.io/cortexlabs/prometheus-config-reloader:master + image_prometheus_operator: quay.io/cortexlabs/prometheus-operator:master + image_prometheus_statsd_exporter: quay.io/cortexlabs/prometheus-statsd-exporter:master + image_prometheus_to_cloudwatch: quay.io/cortexlabs/prometheus-to-cloudwatch:master EOF - run: name: Install Dependencies command: | - pip install awscli pip install -e ./test/e2e - make cli - echo 'export CORTEX_CLI_PATH="$(pwd)/bin/cortex"' >> $BASH_ENV - - run: - name: Build Images - command: make images-all-aws + pip install https://s3-us-west-2.amazonaws.com/get-cortex/master/python/cortex-master.tar.gz - run: name: Create Cluster command: cortex cluster up --config ./cluster-aws.yaml --configure-env aws -y From 3cf4e38b6bf91c3a35d0e9eba4ce435c00d85e40 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 4 Feb 2021 15:22:01 +0100 Subject: [PATCH 04/12] Use circleci commands to simplify jobs --- .circleci/config.yml | 98 +++++++++++++++++++++++++++++++++----------- 1 file changed, 75 insertions(+), 23 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 970ea2e774..52050caf10 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -32,6 +32,42 @@ commands: name: Login to Quay command: docker login -u=$QUAY_USERNAME -p=$QUAY_PASSWORD quay.io + install-e2e-dependencies: + description: Install E2E Tests Dependencies + steps: + - run: + name: Install Dependencies + command: | + pip install -e ./test/e2e + pip install https://s3-us-west-2.amazonaws.com/get-cortex/master/python/cortex-master.tar.gz + + run-e2e-tests: + description: Creates a temporary cluster and runs the cortex E2E tests + parameters: + provider: + description: cloud provider + type: enum + enum: [gcp, aws] + config: + description: cluster config file path + type: string + default: ./cluster.yaml + steps: + - run: + name: Create Cluster + command: cortex cluster up --config << parameters.config >> --configure-env << parameters.provider >> -y + - run: + name: Run E2E Tests + command: pytest -v test/e2e/tests -k << parameters.provider >> --<< parameters.provider >>-env << parameters.provider >> + - run: + name: Delete Cluster + command: cortex cluster down --config << parameters.config >> -y + when: always + - slack/notify: + event: fail + channel: "#eng" + template: basic_fail_1 + jobs: test: docker: @@ -75,18 +111,14 @@ jobs: e2e-tests-aws: docker: - image: circleci/python:3.6 - environment: - CORTEX_TEST_REALTIME_DEPLOY_TIMEOUT: 60 - CORTEX_TEST_BATCH_DEPLOY_TIMEOUT: 30 - CORTEX_TEST_BATCH_JOB_TIMEOUT: 200 - NUM_BUILD_PROCS: 2 steps: - checkout - setup_remote_docker + - install-e2e-dependencies - run: name: Generate Cluster Config command: | - cat \<< EOF > ./cluster-aws.yaml + cat \<< EOF > ./cluster.yaml cluster_name: cortex provider: aws region: ${NIGHTLY_AWS_REGION} @@ -113,25 +145,44 @@ jobs: image_prometheus_statsd_exporter: quay.io/cortexlabs/prometheus-statsd-exporter:master image_prometheus_to_cloudwatch: quay.io/cortexlabs/prometheus-to-cloudwatch:master EOF + - run-e2e-tests: + provider: aws + config: ./cluster.yaml + + e2e-tests-gcp: + docker: + - image: circleci/python:3.6 + steps: + - checkout + - setup_remote_docker + - install-e2e-dependencies - run: - name: Install Dependencies + name: Generate Cluster Config command: | - pip install -e ./test/e2e - pip install https://s3-us-west-2.amazonaws.com/get-cortex/master/python/cortex-master.tar.gz - - run: - name: Create Cluster - command: cortex cluster up --config ./cluster-aws.yaml --configure-env aws -y - - run: - name: Run E2E Tests - command: pytest -v test/e2e/tests -k aws --aws-env aws - - run: - name: Delete Cluster - command: cortex cluster down --config ./cluster-aws.yaml -y - when: always - - slack/notify: - event: fail - channel: "#eng" - template: basic_fail_1 + cat \<< EOF > ./cluster.yaml + cluster_name: cortex + project: ${NIGHTLY_GCP_PROJECT} + zone: ${NIGHTLY_GCP_ZONE} + provider: gcp + instance_type: n1-standard-2 + min_instances: 1 + max_instances: 5 + + image_operator: quay.io/cortexlabs/operator:master + image_manager: quay.io/cortexlabs/manager:master + image_downloader: quay.io/cortexlabs/downloader:master + image_istio_proxy: quay.io/cortexlabs/istio-proxy:master + image_istio_pilot: quay.io/cortexlabs/istio-pilot:master + image_google_pause: quay.io/cortexlabs/google-pause:master + image_prometheus: quay.io/cortexlabs/prometheus:master + image_prometheus_config_reloader: quay.io/cortexlabs/prometheus-config-reloader:master + image_prometheus_operator: quay.io/cortexlabs/prometheus-operator:master + image_prometheus_statsd_exporter: quay.io/cortexlabs/prometheus-statsd-exporter:master + image_prometheus_stackdriver_sidecar: quay.io/cortexlabs/prometheus-stackdriver-sidecar:master + EOF + - run-e2e-tests: + provider: gcp + config: ./cluster.yaml workflows: version: 2.1 @@ -166,3 +217,4 @@ workflows: - master jobs: - e2e-tests-aws + - e2e-tests-gcp From d5238dc1afc0beb3f3fe07377230a370c6fb73a1 Mon Sep 17 00:00:00 2001 From: David Eliahu Date: Tue, 9 Feb 2021 16:03:22 -0800 Subject: [PATCH 05/12] Update config.yml --- .circleci/config.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 52050caf10..8d46b6a477 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -55,13 +55,13 @@ commands: steps: - run: name: Create Cluster - command: cortex cluster up --config << parameters.config >> --configure-env << parameters.provider >> -y + command: cortex cluster up --config << parameters.config >> --configure-env << parameters.provider >> --aws-key $NIGHTLY_AWS_ACCESS_KEY_ID --aws-secret $NIGHTLY_AWS_SECRET_ACCESS_KEY -y - run: name: Run E2E Tests command: pytest -v test/e2e/tests -k << parameters.provider >> --<< parameters.provider >>-env << parameters.provider >> - run: name: Delete Cluster - command: cortex cluster down --config << parameters.config >> -y + command: cortex cluster down --config << parameters.config >> --aws-key $NIGHTLY_AWS_ACCESS_KEY_ID --aws-secret $NIGHTLY_AWS_SECRET_ACCESS_KEY -y when: always - slack/notify: event: fail @@ -121,11 +121,11 @@ jobs: cat \<< EOF > ./cluster.yaml cluster_name: cortex provider: aws - region: ${NIGHTLY_AWS_REGION} - instance_type: t3a.small + region: us-east-1 + instance_type: g4dn.xlarge min_instances: 1 - max_instances: 5 - bucket: ${NIGHTLY_AWS_BUCKET} + max_instances: 2 + bucket: cortex-dev-nightly image_operator: quay.io/cortexlabs/operator:master image_manager: quay.io/cortexlabs/manager:master @@ -161,12 +161,13 @@ jobs: command: | cat \<< EOF > ./cluster.yaml cluster_name: cortex - project: ${NIGHTLY_GCP_PROJECT} - zone: ${NIGHTLY_GCP_ZONE} + project: cortexlabs-dev + zone: us-central1-a provider: gcp instance_type: n1-standard-2 + accelerator_type: nvidia-tesla-t4 min_instances: 1 - max_instances: 5 + max_instances: 2 image_operator: quay.io/cortexlabs/operator:master image_manager: quay.io/cortexlabs/manager:master From 7e4decc4647b62d70efe7d55f75ed70ddd744468 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 11 Feb 2021 14:16:08 +0100 Subject: [PATCH 06/12] Split GCP and AWS workflows, set GCP credentials --- .circleci/config.yml | 57 +++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8d46b6a477..42d693fcdc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -53,16 +53,34 @@ commands: type: string default: ./cluster.yaml steps: - - run: - name: Create Cluster - command: cortex cluster up --config << parameters.config >> --configure-env << parameters.provider >> --aws-key $NIGHTLY_AWS_ACCESS_KEY_ID --aws-secret $NIGHTLY_AWS_SECRET_ACCESS_KEY -y - - run: - name: Run E2E Tests - command: pytest -v test/e2e/tests -k << parameters.provider >> --<< parameters.provider >>-env << parameters.provider >> - - run: - name: Delete Cluster - command: cortex cluster down --config << parameters.config >> --aws-key $NIGHTLY_AWS_ACCESS_KEY_ID --aws-secret $NIGHTLY_AWS_SECRET_ACCESS_KEY -y - when: always + - when: + condition: + equal: [<< parameters.provider >>, "aws"] + steps: + - run: + name: Create Cluster + command: cortex cluster up --config << parameters.config >> --configure-env << parameters.provider >> --aws-key $NIGHTLY_AWS_ACCESS_KEY_ID --aws-secret $NIGHTLY_AWS_SECRET_ACCESS_KEY -y + - run: + name: Run E2E Tests + command: pytest -v test/e2e/tests -k << parameters.provider >> --<< parameters.provider >>-env << parameters.provider >> + - run: + name: Delete Cluster + command: cortex cluster down --config << parameters.config >> --aws-key $NIGHTLY_AWS_ACCESS_KEY_ID --aws-secret $NIGHTLY_AWS_SECRET_ACCESS_KEY -y + when: always + - when: + condition: + equal: [ << parameters.provider >>, "gcp" ] + steps: + - run: + name: Create Cluster + command: cortex cluster-gcp up --config << parameters.config >> --configure-env << parameters.provider >> -y + - run: + name: Run E2E Tests + command: pytest -v test/e2e/tests -k << parameters.provider >> --<< parameters.provider >>-env << parameters.provider >> + - run: + name: Delete Cluster + command: cortex cluster down --config << parameters.config >> -y + when: always - slack/notify: event: fail channel: "#eng" @@ -156,6 +174,11 @@ jobs: - checkout - setup_remote_docker - install-e2e-dependencies + - run: + name: Initialize GCP Credentials + command: | + echo ${NIGHTLY_GOOGLE_APPLICATION_CREDENTIALS} > ./google_service_account.json + echo 'export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/google_service_account.json' >> $BASH_ENV - run: name: Generate Cluster Config command: | @@ -209,13 +232,13 @@ workflows: - master - /^[0-9]+\.[0-9]+$/ nightly: - triggers: - - schedule: - cron: "0 0 * * *" - filters: - branches: - only: - - master +# triggers: +# - schedule: +# cron: "0 0 * * *" +# filters: +# branches: +# only: +# - master jobs: - e2e-tests-aws - e2e-tests-gcp From dba5aa3a8421fdc77e6ff459bd61abe1506c8d00 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 11 Feb 2021 16:55:11 +0100 Subject: [PATCH 07/12] Update cluster configs and AWS creds --- .circleci/config.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 42d693fcdc..8331f8c089 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -133,6 +133,11 @@ jobs: - checkout - setup_remote_docker - install-e2e-dependencies + - run: + name: Initialize Credentials + command: | + echo 'export AWS_ACCESS_KEY_ID=${NIGHTLY_AWS_ACCESS_KEY_ID}' >> $BASH_ENV + echo 'export AWS_SECRET_ACCESS_KEY=${NIGHTLY_AWS_SECRET_ACCESS_KEY}' >> $BASH_ENV - run: name: Generate Cluster Config command: | @@ -161,7 +166,7 @@ jobs: image_prometheus_config_reloader: quay.io/cortexlabs/prometheus-config-reloader:master image_prometheus_operator: quay.io/cortexlabs/prometheus-operator:master image_prometheus_statsd_exporter: quay.io/cortexlabs/prometheus-statsd-exporter:master - image_prometheus_to_cloudwatch: quay.io/cortexlabs/prometheus-to-cloudwatch:master + image_grafana: quay.io/cortexlabs/grafana:master EOF - run-e2e-tests: provider: aws @@ -175,7 +180,7 @@ jobs: - setup_remote_docker - install-e2e-dependencies - run: - name: Initialize GCP Credentials + name: Initialize Credentials command: | echo ${NIGHTLY_GOOGLE_APPLICATION_CREDENTIALS} > ./google_service_account.json echo 'export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/google_service_account.json' >> $BASH_ENV @@ -195,6 +200,7 @@ jobs: image_operator: quay.io/cortexlabs/operator:master image_manager: quay.io/cortexlabs/manager:master image_downloader: quay.io/cortexlabs/downloader:master + image_request_monitor: quay.io/cortexlabs/request-monitor:master image_istio_proxy: quay.io/cortexlabs/istio-proxy:master image_istio_pilot: quay.io/cortexlabs/istio-pilot:master image_google_pause: quay.io/cortexlabs/google-pause:master @@ -202,7 +208,7 @@ jobs: image_prometheus_config_reloader: quay.io/cortexlabs/prometheus-config-reloader:master image_prometheus_operator: quay.io/cortexlabs/prometheus-operator:master image_prometheus_statsd_exporter: quay.io/cortexlabs/prometheus-statsd-exporter:master - image_prometheus_stackdriver_sidecar: quay.io/cortexlabs/prometheus-stackdriver-sidecar:master + image_grafana: quay.io/cortexlabs/grafana:master EOF - run-e2e-tests: provider: gcp From 98805521ae95a4ed24d816e4f13c4b51be52b01f Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 11 Feb 2021 17:47:39 +0100 Subject: [PATCH 08/12] Use default AWS creds --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 8331f8c089..effa2c0078 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -59,13 +59,13 @@ commands: steps: - run: name: Create Cluster - command: cortex cluster up --config << parameters.config >> --configure-env << parameters.provider >> --aws-key $NIGHTLY_AWS_ACCESS_KEY_ID --aws-secret $NIGHTLY_AWS_SECRET_ACCESS_KEY -y + command: cortex cluster up --config << parameters.config >> --configure-env << parameters.provider >> -y - run: name: Run E2E Tests command: pytest -v test/e2e/tests -k << parameters.provider >> --<< parameters.provider >>-env << parameters.provider >> - run: name: Delete Cluster - command: cortex cluster down --config << parameters.config >> --aws-key $NIGHTLY_AWS_ACCESS_KEY_ID --aws-secret $NIGHTLY_AWS_SECRET_ACCESS_KEY -y + command: cortex cluster down --config << parameters.config >> -y when: always - when: condition: From 0b5a298cb25ea28ec457dd174b5c4b3d22bfa985 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 11 Feb 2021 17:48:04 +0100 Subject: [PATCH 09/12] Set slack channel as a parameter --- .circleci/config.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index effa2c0078..b497041218 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -52,6 +52,10 @@ commands: description: cluster config file path type: string default: ./cluster.yaml + slack_channel: + description: "slack channel where failed builds will be posted (should start with #)" + type: string + default: "#builds" steps: - when: condition: @@ -83,7 +87,7 @@ commands: when: always - slack/notify: event: fail - channel: "#eng" + channel: << parameters.slack_channel >> template: basic_fail_1 jobs: From 09688026ee12f55e129f713314dcf0f5eff6ef45 Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 11 Feb 2021 17:48:39 +0100 Subject: [PATCH 10/12] Add missing batch env variable for AWS --- .circleci/config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index b497041218..7929a3098d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -133,6 +133,8 @@ jobs: e2e-tests-aws: docker: - image: circleci/python:3.6 + environment: + CORTEX_TEST_BATCH_S3_PATH: s3://cortex-dev-nightly/test/jobs steps: - checkout - setup_remote_docker From eb77c9d087adf88f144c17d853726077e19d22ac Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 11 Feb 2021 17:53:26 +0100 Subject: [PATCH 11/12] Fix cluster down on GCP --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7929a3098d..7ab0a1f64f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -83,7 +83,7 @@ commands: command: pytest -v test/e2e/tests -k << parameters.provider >> --<< parameters.provider >>-env << parameters.provider >> - run: name: Delete Cluster - command: cortex cluster down --config << parameters.config >> -y + command: cortex cluster-gcp down --config << parameters.config >> -y when: always - slack/notify: event: fail From fac33bfe15668757596a81fb2271bbc9371ae9cb Mon Sep 17 00:00:00 2001 From: Miguel Varela Ramos Date: Thu, 11 Feb 2021 18:20:53 +0100 Subject: [PATCH 12/12] Add schedule back to nightly workflow --- .circleci/config.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7ab0a1f64f..7b6d14c366 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -244,13 +244,13 @@ workflows: - master - /^[0-9]+\.[0-9]+$/ nightly: -# triggers: -# - schedule: -# cron: "0 0 * * *" -# filters: -# branches: -# only: -# - master + triggers: + - schedule: + cron: "0 0 * * *" + filters: + branches: + only: + - master jobs: - e2e-tests-aws - e2e-tests-gcp