# System Tests Enterprise workflow (text copied from the GitHub page titled "System Tests Enterprise #4545")

# Copyright 2023 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
name: System Tests Enterprise

# Triggers: pushes to "*-system-tests" branches, a schedule every 8 hours, and manual dispatch.
on:
  push:
    branches:
      # Branch filters are glob patterns, not regular expressions: in a glob `.` is a literal dot and `+` repeats
      # the preceding character, so the regex-style '.+-system-tests' only matched names such as '.-system-tests'.
      # '**' matches any characters (including '/'), which is what the regex was meant to express.
      - '**-system-tests'
  schedule:
    # * is a special character in YAML so you have to quote this string
    # Run the system tests every 8 hours (cron hours should divide 24 equally, otherwise there will be an overlap
    # at the end of the day)
    - cron: '0 */8 * * *'
  workflow_dispatch:
    inputs:
      docker_registry:
        description: 'Docker registry to pull images from (default: ghcr.io/, use registry.hub.docker.com/ for docker hub)'
        required: true
        default: 'ghcr.io/'
      clean_resources_in_teardown:
        description: 'Clean resources created by test (like project) in each test teardown (default: true - perform clean)'
        required: true
        default: 'true'
        type: choice
        options:
          - 'true'
          - 'false'
      override_iguazio_version:
        description: 'Override the configured target system iguazio version (leave empty to resolve automatically)'
        required: false

# Every run joins the same fixed concurrency group, so runs of this workflow do not execute in parallel.
concurrency: one-at-a-time
jobs:
  # Restarts the docker-registry deployment on the data cluster and copies the cleanup helper scripts to it.
  system-test-cleanup:
    name: System Test Cleanup
    runs-on: [ self-hosted, Linux ]
    container:
      image: ubuntu:latest
    timeout-minutes: 10
    # let's not run this on every fork, change to your fork when developing
    if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch'
    steps:
      - uses: actions/checkout@v3
      - name: Install dependencies
        run: |
          apt-get update -qqy && apt-get install -y sshpass
      - name: cleanup docker images from registries
        # SSH to datanode and delete all docker images created by the system tests by restarting the docker-registry
        # deployment and the datanode docker-registry
        run: |
          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            ssh \
            -o StrictHostKeyChecking=no \
            -o ServerAliveInterval=180 \
            -o ServerAliveCountMax=3 \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }} \
            kubectl -n default-tenant rollout restart deployment docker-registry
          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            scp \
            automation/system_test/cleanup.py \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/home/iguazio/cleanup.py
          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            scp \
            automation/system_test/dev_utilities.py \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/home/iguazio/dev_utilities.py

  # Picks the branch to test, computes the versions to deploy, runs automation/system_test/prepare.py and
  # publishes the resulting env.yml (GPG-encrypted) as the "env" artifact for the test job.
  prepare-system-tests-enterprise-ci:
    # When increasing the timeout make sure it's not larger than the schedule cron interval
    timeout-minutes: 55
    name: Prepare System Tests Enterprise
    runs-on: [ self-hosted, Linux ]
    container:
      image: python:3.9
    needs: [system-test-cleanup]
    # let's not run this on every fork, change to your fork when developing
    if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch'
    steps:
      - uses: actions/checkout@v3
      - name: Install dependencies
        run: |
          apt-get update -qqy && apt-get install -y sshpass jq curl gnupg nodejs
      - uses: actions/setup-node@v4
        with:
          node-version: 18
      - name: Copy state branch file from remote
        run: |
          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            scp \
            -o StrictHostKeyChecking=no \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/tmp/system-tests-branches-list.txt \
            system-tests-branches-list.txt
      - name: Resolve Branch To Run System Tests
        id: current-branch
        shell: bash
        # we store a file named /tmp/system-tests-branches-list.txt which contains a list of branches to run system
        # tests on. The branches are separated with commas, so each run we pop the first branch in the list and
        # append it to the end of the list.
        # This mechanism allows us to run on multiple branches without the need to modify the file or secrets each
        # time a new branch is added or removed
        run: |
          # Read branches from local file
          branches=$(cat system-tests-branches-list.txt)
          echo "branches found in system-tests-branches-list.txt: $branches"
          # Split branches into an array
          IFS=',' read -ra branches_array <<< "$branches"
          # Get the first branch in the list to work on
          first_branch="${branches_array[0]}"
          # Fail fast instead of preparing and testing with an empty branch name
          if [ -z "$first_branch" ]; then
            echo "system-tests-branches-list.txt does not contain a branch to run on" >&2
            exit 1
          fi
          echo "working on $first_branch"
          # Remove the first branch from the list
          branches_array=("${branches_array[@]:1}")
          # Add the first branch at the end of the list
          branches_array+=("$first_branch")
          # Join branches back into a string
          branches=$(printf ",%s" "${branches_array[@]}")
          branches=${branches:1}
          # Output the new list of branches
          echo "$branches"
          # Write new branches order to a local file
          echo "$branches" > system-tests-branches-list.txt
          # Set output
          echo "name=$(echo $first_branch)" >> "$GITHUB_OUTPUT"
      - name: Override remote file from local resolved branch list
        run: |
          # Override the remote file with the new list of branches
          sshpass \
            -p "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            scp \
            -o StrictHostKeyChecking=no \
            system-tests-branches-list.txt \
            ${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}@${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}:/tmp/
      # checking out to base branch and not the target(resolved) branch, to be able to run the changed preparation
      # code before merging the changes to upstream.
      - name: Checkout base branch
        uses: actions/checkout@v3
      - name: Install automation scripts dependencies
        run: |
          python -m pip install -r automation/requirements.txt
      - name: Extract git hashes from upstream and latest version
        id: git_upstream_info
        run: |
          # Get the latest commit of mlrun/mlrun (that is older than 1 hour)
          echo "mlrun_hash=$( \
            cd /tmp && \
            git clone --single-branch --branch ${{ steps.current-branch.outputs.name }} https://github.com/mlrun/mlrun.git mlrun-upstream 2> /dev/null && \
            cd mlrun-upstream && \
            git rev-list --until="1 hour ago" --max-count 1 --abbrev-commit HEAD && \
            cd .. && \
            rm -rf mlrun-upstream)" >> "$GITHUB_OUTPUT"
          # Get the latest commit of mlrun/ui (that is older than 1 hour)
          echo "ui_hash=$( \
            cd /tmp && \
            git clone --single-branch --branch ${{ steps.current-branch.outputs.name }} https://github.com/mlrun/ui.git mlrun-ui 2> /dev/null && \
            cd mlrun-ui && \
            git rev-list --until="1 hour ago" --max-count 1 --abbrev-commit HEAD && \
            cd .. && \
            rm -rf mlrun-ui)" >> "$GITHUB_OUTPUT"
          # Get the tested mlrun version
          echo "unstable_version_prefix=$( \
            curl https://raw.githubusercontent.com/mlrun/mlrun/${{ steps.current-branch.outputs.name }}/automation/version/unstable_version_prefix \
            )" >> "$GITHUB_OUTPUT"
      - name: Set computed versions params
        id: computed_params
        run: |
          # The hashes come straight from the git_upstream_info step. The previous fallback to the
          # git_action_info / git_action_ui_info steps was dead code: no step with those ids exists in this job,
          # so those expressions always expanded to an empty string.
          mlrun_hash="${{ steps.git_upstream_info.outputs.mlrun_hash }}"
          echo "mlrun_hash=$(echo $mlrun_hash)" >> "$GITHUB_OUTPUT"
          ui_hash="${{ steps.git_upstream_info.outputs.ui_hash }}"
          echo "ui_hash=$(echo $ui_hash)" >> "$GITHUB_OUTPUT"
          echo "mlrun_version=$(echo ${{ steps.git_upstream_info.outputs.unstable_version_prefix }}+$mlrun_hash)" >> "$GITHUB_OUTPUT"
          echo "mlrun_docker_tag=$(echo ${{ steps.git_upstream_info.outputs.unstable_version_prefix }}-$mlrun_hash)" >> "$GITHUB_OUTPUT"
          echo "mlrun_ui_version=${{ steps.git_upstream_info.outputs.unstable_version_prefix }}-$ui_hash" >> "$GITHUB_OUTPUT"
          # The inputs are empty on push/schedule events, so fall back to the workflow_dispatch defaults
          echo "mlrun_docker_registry=$( \
            input_docker_registry=$INPUT_DOCKER_REGISTRY && \
            echo ${input_docker_registry:-ghcr.io/})" >> "$GITHUB_OUTPUT"
          echo "mlrun_system_tests_clean_resources=$( \
            input_system_tests_clean_resources=$INPUT_CLEAN_RESOURCES_IN_TEARDOWN && \
            echo ${input_system_tests_clean_resources:-true})" >> "$GITHUB_OUTPUT"
          echo "override_iguazio_version=$INPUT_OVERRIDE_IGUAZIO_VERSION" >> "$GITHUB_OUTPUT"
        env:
          INPUT_DOCKER_REGISTRY: ${{ github.event.inputs.docker_registry }}
          INPUT_OVERRIDE_IGUAZIO_VERSION: ${{ github.event.inputs.override_iguazio_version }}
          INPUT_CLEAN_RESOURCES_IN_TEARDOWN: ${{ github.event.inputs.clean_resources_in_teardown }}
      - name: Prepare System Test Environment and Install MLRun
        env:
          IP_ADDR_PREFIX: ${{ secrets.IP_ADDR_PREFIX }}
        timeout-minutes: 50
        # --iguazio-version reads the `override_iguazio_version` output, which is the name the computed_params step
        # actually writes; the former `iguazio_version` reference never existed and always expanded to "".
        run: |
          python automation/system_test/prepare.py run \
            --data-cluster-ip "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}" \
            --data-cluster-ssh-username "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}" \
            --data-cluster-ssh-password "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            --provctl-download-url "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_PATH }}" \
            --provctl-download-s3-access-key "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_URL_S3_ACCESS_KEY }}" \
            --provctl-download-s3-key-id "${{ secrets.LATEST_SYSTEM_TEST_PROVCTL_DOWNLOAD_URL_S3_KEY_ID }}" \
            --username "${{ secrets.LATEST_SYSTEM_TEST_USERNAME }}" \
            --access-key "${{ secrets.LATEST_SYSTEM_TEST_ACCESS_KEY }}" \
            --iguazio-version "${{ steps.computed_params.outputs.override_iguazio_version }}" \
            --mlrun-version "${{ steps.computed_params.outputs.mlrun_version }}" \
            --mlrun-ui-version "${{ steps.computed_params.outputs.mlrun_ui_version }}" \
            --mlrun-commit "${{ steps.computed_params.outputs.mlrun_hash }}" \
            --override-image-registry "${{ steps.computed_params.outputs.mlrun_docker_registry }}"
      - name: Prepare System Test env.yml and MLRun installation from current branch
        timeout-minutes: 5
        # --branch uses the step output directly: a job is not part of its own `needs` context, so
        # `needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch` is empty when read from inside this job.
        run: |
          python automation/system_test/prepare.py env \
            --data-cluster-ip "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_IP }}" \
            --data-cluster-ssh-username "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_USERNAME }}" \
            --data-cluster-ssh-password "${{ secrets.LATEST_SYSTEM_TEST_DATA_CLUSTER_SSH_PASSWORD }}" \
            --username "${{ secrets.LATEST_SYSTEM_TEST_USERNAME }}" \
            --access-key "${{ secrets.LATEST_SYSTEM_TEST_ACCESS_KEY }}" \
            --slack-webhook-url "${{ secrets.LATEST_SYSTEM_TEST_SLACK_WEBHOOK_URL }}" \
            --branch "${{ steps.current-branch.outputs.name }}" \
            --github-access-token "${{ secrets.SYSTEM_TEST_GITHUB_ACCESS_TOKEN }}" \
            --save-to-path "$GITHUB_WORKSPACE/env.yml"
      - name: Encrypt file
        id: encrypt_file
        # The passphrase is read from the environment instead of being templated into the script text, so
        # quotes or `$` inside the secret cannot break or alter the command.
        run: |
          gpg \
            --batch \
            --passphrase "$GPG_PASSPHRASE" \
            --output "$GITHUB_WORKSPACE/env.yml.gpg" \
            --symmetric "$GITHUB_WORKSPACE/env.yml"
          echo "env_file_path=$(echo $GITHUB_WORKSPACE/env.yml.gpg)" >> "$GITHUB_OUTPUT"
        env:
          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
      - name: Upload env file
        uses: actions/upload-artifact@v3
        with:
          name: env
          path: "${{ steps.encrypt_file.outputs.env_file_path }}"
          if-no-files-found: error
    # Values consumed by run-system-tests-enterprise-ci through the `needs` context
    outputs:
      mlrunVersion: ${{ steps.computed_params.outputs.mlrun_version }}
      mlrunBranch: ${{ steps.current-branch.outputs.name }}
      mlrunSystemTestsCleanResources: ${{ steps.computed_params.outputs.mlrun_system_tests_clean_resources }}

  # Runs `make test-system` once per test component, one component at a time, on the branch resolved by the
  # prepare job.
  run-system-tests-enterprise-ci:
    # When increasing the timeout make sure it's not larger than the schedule cron interval
    timeout-minutes: 360
    name: Test ${{ matrix.test_component }} [${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}]
    # requires prepare to finish before starting
    needs: [prepare-system-tests-enterprise-ci]
    runs-on: [ self-hosted, Linux ]
    container:
      image: python:3.9
    # let's not run this on every fork, change to your fork when developing
    if: github.repository == 'mlrun/mlrun' || github.event_name == 'workflow_dispatch'
    strategy:
      # keep running the remaining components when one of them fails
      fail-fast: false
      max-parallel: 1
      matrix:
        test_component:
          - api
          - runtimes
          - projects
          - model_monitoring
          - examples
          - backwards_compatibility
          - datastore
          - logs
          - feature_store
    steps:
      - name: Install Dependencies
        run: |
          # gnupg for decrypting env.yml.gpg
          # nodejs for installing github action dependencies
          # graphviz for generating graphs (feature store tests)
          # git-core for cloning repos and etc, required by many suites
          # gcc, make for installing python packages
          apt-get update -qqy && \
            apt-get install -y \
              gnupg \
              nodejs \
              graphviz \
              git-core \
              gcc \
              make
      - uses: actions/setup-node@v4
        with:
          node-version: 18
      - uses: actions/checkout@v3
        # checking out to the resolved branch to run system tests on, as now we run the actual tests, we don't want
        # to run the system tests of the branch that triggered the system tests as it might be in a different
        # version than the mlrun version we deployed on the previous job (can have features that the resolved
        # branch doesn't have)
        with:
          ref: ${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}
      - uses: actions/download-artifact@v3
        with:
          name: env
          path: /tmp
      - name: Decrypt file
        # The passphrase is read from the environment instead of being templated into the script text, so
        # quotes or `$` inside the secret cannot break or alter the command.
        run: |
          gpg \
            --batch \
            --passphrase "$GPG_PASSPHRASE" \
            --output "$GITHUB_WORKSPACE/tests/system/env.yml" \
            --decrypt "/tmp/env.yml.gpg"
          # ensure file is created
          test -f "$GITHUB_WORKSPACE/tests/system/env.yml"
        env:
          GPG_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
      - name: Pre Tests Requirements
        run: |
          # due to
          # https://github.com/git/git/commit/8959555cee7ec045958f9b6dd62e541affb7e7d9
          # ensure mlrun git is safe to use
          git config --global --add safe.directory "$GITHUB_WORKSPACE"
          MLRUN_VERSION="${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunVersion }}" \
            make install-requirements install-complete-requirements update-version-file
      - name: Run System Tests
        run: |
          MLRUN_SYSTEM_TESTS_GITHUB_RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" \
          MLRUN_SYSTEM_TESTS_CLEAN_RESOURCES="${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunSystemTestsCleanResources }}" \
          MLRUN_VERSION="${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunVersion }}" \
          MLRUN_SYSTEM_TESTS_COMPONENT="${{ matrix.test_component }}" \
          MLRUN_SYSTEM_TESTS_BRANCH="${{ needs.prepare-system-tests-enterprise-ci.outputs.mlrunBranch }}" \
            make test-system