
name: Feathr E2E Tests
on:
  push:
    branches: [main]
    paths-ignore:
      - "docs/**"
      - "ui/**"
      - "**/README.md"
  pull_request_target:
    branches: [main]
    types: [labeled, edited, opened, synchronize]
    # The 'paths-ignore' below is disabled to guarantee that the '*_test_status' jobs can be triggered;
    # with this 'paths-ignore' enabled, the test jobs needed by those status jobs would be skipped.
    # Please do not set the 'safe to test' label for changes limited to the paths below.
    # paths-ignore:
    #   - "docs/**"
    #   - "ui/**"
    #   - "**/README.md"
  schedule:
    # Runs daily at 1 PM UTC (9 PM CST) and sends a notification to TEAMS_WEBHOOK
    - cron: '00 13 * * *'
# Cancel any in progress workflow from the same PR, branch or tag when a new workflow is triggered
# Ref: https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
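# Illustrative values (the PR number is hypothetical): for PR #123 the group evaluates to
# "Feathr E2E Tests-123"; for a direct push to main, github.event.pull_request.number is empty,
# so the group falls back to "Feathr E2E Tests-refs/heads/main".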

jobs:
  paths_filter:
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request_target'
    outputs:
      # output1 - whether the changes in a PR contain client source files
      output1: ${{ steps.filter_src.outputs.src }}
      # output2 - whether the changes in a PR contain registry files
      output2: ${{ steps.filter_registry.outputs.registry }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
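      # The negated glob in the next step is intended to match any changed file outside the
      # registry/, docs/ and ui/ folders, i.e. client source changes (pattern semantics per
      # dorny/paths-filter's glob matching).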
      - uses: dorny/paths-filter@v2
        id: filter_src
        with:
          filters: |
            src:
              - "!((registry|docs|ui)/**)**"
      - uses: dorny/paths-filter@v2
        id: filter_registry
        with:
          filters: |
            registry:
              - "registry/**"
      - name: client tests
        if: steps.filter_src.outputs.src == 'true'
        run: echo "Changes in this PR contain client source files; the label 'safe to test' needs to be set."
      - name: registry tests
        if: steps.filter_registry.outputs.registry == 'true'
        run: echo "Changes in this PR contain registry files; the label 'registry test' needs to be set."
      - name: no tests
        if: steps.filter_registry.outputs.registry != 'true' && steps.filter_src.outputs.src != 'true'
        run: echo "Changes in this PR contain neither registry files nor client source files. Do not set the labels 'safe to test' or 'registry test'."

  databricks_test:
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Set up JDK 8
        uses: actions/setup-java@v2
        with:
          java-version: "8"
          distribution: "temurin"
      - name: Gradle build
        run: |
          ./gradlew build
          # remote folder for the CI upload
          echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV
          # get the local jar name without its path, so that a version change won't affect it
          echo "FEATHR_LOCAL_JAR_NAME=$(ls build/libs/*.jar | xargs -n 1 basename)" >> $GITHUB_ENV
          # get the full path of the local jar
          echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls build/libs/*.jar)" >> $GITHUB_ENV
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Install Feathr Package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ./feathr_project/[all]
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
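          # '-e' installs feathr_project as an editable package; '[all]' additionally pulls in
          # every optional dependency group declared in the package metadata.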
      - name: Set env variable and upload jars
        env:
          # set these for the databricks CLI
          DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}}
          DATABRICKS_TOKEN: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}}
        run: |
          # upload the jar to DBFS with the databricks CLI, assuming there is only one jar in the target folder
          databricks fs cp ${{ env.FEATHR_LOCAL_JAR_FULL_NAME_PATH}} dbfs:/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}} --overwrite
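          # With the illustrative values above, this would copy the jar to e.g.
          #   dbfs:/feathr_jar_github_action_13_05_42/feathr-assembly-1.0.0.jar
          # which is what SPARK_CONFIG__DATABRICKS__FEATHR_RUNTIME_LOCATION below points at.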
      - name: Run Feathr with Databricks
        env:
          PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_databricks"
          SPARK_CONFIG__SPARK_CLUSTER: databricks
          SPARK_CONFIG__DATABRICKS__WORKSPACE_INSTANCE_URL: ${{secrets.DATABRICKS_HOST}}
          DATABRICKS_WORKSPACE_TOKEN_VALUE: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}}
          SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"11.3.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
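          # The "FEATHR_FILL_IN" placeholders in the template above are presumably substituted
          # by the Feathr client at job-submission time (run name, jar location, main class, parameters).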
          REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
          AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
          AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
          AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
          S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}}
          S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}}
          ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}}
          ADLS_KEY: ${{secrets.ADLS_KEY}}
          BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}}
          BLOB_KEY: ${{secrets.BLOB_KEY}}
          JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}}
          KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}}
          MONITORING_DATABASE_SQL_PASSWORD: ${{secrets.MONITORING_DATABASE_SQL_PASSWORD}}
          SPARK_CONFIG__DATABRICKS__FEATHR_RUNTIME_LOCATION: dbfs:/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}}
          COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
          SQL1_USER: ${{secrets.SQL1_USER}}
          SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}
        run: |
          # run only the tests that use Databricks, split across 6 parallel workers
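          # '-n 6' requires the pytest-xdist plugin; '--cov-fail-under=70' makes the step fail
          # if overall coverage drops below 70%.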
          pytest -n 6 --cov-report term-missing --cov=feathr_project/feathr feathr_project/test --cov-config=.github/workflows/.coveragerc_db --cov-fail-under=70

  azure_synapse_test:
    # There is some duplication in setting up both the azure_synapse and databricks tests,
    # but for now we keep them as separate jobs to speed up the overall test run
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Set up JDK 8
        uses: actions/setup-java@v2
        with:
          java-version: "8"
          distribution: "temurin"
      - name: Gradle build
        run: |
          ./gradlew build
          # remote folder for the CI upload
          echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV
          # get the local jar name without its path, so that a version change won't affect it
          echo "FEATHR_LOCAL_JAR_NAME=$(ls build/libs/*.jar | xargs -n 1 basename)" >> $GITHUB_ENV
          # get the full path of the local jar
          echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls build/libs/*.jar)" >> $GITHUB_ENV
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Azure Blob Storage Upload (Overwrite)
        uses: fixpoint/azblob-upload-artifact@v4
        with:
          connection-string: ${{secrets.SPARK_JAR_BLOB_CONNECTION_STRING}}
          name: ${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}
          path: ${{ env.FEATHR_LOCAL_JAR_FULL_NAME_PATH}}
          container: ${{secrets.SPARK_JAR_BLOB_CONTAINER}}
          cleanup: "true"
      - name: Install Feathr Package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ./feathr_project/[all]
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Run Feathr with Azure Synapse
        env:
          PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_synapse"
          SPARK_CONFIG__SPARK_CLUSTER: azure_synapse
          REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
          AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
          AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
          AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
          S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}}
          S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}}
          ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}}
          ADLS_KEY: ${{secrets.ADLS_KEY}}
          BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}}
          BLOB_KEY: ${{secrets.BLOB_KEY}}
          JDBC_TABLE: ${{secrets.JDBC_TABLE}}
          JDBC_USER: ${{secrets.JDBC_USER}}
          JDBC_PASSWORD: ${{secrets.JDBC_PASSWORD}}
          JDBC_DRIVER: ${{secrets.JDBC_DRIVER}}
          JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}}
          KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}}
          MONITORING_DATABASE_SQL_PASSWORD: ${{secrets.MONITORING_DATABASE_SQL_PASSWORD}}
          SPARK_CONFIG__AZURE_SYNAPSE__FEATHR_RUNTIME_LOCATION: "abfss://${{secrets.SPARK_JAR_BLOB_CONTAINER}}@feathrazuretest3storage.dfs.core.windows.net/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}}"
          COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
          SQL1_USER: ${{secrets.SQL1_USER}}
          SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}
        run: |
          # skip the databricks-related tests, since we just ran them; keeping the databricks
          # and synapse tests separate also makes sure there is no write conflict
          # run in 6 parallel jobs to shorten the total time
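          # '-m "not databricks"' deselects every test carrying the pytest marker "databricks",
          # so only the Synapse-compatible tests run here.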
          pytest -n 6 -m "not databricks" --cov-report term-missing --cov=feathr_project/feathr feathr_project/test --cov-config=.github/workflows/.coveragerc_sy --cov-fail-under=70

  local_spark_test:
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Set up JDK 8
        uses: actions/setup-java@v2
        with:
          java-version: "8"
          distribution: "temurin"
      - name: Gradle build
        run: |
          ./gradlew build
          # remote folder for the CI upload
          echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV
          # get the local jar name without its path, so that a version change won't affect it
          echo "FEATHR_LOCAL_JAR_NAME=$(ls build/libs/*.jar | xargs -n 1 basename)" >> $GITHUB_ENV
          # get the full path of the local jar
          echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls build/libs/*.jar)" >> $GITHUB_ENV
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Install Feathr Package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ./feathr_project/[all]
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Run Feathr with Local Spark
        env:
          PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_local"
          SPARK_CONFIG__SPARK_CLUSTER: local
          REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
          AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
          AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
          AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
          S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}}
          S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}}
          ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}}
          ADLS_KEY: ${{secrets.ADLS_KEY}}
          BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}}
          BLOB_KEY: ${{secrets.BLOB_KEY}}
          JDBC_TABLE: ${{secrets.JDBC_TABLE}}
          JDBC_USER: ${{secrets.JDBC_USER}}
          JDBC_PASSWORD: ${{secrets.JDBC_PASSWORD}}
          JDBC_DRIVER: ${{secrets.JDBC_DRIVER}}
          JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}}
          KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}}
          MONITORING_DATABASE_SQL_PASSWORD: ${{secrets.MONITORING_DATABASE_SQL_PASSWORD}}
          COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
          SQL1_USER: ${{secrets.SQL1_USER}}
          SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}
        run: |
          # skip cloud-related tests
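          # Note: no jar upload step is needed in this job; with SPARK_CONFIG__SPARK_CLUSTER=local,
          # the locally built jar from build/libs is presumably picked up by the local Spark submission.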
          pytest --cov-report term-missing --cov=feathr_project/feathr/spark_provider feathr_project/test/test_local_spark_e2e.py --cov-config=.github/workflows/.coveragerc_local

  registry_test:
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'registry test'))
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Set up JDK 8
        uses: actions/setup-java@v2
        with:
          java-version: "8"
          distribution: "temurin"
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Install Feathr Package
        run: |
          python -m pip install --upgrade pip
          if [ -f ./registry/test/requirements.txt ]; then pip install -r ./registry/test/requirements.txt; fi
          if [ -f ./registry/purview-registry/requirements.txt ]; then pip install -r ./registry/purview-registry/requirements.txt; fi
          if [ -f ./registry/sql-registry/requirements.txt ]; then pip install -r ./registry/sql-registry/requirements.txt; fi
      - name: Run Registry Test Cases
        env:
          AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
          AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
          AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
          PURVIEW_NAME: "feathrazuretest3-purview1"
          CONNECTION_STR: ${{secrets.CONNECTION_STR}}
        run: |
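          # The two suites run sequentially; because the default GitHub Actions shell is bash
          # with '-e', a failure in the SQL registry suite stops the step before the Purview
          # suite gets to run.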
          pytest --cov-report term-missing --cov=registry/sql-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_sql_registry.py --cov-fail-under=20
          pytest --cov-report term-missing --cov=registry/purview-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_purview_registry.py --cov-fail-under=20

  client_test_status:
    # This status marks whether any required test job failed for a PR
    # If it fails, the PR will be blocked
    runs-on: ubuntu-latest
    if: always() && github.event_name == 'pull_request_target'
    needs: [databricks_test, azure_synapse_test, local_spark_test, paths_filter]
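    # Note: 'always()' in the 'if' above keeps this job running even when some of the needed
    # jobs failed or were skipped, so the overall status can still be computed.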
    steps:
      - name: Test status failure
        if: contains(needs.*.result, 'failure')
        run: |
          echo "Merging is blocked because one or more test jobs failed."
          exit 1
      - name: Required test cases skipped
        if: contains(needs.*.result, 'skipped') && needs.paths_filter.outputs.output1 == 'true'
        run: |
          echo "Merging is blocked because some required test jobs didn't run. Please set the label 'safe to test'."
          exit 1
      - name: Test status success
        if: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'skipped') || (contains(needs.*.result, 'skipped') && needs.paths_filter.outputs.output1 != 'true') }}
        run: |
          echo "All required CI test jobs for client source passed."

  registry_test_status:
    # This status marks whether any required registry test case failed for a PR
    # If it fails, the PR will be blocked
    runs-on: ubuntu-latest
    if: always() && github.event_name == 'pull_request_target'
    needs: [registry_test, paths_filter]
    steps:
      - name: Test status failure
        if: contains(needs.*.result, 'failure')
        run: |
          echo "Merging is blocked because the registry test job failed."
          exit 1
      - name: Required test cases skipped
        if: needs.registry_test.result == 'skipped' && needs.paths_filter.outputs.output2 == 'true'
        run: |
          echo "Merging is blocked because the required 'registry_test' jobs didn't run. Please set the label 'registry test'."
          exit 1
      - name: Test status success
        if: needs.registry_test.result == 'success' || (needs.registry_test.result == 'skipped' && needs.paths_filter.outputs.output2 != 'true')
        run: |
          echo "All required CI test cases for registry passed."

  failure_notification:
    # If any of the needed jobs fails, a warning message will be sent
    needs: [databricks_test, azure_synapse_test, local_spark_test, registry_test]
    runs-on: ubuntu-latest
    if: failure() && github.event_name == 'schedule'
    steps:
      - name: Warning
        run: |
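          # TEAMS_WEBHOOK is expected to hold an incoming-webhook URL for a Teams channel;
          # a plain '{"text": ...}' JSON payload is sufficient for such webhooks.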
          curl -H 'Content-Type: application/json' -d '{"text": "[WARNING] Daily CI has failures, please check: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}

  notification:
    # Final daily report with the status of all jobs
    needs: [databricks_test, azure_synapse_test, local_spark_test, registry_test]
    runs-on: ubuntu-latest
    if: always() && github.event_name == 'schedule'
    steps:
      - name: Get Date
        run: echo "NOW=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
      - name: Notification
        run: |
          curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. Databricks Test ${{needs.databricks_test.result}}, 2. Synapse Test ${{needs.azure_synapse_test.result}}, 3. Local Spark Test ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}