Skip to content

[MAINTENANCE] Remove legacy DataConnectors #14204

[MAINTENANCE] Remove legacy DataConnectors

[MAINTENANCE] Remove legacy DataConnectors #14204

Workflow file for this run

name: ci
# This pipeline is intended to facilitate the full coverage of tests for OSS GX.
# A "reasonable" set of tests will run on every pull request, and a full set will run with:
# 1. Manual triggering
# 2. Every 3 hours
# 3. When we push a tag with a semver pattern x.y.z (for example 0.0.1) which is the pattern we use for releases.
on:
merge_group:
pull_request:
types:
- opened
- reopened
- synchronize
- ready_for_review
schedule:
# https://crontab.guru/every-3-hours
- cron: "0 */3 * * *"
workflow_dispatch: # allows manual triggering with branch picker
push:
# Semantic versioning syntax defined by PyPI: https://pythonpackaging.info/07-Package-Release.html#Versioning-your-code
tags:
# Stable release syntax
- "[0-9]+.[0-9]+.[0-9]+"
# Prerelease syntax
- "[0-9]+.[0-9]+.[0-9]+a[0-9]+"
- "[0-9]+.[0-9]+.[0-9]+b[0-9]+"
- "[0-9]+.[0-9]+.[0-9]+rc[0-9]+"
concurrency:
# Only run one instance of this workflow in PRs, but allow multiple instances in develop.
# https://stackoverflow.com/questions/74117321/if-condition-in-concurrency-in-gha
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
cancel-in-progress: true
jobs:
ci-does-not-run-on-draft-pull-requests:
runs-on: ubuntu-latest
if: github.event.pull_request.draft == true
steps:
- run: echo "CI jobs won't run because this is a draft pull request."
ci-is-on-main-repo:
runs-on: ubuntu-latest
# job only runs on main repo where we have access to credentials, or as part of the release
if: |
github.event.pull_request.head.repo.full_name == github.repository ||
(github.event_name == 'push' && contains(github.ref, 'refs/tags/')) ||
github.event_name == 'merge_group'
steps:
- run: echo "This CI step only runs on the main repo, where we have access to credentials."
static-analysis:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
cache: "pip"
cache-dependency-path: |
requirements-types.txt
reqs/requirements-dev-contrib.txt
- name: Install dependencies
run: pip install -r requirements-types.txt -r reqs/requirements-dev-contrib.txt
- run: invoke lint --no-fmt
- run: invoke fmt --check
- name: Type-check
run: |
invoke type-check --ci --pretty
invoke type-check --ci --pretty --check-stub-sources
- name: Marker-coverage-check
run: invoke marker-coverage
docs-snippets:
needs: [unit-tests, doc-checks, ci-is-on-main-repo]
# run on non-draft PRs with docs changes
if: |
(github.event.pull_request.draft == false && github.base_ref == 'develop') ||
(github.event_name == 'push' && contains(github.ref, 'refs/tags/1.')) ||
github.event_name == 'merge_group'
runs-on: ubuntu-latest
strategy:
matrix:
doc-step:
- docs-creds-needed
- docs-basic
- docs-spark
env:
# google
GE_TEST_GCP_CREDENTIALS: ${{secrets.GE_TEST_GCP_CREDENTIALS}}
GE_TEST_GCP_PROJECT: ${{secrets.GE_TEST_GCP_PROJECT}}
GE_TEST_BIGQUERY_DATASET: ${{secrets.GE_TEST_BIGQUERY_DATASET}}
GOOGLE_APPLICATION_CREDENTIALS: "gcp-credentials.json"
# aws
AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}}
AWS_DEFAULT_REGION: ${{secrets.AWS_DEFAULT_REGION}}
AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}}
# aws-redshift
REDSHIFT_USERNAME: ${{secrets.REDSHIFT_USERNAME}}
REDSHIFT_PASSWORD: ${{secrets.REDSHIFT_PASSWORD}}
REDSHIFT_HOST: ${{secrets.REDSHIFT_HOST}}
REDSHIFT_PORT: ${{secrets.REDSHIFT_PORT}}
REDSHIFT_DATABASE: ${{secrets.REDSHIFT_DATABASE}}
REDSHIFT_SSLMODE: ${{secrets.REDSHIFT_SSLMODE}}
# azure
AZURE_ACCESS_KEY: ${{secrets.AZURE_ACCESS_KEY}}
AZURE_CREDENTIAL: ${{secrets.AZURE_CREDENTIAL}}
# snowflake
SNOWFLAKE_ACCOUNT: ${{secrets.SNOWFLAKE_ACCOUNT}}
SNOWFLAKE_USER: ${{secrets.SNOWFLAKE_USER}}
SNOWFLAKE_PW: ${{secrets.SNOWFLAKE_PW}}
SNOWFLAKE_DATABASE: ${{secrets.SNOWFLAKE_DATABASE}}
SNOWFLAKE_SCHEMA: ${{secrets.SNOWFLAKE_SCHEMA}}
SNOWFLAKE_WAREHOUSE: ${{secrets.SNOWFLAKE_WAREHOUSE}}
SNOWFLAKE_ROLE: ${{secrets.SNOWFLAKE_ROLE}}
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Run docs_snippet_checker
run: |
yarn install
python ci/checks/validate_docs_snippets.py
- name: Authenticate with GCP
uses: google-github-actions/auth@v1
with:
credentials_json: ${{secrets.GE_TEST_GCP_CREDENTIALS}}
- name: Create JSON file for following step
run: |
echo "$GE_TEST_GCP_CREDENTIALS" > gcp-credentials.json
- name: Install and setup Google Cloud SDK
run: |
# this is recommended by the Google documentation for CI/CD https://cloud.google.com/sdk/docs/install#other_installation_options
curl -sS https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-370.0.0-linux-x86_64.tar.gz > ./google-cloud-sdk-370.0.0-linux-x86_64.tar.gz && tar -xf ./google-cloud-sdk-370.0.0-linux-x86_64.tar.gz
./google-cloud-sdk/install.sh --usage-reporting=False --path-update=True --quiet --install-python=False
# creating new named configuration
./google-cloud-sdk/bin/gcloud config configurations create ge-oss-ci-cd-configurations
# setting account config using project and service account info
./google-cloud-sdk/bin/gcloud config set account account-for-azure-tests --project=$GE_TEST_GCP_PROJECT --access-token-file=$GOOGLE_APPLICATION_CREDENTIALS
# Pass the configured Cloud SDK authentication to gsutil.
./google-cloud-sdk/bin/gcloud config set pass_credentials_to_gsutil True
# Authorize access to Google Cloud with a service account
./google-cloud-sdk/bin/gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
cache: "pip"
cache-dependency-path: |
reqs/requirements-dev-test.txt
- name: Install dependencies
run: |
pip install $(grep -E '^(invoke)' reqs/requirements-dev-contrib.txt)
invoke deps --gx-install -m '${{ matrix.doc-step }}'
- name: Run the tests
run: invoke docs-snippet-tests '${{ matrix.doc-step }}' --up-services --verbose --reports
# upload coverage report to codecov
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4.1.0
with:
token: ${{ secrets.CODECOV_TOKEN }}
slug: great-expectations/great_expectations
flags: ${{ matrix.doc-step }}
unit-tests:
strategy:
matrix:
os: [ubuntu-latest]
python-version: ["3.8", "3.9", "3.10", "3.11"]
runs-on: ${{ matrix.os }}
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache-dependency-path: |
reqs/requirements-dev-test.txt
setup.py
- name: Install dependencies
run: pip install . -c constraints-dev.txt -r reqs/requirements-dev-test.txt
- name: Run the unit tests
run: invoke ci-tests -m "unit" --xdist --slowest=10 --timeout=1.5 --reports
# upload coverage report to codecov
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4.1.0
with:
token: ${{ secrets.CODECOV_TOKEN }}
slug: great-expectations/great_expectations
flags: ${{ matrix.python-version }}
doc-checks:
needs: [static-analysis]
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
cache: "pip"
cache-dependency-path: |
reqs/requirements-dev-test.txt
- name: Install dependencies
run: pip install -r reqs/requirements-dev-test.txt
- name: check_repo_root_size
run: sh ./ci/checks/check_repo_root_size.sh
- name: Docstring linter
run: invoke docstrings
- name: line_number_snippet_checker
run: python ci/checks/check_no_line_number_snippets.py
- name: name_tag_snippet_checker
run: python ci/checks/check_only_name_tag_snippets.py
- name: integration_test_gets_run_checker
run: python ci/checks/check_integration_test_gets_run.py
- name: name_tag_snippet_referenced_checker
run: python ci/checks/check_name_tag_snippets_referenced.py
# Disabling public_api_report during V1 development; should be re-enabled upon completion of public API audit
# - name: public_api_report
# run: invoke public-api
- name: link_checker
run: python docs/checks/docs_link_checker.py -p docs/docusaurus/docs -r docs/docusaurus/docs -sr docs/docusaurus/static -s docs -sp static --skip-external
docs-build:
needs: [doc-checks]
# run on non-draft PRs
if: |
(github.event.pull_request.draft == false && github.base_ref == 'develop') ||
(github.event_name == 'push' && contains(github.ref, 'refs/tags/1.')) ||
github.event_name == 'merge_group'
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
cache: "pip"
cache-dependency-path: reqs/requirements-dev-test.txt
- name: Install dependencies
run: pip install -r reqs/requirements-dev-test.txt
- name: Build docs
env:
NODE_OPTIONS: --max_old_space_size=4096
POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
run: cd docs/docusaurus && yarn install && bash ../build_docs
cloud-tests:
needs: [unit-tests, static-analysis, ci-is-on-main-repo]
if: github.event.pull_request.draft == false
permissions:
id-token: write
contents: read
runs-on: ubuntu-latest
env:
GX_CLOUD_BASE_URL: ${{vars.MERCURY_BASE_URL}}
PACT_BROKER_READ_WRITE_TOKEN: ${{secrets.PACT_BROKER_READ_WRITE_TOKEN}}
AUTH0_DOMAIN: ${{secrets.AUTH0_DOMAIN}}
AUTH0_API_AUDIENCE: ${{secrets.AUTH0_API_AUDIENCE}}
AUTH0_MERCURY_API_CLIENT_ID: ${{secrets.AUTH0_MERCURY_API_CLIENT_ID}}
AUTH0_MERCURY_API_CLIENT_SECRET: ${{secrets.AUTH0_MERCURY_API_CLIENT_SECRET}}
GX_CLOUD_ORGANIZATION_ID: ${{secrets.MERCURY_ORGANIZATION_ID}}
GX_CLOUD_ACCESS_TOKEN: ${{secrets.MERCURY_ACCESS_TOKEN}}
PACT_DO_NOT_TRACK: true
SNOWFLAKE_CI_ACCOUNT: ${{secrets.SNOWFLAKE_CI_ACCOUNT}}
SNOWFLAKE_CI_USER_PASSWORD: ${{secrets.SNOWFLAKE_CI_USER_PASSWORD}}
LOGGING_LEVEL: DEBUG
ENVIRONMENT: local
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
cache-dependency-path: |
reqs/requirements-dev-test.txt
setup.py
- name: Install dependencies
run: |
pip install $(grep -E '^(invoke)' reqs/requirements-dev-contrib.txt)
invoke deps --gx-install -m 'cloud' -r test
- name: Configure ECR AWS Credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-region: us-east-1
role-to-assume: arn:aws:iam::258143015559:role/github-amazonec2containerregistryreadonly
- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v2
- name: Start services
run: |
docker compose -f assets/docker/mercury/docker-compose.yml up -d --quiet-pull --wait --wait-timeout 150 && \
docker compose -f assets/docker/spark/docker-compose.yml up -d --quiet-pull --wait --wait-timeout 90
- name: Show logs for debugging
if: failure()
run: |
docker compose -f assets/docker/mercury/docker-compose.yml logs
- name: Run the tests
run:
invoke ci-tests 'cloud' --up-services --verbose --reports -W default::great_expectations.datasource.fluent.GxInvalidDatasourceWarning
# upload coverage report to codecov
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4.1.0
with:
token: ${{ secrets.CODECOV_TOKEN }}
slug: great-expectations/great_expectations
flags: cloud
marker-tests:
needs: [unit-tests, static-analysis]
if: github.event.pull_request.draft == false
env:
# aws
AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}}
AWS_DEFAULT_REGION: ${{secrets.AWS_DEFAULT_REGION}}
AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}}
DATABRICKS_TOKEN: ${{secrets.DATABRICKS_SQL_TOKEN}}
DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}}
DATABRICKS_HTTP_PATH: ${{secrets.DATABRICKS_HTTP_PATH}}
SNOWFLAKE_CI_ACCOUNT: ${{secrets.SNOWFLAKE_CI_ACCOUNT}}
SNOWFLAKE_CI_USER_PASSWORD: ${{secrets.SNOWFLAKE_CI_USER_PASSWORD}}
SNOWFLAKE_ACCOUNT: ${{secrets.SNOWFLAKE_ACCOUNT}}
SNOWFLAKE_USER: ${{secrets.SNOWFLAKE_USER}}
SNOWFLAKE_PW: ${{secrets.SNOWFLAKE_PW}}
SNOWFLAKE_DATABASE: ${{secrets.SNOWFLAKE_DATABASE}}
SNOWFLAKE_SCHEMA: ${{secrets.SNOWFLAKE_SCHEMA}}
SNOWFLAKE_WAREHOUSE: ${{secrets.SNOWFLAKE_WAREHOUSE}}
SNOWFLAKE_ROLE: ${{secrets.SNOWFLAKE_ROLE}}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
markers:
- athena or clickhouse or openpyxl or pyarrow or project or sqlite or aws_creds
- aws_deps
- big
- databricks
- filesystem
- mssql
- mysql
- postgresql
- snowflake
- spark
- trino
python-version: ["3.8", "3.9", "3.10", "3.11"]
exclude:
# TODO: would like to adopt `actionlint` pre-commit hook
# but false positive here and inability to do an inline ignore
# prevents this https://github.com/rhysd/actionlint/issues/237
- python-version: ${{ github.event_name == 'pull_request' && '3.9' }}
- python-version: ${{ github.event_name == 'pull_request' && '3.10' }}
- python-version: ${{ github.event_name == 'merge_group' && '3.9' }}
- python-version: ${{ github.event_name == 'merge_group' && '3.10' }}
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
cache-dependency-path: |
reqs/requirements-dev-test.txt
setup.py
- name: Install dependencies
run: |
pip install $(grep -E '^(invoke)' reqs/requirements-dev-contrib.txt)
invoke deps --gx-install -m '${{ matrix.markers }}' -r test
- name: Run the tests
run: invoke ci-tests '${{ matrix.markers }}' --up-services --verbose --reports
# upload coverage report to codecov
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4.1.0
with:
token: ${{ secrets.CODECOV_TOKEN }}
slug: great-expectations/great_expectations
flags: "${{ matrix.python-version }} ${{ matrix.markers }}"
py38-min-versions:
needs: [unit-tests, static-analysis]
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
cache: "pip"
cache-dependency-path: reqs/requirements-dev-test.txt
- name: Install dependencies
run: pip install . -c ci/constraints-test/py38-min-install.txt -r reqs/requirements-dev-test.txt
- name: Run the tests
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
py39-min-versions:
needs: [unit-tests, static-analysis]
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.9"
cache: "pip"
cache-dependency-path: reqs/requirements-dev-test.txt
- name: Install dependencies
run: pip install . -c ci/constraints-test/py39-min-install.txt -r reqs/requirements-dev-test.txt
- name: Run the tests
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
py310-min-versions:
needs: [unit-tests, static-analysis]
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
cache: "pip"
cache-dependency-path: reqs/requirements-dev-test.txt
- name: Install dependencies
run: pip install . -c ci/constraints-test/py310-min-install.txt -r reqs/requirements-dev-test.txt
- name: Run the tests
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
py311-min-versions:
needs: [unit-tests, static-analysis]
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
cache: "pip"
cache-dependency-path: reqs/requirements-dev-test.txt
- name: Install dependencies
run: pip install . -c ci/constraints-test/py311-min-install.txt -r reqs/requirements-dev-test.txt
- name: Run the tests
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
pydantic-v1:
needs: [unit-tests, static-analysis]
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
cache: "pip"
cache-dependency-path: reqs/requirements-dev-test.txt
- name: Install dependencies
run: pip install . -c ci/constraints-test/pydantic-v1-install.txt -r reqs/requirements-dev-test.txt
- name: Run the tests
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
numpy-2:
needs: [unit-tests, static-analysis]
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.9"
cache: "pip"
cache-dependency-path: reqs/requirements-dev-test.txt
- name: Install dependencies
run: pip install . -r reqs/requirements-dev-test.txt
- name: Install numpy 2
run: pip install --pre "numpy>1.9999999999999"
- name: Run the tests
run: invoke ci-tests -m unit --xdist --slowest=10 --timeout=2.0
airflow-min-versions:
needs: [unit-tests, static-analysis]
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.8"
cache: "pip"
cache-dependency-path: requirements.txt
- name: Install dependencies
run: pip install . -c ci/constraints-test/airflow-min-install.txt
- name: Run the test
# ensure the great_expectations can be imported and a context created
run: sh ci/checks/check_min_airflow_dependency_compatibility.sh
import_gx:
needs: [static-analysis]
if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
exclude:
- python-version: ${{ github.event_name == 'pull_request' && '3.9' }}
- python-version: ${{ github.event_name == 'pull_request' && '3.10' }}
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
cache: "pip"
cache-dependency-path: requirements.txt
- name: Install Great Expectations with core deps only
run: pip install .
- name: Import Great Expectations
run: python -c "import great_expectations as gx; print('Successfully imported GX Version:', gx.__version__)"
build-n-publish:
needs:
[
ci-is-on-main-repo,
doc-checks,
static-analysis,
docs-snippets,
unit-tests,
cloud-tests,
marker-tests,
py38-min-versions,
py39-min-versions,
py310-min-versions,
py311-min-versions,
pydantic-v1,
airflow-min-versions,
import_gx,
]
if: github.event_name == 'push' && contains(github.ref, 'refs/tags/')
name: Build and publish Python distributions to PyPI
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/great-expectations
permissions:
id-token: write
steps:
- uses: actions/checkout@master
- name: Set up Python 3.8
uses: actions/setup-python@v3
with:
python-version: "3.8"
- name: Update pip
run: python -m pip install --upgrade pip
- name: Install Twine and Wheel, and prepare packaging
run: |
pip install twine wheel
git config --global user.email "team@greatexpectations.io"
git config --global user.name "Great Expectations"
- name: Build distribution
run: |
python setup.py sdist
python setup.py bdist_wheel
- name: Publish distribution to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
skip-existing: true # prevent against pushing duplicate versions
notify_on_failure:
needs:
[
ci-is-on-main-repo,
doc-checks,
static-analysis,
docs-snippets,
unit-tests,
cloud-tests,
marker-tests,
py38-min-versions,
py39-min-versions,
py310-min-versions,
py311-min-versions,
pydantic-v1,
airflow-min-versions,
import_gx,
build-n-publish,
]
if: always() && !cancelled() && contains(needs.*.result, 'failure') && github.event_name != 'pull_request' && github.event_name != 'merge_group'
runs-on: ubuntu-latest
steps:
- name: Scheduled CI job failure
id: slack
uses: slackapi/slack-github-action@v1.24.0
with:
payload: |
{
"event_name": "${{ github.event_name }}",
"url": "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
}
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_CI_WEBHOOK_URL }}
notify_on_release:
needs: [build-n-publish]
runs-on: ubuntu-latest
steps:
- name: Announce Release
id: slack
uses: slackapi/slack-github-action@v1.24.0
env:
SLACK_WEBHOOK_URL: ${{ secrets.SLACK_RELEASE_WEBHOOK_URL }}