Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
cdkini committed May 5, 2022
2 parents db29417 + ca62bbc commit 7b71b74
Show file tree
Hide file tree
Showing 181 changed files with 9,979 additions and 2,439 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/autoupdate.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# https://github.com/marketplace/actions/auto-update
name: autoupdate
on:
  pull_request:
    types:
      - auto_merge_enabled
  push:
    branches:
      - develop # Whenever the base changes, this action should run

jobs:
  autoupdate:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ repos:
- id: isort
exclude: venv/.*|docs/.*|tests/test_sets/broken_excel_file\.xls
- repo: https://github.com/psf/black
rev: 22.1.0
rev: 22.3.0
hooks:
- id: black
exclude: docs/.*|tests/.*.fixture|.*.ge_store_backend_id
Expand Down
6 changes: 6 additions & 0 deletions assets/docker/starburst/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
version: '3.2'
services:
  starburst_db:
    image: starburstdata/starburst-enterprise:373-e
    ports:
      - "8088:8080"
58 changes: 58 additions & 0 deletions azure-pipelines-dependency-graph-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ resources:
MSSQL_PID: Developer
ports:
- 1433:1433
- container: trino
image: trinodb/trino:379
ports:
- 8088:8080

variables:
GE_USAGE_STATISTICS_URL: "https://qa.stats.greatexpectations.io/great_expectations/v1/usage_statistics"
Expand Down Expand Up @@ -118,6 +122,14 @@ stages:
- bash: python scripts/check_docstring_coverage.py
name: DocstringChecker

- job: unused_import_checker
steps:
- script: |
pip install flake8
# https://www.flake8rules.com/rules/F401.html - Prunes the dgtest graph to improve accuracy
flake8 --select F401 great_expectations tests
name: UnusedImportChecker
- stage: import_ge
dependsOn: [lint]
pool:
Expand Down Expand Up @@ -472,6 +484,52 @@ stages:
displayName: 'dgtest'
- job: trino
condition: eq(stageDependencies.scope_check.changes.outputs['CheckChanges.GEChanged'], true)

services:
trino: trino

variables:
python.version: '3.8'

steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
displayName: 'Use Python $(python.version)'

- bash: python -m pip install --upgrade pip==21.3.1
displayName: 'Update pip'

- script: |
printf 'Waiting for Trino database to accept connections'
sleep 30
# until trino --execute "SHOW CATALOGS"; do
# printf '.'
# sleep 1;
# done;
displayName: Wait for database to initialise

- script: |
pip install --requirement requirements-dev-test.txt --requirement requirements-dev-sqlalchemy.txt --constraint constraints-dev.txt
pip install --requirement requirements.txt
pip install .
displayName: 'Install dependencies'
- script: |
# Install dependencies
pip install --requirement requirements.txt
pip install pytest pytest-cov pytest-azurepipelines
git clone https://github.com/superconductive/dgtest.git
pip install -e dgtest
# Run dgtest
dgtest run great_expectations --ignore 'tests/cli' --ignore 'tests/integration/usage_statistics' \
--trino --napoleon-docstrings --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html
displayName: 'dgtest'
- stage: cli_integration
dependsOn: [scope_check, lint, import_ge, custom_checks]
pool:
Expand Down
7 changes: 6 additions & 1 deletion azure-pipelines-docs-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ resources:
MSSQL_PID: Developer
ports:
- 1433:1433
- container: trino
image: trinodb/trino:379
ports:
- 8088:8080

variables:
isMain: $[eq(variables['Build.SourceBranch'], 'refs/heads/main')]
Expand Down Expand Up @@ -85,6 +89,7 @@ stages:
postgres: postgres
mysql: mysql
mssql: mssql
trino: trino

steps:
- task: UsePythonVersion@0
Expand Down Expand Up @@ -137,7 +142,7 @@ stages:
- script: |
pip install pytest pytest-azurepipelines
pytest -v --docs-tests -m integration --mysql --bigquery --mssql --spark --postgresql --aws tests/integration/test_script_runner.py
pytest -v --docs-tests -m integration --mysql --bigquery --mssql --spark --postgresql --trino --aws tests/integration/test_script_runner.py
displayName: 'pytest'
env:
# snowflake credentials
Expand Down
44 changes: 44 additions & 0 deletions azure-pipelines-os-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ resources:
MSSQL_PID: Developer
ports:
- 1433:1433
- container: trino
image: trinodb/trino:379
ports:
- 8088:8080

variables:
GE_USAGE_STATISTICS_URL: "https://qa.stats.greatexpectations.io/great_expectations/v1/usage_statistics"
Expand Down Expand Up @@ -236,3 +240,43 @@ stages:
pip install pytest pytest-cov pytest-azurepipelines
pytest --postgresql --napoleon-docstrings --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html --ignore=tests/cli --ignore=tests/integration/usage_statistics
displayName: 'pytest'
- job: trino
condition: eq(stageDependencies.scope_check.changes.outputs['CheckChanges.GEChanged'], true)

services:
trino: trino

variables:
python.version: '3.8'

steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
displayName: 'Use Python $(python.version)'

- bash: python -m pip install --upgrade pip==20.2.4
displayName: 'Update pip'

- script: |
printf 'Waiting for Trino database to accept connections'
sleep 30
# until trino --execute "SHOW CATALOGS"; do
# printf '.'
# sleep 1;
# done;
displayName: Wait for database to initialise

- script: |
pip install --requirement requirements-dev-test.txt --requirement requirements-dev-sqlalchemy.txt
# Install latest sqlalchemy version
pip install --upgrade SQLAlchemy
pip install --requirement requirements.txt
pip install .
displayName: 'Install dependencies'
- script: |
pip install pytest pytest-cov pytest-azurepipelines
pytest --trino --napoleon-docstrings --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html --ignore=tests/cli --ignore=tests/integration/usage_statistics
displayName: 'pytest'
61 changes: 61 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ resources:
MSSQL_PID: Developer
ports:
- 1433:1433
- container: trino
image: trinodb/trino:379
ports:
- 8088:8080

# The pipeline is run under two primary conditions: if merging into main or as scheduled by the above cron job.
variables:
Expand Down Expand Up @@ -100,6 +104,15 @@ stages:
- bash: python scripts/check_docstring_coverage.py
name: DocstringChecker

- job: unused_import_checker
condition: or(eq(variables.isScheduled, true), eq(variables.isMain, true), eq(variables.isManual, true))
steps:
- script: |
pip install flake8
# https://www.flake8rules.com/rules/F401.html - Prunes the dgtest graph to improve accuracy
flake8 --select F401 great_expectations tests
name: UnusedImportChecker
- stage: import_ge
dependsOn: [lint]
pool:
Expand Down Expand Up @@ -409,6 +422,44 @@ stages:
pytest --mssql --napoleon-docstrings --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html --ignore=tests/cli --ignore=tests/integration/usage_statistics
displayName: 'pytest'
- job: trino
condition: or(eq(variables.isScheduled, true), eq(variables.isMain, true), eq(variables.isManual, true))

services:
trino: trino

variables:
python.version: '3.8'

steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '$(python.version)'
displayName: 'Use Python $(python.version)'

- bash: python -m pip install --upgrade pip==21.3.1
displayName: 'Update pip'

- script: |
printf 'Waiting for Trino database to accept connections'
sleep 30
# until trino --execute "SHOW CATALOGS"; do
# printf '.'
# sleep 1;
# done;
displayName: Wait for database to initialise

- script: |
pip install --requirement requirements-dev-test.txt --requirement requirements-dev-sqlalchemy.txt --constraint constraints-dev.txt
pip install --requirement requirements.txt
pip install .
displayName: 'Install dependencies'
- script: |
pip install pytest pytest-cov pytest-azurepipelines
pytest --trino --napoleon-docstrings --junitxml=junit/test-results.xml --cov=. --cov-report=xml --cov-report=html --ignore=tests/cli --ignore=tests/integration/usage_statistics
displayName: 'pytest'
- stage: cli_integration
dependsOn: [lint, import_ge, custom_checks]
pool:
Expand Down Expand Up @@ -466,6 +517,7 @@ stages:
postgres: postgres
mysql: mysql
mssql: mssql
trino: trino

steps:
- task: UsePythonVersion@0
Expand Down Expand Up @@ -504,6 +556,15 @@ stages:
sqlcmd -U sa -P "ReallyStrongPwd1234%^&*" -Q "CREATE DATABASE test_ci;" -o create_db_output.txt
displayName: 'Configure mssql'
- script: |
printf 'Waiting for Trino database to accept connections'
sleep 30
# until trino --execute "SHOW CATALOGS"; do
# printf '.'
# sleep 1;
# done;
displayName: 'Wait for Trino database to initialise'

# - task: DownloadSecureFile@1
# name: gcp_authkey
# displayName: 'Download Google Service Account'
Expand Down
28 changes: 28 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,34 @@
title: Changelog
---

### 0.15.4
* [FEATURE] Enable self-initializing `ExpectColumnMeanToBeBetween` (#4986)
* [FEATURE] Enable self-initializing `ExpectColumnMedianToBeBetween` (#4987)
* [FEATURE] Enable self-initializing `ExpectColumnSumToBeBetween` (#4988)
* [FEATURE] New MetricSingleBatchParameterBuilder for specifically single-Batch Rule-Based Profiler scenarios (#5003)
* [FEATURE] Enable Pandas DataFrame and Series as MetricValues Output of Metric ParameterBuilder Classes (#5008)
* [FEATURE] Notebook for `VolumeDataAssistant` Example (#5010)
* [FEATURE] Histogram/Partition Single-Batch ParameterBuilder (#5011)
* [FEATURE] Update `DataAssistantResult.plot()` return value to emit `PlotResult` wrapper dataclass (#4962)
* [FEATURE] Limit samplers work with supported sqlalchemy backends (#5014)
* [FEATURE] Trino support (#5021)
* [BUGFIX] RBP Profiling Dataset ProgressBar Fix (#4999)
* [BUGFIX] Fix DataAssistantResult serialization issue (#5020)
* [DOCS] Update slack notification guide to not use validation operators. (#4978)
* [MAINTENANCE] Update `autoupdate` GitHub action (#5001)
* [MAINTENANCE] Move `DataAssistant` registry capabilities into `DataAssistantRegistry` to enable user aliasing (#4991)
* [MAINTENANCE] Fix continuous partition example (#4939) (thanks @andyjessen)
* [MAINTENANCE] Preliminary refactors for data samplers. (#4996)
* [MAINTENANCE] Clean up unused imports and enforce through `flake8` in CI/CD (#5005)
* [MAINTENANCE] ParameterBuilder tests should maximally utilize polymorphism (#5007)
* [MAINTENANCE] Clean up type hints in CLI (#5006)
* [MAINTENANCE] Making ParameterBuilder metric computations robust to failures through logging and exception handling (#5009)
* [MAINTENANCE] Condense column-level `vconcat` plots into one interactive plot (#5002)
* [MAINTENANCE] Update version of `black` in pre-commit config (#5019)
* [MAINTENANCE] Improve tooltips and formatting for distinct column values chart in VolumeDataAssistantResult (#5017)
* [MAINTENANCE] Enhance configuring serialization for DotDict type classes (#5023)
* [MAINTENANCE] Pyarrow upper bound (#5028)

### 0.15.3
* [FEATURE] Enable self-initializing capabilities for `ExpectColumnProportionOfUniqueValuesToBeBetween` (#4929)
* [FEATURE] Enable support for plotting both Table and Column charts in `VolumeDataAssistant` (#4930)
Expand Down

0 comments on commit 7b71b74

Please sign in to comment.