Skip to content

Commit

Permalink
Merge branch 'feature/GREAT-571/big-query-temp-tables' of https://git…
Browse files Browse the repository at this point in the history
…hub.com/great-expectations/great_expectations into feature/GREAT-571/big-query-temp-tables

* 'feature/GREAT-571/big-query-temp-tables' of https://github.com/great-expectations/great_expectations:
  [BUGFIX] Enables successful parsing of test cases for multi-table expectations (#4906)
  [BUGFIX] check contrib requirements (#4922)
  [MAINTENANCE] Ensure that code style scripts in CI/CD exit early on failure (#4921)
  ProgressBar for DataAssistant RuleBasedProfiler computations. (#4918)
  release-prep-2022-04-21 (#4919)
  [MAINTENANCE] Altair types cleanup (#4916)
  [BUGFIX] fix validation issue for column domain type and implement expect_column_unique_value_count_to_be_between for VolumeDataAssistant (#4914)
  [FEATURE] Enable variables to be specified at both Profiler and its constituent individual Rule levels (#4912)
  test: update test time (#4911)
  Add module docstring and simplify access to DatePart (#4910)
  • Loading branch information
Shinnnyshinshin committed Apr 21, 2022
2 parents d54a0d0 + 32d96b7 commit 09058a1
Show file tree
Hide file tree
Showing 46 changed files with 2,996 additions and 390 deletions.
2 changes: 2 additions & 0 deletions assets/scripts/build_gallery.py
Expand Up @@ -109,6 +109,8 @@ def get_contrib_requirements(filepath: str) -> Dict:
if "library_metadata" in target_ids:
library_metadata = ast.literal_eval(node.value)
requirements = library_metadata.get("requirements", [])
if type(requirements) == str:
requirements = [requirements]
requirements_info[current_class] = requirements
requirements_info["requirements"] += requirements

Expand Down
8 changes: 4 additions & 4 deletions azure-pipelines-dependency-graph-testing.yml
Expand Up @@ -153,7 +153,7 @@ stages:
displayName: 'Import Great Expectations'
- stage: required
dependsOn: [scope_check, lint, import_ge]
dependsOn: [scope_check, lint, import_ge, custom_checks]
pool:
vmImage: 'ubuntu-18.04'

Expand Down Expand Up @@ -343,7 +343,7 @@ stages:
reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov'

- stage: usage_stats_integration
dependsOn: [scope_check, lint, import_ge]
dependsOn: [scope_check, lint, import_ge, custom_checks]
pool:
vmImage: 'ubuntu-latest'

Expand Down Expand Up @@ -379,7 +379,7 @@ stages:
pool:
vmImage: 'ubuntu-latest'

dependsOn: [scope_check, lint, import_ge]
dependsOn: [scope_check, lint, import_ge, custom_checks]

jobs:
- job: mysql
Expand Down Expand Up @@ -472,7 +472,7 @@ stages:
displayName: 'dgtest'
- stage: cli_integration
dependsOn: [scope_check, lint, import_ge]
dependsOn: [scope_check, lint, import_ge, custom_checks]
pool:
vmImage: 'ubuntu-latest'

Expand Down
29 changes: 23 additions & 6 deletions azure-pipelines.yml
Expand Up @@ -81,6 +81,23 @@ stages:
pyupgrade --py3-plus || EXIT_STATUS=$?
exit $EXIT_STATUS
- stage: custom_checks
pool:
vmImage: 'ubuntu-latest'

jobs:
- job: type_hint_checker
steps:
- script: |
pip install mypy # Prereq for type hint script
python scripts/check_type_hint_coverage.py
name: TypeHintChecker
- job: docstring_checker
steps:
- bash: python scripts/check_docstring_coverage.py
name: DocstringChecker

- stage: import_ge
dependsOn: [lint]
pool:
Expand Down Expand Up @@ -117,7 +134,7 @@ stages:
displayName: 'Import Great Expectations'
- stage: required
dependsOn: [lint, import_ge]
dependsOn: [lint, import_ge, custom_checks]
pool:
vmImage: 'ubuntu-18.04'

Expand Down Expand Up @@ -276,7 +293,7 @@ stages:
reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov'

- stage: usage_stats_integration
dependsOn: [lint, import_ge]
dependsOn: [lint, import_ge, custom_checks]
pool:
vmImage: 'ubuntu-latest'

Expand Down Expand Up @@ -310,7 +327,7 @@ stages:
pool:
vmImage: 'ubuntu-latest'

dependsOn: [lint, import_ge]
dependsOn: [lint, import_ge, custom_checks]

jobs:
- job: mysql
Expand Down Expand Up @@ -389,7 +406,7 @@ stages:
displayName: 'pytest'
- stage: cli_integration
dependsOn: [lint, import_ge]
dependsOn: [lint, import_ge, custom_checks]
pool:
vmImage: 'ubuntu-latest'

Expand Down Expand Up @@ -432,7 +449,7 @@ stages:
condition: and(succeeded(), eq(variables.isMain, true))
pool:
vmImage: 'ubuntu-18.04'
dependsOn: [required, lint, db_integration, usage_stats_integration, cli_integration]
dependsOn: [import_ge, custom_checks, required, lint, db_integration, usage_stats_integration, cli_integration]

jobs:
- job: build_gallery
Expand Down Expand Up @@ -524,7 +541,7 @@ stages:
condition: and(succeeded(), eq(variables.isMain, true))
pool:
vmImage: 'ubuntu-latest'
dependsOn: [required, lint, db_integration, usage_stats_integration, cli_integration]
dependsOn: [import_ge, custom_checks, required, lint, db_integration, usage_stats_integration, cli_integration]

jobs:
- job: deploy
Expand Down
Expand Up @@ -166,7 +166,7 @@ def validate_configuration(
"contributors": [
"@andyjessen",
],
"requirements": "simple_icd_10",
"requirements": ["simple_icd_10"],
}


Expand Down
38 changes: 38 additions & 0 deletions docs/changelog.md
Expand Up @@ -2,6 +2,44 @@
title: Changelog
---

### 0.15.2
* [FEATURE] Split data assets using sql datetime columns (#4871)
* [FEATURE] Plot metrics with `DataAssistantResult.plot()` (#4873)
* [FEATURE] RuleBasedProfiler/DataAssistant/MetricMultiBatchParameterBuilder: Enable Returning Metric Computation Results with batch_id Attribution (#4862)
* [FEATURE] Enable variables to be specified at both Profiler and its constituent individual Rule levels (#4912)
* [FEATURE] Enable self-initializing `ExpectColumnUniqueValueCountToBeBetween` (#4902)
* [FEATURE] Improve diagnostic testing process (#4816)
* [FEATURE] Add Azure CI/CD action to aid with style guide enforcement (type hints) (#4878)
* [FEATURE] Add Azure CI/CD action to aid with style guide enforcement (docstrings) (#4617)
* [FEATURE] Use formal interfaces to clean up DataAssistant and DataAssistantResult modules/classes (#4901)
* [BUGFIX] fix validation issue for column domain type and implement expect_column_unique_value_count_to_be_between for VolumeDataAssistant (#4914)
* [BUGFIX] Fix issue with not using the generated table name on read (#4905)
* [BUGFIX] Add deprecation comment to RuntimeDataConnector
* [BUGFIX] Ensure proper class_name within all RuleBasedProfilerConfig instantiations
* [BUGFIX] fix rounding directive handling (#4887)
* [BUGFIX] `great_expectations` import fails when SQLAlchemy is not installed (#4880)
* [MAINTENANCE] Altair types cleanup (#4916)
* [MAINTENANCE] test: update test time (#4911)
* [MAINTENANCE] Add module docstring and simplify access to DatePart (#4910)
* [MAINTENANCE] Chip away at type hint violations around data context (#4897)
* [MAINTENANCE] Improve error message outputted to user in DocstringChecker action (#4895)
* [MAINTENANCE] Re-enable bigquery tests (#4903)
* [MAINTENANCE] Unit tests for sqlalchemy splitter methods, docs and other improvements (#4900)
* [MAINTENANCE] Move plot logic from `DataAssistant` into `DataAssistantResult` (#4896)
* [MAINTENANCE] Add condition to primary pipeline to ensure `import_ge` stage doesn't cause misleading Slack notifications (#4898)
* [MAINTENANCE] Refactor `RuleBasedProfilerConfig` (#4882)
* [MAINTENANCE] Refactor DataAssistant Access to Parameter Computation Results and Plotting Utilities (#4893)
* [MAINTENANCE] Update `dgtest-overrides` list to include all test files not captured by primary strategy (#4891)
* [MAINTENANCE] Add dgtest-overrides section to dependency_graph Azure pipeline
* [MAINTENANCE] Datasource and DataContext-level tests for RuntimeDataConnector changes (#4866)
* [MAINTENANCE] Temporarily disable bigquery tests. (#4888)
* [MAINTENANCE] Import GE after running `ge init` in packaging CI pipeline (#4885)
* [MAINTENANCE] Add CI stage importing GE with only required dependencies installed (#4884)
* [MAINTENANCE] `DataAssistantResult.plot()` conditional formatting and tooltips (#4881)
* [MAINTENANCE] split data context files (#4879)
* [MAINTENANCE] Add Tanner to CODEOWNERS for schemas.py (#4875)
* [MAINTENANCE] Use defined constants for ParameterNode accessor keys (#4872)

### 0.15.1
* [FEATURE] Additional Rule-Based Profiler Parameter/Variable Access Methods (#4814)
* [FEATURE] DataAssistant and VolumeDataAssistant classes (initial implementation -- to be enhanced as part of subsequent work) (#4844)
Expand Down
39 changes: 39 additions & 0 deletions docs_rtd/changelog.rst
Expand Up @@ -4,6 +4,45 @@
Changelog
#########

0.15.2
-----------------
* [FEATURE] Split data assets using sql datetime columns (#4871)
* [FEATURE] Plot metrics with `DataAssistantResult.plot()` (#4873)
* [FEATURE] RuleBasedProfiler/DataAssistant/MetricMultiBatchParameterBuilder: Enable Returning Metric Computation Results with batch_id Attribution (#4862)
* [FEATURE] Enable variables to be specified at both Profiler and its constituent individual Rule levels (#4912)
* [FEATURE] Enable self-initializing `ExpectColumnUniqueValueCountToBeBetween` (#4902)
* [FEATURE] Improve diagnostic testing process (#4816)
* [FEATURE] Add Azure CI/CD action to aid with style guide enforcement (type hints) (#4878)
* [FEATURE] Add Azure CI/CD action to aid with style guide enforcement (docstrings) (#4617)
* [FEATURE] Use formal interfaces to clean up DataAssistant and DataAssistantResult modules/classes (#4901)
* [BUGFIX] fix validation issue for column domain type and implement expect_column_unique_value_count_to_be_between for VolumeDataAssistant (#4914)
* [BUGFIX] Fix issue with not using the generated table name on read (#4905)
* [BUGFIX] Add deprecation comment to RuntimeDataConnector
* [BUGFIX] Ensure proper class_name within all RuleBasedProfilerConfig instantiations
* [BUGFIX] fix rounding directive handling (#4887)
* [BUGFIX] `great_expectations` import fails when SQLAlchemy is not installed (#4880)
* [MAINTENANCE] Altair types cleanup (#4916)
* [MAINTENANCE] test: update test time (#4911)
* [MAINTENANCE] Add module docstring and simplify access to DatePart (#4910)
* [MAINTENANCE] Chip away at type hint violations around data context (#4897)
* [MAINTENANCE] Improve error message outputted to user in DocstringChecker action (#4895)
* [MAINTENANCE] Re-enable bigquery tests (#4903)
* [MAINTENANCE] Unit tests for sqlalchemy splitter methods, docs and other improvements (#4900)
* [MAINTENANCE] Move plot logic from `DataAssistant` into `DataAssistantResult` (#4896)
* [MAINTENANCE] Add condition to primary pipeline to ensure `import_ge` stage doesn't cause misleading Slack notifications (#4898)
* [MAINTENANCE] Refactor `RuleBasedProfilerConfig` (#4882)
* [MAINTENANCE] Refactor DataAssistant Access to Parameter Computation Results and Plotting Utilities (#4893)
* [MAINTENANCE] Update `dgtest-overrides` list to include all test files not captured by primary strategy (#4891)
* [MAINTENANCE] Add dgtest-overrides section to dependency_graph Azure pipeline
* [MAINTENANCE] Datasource and DataContext-level tests for RuntimeDataConnector changes (#4866)
* [MAINTENANCE] Temporarily disable bigquery tests. (#4888)
* [MAINTENANCE] Import GE after running `ge init` in packaging CI pipeline (#4885)
* [MAINTENANCE] Add CI stage importing GE with only required dependencies installed (#4884)
* [MAINTENANCE] `DataAssistantResult.plot()` conditional formatting and tooltips (#4881)
* [MAINTENANCE] split data context files (#4879)
* [MAINTENANCE] Add Tanner to CODEOWNERS for schemas.py (#4875)
* [MAINTENANCE] Use defined constants for ParameterNode accessor keys (#4872)

0.15.1
-----------------
* [FEATURE] Additional Rule-Based Profiler Parameter/Variable Access Methods (#4814)
Expand Down
2 changes: 1 addition & 1 deletion great_expectations/deployment_version
@@ -1 +1 @@
0.15.1
0.15.2
24 changes: 23 additions & 1 deletion great_expectations/execution_engine/sqlalchemy_data_splitter.py
@@ -1,3 +1,19 @@
"""Create queries for use in sql data splitting.
This module contains utilities for generating queries used by execution engines
and data connectors to split data into batches based on the data itself. It
is typically used from within either an execution engine or a data connector,
not by itself.
Typical usage example:
__init__():
self._sqlalchemy_data_splitter = SqlAlchemyDataSplitter()
elsewhere():
splitter = self._sqlalchemy_data_splitter.get_splitter_method()
split_query_or_clause = splitter()
"""

import datetime
import enum
from typing import Callable, List, Union
Expand Down Expand Up @@ -39,7 +55,13 @@ def __eq__(self, other):


class SqlAlchemyDataSplitter:
"""Methods for splitting data accessible via SqlAlchemyExecutionEngine."""
"""Methods for splitting data accessible via SqlAlchemyExecutionEngine.
Note, for convenience, you can also access DatePart via the class attribute
date_part, e.g. SqlAlchemyDataSplitter.date_part.MONTH
"""

date_part: DatePart = DatePart

def get_splitter_method(self, splitter_method_name: str) -> Callable:
"""Get the appropriate splitter method from the method name.
Expand Down
Expand Up @@ -142,21 +142,22 @@ class ExpectColumnMaxToBeBetween(ColumnExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_max_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
variables={
"strict_min": False,
"strict_max": False,
"false_positive_rate": 0.05,
"estimator": "bootstrap",
"num_bootstrap_samples": 9999,
"bootstrap_random_seed": None,
"truncate_values": {
"lower_bound": None,
"upper_bound": None,
},
"round_decimals": None,
},
variables={},
rules={
"default_expect_column_max_to_be_between_rule": {
"variables": {
"strict_min": False,
"strict_max": False,
"false_positive_rate": 0.05,
"estimator": "bootstrap",
"num_bootstrap_samples": 9999,
"bootstrap_random_seed": None,
"truncate_values": {
"lower_bound": None,
"upper_bound": None,
},
"round_decimals": 0,
},
"domain_builder": {
"class_name": "ColumnDomainBuilder",
"module_name": "great_expectations.rule_based_profiler.domain_builder",
Expand Down
Expand Up @@ -135,21 +135,22 @@ class ExpectColumnMinToBeBetween(ColumnExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_min_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
variables={
"strict_min": False,
"strict_max": False,
"false_positive_rate": 0.05,
"estimator": "bootstrap",
"num_bootstrap_samples": 9999,
"bootstrap_random_seed": None,
"truncate_values": {
"lower_bound": None,
"upper_bound": None,
},
"round_decimals": None,
},
variables={},
rules={
"default_expect_column_min_to_be_between_rule": {
"variables": {
"strict_min": False,
"strict_max": False,
"false_positive_rate": 0.05,
"estimator": "bootstrap",
"num_bootstrap_samples": 9999,
"bootstrap_random_seed": None,
"truncate_values": {
"lower_bound": None,
"upper_bound": None,
},
"round_decimals": 0,
},
"domain_builder": {
"class_name": "ColumnDomainBuilder",
"module_name": "great_expectations.rule_based_profiler.domain_builder",
Expand Down
Expand Up @@ -177,25 +177,26 @@ class ExpectColumnQuantileValuesToBeBetween(ColumnExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_quantile_values_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
variables={
"quantiles": [
0.25,
0.5,
0.75,
],
"allow_relative_error": "linear",
"false_positive_rate": 0.05,
"estimator": "bootstrap",
"num_bootstrap_samples": 9999,
"bootstrap_random_seed": None,
"truncate_values": {
"lower_bound": None,
"upper_bound": None,
},
"round_decimals": 1,
},
variables={},
rules={
"default_expect_column_quantile_values_to_be_between_rule": {
"variables": {
"quantiles": [
0.25,
0.5,
0.75,
],
"allow_relative_error": "linear",
"false_positive_rate": 0.05,
"estimator": "bootstrap",
"num_bootstrap_samples": 9999,
"bootstrap_random_seed": None,
"truncate_values": {
"lower_bound": None,
"upper_bound": None,
},
"round_decimals": 1,
},
"domain_builder": {
"class_name": "ColumnDomainBuilder",
"module_name": "great_expectations.rule_based_profiler.domain_builder",
Expand Down

0 comments on commit 09058a1

Please sign in to comment.