Merge branch 'feature/GREAT-571/big-query-temp-tables' of https://github.com/great-expectations/great_expectations into feature/GREAT-571/big-query-temp-tables

* 'feature/GREAT-571/big-query-temp-tables' of https://github.com/great-expectations/great_expectations: [BUGFIX] Enables successful parsing of test cases for multi-table expectations (#4906) [BUGFIX] check contrib requirements (#4922) [MAINTENANCE] Ensure that code style scripts in CI/CD exit early on failure (#4921) ProgressBar for DataAssistant RuleBasedProfiler computations. (#4918) release-prep-2022-04-21 (#4919) [MAINTENANCE] Altair types cleanup (#4916) [BUGFIX] fix validation issue for column domain type and implement expect_column_unique_value_count_to_be_between for VolumeDataAssistant (#4914) [FEATURE] Enable variables to be specified at both Profiler and its constituent individual Rule levels (#4912) test: update test time (#4911) Add module docstring and simplify access to DatePart (#4910)
great-expectations · Apr 21, 2022 · 09058a1 · 09058a1
2 parents d54a0d0 + 32d96b7
commit 09058a1
Show file tree

Hide file tree

Showing 46 changed files with 2,996 additions and 390 deletions.
diff --git a/assets/scripts/build_gallery.py b/assets/scripts/build_gallery.py
@@ -109,6 +109,8 @@ def get_contrib_requirements(filepath: str) -> Dict:
                 if "library_metadata" in target_ids:
                     library_metadata = ast.literal_eval(node.value)
                     requirements = library_metadata.get("requirements", [])
+                    if type(requirements) == str:
+                        requirements = [requirements]
                     requirements_info[current_class] = requirements
                     requirements_info["requirements"] += requirements
 

diff --git a/azure-pipelines-dependency-graph-testing.yml b/azure-pipelines-dependency-graph-testing.yml
@@ -153,7 +153,7 @@ stages:
            displayName: 'Import Great Expectations'
 
   - stage: required
-    dependsOn: [scope_check, lint, import_ge]
+    dependsOn: [scope_check, lint, import_ge, custom_checks]
     pool:
       vmImage: 'ubuntu-18.04'
 
@@ -343,7 +343,7 @@ stages:
               reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov'
 
   - stage: usage_stats_integration
-    dependsOn: [scope_check, lint, import_ge]
+    dependsOn: [scope_check, lint, import_ge, custom_checks]
     pool:
       vmImage: 'ubuntu-latest'
 
@@ -379,7 +379,7 @@ stages:
     pool:
       vmImage: 'ubuntu-latest'
 
-    dependsOn: [scope_check, lint, import_ge]
+    dependsOn: [scope_check, lint, import_ge, custom_checks]
 
     jobs:
       - job: mysql
@@ -472,7 +472,7 @@ stages:
             displayName: 'dgtest'
 
   - stage: cli_integration
-    dependsOn: [scope_check, lint, import_ge]
+    dependsOn: [scope_check, lint, import_ge, custom_checks]
     pool:
       vmImage: 'ubuntu-latest'
 

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -81,6 +81,23 @@ stages:
               pyupgrade --py3-plus || EXIT_STATUS=$?
               exit $EXIT_STATUS
 
+  - stage: custom_checks
+    pool:
+      vmImage: 'ubuntu-latest'
+
+    jobs:
+    - job: type_hint_checker
+      steps:
+      - script: |
+          pip install mypy # Prereq for type hint script
+          python scripts/check_type_hint_coverage.py
+        name: TypeHintChecker
+
+    - job: docstring_checker
+      steps:
+      - bash: python scripts/check_docstring_coverage.py
+        name: DocstringChecker
+
   - stage: import_ge
     dependsOn: [lint]
     pool:
@@ -117,7 +134,7 @@ stages:
            displayName: 'Import Great Expectations'
 
   - stage: required
-    dependsOn: [lint, import_ge]
+    dependsOn: [lint, import_ge, custom_checks]
     pool:
       vmImage: 'ubuntu-18.04'
 
@@ -276,7 +293,7 @@ stages:
               reportDirectory: '$(System.DefaultWorkingDirectory)/**/htmlcov'
 
   - stage: usage_stats_integration
-    dependsOn: [lint, import_ge]
+    dependsOn: [lint, import_ge, custom_checks]
     pool:
       vmImage: 'ubuntu-latest'
 
@@ -310,7 +327,7 @@ stages:
     pool:
       vmImage: 'ubuntu-latest'
 
-    dependsOn: [lint, import_ge]
+    dependsOn: [lint, import_ge, custom_checks]
 
     jobs:
       - job: mysql
@@ -389,7 +406,7 @@ stages:
             displayName: 'pytest'
 
   - stage: cli_integration
-    dependsOn: [lint, import_ge]
+    dependsOn: [lint, import_ge, custom_checks]
     pool:
       vmImage: 'ubuntu-latest'
 
@@ -432,7 +449,7 @@ stages:
     condition: and(succeeded(), eq(variables.isMain, true))
     pool:
       vmImage: 'ubuntu-18.04'
-    dependsOn: [required, lint, db_integration, usage_stats_integration, cli_integration]
+    dependsOn: [import_ge, custom_checks, required, lint, db_integration, usage_stats_integration, cli_integration]
 
     jobs:
       - job: build_gallery
@@ -524,7 +541,7 @@ stages:
     condition: and(succeeded(), eq(variables.isMain, true))
     pool:
       vmImage: 'ubuntu-latest'
-    dependsOn: [required, lint, db_integration, usage_stats_integration, cli_integration]
+    dependsOn: [import_ge, custom_checks, required, lint, db_integration, usage_stats_integration, cli_integration]
 
     jobs:
       - job: deploy

diff --git a/...s_experimental/expectations/expect_column_values_to_be_icd_ten_category_or_subcategory.py b/...s_experimental/expectations/expect_column_values_to_be_icd_ten_category_or_subcategory.py
@@ -166,7 +166,7 @@ def validate_configuration(
         "contributors": [
             "@andyjessen",
         ],
-        "requirements": "simple_icd_10",
+        "requirements": ["simple_icd_10"],
     }
 
 

diff --git a/docs/changelog.md b/docs/changelog.md
@@ -2,6 +2,44 @@
 title: Changelog
 ---
 
+### 0.15.2
+* [FEATURE] Split data assets using sql datetime columns (#4871)
+* [FEATURE] Plot metrics with `DataAssistantResult.plot()` (#4873)
+* [FEATURE] RuleBasedProfiler/DataAssistant/MetricMultiBatchParameterBuilder: Enable Returning Metric Computation Results with batch_id Attribution (#4862)
+* [FEATURE] Enable variables to be specified at both Profiler and its constituent individual Rule levels (#4912)
+* [FEATURE] Enable self-initializing `ExpectColumnUniqueValueCountToBeBetween` (#4902)
+* [FEATURE] Improve diagnostic testing process (#4816)
+* [FEATURE] Add Azure CI/CD action to aid with style guide enforcement (type hints) (#4878)
+* [FEATURE] Add Azure CI/CD action to aid with style guide enforcement (docstrings) (#4617)
+* [FEATURE] Use formal interfaces to clean up DataAssistant and DataAssistantResult modules/classes (#4901)
+* [BUGFIX] fix validation issue for column domain type and implement expect_column_unique_value_count_to_be_between for VolumeDataAssistant (#4914)
+* [BUGFIX] Fix issue with not using the generated table name on read (#4905)
+* [BUGFIX] Add deprecation comment to RuntimeDataConnector
+* [BUGFIX] Ensure proper class_name within all RuleBasedProfilerConfig instantiations
+* [BUGFIX] fix rounding directive handling (#4887)
+* [BUGFIX] `great_expectations` import fails when SQL Alchemy is not installed (#4880)
+* [MAINTENANCE] Altair types cleanup (#4916)
+* [MAINTENANCE] test: update test time (#4911)
+* [MAINTENANCE] Add module docstring and simplify access to DatePart (#4910)
+* [MAINTENANCE] Chip away at type hint violations around data context (#4897)
+* [MAINTENANCE] Improve error message outputted to user in DocstringChecker action (#4895)
+* [MAINTENANCE] Re-enable bigquery tests (#4903)
+* [MAINTENANCE] Unit tests for sqlalchemy splitter methods, docs and other improvements (#4900)
+* [MAINTENANCE] Move plot logic from `DataAssistant` into `DataAssistantResult` (#4896)
+* [MAINTENANCE] Add condition to primary pipeline to ensure `import_ge` stage doesn't cause misleading Slack notifications (#4898)
+* [MAINTENANCE] Refactor `RuleBasedProfilerConfig` (#4882)
+* [MAINTENANCE] Refactor DataAssistant Access to Parameter Computation Results and Plotting Utilities (#4893)
+* [MAINTENANCE] Update `dgtest-overrides` list to include all test files not captured by primary strategy (#4891)
+* [MAINTENANCE] Add dgtest-overrides section to dependency_graph Azure pipeline
+* [MAINTENANCE] Datasource and DataContext-level tests for RuntimeDataConnector changes (#4866)
+* [MAINTENANCE] Temporarily disable bigquery tests. (#4888)
+* [MAINTENANCE] Import GE after running `ge init` in packaging CI pipeline (#4885)
+* [MAINTENANCE] Add CI stage importing GE with only required dependencies installed (#4884)
+* [MAINTENANCE] `DataAssistantResult.plot()` conditional formatting and tooltips (#4881)
+* [MAINTENANCE] split data context files (#4879)
+* [MAINTENANCE] Add Tanner to CODEOWNERS for schemas.py (#4875)
+* [MAINTENANCE] Use defined constants for ParameterNode accessor keys (#4872)
+
 ### 0.15.1
 * [FEATURE] Additional Rule-Based Profiler Parameter/Variable Access Methods (#4814)
 * [FEATURE] DataAssistant and VolumeDataAssistant classes (initial implementation -- to be enhanced as part of subsequent work) (#4844)

diff --git a/docs_rtd/changelog.rst b/docs_rtd/changelog.rst
@@ -4,6 +4,45 @@
 Changelog
 #########
 
+0.15.2
+-----------------
+* [FEATURE] Split data assets using sql datetime columns (#4871)
+* [FEATURE] Plot metrics with `DataAssistantResult.plot()` (#4873)
+* [FEATURE] RuleBasedProfiler/DataAssistant/MetricMultiBatchParameterBuilder: Enable Returning Metric Computation Results with batch_id Attribution (#4862)
+* [FEATURE] Enable variables to be specified at both Profiler and its constituent individual Rule levels (#4912)
+* [FEATURE] Enable self-initializing `ExpectColumnUniqueValueCountToBeBetween` (#4902)
+* [FEATURE] Improve diagnostic testing process (#4816)
+* [FEATURE] Add Azure CI/CD action to aid with style guide enforcement (type hints) (#4878)
+* [FEATURE] Add Azure CI/CD action to aid with style guide enforcement (docstrings) (#4617)
+* [FEATURE] Use formal interfaces to clean up DataAssistant and DataAssistantResult modules/classes (#4901)
+* [BUGFIX] fix validation issue for column domain type and implement expect_column_unique_value_count_to_be_between for VolumeDataAssistant (#4914)
+* [BUGFIX] Fix issue with not using the generated table name on read (#4905)
+* [BUGFIX] Add deprecation comment to RuntimeDataConnector
+* [BUGFIX] Ensure proper class_name within all RuleBasedProfilerConfig instantiations
+* [BUGFIX] fix rounding directive handling (#4887)
+* [BUGFIX] `great_expectations` import fails when SQL Alchemy is not installed (#4880)
+* [MAINTENANCE] Altair types cleanup (#4916)
+* [MAINTENANCE] test: update test time (#4911)
+* [MAINTENANCE] Add module docstring and simplify access to DatePart (#4910)
+* [MAINTENANCE] Chip away at type hint violations around data context (#4897)
+* [MAINTENANCE] Improve error message outputted to user in DocstringChecker action (#4895)
+* [MAINTENANCE] Re-enable bigquery tests (#4903)
+* [MAINTENANCE] Unit tests for sqlalchemy splitter methods, docs and other improvements (#4900)
+* [MAINTENANCE] Move plot logic from `DataAssistant` into `DataAssistantResult` (#4896)
+* [MAINTENANCE] Add condition to primary pipeline to ensure `import_ge` stage doesn't cause misleading Slack notifications (#4898)
+* [MAINTENANCE] Refactor `RuleBasedProfilerConfig` (#4882)
+* [MAINTENANCE] Refactor DataAssistant Access to Parameter Computation Results and Plotting Utilities (#4893)
+* [MAINTENANCE] Update `dgtest-overrides` list to include all test files not captured by primary strategy (#4891)
+* [MAINTENANCE] Add dgtest-overrides section to dependency_graph Azure pipeline
+* [MAINTENANCE] Datasource and DataContext-level tests for RuntimeDataConnector changes (#4866)
+* [MAINTENANCE] Temporarily disable bigquery tests. (#4888)
+* [MAINTENANCE] Import GE after running `ge init` in packaging CI pipeline (#4885)
+* [MAINTENANCE] Add CI stage importing GE with only required dependencies installed (#4884)
+* [MAINTENANCE] `DataAssistantResult.plot()` conditional formatting and tooltips (#4881)
+* [MAINTENANCE] split data context files (#4879)
+* [MAINTENANCE] Add Tanner to CODEOWNERS for schemas.py (#4875)
+* [MAINTENANCE] Use defined constants for ParameterNode accessor keys (#4872)
+
 0.15.1
 -----------------
 * [FEATURE] Additional Rule-Based Profiler Parameter/Variable Access Methods (#4814)

diff --git a/great_expectations/deployment_version b/great_expectations/deployment_version
@@ -1 +1 @@
-0.15.1
+0.15.2
diff --git a/great_expectations/execution_engine/sqlalchemy_data_splitter.py b/great_expectations/execution_engine/sqlalchemy_data_splitter.py
@@ -1,3 +1,19 @@
+"""Create queries for use in sql data splitting.
+
+This module contains utilities for generating queries used by execution engines
+and data connectors to split data into batches based on the data itself. It
+is typically used from within either an execution engine or a data connector,
+not by itself.
+
+    Typical usage example:
+        __init__():
+            self._sqlalchemy_data_splitter = SqlAlchemyDataSplitter()
+
+        elsewhere():
+            splitter = self._sqlalchemy_data_splitter.get_splitter_method(splitter_method_name)
+            split_query_or_clause = splitter()
+"""
+
 import datetime
 import enum
 from typing import Callable, List, Union
@@ -39,7 +55,13 @@ def __eq__(self, other):
 
 
 class SqlAlchemyDataSplitter:
-    """Methods for splitting data accessible via SqlAlchemyExecutionEngine."""
+    """Methods for splitting data accessible via SqlAlchemyExecutionEngine.
+
+    Note, for convenience, you can also access DatePart via the class attribute
+    date_part, e.g. SqlAlchemyDataSplitter.date_part.MONTH
+    """
+
+    date_part: DatePart = DatePart
 
     def get_splitter_method(self, splitter_method_name: str) -> Callable:
         """Get the appropriate splitter method from the method name.

diff --git a/great_expectations/expectations/core/expect_column_max_to_be_between.py b/great_expectations/expectations/core/expect_column_max_to_be_between.py
@@ -142,21 +142,22 @@ class ExpectColumnMaxToBeBetween(ColumnExpectation):
     default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
         name="expect_column_max_to_be_between",  # Convention: use "expectation_type" as profiler name.
         config_version=1.0,
-        variables={
-            "strict_min": False,
-            "strict_max": False,
-            "false_positive_rate": 0.05,
-            "estimator": "bootstrap",
-            "num_bootstrap_samples": 9999,
-            "bootstrap_random_seed": None,
-            "truncate_values": {
-                "lower_bound": None,
-                "upper_bound": None,
-            },
-            "round_decimals": None,
-        },
+        variables={},
         rules={
             "default_expect_column_max_to_be_between_rule": {
+                "variables": {
+                    "strict_min": False,
+                    "strict_max": False,
+                    "false_positive_rate": 0.05,
+                    "estimator": "bootstrap",
+                    "num_bootstrap_samples": 9999,
+                    "bootstrap_random_seed": None,
+                    "truncate_values": {
+                        "lower_bound": None,
+                        "upper_bound": None,
+                    },
+                    "round_decimals": 0,
+                },
                 "domain_builder": {
                     "class_name": "ColumnDomainBuilder",
                     "module_name": "great_expectations.rule_based_profiler.domain_builder",

diff --git a/great_expectations/expectations/core/expect_column_min_to_be_between.py b/great_expectations/expectations/core/expect_column_min_to_be_between.py
@@ -135,21 +135,22 @@ class ExpectColumnMinToBeBetween(ColumnExpectation):
     default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
         name="expect_column_min_to_be_between",  # Convention: use "expectation_type" as profiler name.
         config_version=1.0,
-        variables={
-            "strict_min": False,
-            "strict_max": False,
-            "false_positive_rate": 0.05,
-            "estimator": "bootstrap",
-            "num_bootstrap_samples": 9999,
-            "bootstrap_random_seed": None,
-            "truncate_values": {
-                "lower_bound": None,
-                "upper_bound": None,
-            },
-            "round_decimals": None,
-        },
+        variables={},
         rules={
             "default_expect_column_min_to_be_between_rule": {
+                "variables": {
+                    "strict_min": False,
+                    "strict_max": False,
+                    "false_positive_rate": 0.05,
+                    "estimator": "bootstrap",
+                    "num_bootstrap_samples": 9999,
+                    "bootstrap_random_seed": None,
+                    "truncate_values": {
+                        "lower_bound": None,
+                        "upper_bound": None,
+                    },
+                    "round_decimals": 0,
+                },
                 "domain_builder": {
                     "class_name": "ColumnDomainBuilder",
                     "module_name": "great_expectations.rule_based_profiler.domain_builder",

diff --git a/great_expectations/expectations/core/expect_column_quantile_values_to_be_between.py b/great_expectations/expectations/core/expect_column_quantile_values_to_be_between.py
@@ -177,25 +177,26 @@ class ExpectColumnQuantileValuesToBeBetween(ColumnExpectation):
     default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
         name="expect_column_quantile_values_to_be_between",  # Convention: use "expectation_type" as profiler name.
         config_version=1.0,
-        variables={
-            "quantiles": [
-                0.25,
-                0.5,
-                0.75,
-            ],
-            "allow_relative_error": "linear",
-            "false_positive_rate": 0.05,
-            "estimator": "bootstrap",
-            "num_bootstrap_samples": 9999,
-            "bootstrap_random_seed": None,
-            "truncate_values": {
-                "lower_bound": None,
-                "upper_bound": None,
-            },
-            "round_decimals": 1,
-        },
+        variables={},
         rules={
             "default_expect_column_quantile_values_to_be_between_rule": {
+                "variables": {
+                    "quantiles": [
+                        0.25,
+                        0.5,
+                        0.75,
+                    ],
+                    "allow_relative_error": "linear",
+                    "false_positive_rate": 0.05,
+                    "estimator": "bootstrap",
+                    "num_bootstrap_samples": 9999,
+                    "bootstrap_random_seed": None,
+                    "truncate_values": {
+                        "lower_bound": None,
+                        "upper_bound": None,
+                    },
+                    "round_decimals": 1,
+                },
                 "domain_builder": {
                     "class_name": "ColumnDomainBuilder",
                     "module_name": "great_expectations.rule_based_profiler.domain_builder",