[BUGFIX] Misconfigured Expectations affecting unassociated Checkpoints (#9491)

Co-authored-by: Chetan Kini <chetan@superconductive.com>
NathanFarmer and cdkini committed Mar 5, 2024
1 parent 0d91d31 commit 3c13218
Showing 7 changed files with 83 additions and 18 deletions.
@@ -3675,11 +3675,21 @@ def _construct_data_context_id(self) -> str:
 
     def _compile_evaluation_parameter_dependencies(self) -> None:
         self._evaluation_parameter_dependencies = {}
         # NOTE: Chetan - 20211118: This iteration is reverting the behavior performed here:
         # https://github.com/great-expectations/great_expectations/pull/3377
         # This revision was necessary due to breaking changes but will need to be brought back in a future ticket.
         # we have to iterate through all expectation suites because evaluation parameters
         # can reference metric values from other suites
         for key in self.expectations_store.list_keys():
-            expectation_suite_dict: dict = self.expectations_store.get(key)
+            try:
+                expectation_suite_dict: dict = self.expectations_store.get(key)
+            except ValidationError as e:
+                # if a suite that isn't associated with the checkpoint compiling eval params is misconfigured
+                # we should ignore that instead of breaking all checkpoints in the entire context
+                warnings.warn(
+                    f"Suite with identifier {key} was not considered when compiling evaluation parameter dependencies "
+                    f"because it failed to load with message: {e}",
+                    UserWarning,
+                )
+                continue
 
             if not expectation_suite_dict:
                 continue
             expectation_suite = ExpectationSuite(**expectation_suite_dict)
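The caller-visible effect of the change above: a minimal sketch, assuming `context` is an initialized data context whose expectations store contains one malformed suite (the setup names here are illustrative, not from the commit).

```python
import warnings

# Before the fix, one malformed suite raised ValidationError here and broke
# evaluation parameter compilation for every checkpoint in the context.
# After the fix, the bad suite is skipped and surfaced as a UserWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    context._compile_evaluation_parameter_dependencies()

for w in caught:
    if issubclass(w.category, UserWarning):
        print(w.message)  # "Suite with identifier ... failed to load with message: ..."
```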
1 change: 0 additions & 1 deletion great_expectations/self_check/util.py
@@ -517,7 +517,6 @@ def get_dataset(  # noqa: C901, PLR0912, PLR0913, PLR0915
         spark_config={
             "spark.sql.catalogImplementation": "hive",
             "spark.executor.memory": "450m",
-            # "spark.driver.allowMultipleContexts": "true", # This directive does not appear to have any effect.
         }
     )
     # We need to allow null values in some column types that do not support them natively, so we skip
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -510,6 +510,9 @@ filterwarnings = [
     # Found when running test_case_runner_v2_api[SparkDFDataset/column_pair_map_expectations/expect_column_pair_values_to_be_in_set:basic_positive_test_without_nulls]
     'ignore: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate\(\) instead.:FutureWarning',
     # Example Actual Warning: FutureWarning: is_datetime64tz_dtype is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.DatetimeTZDtype)` instead.
     'ignore: is_datetime64tz_dtype is deprecated and will be removed in a future version. Check `isinstance\(dtype, pd.DatetimeTZDtype\)` instead.',
+    # Example Actual Warning:
+    # ResourceWarning: unclosed <socket.socket fd=231, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('127.0.0.1', 60004), raddr=('127.0.0.1', 46627)>
+    "ignore: unclosed <socket.socket:ResourceWarning",
     # pymysql
     # Example Actual Warning: pymysql.err.Warning: (1292, "Truncated incorrect DOUBLE value: 'cat'")
     # Found in tests/test_definitions/test_expectations_v2_api.py, if not found in v3 api remove this ignore directive with the v2 api code.
@@ -560,8 +563,6 @@ filterwarnings = [
     # Example Actual Warning:
     # DeprecationWarning: Importing ErrorTree directly from the jsonschema package is deprecated and will become an ImportError. Import it from jsonschema.exceptions instead.
     "ignore: Importing ErrorTree directly from the jsonschema package is deprecated and will become an ImportError. Import it from jsonschema.exceptions instead.:DeprecationWarning",
-
-
     # sqlalchemy
     # Example Actual Warning:
     # sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
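Each pytest `filterwarnings` entry follows the standard `action:message_regex:category` syntax, with the regex matched against the start of the warning message, so the new socket entry behaves roughly like this runtime filter (a sketch of the semantics, not code from the commit):

```python
import warnings

# Ignore ResourceWarnings whose message starts with "unclosed <socket.socket",
# i.e. the warning emitted when a test leaves a TCP socket open at GC time.
warnings.filterwarnings(
    "ignore",
    message=r"unclosed <socket\.socket",
    category=ResourceWarning,
)
```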
14 changes: 8 additions & 6 deletions tests/core/test_util.py
@@ -1,5 +1,6 @@
 import datetime
 
 import pytest
+from freezegun import freeze_time
 
 from great_expectations.core.util import (
     AzureUrl,
@@ -11,9 +12,10 @@
 )
 
 
+@freeze_time("11/05/1955")
 @pytest.mark.unit
 def test_substitute_all_strftime_format_strings():
+    now = datetime.datetime.utcnow()
 
     input_dict = {
         "month_no": "%m",
         "just_a_string": "Bloopy!",
@@ -24,13 +26,13 @@ def test_substitute_all_strftime_format_strings():
         "list": ["a", 123, "%a"],
     }
     expected_output_dict = {
-        "month_no": "11",
+        "month_no": now.strftime("%m"),
         "just_a_string": "Bloopy!",
-        "string_with_month_word": "Today we are in the month November!",
+        "string_with_month_word": f"Today we are in the month {now.strftime('%B')}!",
         "number": "90210",
         "escaped_percent": "'%m' is the format string for month number",
-        "inner_dict": {"day_word_full": "Saturday"},
-        "list": ["a", 123, "Sat"],
+        "inner_dict": {"day_word_full": now.strftime("%A")},
+        "list": ["a", 123, now.strftime("%a")],
     }
     assert substitute_all_strftime_format_strings(input_dict) == expected_output_dict

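The updated test pins time and derives its expected values from the same frozen clock. A minimal sketch of that pattern, assuming freezegun's usual behavior (not code from the commit):

```python
import datetime

from freezegun import freeze_time

with freeze_time("11/05/1955"):
    # utcnow() is pinned to the frozen instant (1955-11-05, a Saturday), so
    # strftime-derived expectations cannot drift across a midnight or month
    # boundary while the test runs.
    now = datetime.datetime.utcnow()
    assert now.strftime("%m") == "11"
    assert now.strftime("%A") == "Saturday"
```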
8 changes: 4 additions & 4 deletions tests/data_context/store/test_data_context_store.py
@@ -38,17 +38,17 @@ def test_serialize_cloud_mode(basic_data_context_config: DataContextConfig):
         "profiler_store_name": None,
         "plugins_directory": "plugins/",
         "stores": {
-            "evaluation_parameter_store": {
-                "class_name": "EvaluationParameterStore",
-                "module_name": "great_expectations.data_context.store",
-            },
             "checkpoint_store": {
                 "class_name": "CheckpointStore",
                 "store_backend": {
                     "base_directory": "checkpoints/",
                     "class_name": "TupleFilesystemStoreBackend",
                 },
             },
+            "evaluation_parameter_store": {
+                "class_name": "EvaluationParameterStore",
+                "module_name": "great_expectations.data_context.store",
+            },
             "expectations_store": {
                 "class_name": "ExpectationsStore",
                 "store_backend": {
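The reorder above only moves `evaluation_parameter_store` below `checkpoint_store` in the expected literal; Python dict equality ignores key order, so the assertion is presumably unchanged in meaning, as this one-liner illustrates:

```python
# Dict comparison is order-insensitive; alphabetizing keys is cosmetic.
assert {"a": 1, "b": 2} == {"b": 2, "a": 1}
```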
54 changes: 54 additions & 0 deletions tests/data_context/test_data_context.py
@@ -271,6 +271,60 @@ def test_compile_evaluation_parameter_dependencies(
 )
 
 
+@pytest.mark.filesystem
+def test_compile_evaluation_parameter_dependencies_broken_suite(
+    data_context_parameterized_expectation_suite: FileDataContext,
+):
+    broken_suite_path = pathlib.Path(
+        data_context_parameterized_expectation_suite.root_directory,
+        "expectations",
+        "broken_suite.json",
+    )
+    broken_suite_dict = {
+        "expectation_suite_name": "broken suite",
+        "expectations": [
+            {
+                "expectation_type": "expect_column_values_to_be_between",
+                "kwargs": {
+                    "column": "col_1",
+                    "max_value": 5,
+                    "min_value": 1,
+                    "mostly": 0.5,
+                },
+                "meta": {},
+                "not_a_valid_expectation_config_arg": "break it!",
+            },
+        ],
+        "meta": {"great_expectations_version": "0.18.8"},
+    }
+    with broken_suite_path.open("w", encoding="UTF-8") as fp:
+        json.dump(obj=broken_suite_dict, fp=fp)
+
+    assert (
+        data_context_parameterized_expectation_suite._evaluation_parameter_dependencies
+        == {}
+    )
+    with pytest.warns(UserWarning):
+        data_context_parameterized_expectation_suite._compile_evaluation_parameter_dependencies()
+    assert (
+        data_context_parameterized_expectation_suite._evaluation_parameter_dependencies
+        == {
+            "source_diabetes_data.default": [
+                {
+                    "metric_kwargs_id": {
+                        "column=patient_nbr": [
+                            "expect_column_unique_value_count_to_be_between.result.observed_value"
+                        ]
+                    }
+                }
+            ],
+            "source_patient_data.default": [
+                "expect_table_row_count_to_equal.result.observed_value"
+            ],
+        }
+    )
+
+
 @pytest.mark.filesystem
 @mock.patch("great_expectations.data_context.store.DatasourceStore.update_by_name")
 def test_update_datasource_persists_changes_with_store(
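The regression test relies on `pytest.warns` to assert that the broken suite is surfaced as a warning rather than an exception. The general pattern, reduced to a self-contained sketch (the helper below is hypothetical, not code from the commit):

```python
import warnings

import pytest


def load_suite_or_warn() -> None:
    # Hypothetical stand-in for the store iteration in
    # _compile_evaluation_parameter_dependencies.
    warnings.warn("Suite with identifier X failed to load", UserWarning)


def test_warns_on_broken_suite() -> None:
    with pytest.warns(UserWarning, match="failed to load"):
        load_suite_or_warn()
```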
@@ -182,7 +182,6 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_override_query_
     observed,
     row_condition,
     spark_session,
-    basic_spark_df_execution_engine,
     titanic_df,
 ):
     df: pd.DataFrame = titanic_df