[BUGFIX] Misconfigured Expectations affecting unassociated Checkpoints (#9491)

Co-authored-by: Chetan Kini <chetan@superconductive.com>
NathanFarmer and cdkini committed Mar 5, 2024
1 parent 0d91d31 commit 3c13218
Showing 7 changed files with 83 additions and 18 deletions.
@@ -3675,11 +3675,21 @@ def _construct_data_context_id(self) -> str:
 
     def _compile_evaluation_parameter_dependencies(self) -> None:
         self._evaluation_parameter_dependencies = {}
         # NOTE: Chetan - 20211118: This iteration is reverting the behavior performed here:
         # https://github.com/great-expectations/great_expectations/pull/3377
         # This revision was necessary due to breaking changes but will need to be brought back in a future ticket.
         # we have to iterate through all expectation suites because evaluation parameters
         # can reference metric values from other suites
         for key in self.expectations_store.list_keys():
-            expectation_suite_dict: dict = self.expectations_store.get(key)
+            try:
+                expectation_suite_dict: dict = self.expectations_store.get(key)
+            except ValidationError as e:
+                # if a suite that isn't associated with the checkpoint compiling eval params is misconfigured
+                # we should ignore that instead of breaking all checkpoints in the entire context
+                warnings.warn(
+                    f"Suite with identifier {key} was not considered when compiling evaluation parameter dependencies "
+                    f"because it failed to load with message: {e}",
+                    UserWarning,
+                )
+                continue
 
             if not expectation_suite_dict:
                 continue
             expectation_suite = ExpectationSuite(**expectation_suite_dict)
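The caller-visible effect of the change above: a minimal sketch, assuming `context` is an initialized data context whose expectations store contains one malformed suite (the setup names here are illustrative, not from the commit).

```python
import warnings

# Before the fix, one malformed suite raised ValidationError here and broke
# evaluation parameter compilation for every checkpoint in the context.
# After the fix, the bad suite is skipped and surfaced as a UserWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    context._compile_evaluation_parameter_dependencies()

for w in caught:
    if issubclass(w.category, UserWarning):
        print(w.message)  # "Suite with identifier ... failed to load with message: ..."
```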
1 change: 0 additions & 1 deletion great_expectations/self_check/util.py
@@ -517,7 +517,6 @@ def get_dataset(  # noqa: C901, PLR0912, PLR0913, PLR0915
         spark_config={
             "spark.sql.catalogImplementation": "hive",
             "spark.executor.memory": "450m",
-            # "spark.driver.allowMultipleContexts": "true", # This directive does not appear to have any effect.
         }
     )
     # We need to allow null values in some column types that do not support them natively, so we skip
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -510,6 +510,9 @@ filterwarnings = [
     # Found when running test_case_runner_v2_api[SparkDFDataset/column_pair_map_expectations/expect_column_pair_values_to_be_in_set:basic_positive_test_without_nulls]
     'ignore: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate\(\) instead.:FutureWarning',
     # Example Actual Warning: FutureWarning: is_datetime64tz_dtype is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.DatetimeTZDtype)` instead.
     'ignore: is_datetime64tz_dtype is deprecated and will be removed in a future version. Check `isinstance\(dtype, pd.DatetimeTZDtype\)` instead.',
+    # Example Actual Warning:
+    # ResourceWarning: unclosed <socket.socket fd=231, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('127.0.0.1', 60004), raddr=('127.0.0.1', 46627)>
+    "ignore: unclosed <socket.socket:ResourceWarning",
     # pymysql
     # Example Actual Warning: pymysql.err.Warning: (1292, "Truncated incorrect DOUBLE value: 'cat'")
     # Found in tests/test_definitions/test_expectations_v2_api.py, if not found in v3 api remove this ignore directive with the v2 api code.
@@ -560,8 +563,6 @@ filterwarnings = [
     # Example Actual Warning:
     # DeprecationWarning: Importing ErrorTree directly from the jsonschema package is deprecated and will become an ImportError. Import it from jsonschema.exceptions instead.
     "ignore: Importing ErrorTree directly from the jsonschema package is deprecated and will become an ImportError. Import it from jsonschema.exceptions instead.:DeprecationWarning",
-
-
     # sqlalchemy
     # Example Actual Warning:
     # sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
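Each pytest `filterwarnings` entry follows the standard `action:message_regex:category` syntax, with the regex matched against the start of the warning message, so the new socket entry behaves roughly like this runtime filter (a sketch of the semantics, not code from the commit):

```python
import warnings

# Ignore ResourceWarnings whose message starts with "unclosed <socket.socket",
# i.e. the warning emitted when a test leaves a TCP socket open at GC time.
warnings.filterwarnings(
    "ignore",
    message=r"unclosed <socket\.socket",
    category=ResourceWarning,
)
```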
14 changes: 8 additions & 6 deletions tests/core/test_util.py
@@ -1,5 +1,6 @@
 import datetime
 
 import pytest
+from freezegun import freeze_time
 
 from great_expectations.core.util import (
     AzureUrl,
@@ -11,9 +12,10 @@
 )
 
 
+@freeze_time("11/05/1955")
 @pytest.mark.unit
 def test_substitute_all_strftime_format_strings():
+    now = datetime.datetime.utcnow()
 
     input_dict = {
         "month_no": "%m",
         "just_a_string": "Bloopy!",
@@ -24,13 +26,13 @@ def test_substitute_all_strftime_format_strings():
         "list": ["a", 123, "%a"],
     }
     expected_output_dict = {
-        "month_no": "11",
+        "month_no": now.strftime("%m"),
         "just_a_string": "Bloopy!",
-        "string_with_month_word": "Today we are in the month November!",
+        "string_with_month_word": f"Today we are in the month {now.strftime('%B')}!",
         "number": "90210",
         "escaped_percent": "'%m' is the format string for month number",
-        "inner_dict": {"day_word_full": "Saturday"},
-        "list": ["a", 123, "Sat"],
+        "inner_dict": {"day_word_full": now.strftime("%A")},
+        "list": ["a", 123, now.strftime("%a")],
     }
     assert substitute_all_strftime_format_strings(input_dict) == expected_output_dict

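The updated test pins time and derives its expected values from the same frozen clock. A minimal sketch of that pattern, assuming freezegun's usual behavior (not code from the commit):

```python
import datetime

from freezegun import freeze_time

with freeze_time("11/05/1955"):
    # utcnow() is pinned to the frozen instant (1955-11-05, a Saturday), so
    # strftime-derived expectations cannot drift across a midnight or month
    # boundary while the test runs.
    now = datetime.datetime.utcnow()
    assert now.strftime("%m") == "11"
    assert now.strftime("%A") == "Saturday"
```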
8 changes: 4 additions & 4 deletions tests/data_context/store/test_data_context_store.py
@@ -38,17 +38,17 @@ def test_serialize_cloud_mode(basic_data_context_config: DataContextConfig):
         "profiler_store_name": None,
         "plugins_directory": "plugins/",
         "stores": {
-            "evaluation_parameter_store": {
-                "class_name": "EvaluationParameterStore",
-                "module_name": "great_expectations.data_context.store",
-            },
             "checkpoint_store": {
                 "class_name": "CheckpointStore",
                 "store_backend": {
                     "base_directory": "checkpoints/",
                     "class_name": "TupleFilesystemStoreBackend",
                 },
             },
+            "evaluation_parameter_store": {
+                "class_name": "EvaluationParameterStore",
+                "module_name": "great_expectations.data_context.store",
+            },
             "expectations_store": {
                 "class_name": "ExpectationsStore",
                 "store_backend": {
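The reorder above only moves `evaluation_parameter_store` below `checkpoint_store` in the expected literal; Python dict equality ignores key order, so the assertion is presumably unchanged in meaning, as this one-liner illustrates:

```python
# Dict comparison is order-insensitive; alphabetizing keys is cosmetic.
assert {"a": 1, "b": 2} == {"b": 2, "a": 1}
```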
54 changes: 54 additions & 0 deletions tests/data_context/test_data_context.py
@@ -271,6 +271,60 @@ def test_compile_evaluation_parameter_dependencies(
 )
 
 
+@pytest.mark.filesystem
+def test_compile_evaluation_parameter_dependencies_broken_suite(
+    data_context_parameterized_expectation_suite: FileDataContext,
+):
+    broken_suite_path = pathlib.Path(
+        data_context_parameterized_expectation_suite.root_directory,
+        "expectations",
+        "broken_suite.json",
+    )
+    broken_suite_dict = {
+        "expectation_suite_name": "broken suite",
+        "expectations": [
+            {
+                "expectation_type": "expect_column_values_to_be_between",
+                "kwargs": {
+                    "column": "col_1",
+                    "max_value": 5,
+                    "min_value": 1,
+                    "mostly": 0.5,
+                },
+                "meta": {},
+                "not_a_valid_expectation_config_arg": "break it!",
+            },
+        ],
+        "meta": {"great_expectations_version": "0.18.8"},
+    }
+    with broken_suite_path.open("w", encoding="UTF-8") as fp:
+        json.dump(obj=broken_suite_dict, fp=fp)
+
+    assert (
+        data_context_parameterized_expectation_suite._evaluation_parameter_dependencies
+        == {}
+    )
+    with pytest.warns(UserWarning):
+        data_context_parameterized_expectation_suite._compile_evaluation_parameter_dependencies()
+    assert (
+        data_context_parameterized_expectation_suite._evaluation_parameter_dependencies
+        == {
+            "source_diabetes_data.default": [
+                {
+                    "metric_kwargs_id": {
+                        "column=patient_nbr": [
+                            "expect_column_unique_value_count_to_be_between.result.observed_value"
+                        ]
+                    }
+                }
+            ],
+            "source_patient_data.default": [
+                "expect_table_row_count_to_equal.result.observed_value"
+            ],
+        }
+    )
+
+
 @pytest.mark.filesystem
 @mock.patch("great_expectations.data_context.store.DatasourceStore.update_by_name")
 def test_update_datasource_persists_changes_with_store(
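The regression test relies on `pytest.warns` to assert that the broken suite is surfaced as a warning rather than an exception. The general pattern, reduced to a self-contained sketch (the helper below is hypothetical, not code from the commit):

```python
import warnings

import pytest


def load_suite_or_warn() -> None:
    # Hypothetical stand-in for the store iteration in
    # _compile_evaluation_parameter_dependencies.
    warnings.warn("Suite with identifier X failed to load", UserWarning)


def test_warns_on_broken_suite() -> None:
    with pytest.warns(UserWarning, match="failed to load"):
        load_suite_or_warn()
```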
@@ -182,7 +182,6 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_override_query_
     observed,
     row_condition,
     spark_session,
-    basic_spark_df_execution_engine,
     titanic_df,
 ):
     df: pd.DataFrame = titanic_df