Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BUGFIX] Misconfigured Expectations affecting unassociated Checkpoints #9491

Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3823,11 +3823,21 @@ def _construct_data_context_id(self) -> str:

def _compile_evaluation_parameter_dependencies(self) -> None:
self._evaluation_parameter_dependencies = {}
# NOTE: Chetan - 20211118: This iteration is reverting the behavior performed here:
# https://github.com/great-expectations/great_expectations/pull/3377
# This revision was necessary due to breaking changes but will need to be brought back in a future ticket.
# we have to iterate through all expectation suites because evaluation parameters
# can reference metric values from other suites
for key in self.expectations_store.list_keys():
expectation_suite_dict: dict = cast(dict, self.expectations_store.get(key))
try:
expectation_suite_dict: dict = cast(
dict, self.expectations_store.get(key)
)
except ValidationError as e:
# if a suite that isn't associated with the checkpoint compiling eval params is misconfigured
# we should ignore that instead of breaking all checkpoints in the entire context
logger.info(
NathanFarmer marked this conversation as resolved.
Show resolved Hide resolved
f"Suite with identifier {key} was not considered when compiling evaluation parameter dependencies "
f"because it failed to load with message: {e}"
)
continue
if not expectation_suite_dict:
continue
expectation_suite = ExpectationSuite(
Expand Down
1 change: 0 additions & 1 deletion great_expectations/self_check/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,6 @@ def get_dataset( # noqa: C901, PLR0912, PLR0913, PLR0915
spark_config={
"spark.sql.catalogImplementation": "hive",
"spark.executor.memory": "450m",
# "spark.driver.allowMultipleContexts": "true", # This directive does not appear to have any effect.
}
)
# We need to allow null values in some column types that do not support them natively, so we skip
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,9 @@ filterwarnings = [
# Found when running test_case_runner_v2_api[SparkDFDataset/column_pair_map_expectations/expect_column_pair_values_to_be_in_set:basic_positive_test_without_nulls] 'ignore: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate\(\) instead.:FutureWarning',
# Example Actual Warning: FutureWarning: is_datetime64tz_dtype is deprecated and will be removed in a future version. Check `isinstance(dtype, pd.DatetimeTZDtype)` instead.
'ignore: is_datetime64tz_dtype is deprecated and will be removed in a future version. Check `isinstance\(dtype, pd.DatetimeTZDtype\)` instead.',
# Example Actual Warning:
# ResourceWarning: unclosed <socket.socket fd=231, family=AddressFamily.AF_INET, type=SocketKind.SOCK_STREAM, proto=6, laddr=('127.0.0.1', 60004), raddr=('127.0.0.1', 46627)>
"ignore: unclosed <socket.socket:ResourceWarning",
# pymysql
# Example Actual Warning: pymysql.err.Warning: (1292, "Truncated incorrect DOUBLE value: 'cat'")
# Found in tests/test_definitions/test_expectations_v2_api.py, if not found in v3 api remove this ignore directive with the v2 api code.
Expand Down Expand Up @@ -564,8 +567,6 @@ filterwarnings = [
# Example Actual Warning:
# DeprecationWarning: Importing ErrorTree directly from the jsonschema package is deprecated and will become an ImportError. Import it from jsonschema.exceptions instead.
"ignore: Importing ErrorTree directly from the jsonschema package is deprecated and will become an ImportError. Import it from jsonschema.exceptions instead.:DeprecationWarning",


# sqlalchemy
# Example Actual Warning:
# sqlalchemy.exc.RemovedIn20Warning: Deprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. To prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to "sqlalchemy<2.0". Set environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message. (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)
Expand Down
14 changes: 8 additions & 6 deletions tests/core/test_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime

import pytest
from freezegun import freeze_time

from great_expectations.core.util import (
AzureUrl,
Expand All @@ -11,9 +12,10 @@
)


@freeze_time("11/05/1955")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was failing python 3.11 on 0.18.x

@pytest.mark.unit
def test_substitute_all_strftime_format_strings():
now = datetime.datetime.utcnow()

input_dict = {
"month_no": "%m",
"just_a_string": "Bloopy!",
Expand All @@ -24,13 +26,13 @@ def test_substitute_all_strftime_format_strings():
"list": ["a", 123, "%a"],
}
expected_output_dict = {
"month_no": "11",
"month_no": now.strftime("%m"),
"just_a_string": "Bloopy!",
"string_with_month_word": "Today we are in the month November!",
"string_with_month_word": f"Today we are in the month {now.strftime('%B')}!",
"number": "90210",
"escaped_percent": "'%m' is the format string for month number",
"inner_dict": {"day_word_full": "Saturday"},
"list": ["a", 123, "Sat"],
"inner_dict": {"day_word_full": now.strftime("%A")},
"list": ["a", 123, now.strftime("%a")],
}
assert substitute_all_strftime_format_strings(input_dict) == expected_output_dict

Expand Down
16 changes: 16 additions & 0 deletions tests/data_context/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ def basic_data_context_config():
"evaluation_parameter_store_name": "evaluation_parameter_store",
"validations_store_name": "does_not_have_to_be_real",
"expectations_store_name": "expectations_store",
"checkpoint_store_name": "checkpoint_store",
"config_variables_file_path": "uncommitted/config_variables.yml",
"datasources": {},
"stores": {
Expand All @@ -175,6 +176,13 @@ def basic_data_context_config():
"base_directory": "expectations/",
},
},
"checkpoint_store": {
"class_name": "CheckpointStore",
"store_backend": {
"class_name": "TupleFilesystemStoreBackend",
"base_directory": "checkpoints/",
},
},
NathanFarmer marked this conversation as resolved.
Show resolved Hide resolved
"evaluation_parameter_store": {
"module_name": "great_expectations.data_context.store",
"class_name": "EvaluationParameterStore",
Expand Down Expand Up @@ -226,6 +234,7 @@ def data_context_config_with_datasources(conn_string_password):
"evaluation_parameter_store_name": "evaluation_parameter_store",
"validations_store_name": "does_not_have_to_be_real",
"expectations_store_name": "expectations_store",
"checkpoint_store_name": "checkpoint_store",
"config_variables_file_path": "uncommitted/config_variables.yml",
"datasources": {
"Datasource 1: Redshift": {
Expand Down Expand Up @@ -327,6 +336,13 @@ def data_context_config_with_datasources(conn_string_password):
"base_directory": "expectations/",
},
},
"checkpoint_store": {
"class_name": "CheckpointStore",
"store_backend": {
"class_name": "TupleFilesystemStoreBackend",
"base_directory": "checkpoints/",
},
},
"evaluation_parameter_store": {
"module_name": "great_expectations.data_context.store",
"class_name": "EvaluationParameterStore",
Expand Down
7 changes: 7 additions & 0 deletions tests/data_context/store/test_data_context_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ def test_serialize_cloud_mode(basic_data_context_config: DataContextConfig):
},
"plugins_directory": "plugins/",
"stores": {
"checkpoint_store": {
"class_name": "CheckpointStore",
"store_backend": {
"base_directory": "checkpoints/",
"class_name": "TupleFilesystemStoreBackend",
},
},
"evaluation_parameter_store": {
"class_name": "EvaluationParameterStore",
"module_name": "great_expectations.data_context.store",
Expand Down
53 changes: 53 additions & 0 deletions tests/data_context/test_data_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,59 @@ def test_compile_evaluation_parameter_dependencies(
)


@pytest.mark.filesystem
def test_compile_evaluation_parameter_dependencies_broken_suite(
    data_context_parameterized_expectation_suite: FileDataContext,
):
    """A suite that fails schema validation on load must be skipped — not
    raise — when the context compiles evaluation parameter dependencies,
    so the dependencies from the remaining valid suites are still built."""
    context = data_context_parameterized_expectation_suite

    # Drop an intentionally invalid suite straight into the expectations
    # directory; the unexpected top-level key makes deserialization fail.
    invalid_suite = {
        "expectation_suite_name": "broken suite",
        "expectations": [
            {
                "expectation_type": "expect_column_values_to_be_between",
                "kwargs": {
                    "column": "col_1",
                    "max_value": 5,
                    "min_value": 1,
                    "mostly": 0.5,
                },
                "meta": {},
                "not_a_valid_expectation_config_arg": "break it!",
            },
        ],
        "meta": {"great_expectations_version": "0.18.8"},
    }
    suite_file = (
        pathlib.Path(context.root_directory) / "expectations" / "broken_suite.json"
    )
    suite_file.write_text(json.dumps(invalid_suite), encoding="UTF-8")

    # Dependencies start empty; compilation must succeed despite the broken
    # suite and reflect only the valid suites shipped with the fixture.
    assert context._evaluation_parameter_dependencies == {}
    context._compile_evaluation_parameter_dependencies()
    expected = {
        "source_diabetes_data.default": [
            {
                "metric_kwargs_id": {
                    "column=patient_nbr": [
                        "expect_column_unique_value_count_to_be_between.result.observed_value"
                    ]
                }
            }
        ],
        "source_patient_data.default": [
            "expect_table_row_count_to_equal.result.observed_value"
        ],
    }
    assert context._evaluation_parameter_dependencies == expected


@pytest.mark.filesystem
@mock.patch("great_expectations.data_context.store.DatasourceStore.update_by_name")
def test_update_datasource_persists_changes_with_store(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,6 @@ def test_expect_queried_column_value_frequency_to_meet_threshold_override_query_
observed,
row_condition,
spark_session,
basic_spark_df_execution_engine,
titanic_df,
):
df: pd.DataFrame = titanic_df
Expand Down