From 512ae6396785db92d71ae7cdc3bc076f26bd12c4 Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Mon, 18 Jul 2022 01:38:08 -0700 Subject: [PATCH 1/6] clean up --- .../core/expectation_configuration.py | 25 ++++--- great_expectations/core/expectation_suite.py | 69 +++++++++++++------ .../core/expectation_validation_result.py | 2 +- .../execution_engine/execution_engine.py | 2 +- .../column_quantile_values.py | 14 ++-- .../data_assistant/data_assistant.py | 20 ++---- .../onboarding_data_assistant.py | 4 +- .../rule_based_profiler/types/domain.py | 4 +- 8 files changed, 85 insertions(+), 55 deletions(-) diff --git a/great_expectations/core/expectation_configuration.py b/great_expectations/core/expectation_configuration.py index 18f3593d5863..370e1dabc00d 100644 --- a/great_expectations/core/expectation_configuration.py +++ b/great_expectations/core/expectation_configuration.py @@ -1138,6 +1138,7 @@ def get_domain_kwargs(self) -> dict: "default_kwarg_values", {} ) domain_keys = expectation_kwargs_dict["domain_kwargs"] + domain_kwargs = { key: self.kwargs.get(key, default_kwarg_values.get(key)) for key in domain_keys @@ -1147,6 +1148,7 @@ def get_domain_kwargs(self) -> dict: raise InvalidExpectationKwargsError( f"Missing domain kwargs: {list(missing_kwargs)}" ) + return domain_kwargs def get_success_kwargs(self) -> dict: @@ -1169,12 +1171,14 @@ def get_success_kwargs(self) -> dict: "default_kwarg_values", {} ) success_keys = expectation_kwargs_dict["success_kwargs"] + domain_kwargs = self.get_domain_kwargs() success_kwargs = { key: self.kwargs.get(key, default_kwarg_values.get(key)) for key in success_keys } success_kwargs.update(domain_kwargs) + return success_kwargs def get_runtime_kwargs(self, runtime_configuration: Optional[dict] = None) -> dict: @@ -1202,6 +1206,7 @@ def get_runtime_kwargs(self, runtime_configuration: Optional[dict] = None) -> di lookup_kwargs = deepcopy(self.kwargs) if runtime_configuration: lookup_kwargs.update(runtime_configuration) + runtime_kwargs = { key: lookup_kwargs.get(key, default_kwarg_values.get(key)) for key in runtime_keys @@ -1210,10 +1215,11 @@ def get_runtime_kwargs(self, runtime_configuration: Optional[dict] = None) -> di runtime_kwargs["result_format"] ) runtime_kwargs.update(success_kwargs) + return runtime_kwargs def applies_to_same_domain( - self, other_expectation_configuration: "ExpectationConfiguration" + self, other_expectation_configuration: "ExpectationConfiguration" # noqa: F821 ) -> bool: if ( not self.expectation_type @@ -1225,15 +1231,17 @@ def applies_to_same_domain( == other_expectation_configuration.get_domain_kwargs() ) + # noinspection PyPep8Naming def isEquivalentTo( self, - other: Union[dict, "ExpectationConfiguration"], + other: Union[dict, "ExpectationConfiguration"], # noqa: F821 match_type: str = "success", ) -> bool: """ExpectationConfiguration equivalence does not include meta, and relies on *equivalence* of kwargs.""" if not isinstance(other, self.__class__): if isinstance(other, dict): try: + # noinspection PyNoneFunctionAssignment other = expectationConfigurationSchema.load(other) except ValidationError: logger.debug( @@ -1244,6 +1252,7 @@ def isEquivalentTo( else: # Delegate comparison to the other instance return NotImplemented + if match_type == "domain": return all( ( @@ -1252,7 +1261,7 @@ def isEquivalentTo( ) ) - elif match_type == "success": + if match_type == "success": return all( ( self.expectation_type == other.expectation_type, @@ -1260,13 +1269,14 @@ def isEquivalentTo( ) ) - elif match_type == "runtime": + if match_type == "runtime": return all( ( self.expectation_type == other.expectation_type, self.kwargs == other.kwargs, ) ) + return False def __eq__(self, other): @@ -1328,9 +1338,7 @@ def get_evaluation_parameter_dependencies(self) -> dict: try: urn = ge_urn.parseString(string_urn) except ParserError: - logger.warning( - f"Unable to parse great_expectations urn {value['$PARAMETER']}" - ) + logger.warning("Unable to parse great_expectations urn['$PARAMETER']") continue # Query stores do not have "expectation_suite_name" @@ -1340,6 +1348,7 @@ def get_evaluation_parameter_dependencies(self) -> dict: self._update_dependencies_with_expectation_suite_urn(dependencies, urn) dependencies = _deduplicate_evaluation_parameter_dependencies(dependencies) + return dependencies @staticmethod @@ -1373,7 +1382,7 @@ def validate( validator: Any, # Can't type as Validator due to import cycle runtime_configuration=None, ): - expectation_impl = self._get_expectation_impl() + expectation_impl: "Expectation" = self._get_expectation_impl() # noqa: F821 return expectation_impl(self).validate( validator=validator, runtime_configuration=runtime_configuration, diff --git a/great_expectations/core/expectation_suite.py b/great_expectations/core/expectation_suite.py index a2f521470e88..36250df74e64 100644 --- a/great_expectations/core/expectation_suite.py +++ b/great_expectations/core/expectation_suite.py @@ -125,6 +125,7 @@ def add_citation( ) self.meta["citations"].append(citation) + # noinspection PyPep8Naming def isEquivalentTo(self, other): """ ExpectationSuite equivalence relies only on expectations and evaluation parameters. It does not include: @@ -136,6 +137,7 @@ def isEquivalentTo(self, other): if not isinstance(other, self.__class__): if isinstance(other, dict): try: + # noinspection PyNoneFunctionAssignment,PyTypeChecker other_dict: dict = expectationSuiteSchema.load(other) other: ExpectationSuite = ExpectationSuite( **other_dict, data_context=self._data_context @@ -243,18 +245,6 @@ def get_citations( return citations return self._sort_citations(citations=citations) - def get_table_expectations(self): - """Return a list of table expectations.""" - return [ - e - for e in self.expectations - if e.expectation_type.startswith("expect_table_") - ] - - def get_column_expectations(self): - """Return a list of column map expectations.""" - return [e for e in self.expectations if "column" in e.kwargs] - @staticmethod def _filter_citations( citations: List[Dict[str, Any]], filter_key @@ -382,19 +372,23 @@ def find_expectation_indexes( raise TypeError( "Must provide either expectation_configuration or ge_cloud_id" ) + if expectation_configuration and not isinstance( expectation_configuration, ExpectationConfiguration ): raise InvalidExpectationConfigurationError( "Ensure that expectation configuration is valid." ) + match_indexes = [] for idx, expectation in enumerate(self.expectations): if ge_cloud_id is not None: if str(expectation.ge_cloud_id) == str(ge_cloud_id): match_indexes.append(idx) else: - if expectation.isEquivalentTo(expectation_configuration, match_type): + if expectation.isEquivalentTo( + other=expectation_configuration, match_type=match_type + ): match_indexes.append(idx) return match_indexes @@ -426,13 +420,15 @@ def find_expectations( raise TypeError( "Must provide either expectation_configuration or ge_cloud_id" ) - found_expectation_indexes = self.find_expectation_indexes( + + found_expectation_indexes: List[int] = self.find_expectation_indexes( expectation_configuration, match_type, ge_cloud_id ) + if len(found_expectation_indexes) > 0: return [self.expectations[idx] for idx in found_expectation_indexes] - else: - return [] + + return [] def replace_expectation( self, @@ -574,20 +570,24 @@ def _add_expectation( expectation_configuration.ge_cloud_id = ( existing_expectation_ge_cloud_id ) + self.expectations[ found_expectation_indexes[0] ] = expectation_configuration else: if send_usage_event: self.send_usage_event(success=False) + raise DataContextError( "A matching ExpectationConfiguration already exists. If you would like to overwrite this " "ExpectationConfiguration, set overwrite_existing=True" ) else: self.append_expectation(expectation_configuration) + if send_usage_event: self.send_usage_event(success=True) + return expectation_configuration def send_usage_event(self, success: bool) -> None: @@ -667,17 +667,44 @@ def add_expectation( overwrite_existing=overwrite_existing, ) + def get_table_expectations(self) -> List[ExpectationConfiguration]: + """Return a list of table expectations.""" + return [ + e + for e in self.expectations + if e.expectation_type.startswith("expect_table_") + ] + + def get_column_expectations(self) -> List[ExpectationConfiguration]: + """Return a list of column map expectations.""" + return [e for e in self.expectations if "column" in e.kwargs] + + def get_column_pair_expectations(self) -> List[ExpectationConfiguration]: + """Return a list of column_pair map expectations.""" + return [ + e + for e in self.expectations + if "column_A" in e.kwargs and "column_B" in e.kwargs + ] + + def get_multicolumn_expectations(self) -> List[ExpectationConfiguration]: + """Return a list of multicolumn map expectations.""" + return [e for e in self.expectations if "column_list" in e.kwargs] + def get_grouped_and_ordered_expectations_by_column( self, expectation_type_filter: Optional[str] = None ) -> Tuple[Dict[str, List[ExpectationConfiguration]], List[str]]: - expectations_by_column = {} - ordered_columns = [] + expectations_by_column: Dict[str, List[ExpectationConfiguration]] = {} + ordered_columns: List[str] = [] + column: str + expectation: ExpectationConfiguration for expectation in self.expectations: if "column" in expectation.kwargs: column = expectation.kwargs["column"] else: column = "_nocolumn" + if column not in expectations_by_column: expectations_by_column[column] = [] @@ -692,7 +719,7 @@ def get_grouped_and_ordered_expectations_by_column( expectation.expectation_type == "expect_table_columns_to_match_ordered_list" ): - exp_column_list = expectation.kwargs["column_list"] + exp_column_list: List[str] = expectation.kwargs["column_list"] if exp_column_list and len(exp_column_list) > 0: ordered_columns = exp_column_list @@ -703,8 +730,8 @@ def get_grouped_and_ordered_expectations_by_column( # names from entire evr, else use alphabetic sort if set(sorted_columns) == set(ordered_columns): return expectations_by_column, ordered_columns - else: - return expectations_by_column, sorted_columns + + return expectations_by_column, sorted_columns class ExpectationSuiteSchema(Schema): diff --git a/great_expectations/core/expectation_validation_result.py b/great_expectations/core/expectation_validation_result.py index d7addd292111..117d8b96ae0c 100644 --- a/great_expectations/core/expectation_validation_result.py +++ b/great_expectations/core/expectation_validation_result.py @@ -116,7 +116,7 @@ def __eq__(self, other): or ( self.expectation_config is not None and self.expectation_config.isEquivalentTo( - other.expectation_config + other=other.expectation_config, match_type="success" ) ), # Result is a dictionary allowed to have nested dictionaries that are still of complex types (e.g. diff --git a/great_expectations/execution_engine/execution_engine.py b/great_expectations/execution_engine/execution_engine.py index dcf5c34406db..61f1195f1950 100644 --- a/great_expectations/execution_engine/execution_engine.py +++ b/great_expectations/execution_engine/execution_engine.py @@ -53,10 +53,10 @@ class MetricFunctionTypes(Enum): class MetricDomainTypes(Enum): + TABLE = "table" COLUMN = "column" COLUMN_PAIR = "column_pair" MULTICOLUMN = "multicolumn" - TABLE = "table" class DataConnectorStorageDataReferenceResolver: diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py index 981cee90413c..069258da37d8 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py @@ -77,6 +77,7 @@ def _pandas(cls, column, quantiles, allow_relative_error, **kwargs): f"If specified for pandas, allow_relative_error must be one an allowed value for the 'interpolation'" f"parameter of .quantile() (one of {interpolation_options})" ) + return column.quantile(quantiles, interpolation=allow_relative_error).tolist() @metric_value(engine=SqlAlchemyExecutionEngine) @@ -188,19 +189,22 @@ def _spark( ) = execution_engine.get_compute_domain( metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN ) - allow_relative_error = metric_value_kwargs.get("allow_relative_error", False) quantiles = metric_value_kwargs["quantiles"] column = accessor_domain_kwargs["column"] - if allow_relative_error is False: + + allow_relative_error = metric_value_kwargs.get("allow_relative_error", False) + if not allow_relative_error: allow_relative_error = 0.0 + if ( not isinstance(allow_relative_error, float) - or allow_relative_error < 0 - or allow_relative_error > 1 + or allow_relative_error < 0.0 + or allow_relative_error > 1.0 ): raise ValueError( - "SparkDFDataset requires relative error to be False or to be a float between 0 and 1." + "SparkDFExecutionEngine requires relative error to be False or to be a float between 0 and 1." ) + return df.approxQuantile(column, list(quantiles), allow_relative_error) diff --git a/great_expectations/rule_based_profiler/data_assistant/data_assistant.py b/great_expectations/rule_based_profiler/data_assistant/data_assistant.py index 73b4c7ae6db7..8a87b700ec9c 100644 --- a/great_expectations/rule_based_profiler/data_assistant/data_assistant.py +++ b/great_expectations/rule_based_profiler/data_assistant/data_assistant.py @@ -498,16 +498,6 @@ def is_abstract(cls) -> bool: """ return isabstract(cls) - @property - def metrics_parameter_builders_by_domain( - self, - ) -> Dict[Domain, List[ParameterBuilder]]: - """ - Returns: - Dictionary of "ParameterBuilder" objects, keyed by ("domain_type", "rule_name")-specified "Domain" object. - """ - return self._metrics_parameter_builders_by_domain - @abstractmethod def get_variables(self) -> Optional[Dict[str, Any]]: """ @@ -544,9 +534,9 @@ def _build_data_assistant_result( def get_metrics_by_domain(self) -> Dict[Domain, Dict[str, ParameterNode]]: """ Obtain subset of all parameter values for fully-qualified parameter names by domain, available from entire - "RuleBasedProfiler" state, where "Domain" objects are among keys included in provisions as proscribed by return - value of "DataAssistant.metrics_parameter_builders_by_domain" interface property and fully-qualified parameter - names match interface properties of "ParameterBuilder" objects, corresponding to these partial "Domain" objects. + "RuleBasedProfiler" state, where "Domain" objects are among keys included in provisions as proscribed by value + of "DataAssistant._metrics_parameter_builders_by_domain" private attribute and fully-qualified parameter names + match interface properties of "ParameterBuilder" objects, corresponding to these partial "Domain" objects. Returns: Dictionaries of values for fully-qualified parameter names by Domain for metrics, from "RuleBasedpRofiler" @@ -561,7 +551,7 @@ def get_metrics_by_domain(self) -> Dict[Domain, Dict[str, ParameterNode]]: lambda element: any( element[0].is_superset(other=domain_key) for domain_key in list( - self.metrics_parameter_builders_by_domain.keys() + self._metrics_parameter_builders_by_domain.keys() ) ), self.profiler.get_parameter_values_for_fully_qualified_parameter_names_by_domain().items(), @@ -577,7 +567,7 @@ def get_metrics_by_domain(self) -> Dict[Domain, Dict[str, ParameterNode]]: parameter_builder.json_serialized_fully_qualified_parameter_name for parameter_builder in parameter_builders ] - for domain, parameter_builders in self.metrics_parameter_builders_by_domain.items() + for domain, parameter_builders in self._metrics_parameter_builders_by_domain.items() } parameter_values_for_fully_qualified_parameter_names: Dict[str, ParameterNode] diff --git a/great_expectations/rule_based_profiler/data_assistant/onboarding_data_assistant.py b/great_expectations/rule_based_profiler/data_assistant/onboarding_data_assistant.py index 46deaf65646a..2b328768bcc7 100644 --- a/great_expectations/rule_based_profiler/data_assistant/onboarding_data_assistant.py +++ b/great_expectations/rule_based_profiler/data_assistant/onboarding_data_assistant.py @@ -540,7 +540,7 @@ def _build_numeric_columns_rule() -> Rule: 0.5, 0.75, ], - "allow_relative_error": "linear", + "allow_relative_error": False, "false_positive_rate": 0.05, "quantile_statistic_interpolation_method": "auto", "estimator": "bootstrap", @@ -727,7 +727,7 @@ def _build_datetime_columns_rule() -> Rule: "mostly": 1.0, "strict_min": False, "strict_max": False, - "allow_relative_error": "linear", + "allow_relative_error": False, "false_positive_rate": 0.05, "quantile_statistic_interpolation_method": "auto", "estimator": "bootstrap", diff --git a/great_expectations/rule_based_profiler/types/domain.py b/great_expectations/rule_based_profiler/types/domain.py index 70fcc13a6524..6faf558992e3 100644 --- a/great_expectations/rule_based_profiler/types/domain.py +++ b/great_expectations/rule_based_profiler/types/domain.py @@ -58,7 +58,7 @@ def __init__( ) -> None: if isinstance(domain_type, str): try: - domain_type = MetricDomainTypes(domain_type) + domain_type = MetricDomainTypes(domain_type.lower()) except (TypeError, KeyError) as e: raise ValueError( f""" {e}: Cannot instantiate Domain (domain_type "{str(domain_type)}" of type \ @@ -205,7 +205,7 @@ def to_json_dict(self) -> dict: def _convert_dictionaries_to_domain_kwargs( self, source: Optional[Any] = None - ) -> Optional[Union[Any, "Domain"]]: + ) -> Optional[Union[Any, "Domain"]]: # noqa: F821 if source is None: return None From 27e578e7f0326f135ea616dc251327823be42fc0 Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Mon, 18 Jul 2022 12:27:19 -0700 Subject: [PATCH 2/6] move MetricDomainTypes to a common location --- great_expectations/core/metric_domain_types.py | 11 +++++++++++ .../execution_engine/execution_engine.py | 8 +------- .../execution_engine/pandas_execution_engine.py | 2 +- .../execution_engine/sparkdf_execution_engine.py | 2 +- great_expectations/expectations/expectation.py | 2 +- .../column_aggregate_metrics/column_histogram.py | 2 +- .../metrics/column_aggregate_metrics/column_median.py | 2 +- .../column_quantile_values.py | 2 +- .../column_aggregate_metrics/column_value_counts.py | 2 +- .../column_values_between_count.py | 2 +- .../metrics/query_metrics/query_column.py | 2 +- .../expectations/metrics/query_metrics/query_table.py | 2 +- .../metrics/table_metrics/table_column_types.py | 2 +- .../expectations/metrics/table_metrics/table_head.py | 2 +- .../categorical_column_domain_builder.py | 2 +- .../domain_builder/column_domain_builder.py | 2 +- .../domain_builder/column_pair_domain_builder.py | 2 +- .../domain_builder/domain_builder.py | 2 +- .../domain_builder/multi_column_domain_builder.py | 2 +- .../domain_builder/table_domain_builder.py | 2 +- .../helpers/runtime_environment.py | 2 +- .../rule_based_profiler/helpers/util.py | 2 +- .../data_assistant_result/data_assistant_result.py | 2 +- .../rule_based_profiler/types/domain.py | 2 +- great_expectations/validator/validator.py | 2 +- tests/conftest.py | 2 +- .../execution_engine/test_pandas_execution_engine.py | 2 +- .../execution_engine/test_sparkdf_execution_engine.py | 2 +- .../test_sqlalchemy_execution_engine.py | 2 +- .../test_profiler_basic_workflows.py | 2 +- .../test_profiler_user_workflows.py | 2 +- tests/rule_based_profiler/conftest.py | 2 +- .../data_assistant/test_onboarding_data_assistant.py | 2 +- .../data_assistant/test_volume_data_assistant.py | 2 +- .../test_categorical_column_domain_builder.py | 2 +- .../domain_builder/test_domain_builder.py | 2 +- .../test_map_metric_column_domain_builder.py | 2 +- .../test_default_expectation_configuration_builder.py | 2 +- ...columns_set_match_multi_batch_parameter_builder.py | 2 +- ...pected_map_metric_multi_batch_parameter_builder.py | 2 +- ...eric_metric_range_multi_batch_parameter_builder.py | 2 +- .../parameter_builder/test_parameter_container.py | 2 +- .../test_partition_parameter_builder.py | 2 +- .../test_regex_pattern_string_parameter_builder.py | 2 +- ...est_simple_date_format_string_parameter_builder.py | 2 +- .../test_value_set_multi_batch_parameter_builder.py | 2 +- .../BasicExample_RBP_Instantiation_and_running.ipynb | 2 +- .../my_custom_semantic_type_column_domain_builder.py | 2 +- 48 files changed, 58 insertions(+), 53 deletions(-) create mode 100644 great_expectations/core/metric_domain_types.py diff --git a/great_expectations/core/metric_domain_types.py b/great_expectations/core/metric_domain_types.py new file mode 100644 index 000000000000..a2966e88230a --- /dev/null +++ b/great_expectations/core/metric_domain_types.py @@ -0,0 +1,11 @@ +import enum +import logging + +logger = logging.getLogger(__name__) + + +class MetricDomainTypes(enum.Enum): + TABLE = "table" + COLUMN = "column" + COLUMN_PAIR = "column_pair" + MULTICOLUMN = "multicolumn" diff --git a/great_expectations/execution_engine/execution_engine.py b/great_expectations/execution_engine/execution_engine.py index 61f1195f1950..d11fd49882e2 100644 --- a/great_expectations/execution_engine/execution_engine.py +++ b/great_expectations/execution_engine/execution_engine.py @@ -9,6 +9,7 @@ import great_expectations.exceptions as ge_exceptions from great_expectations.core.batch import BatchMarkers, BatchSpec +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.util import AzureUrl, DBFSPath, GCSUrl, S3Url from great_expectations.expectations.registry import get_metric_provider from great_expectations.expectations.row_conditions import ( @@ -52,13 +53,6 @@ class MetricFunctionTypes(Enum): AGGREGATE_VALUE = "value" # "aggregate_value" -class MetricDomainTypes(Enum): - TABLE = "table" - COLUMN = "column" - COLUMN_PAIR = "column_pair" - MULTICOLUMN = "multicolumn" - - class DataConnectorStorageDataReferenceResolver: DATA_CONNECTOR_NAME_TO_STORAGE_NAME_MAP: Dict[str, str] = { "InferredAssetS3DataConnector": "S3", diff --git a/great_expectations/execution_engine/pandas_execution_engine.py b/great_expectations/execution_engine/pandas_execution_engine.py index 2ae7eeef6672..c806a47e07c6 100644 --- a/great_expectations/execution_engine/pandas_execution_engine.py +++ b/great_expectations/execution_engine/pandas_execution_engine.py @@ -19,9 +19,9 @@ RuntimeDataBatchSpec, S3BatchSpec, ) +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.util import AzureUrl, GCSUrl, S3Url, sniff_s3_compression from great_expectations.execution_engine import ExecutionEngine -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.execution_engine.pandas_batch_data import PandasBatchData from great_expectations.execution_engine.split_and_sample.pandas_data_sampler import ( PandasDataSampler, diff --git a/great_expectations/execution_engine/sparkdf_execution_engine.py b/great_expectations/execution_engine/sparkdf_execution_engine.py index 8ce0d6671db9..0295195c540b 100644 --- a/great_expectations/execution_engine/sparkdf_execution_engine.py +++ b/great_expectations/execution_engine/sparkdf_execution_engine.py @@ -16,6 +16,7 @@ RuntimeDataBatchSpec, ) from great_expectations.core.id_dict import IDDict +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.util import AzureUrl, get_or_create_spark_application from great_expectations.exceptions import ( BatchSpecError, @@ -25,7 +26,6 @@ ) from great_expectations.exceptions import exceptions as ge_exceptions from great_expectations.execution_engine import ExecutionEngine -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.execution_engine.sparkdf_batch_data import SparkDFBatchData from great_expectations.execution_engine.split_and_sample.sparkdf_data_sampler import ( SparkDataSampler, diff --git a/great_expectations/expectations/expectation.py b/great_expectations/expectations/expectation.py index 511e2bb79f38..c66f6c876d5a 100644 --- a/great_expectations/expectations/expectation.py +++ b/great_expectations/expectations/expectation.py @@ -47,6 +47,7 @@ from great_expectations.core.expectation_validation_result import ( ExpectationValidationResult, ) +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.util import convert_to_json_serializable, nested_update from great_expectations.exceptions import ( ExpectationNotFoundError, @@ -55,7 +56,6 @@ InvalidExpectationKwargsError, ) from great_expectations.execution_engine import ExecutionEngine, PandasExecutionEngine -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.registry import ( _registered_metrics, _registered_renderers, diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py index 86872c18348b..745d33a23068 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.util import ( convert_to_json_serializable, get_sql_dialect_floating_point_infinity_value, @@ -14,7 +15,6 @@ SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.metrics.column_aggregate_metric_provider import ( ColumnAggregateMetricProvider, ) diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py index 02f77d949da9..2c914406f3c4 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py @@ -4,13 +4,13 @@ import pandas as pd from great_expectations.core import ExpectationConfiguration +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.execution_engine import ( ExecutionEngine, PandasExecutionEngine, SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.metrics.column_aggregate_metric_provider import ( ColumnAggregateMetricProvider, column_aggregate_value, diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py index 069258da37d8..24bfbbd77362 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py @@ -7,12 +7,12 @@ import numpy as np +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.execution_engine import ( PandasExecutionEngine, SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.execution_engine.util import get_approximate_percentile_disc_sql from great_expectations.expectations.metrics.column_aggregate_metric_provider import ( ColumnAggregateMetricProvider, diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py index 12e274d74d86..44216b186ade 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py @@ -2,12 +2,12 @@ import pandas as pd +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.execution_engine import ( PandasExecutionEngine, SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.metrics.column_aggregate_metric_provider import ( ColumnAggregateMetricProvider, ) diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py index 6fd08d240e58..0f8531a952d2 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py @@ -2,13 +2,13 @@ import numpy as np +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.util import get_sql_dialect_floating_point_infinity_value from great_expectations.execution_engine import ( PandasExecutionEngine, SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.metrics.import_manager import sa from great_expectations.expectations.metrics.metric_provider import ( MetricProvider, diff --git a/great_expectations/expectations/metrics/query_metrics/query_column.py b/great_expectations/expectations/metrics/query_metrics/query_column.py index 5dec6a84adf1..15a60a847adf 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_column.py +++ b/great_expectations/expectations/metrics/query_metrics/query_column.py @@ -1,10 +1,10 @@ from typing import Any, Dict, List, Optional, Union +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.execution_engine import ( SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.metrics.import_manager import ( pyspark_sql_DataFrame, pyspark_sql_Row, diff --git a/great_expectations/expectations/metrics/query_metrics/query_table.py b/great_expectations/expectations/metrics/query_metrics/query_table.py index 0919cafa09d4..9ca354c13558 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_table.py +++ b/great_expectations/expectations/metrics/query_metrics/query_table.py @@ -1,10 +1,10 @@ from typing import Any, Dict, List, Optional, Union +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.execution_engine import ( SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.metrics.import_manager import ( pyspark_sql_DataFrame, pyspark_sql_Row, diff --git a/great_expectations/expectations/metrics/table_metrics/table_column_types.py b/great_expectations/expectations/metrics/table_metrics/table_column_types.py index 5f9e93b601fe..2d6079e0dcc3 100644 --- a/great_expectations/expectations/metrics/table_metrics/table_column_types.py +++ b/great_expectations/expectations/metrics/table_metrics/table_column_types.py @@ -1,12 +1,12 @@ from typing import Any, Dict +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.exceptions import GreatExpectationsError from great_expectations.execution_engine import ( PandasExecutionEngine, SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.execution_engine.sqlalchemy_batch_data import ( SqlAlchemyBatchData, ) diff --git a/great_expectations/expectations/metrics/table_metrics/table_head.py b/great_expectations/expectations/metrics/table_metrics/table_head.py index 7ef7694abb5a..6312fcd566b0 100644 --- a/great_expectations/expectations/metrics/table_metrics/table_head.py +++ b/great_expectations/expectations/metrics/table_metrics/table_head.py @@ -2,12 +2,12 @@ import pandas as pd +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.execution_engine import ( PandasExecutionEngine, SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.metrics.import_manager import sa from great_expectations.expectations.metrics.metric_provider import metric_value from great_expectations.expectations.metrics.table_metric_provider import ( diff --git a/great_expectations/rule_based_profiler/domain_builder/categorical_column_domain_builder.py b/great_expectations/rule_based_profiler/domain_builder/categorical_column_domain_builder.py index 869364bae04f..f09d20575e6d 100644 --- a/great_expectations/rule_based_profiler/domain_builder/categorical_column_domain_builder.py +++ b/great_expectations/rule_based_profiler/domain_builder/categorical_column_domain_builder.py @@ -1,6 +1,6 @@ from typing import Any, Dict, Iterable, List, Optional, Set, Tuple, Union -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.domain_builder import ColumnDomainBuilder from great_expectations.rule_based_profiler.helpers.cardinality_checker import ( AbsoluteCardinalityLimit, diff --git a/great_expectations/rule_based_profiler/domain_builder/column_domain_builder.py b/great_expectations/rule_based_profiler/domain_builder/column_domain_builder.py index 60805484c8fe..6bbf9a393548 100644 --- a/great_expectations/rule_based_profiler/domain_builder/column_domain_builder.py +++ b/great_expectations/rule_based_profiler/domain_builder/column_domain_builder.py @@ -1,8 +1,8 @@ from typing import Iterable, List, Optional, Set, Tuple, Union, cast import great_expectations.exceptions as ge_exceptions +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.data_context.util import instantiate_class_from_config -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.domain_builder import DomainBuilder from great_expectations.rule_based_profiler.helpers.util import ( build_domains_from_column_names, diff --git a/great_expectations/rule_based_profiler/domain_builder/column_pair_domain_builder.py b/great_expectations/rule_based_profiler/domain_builder/column_pair_domain_builder.py index 8781742255f7..41fc943a1b37 100644 --- a/great_expectations/rule_based_profiler/domain_builder/column_pair_domain_builder.py +++ b/great_expectations/rule_based_profiler/domain_builder/column_pair_domain_builder.py @@ -1,7 +1,7 @@ from typing import Dict, List, Optional, Union import great_expectations.exceptions as ge_exceptions -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.domain_builder import ColumnDomainBuilder from great_expectations.rule_based_profiler.types import ( INFERRED_SEMANTIC_TYPE_KEY, diff --git a/great_expectations/rule_based_profiler/domain_builder/domain_builder.py b/great_expectations/rule_based_profiler/domain_builder/domain_builder.py index e6f065066c1c..ffbed58f70cb 100644 --- a/great_expectations/rule_based_profiler/domain_builder/domain_builder.py +++ b/great_expectations/rule_based_profiler/domain_builder/domain_builder.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union from great_expectations.core.batch import Batch, BatchRequestBase -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.helpers.util import ( get_batch_ids as get_batch_ids_from_batch_list_or_batch_request, ) diff --git a/great_expectations/rule_based_profiler/domain_builder/multi_column_domain_builder.py b/great_expectations/rule_based_profiler/domain_builder/multi_column_domain_builder.py index 70297e04ddc4..de40e837a1f1 100644 --- a/great_expectations/rule_based_profiler/domain_builder/multi_column_domain_builder.py +++ b/great_expectations/rule_based_profiler/domain_builder/multi_column_domain_builder.py @@ -1,7 +1,7 @@ from typing import Dict, List, Optional, Union import great_expectations.exceptions as ge_exceptions -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.domain_builder import ColumnDomainBuilder from great_expectations.rule_based_profiler.types import ( INFERRED_SEMANTIC_TYPE_KEY, diff --git a/great_expectations/rule_based_profiler/domain_builder/table_domain_builder.py b/great_expectations/rule_based_profiler/domain_builder/table_domain_builder.py index f36276ec8484..09dcee77f492 100644 --- a/great_expectations/rule_based_profiler/domain_builder/table_domain_builder.py +++ b/great_expectations/rule_based_profiler/domain_builder/table_domain_builder.py @@ -1,6 +1,6 @@ from typing import List, Optional -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.domain_builder import DomainBuilder from great_expectations.rule_based_profiler.types import Domain, ParameterContainer diff --git a/great_expectations/rule_based_profiler/helpers/runtime_environment.py b/great_expectations/rule_based_profiler/helpers/runtime_environment.py index 88a0864c735d..4a1143a686ec 100644 --- a/great_expectations/rule_based_profiler/helpers/runtime_environment.py +++ b/great_expectations/rule_based_profiler/helpers/runtime_environment.py @@ -3,8 +3,8 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union, cast +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.util import convert_to_json_serializable -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.types import SerializableDictDot logger = logging.getLogger(__name__) diff --git a/great_expectations/rule_based_profiler/helpers/util.py b/great_expectations/rule_based_profiler/helpers/util.py index e775cea75136..4814a6563398 100644 --- a/great_expectations/rule_based_profiler/helpers/util.py +++ b/great_expectations/rule_based_profiler/helpers/util.py @@ -18,7 +18,7 @@ RuntimeBatchRequest, materialize_batch_request, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.types import ( FULLY_QUALIFIED_PARAMETER_NAME_SEPARATOR_CHARACTER, INFERRED_SEMANTIC_TYPE_KEY, diff --git a/great_expectations/rule_based_profiler/types/data_assistant_result/data_assistant_result.py b/great_expectations/rule_based_profiler/types/data_assistant_result/data_assistant_result.py index 6109a6191144..688d3a6f3eeb 100644 --- a/great_expectations/rule_based_profiler/types/data_assistant_result/data_assistant_result.py +++ b/great_expectations/rule_based_profiler/types/data_assistant_result/data_assistant_result.py @@ -13,6 +13,7 @@ from great_expectations import __version__ as ge_version from great_expectations.core import ExpectationConfiguration, ExpectationSuite +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.usage_statistics.events import UsageStatsEvents from great_expectations.core.usage_statistics.usage_statistics import ( UsageStatisticsHandler, @@ -24,7 +25,6 @@ in_jupyter_notebook, nested_update, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.config import RuleConfig from great_expectations.rule_based_profiler.helpers.util import ( get_or_create_expectation_suite, diff --git a/great_expectations/rule_based_profiler/types/domain.py b/great_expectations/rule_based_profiler/types/domain.py index 6faf558992e3..ae4072b86b91 100644 --- a/great_expectations/rule_based_profiler/types/domain.py +++ b/great_expectations/rule_based_profiler/types/domain.py @@ -4,8 +4,8 @@ from typing import Any, Dict, Optional, Union from great_expectations.core import IDDict +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.util import convert_to_json_serializable -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.types import SerializableDictDot, SerializableDotDict from great_expectations.util import ( deep_filter_properties_iterable, diff --git a/great_expectations/validator/validator.py b/great_expectations/validator/validator.py index 04a767fd92f2..93ddabe2b42b 100644 --- a/great_expectations/validator/validator.py +++ b/great_expectations/validator/validator.py @@ -25,6 +25,7 @@ ExpectationValidationResult, ) from great_expectations.core.id_dict import BatchSpec +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.run_identifier import RunIdentifier from great_expectations.core.util import convert_to_json_serializable from great_expectations.data_asset.util import recursively_convert_to_json_serializable @@ -41,7 +42,6 @@ SparkDFExecutionEngine, SqlAlchemyExecutionEngine, ) -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.execution_engine.pandas_batch_data import PandasBatchData from great_expectations.expectations.registry import ( get_expectation_impl, diff --git a/tests/conftest.py b/tests/conftest.py index f37fc2878f27..aaf1c094b3fa 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,7 @@ from great_expectations.core.expectation_validation_result import ( ExpectationValidationResult, ) +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.usage_statistics.usage_statistics import ( UsageStatisticsHandler, ) @@ -48,7 +49,6 @@ get_filesystem_one_level_directory_glob_path_list, ) from great_expectations.datasource.new_datasource import BaseDatasource, Datasource -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.config import RuleBasedProfilerConfig from great_expectations.rule_based_profiler.config.base import ( ruleBasedProfilerConfigSchema, diff --git a/tests/execution_engine/test_pandas_execution_engine.py b/tests/execution_engine/test_pandas_execution_engine.py index a880fc475cd5..d20a95240ab3 100644 --- a/tests/execution_engine/test_pandas_execution_engine.py +++ b/tests/execution_engine/test_pandas_execution_engine.py @@ -13,7 +13,7 @@ import great_expectations.exceptions as ge_exceptions from great_expectations.core.batch_spec import RuntimeDataBatchSpec, S3BatchSpec -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.execution_engine.pandas_execution_engine import ( PandasExecutionEngine, storage, diff --git a/tests/execution_engine/test_sparkdf_execution_engine.py b/tests/execution_engine/test_sparkdf_execution_engine.py index 764dafbffb28..2de66e454e96 100644 --- a/tests/execution_engine/test_sparkdf_execution_engine.py +++ b/tests/execution_engine/test_sparkdf_execution_engine.py @@ -7,8 +7,8 @@ import great_expectations.exceptions as ge_exceptions from great_expectations.core.batch_spec import PathBatchSpec, RuntimeDataBatchSpec +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.execution_engine import SparkDFExecutionEngine -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.row_conditions import ( RowCondition, RowConditionParserType, diff --git a/tests/execution_engine/test_sqlalchemy_execution_engine.py b/tests/execution_engine/test_sqlalchemy_execution_engine.py index e45be2bd2a0b..7451b1b02546 100644 --- a/tests/execution_engine/test_sqlalchemy_execution_engine.py +++ b/tests/execution_engine/test_sqlalchemy_execution_engine.py @@ -9,8 +9,8 @@ RuntimeQueryBatchSpec, SqlAlchemyDatasourceBatchSpec, ) +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.data_context.util import file_relative_path -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.execution_engine.sqlalchemy_execution_engine import ( SqlAlchemyExecutionEngine, ) diff --git a/tests/integration/profiling/rule_based_profiler/test_profiler_basic_workflows.py b/tests/integration/profiling/rule_based_profiler/test_profiler_basic_workflows.py index 6d51df9fe15d..f767628c03a3 100644 --- a/tests/integration/profiling/rule_based_profiler/test_profiler_basic_workflows.py +++ b/tests/integration/profiling/rule_based_profiler/test_profiler_basic_workflows.py @@ -5,9 +5,9 @@ from great_expectations.core.batch import BatchRequest from great_expectations.core.expectation_configuration import ExpectationConfiguration +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.data_context import DataContext from great_expectations.data_context.util import file_relative_path -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler import RuleBasedProfilerResult from great_expectations.rule_based_profiler.domain_builder import ( ColumnDomainBuilder, diff --git a/tests/integration/profiling/rule_based_profiler/test_profiler_user_workflows.py b/tests/integration/profiling/rule_based_profiler/test_profiler_user_workflows.py index 74f282a74809..6aa5bfdc47ba 100644 --- a/tests/integration/profiling/rule_based_profiler/test_profiler_user_workflows.py +++ b/tests/integration/profiling/rule_based_profiler/test_profiler_user_workflows.py @@ -18,9 +18,9 @@ ExpectationValidationResult, ) from great_expectations.core.batch import BatchRequest +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.util import convert_to_json_serializable from great_expectations.datasource import DataConnector, Datasource -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.expectations.registry import get_expectation_impl from great_expectations.rule_based_profiler import RuleBasedProfilerResult from great_expectations.rule_based_profiler.config.base import ( diff --git a/tests/rule_based_profiler/conftest.py b/tests/rule_based_profiler/conftest.py index 0b6fcbccea1c..abf4f4c994be 100644 --- a/tests/rule_based_profiler/conftest.py +++ b/tests/rule_based_profiler/conftest.py @@ -5,7 +5,7 @@ import pytest from ruamel.yaml import YAML -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler import RuleBasedProfiler from great_expectations.rule_based_profiler.config.base import RuleBasedProfilerConfig from great_expectations.rule_based_profiler.domain_builder import ColumnDomainBuilder diff --git a/tests/rule_based_profiler/data_assistant/test_onboarding_data_assistant.py b/tests/rule_based_profiler/data_assistant/test_onboarding_data_assistant.py index f14601ac9ed9..d385fc676efb 100644 --- a/tests/rule_based_profiler/data_assistant/test_onboarding_data_assistant.py +++ b/tests/rule_based_profiler/data_assistant/test_onboarding_data_assistant.py @@ -10,8 +10,8 @@ from great_expectations import DataContext from great_expectations.core import ExpectationSuite +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.usage_statistics.events import UsageStatsEvents -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.types import ( FULLY_QUALIFIED_PARAMETER_NAME_ATTRIBUTED_VALUE_KEY, Domain, diff --git a/tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py b/tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py index 230f40380e81..7e9d04ad235f 100644 --- a/tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py +++ b/tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py @@ -11,8 +11,8 @@ from great_expectations import DataContext from great_expectations.core import ExpectationConfiguration, ExpectationSuite from great_expectations.core.batch import Batch +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.usage_statistics.events import UsageStatsEvents -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.config import RuleBasedProfilerConfig from great_expectations.rule_based_profiler.data_assistant import ( DataAssistant, diff --git a/tests/rule_based_profiler/domain_builder/test_categorical_column_domain_builder.py b/tests/rule_based_profiler/domain_builder/test_categorical_column_domain_builder.py index 4299bbb8d41d..73415942e8fd 100644 --- a/tests/rule_based_profiler/domain_builder/test_categorical_column_domain_builder.py +++ b/tests/rule_based_profiler/domain_builder/test_categorical_column_domain_builder.py @@ -4,8 +4,8 @@ from great_expectations import DataContext from great_expectations.core.batch import BatchRequest +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.exceptions import ProfilerConfigurationError -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.domain_builder import DomainBuilder from great_expectations.rule_based_profiler.domain_builder.categorical_column_domain_builder import ( CategoricalColumnDomainBuilder, diff --git a/tests/rule_based_profiler/domain_builder/test_domain_builder.py b/tests/rule_based_profiler/domain_builder/test_domain_builder.py index 7c7cb4b0fe28..67409826c0f5 100644 --- a/tests/rule_based_profiler/domain_builder/test_domain_builder.py +++ b/tests/rule_based_profiler/domain_builder/test_domain_builder.py @@ -6,7 +6,7 @@ import great_expectations.exceptions as ge_exceptions from great_expectations import DataContext -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.domain_builder import ( ColumnDomainBuilder, ColumnPairDomainBuilder, diff --git a/tests/rule_based_profiler/domain_builder/test_map_metric_column_domain_builder.py b/tests/rule_based_profiler/domain_builder/test_map_metric_column_domain_builder.py index 364534b26bd8..e20fda127e6b 100644 --- a/tests/rule_based_profiler/domain_builder/test_map_metric_column_domain_builder.py +++ b/tests/rule_based_profiler/domain_builder/test_map_metric_column_domain_builder.py @@ -2,7 +2,7 @@ from great_expectations import DataContext from great_expectations.core.batch import BatchRequest -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.domain_builder import ( MapMetricColumnDomainBuilder, ) diff --git a/tests/rule_based_profiler/expectation_configuration_builder/test_default_expectation_configuration_builder.py b/tests/rule_based_profiler/expectation_configuration_builder/test_default_expectation_configuration_builder.py index 7a5dbb82d997..31005def7dcc 100644 --- a/tests/rule_based_profiler/expectation_configuration_builder/test_default_expectation_configuration_builder.py +++ b/tests/rule_based_profiler/expectation_configuration_builder/test_default_expectation_configuration_builder.py @@ -4,8 +4,8 @@ import great_expectations.exceptions as ge_exceptions from great_expectations.core.expectation_configuration import ExpectationConfiguration +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.data_context import DataContext -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.config import ParameterBuilderConfig from great_expectations.rule_based_profiler.expectation_configuration_builder import ( DefaultExpectationConfigurationBuilder, diff --git a/tests/rule_based_profiler/parameter_builder/test_mean_table_columns_set_match_multi_batch_parameter_builder.py b/tests/rule_based_profiler/parameter_builder/test_mean_table_columns_set_match_multi_batch_parameter_builder.py index bcab34686acb..cb3a1d9060da 100644 --- a/tests/rule_based_profiler/parameter_builder/test_mean_table_columns_set_match_multi_batch_parameter_builder.py +++ b/tests/rule_based_profiler/parameter_builder/test_mean_table_columns_set_match_multi_batch_parameter_builder.py @@ -1,7 +1,7 @@ from typing import Dict, Optional from great_expectations import DataContext -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.helpers.util import ( get_parameter_value_and_validate_return_type, ) diff --git a/tests/rule_based_profiler/parameter_builder/test_mean_unexpected_map_metric_multi_batch_parameter_builder.py b/tests/rule_based_profiler/parameter_builder/test_mean_unexpected_map_metric_multi_batch_parameter_builder.py index d964b4549b32..c3f497235e11 100644 --- a/tests/rule_based_profiler/parameter_builder/test_mean_unexpected_map_metric_multi_batch_parameter_builder.py +++ b/tests/rule_based_profiler/parameter_builder/test_mean_unexpected_map_metric_multi_batch_parameter_builder.py @@ -4,7 +4,7 @@ import pytest from great_expectations import DataContext -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.config import ParameterBuilderConfig from great_expectations.rule_based_profiler.helpers.util import ( get_parameter_value_and_validate_return_type, diff --git a/tests/rule_based_profiler/parameter_builder/test_numeric_metric_range_multi_batch_parameter_builder.py b/tests/rule_based_profiler/parameter_builder/test_numeric_metric_range_multi_batch_parameter_builder.py index 194574085950..b34d05a5ca28 100644 --- a/tests/rule_based_profiler/parameter_builder/test_numeric_metric_range_multi_batch_parameter_builder.py +++ b/tests/rule_based_profiler/parameter_builder/test_numeric_metric_range_multi_batch_parameter_builder.py @@ -6,8 +6,8 @@ import scipy.stats as stats import great_expectations.exceptions as ge_exceptions +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.data_context import DataContext -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.config import ParameterBuilderConfig from great_expectations.rule_based_profiler.helpers.util import NP_EPSILON from great_expectations.rule_based_profiler.parameter_builder import ( diff --git a/tests/rule_based_profiler/parameter_builder/test_parameter_container.py b/tests/rule_based_profiler/parameter_builder/test_parameter_container.py index f7faad77d6b1..cafba9a753f2 100644 --- a/tests/rule_based_profiler/parameter_builder/test_parameter_container.py +++ b/tests/rule_based_profiler/parameter_builder/test_parameter_container.py @@ -1,6 +1,6 @@ from typing import Dict, List -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.types import ( Domain, ParameterContainer, diff --git a/tests/rule_based_profiler/parameter_builder/test_partition_parameter_builder.py b/tests/rule_based_profiler/parameter_builder/test_partition_parameter_builder.py index 283eee9c4907..07fd880e18df 100644 --- a/tests/rule_based_profiler/parameter_builder/test_partition_parameter_builder.py +++ b/tests/rule_based_profiler/parameter_builder/test_partition_parameter_builder.py @@ -1,7 +1,7 @@ from typing import Dict, Optional from great_expectations import DataContext -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.helpers.util import ( get_parameter_value_and_validate_return_type, ) diff --git a/tests/rule_based_profiler/parameter_builder/test_regex_pattern_string_parameter_builder.py b/tests/rule_based_profiler/parameter_builder/test_regex_pattern_string_parameter_builder.py index 85b0eb4c21a3..63926407012f 100644 --- a/tests/rule_based_profiler/parameter_builder/test_regex_pattern_string_parameter_builder.py +++ b/tests/rule_based_profiler/parameter_builder/test_regex_pattern_string_parameter_builder.py @@ -12,9 +12,9 @@ BatchRequest, ) from great_expectations.core.id_dict import BatchSpec, IDDict +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.data_context import DataContext from great_expectations.execution_engine import PandasExecutionEngine -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.helpers.util import ( get_parameter_value_and_validate_return_type, ) diff --git a/tests/rule_based_profiler/parameter_builder/test_simple_date_format_string_parameter_builder.py b/tests/rule_based_profiler/parameter_builder/test_simple_date_format_string_parameter_builder.py index f93ecc5a227f..7fcc224c1ced 100644 --- a/tests/rule_based_profiler/parameter_builder/test_simple_date_format_string_parameter_builder.py +++ b/tests/rule_based_profiler/parameter_builder/test_simple_date_format_string_parameter_builder.py @@ -3,8 +3,8 @@ import pytest import great_expectations.exceptions.exceptions as ge_exceptions +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.data_context import DataContext -from great_expectations.execution_engine.execution_engine import MetricDomainTypes from great_expectations.rule_based_profiler.helpers.util import ( get_parameter_value_and_validate_return_type, ) diff --git a/tests/rule_based_profiler/parameter_builder/test_value_set_multi_batch_parameter_builder.py b/tests/rule_based_profiler/parameter_builder/test_value_set_multi_batch_parameter_builder.py index ca20ed22d71a..0b68ca04f104 100644 --- a/tests/rule_based_profiler/parameter_builder/test_value_set_multi_batch_parameter_builder.py +++ b/tests/rule_based_profiler/parameter_builder/test_value_set_multi_batch_parameter_builder.py @@ -3,7 +3,7 @@ import pytest from great_expectations import DataContext -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.helpers.util import ( get_parameter_value_and_validate_return_type, ) diff --git a/tests/test_fixtures/rule_based_profiler/example_notebooks/BasicExample_RBP_Instantiation_and_running.ipynb b/tests/test_fixtures/rule_based_profiler/example_notebooks/BasicExample_RBP_Instantiation_and_running.ipynb index f204eeb3df62..362991326445 100644 --- a/tests/test_fixtures/rule_based_profiler/example_notebooks/BasicExample_RBP_Instantiation_and_running.ipynb +++ b/tests/test_fixtures/rule_based_profiler/example_notebooks/BasicExample_RBP_Instantiation_and_running.ipynb @@ -1183,7 +1183,7 @@ "outputs": [], "source": [ "from great_expectations.rule_based_profiler.types.domain import Domain\n", - "from great_expectations.execution_engine.execution_engine import MetricDomainTypes\n", + "from great_expectations.core.metric_domain_types import MetricDomainTypes\n", "from great_expectations.rule_based_profiler.types import ParameterContainer" ] }, diff --git a/tests/test_fixtures/rule_based_profiler/plugins/my_custom_semantic_type_column_domain_builder.py b/tests/test_fixtures/rule_based_profiler/plugins/my_custom_semantic_type_column_domain_builder.py index 18162d68ec2a..2c44760ab48c 100644 --- a/tests/test_fixtures/rule_based_profiler/plugins/my_custom_semantic_type_column_domain_builder.py +++ b/tests/test_fixtures/rule_based_profiler/plugins/my_custom_semantic_type_column_domain_builder.py @@ -1,7 +1,7 @@ from typing import List, Optional, Union from great_expectations import DataContext -from great_expectations.execution_engine.execution_engine import MetricDomainTypes +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.rule_based_profiler.domain_builder import DomainBuilder from great_expectations.rule_based_profiler.helpers.util import ( build_domains_from_column_names, From 3b6c029409c99f36b6b48b5ee2780d07f6366d50 Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Mon, 18 Jul 2022 14:29:07 -0700 Subject: [PATCH 3/6] Implement get expectations by domain type --- great_expectations/core/expectation_suite.py | 59 +++++++++++++++++++- tests/core/test_expectation_suite.py | 27 +++++++++ 2 files changed, 85 insertions(+), 1 deletion(-) diff --git a/great_expectations/core/expectation_suite.py b/great_expectations/core/expectation_suite.py index 36250df74e64..7c5f308e337e 100644 --- a/great_expectations/core/expectation_suite.py +++ b/great_expectations/core/expectation_suite.py @@ -3,7 +3,7 @@ import logging import uuid from copy import deepcopy -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import great_expectations as ge from great_expectations import __version__ as ge_version @@ -15,6 +15,7 @@ ExpectationConfigurationSchema, expectationConfigurationSchema, ) +from great_expectations.core.metric_domain_types import MetricDomainTypes from great_expectations.core.usage_statistics.events import UsageStatsEvents from great_expectations.core.util import ( convert_to_json_serializable, @@ -733,6 +734,61 @@ def get_grouped_and_ordered_expectations_by_column( return expectations_by_column, sorted_columns + def get_grouped_and_ordered_expectations_by_domain_type( + self, + ) -> Dict[str, List[ExpectationConfiguration]]: + expectation_configurations_by_domain: Dict[ + str, List[ExpectationConfiguration] + ] = self._get_expectations_by_domain_using_accessor_method( + domain_type=MetricDomainTypes.TABLE.value, + accessor_method=self.get_table_expectations, + ) + expectation_configurations_by_domain.update( + self._get_expectations_by_domain_using_accessor_method( + domain_type=MetricDomainTypes.COLUMN.value, + accessor_method=self.get_column_expectations, + ) + ) + expectation_configurations_by_domain.update( + self._get_expectations_by_domain_using_accessor_method( + domain_type=MetricDomainTypes.COLUMN_PAIR.value, + accessor_method=self.get_column_pair_expectations, + ) + ) + expectation_configurations_by_domain.update( + self._get_expectations_by_domain_using_accessor_method( + domain_type=MetricDomainTypes.MULTICOLUMN.value, + accessor_method=self.get_multicolumn_expectations, + ) + ) + return expectation_configurations_by_domain + + @staticmethod + def _get_expectations_by_domain_using_accessor_method( + domain_type: str, accessor_method: Callable + ) -> Dict[str, List[ExpectationConfiguration]]: + expectation_configurations_by_domain: Dict[ + str, List[ExpectationConfiguration] + ] = {} + + expectation_configurations: List[ExpectationConfiguration] + domain_kwargs: dict + expectation_configuration: ExpectationConfiguration + + for expectation_configuration in accessor_method(): + expectation_configurations = expectation_configurations_by_domain.get( + domain_type + ) + if expectation_configurations is None: + expectation_configurations = [] + expectation_configurations_by_domain[ + domain_type + ] = expectation_configurations + + expectation_configurations.append(expectation_configuration) + + return expectation_configurations_by_domain + class ExpectationSuiteSchema(Schema): expectation_suite_name = fields.Str() @@ -744,6 +800,7 @@ class ExpectationSuiteSchema(Schema): # NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields; # doing so could also allow us not to have to make a copy of data in the pre_dump method. + # noinspection PyMethodMayBeStatic def clean_empty(self, data): if isinstance(data, ExpectationSuite): if not hasattr(data, "evaluation_parameters"): diff --git a/tests/core/test_expectation_suite.py b/tests/core/test_expectation_suite.py index 5a71e356f793..19a240359300 100644 --- a/tests/core/test_expectation_suite.py +++ b/tests/core/test_expectation_suite.py @@ -1,4 +1,5 @@ import datetime +import itertools from copy import copy, deepcopy from typing import Any, Dict, List @@ -550,3 +551,29 @@ def test_get_column_expectations( ): obs = suite_with_table_and_column_expectations.get_column_expectations() assert obs == [exp1, exp2, exp3, exp4] + + +def test_get_expectations_by_domain_type( + suite_with_table_and_column_expectations, + exp1, + exp2, + exp3, + exp4, + column_pair_expectation, + table_exp1, + table_exp2, + table_exp3, +): + obs = ( + suite_with_table_and_column_expectations.get_grouped_and_ordered_expectations_by_domain_type() + ) + assert list(itertools.chain.from_iterable(obs.values())) == [ + table_exp1, + table_exp2, + table_exp3, + exp1, + exp2, + exp3, + exp4, + column_pair_expectation, + ] From 4abdf49c10e1ba4ebcfc2c84716ba89eb1163589 Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Mon, 18 Jul 2022 14:31:37 -0700 Subject: [PATCH 4/6] Implement get expectations by domain type --- great_expectations/core/expectation_suite.py | 1 + great_expectations/util.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/great_expectations/core/expectation_suite.py b/great_expectations/core/expectation_suite.py index 7c5f308e337e..591fdbc29c87 100644 --- a/great_expectations/core/expectation_suite.py +++ b/great_expectations/core/expectation_suite.py @@ -330,6 +330,7 @@ def remove_all_expectations_of_type( ) -> List[ExpectationConfiguration]: if isinstance(expectation_types, str): expectation_types = [expectation_types] + removed_expectations = [ expectation for expectation in self.expectations diff --git a/great_expectations/util.py b/great_expectations/util.py index 759d034e4d26..2cc692fc19e9 100644 --- a/great_expectations/util.py +++ b/great_expectations/util.py @@ -881,6 +881,7 @@ def validate( from great_expectations.data_context import DataContext data_context = DataContext(data_context) + expectation_suite = data_context.get_expectation_suite( expectation_suite_name=expectation_suite_name ) @@ -892,14 +893,17 @@ def validate( expectation_suite: ExpectationSuite = ExpectationSuite( **expectation_suite_dict, data_context=data_context ) + if data_asset_name is not None: raise ValueError( "When providing an expectation suite, data_asset_name cannot also be provided." ) + if expectation_suite_name is not None: raise ValueError( "When providing an expectation suite, expectation_suite_name cannot also be provided." ) + logger.info( f"Validating data_asset_name {data_asset_name} with expectation_suite_name {expectation_suite.expectation_suite_name}" ) @@ -950,6 +954,7 @@ def validate( data_asset_ = _convert_to_dataset_class( data_asset, dataset_class=data_asset_class, expectation_suite=expectation_suite ) + return data_asset_.validate(*args, data_context=data_context, **kwargs) From 39479f28dd2e8d8126273081baed577660d62f04 Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Mon, 18 Jul 2022 14:41:36 -0700 Subject: [PATCH 5/6] Implement get expectations by domain type --- great_expectations/core/expectation_suite.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/great_expectations/core/expectation_suite.py b/great_expectations/core/expectation_suite.py index 591fdbc29c87..a63e49084c2e 100644 --- a/great_expectations/core/expectation_suite.py +++ b/great_expectations/core/expectation_suite.py @@ -738,6 +738,10 @@ def get_grouped_and_ordered_expectations_by_column( def get_grouped_and_ordered_expectations_by_domain_type( self, ) -> Dict[str, List[ExpectationConfiguration]]: + """ + Returns "ExpectationConfiguration" list in predetermined order by passing appropriate methods for retrieving + "ExpectationConfiguration" lists by corresponding "domain_type" (with "table" first; then "column", and so on). + """ expectation_configurations_by_domain: Dict[ str, List[ExpectationConfiguration] ] = self._get_expectations_by_domain_using_accessor_method( From e23813b5375f6860dcf1a65372ad9ed5ef9b74de Mon Sep 17 00:00:00 2001 From: Alex Sherstinsky Date: Mon, 18 Jul 2022 15:01:29 -0700 Subject: [PATCH 6/6] Implement get expectations by expectation type --- great_expectations/core/expectation_suite.py | 31 ++++++++++++++++++-- tests/core/test_expectation_suite.py | 26 ++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/great_expectations/core/expectation_suite.py b/great_expectations/core/expectation_suite.py index a63e49084c2e..708f41b63efa 100644 --- a/great_expectations/core/expectation_suite.py +++ b/great_expectations/core/expectation_suite.py @@ -735,6 +735,35 @@ def get_grouped_and_ordered_expectations_by_column( return expectations_by_column, sorted_columns + def get_grouped_and_ordered_expectations_by_expectation_type( + self, + ) -> List[ExpectationConfiguration]: + """ + Returns "ExpectationConfiguration" list, grouped by "expectation_type", in predetermined designated order. + """ + table_expectation_configurations: List[ExpectationConfiguration] = sorted( + self.get_table_expectations(), + key=lambda element: element["expectation_type"], + ) + column_expectation_configurations: List[ExpectationConfiguration] = sorted( + self.get_column_expectations(), + key=lambda element: element["expectation_type"], + ) + column_pair_expectation_configurations: List[ExpectationConfiguration] = sorted( + self.get_column_pair_expectations(), + key=lambda element: element["expectation_type"], + ) + multicolumn_expectation_configurations: List[ExpectationConfiguration] = sorted( + self.get_multicolumn_expectations(), + key=lambda element: element["expectation_type"], + ) + return ( + table_expectation_configurations + + column_expectation_configurations + + column_pair_expectation_configurations + + multicolumn_expectation_configurations + ) + def get_grouped_and_ordered_expectations_by_domain_type( self, ) -> Dict[str, List[ExpectationConfiguration]]: @@ -777,9 +806,7 @@ def _get_expectations_by_domain_using_accessor_method( ] = {} expectation_configurations: List[ExpectationConfiguration] - domain_kwargs: dict expectation_configuration: ExpectationConfiguration - for expectation_configuration in accessor_method(): expectation_configurations = expectation_configurations_by_domain.get( domain_type diff --git a/tests/core/test_expectation_suite.py b/tests/core/test_expectation_suite.py index 19a240359300..73c0ed3ca87c 100644 --- a/tests/core/test_expectation_suite.py +++ b/tests/core/test_expectation_suite.py @@ -553,6 +553,32 @@ def test_get_column_expectations( assert obs == [exp1, exp2, exp3, exp4] +def test_get_expectations_by_expectation_type( + suite_with_table_and_column_expectations, + exp1, + exp2, + exp3, + exp4, + column_pair_expectation, + table_exp1, + table_exp2, + table_exp3, +): + obs = ( + suite_with_table_and_column_expectations.get_grouped_and_ordered_expectations_by_expectation_type() + ) + assert obs == [ + table_exp1, + table_exp2, + table_exp3, + exp1, + exp2, + exp3, + exp4, + column_pair_expectation, + ] + + def test_get_expectations_by_domain_type( suite_with_table_and_column_expectations, exp1,