Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEATURE] Provide methods for returning ExpectationConfiguration list grouped by expectation_type and by domain_type #5532

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
91 changes: 90 additions & 1 deletion great_expectations/core/expectation_suite.py
Expand Up @@ -3,7 +3,7 @@
import logging
import uuid
from copy import deepcopy
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import great_expectations as ge
from great_expectations import __version__ as ge_version
Expand All @@ -15,6 +15,7 @@
ExpectationConfigurationSchema,
expectationConfigurationSchema,
)
from great_expectations.core.metric_domain_types import MetricDomainTypes
from great_expectations.core.usage_statistics.events import UsageStatsEvents
from great_expectations.core.util import (
convert_to_json_serializable,
Expand Down Expand Up @@ -329,6 +330,7 @@ def remove_all_expectations_of_type(
) -> List[ExpectationConfiguration]:
if isinstance(expectation_types, str):
expectation_types = [expectation_types]

removed_expectations = [
expectation
for expectation in self.expectations
Expand Down Expand Up @@ -733,6 +735,92 @@ def get_grouped_and_ordered_expectations_by_column(

return expectations_by_column, sorted_columns

def get_grouped_and_ordered_expectations_by_expectation_type(
    self,
) -> List[ExpectationConfiguration]:
    """
    Return a single flat "ExpectationConfiguration" list built from four groups, concatenated in this fixed order:
    results of "get_table_expectations()", "get_column_expectations()", "get_column_pair_expectations()", and
    "get_multicolumn_expectations()".

    Within each group, elements are sorted in ascending order of their "expectation_type" value; elements sharing
    the same "expectation_type" keep the relative order in which the accessor returned them (sorting is stable).
    """
    # The order of this tuple defines the order of the groups in the returned list.
    accessor_methods_in_order = (
        self.get_table_expectations,
        self.get_column_expectations,
        self.get_column_pair_expectations,
        self.get_multicolumn_expectations,
    )

    ordered_configurations: List[ExpectationConfiguration] = []
    for accessor_method in accessor_methods_in_order:
        ordered_configurations.extend(
            sorted(
                accessor_method(),
                key=lambda configuration: configuration["expectation_type"],
            )
        )

    return ordered_configurations

def get_grouped_and_ordered_expectations_by_domain_type(
    self,
) -> Dict[str, List[ExpectationConfiguration]]:
    """
    Return a dictionary that maps each "domain_type" value to the "ExpectationConfiguration" list obtained from the
    accessor method paired with it below.

    Entries are merged in a fixed order ("TABLE" first; then "COLUMN", "COLUMN_PAIR", and "MULTICOLUMN"), so that
    iterating over the returned dictionary visits the domain types in that order.  Each partial dictionary is
    produced by "_get_expectations_by_domain_using_accessor_method()".
    """
    # Pairs of ("domain_type" value, accessor method); the order of this tuple defines the order of the result keys.
    domain_types_and_accessor_methods = (
        (MetricDomainTypes.TABLE.value, self.get_table_expectations),
        (MetricDomainTypes.COLUMN.value, self.get_column_expectations),
        (MetricDomainTypes.COLUMN_PAIR.value, self.get_column_pair_expectations),
        (MetricDomainTypes.MULTICOLUMN.value, self.get_multicolumn_expectations),
    )

    grouped_configurations: Dict[str, List[ExpectationConfiguration]] = {}
    for domain_type, accessor_method in domain_types_and_accessor_methods:
        grouped_configurations.update(
            self._get_expectations_by_domain_using_accessor_method(
                domain_type=domain_type,
                accessor_method=accessor_method,
            )
        )

    return grouped_configurations

@staticmethod
def _get_expectations_by_domain_using_accessor_method(
    domain_type: str, accessor_method: Callable
) -> Dict[str, List[ExpectationConfiguration]]:
    """
    Call "accessor_method" (with no arguments) and file everything it yields under the single key "domain_type".

    Args:
        domain_type: key under which the collected "ExpectationConfiguration" objects are stored.
        accessor_method: zero-argument callable returning an iterable of "ExpectationConfiguration" objects.

    Returns:
        Empty dictionary if "accessor_method" yields nothing; otherwise, one-entry dictionary mapping "domain_type" to
        a new list of the yielded objects (in the order yielded).
    """
    collected_configurations: List[ExpectationConfiguration] = list(
        accessor_method()
    )

    # No key is created for "domain_type" when there is nothing to store under it.
    if not collected_configurations:
        return {}

    return {domain_type: collected_configurations}


class ExpectationSuiteSchema(Schema):
expectation_suite_name = fields.Str()
Expand All @@ -744,6 +832,7 @@ class ExpectationSuiteSchema(Schema):

# NOTE: 20191107 - JPC - we may want to remove clean_empty and update tests to require the other fields;
# doing so could also allow us not to have to make a copy of data in the pre_dump method.
# noinspection PyMethodMayBeStatic
def clean_empty(self, data):
if isinstance(data, ExpectationSuite):
if not hasattr(data, "evaluation_parameters"):
Expand Down
5 changes: 5 additions & 0 deletions great_expectations/util.py
Expand Up @@ -881,6 +881,7 @@ def validate(
from great_expectations.data_context import DataContext

data_context = DataContext(data_context)

expectation_suite = data_context.get_expectation_suite(
expectation_suite_name=expectation_suite_name
)
Expand All @@ -892,14 +893,17 @@ def validate(
expectation_suite: ExpectationSuite = ExpectationSuite(
**expectation_suite_dict, data_context=data_context
)

if data_asset_name is not None:
raise ValueError(
"When providing an expectation suite, data_asset_name cannot also be provided."
)

if expectation_suite_name is not None:
raise ValueError(
"When providing an expectation suite, expectation_suite_name cannot also be provided."
)

logger.info(
f"Validating data_asset_name {data_asset_name} with expectation_suite_name {expectation_suite.expectation_suite_name}"
)
Expand Down Expand Up @@ -950,6 +954,7 @@ def validate(
data_asset_ = _convert_to_dataset_class(
data_asset, dataset_class=data_asset_class, expectation_suite=expectation_suite
)

return data_asset_.validate(*args, data_context=data_context, **kwargs)


Expand Down
53 changes: 53 additions & 0 deletions tests/core/test_expectation_suite.py
@@ -1,4 +1,5 @@
import datetime
import itertools
from copy import copy, deepcopy
from typing import Any, Dict, List

Expand Down Expand Up @@ -550,3 +551,55 @@ def test_get_column_expectations(
):
obs = suite_with_table_and_column_expectations.get_column_expectations()
assert obs == [exp1, exp2, exp3, exp4]


def test_get_expectations_by_expectation_type(
    suite_with_table_and_column_expectations,
    exp1,
    exp2,
    exp3,
    exp4,
    column_pair_expectation,
    table_exp1,
    table_exp2,
    table_exp3,
):
    """
    The flat list returned by "get_grouped_and_ordered_expectations_by_expectation_type()" must hold the three table
    expectation fixtures first, then the four column expectation fixtures, then the column pair expectation fixture.
    """
    expected_configurations = [
        table_exp1,
        table_exp2,
        table_exp3,
        exp1,
        exp2,
        exp3,
        exp4,
        column_pair_expectation,
    ]
    suite = suite_with_table_and_column_expectations
    observed_configurations = (
        suite.get_grouped_and_ordered_expectations_by_expectation_type()
    )
    assert observed_configurations == expected_configurations


def test_get_expectations_by_domain_type(
    suite_with_table_and_column_expectations,
    exp1,
    exp2,
    exp3,
    exp4,
    column_pair_expectation,
    table_exp1,
    table_exp2,
    table_exp3,
):
    """
    Concatenating the values of the dictionary returned by "get_grouped_and_ordered_expectations_by_domain_type()"
    (in the iteration order of that dictionary) must give the three table expectation fixtures first, then the four
    column expectation fixtures, then the column pair expectation fixture.
    """
    expected_configurations = [
        table_exp1,
        table_exp2,
        table_exp3,
        exp1,
        exp2,
        exp3,
        exp4,
        column_pair_expectation,
    ]
    suite = suite_with_table_and_column_expectations
    configurations_by_domain_type = (
        suite.get_grouped_and_ordered_expectations_by_domain_type()
    )
    # Flatten the per-domain lists into a single list, preserving the dictionary iteration order.
    flattened_configurations = list(
        itertools.chain.from_iterable(configurations_by_domain_type.values())
    )
    assert flattened_configurations == expected_configurations