Skip to content

Commit

Permalink
Merge branch 'develop' into FEATURE/GREAT-727/GREAT-733/splitting_dat…
Browse files Browse the repository at this point in the history
…a_assets_into_batches_using_datetime_columns_in_spark_prelim_2
  • Loading branch information
anthonyburdi committed Apr 25, 2022
2 parents 3fa6f2a + 81ec5cb commit c9f4071
Show file tree
Hide file tree
Showing 10 changed files with 363 additions and 81 deletions.
47 changes: 45 additions & 2 deletions great_expectations/core/expectation_suite.py
Expand Up @@ -523,7 +523,7 @@ def patch_expectation(
def _add_expectation(
self,
expectation_configuration: ExpectationConfiguration,
send_usage_event: bool,
send_usage_event: bool = True,
match_type: str = "domain",
overwrite_existing: bool = True,
) -> ExpectationConfiguration:
Expand All @@ -540,6 +540,7 @@ def _add_expectation(
and so whether we should add or replace.
overwrite_existing: If the expectation already exists, this will overwrite if True and raise an error if
False.
Returns:
The ExpectationConfiguration to add or replace.
Raises:
Expand Down Expand Up @@ -598,19 +599,61 @@ def send_usage_event(self, success: bool):
success=success,
)

def add_expectation_configurations(
    self,
    expectation_configurations: List[ExpectationConfiguration],
    send_usage_event: bool = True,
    match_type: str = "domain",
    overwrite_existing: bool = True,
) -> List[ExpectationConfiguration]:
    """
    Add or replace ("upsert") several "ExpectationConfiguration" objects, one "add_expectation()" call per element.

    Args:
        expectation_configurations: The List of candidate new/modified "ExpectationConfiguration" objects for Suite.
        send_usage_event: Whether to send a usage_statistics event; forwarded unchanged to every
            "add_expectation()" call.
        match_type: The criteria used to determine whether the Suite already has an "ExpectationConfiguration"
            object, matching the specified criteria, and thus whether we should add or replace (i.e., "upsert").
        overwrite_existing: If "ExpectationConfiguration" already exists, this will cause it to be overwritten if
            True and raise an error if False.

    Returns:
        The List of "ExpectationConfiguration" objects attempted to be added or replaced, in input order (can
        differ from the list of "ExpectationConfiguration" objects in "self.expectations" at the completion of
        this method's execution).

    Raises:
        More than one match
        One match if overwrite_existing = False
    """
    attempted: List[ExpectationConfiguration] = []

    candidate: ExpectationConfiguration
    for candidate in expectation_configurations:
        # Each element goes through the public single-item entry point, so all elements are handled uniformly.
        outcome: ExpectationConfiguration = self.add_expectation(
            expectation_configuration=candidate,
            send_usage_event=send_usage_event,
            match_type=match_type,
            overwrite_existing=overwrite_existing,
        )
        attempted.append(outcome)

    return attempted

def add_expectation(
self,
expectation_configuration: ExpectationConfiguration,
send_usage_event: bool = True,
match_type: str = "domain",
overwrite_existing: bool = True,
) -> ExpectationConfiguration:
"""
Args:
expectation_configuration: The ExpectationConfiguration to add or update
send_usage_event: Whether to send a usage_statistics event. When called through ExpectationSuite class'
public add_expectation() method, this is set to `True`.
match_type: The criteria used to determine whether the Suite already has an ExpectationConfiguration
and so whether we should add or replace.
overwrite_existing: If the expectation already exists, this will overwrite if True and raise an error if
False.
Returns:
The ExpectationConfiguration to add or replace.
Raises:
Expand All @@ -619,7 +662,7 @@ def add_expectation(
"""
return self._add_expectation(
expectation_configuration=expectation_configuration,
send_usage_event=True,
send_usage_event=send_usage_event,
match_type=match_type,
overwrite_existing=overwrite_existing,
)
Expand Down
3 changes: 0 additions & 3 deletions great_expectations/core/usage_statistics/anonymizers/base.py
Expand Up @@ -72,9 +72,6 @@ def get_parent_class(
object_module_name = object_config.get("module_name")
object_class = load_class(object_class_name, object_module_name)

object_class_name = object_class.__name__
object_module_name = object_class.__module__

# Utilize candidate list if provided.
if classes_to_check:
for class_to_check in classes_to_check:
Expand Down
61 changes: 61 additions & 0 deletions great_expectations/core/util.py
Expand Up @@ -69,6 +69,12 @@

_SUFFIX_TO_PD_KWARG = {"gz": "gzip", "zip": "zip", "bz2": "bz2", "xz": "xz"}

# Building blocks of auto-generated (throw-away) "ExpectationSuite" names, which have the form
# "<PREFIX>.<component_name>.<STEM><first 8 characters of a UUID4 string>".
TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX: str = "tmp"
TEMPORARY_EXPECTATION_SUITE_NAME_STEM: str = "suite"
# Recognizes names of the form described above.
# NOTE: this is an f-string, so the regex quantifier braces must be doubled ("{{8}}"); a bare "{8}" would be
# treated as a replacement field and rendered as the character "8", compiling "\w8" instead of "\w{8}".
TEMPORARY_EXPECTATION_SUITE_NAME_PATTERN: re.Pattern = re.compile(
    rf"^{TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX}\..+\.{TEMPORARY_EXPECTATION_SUITE_NAME_STEM}\w{{8}}"
)


def nested_update(
d: Union[Iterable, dict],
Expand Down Expand Up @@ -768,3 +774,58 @@ def get_sql_dialect_floating_point_infinity_value(
return res["NegativeInfinity"]
else:
return res["PositiveInfinity"]


def get_or_create_expectation_suite(
    data_context: "BaseDataContext",  # noqa: F821
    expectation_suite: Optional["ExpectationSuite"] = None,  # noqa: F821
    expectation_suite_name: Optional[str] = None,
    component_name: Optional[str] = None,
) -> "ExpectationSuite":  # noqa: F821
    """
    Resolve the "ExpectationSuite" to operate on.

    Resolution order:
      1. If "expectation_suite" is supplied, it is returned as is (when "expectation_suite_name" is supplied as
         well, the two names must agree).
      2. Otherwise, if "expectation_suite_name" is supplied, the suite with that name is fetched from
         "data_context", or created there if fetching raises "DataContextError".
      3. Otherwise, a temporary name is generated from "component_name" (defaulting to "test") and handled as in 2.

    Args:
        data_context: Object used to fetch or create the suite by name.
        expectation_suite: An existing "ExpectationSuite" to use, if any.
        expectation_suite_name: Name of the "ExpectationSuite" to fetch or create, if any.
        component_name: Middle part of the generated temporary suite name; used only when neither a suite nor a
            name is supplied.

    Returns:
        The supplied, fetched, or newly created "ExpectationSuite".

    Raises:
        ValueError: Both "expectation_suite" and "expectation_suite_name" are supplied, but the names differ.
    """
    # A caller-supplied suite always wins; no lookup or creation happens in this case.
    if expectation_suite is not None:
        names_disagree: bool = (
            expectation_suite_name is not None
            and expectation_suite.expectation_suite_name != expectation_suite_name
        )
        if names_disagree:
            raise ValueError(
                'Mutually inconsistent "expectation_suite" and "expectation_suite_name" were specified.'
            )

        return expectation_suite

    # No suite and no name: fall back to a generated temporary name.
    if expectation_suite_name is None:
        if not component_name:
            component_name = "test"

        expectation_suite_name = f"{TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX}.{component_name}.{TEMPORARY_EXPECTATION_SUITE_NAME_STEM}{str(uuid.uuid4())[:8]}"

    try:
        return data_context.get_expectation_suite(
            expectation_suite_name=expectation_suite_name
        )
    except ge_exceptions.DataContextError:
        # Fetching by name failed, so create the suite and announce it.
        created_suite = data_context.create_expectation_suite(
            expectation_suite_name=expectation_suite_name
        )
        print(
            f'Created ExpectationSuite "{created_suite.expectation_suite_name}".'
        )
        return created_suite
Expand Up @@ -39,7 +39,12 @@ class DataAssistant(ABC):
name="my_volume_data_assistant",
validator=validator,
)
result: DataAssistantResult = data_assistant.run()
result: DataAssistantResult = data_assistant.run(
expectation_suite=None,
expectation_suite_name="my_suite",
include_citation=True,
save_updated_expectation_suite=False,
)
Then:
metrics: Dict[Domain, Dict[str, ParameterNode]] = result.metrics
Expand Down Expand Up @@ -140,6 +145,7 @@ def run(
expectation_suite: Optional[ExpectationSuite] = None,
expectation_suite_name: Optional[str] = None,
include_citation: bool = True,
save_updated_expectation_suite: bool = False,
) -> DataAssistantResult:
"""
Run the DataAssistant as it is currently configured.
Expand All @@ -149,6 +155,7 @@ def run(
expectation_suite_name: A name for returned "ExpectationSuite"
include_citation: Flag, which controls whether or not the effective Profiler configuration should be included
as a citation in metadata of the "ExpectationSuite" computed and returned by "RuleBasedProfiler"
save_updated_expectation_suite: Flag, controlling whether or not updated "ExpectationSuite" must be saved
Returns:
DataAssistantResult: The result object for the DataAssistant
Expand All @@ -167,6 +174,7 @@ def run(
expectation_suite=expectation_suite,
expectation_suite_name=expectation_suite_name,
include_citation=include_citation,
save_updated_expectation_suite=save_updated_expectation_suite,
)
return self._build_data_assistant_result(
data_assistant_result=data_assistant_result
Expand Down Expand Up @@ -301,13 +309,15 @@ def get_expectation_suite(
expectation_suite: Optional[ExpectationSuite] = None,
expectation_suite_name: Optional[str] = None,
include_citation: bool = True,
save_updated_expectation_suite: bool = False,
) -> ExpectationSuite:
"""
Args:
expectation_suite: An existing "ExpectationSuite" to update
expectation_suite_name: A name for returned "ExpectationSuite"
include_citation: Flag, which controls whether or not to effective Profiler configuration should be included
as a citation in metadata of the "ExpectationSuite" computeds and returned by "RuleBasedProfiler"
include_citation: Flag, which controls whether or not effective "RuleBasedProfiler" configuration should be
included as a citation in metadata of the "ExpectationSuite" computed and returned by "RuleBasedProfiler"
save_updated_expectation_suite: Flag, controlling whether or not updated "ExpectationSuite" must be saved
Returns:
"ExpectationSuite" using "ExpectationConfiguration" objects, computed by "RuleBasedProfiler" state
Expand All @@ -316,6 +326,7 @@ def get_expectation_suite(
expectation_suite=expectation_suite,
expectation_suite_name=expectation_suite_name,
include_citation=include_citation,
save_updated_expectation_suite=save_updated_expectation_suite,
)


Expand All @@ -335,6 +346,7 @@ def run_profiler_on_data(
expectation_suite: Optional[ExpectationSuite] = None,
expectation_suite_name: Optional[str] = None,
include_citation: bool = True,
save_updated_expectation_suite: bool = False,
) -> None:
if rules is None:
rules = []
Expand All @@ -358,4 +370,5 @@ def run_profiler_on_data(
expectation_suite=expectation_suite,
expectation_suite_name=expectation_suite_name,
include_citation=include_citation,
save_updated_expectation_suite=save_updated_expectation_suite,
)
56 changes: 36 additions & 20 deletions great_expectations/rule_based_profiler/rule_based_profiler.py
Expand Up @@ -2,7 +2,6 @@
import json
import logging
import sys
import uuid
from typing import Any, Dict, List, Optional, Set, Union

from tqdm.auto import tqdm
Expand All @@ -22,7 +21,11 @@
get_profiler_run_usage_statistics,
usage_statistics_enabled_method,
)
from great_expectations.core.util import nested_update
from great_expectations.core.util import (
TEMPORARY_EXPECTATION_SUITE_NAME_PATTERN,
get_or_create_expectation_suite,
nested_update,
)
from great_expectations.data_context.store import ProfilerStore
from great_expectations.data_context.types.resource_identifiers import (
ConfigurationIdentifier,
Expand Down Expand Up @@ -299,28 +302,33 @@ def get_expectation_suite(
expectation_suite: Optional[ExpectationSuite] = None,
expectation_suite_name: Optional[str] = None,
include_citation: bool = True,
save_updated_expectation_suite: bool = False,
) -> ExpectationSuite:
"""
Args:
expectation_suite: An existing ExpectationSuite to update.
expectation_suite_name: A name for returned ExpectationSuite.
include_citation: Whether or not to include the Profiler config in the metadata for the ExpectationSuite produced by the Profiler
expectation_suite: An existing "ExpectationSuite" to update
expectation_suite_name: A name for returned "ExpectationSuite"
include_citation: Flag, which controls whether or not "RuleBasedProfiler" configuration should be included
as a citation in metadata of the "ExpectationSuite" computed and returned by "RuleBasedProfiler"
save_updated_expectation_suite: Flag, controlling whether or not updated "ExpectationSuite" must be saved
Returns:
ExpectationSuite using ExpectationConfiguration objects, accumulated from RuleState of every Rule executed.
"ExpectationSuite" using "ExpectationConfiguration" objects, computed by "RuleBasedProfiler" state
"""
assert not (
expectation_suite and expectation_suite_name
), "Ambiguous arguments provided; you may pass in an ExpectationSuite or provide a name to instantiate a new one (but you may not do both)."

if expectation_suite is None:
if expectation_suite_name is None:
expectation_suite_name = f"tmp.profiler_{self.__class__.__name__}_suite_{str(uuid.uuid4())[:8]}"
save_updated_expectation_suite = (
save_updated_expectation_suite and expectation_suite is None
)

expectation_suite = ExpectationSuite(
expectation_suite_name=expectation_suite_name,
data_context=self._data_context,
)
expectation_suite = get_or_create_expectation_suite(
data_context=self._data_context,
expectation_suite=expectation_suite,
expectation_suite_name=expectation_suite_name,
component_name=None,
)

if include_citation:
expectation_suite.add_citation(
Expand All @@ -332,13 +340,21 @@ def get_expectation_suite(
ExpectationConfiguration
] = self._get_expectation_configurations()

expectation_configuration: ExpectationConfiguration
for expectation_configuration in expectation_configurations:
expectation_suite._add_expectation(
expectation_configuration=expectation_configuration,
send_usage_event=False,
match_type="domain",
overwrite_existing=True,
expectation_suite.add_expectation_configurations(
expectation_configurations=expectation_configurations,
send_usage_event=False,
match_type="domain",
overwrite_existing=True,
)

if (
save_updated_expectation_suite
and not TEMPORARY_EXPECTATION_SUITE_NAME_PATTERN.match(
expectation_suite_name
)
):
self._data_context.save_expectation_suite(
expectation_suite=expectation_suite
)

return expectation_suite
Expand Down
7 changes: 6 additions & 1 deletion great_expectations/validator/validator.py
Expand Up @@ -428,7 +428,12 @@ def _build_expectation_configuration(
)
expectation_configurations: List[
ExpectationConfiguration
] = profiler.get_expectation_suite().expectations
] = profiler.get_expectation_suite(
expectation_suite=None,
expectation_suite_name=None,
include_citation=True,
save_updated_expectation_suite=False,
).expectations

configuration = expectation_configurations[0]

Expand Down
5 changes: 1 addition & 4 deletions tests/checkpoint/conftest.py
Expand Up @@ -19,10 +19,7 @@ def titanic_pandas_data_context_stats_enabled_and_expectation_suite_with_one_exp
expectation_type="expect_column_values_to_be_between",
kwargs={"column": "col1", "min_value": 1, "max_value": 2},
)
# NOTE Will 20211208 _add_expectation() method, although being called by an ExpectationSuite instance, is being
# called within a fixture, and so will call the private method _add_expectation() and prevent it from sending a
# usage_event.
suite._add_expectation(expectation, send_usage_event=False)
suite.add_expectation(expectation, send_usage_event=False)
context.save_expectation_suite(suite)
return context

Expand Down

0 comments on commit c9f4071

Please sign in to comment.