Merge branch 'develop' into FEATURE/GREAT-727/GREAT-733/splitting_data_assets_into_batches_using_datetime_columns_in_spark_prelim_2
great-expectations · Apr 25, 2022 · c9f4071 · c9f4071
2 parents 3fa6f2a + 81ec5cb
commit c9f4071
Show file tree

Hide file tree

Showing 10 changed files with 363 additions and 81 deletions.
diff --git a/great_expectations/core/expectation_suite.py b/great_expectations/core/expectation_suite.py
@@ -523,7 +523,7 @@ def patch_expectation(
     def _add_expectation(
         self,
         expectation_configuration: ExpectationConfiguration,
-        send_usage_event: bool,
+        send_usage_event: bool = True,
         match_type: str = "domain",
         overwrite_existing: bool = True,
     ) -> ExpectationConfiguration:
@@ -540,6 +540,7 @@ def _add_expectation(
                 and so whether we should add or replace.
             overwrite_existing: If the expectation already exists, this will overwrite if True and raise an error if
                 False.
+
         Returns:
             The ExpectationConfiguration to add or replace.
         Raises:
@@ -598,19 +599,61 @@ def send_usage_event(self, success: bool):
                 success=success,
             )
 
+    def add_expectation_configurations(
+        self,
+        expectation_configurations: List[ExpectationConfiguration],
+        send_usage_event: bool = True,
+        match_type: str = "domain",
+        overwrite_existing: bool = True,
+    ) -> List[ExpectationConfiguration]:
+        """Add or update (i.e., "upsert") each given "ExpectationConfiguration" by calling add_expectation() on it.
+
+        Args:
+            expectation_configurations: The List of candidate new/modified "ExpectationConfiguration" objects for Suite.
+            send_usage_event: Whether to send a usage_statistics event; passed to add_expectation() for each element.
+            match_type: The criteria used to determine whether the Suite already has an "ExpectationConfiguration"
+                object, matching the specified criteria, and thus whether we should add or replace (i.e., "upsert").
+            overwrite_existing: If "ExpectationConfiguration" already exists, this will cause it to be overwritten if
+                True and raise an error if False.
+
+        Returns:
+            The List of "ExpectationConfiguration" objects attempted to be added or replaced (can differ from the list
+            of "ExpectationConfiguration" objects in "self.expectations" at the completion of this method's execution).
+        Raises:
+            More than one match
+            One match if overwrite_existing = False
+        """
+        expectation_configuration: ExpectationConfiguration  # type hint only; no value is bound here
+        expectation_configurations_attempted_to_be_added: List[
+            ExpectationConfiguration
+        ] = [
+            self.add_expectation(
+                expectation_configuration=expectation_configuration,
+                send_usage_event=send_usage_event,
+                match_type=match_type,
+                overwrite_existing=overwrite_existing,
+            )
+            for expectation_configuration in expectation_configurations
+        ]
+        return expectation_configurations_attempted_to_be_added
+
     def add_expectation(
         self,
         expectation_configuration: ExpectationConfiguration,
+        send_usage_event: bool = True,
         match_type: str = "domain",
         overwrite_existing: bool = True,
     ) -> ExpectationConfiguration:
         """
         Args:
             expectation_configuration: The ExpectationConfiguration to add or update
+            send_usage_event: Whether to send a usage_statistics event. When called through ExpectationSuite class'
+                public add_expectation() method, this is set to `True`.
             match_type: The criteria used to determine whether the Suite already has an ExpectationConfiguration
                 and so whether we should add or replace.
             overwrite_existing: If the expectation already exists, this will overwrite if True and raise an error if
                 False.
+
         Returns:
             The ExpectationConfiguration to add or replace.
         Raises:
@@ -619,7 +662,7 @@ def add_expectation(
         """
         return self._add_expectation(
             expectation_configuration=expectation_configuration,
-            send_usage_event=True,
+            send_usage_event=send_usage_event,
             match_type=match_type,
             overwrite_existing=overwrite_existing,
         )

diff --git a/great_expectations/core/usage_statistics/anonymizers/base.py b/great_expectations/core/usage_statistics/anonymizers/base.py
@@ -72,9 +72,6 @@ def get_parent_class(
                 object_module_name = object_config.get("module_name")
                 object_class = load_class(object_class_name, object_module_name)
 
-            object_class_name = object_class.__name__
-            object_module_name = object_class.__module__
-
             # Utilize candidate list if provided.
             if classes_to_check:
                 for class_to_check in classes_to_check:

diff --git a/great_expectations/core/util.py b/great_expectations/core/util.py
@@ -69,6 +69,12 @@
 
 _SUFFIX_TO_PD_KWARG = {"gz": "gzip", "zip": "zip", "bz2": "bz2", "xz": "xz"}
 
+TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX: str = "tmp"  # leading dot-separated part of generated temporary names
+TEMPORARY_EXPECTATION_SUITE_NAME_STEM: str = "suite"  # immediately followed by 8 characters taken from a UUID
+TEMPORARY_EXPECTATION_SUITE_NAME_PATTERN: re.Pattern = re.compile(
+    rf"^{TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX}\..+\.{TEMPORARY_EXPECTATION_SUITE_NAME_STEM}\w{{8}}"
+)  # NOTE: braces are doubled, because bare "{8}" inside f-string renders as literal "8", not as regex quantifier.
+
 
 def nested_update(
     d: Union[Iterable, dict],
@@ -768,3 +774,58 @@ def get_sql_dialect_floating_point_infinity_value(
             return res["NegativeInfinity"]
         else:
             return res["PositiveInfinity"]
+
+
+def get_or_create_expectation_suite(
+    data_context: "BaseDataContext",  # noqa: F821
+    expectation_suite: Optional["ExpectationSuite"] = None,  # noqa: F821
+    expectation_suite_name: Optional[str] = None,
+    component_name: Optional[str] = None,
+) -> "ExpectationSuite":  # noqa: F821
+    """Return "expectation_suite" if given (ValueError if its name conflicts with "expectation_suite_name").
+    Otherwise, get suite named "expectation_suite_name" from "data_context", creating it if DataContextError is raised;
+    if no name is given either, generate temporary name from "component_name" (which defaults to "test").
+    """
+    suite: "ExpectationSuite"  # noqa: F821
+
+    generate_temp_expectation_suite_name: bool
+    create_expectation_suite: bool
+
+    if expectation_suite is not None and expectation_suite_name is not None:
+        if expectation_suite.expectation_suite_name != expectation_suite_name:
+            raise ValueError(
+                'Mutually inconsistent "expectation_suite" and "expectation_suite_name" were specified.'
+            )
+
+        return expectation_suite  # both given and consistent: use suite as is
+    elif expectation_suite is None and expectation_suite_name is not None:  # name only
+        generate_temp_expectation_suite_name = False
+        create_expectation_suite = True
+    elif expectation_suite is not None and expectation_suite_name is None:  # suite only
+        generate_temp_expectation_suite_name = False
+        create_expectation_suite = False
+    else:  # neither suite nor name
+        generate_temp_expectation_suite_name = True
+        create_expectation_suite = True
+
+    if generate_temp_expectation_suite_name:  # form: <PREFIX>.<component_name>.<STEM><first 8 characters of UUID4>
+        if not component_name:
+            component_name = "test"
+
+        expectation_suite_name = f"{TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX}.{component_name}.{TEMPORARY_EXPECTATION_SUITE_NAME_STEM}{str(uuid.uuid4())[:8]}"
+
+    if create_expectation_suite:
+        try:  # first ask "data_context" for suite with this name
+            # noinspection PyUnusedLocal
+            expectation_suite = data_context.get_expectation_suite(
+                expectation_suite_name=expectation_suite_name
+            )
+        except ge_exceptions.DataContextError:  # on this error from lookup, create new suite instead
+            expectation_suite = data_context.create_expectation_suite(
+                expectation_suite_name=expectation_suite_name
+            )
+            print(
+                f'Created ExpectationSuite "{expectation_suite.expectation_suite_name}".'
+            )
+
+    return expectation_suite
diff --git a/great_expectations/rule_based_profiler/data_assistant/data_assistant.py b/great_expectations/rule_based_profiler/data_assistant/data_assistant.py
@@ -39,7 +39,12 @@ class DataAssistant(ABC):
         name="my_volume_data_assistant",
         validator=validator,
     )
-    result: DataAssistantResult = data_assistant.run()
+    result: DataAssistantResult = data_assistant.run(
+        expectation_suite=None,
+        expectation_suite_name="my_suite",
+        include_citation=True,
+        save_updated_expectation_suite=False,
+    )
 
     Then:
         metrics: Dict[Domain, Dict[str, ParameterNode]] = result.metrics
@@ -140,6 +145,7 @@ def run(
         expectation_suite: Optional[ExpectationSuite] = None,
         expectation_suite_name: Optional[str] = None,
         include_citation: bool = True,
+        save_updated_expectation_suite: bool = False,
     ) -> DataAssistantResult:
         """
         Run the DataAssistant as it is currently configured.
@@ -149,6 +155,7 @@ def run(
             expectation_suite_name: A name for returned "ExpectationSuite"
             include_citation: Flag, which controls whether or not to effective Profiler configuration should be included
             as a citation in metadata of the "ExpectationSuite" computeds and returned by "RuleBasedProfiler"
+            save_updated_expectation_suite: Flag, controlling whether or not updated "ExpectationSuite" must be saved
 
         Returns:
             DataAssistantResult: The result object for the DataAssistant
@@ -167,6 +174,7 @@ def run(
             expectation_suite=expectation_suite,
             expectation_suite_name=expectation_suite_name,
             include_citation=include_citation,
+            save_updated_expectation_suite=save_updated_expectation_suite,
         )
         return self._build_data_assistant_result(
             data_assistant_result=data_assistant_result
@@ -301,13 +309,15 @@ def get_expectation_suite(
         expectation_suite: Optional[ExpectationSuite] = None,
         expectation_suite_name: Optional[str] = None,
         include_citation: bool = True,
+        save_updated_expectation_suite: bool = False,
     ) -> ExpectationSuite:
         """
         Args:
             expectation_suite: An existing "ExpectationSuite" to update
             expectation_suite_name: A name for returned "ExpectationSuite"
-            include_citation: Flag, which controls whether or not to effective Profiler configuration should be included
-            as a citation in metadata of the "ExpectationSuite" computeds and returned by "RuleBasedProfiler"
+            include_citation: Flag, which controls whether or not effective "RuleBasedProfiler" configuration should be
+            included as a citation in metadata of the "ExpectationSuite" computed and returned by "RuleBasedProfiler"
+            save_updated_expectation_suite: Flag, controlling whether or not updated "ExpectationSuite" must be saved
 
         Returns:
             "ExpectationSuite" using "ExpectationConfiguration" objects, computed by "RuleBasedProfiler" state
@@ -316,6 +326,7 @@ def get_expectation_suite(
             expectation_suite=expectation_suite,
             expectation_suite_name=expectation_suite_name,
             include_citation=include_citation,
+            save_updated_expectation_suite=save_updated_expectation_suite,
         )
 
 
@@ -335,6 +346,7 @@ def run_profiler_on_data(
     expectation_suite: Optional[ExpectationSuite] = None,
     expectation_suite_name: Optional[str] = None,
     include_citation: bool = True,
+    save_updated_expectation_suite: bool = False,
 ) -> None:
     if rules is None:
         rules = []
@@ -358,4 +370,5 @@ def run_profiler_on_data(
         expectation_suite=expectation_suite,
         expectation_suite_name=expectation_suite_name,
         include_citation=include_citation,
+        save_updated_expectation_suite=save_updated_expectation_suite,
     )
diff --git a/great_expectations/rule_based_profiler/rule_based_profiler.py b/great_expectations/rule_based_profiler/rule_based_profiler.py
@@ -2,7 +2,6 @@
 import json
 import logging
 import sys
-import uuid
 from typing import Any, Dict, List, Optional, Set, Union
 
 from tqdm.auto import tqdm
@@ -22,7 +21,11 @@
     get_profiler_run_usage_statistics,
     usage_statistics_enabled_method,
 )
-from great_expectations.core.util import nested_update
+from great_expectations.core.util import (
+    TEMPORARY_EXPECTATION_SUITE_NAME_PATTERN,
+    get_or_create_expectation_suite,
+    nested_update,
+)
 from great_expectations.data_context.store import ProfilerStore
 from great_expectations.data_context.types.resource_identifiers import (
     ConfigurationIdentifier,
@@ -299,28 +302,33 @@ def get_expectation_suite(
         expectation_suite: Optional[ExpectationSuite] = None,
         expectation_suite_name: Optional[str] = None,
         include_citation: bool = True,
+        save_updated_expectation_suite: bool = False,
     ) -> ExpectationSuite:
         """
         Args:
-            expectation_suite: An existing ExpectationSuite to update.
-            expectation_suite_name: A name for returned ExpectationSuite.
-            include_citation: Whether or not to include the Profiler config in the metadata for the ExpectationSuite produced by the Profiler
+            expectation_suite: An existing "ExpectationSuite" to update
+            expectation_suite_name: A name for returned "ExpectationSuite"
+            include_citation: Flag, which controls whether or not "RuleBasedProfiler" configuration should be included
+            as a citation in metadata of the "ExpectationSuite" computed and returned by "RuleBasedProfiler"
+            save_updated_expectation_suite: Flag, controlling whether or not updated "ExpectationSuite" must be saved
 
         Returns:
-            ExpectationSuite using ExpectationConfiguration objects, accumulated from RuleState of every Rule executed.
+            "ExpectationSuite" using "ExpectationConfiguration" objects, computed by "RuleBasedProfiler" state
         """
         assert not (
             expectation_suite and expectation_suite_name
         ), "Ambiguous arguments provided; you may pass in an ExpectationSuite or provide a name to instantiate a new one (but you may not do both)."
 
-        if expectation_suite is None:
-            if expectation_suite_name is None:
-                expectation_suite_name = f"tmp.profiler_{self.__class__.__name__}_suite_{str(uuid.uuid4())[:8]}"
+        save_updated_expectation_suite = (
+            save_updated_expectation_suite and expectation_suite is None
+        )
 
-            expectation_suite = ExpectationSuite(
-                expectation_suite_name=expectation_suite_name,
-                data_context=self._data_context,
-            )
+        expectation_suite = get_or_create_expectation_suite(
+            data_context=self._data_context,
+            expectation_suite=expectation_suite,
+            expectation_suite_name=expectation_suite_name,
+            component_name=None,
+        )
 
         if include_citation:
             expectation_suite.add_citation(
@@ -332,13 +340,21 @@ def get_expectation_suite(
             ExpectationConfiguration
         ] = self._get_expectation_configurations()
 
-        expectation_configuration: ExpectationConfiguration
-        for expectation_configuration in expectation_configurations:
-            expectation_suite._add_expectation(
-                expectation_configuration=expectation_configuration,
-                send_usage_event=False,
-                match_type="domain",
-                overwrite_existing=True,
+        expectation_suite.add_expectation_configurations(
+            expectation_configurations=expectation_configurations,
+            send_usage_event=False,
+            match_type="domain",
+            overwrite_existing=True,
+        )
+
+        if (
+            save_updated_expectation_suite
+            and not TEMPORARY_EXPECTATION_SUITE_NAME_PATTERN.match(
+                expectation_suite_name
+            )
+        ):
+            self._data_context.save_expectation_suite(
+                expectation_suite=expectation_suite
             )
 
         return expectation_suite

diff --git a/great_expectations/validator/validator.py b/great_expectations/validator/validator.py
@@ -428,7 +428,12 @@ def _build_expectation_configuration(
             )
             expectation_configurations: List[
                 ExpectationConfiguration
-            ] = profiler.get_expectation_suite().expectations
+            ] = profiler.get_expectation_suite(
+                expectation_suite=None,
+                expectation_suite_name=None,
+                include_citation=True,
+                save_updated_expectation_suite=False,
+            ).expectations
 
             configuration = expectation_configurations[0]
 

diff --git a/tests/checkpoint/conftest.py b/tests/checkpoint/conftest.py
@@ -19,10 +19,7 @@ def titanic_pandas_data_context_stats_enabled_and_expectation_suite_with_one_exp
         expectation_type="expect_column_values_to_be_between",
         kwargs={"column": "col1", "min_value": 1, "max_value": 2},
     )
-    # NOTE Will 20211208 _add_expectation() method, although being called by an ExpectationSuite instance, is being
-    # called within a fixture, and so will call the private method _add_expectation() and prevent it from sending a
-    # usage_event.
-    suite._add_expectation(expectation, send_usage_event=False)
+    suite.add_expectation(expectation, send_usage_event=False)
     context.save_expectation_suite(suite)
     return context