Commit b907c7d

clean up (#4959)
alexsherstinsky committed Apr 26, 2022
1 parent b048544 commit b907c7d
Showing 6 changed files with 135 additions and 124 deletions.
8 changes: 0 additions & 8 deletions great_expectations/exceptions/exceptions.py
@@ -135,14 +135,6 @@ class ProfilerNotFoundError(ProfilerError):
     pass
 
 
-class DataAssistantError(GreatExpectationsError):
-    pass
-
-
-class DataAssistantNotFoundError(DataAssistantError):
-    pass
-
-
 class InvalidConfigError(DataContextError):
     def __init__(self, message):
         self.message = message
6 changes: 4 additions & 2 deletions great_expectations/expectations/expectation.py
@@ -10,11 +10,9 @@
 from inspect import isabstract
 from typing import Dict, List, Optional, Tuple, Union
 
-import pandas as pd
 from dateutil.parser import parse
 
 from great_expectations import __version__ as ge_version
-from great_expectations.core.batch import Batch
 from great_expectations.core.expectation_configuration import (
     ExpectationConfiguration,
     parse_result_format,
@@ -94,6 +92,7 @@
 )
 
 
+# noinspection PyMethodParameters
 class MetaExpectation(ABCMeta):
     """MetaExpectation registers Expectations as they are defined, adding them to the Expectation registry.
@@ -103,9 +102,12 @@ class MetaExpectation(ABCMeta):
 
     def __new__(cls, clsname, bases, attrs):
         newclass = super().__new__(cls, clsname, bases, attrs)
+        # noinspection PyUnresolvedReferences
         if not newclass.is_abstract():
            newclass.expectation_type = camel_to_snake(clsname)
            register_expectation(newclass)
+
+        # noinspection PyUnresolvedReferences
        newclass._register_renderer_functions()
        default_kwarg_values = {}
        for base in reversed(bases):
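Aside on the pattern visible in this hunk: MetaExpectation is a registry-on-definition metaclass, so a concrete Expectation subclass is registered the moment its class statement executes. A minimal, self-contained sketch of the same pattern (the _REGISTRY dict, class names, and camel_to_snake helper below are illustrative stand-ins, not the library's own):

from abc import ABCMeta, abstractmethod
from inspect import isabstract
import re

_REGISTRY = {}  # illustrative stand-in for the expectation registry

def camel_to_snake(name: str) -> str:
    # "ExpectSomethingSketch" -> "expect_something_sketch"
    return re.sub(r"(?<!^)(?=[A-Z])", "_", name).lower()

class RegisteringMeta(ABCMeta):
    def __new__(cls, clsname, bases, attrs):
        newclass = super().__new__(cls, clsname, bases, attrs)
        # ABCMeta has already computed __abstractmethods__ at this point,
        # so we can skip abstract bases and register only concrete classes.
        if not isabstract(newclass):
            _REGISTRY[camel_to_snake(clsname)] = newclass
        return newclass

class BaseExpectationSketch(metaclass=RegisteringMeta):
    @abstractmethod
    def validate(self) -> bool: ...

class ExpectSomethingSketch(BaseExpectationSketch):
    def validate(self) -> bool:
        return True

assert "expect_something_sketch" in _REGISTRY

The abstractness check is what keeps intermediate base classes out of the registry: only leaf classes that implement every abstract method are registered.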
@@ -20,7 +20,7 @@
 from great_expectations.validator.metric_configuration import MetricConfiguration
 
 
-class DomainBuilder(Builder, ABC):
+class DomainBuilder(ABC, Builder):
     """
     A DomainBuilder provides methods to get domains based on one or more batches of data.
     """
@@ -20,7 +20,7 @@
 logger.setLevel(logging.INFO)
 
 
-class ExpectationConfigurationBuilder(Builder, ABC):
+class ExpectationConfigurationBuilder(ABC, Builder):
     exclude_field_names: Set[str] = Builder.exclude_field_names | {
         "validation_parameter_builders",
     }
@@ -91,7 +91,7 @@ def to_json_dict(self) -> dict:
         return convert_to_json_serializable(data=self.to_dict())
 
 
-class ParameterBuilder(Builder, ABC):
+class ParameterBuilder(ABC, Builder):
     """
     A ParameterBuilder implementation provides support for building Expectation Configuration Parameters suitable for
     use in other ParameterBuilders or in ConfigurationBuilders as part of profiling.
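The recurring one-line change in these three builder hunks swaps the base-class order from (Builder, ABC) to (ABC, Builder). Base order determines the method resolution order (MRO), i.e. where Python looks up attributes and where cooperative super() calls go next. Both orders are legal here; a small sketch of the observable difference, using a stand-in Builder class:

from abc import ABC

class Builder:
    """Illustrative stand-in for great_expectations' Builder base class."""

class DomainBuilderA(Builder, ABC):
    pass

class DomainBuilderB(ABC, Builder):
    pass

# Base order is reflected directly in the method resolution order:
print([c.__name__ for c in DomainBuilderA.__mro__])
# ['DomainBuilderA', 'Builder', 'ABC', 'object']
print([c.__name__ for c in DomainBuilderB.__mro__])
# ['DomainBuilderB', 'ABC', 'Builder', 'object']

Since ABC itself contributes no attributes, the swap changes no runtime behavior in practice; it reads as a consistency cleanup that signals these builders are abstract interfaces first, which fits the commit title.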
239 changes: 128 additions & 111 deletions tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py
@@ -28,111 +28,8 @@
 from tests.test_utils import set_bootstrap_random_seed_variable
 
 
-def run_volume_data_assistant_result_jupyter_notebook_with_new_cell(
-    context: DataContext, new_cell: str
-):
-    """
-    To set this test up we:
-    - create a suite
-    - write code (as a string) for creating a VolumeDataAssistantResult
-    - add a new cell to the notebook that was passed to this method
-    - write both cells to ipynb file
-    We then:
-    - load the notebook back from disk
-    - execute the notebook (Note: this will raise various errors like
-    CellExecutionError if any cell in the notebook fails)
-    """
-
-    root_dir: str = context.root_directory
-    expectation_suite_name: str = "test_suite"
-    context.create_expectation_suite(expectation_suite_name)
-    notebook_path: str = os.path.join(root_dir, f"run_volume_data_assistant.ipynb")
-    notebook_code: str = """
-from typing import Optional, Union
-import uuid
-import great_expectations as ge
-from great_expectations.data_context import BaseDataContext
-from great_expectations.validator.validator import Validator
-from great_expectations.rule_based_profiler.data_assistant import (
-    DataAssistant,
-    VolumeDataAssistant,
-)
-from great_expectations.rule_based_profiler.types.data_assistant_result import DataAssistantResult
-from great_expectations.rule_based_profiler.helpers.util import get_validator_with_expectation_suite
-import great_expectations.exceptions as ge_exceptions
-"""
-    notebook_code += """
-context = ge.get_context()
-batch_request: dict = {
-    "datasource_name": "taxi_pandas",
-    "data_connector_name": "monthly",
-    "data_asset_name": "my_reports",
-}
-validator: Validator = get_validator_with_expectation_suite(
-    batch_request=batch_request,
-    data_context=context,
-    expectation_suite_name=None,
-    expectation_suite=None,
-    component_name="volume_data_assistant",
-)
-data_assistant: DataAssistant = VolumeDataAssistant(
-    name="test_volume_data_assistant",
-    validator=validator,
-)
-expectation_suite_name: str = "test_suite"
-data_assistant_result: DataAssistantResult = data_assistant.run(
-    expectation_suite_name=expectation_suite_name,
-)
-"""
-
-    nb = nbformat.v4.new_notebook()
-    nb["cells"] = []
-    nb["cells"].append(nbformat.v4.new_code_cell(notebook_code))
-    nb["cells"].append(nbformat.v4.new_code_cell(new_cell))
-
-    # Write notebook to path and load it as NotebookNode
-    with open(notebook_path, "w") as f:
-        nbformat.write(nb, f)
-
-    nb: nbformat.notebooknode.NotebookNode = load_notebook_from_path(
-        notebook_path=notebook_path
-    )
-
-    # Run notebook
-    ep: nbconvert.preprocessors.ExecutePreprocessor = (
-        nbconvert.preprocessors.ExecutePreprocessor(timeout=60, kernel_name="python3")
-    )
-    ep.preprocess(nb, {"metadata": {"path": root_dir}})
-
-
-@freeze_time("09/26/2019 13:42:41")
-def test_get_metrics_and_expectations(
-    quentin_columnar_table_multi_batch_data_context,
-):
-    context: DataContext = quentin_columnar_table_multi_batch_data_context
-
-    batch_request: dict = {
-        "datasource_name": "taxi_pandas",
-        "data_connector_name": "monthly",
-        "data_asset_name": "my_reports",
-    }
-
-    validator: Validator = get_validator_with_expectation_suite(
-        batch_request=batch_request,
-        data_context=context,
-        expectation_suite_name=None,
-        expectation_suite=None,
-        component_name="volume_data_assistant",
-    )
-    assert len(validator.batches) == 36
-
+@pytest.fixture()
+def quentin_expected_metrics_by_domain() -> Dict[Domain, Dict[str, Any]]:
     expected_metrics_by_domain: Dict[Domain, Dict[str, Any]] = {
         Domain(domain_type="table",): {
             "$parameter.table_row_count": {
@@ -1841,7 +1738,11 @@ def test_get_metrics_and_expectations(
             }
         },
     }
-
+    return expected_metrics_by_domain
+
+
+@pytest.fixture()
+def quentin_expected_expectation_suite() -> ExpectationSuite:
     expected_expect_table_row_count_to_be_between_expectation_configuration: ExpectationConfiguration = ExpectationConfiguration(
         **{
             "expectation_type": "expect_table_row_count_to_be_between",
@@ -2465,6 +2366,11 @@ def test_get_metrics_and_expectations(
 
     expected_expectation_suite.meta = expected_expectation_suite_meta
 
+    return expected_expectation_suite
+
+
+@pytest.fixture()
+def quentin_expected_rule_based_profiler_configuration() -> RuleBasedProfilerConfig:
     expected_rule_based_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
         config_version=1.0,
         name="test_volume_data_assistant",
@@ -2591,6 +2497,116 @@ def test_get_metrics_and_expectations(
             },
         },
     )
+    return expected_rule_based_profiler_config
+
+
+def run_volume_data_assistant_result_jupyter_notebook_with_new_cell(
+    context: DataContext, new_cell: str
+):
+    """
+    To set this test up we:
+    - create a suite
+    - write code (as a string) for creating a VolumeDataAssistantResult
+    - add a new cell to the notebook that was passed to this method
+    - write both cells to ipynb file
+    We then:
+    - load the notebook back from disk
+    - execute the notebook (Note: this will raise various errors like
+    CellExecutionError if any cell in the notebook fails)
+    """
+
+    root_dir: str = context.root_directory
+    expectation_suite_name: str = "test_suite"
+    context.create_expectation_suite(expectation_suite_name)
+    notebook_path: str = os.path.join(root_dir, f"run_volume_data_assistant.ipynb")
+    notebook_code: str = """
+from typing import Optional, Union
+import uuid
+import great_expectations as ge
+from great_expectations.data_context import BaseDataContext
+from great_expectations.validator.validator import Validator
+from great_expectations.rule_based_profiler.data_assistant import (
+    DataAssistant,
+    VolumeDataAssistant,
+)
+from great_expectations.rule_based_profiler.types.data_assistant_result import DataAssistantResult
+from great_expectations.rule_based_profiler.helpers.util import get_validator_with_expectation_suite
+import great_expectations.exceptions as ge_exceptions
+"""
+    notebook_code += """
+context = ge.get_context()
+batch_request: dict = {
+    "datasource_name": "taxi_pandas",
+    "data_connector_name": "monthly",
+    "data_asset_name": "my_reports",
+}
+validator: Validator = get_validator_with_expectation_suite(
+    batch_request=batch_request,
+    data_context=context,
+    expectation_suite_name=None,
+    expectation_suite=None,
+    component_name="volume_data_assistant",
+)
+data_assistant: DataAssistant = VolumeDataAssistant(
+    name="test_volume_data_assistant",
+    validator=validator,
+)
+expectation_suite_name: str = "test_suite"
+data_assistant_result: DataAssistantResult = data_assistant.run(
+    expectation_suite_name=expectation_suite_name,
+)
+"""
+
+    nb = nbformat.v4.new_notebook()
+    nb["cells"] = []
+    nb["cells"].append(nbformat.v4.new_code_cell(notebook_code))
+    nb["cells"].append(nbformat.v4.new_code_cell(new_cell))
+
+    # Write notebook to path and load it as NotebookNode
+    with open(notebook_path, "w") as f:
+        nbformat.write(nb, f)
+
+    nb: nbformat.notebooknode.NotebookNode = load_notebook_from_path(
+        notebook_path=notebook_path
+    )
+
+    # Run notebook
+    ep: nbconvert.preprocessors.ExecutePreprocessor = (
+        nbconvert.preprocessors.ExecutePreprocessor(timeout=60, kernel_name="python3")
+    )
+    ep.preprocess(nb, {"metadata": {"path": root_dir}})
+
+
+@freeze_time("09/26/2019 13:42:41")
+def test_get_metrics_and_expectations(
+    quentin_columnar_table_multi_batch_data_context,
+    quentin_expected_metrics_by_domain,
+    quentin_expected_expectation_suite,
+    quentin_expected_rule_based_profiler_configuration,
+):
+    context: DataContext = quentin_columnar_table_multi_batch_data_context
+
+    batch_request: dict = {
+        "datasource_name": "taxi_pandas",
+        "data_connector_name": "monthly",
+        "data_asset_name": "my_reports",
+    }
+
+    validator: Validator = get_validator_with_expectation_suite(
+        batch_request=batch_request,
+        data_context=context,
+        expectation_suite_name=None,
+        expectation_suite=None,
+        component_name="volume_data_assistant",
+    )
+    assert len(validator.batches) == 36
 
     # Utilize a consistent seed to deal with probabilistic nature of this feature.
     data_assistant: DataAssistant = VolumeDataAssistant(
@@ -2599,27 +2615,28 @@ def test_get_metrics_and_expectations(
     )
     set_bootstrap_random_seed_variable(profiler=data_assistant.profiler)
     data_assistant_result: DataAssistantResult = data_assistant.run(
-        expectation_suite_name=expectation_suite_name,
+        expectation_suite_name=quentin_expected_expectation_suite.expectation_suite_name,
     )
 
-    assert data_assistant_result.metrics_by_domain == expected_metrics_by_domain
+    assert data_assistant_result.metrics_by_domain == quentin_expected_metrics_by_domain
     assert (
         data_assistant_result.expectation_suite.expectations
-        == expected_expectation_configurations
+        == quentin_expected_expectation_suite.expectations
     )
 
     data_assistant_result.expectation_suite.meta.pop("great_expectations_version", None)
 
-    assert data_assistant_result.expectation_suite == expected_expectation_suite
+    assert data_assistant_result.expectation_suite == quentin_expected_expectation_suite
 
     assert (
-        data_assistant_result.expectation_suite.meta == expected_expectation_suite_meta
+        data_assistant_result.expectation_suite.meta
+        == quentin_expected_expectation_suite.meta
     )
 
     assert deep_filter_properties_iterable(
         properties=data_assistant_result.profiler_config.to_json_dict()
     ) == deep_filter_properties_iterable(
-        properties=expected_rule_based_profiler_config.to_json_dict()
+        properties=quentin_expected_rule_based_profiler_configuration.to_json_dict()
     )


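Net effect of this file's diff: the expected metrics, expectation suite, and rule-based-profiler configuration that previously lived inline in test_get_metrics_and_expectations become quentin_* pytest fixtures, which pytest injects into the test by parameter name; the assertions then read from fixture attributes instead of local variables. A minimal sketch of that refactor shape (fixture and test names are illustrative, not the real payloads):

import pytest

# Before: expected values were built inline, inflating the test body.
# After: each expected value lives in a fixture and is injected by name.

@pytest.fixture()
def expected_row_counts() -> dict:
    # Stand-in for the large quentin_expected_* payloads in the real test.
    return {"january": 10000, "february": 11000}

def test_volume_metrics(expected_row_counts):
    observed = {"january": 10000, "february": 11000}  # illustrative "observed" data
    assert observed == expected_row_counts

Extracting the large literals keeps the test body focused on behavior and lets other tests reuse the same expected payloads; the seeded set_bootstrap_random_seed_variable call similarly pins down the bootstrap sampling so those expected values stay stable across runs.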
