From b907c7df36eebd030eb3ce1cf7c97214e53cb6ec Mon Sep 17 00:00:00 2001
From: Alex Sherstinsky
Date: Tue, 26 Apr 2022 09:13:45 -0700
Subject: [PATCH] clean up (#4959)

---
 great_expectations/exceptions/exceptions.py  |   8 -
 .../expectations/expectation.py              |   6 +-
 .../domain_builder/domain_builder.py         |   2 +-
 .../expectation_configuration_builder.py     |   2 +-
 .../parameter_builder/parameter_builder.py   |   2 +-
 .../test_volume_data_assistant.py            | 239 ++++++++++--------
 6 files changed, 135 insertions(+), 124 deletions(-)

diff --git a/great_expectations/exceptions/exceptions.py b/great_expectations/exceptions/exceptions.py
index 3f04b0dfbe1d..91aa9017b825 100644
--- a/great_expectations/exceptions/exceptions.py
+++ b/great_expectations/exceptions/exceptions.py
@@ -135,14 +135,6 @@ class ProfilerNotFoundError(ProfilerError):
     pass
 
 
-class DataAssistantError(GreatExpectationsError):
-    pass
-
-
-class DataAssistantNotFoundError(DataAssistantError):
-    pass
-
-
 class InvalidConfigError(DataContextError):
     def __init__(self, message):
         self.message = message
diff --git a/great_expectations/expectations/expectation.py b/great_expectations/expectations/expectation.py
index a49d4ab2e4cb..0bc59e44dc5b 100644
--- a/great_expectations/expectations/expectation.py
+++ b/great_expectations/expectations/expectation.py
@@ -10,11 +10,9 @@
 from inspect import isabstract
 from typing import Dict, List, Optional, Tuple, Union
 
-import pandas as pd
 from dateutil.parser import parse
 
 from great_expectations import __version__ as ge_version
-from great_expectations.core.batch import Batch
 from great_expectations.core.expectation_configuration import (
     ExpectationConfiguration,
     parse_result_format,
@@ -94,6 +92,7 @@
 )
 
 
+# noinspection PyMethodParameters
 class MetaExpectation(ABCMeta):
     """MetaExpectation registers Expectations as they are defined, adding them to the Expectation registry.
 
@@ -103,9 +102,12 @@ class MetaExpectation(ABCMeta):
     def __new__(cls, clsname, bases, attrs):
         newclass = super().__new__(cls, clsname, bases, attrs)
+        # noinspection PyUnresolvedReferences
         if not newclass.is_abstract():
             newclass.expectation_type = camel_to_snake(clsname)
             register_expectation(newclass)
+
+        # noinspection PyUnresolvedReferences
         newclass._register_renderer_functions()
         default_kwarg_values = {}
         for base in reversed(bases):
diff --git a/great_expectations/rule_based_profiler/domain_builder/domain_builder.py b/great_expectations/rule_based_profiler/domain_builder/domain_builder.py
index 95a38635c9e4..532b2814f29a 100644
--- a/great_expectations/rule_based_profiler/domain_builder/domain_builder.py
+++ b/great_expectations/rule_based_profiler/domain_builder/domain_builder.py
@@ -20,7 +20,7 @@
 from great_expectations.validator.metric_configuration import MetricConfiguration
 
 
-class DomainBuilder(Builder, ABC):
+class DomainBuilder(ABC, Builder):
     """
     A DomainBuilder provides methods to get domains based on one or more batches of data.
     """
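Note: this file, and the two rule_based_profiler modules that follow, swap the base-class order from (Builder, ABC) to (ABC, Builder). A minimal sketch of what the ordering affects, using stand-in classes rather than the actual Great Expectations types — either order yields a consistent C3 linearization here, and abstract-method enforcement comes from ABCMeta in both cases:

    from abc import ABC, abstractmethod


    class Builder:
        """Stand-in for the library's concrete Builder mixin."""

        def to_dict(self) -> dict:
            # Trivial serialization helper, mirroring a concrete mixin method.
            return dict(vars(self))


    class DomainBuilderLike(ABC, Builder):
        """Same base order as the patched classes: ABC first."""

        @abstractmethod
        def get_domains(self) -> list:
            ...


    # ABC now precedes Builder in the MRO; instantiation is still blocked.
    assert [c.__name__ for c in DomainBuilderLike.__mro__] == [
        "DomainBuilderLike", "ABC", "Builder", "object"
    ]
    try:
        DomainBuilderLike()
    except TypeError as error:
        print(error)  # can't instantiate abstract class DomainBuilderLike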
""" diff --git a/great_expectations/rule_based_profiler/expectation_configuration_builder/expectation_configuration_builder.py b/great_expectations/rule_based_profiler/expectation_configuration_builder/expectation_configuration_builder.py index 7c17de61f386..3d0a4b2ccc83 100644 --- a/great_expectations/rule_based_profiler/expectation_configuration_builder/expectation_configuration_builder.py +++ b/great_expectations/rule_based_profiler/expectation_configuration_builder/expectation_configuration_builder.py @@ -20,7 +20,7 @@ logger.setLevel(logging.INFO) -class ExpectationConfigurationBuilder(Builder, ABC): +class ExpectationConfigurationBuilder(ABC, Builder): exclude_field_names: Set[str] = Builder.exclude_field_names | { "validation_parameter_builders", } diff --git a/great_expectations/rule_based_profiler/parameter_builder/parameter_builder.py b/great_expectations/rule_based_profiler/parameter_builder/parameter_builder.py index 23465bac07ba..2410cd841fcc 100644 --- a/great_expectations/rule_based_profiler/parameter_builder/parameter_builder.py +++ b/great_expectations/rule_based_profiler/parameter_builder/parameter_builder.py @@ -91,7 +91,7 @@ def to_json_dict(self) -> dict: return convert_to_json_serializable(data=self.to_dict()) -class ParameterBuilder(Builder, ABC): +class ParameterBuilder(ABC, Builder): """ A ParameterBuilder implementation provides support for building Expectation Configuration Parameters suitable for use in other ParameterBuilders or in ConfigurationBuilders as part of profiling. diff --git a/tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py b/tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py index 291401438436..a7f22973429e 100644 --- a/tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py +++ b/tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py @@ -28,111 +28,8 @@ from tests.test_utils import set_bootstrap_random_seed_variable -def run_volume_data_assistant_result_jupyter_notebook_with_new_cell( - context: DataContext, new_cell: str -): - """ - To set this test up we: - - create a suite - - write code (as a string) for creating a VolumeDataAssistantResult - - add a new cell to the notebook that was passed to this method - - write both cells to ipynb file - - We then: - - load the notebook back from disk - - execute the notebook (Note: this will raise various errors like - CellExecutionError if any cell in the notebook fails) - """ - - root_dir: str = context.root_directory - expectation_suite_name: str = "test_suite" - context.create_expectation_suite(expectation_suite_name) - notebook_path: str = os.path.join(root_dir, f"run_volume_data_assistant.ipynb") - notebook_code: str = """ - from typing import Optional, Union - - import uuid - - import great_expectations as ge - from great_expectations.data_context import BaseDataContext - from great_expectations.validator.validator import Validator - from great_expectations.rule_based_profiler.data_assistant import ( - DataAssistant, - VolumeDataAssistant, - ) - from great_expectations.rule_based_profiler.types.data_assistant_result import DataAssistantResult - from great_expectations.rule_based_profiler.helpers.util import get_validator_with_expectation_suite - import great_expectations.exceptions as ge_exceptions - """ - notebook_code += """ - context = ge.get_context() - - batch_request: dict = { - "datasource_name": "taxi_pandas", - "data_connector_name": "monthly", - "data_asset_name": "my_reports", - } - - validator: Validator = 
@@ -1841,7 +1738,11 @@ def test_get_metrics_and_expectations(
             }
         },
     }
+    return expected_metrics_by_domain
+
 
+@pytest.fixture()
+def quentin_expected_expectation_suite() -> ExpectationSuite:
     expected_expect_table_row_count_to_be_between_expectation_configuration: ExpectationConfiguration = ExpectationConfiguration(
         **{
             "expectation_type": "expect_table_row_count_to_be_between",
@@ -2465,6 +2366,11 @@ def test_get_metrics_and_expectations(
 
     expected_expectation_suite.meta = expected_expectation_suite_meta
 
+    return expected_expectation_suite
+
+
+@pytest.fixture()
+def quentin_expected_rule_based_profiler_configuration() -> RuleBasedProfilerConfig:
     expected_rule_based_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
         config_version=1.0,
         name="test_volume_data_assistant",
@@ -2591,6 +2497,116 @@ def test_get_metrics_and_expectations(
         },
     )
+    return expected_rule_based_profiler_config
+
+
+def run_volume_data_assistant_result_jupyter_notebook_with_new_cell(
+    context: DataContext, new_cell: str
+):
+    """
+    To set this test up we:
+    - create a suite
+    - write code (as a string) for creating a VolumeDataAssistantResult
+    - add a new cell to the notebook that was passed to this method
+    - write both cells to ipynb file
+
+    We then:
+    - load the notebook back from disk
+    - execute the notebook (Note: this will raise various errors like
+    CellExecutionError if any cell in the notebook fails)
+    """
+
+    root_dir: str = context.root_directory
+    expectation_suite_name: str = "test_suite"
+    context.create_expectation_suite(expectation_suite_name)
+    notebook_path: str = os.path.join(root_dir, f"run_volume_data_assistant.ipynb")
+    notebook_code: str = """
+    from typing import Optional, Union
+
+    import uuid
+
+    import great_expectations as ge
+    from great_expectations.data_context import BaseDataContext
+    from great_expectations.validator.validator import Validator
+    from great_expectations.rule_based_profiler.data_assistant import (
+        DataAssistant,
+        VolumeDataAssistant,
+    )
+    from great_expectations.rule_based_profiler.types.data_assistant_result import DataAssistantResult
+    from great_expectations.rule_based_profiler.helpers.util import get_validator_with_expectation_suite
+    import great_expectations.exceptions as ge_exceptions
+    """
+    notebook_code += """
+    context = ge.get_context()
+
+    batch_request: dict = {
+        "datasource_name": "taxi_pandas",
+        "data_connector_name": "monthly",
+        "data_asset_name": "my_reports",
+    }
+
+    validator: Validator = get_validator_with_expectation_suite(
+        batch_request=batch_request,
+        data_context=context,
+        expectation_suite_name=None,
+        expectation_suite=None,
+        component_name="volume_data_assistant",
+    )
+
+    data_assistant: DataAssistant = VolumeDataAssistant(
+        name="test_volume_data_assistant",
+        validator=validator,
+    )
+
+    expectation_suite_name: str = "test_suite"
+    data_assistant_result: DataAssistantResult = data_assistant.run(
+        expectation_suite_name=expectation_suite_name,
+    )
+    """
+
+    nb = nbformat.v4.new_notebook()
+    nb["cells"] = []
+    nb["cells"].append(nbformat.v4.new_code_cell(notebook_code))
+    nb["cells"].append(nbformat.v4.new_code_cell(new_cell))
+
+    # Write notebook to path and load it as NotebookNode
+    with open(notebook_path, "w") as f:
+        nbformat.write(nb, f)
+
+    nb: nbformat.notebooknode.NotebookNode = load_notebook_from_path(
+        notebook_path=notebook_path
+    )
+
+    # Run notebook
+    ep: nbconvert.preprocessors.ExecutePreprocessor = (
+        nbconvert.preprocessors.ExecutePreprocessor(timeout=60, kernel_name="python3")
+    )
+    ep.preprocess(nb, {"metadata": {"path": root_dir}})
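Note: the helper re-added above assembles a notebook in memory, writes it to disk, reloads it, and executes it. The same nbformat/nbconvert round trip in isolation — the path and cell contents here are illustrative only:

    import nbformat
    from nbconvert.preprocessors import ExecutePreprocessor

    # Build a two-cell notebook from code strings.
    nb = nbformat.v4.new_notebook()
    nb["cells"] = [
        nbformat.v4.new_code_cell("x = 21 * 2"),
        nbformat.v4.new_code_cell("assert x == 42"),
    ]

    # Write it out, then load it back as a NotebookNode.
    notebook_path = "scratch.ipynb"
    with open(notebook_path, "w") as f:
        nbformat.write(nb, f)
    with open(notebook_path) as f:
        nb = nbformat.read(f, as_version=4)

    # Execute; a CellExecutionError propagates if any cell fails.
    ep = ExecutePreprocessor(timeout=60, kernel_name="python3")
    ep.preprocess(nb, {"metadata": {"path": "."}})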
f"run_volume_data_assistant.ipynb") + notebook_code: str = """ + from typing import Optional, Union + + import uuid + + import great_expectations as ge + from great_expectations.data_context import BaseDataContext + from great_expectations.validator.validator import Validator + from great_expectations.rule_based_profiler.data_assistant import ( + DataAssistant, + VolumeDataAssistant, + ) + from great_expectations.rule_based_profiler.types.data_assistant_result import DataAssistantResult + from great_expectations.rule_based_profiler.helpers.util import get_validator_with_expectation_suite + import great_expectations.exceptions as ge_exceptions + """ + notebook_code += """ + context = ge.get_context() + + batch_request: dict = { + "datasource_name": "taxi_pandas", + "data_connector_name": "monthly", + "data_asset_name": "my_reports", + } + + validator: Validator = get_validator_with_expectation_suite( + batch_request=batch_request, + data_context=context, + expectation_suite_name=None, + expectation_suite=None, + component_name="volume_data_assistant", + ) + + data_assistant: DataAssistant = VolumeDataAssistant( + name="test_volume_data_assistant", + validator=validator, + ) + + expectation_suite_name: str = "test_suite" + data_assistant_result: DataAssistantResult = data_assistant.run( + expectation_suite_name=expectation_suite_name, + ) + """ + + nb = nbformat.v4.new_notebook() + nb["cells"] = [] + nb["cells"].append(nbformat.v4.new_code_cell(notebook_code)) + nb["cells"].append(nbformat.v4.new_code_cell(new_cell)) + + # Write notebook to path and load it as NotebookNode + with open(notebook_path, "w") as f: + nbformat.write(nb, f) + + nb: nbformat.notebooknode.NotebookNode = load_notebook_from_path( + notebook_path=notebook_path + ) + + # Run notebook + ep: nbconvert.preprocessors.ExecutePreprocessor = ( + nbconvert.preprocessors.ExecutePreprocessor(timeout=60, kernel_name="python3") + ) + ep.preprocess(nb, {"metadata": {"path": root_dir}}) + + +@freeze_time("09/26/2019 13:42:41") +def test_get_metrics_and_expectations( + quentin_columnar_table_multi_batch_data_context, + quentin_expected_metrics_by_domain, + quentin_expected_expectation_suite, + quentin_expected_rule_based_profiler_configuration, +): + context: DataContext = quentin_columnar_table_multi_batch_data_context + + batch_request: dict = { + "datasource_name": "taxi_pandas", + "data_connector_name": "monthly", + "data_asset_name": "my_reports", + } + + validator: Validator = get_validator_with_expectation_suite( + batch_request=batch_request, + data_context=context, + expectation_suite_name=None, + expectation_suite=None, + component_name="volume_data_assistant", + ) + assert len(validator.batches) == 36 # Utilize a consistent seed to deal with probabilistic nature of this feature. 
@@ -2599,27 +2615,28 @@ def test_get_metrics_and_expectations(
     )
     set_bootstrap_random_seed_variable(profiler=data_assistant.profiler)
     data_assistant_result: DataAssistantResult = data_assistant.run(
-        expectation_suite_name=expectation_suite_name,
+        expectation_suite_name=quentin_expected_expectation_suite.expectation_suite_name,
     )
 
-    assert data_assistant_result.metrics_by_domain == expected_metrics_by_domain
+    assert data_assistant_result.metrics_by_domain == quentin_expected_metrics_by_domain
 
     assert (
         data_assistant_result.expectation_suite.expectations
-        == expected_expectation_configurations
+        == quentin_expected_expectation_suite.expectations
     )
 
     data_assistant_result.expectation_suite.meta.pop("great_expectations_version", None)
 
-    assert data_assistant_result.expectation_suite == expected_expectation_suite
+    assert data_assistant_result.expectation_suite == quentin_expected_expectation_suite
 
     assert (
-        data_assistant_result.expectation_suite.meta == expected_expectation_suite_meta
+        data_assistant_result.expectation_suite.meta
+        == quentin_expected_expectation_suite.meta
     )
 
     assert deep_filter_properties_iterable(
         properties=data_assistant_result.profiler_config.to_json_dict()
     ) == deep_filter_properties_iterable(
-        properties=expected_rule_based_profiler_config.to_json_dict()
+        properties=quentin_expected_rule_based_profiler_configuration.to_json_dict()
     )
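Note: the final assertion strips unset properties from both profiler configurations before comparing them. A simplified stand-in for what a deep filter of None values and empty containers does — this illustrates the idea only, not the signature or behavior of deep_filter_properties_iterable itself:

    def deep_filter(value):
        # Recursively drop None and empty containers so two configs compare
        # equal when they differ only in fields that were never set.
        if isinstance(value, dict):
            filtered = {key: deep_filter(val) for key, val in value.items()}
            return {key: val for key, val in filtered.items() if val not in (None, {}, [])}
        if isinstance(value, list):
            filtered = [deep_filter(val) for val in value]
            return [val for val in filtered if val not in (None, {}, [])]
        return value


    actual = {"name": "test_volume_data_assistant", "rules": [], "variables": None}
    expected = {"name": "test_volume_data_assistant"}
    assert deep_filter(actual) == deep_filter(expected)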