Commit b907c7d

clean up (#4959)
alexsherstinsky committed Apr 26, 2022
1 parent b048544 commit b907c7d
Showing 6 changed files with 135 additions and 124 deletions.
8 changes: 0 additions & 8 deletions great_expectations/exceptions/exceptions.py
@@ -135,14 +135,6 @@ class ProfilerNotFoundError(ProfilerError):
     pass
 
 
-class DataAssistantError(GreatExpectationsError):
-    pass
-
-
-class DataAssistantNotFoundError(DataAssistantError):
-    pass
-
-
 class InvalidConfigError(DataContextError):
     def __init__(self, message):
         self.message = message
6 changes: 4 additions & 2 deletions great_expectations/expectations/expectation.py
@@ -10,11 +10,9 @@
 from inspect import isabstract
 from typing import Dict, List, Optional, Tuple, Union
 
-import pandas as pd
 from dateutil.parser import parse
 
 from great_expectations import __version__ as ge_version
-from great_expectations.core.batch import Batch
 from great_expectations.core.expectation_configuration import (
     ExpectationConfiguration,
     parse_result_format,
@@ -94,6 +92,7 @@
 )
 
 
+# noinspection PyMethodParameters
 class MetaExpectation(ABCMeta):
     """MetaExpectation registers Expectations as they are defined, adding them to the Expectation registry.
@@ -103,9 +102,12 @@ class MetaExpectation(ABCMeta):
 
     def __new__(cls, clsname, bases, attrs):
         newclass = super().__new__(cls, clsname, bases, attrs)
+        # noinspection PyUnresolvedReferences
         if not newclass.is_abstract():
            newclass.expectation_type = camel_to_snake(clsname)
            register_expectation(newclass)
+
+        # noinspection PyUnresolvedReferences
        newclass._register_renderer_functions()
        default_kwarg_values = {}
        for base in reversed(bases):
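Aside on the pattern visible in this hunk: MetaExpectation is a registry-on-definition metaclass, so a concrete Expectation subclass is registered the moment its class statement executes. A minimal, self-contained sketch of the same pattern (the _REGISTRY dict, class names, and camel_to_snake helper below are illustrative stand-ins, not the library's own):

from abc import ABCMeta, abstractmethod
from inspect import isabstract
import re

_REGISTRY = {}  # illustrative stand-in for the expectation registry

def camel_to_snake(name: str) -> str:
    # "ExpectSomethingSketch" -> "expect_something_sketch"
    return re.sub(r"(?<!^)(?=[A-Z])", "_", name).lower()

class RegisteringMeta(ABCMeta):
    def __new__(cls, clsname, bases, attrs):
        newclass = super().__new__(cls, clsname, bases, attrs)
        # ABCMeta has already computed __abstractmethods__ at this point,
        # so we can skip abstract bases and register only concrete classes.
        if not isabstract(newclass):
            _REGISTRY[camel_to_snake(clsname)] = newclass
        return newclass

class BaseExpectationSketch(metaclass=RegisteringMeta):
    @abstractmethod
    def validate(self) -> bool: ...

class ExpectSomethingSketch(BaseExpectationSketch):
    def validate(self) -> bool:
        return True

assert "expect_something_sketch" in _REGISTRY

The abstractness check is what keeps intermediate base classes out of the registry: only leaf classes that implement every abstract method are registered.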
@@ -20,7 +20,7 @@
 from great_expectations.validator.metric_configuration import MetricConfiguration
 
 
-class DomainBuilder(Builder, ABC):
+class DomainBuilder(ABC, Builder):
     """
     A DomainBuilder provides methods to get domains based on one or more batches of data.
     """
@@ -20,7 +20,7 @@
 logger.setLevel(logging.INFO)
 
 
-class ExpectationConfigurationBuilder(Builder, ABC):
+class ExpectationConfigurationBuilder(ABC, Builder):
     exclude_field_names: Set[str] = Builder.exclude_field_names | {
         "validation_parameter_builders",
     }
@@ -91,7 +91,7 @@ def to_json_dict(self) -> dict:
         return convert_to_json_serializable(data=self.to_dict())
 
 
-class ParameterBuilder(Builder, ABC):
+class ParameterBuilder(ABC, Builder):
     """
     A ParameterBuilder implementation provides support for building Expectation Configuration Parameters suitable for
     use in other ParameterBuilders or in ConfigurationBuilders as part of profiling.
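The recurring one-line change in these three builder hunks swaps the base-class order from (Builder, ABC) to (ABC, Builder). Base order determines the method resolution order (MRO), i.e. where Python looks up attributes and where cooperative super() calls go next. Both orders are legal here; a small sketch of the observable difference, using a stand-in Builder class:

from abc import ABC

class Builder:
    """Illustrative stand-in for great_expectations' Builder base class."""

class DomainBuilderA(Builder, ABC):
    pass

class DomainBuilderB(ABC, Builder):
    pass

# Base order is reflected directly in the method resolution order:
print([c.__name__ for c in DomainBuilderA.__mro__])
# ['DomainBuilderA', 'Builder', 'ABC', 'object']
print([c.__name__ for c in DomainBuilderB.__mro__])
# ['DomainBuilderB', 'ABC', 'Builder', 'object']

Since ABC itself contributes no attributes, the swap changes no runtime behavior in practice; it reads as a consistency cleanup that signals these builders are abstract interfaces first, which fits the commit title.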
239 changes: 128 additions & 111 deletions tests/rule_based_profiler/data_assistant/test_volume_data_assistant.py
@@ -28,111 +28,8 @@
 from tests.test_utils import set_bootstrap_random_seed_variable
 
 
-def run_volume_data_assistant_result_jupyter_notebook_with_new_cell(
-    context: DataContext, new_cell: str
-):
-    """
-    To set this test up we:
-    - create a suite
-    - write code (as a string) for creating a VolumeDataAssistantResult
-    - add a new cell to the notebook that was passed to this method
-    - write both cells to ipynb file
-    We then:
-    - load the notebook back from disk
-    - execute the notebook (Note: this will raise various errors like
-    CellExecutionError if any cell in the notebook fails)
-    """
-
-    root_dir: str = context.root_directory
-    expectation_suite_name: str = "test_suite"
-    context.create_expectation_suite(expectation_suite_name)
-    notebook_path: str = os.path.join(root_dir, f"run_volume_data_assistant.ipynb")
-    notebook_code: str = """
-from typing import Optional, Union
-import uuid
-import great_expectations as ge
-from great_expectations.data_context import BaseDataContext
-from great_expectations.validator.validator import Validator
-from great_expectations.rule_based_profiler.data_assistant import (
-    DataAssistant,
-    VolumeDataAssistant,
-)
-from great_expectations.rule_based_profiler.types.data_assistant_result import DataAssistantResult
-from great_expectations.rule_based_profiler.helpers.util import get_validator_with_expectation_suite
-import great_expectations.exceptions as ge_exceptions
-"""
-    notebook_code += """
-context = ge.get_context()
-batch_request: dict = {
-    "datasource_name": "taxi_pandas",
-    "data_connector_name": "monthly",
-    "data_asset_name": "my_reports",
-}
-validator: Validator = get_validator_with_expectation_suite(
-    batch_request=batch_request,
-    data_context=context,
-    expectation_suite_name=None,
-    expectation_suite=None,
-    component_name="volume_data_assistant",
-)
-data_assistant: DataAssistant = VolumeDataAssistant(
-    name="test_volume_data_assistant",
-    validator=validator,
-)
-expectation_suite_name: str = "test_suite"
-data_assistant_result: DataAssistantResult = data_assistant.run(
-    expectation_suite_name=expectation_suite_name,
-)
-"""
-
-    nb = nbformat.v4.new_notebook()
-    nb["cells"] = []
-    nb["cells"].append(nbformat.v4.new_code_cell(notebook_code))
-    nb["cells"].append(nbformat.v4.new_code_cell(new_cell))
-
-    # Write notebook to path and load it as NotebookNode
-    with open(notebook_path, "w") as f:
-        nbformat.write(nb, f)
-
-    nb: nbformat.notebooknode.NotebookNode = load_notebook_from_path(
-        notebook_path=notebook_path
-    )
-
-    # Run notebook
-    ep: nbconvert.preprocessors.ExecutePreprocessor = (
-        nbconvert.preprocessors.ExecutePreprocessor(timeout=60, kernel_name="python3")
-    )
-    ep.preprocess(nb, {"metadata": {"path": root_dir}})
-
-
-@freeze_time("09/26/2019 13:42:41")
-def test_get_metrics_and_expectations(
-    quentin_columnar_table_multi_batch_data_context,
-):
-    context: DataContext = quentin_columnar_table_multi_batch_data_context
-
-    batch_request: dict = {
-        "datasource_name": "taxi_pandas",
-        "data_connector_name": "monthly",
-        "data_asset_name": "my_reports",
-    }
-
-    validator: Validator = get_validator_with_expectation_suite(
-        batch_request=batch_request,
-        data_context=context,
-        expectation_suite_name=None,
-        expectation_suite=None,
-        component_name="volume_data_assistant",
-    )
-    assert len(validator.batches) == 36
-
+@pytest.fixture()
+def quentin_expected_metrics_by_domain() -> Dict[Domain, Dict[str, Any]]:
     expected_metrics_by_domain: Dict[Domain, Dict[str, Any]] = {
         Domain(domain_type="table",): {
             "$parameter.table_row_count": {
@@ -1841,7 +1738,11 @@ def test_get_metrics_and_expectations(
             }
         },
     }
-
+    return expected_metrics_by_domain
+
+
+@pytest.fixture()
+def quentin_expected_expectation_suite() -> ExpectationSuite:
     expected_expect_table_row_count_to_be_between_expectation_configuration: ExpectationConfiguration = ExpectationConfiguration(
         **{
             "expectation_type": "expect_table_row_count_to_be_between",
@@ -2465,6 +2366,11 @@ def test_get_metrics_and_expectations(
 
     expected_expectation_suite.meta = expected_expectation_suite_meta
 
+    return expected_expectation_suite
+
+
+@pytest.fixture()
+def quentin_expected_rule_based_profiler_configuration() -> RuleBasedProfilerConfig:
     expected_rule_based_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
         config_version=1.0,
         name="test_volume_data_assistant",
@@ -2591,6 +2497,116 @@ def test_get_metrics_and_expectations(
             },
         },
     )
+    return expected_rule_based_profiler_config
+
+
+def run_volume_data_assistant_result_jupyter_notebook_with_new_cell(
+    context: DataContext, new_cell: str
+):
+    """
+    To set this test up we:
+    - create a suite
+    - write code (as a string) for creating a VolumeDataAssistantResult
+    - add a new cell to the notebook that was passed to this method
+    - write both cells to ipynb file
+    We then:
+    - load the notebook back from disk
+    - execute the notebook (Note: this will raise various errors like
+    CellExecutionError if any cell in the notebook fails)
+    """
+
+    root_dir: str = context.root_directory
+    expectation_suite_name: str = "test_suite"
+    context.create_expectation_suite(expectation_suite_name)
+    notebook_path: str = os.path.join(root_dir, f"run_volume_data_assistant.ipynb")
+    notebook_code: str = """
+from typing import Optional, Union
+import uuid
+import great_expectations as ge
+from great_expectations.data_context import BaseDataContext
+from great_expectations.validator.validator import Validator
+from great_expectations.rule_based_profiler.data_assistant import (
+    DataAssistant,
+    VolumeDataAssistant,
+)
+from great_expectations.rule_based_profiler.types.data_assistant_result import DataAssistantResult
+from great_expectations.rule_based_profiler.helpers.util import get_validator_with_expectation_suite
+import great_expectations.exceptions as ge_exceptions
+"""
+    notebook_code += """
+context = ge.get_context()
+batch_request: dict = {
+    "datasource_name": "taxi_pandas",
+    "data_connector_name": "monthly",
+    "data_asset_name": "my_reports",
+}
+validator: Validator = get_validator_with_expectation_suite(
+    batch_request=batch_request,
+    data_context=context,
+    expectation_suite_name=None,
+    expectation_suite=None,
+    component_name="volume_data_assistant",
+)
+data_assistant: DataAssistant = VolumeDataAssistant(
+    name="test_volume_data_assistant",
+    validator=validator,
+)
+expectation_suite_name: str = "test_suite"
+data_assistant_result: DataAssistantResult = data_assistant.run(
+    expectation_suite_name=expectation_suite_name,
+)
+"""
+
+    nb = nbformat.v4.new_notebook()
+    nb["cells"] = []
+    nb["cells"].append(nbformat.v4.new_code_cell(notebook_code))
+    nb["cells"].append(nbformat.v4.new_code_cell(new_cell))
+
+    # Write notebook to path and load it as NotebookNode
+    with open(notebook_path, "w") as f:
+        nbformat.write(nb, f)
+
+    nb: nbformat.notebooknode.NotebookNode = load_notebook_from_path(
+        notebook_path=notebook_path
+    )
+
+    # Run notebook
+    ep: nbconvert.preprocessors.ExecutePreprocessor = (
+        nbconvert.preprocessors.ExecutePreprocessor(timeout=60, kernel_name="python3")
+    )
+    ep.preprocess(nb, {"metadata": {"path": root_dir}})
+
+
+@freeze_time("09/26/2019 13:42:41")
+def test_get_metrics_and_expectations(
+    quentin_columnar_table_multi_batch_data_context,
+    quentin_expected_metrics_by_domain,
+    quentin_expected_expectation_suite,
+    quentin_expected_rule_based_profiler_configuration,
+):
+    context: DataContext = quentin_columnar_table_multi_batch_data_context
+
+    batch_request: dict = {
+        "datasource_name": "taxi_pandas",
+        "data_connector_name": "monthly",
+        "data_asset_name": "my_reports",
+    }
+
+    validator: Validator = get_validator_with_expectation_suite(
+        batch_request=batch_request,
+        data_context=context,
+        expectation_suite_name=None,
+        expectation_suite=None,
+        component_name="volume_data_assistant",
+    )
+    assert len(validator.batches) == 36
 
     # Utilize a consistent seed to deal with probabilistic nature of this feature.
     data_assistant: DataAssistant = VolumeDataAssistant(
@@ -2599,27 +2615,28 @@ def test_get_metrics_and_expectations(
     )
     set_bootstrap_random_seed_variable(profiler=data_assistant.profiler)
     data_assistant_result: DataAssistantResult = data_assistant.run(
-        expectation_suite_name=expectation_suite_name,
+        expectation_suite_name=quentin_expected_expectation_suite.expectation_suite_name,
     )
 
-    assert data_assistant_result.metrics_by_domain == expected_metrics_by_domain
+    assert data_assistant_result.metrics_by_domain == quentin_expected_metrics_by_domain
     assert (
         data_assistant_result.expectation_suite.expectations
-        == expected_expectation_configurations
+        == quentin_expected_expectation_suite.expectations
     )
 
     data_assistant_result.expectation_suite.meta.pop("great_expectations_version", None)
 
-    assert data_assistant_result.expectation_suite == expected_expectation_suite
+    assert data_assistant_result.expectation_suite == quentin_expected_expectation_suite
 
     assert (
-        data_assistant_result.expectation_suite.meta == expected_expectation_suite_meta
+        data_assistant_result.expectation_suite.meta
+        == quentin_expected_expectation_suite.meta
     )
 
     assert deep_filter_properties_iterable(
         properties=data_assistant_result.profiler_config.to_json_dict()
     ) == deep_filter_properties_iterable(
-        properties=expected_rule_based_profiler_config.to_json_dict()
+        properties=quentin_expected_rule_based_profiler_configuration.to_json_dict()
     )


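Net effect of this file's diff: the expected metrics, expectation suite, and rule-based-profiler configuration that previously lived inline in test_get_metrics_and_expectations become quentin_* pytest fixtures, which pytest injects into the test by parameter name; the assertions then read from fixture attributes instead of local variables. A minimal sketch of that refactor shape (fixture and test names are illustrative, not the real payloads):

import pytest

# Before: expected values were built inline, inflating the test body.
# After: each expected value lives in a fixture and is injected by name.

@pytest.fixture()
def expected_row_counts() -> dict:
    # Stand-in for the large quentin_expected_* payloads in the real test.
    return {"january": 10000, "february": 11000}

def test_volume_metrics(expected_row_counts):
    observed = {"january": 10000, "february": 11000}  # illustrative "observed" data
    assert observed == expected_row_counts

Extracting the large literals keeps the test body focused on behavior and lets other tests reuse the same expected payloads; the seeded set_bootstrap_random_seed_variable call similarly pins down the bootstrap sampling so those expected values stay stable across runs.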
