Skip to content

Commit

Permalink
[MAINTENANCE] Remove DataContext from DataAssistant (#4931)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexsherstinsky committed Apr 22, 2022
1 parent 52c30b3 commit 2eb16f2
Show file tree
Hide file tree
Showing 27 changed files with 386 additions and 365 deletions.
3 changes: 2 additions & 1 deletion great_expectations/core/batch.py
Expand Up @@ -628,9 +628,10 @@ def head(self, n_rows=5, fetch_all=False):
return self._data.execution_engine.resolve_metrics((metric,))[metric.id]


# TODO: <Alex>ALEX -- Make this helper utility of general use.</Alex>
def materialize_batch_request(
batch_request: Optional[Union[BatchRequestBase, dict]] = None,
) -> Optional[Union[BatchRequest, RuntimeBatchRequest]]:
) -> Optional[BatchRequestBase]:
effective_batch_request: dict = get_batch_request_as_dict(
batch_request=batch_request
)
Expand Down
Expand Up @@ -3,7 +3,6 @@

from great_expectations.core import ExpectationSuite
from great_expectations.core.batch import Batch, BatchRequestBase
from great_expectations.data_context import BaseDataContext
from great_expectations.execution_engine.execution_engine import MetricDomainTypes
from great_expectations.rule_based_profiler.domain_builder import DomainBuilder
from great_expectations.rule_based_profiler.expectation_configuration_builder import (
Expand All @@ -15,9 +14,6 @@
from great_expectations.rule_based_profiler.helpers.util import (
convert_variables_to_dict,
)
from great_expectations.rule_based_profiler.helpers.util import (
get_validator as get_validator_using_batch_list_or_batch_request,
)
from great_expectations.rule_based_profiler.parameter_builder import ParameterBuilder
from great_expectations.rule_based_profiler.rule import Rule
from great_expectations.rule_based_profiler.rule_based_profiler import (
Expand All @@ -29,6 +25,7 @@
DataAssistantResult,
)
from great_expectations.util import measure_execution_time
from great_expectations.validator.validator import Validator


class DataAssistant(ABC):
Expand All @@ -40,8 +37,7 @@ class DataAssistant(ABC):
data_assistant: DataAssistant = VolumeDataAssistant(
name="my_volume_data_assistant",
batch_request=batch_request,
data_context=context,
validator=validator,
)
result: DataAssistantResult = data_assistant.run()
Expand All @@ -56,8 +52,7 @@ class DataAssistant(ABC):
def __init__(
self,
name: str,
batch_request: Union[BatchRequestBase, dict],
data_context: BaseDataContext = None,
validator: Validator,
):
"""
DataAssistant subclasses guide "RuleBasedProfiler" to contain Rule configurations to embody profiling behaviors,
Expand All @@ -66,29 +61,17 @@ def __init__(
and overall "ExpectationSuite" object, immediately available for validating underlying data "Batch" objects.
Args:
name: the name of this DataAssistant object.
batch_request: specified for querying data Batch objects.
data_context: DataContext
name: the name of this DataAssistant object
validator: Validator object, containing loaded Batch objects as well as Expectation and Metric operations
"""
self._name = name

self._data_context = data_context

self._validator = get_validator_using_batch_list_or_batch_request(
purpose=self.name,
data_context=self.data_context,
batch_list=None,
batch_request=batch_request,
domain=None,
variables=None,
parameters=None,
)
self._validator = validator

self._profiler = RuleBasedProfiler(
name=self.name,
config_version=1.0,
variables=None,
data_context=self.data_context,
data_context=self._validator.data_context,
)
self._build_profiler()

Expand Down Expand Up @@ -164,7 +147,8 @@ def run(
Args:
expectation_suite: An existing "ExpectationSuite" to update
expectation_suite_name: A name for returned "ExpectationSuite"
include_citation: Whether or not to include the Profiler config in the metadata for "ExpectationSuite" produced by "RuleBasedProfiler"
include_citation: Flag, which controls whether or not the effective Profiler configuration should be included
as a citation in metadata of the "ExpectationSuite" computed and returned by "RuleBasedProfiler"
Returns:
DataAssistantResult: The result object for the DataAssistant
Expand Down Expand Up @@ -192,10 +176,6 @@ def run(
def name(self) -> str:
return self._name

@property
def data_context(self) -> BaseDataContext:
return self._data_context

@property
def profiler(self) -> BaseRuleBasedProfiler:
return self._profiler
Expand Down Expand Up @@ -268,8 +248,8 @@ def get_metrics_by_domain(self) -> Dict[Domain, Dict[str, ParameterNode]]:
value of "DataAssistant.metrics_parameter_builders_by_domain_type" interface property and actual fully-qualified
parameter names match interface properties of "ParameterBuilder" objects, corresponding to these "domain" types.
returns:
dictionaries of values for fully-qualified parameter names by domain for metrics, computed by "rulebasedprofiler" state.
Returns:
Dictionaries of values for fully-qualified parameter names by Domain for metrics, from "RuleBasedProfiler"
"""
# noinspection PyTypeChecker
parameter_values_for_fully_qualified_parameter_names_by_domain: Dict[
Expand Down Expand Up @@ -326,7 +306,8 @@ def get_expectation_suite(
Args:
expectation_suite: An existing "ExpectationSuite" to update
expectation_suite_name: A name for returned "ExpectationSuite"
include_citation: Whether or not to include the Profiler config in the metadata for "ExpectationSuite" produced by "RuleBasedProfiler"
include_citation: Flag, which controls whether or not the effective Profiler configuration should be included
as a citation in metadata of the "ExpectationSuite" computed and returned by "RuleBasedProfiler"
Returns:
"ExpectationSuite" using "ExpectationConfiguration" objects, computed by "RuleBasedProfiler" state
Expand Down
@@ -1,7 +1,5 @@
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional

from great_expectations.core.batch import BatchRequestBase
from great_expectations.data_context import BaseDataContext
from great_expectations.execution_engine.execution_engine import MetricDomainTypes
from great_expectations.rule_based_profiler.data_assistant import DataAssistant
from great_expectations.rule_based_profiler.parameter_builder import (
Expand All @@ -16,6 +14,7 @@
DataAssistantResult,
VolumeDataAssistantResult,
)
from great_expectations.validator.validator import Validator


class VolumeDataAssistant(DataAssistant):
Expand All @@ -31,13 +30,11 @@ class VolumeDataAssistant(DataAssistant):
def __init__(
self,
name: str,
batch_request: Union[BatchRequestBase, dict],
data_context: BaseDataContext = None,
validator: Validator,
):
super().__init__(
name=name,
batch_request=batch_request,
data_context=data_context,
validator=validator,
)

@property
Expand Down
Expand Up @@ -52,7 +52,7 @@ def __init__(
limit_mode: Optional[Union[CardinalityLimitMode, str]] = None,
max_unique_values: Optional[Union[str, int]] = None,
max_proportion_unique: Optional[Union[str, float]] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
):
"""Create column domains where cardinality is within the specified limit.
Expand Down Expand Up @@ -88,7 +88,7 @@ def __init__(
cardinality limit to use when filtering columns.
max_proportion_unique: proportion of unique values for a
custom cardinality limit to use when filtering columns.
data_context: DataContext associated with this profiler.
data_context: BaseDataContext associated with this DomainBuilder
"""
if exclude_column_names is None:
exclude_column_names = [
Expand Down
Expand Up @@ -39,7 +39,7 @@ def __init__(
exclude_semantic_types: Optional[
Union[str, SemanticDomainTypes, List[Union[str, SemanticDomainTypes]]]
] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
):
"""
A semantic type is distinguished from the structured column type;
Expand All @@ -56,7 +56,7 @@ def __init__(
to be included
exclude_semantic_types: single/multiple type specifications using SemanticDomainTypes (or str equivalents)
to be excluded
data_context: DataContext
data_context: BaseDataContext associated with this DomainBuilder
Inclusion/Exclusion Logic:
(include_column_names|table_columns - exclude_column_names) + (include_semantic_types - exclude_semantic_types)
Expand Down
Expand Up @@ -14,12 +14,12 @@ class ColumnPairDomainBuilder(ColumnDomainBuilder):
def __init__(
self,
include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
):
"""
Args:
include_column_names: Explicitly specified exactly two desired columns.
data_context: DataContext associated with this profiler.
include_column_names: Explicitly specified exactly two desired columns
data_context: BaseDataContext associated with this DomainBuilder
"""
super().__init__(
include_column_names=include_column_names,
Expand Down
Expand Up @@ -27,11 +27,11 @@ class DomainBuilder(Builder, ABC):

def __init__(
self,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
):
"""
Args:
data_context: DataContext
data_context: BaseDataContext associated with DomainBuilder
"""
super().__init__(data_context=data_context)

Expand Down
Expand Up @@ -37,7 +37,7 @@ def __init__(
max_unexpected_values: Union[str, int] = 0,
max_unexpected_ratio: Optional[Union[str, float]] = None,
min_max_unexpected_values_proportion: Union[str, float] = 9.75e-1,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
):
"""
Create column domains using tolerance for inter-Batch proportion of adherence to intra-Batch "unexpected_count"
Expand All @@ -59,9 +59,9 @@ def __init__(
max_unexpected_values: maximum "unexpected_count" value of "map_metric_name" (intra-Batch)
max_unexpected_ratio: maximum "unexpected_count" value of "map_metric_name" divided by number of records
(intra-Batch); if both "max_unexpected_values" and "max_unexpected_ratio" are specified, then
"max_unexpected_ratio" is used (and "max_unexpected_values" is ignored).
"max_unexpected_ratio" is used (and "max_unexpected_values" is ignored)
min_max_unexpected_values_proportion: minimum fraction of Batch objects adhering to "max_unexpected_values"
data_context: DataContext associated with this profiler.
data_context: BaseDataContext associated with this DomainBuilder
For example (using default values of "max_unexpected_values" and "min_max_unexpected_values_proportion"):
Suppose that "map_metric_name" is "column_values.nonnull" and consider the following three Batches of data:
Expand Down
Expand Up @@ -14,12 +14,12 @@ class MultiColumnDomainBuilder(ColumnDomainBuilder):
def __init__(
self,
include_column_names: Optional[Union[str, Optional[List[str]]]] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
):
"""
Args:
include_column_names: Explicitly specified desired columns.
data_context: DataContext associated with this profiler.
include_column_names: Explicitly specified desired columns
data_context: BaseDataContext associated with this DomainBuilder
"""
super().__init__(
include_column_names=include_column_names,
Expand Down
Expand Up @@ -8,11 +8,11 @@
class TableDomainBuilder(DomainBuilder):
def __init__(
self,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
):
"""
Args:
data_context: DataContext
data_context: BaseDataContext associated with this DomainBuilder
"""
super().__init__(data_context=data_context)

Expand Down
Expand Up @@ -70,19 +70,19 @@ def __init__(
validation_parameter_builder_configs: Optional[
List[ParameterBuilderConfig]
] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
**kwargs,
):
"""
Args:
expectation_type: the "expectation_type" argument of "ExpectationConfiguration" object to be emitted.
meta: the "meta" argument of "ExpectationConfiguration" object to be emitted.
meta: the "meta" argument of "ExpectationConfiguration" object to be emitted
condition: Boolean statement (expressed as string and following specified grammar), which controls whether
or not underlying logic should be executed and thus resulting "ExpectationConfiguration" emitted.
or not underlying logic should be executed and thus resulting "ExpectationConfiguration" emitted
validation_parameter_builder_configs: ParameterBuilder configurations, having whose outputs available (as
fully-qualified parameter names) is pre-requisite for present ExpectationConfigurationBuilder instance.
These "ParameterBuilder" configurations help build kwargs needed for this "ExpectationConfigurationBuilder".
data_context: DataContext
fully-qualified parameter names) is pre-requisite for present ExpectationConfigurationBuilder instance
These "ParameterBuilder" configurations help build kwargs needed for this "ExpectationConfigurationBuilder"
data_context: BaseDataContext associated with this ExpectationConfigurationBuilder
kwargs: additional arguments
"""

Expand Down
Expand Up @@ -31,7 +31,7 @@ def __init__(
validation_parameter_builder_configs: Optional[
List[ParameterBuilderConfig]
] = None,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
**kwargs
):
"""
Expand All @@ -41,8 +41,8 @@ def __init__(
expectation_type: the "expectation_type" argument of "ExpectationConfiguration" object to be emitted.
validation_parameter_builder_configs: ParameterBuilder configurations, having whose outputs available (as
fully-qualified parameter names) is pre-requisite for present ExpectationConfigurationBuilder instance.
These "ParameterBuilder" configurations help build kwargs needed for this "ExpectationConfigurationBuilder".
data_context: DataContext
These "ParameterBuilder" configurations help build kwargs needed for this "ExpectationConfigurationBuilder"
data_context: BaseDataContext associated with this ExpectationConfigurationBuilder
kwargs: additional arguments
"""

Expand Down Expand Up @@ -146,7 +146,7 @@ def validation_parameter_builders(self) -> Optional[List[ParameterBuilder]]:

def init_rule_expectation_configuration_builders(
expectation_configuration_builder_configs: List[dict],
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
) -> List["ExpectationConfigurationBuilder"]: # noqa: F821
expectation_configuration_builder_config: dict
return [
Expand All @@ -162,7 +162,7 @@ def init_expectation_configuration_builder(
expectation_configuration_builder_config: Union[
"ExpectationConfigurationBuilder", dict # noqa: F821
],
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
) -> "ExpectationConfigurationBuilder": # noqa: F821
if not isinstance(expectation_configuration_builder_config, dict):
expectation_configuration_builder_config = (
Expand Down
10 changes: 5 additions & 5 deletions great_expectations/rule_based_profiler/helpers/util.py
Expand Up @@ -37,7 +37,7 @@
def get_validator(
purpose: str,
*,
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
batch_list: Optional[List[Batch]] = None,
batch_request: Optional[Union[str, BatchRequestBase, dict]] = None,
domain: Optional[Domain] = None,
Expand Down Expand Up @@ -92,7 +92,7 @@ def get_validator(


def get_batch_ids(
data_context: Optional["DataContext"] = None, # noqa: F821
data_context: Optional["BaseDataContext"] = None, # noqa: F821
batch_list: Optional[List[Batch]] = None,
batch_request: Optional[Union[str, BatchRequestBase, dict]] = None,
domain: Optional[Domain] = None,
Expand Down Expand Up @@ -126,7 +126,7 @@ def get_batch_ids(


def build_batch_request(
batch_request: Optional[Union[str, BatchRequest, RuntimeBatchRequest, dict]] = None,
batch_request: Optional[Union[str, BatchRequestBase, dict]] = None,
domain: Optional[Domain] = None,
variables: Optional[ParameterContainer] = None,
parameters: Optional[Dict[str, ParameterContainer]] = None,
Expand All @@ -136,11 +136,11 @@ def build_batch_request(

# Obtain BatchRequest from "rule state" (i.e., variables and parameters); from instance variable otherwise.
effective_batch_request: Optional[
Union[BatchRequest, RuntimeBatchRequest, dict]
Union[BatchRequestBase, dict]
] = get_parameter_value_and_validate_return_type(
domain=domain,
parameter_reference=batch_request,
expected_return_type=(BatchRequest, RuntimeBatchRequest, dict),
expected_return_type=(BatchRequestBase, dict),
variables=variables,
parameters=parameters,
)
Expand Down

0 comments on commit 2eb16f2

Please sign in to comment.