New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[FEATURE] Introduce ParameterBuilder Evaluation Dependencies and Validation Dependencies 2022 03 23 66 #4531
Changes from all commits
aa42f9b
8840ded
90c1629
b59582f
fd70e39
fb44b70
6b55bc2
c88451f
28dd2fc
9029833
6335b6e
619de93
1594d97
8c8e632
72628eb
c83d51e
3f0a9e9
b5a5460
8bd778d
00e40d4
66d1692
0f1a97c
0a80670
9982c44
9d28e26
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,14 +109,10 @@ def __init__( | |
batch_request: Optional[Union[dict, str]] = None, | ||
**kwargs, | ||
): | ||
if module_name is not None: | ||
self.module_name = module_name | ||
self.module_name = module_name | ||
self.class_name = class_name | ||
|
||
if class_name is not None: | ||
self.class_name = class_name | ||
|
||
if batch_request is not None: | ||
self.batch_request = batch_request | ||
self.batch_request = batch_request | ||
|
||
for k, v in kwargs.items(): | ||
setattr(self, k, v) | ||
|
@@ -155,22 +151,21 @@ def __init__( | |
name: str, | ||
class_name: str, | ||
module_name: Optional[str] = None, | ||
evaluation_parameter_builder_configs: Optional[list] = None, | ||
json_serialize: bool = True, | ||
batch_request: Optional[Union[dict, str]] = None, | ||
**kwargs, | ||
): | ||
self.name = name | ||
self.module_name = module_name | ||
self.class_name = class_name | ||
|
||
if module_name is not None: | ||
self.module_name = module_name | ||
self.name = name | ||
|
||
if class_name is not None: | ||
self.class_name = class_name | ||
self.evaluation_parameter_builder_configs = evaluation_parameter_builder_configs | ||
|
||
self.json_serialize = json_serialize | ||
|
||
if batch_request is not None: | ||
self.batch_request = batch_request | ||
self.batch_request = batch_request | ||
|
||
for k, v in kwargs.items(): | ||
setattr(self, k, v) | ||
|
@@ -201,6 +196,15 @@ class Meta: | |
required=True, | ||
allow_none=False, | ||
) | ||
evaluation_parameter_builder_configs = fields.List( | ||
cls_or_instance=fields.Nested( | ||
lambda: ParameterBuilderConfigSchema(), | ||
required=True, | ||
allow_none=False, | ||
), | ||
required=False, | ||
allow_none=True, | ||
) | ||
json_serialize = fields.Boolean( | ||
required=False, | ||
allow_none=True, | ||
|
@@ -219,22 +223,20 @@ def __init__( | |
class_name: str, | ||
module_name: Optional[str] = None, | ||
meta: Optional[dict] = None, | ||
validation_parameter_builder_configs: Optional[list] = None, | ||
batch_request: Optional[Union[dict, str]] = None, | ||
**kwargs, | ||
): | ||
self.expectation_type = expectation_type | ||
self.module_name = module_name | ||
self.class_name = class_name | ||
|
||
if module_name is not None: | ||
self.module_name = module_name | ||
self.expectation_type = expectation_type | ||
|
||
if class_name is not None: | ||
self.class_name = class_name | ||
self.meta = meta | ||
|
||
if meta is not None: | ||
self.meta = meta | ||
self.validation_parameter_builder_configs = validation_parameter_builder_configs | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same notes here There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cdkini I think that this is totally fine here -- we do not want empty ( |
||
if batch_request is not None: | ||
self.batch_request = batch_request | ||
self.batch_request = batch_request | ||
|
||
for k, v in kwargs.items(): | ||
setattr(self, k, v) | ||
|
@@ -275,6 +277,15 @@ class Meta: | |
required=False, | ||
allow_none=True, | ||
) | ||
validation_parameter_builder_configs = fields.List( | ||
cls_or_instance=fields.Nested( | ||
lambda: ParameterBuilderConfigSchema(), | ||
required=True, | ||
allow_none=False, | ||
), | ||
required=False, | ||
allow_none=True, | ||
) | ||
batch_request = fields.Raw( | ||
required=False, | ||
allow_none=True, | ||
|
@@ -380,14 +391,15 @@ def __init__( | |
variables: Optional[Dict[str, Any]] = None, | ||
commented_map: Optional[CommentedMap] = None, | ||
): | ||
self.module_name = module_name | ||
self.class_name = class_name | ||
|
||
self.name = name | ||
|
||
self.config_version = config_version | ||
self.rules = rules | ||
if class_name is not None: | ||
self.class_name = class_name | ||
if module_name is not None: | ||
self.module_name = module_name | ||
|
||
self.variables = variables | ||
self.rules = rules | ||
|
||
super().__init__(commented_map=commented_map) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,12 @@ | |
|
||
from great_expectations.core.batch import Batch, BatchRequest, RuntimeBatchRequest | ||
from great_expectations.core.expectation_configuration import ExpectationConfiguration | ||
from great_expectations.rule_based_profiler.config import ParameterBuilderConfig | ||
from great_expectations.rule_based_profiler.helpers.util import ( | ||
init_rule_parameter_builders, | ||
set_batch_list_or_batch_request_on_builder, | ||
) | ||
from great_expectations.rule_based_profiler.parameter_builder import ParameterBuilder | ||
from great_expectations.rule_based_profiler.types import ( | ||
Builder, | ||
Domain, | ||
|
@@ -15,9 +21,16 @@ | |
|
||
|
||
class ExpectationConfigurationBuilder(Builder, ABC): | ||
exclude_field_names: Set[str] = Builder.exclude_field_names | { | ||
"validation_parameter_builders", | ||
} | ||
|
||
def __init__( | ||
self, | ||
expectation_type: str, | ||
validation_parameter_builder_configs: Optional[ | ||
List[ParameterBuilderConfig] | ||
] = None, | ||
batch_list: Optional[List[Batch]] = None, | ||
batch_request: Optional[ | ||
Union[str, BatchRequest, RuntimeBatchRequest, dict] | ||
|
@@ -30,6 +43,8 @@ def __init__( | |
|
||
Args: | ||
expectation_type: the "expectation_type" argument of "ExpectationConfiguration" object to be emitted. | ||
validation_parameter_builder_configs: ParameterBuilder configurations, having whose outputs available (as | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure I understand this docstring as written - these configs are optional right? And optionally instead of specifying them here they can be specified in the parameter_builder section of the config? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @anthonyburdi Let's discuss -- I would like to offer a clarification, which I feel might be useful. Thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Discussed clarifying this in a follow-on PR. |
||
fully-qualified parameter names) is pre-requisite for present ExpectationConfigurationBuilder instance. | ||
batch_list: explicitly passed Batch objects for parameter computation (take precedence over batch_request). | ||
batch_request: specified in ParameterBuilder configuration to get Batch objects for parameter computation. | ||
data_context: DataContext | ||
|
@@ -44,6 +59,11 @@ def __init__( | |
|
||
self._expectation_type = expectation_type | ||
|
||
self._validation_parameter_builders = init_rule_parameter_builders( | ||
parameter_builder_configs=validation_parameter_builder_configs, | ||
data_context=self._data_context, | ||
) | ||
Comment on lines
+62
to
+65
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cdkini Please see this comment/plan: #4524 (comment) -- thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for closing the loop on this @alexsherstinsky. I actually didn't know that you could directly link comments :) |
||
|
||
""" | ||
Since ExpectationConfigurationBuilderConfigSchema allows arbitrary fields (as ExpectationConfiguration kwargs) | ||
to be provided, they must be all converted to public property accessors and/or public fields in order for all | ||
|
@@ -65,10 +85,43 @@ def build_expectation_configuration( | |
variables: Optional[ParameterContainer] = None, | ||
parameters: Optional[Dict[str, ParameterContainer]] = None, | ||
) -> ExpectationConfiguration: | ||
self._resolve_validation_dependencies( | ||
parameter_container=parameter_container, | ||
domain=domain, | ||
variables=variables, | ||
parameters=parameters, | ||
) | ||
|
||
return self._build_expectation_configuration( | ||
domain=domain, variables=variables, parameters=parameters | ||
) | ||
|
||
def _resolve_validation_dependencies( | ||
self, | ||
parameter_container: ParameterContainer, | ||
domain: Domain, | ||
variables: Optional[ParameterContainer] = None, | ||
parameters: Optional[Dict[str, ParameterContainer]] = None, | ||
) -> None: | ||
validation_parameter_builders: List[ParameterBuilder] = ( | ||
self.validation_parameter_builders or [] | ||
) | ||
|
||
validation_parameter_builder: ParameterBuilder | ||
for validation_parameter_builder in validation_parameter_builders: | ||
set_batch_list_or_batch_request_on_builder( | ||
builder=validation_parameter_builder, | ||
batch_list=self.batch_list, | ||
batch_request=self.batch_request, | ||
force_batch_data=False, | ||
) | ||
validation_parameter_builder.build_parameters( | ||
parameter_container=parameter_container, | ||
domain=domain, | ||
variables=variables, | ||
parameters=parameters, | ||
) | ||
|
||
@abstractmethod | ||
def _build_expectation_configuration( | ||
self, | ||
|
@@ -81,3 +134,7 @@ def _build_expectation_configuration( | |
@property | ||
def expectation_type(self) -> str: | ||
return self._expectation_type | ||
|
||
@property | ||
def validation_parameter_builders(self) -> Optional[List[ParameterBuilder]]: | ||
return self._validation_parameter_builders |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,10 +19,12 @@ | |
from great_expectations.data_context.util import instantiate_class_from_config | ||
from great_expectations.execution_engine.execution_engine import MetricDomainTypes | ||
from great_expectations.rule_based_profiler.types import ( | ||
PARAMETER_KEY, | ||
VARIABLES_PREFIX, | ||
Builder, | ||
Domain, | ||
ParameterContainer, | ||
get_fully_qualified_parameter_names, | ||
get_parameter_value_by_fully_qualified_parameter_name, | ||
is_fully_qualified_parameter_name_literal_string_format, | ||
) | ||
|
@@ -461,6 +463,71 @@ def init_expectation_configuration_builder( | |
return expectation_configuration_builder | ||
|
||
|
||
def resolve_evaluation_dependencies( | ||
parameter_builder: "ParameterBuilder", # noqa: F821 | ||
parameter_container: ParameterContainer, | ||
domain: Domain, | ||
variables: Optional[ParameterContainer] = None, | ||
parameters: Optional[Dict[str, ParameterContainer]] = None, | ||
) -> None: | ||
Comment on lines
+466
to
+472
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could we add a docstring here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cdkini I will add it (in the next commit). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cdkini I added a docstring. Thanks! |
||
""" | ||
This method computes ("resolves") pre-requisite ("evaluation") dependencies (i.e., results of executing other | ||
"ParameterBuilder" objects), whose output(s) are needed by specified "ParameterBuilder" object to fulfill its goals. | ||
""" | ||
|
||
# Step 1: Check if any "evaluation_parameter_builders" are configured for specified "ParameterBuilder" object. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Loving the comments (super helpful in reading the code) but also wondering if this could be refactored into a few smaller methods. Similar thougts with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @Shinnnyshinshin I would not want to disturb |
||
evaluation_parameter_builders: List[ | ||
"ParameterBuilder" # noqa: F821 | ||
] = parameter_builder.evaluation_parameter_builders | ||
if not evaluation_parameter_builders: | ||
return | ||
|
||
# Step 2: Obtain all fully-qualified parameter names ("variables" and "parameter" keys) in namespace of "Domain" | ||
# (fully-qualified parameter names are stored in "ParameterNode" objects of "ParameterContainer" of "Domain" | ||
# whenever "ParameterBuilder.build_parameters()" is executed for "ParameterBuilder.fully_qualified_parameter_name"). | ||
fully_qualified_parameter_names: List[str] = get_fully_qualified_parameter_names( | ||
domain=domain, | ||
variables=variables, | ||
parameters=parameters, | ||
) | ||
|
||
# Step 3: Check for presence of fully-qualified parameter names of "ParameterBuilder" objects, obtained by iterating | ||
# over evaluation dependencies. "Execute ParameterBuilder.build_parameters()" if absent from "Domain" scoped list. | ||
evaluation_parameter_builder: "ParameterBuilder" # noqa: F821 | ||
for evaluation_parameter_builder in evaluation_parameter_builders: | ||
fully_qualified_evaluation_parameter_builder_name: str = ( | ||
f"{PARAMETER_KEY}{evaluation_parameter_builder.name}" | ||
) | ||
|
||
if ( | ||
fully_qualified_evaluation_parameter_builder_name | ||
not in fully_qualified_parameter_names | ||
): | ||
set_batch_list_or_batch_request_on_builder( | ||
builder=evaluation_parameter_builder, | ||
batch_list=parameter_builder.batch_list, | ||
batch_request=parameter_builder.batch_request, | ||
force_batch_data=False, | ||
) | ||
|
||
evaluation_parameter_builder.build_parameters( | ||
parameter_container=parameter_container, | ||
domain=domain, | ||
variables=variables, | ||
parameters=parameters, | ||
) | ||
|
||
# Step 4: Any "ParameterBuilder" object, including members of "evaluation_parameter_builders" list may be | ||
# configured with its own "evaluation_parameter_builders" list. Recursive call handles such situations. | ||
resolve_evaluation_dependencies( | ||
parameter_builder=evaluation_parameter_builder, | ||
parameter_container=parameter_container, | ||
domain=domain, | ||
variables=variables, | ||
parameters=parameters, | ||
) | ||
|
||
|
||
def set_batch_list_or_batch_request_on_builder( | ||
builder: Builder, | ||
batch_list: Optional[List[Batch]] = None, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we know what this
list
contains?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cdkini Yes, it contains
ParameterBuilder
configurations. Since we did not expand this deeper in the signature of the constructor of theRuleBasedProfilerConfig
, I did not do that here either. We can do it (in a separate tiny PR). Thanks.