Skip to content

Commit

Permalink
refactor: start refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
cdkini committed Jan 21, 2022
1 parent 5c4b6be commit 4e99543
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 85 deletions.
173 changes: 88 additions & 85 deletions great_expectations/rule_based_profiler/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,10 @@ class Profiler:
def __init__(
self,
*,
profiler_config: Optional[Dict[str, Dict[str, Dict[str, Any]]]] = None,
name: str,
config_version: float,
rules: dict,
variables: Optional[dict] = None,
data_context: Optional[DataContext] = None,
):
"""
Expand All @@ -111,92 +114,92 @@ def __init__(
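name: Name of this profiler
config_version: Version of the profiler configuration format
rules: Rules configuration as a dictionary keyed by rule name
variables: Optional variables configuration as a dictionary (treated as empty when omitted)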
data_context: DataContext object that defines a full runtime environment (data access, etc.)
"""
self._profiler_config = profiler_config
self._name = name
self._config_version = config_version
self._rules = rules
self._variables = variables or {}
self._data_context = data_context
self._rules = []

rules_configs: Dict[str, Dict[str, Any]] = self._profiler_config.get(
"rules", {}
)
rule_name: str
rule_config: Dict[str, Any]

for rule_name, rule_config in rules_configs.items():
domain_builder_config: dict = rule_config.get("domain_builder")

if domain_builder_config is None:
raise ge_exceptions.ProfilerConfigurationError(
message=f'Invalid rule "{rule_name}": no domain_builder found.'
)

domain_builder: DomainBuilder = instantiate_class_from_config(
config=domain_builder_config,
runtime_environment={"data_context": data_context},
config_defaults={
"module_name": "great_expectations.rule_based_profiler.domain_builder"
},
)

parameter_builders: List[ParameterBuilder] = []

parameter_builder_configs: dict = rule_config.get("parameter_builders")

if parameter_builder_configs:
parameter_builder_config: dict
for parameter_builder_config in parameter_builder_configs:
parameter_builders.append(
instantiate_class_from_config(
config=parameter_builder_config,
runtime_environment={"data_context": data_context},
config_defaults={
"module_name": "great_expectations.rule_based_profiler.parameter_builder"
},
)
)

expectation_configuration_builders: List[
ExpectationConfigurationBuilder
] = []

expectation_configuration_builder_configs: dict = rule_config.get(
"expectation_configuration_builders"
)

if expectation_configuration_builder_configs:
expectation_configuration_builder_config: dict
for (
expectation_configuration_builder_config
) in expectation_configuration_builder_configs:
expectation_configuration_builders.append(
instantiate_class_from_config(
config=expectation_configuration_builder_config,
runtime_environment={},
config_defaults={
"class_name": "DefaultExpectationConfigurationBuilder",
"module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
},
)
)

variables_configs: Dict[str, Dict] = self._profiler_config.get(
"variables", {}
)
variables: Optional[ParameterContainer] = None

if variables_configs:
variables = build_parameter_container_for_variables(
variables_configs=variables_configs
)

self._rules.append(
Rule(
name=rule_name,
domain_builder=domain_builder,
parameter_builders=parameter_builders,
expectation_configuration_builders=expectation_configuration_builders,
variables=variables,
)
)
# rules_configs: Dict[str, Dict[str, Any]] = rules
# rule_name: str
# rule_config: Dict[str, Any]

# for rule_name, rule_config in rules_configs.items():
# domain_builder_config: dict = rule_config.get("domain_builder")

# if domain_builder_config is None:
# raise ge_exceptions.ProfilerConfigurationError(
# message=f'Invalid rule "{rule_name}": no domain_builder found.'
# )

# domain_builder: DomainBuilder = instantiate_class_from_config(
# config=domain_builder_config,
# runtime_environment={"data_context": data_context},
# config_defaults={
# "module_name": "great_expectations.rule_based_profiler.domain_builder"
# },
# )

# parameter_builders: List[ParameterBuilder] = []

# parameter_builder_configs: dict = rule_config.get("parameter_builders")

# if parameter_builder_configs:
# parameter_builder_config: dict
# for parameter_builder_config in parameter_builder_configs:
# parameter_builders.append(
# instantiate_class_from_config(
# config=parameter_builder_config,
# runtime_environment={"data_context": data_context},
# config_defaults={
# "module_name": "great_expectations.rule_based_profiler.parameter_builder"
# },
# )
# )

# expectation_configuration_builders: List[
# ExpectationConfigurationBuilder
# ] = []

# expectation_configuration_builder_configs: dict = rule_config.get(
# "expectation_configuration_builders"
# )

# if expectation_configuration_builder_configs:
# expectation_configuration_builder_config: dict
# for (
# expectation_configuration_builder_config
# ) in expectation_configuration_builder_configs:
# expectation_configuration_builders.append(
# instantiate_class_from_config(
# config=expectation_configuration_builder_config,
# runtime_environment={},
# config_defaults={
# "class_name": "DefaultExpectationConfigurationBuilder",
# "module_name": "great_expectations.rule_based_profiler.expectation_configuration_builder",
# },
# )
# )

# variables_configs: Dict[str, Dict] = self._profiler_config.get(
# "variables", {}
# )
# variables: Optional[ParameterContainer] = None

# if variables_configs:
# variables = build_parameter_container_for_variables(
# variables_configs=variables_configs
# )

# self._rules.append(
# Rule(
# name=rule_name,
# domain_builder=domain_builder,
# parameter_builders=parameter_builders,
# expectation_configuration_builders=expectation_configuration_builders,
# variables=variables,
# )
# )

def profile(
self,
Expand Down
40 changes: 40 additions & 0 deletions tests/rule_based_profiler/config/test_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from unittest import mock

import pytest
from ruamel.yaml.comments import CommentedMap

from great_expectations.data_context.util import instantiate_class_from_config
from great_expectations.marshmallow__shade.exceptions import ValidationError
from great_expectations.rule_based_profiler.config import (
DomainBuilderConfig,
Expand All @@ -15,6 +18,7 @@
RuleConfig,
RuleConfigSchema,
)
from great_expectations.rule_based_profiler.profiler import Profiler


def test_not_null_schema_raises_error_with_improperly_implemented_subclass():
Expand Down Expand Up @@ -293,3 +297,39 @@ def test_rule_based_profiler_from_commented_map():
commented_map = CommentedMap(data)
config = RuleBasedProfilerConfig.from_commented_map(commented_map)
assert all(hasattr(config, k) for k in data)


@mock.patch("great_expectations.data_context.data_context.DataContext")
def test_rule_based_profiler_instantiate_class_from_config(mock_data_context):
    """Check that a Profiler can be built from a plain config dictionary.

    The dictionary carries the ``name``, ``config_version``, ``variables`` and
    ``rules`` keys; the class and module to instantiate are supplied through
    ``config_defaults`` rather than through the config itself, and the mocked
    DataContext is handed over via ``runtime_environment``.
    """
    config = {
        "name": "my_RBP",
        "config_version": 1.0,
        "variables": {"foo": "bar"},
        "rules": {
            "rule_1": {
                "name": "rule_1",
                "domain_builder": {"class_name": "DomainBuilder"},
                "parameter_builders": [
                    {"class_name": "ParameterBuilder", "name": "my_parameter"}
                ],
                "expectation_configuration_builders": [
                    {
                        "class_name": "ExpectationConfigurationBuilder",
                        "expectation_type": "expect_column_pair_values_A_to_be_greater_than_B",
                    }
                ],
            },
        },
    }

    profiler = instantiate_class_from_config(
        config=config,
        runtime_environment={"data_context": mock_data_context},
        config_defaults={
            "class_name": "Profiler",
            "module_name": "great_expectations.rule_based_profiler.profiler",
        },
    )

    # The debugging print and the unconditional `assert False` were removed:
    # they forced this test to fail regardless of the result.
    assert isinstance(profiler, Profiler)

0 comments on commit 4e99543

Please sign in to comment.