Skip to content

Commit

Permalink
[MAINTENANCE] Refactor RuleBasedProfilerConfig (#4882)
Browse files Browse the repository at this point in the history
* refactor: start cleanup

* refactor: fix all tests that use class/module name

* fix: patch broken RBP tests

* chore: conditionally pop class/module name

* refactor: override from_commented_map in RBP

* chore: add docstring

* chore: pop values
  • Loading branch information
cdkini committed Apr 19, 2022
1 parent d5c0393 commit abe2ec6
Show file tree
Hide file tree
Showing 15 changed files with 36 additions and 39 deletions.
Expand Up @@ -3297,6 +3297,8 @@ def add_profiler(
# Roundtrip through schema validation to remove any illegal fields add/or restore any missing fields.
validated_config: dict = ruleBasedProfilerConfigSchema.load(config_data)
profiler_config: dict = ruleBasedProfilerConfigSchema.dump(validated_config)
profiler_config.pop("class_name")
profiler_config.pop("module_name")

config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(**profiler_config)

Expand Down
2 changes: 0 additions & 2 deletions great_expectations/data_context/store/profiler_store.py
Expand Up @@ -25,8 +25,6 @@ def serialization_self_check(self, pretty_print: bool) -> None:
test_profiler_name = f"profiler_{''.join([random.choice(list('0123456789ABCDEF')) for _ in range(20)])}"
test_profiler_configuration = RuleBasedProfilerConfig(
name=test_profiler_name,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
config_version=1.0,
rules={},
)
Expand Down
Expand Up @@ -142,8 +142,6 @@ class ExpectColumnMaxToBeBetween(ColumnExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_max_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"strict_min": False,
"strict_max": False,
Expand Down
Expand Up @@ -135,8 +135,6 @@ class ExpectColumnMinToBeBetween(ColumnExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_min_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"strict_min": False,
"strict_max": False,
Expand Down
Expand Up @@ -177,8 +177,6 @@ class ExpectColumnQuantileValuesToBeBetween(ColumnExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_quantile_values_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"quantiles": [
0.25,
Expand Down
Expand Up @@ -145,8 +145,6 @@ class ExpectColumnValuesToBeBetween(ColumnMapExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_values_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"mostly": 1.0,
"strict_min": False,
Expand Down
Expand Up @@ -148,8 +148,6 @@ class ExpectColumnValuesToBeInSet(ColumnMapExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_values_to_be_in_set", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"mostly": 1.0,
},
Expand Down
Expand Up @@ -114,8 +114,6 @@ class ExpectTableRowCountToBeBetween(TableExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_table_row_count_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"false_positive_rate": 0.05,
"estimator": "bootstrap",
Expand Down
28 changes: 22 additions & 6 deletions great_expectations/rule_based_profiler/config/base.py
Expand Up @@ -9,6 +9,7 @@
from great_expectations.marshmallow__shade import (
INCLUDE,
Schema,
ValidationError,
fields,
post_dump,
post_load,
Expand Down Expand Up @@ -386,13 +387,11 @@ def __init__(
name: str,
config_version: float,
rules: Dict[str, dict], # see RuleConfig
class_name: Optional[str] = None,
module_name: Optional[str] = None,
variables: Optional[Dict[str, Any]] = None,
commented_map: Optional[CommentedMap] = None,
):
self.module_name = module_name
self.class_name = class_name
self.module_name = "great_expectations.rule_based_profiler"
self.class_name = "RuleBasedProfiler"

self.name = name

Expand All @@ -411,6 +410,25 @@ def get_config_class(cls) -> Type["RuleBasedProfilerConfig"]: # noqa: F821
def get_schema_class(cls) -> Type["RuleBasedProfilerConfigSchema"]: # noqa: F821
return RuleBasedProfilerConfigSchema

@classmethod
def from_commented_map(
cls, commented_map: CommentedMap
) -> "RuleBasedProfilerConfig":
"""Override parent implementation to pop unnecessary attrs from config.
Please see parent BaseYamlConfig for more details.
"""
try:
config: dict = cls._get_schema_instance().load(commented_map)
config.pop("class_name", None)
config.pop("module_name", None)
return cls.get_config_class()(commented_map=commented_map, **config)
except ValidationError:
logger.error(
"Encountered errors during loading config. See ValidationError for more details."
)
raise

def to_json_dict(self) -> dict:
"""
# TODO: <Alex>2/4/2022</Alex>
Expand Down Expand Up @@ -501,8 +519,6 @@ def resolve_config_using_acceptable_arguments(
}

return cls(
class_name=profiler.__class__.__name__,
module_name=profiler.__class__.__module__,
name=profiler.config.name,
config_version=profiler.config.config_version,
variables=runtime_variables,
Expand Down
2 changes: 0 additions & 2 deletions great_expectations/rule_based_profiler/rule_based_profiler.py
Expand Up @@ -1262,8 +1262,6 @@ def __init__(
data_context: DataContext object that defines a full runtime environment (data access, etc.)
"""
profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
class_name=self.__class__.__name__,
module_name=self.__class__.__module__,
name=name,
config_version=config_version,
variables=variables,
Expand Down
4 changes: 3 additions & 1 deletion tests/conftest.py
Expand Up @@ -2370,7 +2370,6 @@ def profiler_config_with_placeholder_args(
"""
return RuleBasedProfilerConfig(
name=profiler_name,
class_name="RuleBasedProfiler",
config_version=1.0,
variables={
"false_positive_threshold": 1.0e-2,
Expand Down Expand Up @@ -2440,6 +2439,9 @@ def populated_profiler_store(
)
deserialized_config: dict = ruleBasedProfilerConfigSchema.load(serialized_config)

deserialized_config.pop("module_name")
deserialized_config.pop("class_name")

profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
**deserialized_config
)
Expand Down
Expand Up @@ -245,7 +245,7 @@ def test_profiler_save_and_load(data_context_with_taxi_data):
res: dict = my_rbp.config.to_json_dict()
assert res == {
"class_name": "RuleBasedProfiler",
"module_name": "great_expectations.rule_based_profiler.rule_based_profiler",
"module_name": "great_expectations.rule_based_profiler",
"name": "my_rbp",
"config_version": 1.0,
"rules": None,
Expand All @@ -259,7 +259,7 @@ def test_profiler_save_and_load(data_context_with_taxi_data):

res = my_loaded_profiler.config.to_json_dict()
assert res == {
"module_name": "great_expectations.rule_based_profiler.rule_based_profiler",
"module_name": "great_expectations.rule_based_profiler",
"class_name": "RuleBasedProfiler",
"name": "my_rbp",
"config_version": 1.0,
Expand Down
17 changes: 7 additions & 10 deletions tests/rule_based_profiler/config/test_base.py
Expand Up @@ -189,8 +189,6 @@ def test_rule_config_unsuccessfully_loads_with_missing_required_fields():
def test_rule_based_profiler_config_successfully_loads_with_required_args():
data = {
"name": "my_RBP",
"class_name": "RuleBasedProfiler",
"module_name": "great_expectations.rule_based_profiler",
"config_version": 1.0,
"rules": {
"rule_1": {
Expand Down Expand Up @@ -218,8 +216,6 @@ def test_rule_based_profiler_config_successfully_loads_with_required_args():
def test_rule_based_profiler_config_successfully_loads_with_optional_args():
data = {
"name": "my_RBP",
"class_name": "RuleBasedProfiler",
"module_name": "great_expectations.rule_based_profiler",
"config_version": 1.0,
"variables": {"foo": "bar"},
"rules": {
Expand Down Expand Up @@ -263,8 +259,6 @@ def test_rule_based_profiler_config_unsuccessfully_loads_with_missing_required_f
def test_rule_based_profiler_from_commented_map():
data = {
"name": "my_RBP",
"class_name": "RuleBasedProfiler",
"module_name": "great_expectations.rule_based_profiler",
"config_version": 1.0,
"variables": {"foo": "bar"},
"rules": {
Expand All @@ -291,13 +285,14 @@ def test_resolve_config_using_acceptable_arguments(
profiler_with_placeholder_args: RuleBasedProfiler,
) -> None:
old_config: RuleBasedProfilerConfig = profiler_with_placeholder_args.config
old_config.module_name = profiler_with_placeholder_args.__class__.__module__
old_config.class_name = profiler_with_placeholder_args.__class__.__name__

# Roundtrip through schema validation to add/or restore any missing fields.
old_deserialized_config: dict = ruleBasedProfilerConfigSchema.load(
old_config.to_json_dict()
)
old_deserialized_config.pop("class_name")
old_deserialized_config.pop("module_name")

old_config = RuleBasedProfilerConfig(**old_deserialized_config)

# Brand new config is created but existing attributes are unchanged
Expand All @@ -310,12 +305,14 @@ def test_resolve_config_using_acceptable_arguments(
# Roundtrip through schema validation to add/or restore any missing fields.
# new_deserialized_config: dict = ruleBasedProfilerConfigSchema.load(new_config.to_json_dict())
new_deserialized_config: dict = new_config.to_json_dict()
new_deserialized_config.pop("class_name")
new_deserialized_config.pop("module_name")

new_config = RuleBasedProfilerConfig(**new_deserialized_config)

assert id(old_config) != id(new_config)
assert all(
old_config[attr] == new_config[attr]
for attr in ("class_name", "config_version", "module_name", "name")
old_config[attr] == new_config[attr] for attr in ("config_version", "name")
)


Expand Down
Expand Up @@ -249,9 +249,7 @@ def test_get_metrics_and_expectations(
expected_expectation_suite.meta = expected_expectation_suite_meta

expected_rule_based_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
class_name="RuleBasedProfiler",
config_version=1.0,
module_name="great_expectations.rule_based_profiler.rule_based_profiler",
name="test_volume_data_assistant",
variables={
"false_positive_rate": 0.05,
Expand Down
2 changes: 0 additions & 2 deletions tests/rule_based_profiler/test_rule_based_profiler.py
Expand Up @@ -1099,8 +1099,6 @@ def test_add_profiler_with_batch_request_containing_batch_data_raises_error(
):
profiler_config = RuleBasedProfilerConfig(
name="my_profiler_config",
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
config_version=1.0,
rules={
"rule_1": {
Expand Down

0 comments on commit abe2ec6

Please sign in to comment.