
[MAINTENANCE] Refactor RuleBasedProfilerConfig #4882

Merged
@@ -3297,6 +3297,8 @@ def add_profiler(
# Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
validated_config: dict = ruleBasedProfilerConfigSchema.load(config_data)
profiler_config: dict = ruleBasedProfilerConfigSchema.dump(validated_config)
profiler_config.pop("class_name")
profiler_config.pop("module_name")

config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(**profiler_config)
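
Context for the two pop calls above: after this refactor, RuleBasedProfilerConfig.__init__ no longer declares class_name/module_name parameters (they are hardcoded in base.py further down), so any dict that still carries those keys must be stripped before being splatted into the constructor. A minimal sketch with hypothetical values (the import path is assumed from the repo layout):

```python
from great_expectations.rule_based_profiler.config import RuleBasedProfilerConfig

# Hypothetical dict shaped like a ruleBasedProfilerConfigSchema dump.
profiler_config = {
    "name": "my_profiler",
    "config_version": 1.0,
    "rules": {},
    "class_name": "RuleBasedProfiler",
    "module_name": "great_expectations.rule_based_profiler",
}

# Splatting the dict as-is would fail because class_name/module_name are no
# longer accepted keyword arguments, so the runtime-only keys are dropped first.
profiler_config.pop("class_name", None)
profiler_config.pop("module_name", None)

config = RuleBasedProfilerConfig(**profiler_config)
```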

2 changes: 0 additions & 2 deletions great_expectations/data_context/store/profiler_store.py
@@ -25,8 +25,6 @@ def serialization_self_check(self, pretty_print: bool) -> None:
test_profiler_name = f"profiler_{''.join([random.choice(list('0123456789ABCDEF')) for _ in range(20)])}"
test_profiler_configuration = RuleBasedProfilerConfig(
name=test_profiler_name,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
config_version=1.0,
rules={},
)
@@ -142,8 +142,6 @@ class ExpectColumnMaxToBeBetween(ColumnExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_max_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"strict_min": False,
"strict_max": False,
@@ -135,8 +135,6 @@ class ExpectColumnMinToBeBetween(ColumnExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_min_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"strict_min": False,
"strict_max": False,
@@ -177,8 +177,6 @@ class ExpectColumnQuantileValuesToBeBetween(ColumnExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_quantile_values_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"quantiles": [
0.25,
@@ -145,8 +145,6 @@ class ExpectColumnValuesToBeBetween(ColumnMapExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_values_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"mostly": 1.0,
"strict_min": False,
@@ -148,8 +148,6 @@ class ExpectColumnValuesToBeInSet(ColumnMapExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_column_values_to_be_in_set", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"mostly": 1.0,
},
@@ -114,8 +114,6 @@ class ExpectTableRowCountToBeBetween(TableExpectation):
default_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
name="expect_table_row_count_to_be_between", # Convention: use "expectation_type" as profiler name.
config_version=1.0,
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
variables={
"false_positive_rate": 0.05,
"estimator": "bootstrap",
28 changes: 22 additions & 6 deletions great_expectations/rule_based_profiler/config/base.py
@@ -9,6 +9,7 @@
from great_expectations.marshmallow__shade import (
INCLUDE,
Schema,
ValidationError,
fields,
post_dump,
post_load,
@@ -386,13 +387,11 @@ def __init__(
name: str,
config_version: float,
rules: Dict[str, dict], # see RuleConfig
class_name: Optional[str] = None,
module_name: Optional[str] = None,
variables: Optional[Dict[str, Any]] = None,
commented_map: Optional[CommentedMap] = None,
):
self.module_name = module_name
self.class_name = class_name
self.module_name = "great_expectations.rule_based_profiler"
self.class_name = "RuleBasedProfiler"

self.name = name
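
With this change the two attributes are fixed at construction time rather than supplied by callers. A short usage sketch (import path assumed):

```python
from great_expectations.rule_based_profiler.config import RuleBasedProfilerConfig

config = RuleBasedProfilerConfig(name="my_profiler", config_version=1.0, rules={})

# Both attributes are now set internally rather than passed by the caller.
assert config.class_name == "RuleBasedProfiler"
assert config.module_name == "great_expectations.rule_based_profiler"
```

The serialized form still carries both keys; the tests further down now expect module_name to be "great_expectations.rule_based_profiler" rather than "great_expectations.rule_based_profiler.rule_based_profiler".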

@@ -411,6 +410,25 @@ def get_config_class(cls) -> Type["RuleBasedProfilerConfig"]: # noqa: F821
def get_schema_class(cls) -> Type["RuleBasedProfilerConfigSchema"]: # noqa: F821
return RuleBasedProfilerConfigSchema

@classmethod
def from_commented_map(
cls, commented_map: CommentedMap
) -> "RuleBasedProfilerConfig":
"""Override parent implementation to pop unnecessary attrs from config.

Please see parent BaseYamlConfig for more details.
"""
try:
config: dict = cls._get_schema_instance().load(commented_map)
config.pop("class_name", None)
config.pop("module_name", None)
Comment on lines +423 to +424 (Member Author):
Overrode the parent method because we need to pop these values. Could this be done in a cleaner fashion?

return cls.get_config_class()(commented_map=commented_map, **config)
except ValidationError:
logger.error(
"Encountered errors during loading config. See ValidationError for more details."
)
raise
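
On the author's question above about a cleaner approach: one option would be to strip the runtime-only keys inside the schema itself via a post_load hook, so from_commented_map and the other call sites would not need to pop them. The sketch below uses upstream marshmallow (the repo ships a shaded copy under great_expectations.marshmallow__shade with the same decorators) and illustrative field definitions, not the real RuleBasedProfilerConfigSchema:

```python
from marshmallow import Schema, fields, post_load


class IllustrativeRuleBasedProfilerConfigSchema(Schema):
    """Simplified stand-in for RuleBasedProfilerConfigSchema."""

    name = fields.String(required=True)
    config_version = fields.Float(required=True)
    class_name = fields.String(required=False)
    module_name = fields.String(required=False)
    variables = fields.Dict(keys=fields.String(), required=False)
    rules = fields.Dict(keys=fields.String(), required=False)

    @post_load
    def drop_runtime_only_fields(self, data: dict, **kwargs) -> dict:
        # class_name/module_name are hardcoded in RuleBasedProfilerConfig.__init__,
        # so they are removed once here instead of at every call site.
        data.pop("class_name", None)
        data.pop("module_name", None)
        return data


# Loading a config dict then yields kwargs the constructor accepts directly.
loaded = IllustrativeRuleBasedProfilerConfigSchema().load(
    {"name": "my_profiler", "config_version": 1.0, "class_name": "RuleBasedProfiler"}
)
assert "class_name" not in loaded
```

The trade-off is that schema dumps would still emit both keys, so places that roundtrip through dump (such as add_profiler above) would keep their pops unless dump behavior changed as well.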

def to_json_dict(self) -> dict:
"""
# TODO: <Alex>2/4/2022</Alex>
@@ -501,8 +519,6 @@ def resolve_config_using_acceptable_arguments(
}

return cls(
class_name=profiler.__class__.__name__,
module_name=profiler.__class__.__module__,
name=profiler.config.name,
config_version=profiler.config.config_version,
variables=runtime_variables,
2 changes: 0 additions & 2 deletions great_expectations/rule_based_profiler/rule_based_profiler.py
@@ -1262,8 +1262,6 @@ def __init__(
data_context: DataContext object that defines a full runtime environment (data access, etc.)
"""
profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
class_name=self.__class__.__name__,
module_name=self.__class__.__module__,
name=name,
config_version=config_version,
variables=variables,
4 changes: 3 additions & 1 deletion tests/conftest.py
@@ -2370,7 +2370,6 @@ def profiler_config_with_placeholder_args(
"""
return RuleBasedProfilerConfig(
name=profiler_name,
class_name="RuleBasedProfiler",
config_version=1.0,
variables={
"false_positive_threshold": 1.0e-2,
@@ -2440,6 +2439,9 @@ def populated_profiler_store(
)
deserialized_config: dict = ruleBasedProfilerConfigSchema.load(serialized_config)

deserialized_config.pop("module_name")
deserialized_config.pop("class_name")

profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
**deserialized_config
)
@@ -245,7 +245,7 @@ def test_profiler_save_and_load(data_context_with_taxi_data):
res: dict = my_rbp.config.to_json_dict()
assert res == {
"class_name": "RuleBasedProfiler",
"module_name": "great_expectations.rule_based_profiler.rule_based_profiler",
"module_name": "great_expectations.rule_based_profiler",
"name": "my_rbp",
"config_version": 1.0,
"rules": None,
@@ -259,7 +259,7 @@

res = my_loaded_profiler.config.to_json_dict()
assert res == {
"module_name": "great_expectations.rule_based_profiler.rule_based_profiler",
"module_name": "great_expectations.rule_based_profiler",
"class_name": "RuleBasedProfiler",
"name": "my_rbp",
"config_version": 1.0,
17 changes: 7 additions & 10 deletions tests/rule_based_profiler/config/test_base.py
@@ -189,8 +189,6 @@ def test_rule_config_unsuccessfully_loads_with_missing_required_fields():
def test_rule_based_profiler_config_successfully_loads_with_required_args():
data = {
"name": "my_RBP",
"class_name": "RuleBasedProfiler",
"module_name": "great_expectations.rule_based_profiler",
"config_version": 1.0,
"rules": {
"rule_1": {
@@ -218,8 +216,6 @@ def test_rule_based_profiler_config_successfully_loads_with_optional_args():
def test_rule_based_profiler_config_successfully_loads_with_optional_args():
data = {
"name": "my_RBP",
"class_name": "RuleBasedProfiler",
"module_name": "great_expectations.rule_based_profiler",
"config_version": 1.0,
"variables": {"foo": "bar"},
"rules": {
@@ -263,8 +259,6 @@ def test_rule_based_profiler_config_unsuccessfully_loads_with_missing_required_f
def test_rule_based_profiler_from_commented_map():
data = {
"name": "my_RBP",
"class_name": "RuleBasedProfiler",
"module_name": "great_expectations.rule_based_profiler",
"config_version": 1.0,
"variables": {"foo": "bar"},
"rules": {
@@ -291,13 +285,14 @@ def test_resolve_config_using_acceptable_arguments(
profiler_with_placeholder_args: RuleBasedProfiler,
) -> None:
old_config: RuleBasedProfilerConfig = profiler_with_placeholder_args.config
old_config.module_name = profiler_with_placeholder_args.__class__.__module__
old_config.class_name = profiler_with_placeholder_args.__class__.__name__

# Roundtrip through schema validation to add and/or restore any missing fields.
old_deserialized_config: dict = ruleBasedProfilerConfigSchema.load(
old_config.to_json_dict()
)
old_deserialized_config.pop("class_name")
old_deserialized_config.pop("module_name")

old_config = RuleBasedProfilerConfig(**old_deserialized_config)

# Brand new config is created but existing attributes are unchanged
@@ -310,12 +305,14 @@
# Roundtrip through schema validation to add and/or restore any missing fields.
# new_deserialized_config: dict = ruleBasedProfilerConfigSchema.load(new_config.to_json_dict())
new_deserialized_config: dict = new_config.to_json_dict()
new_deserialized_config.pop("class_name")
new_deserialized_config.pop("module_name")

new_config = RuleBasedProfilerConfig(**new_deserialized_config)

assert id(old_config) != id(new_config)
assert all(
old_config[attr] == new_config[attr]
for attr in ("class_name", "config_version", "module_name", "name")
old_config[attr] == new_config[attr] for attr in ("config_version", "name")
)


@@ -249,9 +249,7 @@ def test_get_metrics_and_expectations(
expected_expectation_suite.meta = expected_expectation_suite_meta

expected_rule_based_profiler_config: RuleBasedProfilerConfig = RuleBasedProfilerConfig(
class_name="RuleBasedProfiler",
config_version=1.0,
module_name="great_expectations.rule_based_profiler.rule_based_profiler",
name="test_volume_data_assistant",
variables={
"false_positive_rate": 0.05,
2 changes: 0 additions & 2 deletions tests/rule_based_profiler/test_rule_based_profiler.py
@@ -1099,8 +1099,6 @@ def test_add_profiler_with_batch_request_containing_batch_data_raises_error(
):
profiler_config = RuleBasedProfilerConfig(
name="my_profiler_config",
class_name="RuleBasedProfiler",
module_name="great_expectations.rule_based_profiler",
config_version=1.0,
rules={
"rule_1": {