Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add string support for data check action code #3167

Merged
merged 10 commits into from
Dec 23, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ Release Notes
-------------
**Future Releases**
* Enhancements
* Added string support for DataCheckActionCode :pr:`3167`
* Fixes
* Changes
* Documentation Changes
Expand Down
5 changes: 3 additions & 2 deletions evalml/data_checks/data_check_action.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
"""Recommended action returned by a DataCheck."""

from evalml.data_checks.data_check_action_code import DataCheckActionCode
from evalml.data_checks.utils import handle_data_check_action_code


class DataCheckAction:
"""A recommended action returned by a DataCheck.

Args:
action_code (DataCheckActionCode): Action code associated with the action.
action_code (str, DataCheckActionCode): Action code associated with the action.
data_check_name (str): Name of data check.
metadata (dict, optional): Additional useful information associated with the action. Defaults to None.
"""

def __init__(self, action_code, data_check_name, metadata=None):
self.action_code = action_code
self.action_code = handle_data_check_action_code(action_code)
self.data_check_name = data_check_name
self.metadata = {"columns": None, "rows": None}
if metadata is not None:
Expand Down
10 changes: 10 additions & 0 deletions evalml/data_checks/data_check_action_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,13 @@ class DataCheckActionCode(Enum):
@classproperty
def _all_values(cls):
    """Map each member's upper-cased value to the member itself."""
    lookup = {}
    for member in cls:
        lookup[member.value.upper()] = member
    return lookup

def __str__(self):
    """String representation of the DataCheckActionCode enum.

    The string is the member name in lower case, e.g. ``DROP_COL`` becomes
    ``"drop_col"``. Deriving it from the name (rather than from a
    per-member lookup table) means every member has a string form, so
    adding a new action code can never make ``str()`` raise ``KeyError``.

    Returns:
        str: The lower-cased member name.
    """
    return self.name.lower()
33 changes: 33 additions & 0 deletions evalml/data_checks/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Utility methods for the data checks in EvalML."""
from .data_check_action_code import DataCheckActionCode


def handle_data_check_action_code(action_code):
    """Handles data check action codes by either returning the DataCheckActionCode or converting from a str.

    String input is upper-cased before it is looked up in
    ``DataCheckActionCode._all_values``, so ``"drop_col"``, ``"Drop_col"`` and
    ``"DROP_COL"`` all resolve to the same member.

    Args:
        action_code (str or DataCheckActionCode): Data check action code that needs to be handled.

    Returns:
        DataCheckActionCode enum

    Raises:
        KeyError: If input is a string that is not a valid DataCheckActionCode enum value.
        ValueError: If input is not a string or DataCheckActionCode object.

    Examples:
        >>> assert handle_data_check_action_code("drop_col") == DataCheckActionCode.DROP_COL
        >>> assert handle_data_check_action_code("DROP_ROWS") == DataCheckActionCode.DROP_ROWS
        >>> assert handle_data_check_action_code("Impute_col") == DataCheckActionCode.IMPUTE_COL
    """
    if isinstance(action_code, str):
        try:
            return DataCheckActionCode._all_values[action_code.upper()]
        except KeyError:
            # Report the caller's original spelling; the internal lookup
            # failure (on the upper-cased key) adds nothing, so suppress it.
            raise KeyError(
                "Action code '{}' does not exist".format(action_code)
            ) from None
    if isinstance(action_code, DataCheckActionCode):
        return action_code
    raise ValueError(
        "`handle_data_check_action_code` was not passed a str or DataCheckActionCode object"
    )
30 changes: 30 additions & 0 deletions evalml/tests/data_checks_tests/test_data_check_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,33 @@ def test_convert_dict_to_action(dummy_data_check_name):
data_check_action_dict_with_other_metadata
)
assert data_check_action == expected_data_check_action


@pytest.mark.parametrize(
    "action_code,expected_code",
    [
        ("drop_rows", DataCheckActionCode.DROP_ROWS),
        ("Drop_col", DataCheckActionCode.DROP_COL),
        ("TRANSFORM_TARGET", DataCheckActionCode.TRANSFORM_TARGET),
    ],
)
def test_data_check_action_equality_string_input(
    action_code, expected_code, dummy_data_check_name
):
    """An action built from a string code equals one built from the enum member."""
    # Without metadata: equality must hold in both directions.
    data_check_action = DataCheckAction(action_code, dummy_data_check_name)
    data_check_action_eq = DataCheckAction(expected_code, dummy_data_check_name)

    assert data_check_action == data_check_action
    assert data_check_action == data_check_action_eq
    assert data_check_action_eq == data_check_action

    # With metadata and no data check name: also compare the dict forms.
    data_check_action = DataCheckAction(
        action_code, None, metadata={"same detail": "same same same"}
    )
    data_check_action_eq = DataCheckAction(
        expected_code, None, metadata={"same detail": "same same same"}
    )

    assert data_check_action == data_check_action
    assert data_check_action == data_check_action_eq
    assert data_check_action.to_dict() == data_check_action_eq.to_dict()
35 changes: 35 additions & 0 deletions evalml/tests/data_checks_tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import pytest

from evalml.data_checks import DataCheckActionCode
from evalml.data_checks.utils import handle_data_check_action_code
from evalml.problem_types import ProblemTypes


def test_handle_action_code_errors():
    """Invalid strings raise KeyError; non-str, non-enum inputs raise ValueError."""
    with pytest.raises(KeyError, match="Action code 'dropping cols' does not"):
        handle_data_check_action_code("dropping cols")

    wrong_type_message = "`handle_data_check_action_code` was not passed a str or DataCheckActionCode object"
    # None and an enum of a different type must both be rejected the same way.
    for invalid_input in (None, ProblemTypes.BINARY):
        with pytest.raises(ValueError, match=wrong_type_message):
            handle_data_check_action_code(invalid_input)
bchen1116 marked this conversation as resolved.
Show resolved Hide resolved


@pytest.mark.parametrize(
    "action_code,expected_code",
    [
        ("drop_rows", DataCheckActionCode.DROP_ROWS),
        ("Drop_col", DataCheckActionCode.DROP_COL),
        ("TRANSFORM_TARGET", DataCheckActionCode.TRANSFORM_TARGET),
        (DataCheckActionCode.IMPUTE_COL, DataCheckActionCode.IMPUTE_COL),
    ],
)
def test_handle_action_code(action_code, expected_code):
    """Strings in any letter case, and enum members, resolve to the expected member."""
    resolved_code = handle_data_check_action_code(action_code)
    assert resolved_code == expected_code