alteryx · ParthivNaresh · Apr 5, 2023 · Mar 21, 2023 · Mar 21, 2023 · Mar 28, 2023
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -2,6 +2,7 @@ Release Notes
 -------------
 **Future Releases**
     * Enhancements
+        * Added a ``null_strategy`` parameter to ``InvalidTargetDataCheck`` so it can return a ``DROP_ROWS`` ``DataCheckActionOption`` for partially null targets :pr:`4116`
     * Fixes
     * Changes
         * Removed unnecessary logic from imputer components prior to nullable type handling :pr:`4038`, :pr:`4043`

diff --git a/evalml/data_checks/data_check_message.py b/evalml/data_checks/data_check_message.py
@@ -7,9 +7,10 @@ class DataCheckMessage:
 
     Args:
         message (str): Message string.
-        data_check_name (str): Name of data check.
-        message_code (DataCheckMessageCode): Message code associated with message. Defaults to None.
-        details (dict): Additional useful information associated with the message. Defaults to None.
+        data_check_name (str): Name of the associated data check.
+        message_code (DataCheckMessageCode, optional): Message code associated with the message. Defaults to None.
+        details (dict, optional): Additional useful information associated with the message. Defaults to None.
+        action_options (list, optional): A list of `DataCheckActionOption`s associated with the message. Defaults to None.
     """
 
     message_type = None

diff --git a/evalml/data_checks/invalid_target_data_check.py b/evalml/data_checks/invalid_target_data_check.py
@@ -37,16 +37,25 @@ class InvalidTargetDataCheck(DataCheck):
         objective (str or ObjectiveBase): Name or instance of the objective class.
         n_unique (int): Number of unique target values to store when problem type is binary and target
             incorrectly has more than 2 unique values. Non-negative integer. If None, stores all unique values. Defaults to 100.
+        null_strategy (str): The type of action option that should be returned if the target is partially null.
+            The options are `impute` and `drop` (matched case-insensitively). Defaults to `impute`.
+            `impute` - Will return a `DataCheckActionOption` for imputing the target column.
+            `drop` - Will return a `DataCheckActionOption` for dropping the null rows in the target column.
     """
 
     multiclass_continuous_threshold = 0.05
 
-    def __init__(self, problem_type, objective, n_unique=100):
+    def __init__(self, problem_type, objective, n_unique=100, null_strategy="impute"):
         self.problem_type = handle_problem_types(problem_type)
         self.objective = get_objective(objective)
         if n_unique is not None and n_unique <= 0:
             raise ValueError("`n_unique` must be a non-negative integer value.")
         self.n_unique = n_unique
+        if str(null_strategy).lower() not in ["impute", "drop"]:  # non-str is rejected
+            raise ValueError(
+                "The acceptable values for 'null_strategy' are 'impute' and 'drop'.",
+            )
+        self.null_strategy = null_strategy
 
     def validate(self, X, y):
         """Check if the target data is considered invalid. If the input features argument is not None, it will be used to check that the target and features have the same dimensions and indices.
@@ -243,6 +252,37 @@ def _check_target_has_nan(self, y, messages):
         elif null_rows.any():
             num_null_rows = null_rows.sum()
             pct_null_rows = null_rows.mean() * 100
+            rows_to_drop = null_rows.loc[null_rows].index.tolist()
+
+            action_options = []
+            impute_action_option = DataCheckActionOption(
+                DataCheckActionCode.IMPUTE_COL,
+                data_check_name=self.name,
+                parameters={
+                    "impute_strategy": {
+                        "parameter_type": DCAOParameterType.GLOBAL,
+                        "type": "category",
+                        "categories": ["mean", "most_frequent"]
+                        if is_regression(self.problem_type)
+                        else ["most_frequent"],
+                        "default_value": "mean"
+                        if is_regression(self.problem_type)
+                        else "most_frequent",
+                    },
+                },
+                metadata={"is_target": True},
+            )
+            drop_action_option = DataCheckActionOption(
+                DataCheckActionCode.DROP_ROWS,
+                data_check_name=self.name,
+                metadata={"is_target": True, "rows": rows_to_drop},
+            )
+
+            if self.null_strategy.lower() == "impute":
+                action_options.append(impute_action_option)
+            elif self.null_strategy.lower() == "drop":
+                action_options.append(drop_action_option)
+
             messages.append(
                 DataCheckError(
                     message="{} row(s) ({}%) of target values are null".format(
@@ -255,25 +295,7 @@ def _check_target_has_nan(self, y, messages):
                         "num_null_rows": num_null_rows,
                         "pct_null_rows": pct_null_rows,
                     },
-                    action_options=[
-                        DataCheckActionOption(
-                            DataCheckActionCode.IMPUTE_COL,
-                            data_check_name=self.name,
-                            parameters={
-                                "impute_strategy": {
-                                    "parameter_type": DCAOParameterType.GLOBAL,
-                                    "type": "category",
-                                    "categories": ["mean", "most_frequent"]
-                                    if is_regression(self.problem_type)
-                                    else ["most_frequent"],
-                                    "default_value": "mean"
-                                    if is_regression(self.problem_type)
-                                    else "most_frequent",
-                                },
-                            },
-                            metadata={"is_target": True},
-                        ),
-                    ],
+                    action_options=action_options,
                 ).to_dict(),
             )
 

diff --git a/evalml/tests/data_checks_tests/test_invalid_target_data_check.py b/evalml/tests/data_checks_tests/test_invalid_target_data_check.py
@@ -34,6 +34,19 @@ def test_invalid_target_data_check_invalid_n_unique():
         )
 
 
+@pytest.mark.parametrize("null_strategy", ["invalid", None])
+def test_invalid_target_data_check_invalid_null_strategy(null_strategy):
+    with pytest.raises(
+        ValueError,
+        match="The acceptable values for 'null_strategy' are 'impute' and 'drop'.",
+    ):
+        InvalidTargetDataCheck(
+            "regression",
+            get_default_primary_search_objective("regression"),
+            null_strategy=null_strategy,
+        )
+
+
 def test_invalid_target_data_check_nan_error():
     X = pd.DataFrame({"col": [1, 2, 3]})
     invalid_targets_check = InvalidTargetDataCheck(
@@ -148,6 +161,55 @@ def test_invalid_target_y_none():
     ]
 
 
+@pytest.mark.parametrize("null_strategy", ["Impute", "DROP"])
+def test_invalid_target_data_null_strategies(null_strategy):
+    invalid_targets_check = InvalidTargetDataCheck(
+        "regression",
+        get_default_primary_search_objective("regression"),
+        null_strategy=null_strategy,
+    )
+
+    expected_action_options = []
+    impute_action_option = DataCheckActionOption(
+        DataCheckActionCode.IMPUTE_COL,
+        data_check_name=invalid_targets_data_check_name,
+        parameters={
+            "impute_strategy": {
+                "parameter_type": DCAOParameterType.GLOBAL,
+                "type": "category",
+                "categories": ["mean", "most_frequent"],
+                "default_value": "mean",
+            },
+        },
+        metadata={"is_target": True},
+    )
+    drop_action_option = DataCheckActionOption(
+        DataCheckActionCode.DROP_ROWS,
+        data_check_name=invalid_targets_data_check_name,
+        metadata={"is_target": True, "rows": [0, 3]},
+    )
+    if null_strategy.lower() == "impute":
+        expected_action_options.append(impute_action_option)
+    elif null_strategy.lower() == "drop":
+        expected_action_options.append(drop_action_option)
+
+    expected = [
+        DataCheckError(
+            message="2 row(s) (40.0%) of target values are null",
+            data_check_name=invalid_targets_data_check_name,
+            message_code=DataCheckMessageCode.TARGET_HAS_NULL,
+            details={"num_null_rows": 2, "pct_null_rows": 40.0},
+            action_options=expected_action_options,
+        ).to_dict(),
+    ]
+
+    y = pd.Series([None, 3.5, 2.8, None, 0])
+    X = pd.DataFrame({"col": range(len(y))})
+
+    messages = invalid_targets_check.validate(X, y)
+    assert messages == expected
+
+
 def test_invalid_target_data_input_formats():
     invalid_targets_check = InvalidTargetDataCheck(
         "binary",