
Update DateTimeFormatDataCheck with actions and make pipeline from actions #3454

Merged
merged 16 commits on Apr 14, 2022
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -3,6 +3,7 @@

**Future Releases**
* Enhancements
* Updated ``make_pipeline_from_data_check_output`` to work with time series problems. :pr:`3454`
* Added ``TimeSeriesImputer`` component :pr:`3374`
* Replaced ``pipeline_parameters`` and ``custom_hyperparameters`` with ``search_parameters`` in ``AutoMLSearch`` :pr:`3373`, :pr:`3427`
* Added ``TimeSeriesRegularizer`` to smooth uninferrable date ranges for time series problems :pr:`3376`
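
For context, a minimal sketch of the flow this release note describes: run the data check, then hand its output to make_pipeline_from_data_check_output. The import path, signature, and the problem_configuration keyword are assumed from this version of evalml; treat the sketch as illustrative, not definitive.

    import pandas as pd
    from evalml.data_checks import DateTimeFormatDataCheck
    from evalml.pipelines.utils import make_pipeline_from_data_check_output

    # Uneven dates: a daily run of 9 days followed by a jump to 2021-01-31.
    X = pd.DataFrame(
        pd.date_range("2021-01-01", periods=9).append(pd.date_range("2021-01-31", periods=50)),
        columns=["dates"],
    )
    y = pd.Series(range(len(X)))

    # The data check output now carries REGULARIZE_AND_IMPUTE_DATASET action options.
    results = DateTimeFormatDataCheck(datetime_column="dates").validate(X, y)

    # Build a pipeline (e.g. TimeSeriesRegularizer + TimeSeriesImputer) from those actions.
    # problem_configuration values are illustrative; a time series problem type likely needs them.
    pipeline = make_pipeline_from_data_check_output(
        "time series regression",
        results,
        problem_configuration={"time_index": "dates", "gap": 1, "forecast_horizon": 1, "max_delay": 1},
    )
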
4 changes: 4 additions & 0 deletions evalml/data_checks/data_check_action_code.py
@@ -19,6 +19,9 @@ class DataCheckActionCode(Enum):
TRANSFORM_TARGET = "transform_target"
"""Action code for transforming the target data."""

REGULARIZE_AND_IMPUTE_DATASET = "regularize_and_impute_dataset"
"""Action code for regularizing and imputing all features and target time series data."""

@classproperty
def _all_values(cls):
return {code.value.upper(): code for code in list(cls)}
@@ -30,5 +33,6 @@ def __str__(self):
DataCheckActionCode.DROP_ROWS.name: "drop_rows",
DataCheckActionCode.IMPUTE_COL.name: "impute_col",
DataCheckActionCode.TRANSFORM_TARGET.name: "transform_target",
DataCheckActionCode.REGULARIZE_AND_IMPUTE_DATASET.name: "regularize_and_impute_dataset",
}
return datacheck_action_code_dict[self.name]
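
A quick check of the new member's string form, per the mapping above (runnable against this branch):

    from evalml.data_checks import DataCheckActionCode

    # The __str__ mapping above resolves the new member to its snake_case value.
    assert str(DataCheckActionCode.REGULARIZE_AND_IMPUTE_DATASET) == "regularize_and_impute_dataset"
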
3 changes: 3 additions & 0 deletions evalml/data_checks/data_check_message_code.py
@@ -109,6 +109,9 @@ class DataCheckMessageCode(Enum):
DATETIME_INFORMATION_NOT_FOUND = "datetime_information_not_found"
"""Message code for when datetime information can not be found or is in an unaccepted format."""

DATETIME_NO_FREQUENCY_INFERRED = "datetime_no_frequency_inferred"
"""Message code for when no frequency can be inferred in the datetime values."""

DATETIME_HAS_UNEVEN_INTERVALS = "datetime_has_uneven_intervals"
"""Message code for when the datetime values have uneven intervals."""

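
Since validate() returns plain dicts (see the doctests below), a consumer can match the new message code by its string value. A minimal sketch with stand-in results:

    # Stand-in validation output; real result dicts carry more keys (see the doctests below).
    results = [
        {"code": "DATETIME_NO_FREQUENCY_INFERRED", "level": "error"},
        {"code": "DATETIME_HAS_NAN", "level": "error"},
    ]
    no_freq = [r for r in results if r["code"] == "DATETIME_NO_FREQUENCY_INFERRED"]
    assert len(no_freq) == 1
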
214 changes: 209 additions & 5 deletions evalml/data_checks/datetime_format_data_check.py
@@ -2,7 +2,14 @@
import pandas as pd
from woodwork.statistics_utils import infer_frequency

-from evalml.data_checks import DataCheck, DataCheckError, DataCheckMessageCode
+from evalml.data_checks import (
+    DataCheck,
+    DataCheckActionCode,
+    DataCheckActionOption,
+    DataCheckError,
+    DataCheckMessageCode,
+    DCAOParameterType,
+)
from evalml.utils import infer_feature_types


@@ -43,7 +50,7 @@ def validate(self, X, y):
... "message": "No frequency could be detected in column 'dates', possibly due to uneven intervals.",
... "data_check_name": "DateTimeFormatDataCheck",
... "level": "error",
... "code": "DATETIME_HAS_UNEVEN_INTERVALS",
... "code": "DATETIME_NO_FREQUENCY_INFERRED",
... "details": {"columns": None, "rows": None},
... "action_options": []
... }
@@ -53,6 +60,7 @@

>>> X = pd.DataFrame(pd.date_range("2021-01-01", periods=9).append(pd.date_range("2021-01-31", periods=50)), columns=["dates"])
>>> y = pd.Series([0, 1, 0, 1, 1, 0, 0, 0, 1, 0])
>>> ww_payload = infer_frequency(X["dates"], debug=True, window_length=5, threshold=0.8)
>>> datetime_format_dc = DateTimeFormatDataCheck(datetime_column="dates")
>>> assert datetime_format_dc.validate(X, y) == [
... {
@@ -62,13 +70,44 @@ def validate(self, X, y):
... "code": "DATETIME_IS_MISSING_VALUES",
... "details": {"columns": None, "rows": None},
... "action_options": []
... }
... },
... {
... "message": "A frequency was detected in column 'dates', but there are faulty datetime values that need to be addressed.",
... "data_check_name": "DateTimeFormatDataCheck",
... "level": "error",
... "code": "DATETIME_HAS_UNEVEN_INTERVALS",
... "details": {'columns': None, 'rows': None},
... "action_options": [
... {
... 'code': 'REGULARIZE_AND_IMPUTE_DATASET',
... 'data_check_name': 'DateTimeFormatDataCheck',
... 'metadata': {
... 'columns': None,
... 'is_target': True,
... 'rows': None
... },
... 'parameters': {
... 'time_index': {
... 'default_value': 'dates',
... 'parameter_type': 'global',
... 'type': 'str'
... },
... 'frequency_payload': {
... 'default_value': ww_payload,
... 'parameter_type': 'global',
... 'type': 'tuple'
... }
... }
... }
... ]
... }
... ]

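The ww_payload used above comes straight from woodwork: with debug=True, infer_frequency returns the inferred frequency together with a debug object describing the faulty values, and that whole return value is what gets stored as the action option's frequency_payload (hence "type": "tuple"). A rough sketch, with the return shape assumed from woodwork's API as used in this diff:

    import pandas as pd
    from woodwork.statistics_utils import infer_frequency

    dates = pd.Series(pd.date_range("2021-01-01", periods=9).append(pd.date_range("2021-01-31", periods=50)))
    # debug=True returns a tuple rather than just a frequency string; the whole
    # tuple is forwarded as the frequency_payload default value in the action option.
    ww_payload = infer_frequency(dates, debug=True, window_length=5, threshold=0.8)
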
The column "dates" has a repeat of the date 2021-01-09 appended to the end, which is considered redundant and will raise an error.

>>> X = pd.DataFrame(pd.date_range("2021-01-01", periods=9).append(pd.date_range("2021-01-09", periods=1)), columns=["dates"])
>>> y = pd.Series([0, 1, 0, 1, 1, 0, 0, 0, 1, 0])
>>> ww_payload = infer_frequency(X["dates"], debug=True, window_length=5, threshold=0.8)
>>> datetime_format_dc = DateTimeFormatDataCheck(datetime_column="dates")
>>> assert datetime_format_dc.validate(X, y) == [
... {
@@ -78,12 +117,43 @@ def validate(self, X, y):
... "code": "DATETIME_HAS_REDUNDANT_ROW",
... "details": {"columns": None, "rows": None},
... "action_options": []
... }
... },
... {
... "message": "A frequency was detected in column 'dates', but there are faulty datetime values that need to be addressed.",
... "data_check_name": "DateTimeFormatDataCheck",
... "level": "error",
... "code": "DATETIME_HAS_UNEVEN_INTERVALS",
... "details": {'columns': None, 'rows': None},
... "action_options": [
... {
... 'code': 'REGULARIZE_AND_IMPUTE_DATASET',
... 'data_check_name': 'DateTimeFormatDataCheck',
... 'metadata': {
... 'columns': None,
... 'is_target': True,
... 'rows': None
... },
... 'parameters': {
... 'time_index': {
... 'default_value': 'dates',
... 'parameter_type': 'global',
... 'type': 'str'
... },
... 'frequency_payload': {
... 'default_value': ww_payload,
... 'parameter_type': 'global',
... 'type': 'tuple'
... }
... }
... }
... ]
... }
... ]

The column "Weeks" has a date that does not follow the weekly pattern, which is considered misaligned.

>>> X = pd.DataFrame(pd.date_range("2021-01-01", freq="W", periods=12).append(pd.date_range("2021-03-22", periods=1)), columns=["Weeks"])
>>> ww_payload = infer_frequency(X["Weeks"], debug=True, window_length=5, threshold=0.8)
>>> datetime_format_dc = DateTimeFormatDataCheck(datetime_column="Weeks")
>>> assert datetime_format_dc.validate(X, y) == [
... {
Expand All @@ -93,7 +163,83 @@ def validate(self, X, y):
... "details": {"columns": None, "rows": None},
... "code": "DATETIME_HAS_MISALIGNED_VALUES",
... "action_options": []
... }
... },
... {
... "message": "A frequency was detected in column 'Weeks', but there are faulty datetime values that need to be addressed.",
... "data_check_name": "DateTimeFormatDataCheck",
... "level": "error",
... "code": "DATETIME_HAS_UNEVEN_INTERVALS",
... "details": {'columns': None, 'rows': None},
... "action_options": [
... {
... 'code': 'REGULARIZE_AND_IMPUTE_DATASET',
... 'data_check_name': 'DateTimeFormatDataCheck',
... 'metadata': {
... 'columns': None,
... 'is_target': True,
... 'rows': None
... },
... 'parameters': {
... 'time_index': {
... 'default_value': 'Weeks',
... 'parameter_type': 'global',
... 'type': 'str'
... },
... 'frequency_payload': {
... 'default_value': ww_payload,
... 'parameter_type': 'global',
... 'type': 'tuple'
... }
... }
... }
... ]
... }
... ]

The column "Weeks" has a date that does not follow the weekly pattern, which is considered misaligned.

>>> X = pd.DataFrame(pd.date_range("2021-01-01", freq="W", periods=12).append(pd.date_range("2021-03-22", periods=1)), columns=["Weeks"])
>>> ww_payload = infer_frequency(X["Weeks"], debug=True, window_length=5, threshold=0.8)
>>> datetime_format_dc = DateTimeFormatDataCheck(datetime_column="Weeks")
>>> assert datetime_format_dc.validate(X, y) == [
... {
... "message": "Column 'Weeks' has datetime values that do not align with the inferred frequency.",
... "data_check_name": "DateTimeFormatDataCheck",
... "level": "error",
... "details": {"columns": None, "rows": None},
... "code": "DATETIME_HAS_MISALIGNED_VALUES",
... "action_options": []
... },
... {
... "message": "A frequency was detected in column 'Weeks', but there are faulty datetime values that need to be addressed.",
... "data_check_name": "DateTimeFormatDataCheck",
... "level": "error",
... "code": "DATETIME_HAS_UNEVEN_INTERVALS",
... "details": {'columns': None, 'rows': None},
... "action_options": [
... {
... 'code': 'REGULARIZE_AND_IMPUTE_DATASET',
... 'data_check_name': 'DateTimeFormatDataCheck',
... 'metadata': {
... 'columns': None,
... 'is_target': True,
... 'rows': None
... },
... 'parameters': {
... 'time_index': {
... 'default_value': 'Weeks',
... 'parameter_type': 'global',
... 'type': 'str'
... },
... 'frequency_payload': {
... 'default_value': ww_payload,
... 'parameter_type': 'global',
... 'type': 'tuple'
... }
... }
... }
... ]
... }
... ]

The column "Weeks" passed integers instead of datetime data, which will raise an error.
@@ -154,6 +300,7 @@ def validate(self, X, y):
... ["2-12-21", "3-12-21"]]
>>> dates[0][0] = None
>>> df = pd.DataFrame(dates, columns=["days", "days2"])
>>> ww_payload = infer_frequency(pd.to_datetime(df["days"]), debug=True, window_length=5, threshold=0.8)
>>> datetime_format_dc = DateTimeFormatDataCheck(datetime_column="days")
>>> assert datetime_format_dc.validate(df, y) == [
... {
@@ -163,6 +310,36 @@ def validate(self, X, y):
... "details": {"columns": None, "rows": None},
... "code": "DATETIME_HAS_NAN",
... "action_options": []
... },
... {
... "message": "A frequency was detected in column 'days', but there are faulty datetime values that need to be addressed.",
... "data_check_name": "DateTimeFormatDataCheck",
... "level": "error",
... "code": "DATETIME_HAS_UNEVEN_INTERVALS",
... "details": {'columns': None, 'rows': None},
... "action_options": [
... {
... 'code': 'REGULARIZE_AND_IMPUTE_DATASET',
... 'data_check_name': 'DateTimeFormatDataCheck',
... 'metadata': {
... 'columns': None,
... 'is_target': True,
... 'rows': None
... },
... 'parameters': {
... 'time_index': {
... 'default_value': 'days',
... 'parameter_type': 'global',
... 'type': 'str'
... },
... 'frequency_payload': {
... 'default_value': ww_payload,
... 'parameter_type': 'global',
... 'type': 'tuple'
... }
... }
... }
... ]
... }
... ]
...
@@ -270,7 +447,34 @@ def validate(self, X, y):
DataCheckError(
message=f"No frequency could be detected in column '{col_name}', possibly due to uneven intervals.",
data_check_name=self.name,
message_code=DataCheckMessageCode.DATETIME_NO_FREQUENCY_INFERRED,
).to_dict()
)
else:
messages.append(
DataCheckError(

Contributor: We're adding this new error instead of adding it to every one of the already existing data check errors to avoid having duplicate data check actions, right?

I think this may be confusing UX for users, because they'll see multiple errors but only "DATETIME_HAS_UNEVEN_INTERVALS" will appear "fixable" via an action, even though this action will fix all the other errors.

This may be the best we can do for now. Tagging @Cmancuso so we can discuss further.

Contributor: @ParthivNaresh and I talked about this - errors will be consolidated in the future.

message=f"A frequency was detected in column '{col_name}', but there are faulty datetime values that need to be addressed.",
data_check_name=self.name,
message_code=DataCheckMessageCode.DATETIME_HAS_UNEVEN_INTERVALS,
action_options=[
DataCheckActionOption(
DataCheckActionCode.REGULARIZE_AND_IMPUTE_DATASET,
data_check_name=self.name,
parameters={
"time_index": {
"parameter_type": DCAOParameterType.GLOBAL,
"type": "str",
"default_value": col_name,
},
"frequency_payload": {
"parameter_type": DCAOParameterType.GLOBAL,
"type": "tuple",
"default_value": ww_payload,
},
},
metadata={"is_target": True},

Contributor: We're not using is_target anywhere, right?

Contributor (Author): An EvalML consumer might check for is_target when running data check actions, to determine whether the target has been passed and to raise an error if it hasn't when the target is being modified. I felt like that case needed to be covered, but if it doesn't, I have no problem taking it out.

Contributor: Happy to keep it! Just wondering why, since I didn't see it being "used".

)
],
).to_dict()
)

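
To make the new action concrete, here is a hedged sketch of how a consumer could turn the option's parameters into the regularizer the action implies. build_regularizer_from_option is a hypothetical helper, and it assumes TimeSeriesRegularizer accepts time_index and frequency_payload as constructor arguments; nothing below is part of this diff:

    from evalml.pipelines.components import TimeSeriesRegularizer

    def build_regularizer_from_option(option_dict):
        # option_dict matches the "action_options" entries shown in the doctests above.
        params = option_dict["parameters"]
        # Assumption: TimeSeriesRegularizer takes these two parameters directly.
        return TimeSeriesRegularizer(
            time_index=params["time_index"]["default_value"],
            frequency_payload=params["frequency_payload"]["default_value"],
        )
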
4 changes: 2 additions & 2 deletions evalml/data_checks/default_data_checks.py
@@ -40,8 +40,8 @@ class DefaultDataChecks(DataChecks):
problem_type (str): The problem type that is being validated. Can be regression, binary, or multiclass.
objective (str or ObjectiveBase): Name or instance of the objective class.
n_splits (int): The number of splits as determined by the data splitter being used. Defaults to 3.
-datetime_column (str): The name of the column containing datetime information to be used for time series problems.
-    Default to "index" indicating that the datetime information is in the index of X or y.
+problem_configuration (dict): Required for time series problem types. Values should be passed in for time_index,
+    gap, forecast_horizon, and max_delay.
"""

_DEFAULT_DATA_CHECK_CLASSES = [
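
A sketch of constructing the default checks under the updated docstring; the objective and configuration values are illustrative only:

    from evalml.data_checks import DefaultDataChecks

    # problem_configuration replaces the old datetime_column argument for time series problems.
    data_checks = DefaultDataChecks(
        problem_type="time series regression",
        objective="MedianAE",
        n_splits=3,
        problem_configuration={
            "time_index": "dates",
            "gap": 1,
            "forecast_horizon": 1,
            "max_delay": 1,
        },
    )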