2 changes: 1 addition & 1 deletion docs/source/api_reference.rst
@@ -247,7 +247,7 @@ Regressors are components that output a predicted target value.
RandomForestRegressor
XGBoostRegressor
BaselineRegressor
TimeSeriesBaselineRegressor
TimeSeriesBaselineEstimator
StackedEnsembleRegressor
DecisionTreeRegressor
LightGBMRegressor
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -11,6 +11,7 @@ Release Notes
* Support graphviz 0.16 :pr:`1657`
* Enhanced time series pipelines to accept empty features :pr:`1651`
* Added support for list inputs for objectives :pr:`1663`
* Added support for ``AutoMLSearch`` to handle time series classification pipelines :pr:`1666`
* Fixes
* Fixed thresholding for pipelines in ``AutoMLSearch`` to only threshold binary classification pipelines :pr:`1622` :pr:`1626`
* Updated ``load_data`` to return Woodwork structures and update default parameter value for ``index`` to ``None`` :pr:`1610`
10 changes: 8 additions & 2 deletions evalml/automl/automl_search.py
@@ -41,6 +41,8 @@
ModeBaselineBinaryPipeline,
ModeBaselineMulticlassPipeline,
PipelineBase,
TimeSeriesBaselineBinaryPipeline,
TimeSeriesBaselineMulticlassPipeline,
TimeSeriesBaselineRegressionPipeline
)
from evalml.pipelines.components.utils import get_estimators
@@ -634,10 +636,14 @@ def _add_baseline_pipelines(self):
elif self.problem_type == ProblemTypes.REGRESSION:
baseline = MeanBaselineRegressionPipeline(parameters={})
else:
pipeline_class = {ProblemTypes.TIME_SERIES_REGRESSION: TimeSeriesBaselineRegressionPipeline,
ProblemTypes.TIME_SERIES_MULTICLASS: TimeSeriesBaselineMulticlassPipeline,
ProblemTypes.TIME_SERIES_BINARY: TimeSeriesBaselineBinaryPipeline}[self.problem_type]
gap = self.problem_configuration['gap']
max_delay = self.problem_configuration['max_delay']
baseline = TimeSeriesBaselineRegressionPipeline(parameters={"pipeline": {"gap": gap, "max_delay": max_delay},
"Time Series Baseline Regressor": {"gap": gap, "max_delay": max_delay}})
baseline = pipeline_class(parameters={"pipeline": {"gap": gap, "max_delay": max_delay},
"Time Series Baseline Estimator": {"gap": gap, "max_delay": max_delay}})

pipelines = [baseline]
scores = self._evaluate_pipelines(pipelines, baseline=True)
if scores == []:
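The hunk above replaces the hard-coded regression baseline with a dispatch on the time series problem type. A minimal standalone sketch of the same pattern (illustrative only; it assumes an evalml build that includes this PR, and the gap/max_delay values are made up):

from evalml.pipelines import (
    TimeSeriesBaselineBinaryPipeline,
    TimeSeriesBaselineMulticlassPipeline,
    TimeSeriesBaselineRegressionPipeline
)
from evalml.problem_types import ProblemTypes


def make_time_series_baseline(problem_type, gap, max_delay):
    """Mirror the dispatch in _add_baseline_pipelines: pick the baseline pipeline
    class for the time series problem type and pass gap/max_delay to both the
    pipeline-level and estimator-level parameters."""
    pipeline_class = {ProblemTypes.TIME_SERIES_REGRESSION: TimeSeriesBaselineRegressionPipeline,
                      ProblemTypes.TIME_SERIES_MULTICLASS: TimeSeriesBaselineMulticlassPipeline,
                      ProblemTypes.TIME_SERIES_BINARY: TimeSeriesBaselineBinaryPipeline}[problem_type]
    ts_params = {"gap": gap, "max_delay": max_delay}
    return pipeline_class(parameters={"pipeline": ts_params,
                                      "Time Series Baseline Estimator": ts_params})


baseline = make_time_series_baseline(ProblemTypes.TIME_SERIES_BINARY, gap=1, max_delay=2)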
14 changes: 9 additions & 5 deletions evalml/automl/utils.py
@@ -5,7 +5,11 @@
TimeSeriesSplit,
TrainingValidationSplit
)
from evalml.problem_types import ProblemTypes, handle_problem_types
from evalml.problem_types import (
ProblemTypes,
handle_problem_types,
is_time_series
)

_LARGE_DATA_ROW_THRESHOLD = int(1e5)

@@ -25,7 +29,9 @@ def get_default_primary_search_objective(problem_type):
objective_name = {'binary': 'Log Loss Binary',
'multiclass': 'Log Loss Multiclass',
'regression': 'R2',
'time series regression': 'R2'}[problem_type.value]
'time series regression': 'R2',
'time series binary': 'Log Loss Binary',
'time series multiclass': 'Log Loss Multiclass'}[problem_type.value]
return get_objective(objective_name, return_instance=True)
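
A quick usage check of the new mapping (a sketch assuming this PR; the expected names come straight from the dictionary above):

from evalml.automl.utils import get_default_primary_search_objective
from evalml.problem_types import ProblemTypes

objective = get_default_primary_search_objective(ProblemTypes.TIME_SERIES_BINARY)
print(objective.name)  # expected: "Log Loss Binary", same default as plain binary classification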


@@ -51,9 +57,7 @@ def make_data_splitter(X, y, problem_type, problem_configuration=None, n_splits=
data_splitter = KFold(n_splits=n_splits, random_state=random_state, shuffle=shuffle)
elif problem_type in [ProblemTypes.BINARY, ProblemTypes.MULTICLASS]:
data_splitter = StratifiedKFold(n_splits=n_splits, random_state=random_state, shuffle=shuffle)
elif problem_type in [ProblemTypes.TIME_SERIES_REGRESSION,
ProblemTypes.TIME_SERIES_BINARY,
ProblemTypes.TIME_SERIES_MULTICLASS]:
elif is_time_series(problem_type):
if not problem_configuration:
raise ValueError("problem_configuration is required for time series problem types")
data_splitter = TimeSeriesSplit(n_splits=n_splits, gap=problem_configuration.get('gap'),
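Sketch of how the new is_time_series branch behaves (assumes this PR; the toy data and configuration values are made up):

import numpy as np
from evalml.automl.utils import make_data_splitter
from evalml.preprocessing import TimeSeriesSplit
from evalml.problem_types import ProblemTypes

X = np.random.rand(100, 2)
y = np.random.randint(0, 2, size=100)

# Time series problem types now require a problem_configuration...
splitter = make_data_splitter(X, y, ProblemTypes.TIME_SERIES_BINARY,
                              problem_configuration={"gap": 1, "max_delay": 2})
assert isinstance(splitter, TimeSeriesSplit)

# ...and omitting it raises:
# ValueError: problem_configuration is required for time series problem types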
2 changes: 1 addition & 1 deletion evalml/pipelines/__init__.py
@@ -52,5 +52,5 @@
from .regression import (
BaselineRegressionPipeline,
MeanBaselineRegressionPipeline,
TimeSeriesBaselineRegressionPipeline
)
from .time_series_baselines import TimeSeriesBaselineRegressionPipeline, TimeSeriesBaselineBinaryPipeline, TimeSeriesBaselineMulticlassPipeline
2 changes: 1 addition & 1 deletion evalml/pipelines/components/__init__.py
@@ -19,7 +19,7 @@
BaselineRegressor,
DecisionTreeClassifier,
DecisionTreeRegressor,
TimeSeriesBaselineRegressor
TimeSeriesBaselineEstimator
)
from .transformers import (
Transformer,
2 changes: 1 addition & 1 deletion evalml/pipelines/components/estimators/__init__.py
@@ -16,5 +16,5 @@
ElasticNetRegressor,
ExtraTreesRegressor,
BaselineRegressor,
TimeSeriesBaselineRegressor,
TimeSeriesBaselineEstimator,
DecisionTreeRegressor)
2 changes: 1 addition & 1 deletion evalml/pipelines/components/estimators/regressors/__init__.py
@@ -7,4 +7,4 @@
from .et_regressor import ExtraTreesRegressor
from .baseline_regressor import BaselineRegressor
from .decision_tree_regressor import DecisionTreeRegressor
from .time_series_baseline_regressor import TimeSeriesBaselineRegressor
from .time_series_baseline_estimator import TimeSeriesBaselineEstimator
evalml/pipelines/components/estimators/regressors/time_series_baseline_regressor.py → time_series_baseline_estimator.py (renamed)
@@ -6,23 +6,25 @@
from evalml.problem_types import ProblemTypes
from evalml.utils.gen_utils import (
_convert_to_woodwork_structure,
_convert_woodwork_types_wrapper
_convert_woodwork_types_wrapper,
pad_with_nans
)


class TimeSeriesBaselineRegressor(Estimator):
"""Time series regressor that predicts using the naive forecasting approach.
class TimeSeriesBaselineEstimator(Estimator):
"""Time series estimator that predicts using the naive forecasting approach.

This is useful as a simple baseline regressor for time series problems
This is useful as a simple baseline estimator for time series problems
"""
name = "Time Series Baseline Regressor"
name = "Time Series Baseline Estimator"
hyperparameter_ranges = {}
model_family = ModelFamily.BASELINE
supported_problem_types = [ProblemTypes.TIME_SERIES_REGRESSION]
supported_problem_types = [ProblemTypes.TIME_SERIES_REGRESSION, ProblemTypes.TIME_SERIES_BINARY,
ProblemTypes.TIME_SERIES_MULTICLASS]
predict_uses_y = True

def __init__(self, gap=1, random_state=0, **kwargs):
"""Baseline time series regressor that predicts using the naive forecasting approach.
"""Baseline time series estimator that predicts using the naive forecasting approach.

Arguments:
gap (int): gap between the prediction date and the target date; must be a non-negative integer. If gap is 0, the target date will be shifted ahead by 1 time period.
@@ -54,7 +56,7 @@ def fit(self, X, y=None):

def predict(self, X, y=None):
if y is None:
raise ValueError("Cannot predict Time Series Baseline Regressor if y is None")
raise ValueError("Cannot predict Time Series Baseline Estimator if y is None")
y = _convert_to_woodwork_structure(y)
y = _convert_woodwork_types_wrapper(y.to_series())

@@ -63,9 +65,21 @@ def predict(self, X, y=None):

return y

def predict_proba(self, X, y=None):
if y is None:
raise ValueError("Cannot predict Time Series Baseline Estimator if y is None")
y = _convert_to_woodwork_structure(y)
y = _convert_woodwork_types_wrapper(y.to_series())
preds = self.predict(X, y).dropna(axis=0, how='any').astype('int')
proba_arr = np.zeros((len(preds), y.max() + 1))
proba_arr[np.arange(len(preds)), preds] = 1
Comment (Contributor): Nice!

return pad_with_nans(pd.DataFrame(proba_arr), len(y) - len(preds))

@property
def feature_importance(self):
"""Returns importance associated with each feature. Since baseline regressors do not use input features to calculate predictions, returns an array of zeroes.
"""Returns importance associated with each feature.

Since baseline estimators do not use input features to calculate predictions, returns an array of zeroes.

Returns:
np.ndarray (float): an array of zeroes
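A standalone sketch of the one-hot construction used in the new predict_proba (toy values; in the component, preds comes from predict(X, y) with NaNs dropped and the class count is y.max() + 1):

import numpy as np
import pandas as pd

preds = pd.Series([0, 1, 1, 0])                # integer class predictions with NaNs already dropped
n_classes = 2                                  # the component uses y.max() + 1
proba_arr = np.zeros((len(preds), n_classes))
proba_arr[np.arange(len(preds)), preds.to_numpy()] = 1.0  # probability 1 on the predicted class
print(pd.DataFrame(proba_arr))
# any rows that were dropped as NaN are then restored by pad_with_nans, as in the return above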
1 change: 0 additions & 1 deletion evalml/pipelines/regression/__init__.py
@@ -1,2 +1 @@
from .baseline_regression import BaselineRegressionPipeline, MeanBaselineRegressionPipeline
from .time_series_baseline_regression import TimeSeriesBaselineRegressionPipeline

evalml/pipelines/regression/time_series_baseline_regression.py — This file was deleted.

23 changes: 23 additions & 0 deletions evalml/pipelines/time_series_baselines.py
@@ -0,0 +1,23 @@
from evalml.pipelines import (
TimeSeriesBinaryClassificationPipeline,
TimeSeriesMulticlassClassificationPipeline,
TimeSeriesRegressionPipeline
)


class TimeSeriesBaselineRegressionPipeline(TimeSeriesRegressionPipeline):
Comment (Contributor Author): Consolidating all of the baseline pipelines for ts into the same file

Comment (Contributor): I like this

"""Baseline Pipeline for time series regression problems."""
_name = "Time Series Baseline Regression Pipeline"
component_graph = ["Time Series Baseline Estimator"]


class TimeSeriesBaselineBinaryPipeline(TimeSeriesBinaryClassificationPipeline):
"""Baseline Pipeline for time series binary classification problems."""
_name = "Time Series Baseline Binary Pipeline"
component_graph = ["Time Series Baseline Estimator"]


class TimeSeriesBaselineMulticlassPipeline(TimeSeriesMulticlassClassificationPipeline):
"""Baseline Pipeline for time series multiclass classification problems."""
_name = "Time Series Baseline Multiclass Pipeline"
component_graph = ["Time Series Baseline Estimator"]
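
These three pipelines differ only in their base class; they all wrap the single renamed component. A quick check tying them back to that component (assumes this PR):

from evalml.pipelines.components import TimeSeriesBaselineEstimator

print(TimeSeriesBaselineEstimator.name)                     # "Time Series Baseline Estimator"
print(TimeSeriesBaselineEstimator.supported_problem_types)  # time series regression, binary, and multiclass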
6 changes: 5 additions & 1 deletion evalml/pipelines/time_series_classification_pipelines.py
@@ -111,7 +111,11 @@ def predict(self, X, y=None, objective=None):
y = _convert_woodwork_types_wrapper(y.to_series())
n_features = max(len(y), X.shape[0])
predictions = self._predict(X, y, objective=objective, pad=False)
predictions = pd.Series(self._decode_targets(predictions), name=self.input_target_name)

# In case gap is 0 and this is a baseline pipeline, we drop the nans in the
# predictions before decoding them
predictions = pd.Series(self._decode_targets(predictions.dropna()), name=self.input_target_name)
Comment (Contributor): Good catch!


return pad_with_nans(predictions, max(0, n_features - predictions.shape[0]))

def predict_proba(self, X, y=None):
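Sketch of the dropna-then-pad behavior above: when gap is 0 the baseline's predictions start with a NaN, so they are dropped before label decoding and pad_with_nans restores the original length afterwards (toy values; assumes this PR):

import pandas as pd
from evalml.utils.gen_utils import pad_with_nans

decoded = pd.Series(["a", "b", "a"], name="target")   # predictions after dropping the leading NaN
n_features = 4                                         # number of rows in the original input
padded = pad_with_nans(decoded, max(0, n_features - decoded.shape[0]))
print(padded)  # length 4: the three decoded predictions padded back out with one NaN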
8 changes: 6 additions & 2 deletions evalml/pipelines/utils.py
@@ -30,7 +30,11 @@
TextFeaturizer
)
from evalml.pipelines.components.utils import all_components, get_estimators
from evalml.problem_types import ProblemTypes, handle_problem_types
from evalml.problem_types import (
ProblemTypes,
handle_problem_types,
is_time_series
)
from evalml.utils import get_logger
from evalml.utils.gen_utils import _convert_to_woodwork_structure

@@ -67,7 +71,7 @@ def _get_preprocessing_components(X, y, problem_type, text_columns, estimator_cl
if add_datetime_featurizer:
pp_components.append(DateTimeFeaturizer)

if problem_type in [ProblemTypes.TIME_SERIES_REGRESSION]:
if is_time_series(problem_type):
Comment (Contributor Author): So that AutoML can create pipelines for ts classification when allowed_pipelines=None

pp_components.append(DelayedFeatureTransformer)

categorical_cols = X.select('category')
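The is_time_series helper (imported above) is what lets make_pipeline add the DelayedFeatureTransformer for all three time series problem types instead of only regression. A quick check, assuming the helper is importable as shown in the diff:

from evalml.problem_types import ProblemTypes, is_time_series

print(is_time_series(ProblemTypes.TIME_SERIES_MULTICLASS))  # True
print(is_time_series(ProblemTypes.REGRESSION))              # False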
36 changes: 3 additions & 33 deletions evalml/tests/automl_tests/test_automl.py
@@ -48,15 +48,11 @@
BinaryClassificationPipeline,
Estimator,
MulticlassClassificationPipeline,
RegressionPipeline,
TimeSeriesRegressionPipeline
RegressionPipeline
)
from evalml.pipelines.components.utils import get_estimators
from evalml.pipelines.utils import make_pipeline
from evalml.preprocessing.data_splitters import (
TimeSeriesSplit,
TrainingValidationSplit
)
from evalml.preprocessing.data_splitters import TrainingValidationSplit
from evalml.problem_types import ProblemTypes, handle_problem_types
from evalml.tuners import NoParamsException, RandomSearchTuner
from evalml.utils.gen_utils import (
@@ -1976,32 +1972,6 @@ def test_automl_validates_problem_configuration(X_y_binary):
assert problem_config == {"max_delay": 2, "gap": 3}


@patch('evalml.pipelines.TimeSeriesRegressionPipeline.score', return_value={"R2": 0.3})
Comment (Contributor Author): Moving this test to test_automl_search_regression.py

@patch('evalml.pipelines.TimeSeriesRegressionPipeline.fit')
def test_automl_time_series_regression(mock_fit, mock_score, X_y_regression):
X, y = X_y_regression

configuration = {"gap": 0, "max_delay": 0, 'delay_target': False, 'delay_features': True}

class Pipeline1(TimeSeriesRegressionPipeline):
name = "Pipeline 1"
component_graph = ["Delayed Feature Transformer", "Random Forest Regressor"]

class Pipeline2(TimeSeriesRegressionPipeline):
name = "Pipeline 2"
component_graph = ["Delayed Feature Transformer", "Elastic Net Regressor"]

automl = AutoMLSearch(X_train=X, y_train=y, problem_type="time series regression", problem_configuration=configuration,
allowed_pipelines=[Pipeline1, Pipeline2], max_batches=2)
automl.search()
assert isinstance(automl.data_splitter, TimeSeriesSplit)
for result in automl.results['pipeline_results'].values():
if result["id"] == 0:
continue
assert result['parameters']['Delayed Feature Transformer'] == configuration
assert result['parameters']['pipeline'] == configuration


@patch('evalml.objectives.BinaryClassificationObjective.optimize_threshold')
def test_automl_best_pipeline(mock_optimize, X_y_binary):
X, y = X_y_binary
@@ -2085,7 +2055,7 @@ def test_timeseries_baseline_init_with_correct_gap_max_delay(mock_fit, mock_scor

# Best pipeline is baseline pipeline because we only run one iteration
assert automl.best_pipeline.parameters == {"pipeline": {"gap": 6, "max_delay": 3},
"Time Series Baseline Regressor": {"gap": 6, "max_delay": 3}}
"Time Series Baseline Estimator": {"gap": 6, "max_delay": 3}}


@pytest.mark.parametrize('problem_type', [ProblemTypes.BINARY, ProblemTypes.MULTICLASS,
45 changes: 41 additions & 4 deletions evalml/tests/automl_tests/test_automl_search_classification.py
@@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
import pytest
from sklearn.model_selection import StratifiedKFold, TimeSeriesSplit
from sklearn.model_selection import StratifiedKFold
from skopt.space import Categorical

from evalml import AutoMLSearch
@@ -22,10 +22,13 @@
ModeBaselineBinaryPipeline,
ModeBaselineMulticlassPipeline,
MulticlassClassificationPipeline,
PipelineBase
PipelineBase,
TimeSeriesBaselineBinaryPipeline,
TimeSeriesBaselineMulticlassPipeline
)
from evalml.pipelines.components.utils import get_estimators
from evalml.pipelines.utils import make_pipeline
from evalml.preprocessing import TimeSeriesSplit
from evalml.problem_types import ProblemTypes


@@ -77,8 +80,8 @@ def test_data_splitter(X_y_binary):
assert isinstance(automl.rankings, pd.DataFrame)
assert len(automl.results['pipeline_results'][0]["cv_data"]) == cv_folds

automl = AutoMLSearch(X_train=X, y_train=y, problem_type='binary', data_splitter=TimeSeriesSplit(cv_folds), max_iterations=1,
n_jobs=1)
automl = AutoMLSearch(X_train=X, y_train=y, problem_type='binary', data_splitter=TimeSeriesSplit(n_splits=cv_folds),
max_iterations=1, n_jobs=1)
automl.search()

assert isinstance(automl.rankings, pd.DataFrame)
@@ -677,3 +680,37 @@ def test_automl_multiclass_nonlinear_pipeline_search_more_iterations(nonlinear_m
assert start_iteration_callback.call_args_list[0][0][0] == ModeBaselineMulticlassPipeline
assert start_iteration_callback.call_args_list[1][0][0] == nonlinear_multiclass_pipeline_class
assert start_iteration_callback.call_args_list[4][0][0] == nonlinear_multiclass_pipeline_class


@pytest.mark.parametrize('problem_type', [ProblemTypes.TIME_SERIES_MULTICLASS, ProblemTypes.TIME_SERIES_BINARY])
@patch('evalml.pipelines.TimeSeriesMulticlassClassificationPipeline.score')
@patch('evalml.pipelines.TimeSeriesBinaryClassificationPipeline.score')
@patch('evalml.pipelines.TimeSeriesMulticlassClassificationPipeline.fit')
@patch('evalml.pipelines.TimeSeriesBinaryClassificationPipeline.fit')
def test_automl_supports_time_series_classification(mock_binary_fit, mock_multi_fit, mock_binary_score, mock_multiclass_score,
problem_type, X_y_binary, X_y_multi):
if problem_type == ProblemTypes.TIME_SERIES_BINARY:
X, y = X_y_binary
baseline = TimeSeriesBaselineBinaryPipeline
mock_binary_score.return_value = {"Log Loss Binary": 0.2}
problem_type = 'time series binary'
else:
X, y = X_y_multi
baseline = TimeSeriesBaselineMulticlassPipeline
mock_multiclass_score.return_value = {"Log Loss Multiclass": 0.25}
problem_type = 'time series multiclass'

configuration = {"gap": 0, "max_delay": 0, 'delay_target': False, 'delay_features': True}

automl = AutoMLSearch(X_train=X, y_train=y, problem_type=problem_type,
problem_configuration=configuration,
max_batches=2)
automl.search()
assert isinstance(automl.data_splitter, TimeSeriesSplit)
for result in automl.results['pipeline_results'].values():
if result["id"] == 0:
assert result['pipeline_class'] == baseline
continue

assert result['parameters']['Delayed Feature Transformer'] == configuration
assert result['parameters']['pipeline'] == configuration
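
For reference, the user-facing flow this test exercises looks roughly like the following (a hedged sketch with made-up toy data; assumes an evalml build containing this PR and that the small dataset passes the default data checks):

import pandas as pd
from evalml import AutoMLSearch

X = pd.DataFrame({"feature": [i % 7 for i in range(60)]})
y = pd.Series([0, 1] * 30)

automl = AutoMLSearch(X_train=X, y_train=y,
                      problem_type="time series binary",
                      problem_configuration={"gap": 0, "max_delay": 2,
                                             "delay_target": False, "delay_features": True},
                      max_batches=1)
automl.search()
print(automl.rankings)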