alteryx · angela97lin · Aug 18, 2021 · Aug 17, 2021 · Aug 17, 2021 · Aug 17, 2021
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -6,13 +6,16 @@ Release Notes
     * Fixes
     * Changes
         * Deleted ``_put_into_original_order`` helper function :pr:`2639`
+        * Refactored time series pipeline code using a time series pipeline base class :pr:`2649`
     * Documentation Changes
         * Add complete install command to README and Install section :pr:`2627`
     * Testing Changes
 
 .. warning::
 
     **Breaking Changes**
+        * ``TimeSeriesRegressionPipeline`` no longer inherits from ``RegressionPipeline`` :pr:`2649`
+
 
 
 **v0.30.2 Aug. 16, 2021**

diff --git a/evalml/pipelines/time_series_classification_pipelines.py b/evalml/pipelines/time_series_classification_pipelines.py
@@ -6,7 +6,7 @@
 
 from evalml.objectives import get_objective
 from evalml.pipelines.classification_pipeline import ClassificationPipeline
-from evalml.pipelines.pipeline_meta import TimeSeriesPipelineBaseMeta
+from evalml.pipelines.time_series_pipeline_base import TimeSeriesPipelineBase
 from evalml.problem_types import ProblemTypes
 from evalml.utils import (
     drop_rows_with_nans,
@@ -15,9 +15,7 @@
 )
 
 
-class TimeSeriesClassificationPipeline(
-    ClassificationPipeline, metaclass=TimeSeriesPipelineBaseMeta
-):
+class TimeSeriesClassificationPipeline(TimeSeriesPipelineBase, ClassificationPipeline):
     """Pipeline base class for time series classification problems.
 
     Arguments:
@@ -33,37 +31,6 @@ class TimeSeriesClassificationPipeline(
         random_seed (int): Seed for the random number generator. Defaults to 0.
     """
 
-    def __init__(
-        self,
-        component_graph,
-        parameters=None,
-        custom_name=None,
-        random_seed=0,
-    ):
-        if "pipeline" not in parameters:
-            raise ValueError(
-                "date_index, gap, and max_delay parameters cannot be omitted from the parameters dict. "
-                "Please specify them as a dictionary with the key 'pipeline'."
-            )
-        pipeline_params = parameters["pipeline"]
-        self.date_index = pipeline_params["date_index"]
-        self.gap = pipeline_params["gap"]
-        self.max_delay = pipeline_params["max_delay"]
-        super().__init__(
-            component_graph,
-            custom_name=custom_name,
-            parameters=parameters,
-            random_seed=random_seed,
-        )
-
-    @staticmethod
-    def _convert_to_woodwork(X, y):
-        if X is None:
-            X = pd.DataFrame()
-        X = infer_feature_types(X)
-        y = infer_feature_types(y)
-        return X, y
-
     def fit(self, X, y):
         """Fit a time series classification pipeline.
 
@@ -77,26 +44,9 @@ def fit(self, X, y):
         X, y = self._convert_to_woodwork(X, y)
         self._encoder.fit(y)
         y = self._encode_targets(y)
-
-        self.input_target_name = y.name
-        X_t = self.component_graph.fit_features(X, y)
-
-        y_shifted = y.shift(-self.gap)
-        X_t, y_shifted = drop_rows_with_nans(X_t, y_shifted)
-        self.estimator.fit(X_t, y_shifted)
-        self.input_feature_names = self.component_graph.input_feature_names
+        self._fit(X, y)
         return self
 
-    def _estimator_predict(self, features, y):
-        """Get estimator predictions.
-
-        This helper passes y as an argument if needed by the estimator.
-        """
-        y_arg = None
-        if self.estimator.predict_uses_y:
-            y_arg = y
-        return self.estimator.predict(features, y=y_arg)
-
     def _estimator_predict_proba(self, features, y):
         """Get estimator predicted probabilities.
 
@@ -122,9 +72,9 @@ def predict(self, X, y=None, objective=None):
         """Make predictions using selected features.
 
         Arguments:
-            X (pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features]
-            y (pd.Series, np.ndarray, None): The target training targets of length [n_samples]
-            objective (Object or string): The objective to use to make predictions
+            X (pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features].
+            y (pd.Series, np.ndarray, None): The target training data of length [n_samples].
+            objective (Object or string): The objective to use to make predictions.
 
         Returns:
             pd.Series: Predicted values.
@@ -145,10 +95,10 @@ def predict_proba(self, X, y=None):
         """Make probability estimates for labels.
 
         Arguments:
-            X (pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features]
+            X (pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features].
 
         Returns:
-            pd.DataFrame: Probability estimates
+            pd.DataFrame: Probability estimates.
         """
         X, y = self._convert_to_woodwork(X, y)
         y = self._encode_targets(y)
@@ -163,16 +113,15 @@ def score(self, X, y, objectives):
         """Evaluate model performance on current and additional objectives.
 
         Arguments:
-            X (pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features]
-            y (pd.Series): True labels of length [n_samples]
-            objectives (list): Non-empty list of objectives to score on
+            X (pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features].
+            y (pd.Series): True labels of length [n_samples].
+            objectives (list): Non-empty list of objectives to score on.
 
         Returns:
-            dict: Ordered dictionary of objective scores
+            dict: Ordered dictionary of objective scores.
         """
         X, y = self._convert_to_woodwork(X, y)
         objectives = self.create_objectives(objectives)
-
         y_encoded = self._encode_targets(y)
         y_shifted = y_encoded.shift(-self.gap)
         y_predicted, y_predicted_proba = self._compute_predictions(
@@ -193,7 +142,6 @@ def score(self, X, y, objectives):
 class TimeSeriesBinaryClassificationPipeline(
     BinaryClassificationPipelineMixin,
     TimeSeriesClassificationPipeline,
-    metaclass=TimeSeriesPipelineBaseMeta,
 ):
     """Pipeline base class for time series binary classification problems.
 

diff --git a/evalml/pipelines/time_series_pipeline_base.py b/evalml/pipelines/time_series_pipeline_base.py
@@ -0,0 +1,86 @@
+import pandas as pd
+
+from evalml.pipelines import PipelineBase
+from evalml.pipelines.pipeline_meta import TimeSeriesPipelineBaseMeta
+from evalml.utils import drop_rows_with_nans, infer_feature_types
+
+
+class TimeSeriesPipelineBase(PipelineBase, metaclass=TimeSeriesPipelineBaseMeta):
+
+    """Pipeline base class for time series problems.
+
+    Arguments:
+        component_graph (list or dict): List of components in order. Accepts strings or ComponentBase subclasses in the list.
+            Note that when duplicate components are specified in a list, the duplicate component names will be modified with the
+            component's index in the list. For example, the component graph
+            [Imputer, One Hot Encoder, Imputer, Logistic Regression Classifier] will have names
+            ["Imputer", "One Hot Encoder", "Imputer_2", "Logistic Regression Classifier"]
+        parameters (dict): Dictionary with component names as keys and dictionary of that component's parameters as values.
+             An empty dictionary {} implies using all default values for component parameters. Pipeline-level
+             parameters such as date_index, gap, and max_delay must be specified with the "pipeline" key. For example:
+             Pipeline(parameters={"pipeline": {"date_index": "Date", "max_delay": 4, "gap": 2}}).
+        random_seed (int): Seed for the random number generator. Defaults to 0.
+    """
+
+    def __init__(
+        self,
+        component_graph,
+        parameters=None,
+        custom_name=None,
+        random_seed=0,
+    ):
+        if not parameters or "pipeline" not in parameters:
+            raise ValueError(
+                "date_index, gap, and max_delay parameters cannot be omitted from the parameters dict. "
+                "Please specify them as a dictionary with the key 'pipeline'."
+            )
+        pipeline_params = parameters["pipeline"]
+        self.date_index = pipeline_params["date_index"]
+        self.gap = pipeline_params["gap"]
+        self.max_delay = pipeline_params["max_delay"]
+        super().__init__(
+            component_graph,
+            custom_name=custom_name,
+            parameters=parameters,
+            random_seed=random_seed,
+        )
+
+    @staticmethod
+    def _convert_to_woodwork(X, y):
+        if X is None:
+            X = pd.DataFrame()
+        X = infer_feature_types(X)
+        y = infer_feature_types(y)
+        return X, y
+
+    def fit(self, X, y):
+        """Fit a time series pipeline.
+
+        Arguments:
+            X (pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features].
+            y (pd.Series, np.ndarray): The target training data of length [n_samples].
+
+        Returns:
+            self
+        """
+        X, y = self._convert_to_woodwork(X, y)
+        self._fit(X, y)
+        return self
+
+    def _fit(self, X, y):
+        self.input_target_name = y.name
+        X_t = self.component_graph.fit_features(X, y)
+        y_shifted = y.shift(-self.gap)
+        X_t, y_shifted = drop_rows_with_nans(X_t, y_shifted)
+        self.estimator.fit(X_t, y_shifted)
+        self.input_feature_names = self.component_graph.input_feature_names
+
+    def _estimator_predict(self, features, y):
+        """Get estimator predictions.
+
+        This helper passes y as an argument if needed by the estimator.
+        """
+        y_arg = None
+        if self.estimator.predict_uses_y:
+            y_arg = y
+        return self.estimator.predict(features, y=y_arg)
diff --git a/evalml/pipelines/time_series_regression_pipeline.py b/evalml/pipelines/time_series_regression_pipeline.py
@@ -1,7 +1,4 @@
-import pandas as pd
-
-from evalml.pipelines.pipeline_meta import TimeSeriesPipelineBaseMeta
-from evalml.pipelines.regression_pipeline import RegressionPipeline
+from evalml.pipelines.time_series_pipeline_base import TimeSeriesPipelineBase
 from evalml.problem_types import ProblemTypes
 from evalml.utils import (
     drop_rows_with_nans,
@@ -10,9 +7,7 @@
 )
 
 
-class TimeSeriesRegressionPipeline(
-    RegressionPipeline, metaclass=TimeSeriesPipelineBaseMeta
-):
+class TimeSeriesRegressionPipeline(TimeSeriesPipelineBase):
     """Pipeline base class for time series regression problems.
 
     Arguments:
@@ -29,79 +24,23 @@ class TimeSeriesRegressionPipeline(
     """
 
     problem_type = ProblemTypes.TIME_SERIES_REGRESSION
-    """ProblemTypes.TIME_SERIES_REGRESSIO"""
-
-    def __init__(
-        self,
-        component_graph,
-        parameters=None,
-        custom_name=None,
-        random_seed=0,
-    ):
-        if not parameters or "pipeline" not in parameters:
-            raise ValueError(
-                "date_index, gap, and max_delay parameters cannot be omitted from the parameters dict. "
-                "Please specify them as a dictionary with the key 'pipeline'."
-            )
-        pipeline_params = parameters["pipeline"]
-        self.date_index = pipeline_params["date_index"]
-        self.gap = pipeline_params["gap"]
-        self.max_delay = pipeline_params["max_delay"]
-        super().__init__(
-            component_graph,
-            custom_name=custom_name,
-            parameters=parameters,
-            random_seed=random_seed,
-        )
-
-    def fit(self, X, y):
-        """Fit a time series regression pipeline.
-
-        Arguments:
-            X (pd.DataFrame or np.ndarray): The input training data of shape [n_samples, n_features]
-            y (pd.Series, np.ndarray): The target training targets of length [n_samples]
-
-        Returns:
-            self
-        """
-        if X is None:
-            X = pd.DataFrame()
-
-        X = infer_feature_types(X)
-        y = infer_feature_types(y)
-
-        self.input_target_name = y.name
-        X_t = self.component_graph.fit_features(X, y)
-
-        y_shifted = y.shift(-self.gap)
-        X_t, y_shifted = drop_rows_with_nans(X_t, y_shifted)
-        self.estimator.fit(X_t, y_shifted)
-        self.input_feature_names = self.component_graph.input_feature_names
-
-        return self
+    """ProblemTypes.TIME_SERIES_REGRESSION"""
 
     def predict(self, X, y=None, objective=None):
         """Make predictions using selected features.
 
         Arguments:
-            X (pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features]
-            y (pd.Series, np.ndarray, None): The target training targets of length [n_samples]
-            objective (Object or string): The objective to use to make predictions
+            X (pd.DataFrame, or np.ndarray): Data of shape [n_samples, n_features].
+            y (pd.Series, np.ndarray, None): The target training data of length [n_samples].
+            objective (Object or string): The objective to use to make predictions.
 
         Returns:
             pd.Series: Predicted values.
         """
-        if X is None:
-            X = pd.DataFrame()
-        X = infer_feature_types(X)
-        y = infer_feature_types(y)
+        X, y = self._convert_to_woodwork(X, y)
         features = self.compute_estimator_features(X, y)
         features_no_nan, y = drop_rows_with_nans(features, y)
-        y_arg = None
-        if self.estimator.predict_uses_y:
-            y_arg = y
-        predictions = self.estimator.predict(features_no_nan, y_arg)
-
+        predictions = self._estimator_predict(features_no_nan, y)
         predictions.index = y.index
         predictions = self.inverse_transform(predictions)
         predictions = predictions.rename(self.input_target_name)
@@ -114,23 +53,17 @@ def score(self, X, y, objectives):
         """Evaluate model performance on current and additional objectives.
 
         Arguments:
-            X (pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features]
-            y (pd.Series): True labels of length [n_samples]
-            objectives (list): Non-empty list of objectives to score on
+            X (pd.DataFrame or np.ndarray): Data of shape [n_samples, n_features].
+            y (pd.Series): True labels of length [n_samples].
+            objectives (list): Non-empty list of objectives to score on.
 
         Returns:
-            dict: Ordered dictionary of objective scores
+            dict: Ordered dictionary of objective scores.
         """
-        # Only converting X for the call to _score_all_objectives
-        if X is None:
-            X = pd.DataFrame()
-        X = infer_feature_types(X)
-        y = infer_feature_types(y)
-
+        X, y = self._convert_to_woodwork(X, y)
+        objectives = self.create_objectives(objectives)
         y_predicted = self.predict(X, y)
-
         y_shifted = y.shift(-self.gap)
-        objectives = self.create_objectives(objectives)
         y_shifted, y_predicted = drop_rows_with_nans(y_shifted, y_predicted)
         return self._score_all_objectives(
             X, y_shifted, y_predicted, y_pred_proba=None, objectives=objectives