alteryx · freddyaboulton · Nov 30, 2021 · Nov 8, 2021 · Nov 9, 2021 · Nov 9, 2021
diff --git a/.github/meta.yaml b/.github/meta.yaml
@@ -39,7 +39,7 @@ outputs:
         - shap >=0.36.0
         - texttable >=1.6.2
         - woodwork >=0.8.2
-        - featuretools>=1.0.0
+        - featuretools>=1.2.0
         - nlp-primitives>=2.0.0
         - python >=3.7.*
         - networkx >=2.5,<2.6

diff --git a/core-requirements.txt b/core-requirements.txt
@@ -15,5 +15,5 @@ texttable>=1.6.2
 woodwork>=0.8.2
 dask>=2021.10.0
 nlp-primitives>=2.0.0
-featuretools>=1.0.0
+featuretools>=1.2.0
 networkx>=2.5,<2.6
diff --git a/docs/source/api_index.rst b/docs/source/api_index.rst
@@ -175,7 +175,7 @@ Transformers are components that take in data as input and output transformed da
     evalml.pipelines.components.DropNullColumns
     evalml.pipelines.components.DateTimeFeaturizer
     evalml.pipelines.components.NaturalLanguageFeaturizer
-    evalml.pipelines.components.DelayedFeatureTransformer
+    evalml.pipelines.components.TimeSeriesFeaturizer
     evalml.pipelines.components.DFSTransformer
     evalml.pipelines.components.PolynomialDetrender
     evalml.pipelines.components.Undersampler

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
@@ -2,6 +2,7 @@ Release Notes
 -------------
 **Future Releases**
     * Enhancements
+        * Renamed ``DelayedFeatureTransformer`` to ``TimeSeriesFeaturizer`` and enhanced it to compute rolling features :pr:`3028`
     * Fixes
     * Changes
     * Documentation Changes
@@ -10,6 +11,7 @@ Release Notes
 .. warning::
 
     **Breaking Changes**
+        * Renamed ``DelayedFeatureTransformer`` to ``TimeSeriesFeaturizer`` :pr:`3028`
 
 
 **v0.38.0 Nov. 27, 2021**

diff --git a/docs/source/user_guide/timeseries.ipynb b/docs/source/user_guide/timeseries.ipynb
@@ -193,8 +193,9 @@
     "\n",
     "![estimator predictions](ts_viz/estimator_viz.png)\n",
     "\n",
-    "#### Feature engineering components for time series\n",
-    "For an example of a time-series feature engineering component see [DelayedFeatureTransformer](../autoapi/evalml/pipelines/components/index.rst#evalml.pipelines.components.DelayedFeatureTransformer)"
+    "#### Feature engineering components for time series",
+    "\n",
+    "For an example of a time-series feature engineering component see [TimeSeriesFeaturizer](../autoapi/evalml/pipelines/components/index.rst#evalml.pipelines.components.TimeSeriesFeaturizer)"
    ]
   },
   {

diff --git a/evalml/pipelines/__init__.py b/evalml/pipelines/__init__.py
@@ -28,7 +28,7 @@
     DecisionTreeRegressor,
     StackedEnsembleClassifier,
     StackedEnsembleRegressor,
-    DelayedFeatureTransformer,
+    TimeSeriesFeaturizer,
     DFSTransformer,
     KNeighborsClassifier,
     SVMClassifier,

diff --git a/evalml/pipelines/components/__init__.py b/evalml/pipelines/components/__init__.py
@@ -37,7 +37,7 @@
     RFClassifierSelectFromModel,
     RFRegressorSelectFromModel,
     PerColumnImputer,
-    DelayedFeatureTransformer,
+    TimeSeriesFeaturizer,
     SimpleImputer,
     Imputer,
     StandardScaler,

diff --git a/evalml/pipelines/components/estimators/regressors/time_series_baseline_estimator.py b/evalml/pipelines/components/estimators/regressors/time_series_baseline_estimator.py
@@ -3,7 +3,7 @@
 
 from evalml.model_family import ModelFamily
 from evalml.pipelines.components.estimators import Estimator
-from evalml.pipelines.components.transformers import DelayedFeatureTransformer
+from evalml.pipelines.components.transformers import TimeSeriesFeaturizer
 from evalml.problem_types import ProblemTypes
 from evalml.utils import infer_feature_types
 
@@ -83,13 +83,13 @@ def predict(self, X):
             ValueError: If input y is None.
         """
         X = infer_feature_types(X)
-        feature_name = DelayedFeatureTransformer.target_colname_prefix.format(
+        feature_name = TimeSeriesFeaturizer.target_colname_prefix.format(
             self.start_delay
         )
         if feature_name not in X.columns:
             raise ValueError(
                 "Time Series Baseline Estimator is meant to be used in a pipeline with "
-                "a DelayedFeaturesTransformer"
+                "a Time Series Featurizer"
             )
         return X.ww[feature_name]
 

diff --git a/evalml/pipelines/components/transformers/__init__.py b/evalml/pipelines/components/transformers/__init__.py
@@ -23,7 +23,7 @@
     DropNullColumns,
     LSA,
     NaturalLanguageFeaturizer,
-    DelayedFeatureTransformer,
+    TimeSeriesFeaturizer,
     DFSTransformer,
     PolynomialDetrender,
     LogTransformer,

diff --git a/evalml/pipelines/components/transformers/preprocessing/__init__.py b/evalml/pipelines/components/transformers/preprocessing/__init__.py
@@ -4,7 +4,7 @@
 from .text_transformer import TextTransformer
 from .lsa import LSA
 from .natural_language_featurizer import NaturalLanguageFeaturizer
-from .delayed_feature_transformer import DelayedFeatureTransformer
+from .time_series_featurizer import TimeSeriesFeaturizer
 from .featuretools import DFSTransformer
 from .polynomial_detrender import PolynomialDetrender
 from .log_transformer import LogTransformer

diff --git a/...processing/delayed_feature_transformer.py → ...s/preprocessing/time_series_featurizer.py b/...processing/delayed_feature_transformer.py → ...s/preprocessing/time_series_featurizer.py
@@ -1,6 +1,8 @@
 """Transformer that delays input features and target variable for time series problems."""
 import numpy as np
 import pandas as pd
+import woodwork as ww
+from featuretools.primitives import RollingMean
 from scipy.signal import find_peaks
 from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
 from skopt.space import Real
@@ -11,7 +13,7 @@
 from evalml.utils import infer_feature_types
 
 
-class DelayedFeatureTransformer(Transformer):
+class TimeSeriesFeaturizer(Transformer):
     """Transformer that delays input features and target variable for time series problems.
 
     This component uses an algorithm based on the autocorrelation values of the target variable
@@ -45,9 +47,14 @@ class DelayedFeatureTransformer(Transformer):
         random_seed (int): Seed for the random number generator. This transformer performs the same regardless of the random seed provided.
     """
 
-    name = "Delayed Feature Transformer"
-    hyperparameter_ranges = {"conf_level": Real(0.001, 1.0)}
-    """{}"""
+    name = "Time Series Featurizer"
+    hyperparameter_ranges = {
+        "conf_level": Real(0.001, 1.0),
+        "rolling_window_size": Real(0.001, 1.0),
+    }
+    """{"conf_level": Real(0.001, 1.0),
+        "rolling_window_size": Real(0.001, 1.0)
+    }"""
     needs_fitting = True
     target_colname_prefix = "target_delay_{}"
     """target_delay_{}"""
@@ -59,6 +66,7 @@ def __init__(
         gap=0,
         forecast_horizon=1,
         conf_level=0.05,
+        rolling_window_size=0.25,
         delay_features=True,
         delay_target=True,
         random_seed=0,
@@ -70,6 +78,7 @@ def __init__(
         self.delay_target = delay_target
         self.forecast_horizon = forecast_horizon
         self.gap = gap
+        self.rolling_window_size = rolling_window_size
         self.statistically_significant_lags = None
 
         if conf_level is None:
@@ -92,6 +101,7 @@ def __init__(
             "forecast_horizon": forecast_horizon,
             "conf_level": conf_level,
             "gap": gap,
+            "rolling_window_size": rolling_window_size,
         }
         parameters.update(kwargs)
         super().__init__(parameters=parameters, random_seed=random_seed)
@@ -160,62 +170,115 @@ def _find_significant_lags(y, conf_level, max_delay):
             significant_lags = all_lags
         return significant_lags
 
-    def transform(self, X, y=None):
-        """Computes the delayed features for all features in X and y.
+    def _compute_rolling_transforms(self, X, y, original_features):
+        """Compute the rolling mean of each numeric original feature and of a numeric target.
 
-        For each feature in X, it will add a column to the output dataframe for each
-        delay in the (inclusive) range [1, max_delay]. The values of each delayed feature are simply the original
-        feature shifted forward in time by the delay amount. For example, a delay of 3 units means that the feature
-        value at row n will be taken from the n-3rd row of that feature
+        Args:
+            X (pd.DataFrame): Data to transform. Must be woodwork-initialized, since ``X.ww`` is used to select the numeric columns.
+            y (pd.Series, or None): Target. Must be woodwork-initialized when provided. A rolling mean is only computed for it when it carries the "numeric" semantic tag.
+            original_features (list): Names of the columns of X that are eligible for rolling features.
 
-        If y is not None, it will also compute the delayed values for the target variable.
+        Returns:
+            pd.DataFrame: Woodwork-initialized data holding only the new rolling mean features, indexed like X.
+        """
+        # The window is a fraction (rolling_window_size) of max_delay, plus one row.
+        size = int(self.rolling_window_size * self.max_delay)
+        rolling_mean = RollingMean(
+            window_length=size + 1,
+            gap=self.start_delay,
+            min_periods=size + 1,
+        )
+        rolling_mean = rolling_mean.get_function()
+        numeric_columns = set(X.ww.select(["numeric"], return_schema=True).columns)
+        # Walk original_features in order instead of iterating a set, so that the order
+        # of the generated columns is the same on every interpreter run.
+        numerics = [col for col in original_features if col in numeric_columns]
+        data = pd.DataFrame(
+            {f"{col}_rolling_mean": rolling_mean(X.index, X[col]) for col in numerics}
+        )
+        if y is not None and "numeric" in y.ww.semantic_tags:
+            data["target_rolling_mean"] = rolling_mean(y.index, y)
+        data.index = X.index
+        data.ww.init()
+        return data
+
+    def _compute_delays(self, X_ww, y, original_features):
+        """Computes the delayed features for the columns of X_ww and, optionally, for y.
+
+        Each delay is ``self.start_delay + t`` for every ``t`` in ``self.statistically_significant_lags``
+        (presumably derived from the target's autocorrelation during fit; confirm against ``fit``).
+        Features are only delayed when ``self.delay_features`` is set and X_ww is not empty; the target
+        is only delayed when ``self.delay_target`` is set and y is not None.
 
         Args:
-            X (pd.DataFrame): Data to transform.
+            X_ww (pd.DataFrame): Woodwork-initialized data to transform.
             y (pd.Series, or None): Target.
+            original_features (list): Names of the original features. Currently not used by this method.
 
         Returns:
-            pd.DataFrame: Transformed X.
+            pd.DataFrame: Data with original features and delays.
         """
-        # Normalize the data into pandas objects
-        X_ww = infer_feature_types(X)
         cols_to_delay = list(
             X_ww.ww.select(
                 ["numeric", "category", "boolean"], return_schema=True
             ).columns
         )
-        X_ww = X_ww.ww.copy()
         categorical_columns = self._get_categorical_columns(X_ww)
         cols_derived_from_categoricals = []
-        if self.delay_features and len(X) > 0:
+        lagged_features = {}
+        if self.delay_features and len(X_ww) > 0:
             X_categorical = self._encode_X_while_preserving_index(
                 X_ww[categorical_columns]
             )
             for col_name in cols_to_delay:
+
                 col = X_ww[col_name]
                 if col_name in categorical_columns:
+                    # Categorical columns are delayed in their encoded form.
                     col = X_categorical[col_name]
                 for t in self.statistically_significant_lags:
                     feature_name = f"{col_name}_delay_{self.start_delay + t}"
-                    X_ww.ww[f"{col_name}_delay_{self.start_delay + t}"] = col.shift(
-                        self.start_delay + t
-                    )
+                    lagged_features[
+                        f"{col_name}_delay_{self.start_delay + t}"
+                    ] = col.shift(self.start_delay + t)
                     if col_name in categorical_columns:
                         cols_derived_from_categoricals.append(feature_name)
         # Handle cases where the target was passed in
         if self.delay_target and y is not None:
-            y = infer_feature_types(y)
             if type(y.ww.logical_type) == logical_types.Categorical:
                 y = self._encode_y_while_preserving_index(y)
             for t in self.statistically_significant_lags:
-                X_ww.ww[
+                lagged_features[
                     self.target_colname_prefix.format(t + self.start_delay)
                 ] = y.shift(self.start_delay + t)
         # Features created from categorical columns should no longer be categorical
-        X_ww.ww.set_types({col: "Double" for col in cols_derived_from_categoricals})
-        return X_ww.ww.drop(cols_to_delay)
+        lagged_features = pd.DataFrame(lagged_features)
+        lagged_features.ww.init(
+            logical_types={col: "Double" for col in cols_derived_from_categoricals}
+        )
+        lagged_features.index = X_ww.index
+        return ww.concat_columns([X_ww, lagged_features])
+
+    def transform(self, X, y=None):
+        """Computes the delayed values and rolling means for X and y.
+
+        The chosen delays are determined by the autocorrelation function of the target variable. See the class docstring
+        for more information on how they are chosen. If y is None, all possible lags are chosen.
+
+        If y is not None, it will also compute the delayed values for the target variable.
+
+        The rolling means for all numeric features in X and y, if y is numeric, are also returned.
+
+        Args:
+            X (pd.DataFrame or None): Data to transform. None is expected when only the target variable is being used.
+                NOTE(review): X is passed straight to ``infer_feature_types`` and its columns are read afterwards;
+                confirm that None is actually supported here.
+            y (pd.Series, or None): Target.
+
+        Returns:
+            pd.DataFrame: Transformed X. All original columns are dropped except the one named ``self.date_index``, if present.
+        """
+        if y is not None:
+            y = infer_feature_types(y)
+        # Normalize the data into pandas objects
+        X_ww = infer_feature_types(X)
+        # Exclude the date index column: it gets no rolling mean and is not dropped from the output.
+        original_features = [col for col in X_ww.columns if col != self.date_index]
+        delayed_features = self._compute_delays(X_ww, y, original_features)
+        rolling_means = self._compute_rolling_transforms(X_ww, y, original_features)
+        features = ww.concat_columns([delayed_features, rolling_means])
+        return features.ww.drop(original_features)
 
-    def fit_transform(self, X, y):
+    def fit_transform(self, X, y=None):
         """Fit the component and transform the input data.
 
         Args:

diff --git a/evalml/pipelines/utils.py b/evalml/pipelines/utils.py
@@ -22,7 +22,6 @@
     CatBoostRegressor,
     ComponentBase,
     DateTimeFeaturizer,
-    DelayedFeatureTransformer,
     DropColumns,
     DropNullColumns,
     DropRowsTransformer,
@@ -38,6 +37,7 @@
     StackedEnsembleRegressor,
     StandardScaler,
     TargetImputer,
+    TimeSeriesFeaturizer,
     Undersampler,
     URLFeaturizer,
 )
@@ -191,7 +191,7 @@ def _get_time_series_featurizer(X, y, problem_type, estimator_class, sampler_nam
         is_time_series(problem_type)
         and estimator_class.model_family != ModelFamily.ARIMA
     ):
-        components.append(DelayedFeatureTransformer)
+        components.append(TimeSeriesFeaturizer)
     return components
 
 
@@ -666,7 +666,7 @@ def make_timeseries_baseline_pipeline(problem_type, gap, forecast_horizon, date_
     }[problem_type]
     baseline = pipeline_class(
         component_graph=[
-            "Delayed Feature Transformer",
+            "Time Series Featurizer",
             "Time Series Baseline Estimator",
         ],
         custom_name=pipeline_name,
@@ -677,7 +677,7 @@ def make_timeseries_baseline_pipeline(problem_type, gap, forecast_horizon, date_
                 "max_delay": 0,
                 "forecast_horizon": forecast_horizon,
             },
-            "Delayed Feature Transformer": {
+            "Time Series Featurizer": {
                 "max_delay": 0,
                 "gap": gap,
                 "forecast_horizon": forecast_horizon,

diff --git a/evalml/tests/automl_tests/test_automl.py b/evalml/tests/automl_tests/test_automl.py
@@ -3721,14 +3721,15 @@ def test_timeseries_baseline_init_with_correct_gap_max_delay(AutoMLTestEnv, ts_d
             "max_delay": 0,
             "forecast_horizon": 7,
         },
-        "Delayed Feature Transformer": {
+        "Time Series Featurizer": {
             "date_index": "date",
             "delay_features": False,
             "delay_target": True,
             "max_delay": 0,
             "gap": 6,
             "forecast_horizon": 7,
             "conf_level": 0.05,
+            "rolling_window_size": 0.25,
         },
         "Time Series Baseline Estimator": {"forecast_horizon": 7, "gap": 6},
     }

diff --git a/evalml/tests/automl_tests/test_automl_search_classification.py b/evalml/tests/automl_tests/test_automl_search_classification.py
@@ -1156,6 +1156,7 @@ def test_automl_supports_time_series_classification(
         "delay_target": False,
         "delay_features": True,
         "conf_level": 0.05,
+        "rolling_window_size": 0.25,
     }
 
     automl = AutoMLSearch(
@@ -1175,7 +1176,7 @@ def test_automl_supports_time_series_classification(
             assert result["pipeline_class"] == baseline.__class__
             continue
 
-        assert result["parameters"]["Delayed Feature Transformer"] == configuration
+        assert result["parameters"]["Time Series Featurizer"] == configuration
         assert result["parameters"]["pipeline"] == configuration