diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index c5f43c0c8c..86dce8ef1a 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -4,6 +4,7 @@ Release Notes
     * Enhancements
     * Fixes
         * Fixed bug where ``calculate_permutation_importance`` was not calculating the right value for pipelines with target transformers :pr:`2782`
+        * Fixed bug where transformed target values were not used in ``fit`` for time series pipelines :pr:`2780`
     * Changes
     * Documentation Changes
     * Testing Changes
diff --git a/evalml/pipelines/component_graph.py b/evalml/pipelines/component_graph.py
index fecdb6fe7c..6527474ad3 100644
--- a/evalml/pipelines/component_graph.py
+++ b/evalml/pipelines/component_graph.py
@@ -203,7 +203,7 @@ def fit_features(self, X, y):
             y (pd.Series): The target training data of length [n_samples].
 
         Returns:
-            pd.DataFrame: Transformed values.
+            Tuple (pd.DataFrame, pd.Series): Transformed features and target.
         """
         return self._fit_transform_features_helper(True, X, y)
 
@@ -217,7 +217,8 @@ def compute_final_component_features(self, X, y=None):
         Returns:
             pd.DataFrame: Transformed values.
         """
-        return self._fit_transform_features_helper(False, X, y)
+        features, _ = self._fit_transform_features_helper(False, X, y)
+        return features
 
     def _fit_transform_features_helper(self, needs_fitting, X, y=None):
         """Transform all components save the final one, and returns the data that should be fed to the final component, usually an estimator.
@@ -228,23 +229,23 @@
             y (pd.Series): The target training data of length [n_samples]. Defaults to None.
 
         Returns:
-            pd.DataFrame: Transformed values.
+            Tuple (pd.DataFrame, pd.Series): Transformed features and target.
         """
         if len(self.compute_order) <= 1:
             X = infer_feature_types(X)
             self.input_feature_names.update({self.compute_order[0]: list(X.columns)})
-            return X
+            return X, y
         component_outputs = self._compute_features(
             self.compute_order[:-1], X, y=y, fit=needs_fitting
         )
-        x_inputs, _ = self._consolidate_inputs_for_component(
+        x_inputs, y_output = self._consolidate_inputs_for_component(
             component_outputs, self.compute_order[-1], X, y
         )
         if needs_fitting:
             self.input_feature_names.update(
                 {self.compute_order[-1]: list(x_inputs.columns)}
             )
-        return x_inputs
+        return x_inputs, y_output
 
     def _consolidate_inputs_for_component(
         self, component_outputs, component, X, y=None
diff --git a/evalml/pipelines/time_series_pipeline_base.py b/evalml/pipelines/time_series_pipeline_base.py
index 75b62fa813..a08f40bd6b 100644
--- a/evalml/pipelines/time_series_pipeline_base.py
+++ b/evalml/pipelines/time_series_pipeline_base.py
@@ -230,13 +230,13 @@ def predict(self, X, objective=None, X_train=None, y_train=None):
 
     def _fit(self, X, y):
         self.input_target_name = y.name
-        X_t = self.component_graph.fit_features(X, y)
-        X_t, y_shifted = drop_rows_with_nans(X_t, y)
+        X_t, y_t = self.component_graph.fit_features(X, y)
+        X_t, y_shifted = drop_rows_with_nans(X_t, y_t)
 
         if self.estimator is not None:
             self.estimator.fit(X_t, y_shifted)
         else:
-            self.component_graph.get_last_component().fit(X_t, y)
+            self.component_graph.get_last_component().fit(X_t, y_shifted)
 
         self.input_feature_names = self.component_graph.input_feature_names
 
diff --git a/evalml/tests/pipeline_tests/test_time_series_pipeline.py b/evalml/tests/pipeline_tests/test_time_series_pipeline.py
index 6d0d8704cd..47cdc07b37 100644
--- a/evalml/tests/pipeline_tests/test_time_series_pipeline.py
+++ b/evalml/tests/pipeline_tests/test_time_series_pipeline.py
@@ -14,7 +14,7 @@
     TimeSeriesMulticlassClassificationPipeline,
     TimeSeriesRegressionPipeline,
 )
-from evalml.pipelines.components import DelayedFeatureTransformer
+from evalml.pipelines.components import DelayedFeatureTransformer, Transformer
 from evalml.pipelines.utils import _get_pipeline_base_class
 from evalml.preprocessing.utils import is_classification
 from evalml.problem_types import ProblemTypes
@@ -986,6 +986,66 @@ def test_binary_predict_pipeline_use_objective(
     mock_decision_function.assert_called()
 
 
+@pytest.mark.parametrize(
+    "problem_type",
+    [
+        ProblemTypes.TIME_SERIES_BINARY,
+        ProblemTypes.TIME_SERIES_MULTICLASS,
+        ProblemTypes.TIME_SERIES_REGRESSION,
+    ],
+)
+@patch("evalml.pipelines.LogisticRegressionClassifier.fit")
+@patch("evalml.pipelines.components.ElasticNetRegressor.fit")
+def test_time_series_pipeline_fit_with_transformed_target(
+    mock_en_fit, mock_lr_fit, problem_type, ts_data
+):
+    class AddTwo(Transformer):
+        """Add Two to target for testing."""
+
+        modifies_target = True
+        modifies_features = False
+
+        name = "AddTwo"
+        hyperparameter_ranges = {}
+
+        def __init__(self, drop_old_columns=True, random_seed=0):
+            super().__init__(parameters={}, component_obj=None, random_seed=random_seed)
+
+        def fit(self, X, y):
+            return self
+
+        def transform(self, X, y):
+            return infer_feature_types(X), infer_feature_types(y) + 2
+
+    X, y = ts_data
+    y = y % 2
+
+    if is_classification(problem_type):
+        estimator = "Logistic Regression Classifier"
+        mock_to_check = mock_lr_fit
+    else:
+        estimator = "Elastic Net Regressor"
+        mock_to_check = mock_en_fit
+
+    pipeline_class = _get_pipeline_base_class(problem_type)
+    pipeline = pipeline_class(
+        component_graph={
+            "AddTwo": [AddTwo, "X", "y"],
+            "Estimator": [estimator, "X", "AddTwo.y"],
+        },
+        parameters={
+            "pipeline": {
+                "gap": 0,
+                "max_delay": 2,
+                "date_index": None,
+                "forecast_horizon": 3,
+            },
+        },
+    )
+    pipeline.fit(X, y)
+    pd.testing.assert_series_equal(mock_to_check.call_args[0][1], y + 2)
+
+
 def test_time_series_pipeline_with_detrender(ts_data):
     pytest.importorskip(
         "sktime",
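Reviewer note (not part of the diff): below is a minimal sketch, assuming evalml's public ``ComponentGraph`` API, of the contract this change introduces. ``fit_features`` now returns both the transformed features and the transformed target, while ``compute_final_component_features`` still returns only the features. The graph uses the built-in Target Imputer purely as an example target transformer, and the data is made up for illustration.

import pandas as pd

from evalml.pipelines import ComponentGraph

# Example graph: a target transformer ("Target Imputer") feeding an estimator.
component_dict = {
    "Target Imputer": ["Target Imputer", "X", "y"],
    "Estimator": ["Elastic Net Regressor", "X", "Target Imputer.y"],
}
component_graph = ComponentGraph(component_dict)
component_graph.instantiate({})

# Made-up data; the NaN in y is filled by the Target Imputer.
X = pd.DataFrame({"feature": range(10)})
y = pd.Series([1.0, 2.0, None, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0])

# New contract: fit_features fits everything but the final component and
# returns the transformed features *and* target, so callers (e.g. the time
# series pipelines) can fit the final estimator on the transformed target.
X_t, y_t = component_graph.fit_features(X, y)

# compute_final_component_features keeps its features-only return value.
features_only = component_graph.compute_final_component_features(X, y)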