1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -4,6 +4,7 @@ Release Notes
* Enhancements
* Fixes
* Fixed bug where ``calculate_permutation_importance`` was not calculating the right value for pipelines with target transformers :pr:`2782`
* Fixed bug where transformed target values were not used in ``fit`` for time series pipelines :pr:`2780`
* Changes
* Documentation Changes
* Testing Changes
13 changes: 7 additions & 6 deletions evalml/pipelines/component_graph.py
@@ -203,7 +203,7 @@ def fit_features(self, X, y):
y (pd.Series): The target training data of length [n_samples].

Returns:
pd.DataFrame: Transformed values.
Tuple (pd.DataFrame, pd.Series): Transformed features and target.
"""
return self._fit_transform_features_helper(True, X, y)

@@ -217,7 +217,8 @@ def compute_final_component_features(self, X, y=None):
Returns:
pd.DataFrame: Transformed values.
"""
return self._fit_transform_features_helper(False, X, y)
features, _ = self._fit_transform_features_helper(False, X, y)
return features

def _fit_transform_features_helper(self, needs_fitting, X, y=None):
"""Transform all components save the final one, and returns the data that should be fed to the final component, usually an estimator.
@@ -228,23 +229,23 @@ def _fit_transform_features_helper(self, needs_fitting, X, y=None):
y (pd.Series): The target training data of length [n_samples]. Defaults to None.

Returns:
pd.DataFrame: Transformed values.
Tuple (pd.DataFrame, pd.Series): Transformed features and target.
"""
if len(self.compute_order) <= 1:
X = infer_feature_types(X)
self.input_feature_names.update({self.compute_order[0]: list(X.columns)})
return X
return X, y
component_outputs = self._compute_features(
self.compute_order[:-1], X, y=y, fit=needs_fitting
)
x_inputs, _ = self._consolidate_inputs_for_component(
x_inputs, y_output = self._consolidate_inputs_for_component(
component_outputs, self.compute_order[-1], X, y
)
if needs_fitting:
self.input_feature_names.update(
{self.compute_order[-1]: list(x_inputs.columns)}
)
return x_inputs
return x_inputs, y_output

def _consolidate_inputs_for_component(
self, component_outputs, component, X, y=None
6 changes: 3 additions & 3 deletions evalml/pipelines/time_series_pipeline_base.py
@@ -230,13 +230,13 @@ def predict(self, X, objective=None, X_train=None, y_train=None):

def _fit(self, X, y):
self.input_target_name = y.name
X_t = self.component_graph.fit_features(X, y)
X_t, y_shifted = drop_rows_with_nans(X_t, y)
X_t, y_t = self.component_graph.fit_features(X, y)
X_t, y_shifted = drop_rows_with_nans(X_t, y_t)

if self.estimator is not None:
self.estimator.fit(X_t, y_shifted)
else:
self.component_graph.get_last_component().fit(X_t, y)
self.component_graph.get_last_component().fit(X_t, y_shifted)

self.input_feature_names = self.component_graph.input_feature_names

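To make the fix concrete, here is a minimal, self-contained sketch (plain pandas; fit_features, the lag column, and the + 2 target transform are hypothetical stand-ins, not evalml's actual implementation) of why _fit needs the transformed target handed back from the feature-fitting step:

import pandas as pd

# Hypothetical stand-ins for the pieces involved in the fix: a feature step that
# also transforms the target, and a _fit-style caller that must keep the two
# aligned before fitting the final estimator.
def fit_features(X, y):
    X_t = X.copy()
    X_t["value_lag_1"] = X["value"].shift(1)  # delayed feature creates a leading NaN
    y_t = y + 2                               # target transformer modifies y as well
    return X_t, y_t                           # the fix: return the transformed target too

X = pd.DataFrame({"value": [1.0, 2.0, 3.0, 4.0]})
y = pd.Series([10.0, 20.0, 30.0, 40.0])

X_t, y_t = fit_features(X, y)
mask = X_t.notna().all(axis=1)                # analogous to drop_rows_with_nans
X_t, y_t = X_t[mask], y_t[mask]
print(y_t.tolist())                           # [22.0, 32.0, 42.0]: the transformed values reach fit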
62 changes: 61 additions & 1 deletion evalml/tests/pipeline_tests/test_time_series_pipeline.py
@@ -14,7 +14,7 @@
TimeSeriesMulticlassClassificationPipeline,
TimeSeriesRegressionPipeline,
)
from evalml.pipelines.components import DelayedFeatureTransformer
from evalml.pipelines.components import DelayedFeatureTransformer, Transformer
from evalml.pipelines.utils import _get_pipeline_base_class
from evalml.preprocessing.utils import is_classification
from evalml.problem_types import ProblemTypes
@@ -986,6 +986,66 @@ def test_binary_predict_pipeline_use_objective(
mock_decision_function.assert_called()


@pytest.mark.parametrize(
"problem_type",
[
ProblemTypes.TIME_SERIES_BINARY,
ProblemTypes.TIME_SERIES_MULTICLASS,
ProblemTypes.TIME_SERIES_REGRESSION,
],
)
@patch("evalml.pipelines.LogisticRegressionClassifier.fit")
@patch("evalml.pipelines.components.ElasticNetRegressor.fit")
def test_time_series_pipeline_fit_with_transformed_target(
mock_en_fit, mock_lr_fit, problem_type, ts_data
):
class AddTwo(Transformer):
"""Add Two to target for testing."""

modifies_target = True
modifies_features = False

name = "AddTwo"
hyperparameter_ranges = {}

def __init__(self, drop_old_columns=True, random_seed=0):
super().__init__(parameters={}, component_obj=None, random_seed=random_seed)

def fit(self, X, y):
return self

def transform(self, X, y):
return infer_feature_types(X), infer_feature_types(y) + 2

X, y = ts_data
y = y % 2

if is_classification(problem_type):
estimator = "Logistic Regression Classifier"
mock_to_check = mock_lr_fit
else:
estimator = "Elastic Net Regressor"
mock_to_check = mock_en_fit

pipeline_class = _get_pipeline_base_class(problem_type)
pipeline = pipeline_class(
component_graph={
"AddTwo": [AddTwo, "X", "y"],
"Estimator": [estimator, "X", "AddTwo.y"],
},
parameters={
"pipeline": {
"gap": 0,
"max_delay": 2,
"date_index": None,
"forecast_horizon": 3,
},
},
)
pipeline.fit(X, y)
pd.testing.assert_series_equal(mock_to_check.call_args[0][1], y + 2)

Contributor
Interesting how this fails with Woodwork 0.6.0 because you're trying to add an int to a categorical but passes with 0.7.1 :)

Contributor Author
Then how come min-dependencies git-test-other passes? That installs ww 0.6.0? 🤔

Contributor
(I have woodwork 0.6.0 locally and this passes 🤔 )
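
For context on the exchange above, a minimal plain-pandas sketch (an illustration only; it does not reproduce Woodwork's version-specific type inference) of why adding an integer to a target typed as categorical fails, while a numeric target passes:

import pandas as pd

# Integer arithmetic works on a numeric series...
y_numeric = pd.Series([0, 1, 0, 1])
print((y_numeric + 2).tolist())  # [2, 3, 2, 3]

# ...but a categorical series does not support the + operator.
y_categorical = y_numeric.astype("category")
try:
    y_categorical + 2
except TypeError as err:
    print(f"TypeError: {err}")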



def test_time_series_pipeline_with_detrender(ts_data):
pytest.importorskip(
"sktime",