
Update AutoML to use objective decision function during scoring for custom objectives #1934

Merged: 27 commits into main from 1868_objective on Mar 17, 2021 (diff below shows changes from 22 of the 27 commits).

Commits:
bf4b7a7  init (angela97lin, Mar 5, 2021)
7c751d3  Merge branch 'main' into 1868_objective (angela97lin, Mar 5, 2021)
5ba8267  Merge branch 'main' into 1868_objective (angela97lin, Mar 6, 2021)
6656538  Merge branch 'main' into 1868_objective (angela97lin, Mar 8, 2021)
ef32f72  Merge branch 'main' into 1868_objective (angela97lin, Mar 9, 2021)
1ba4628  wip (angela97lin, Mar 9, 2021)
edaf8a0  Merge branch 'main' into 1868_objective (angela97lin, Mar 9, 2021)
31667fc  this is kinda getting there (angela97lin, Mar 9, 2021)
386c192  Merge branch '1868_objective' of github.com:alteryx/evalml into 1868_… (angela97lin, Mar 9, 2021)
fcb4afb  release notes (angela97lin, Mar 9, 2021)
692532f  clean up (angela97lin, Mar 10, 2021)
3425491  add test for coverage (angela97lin, Mar 10, 2021)
f3096a0  Merge branch 'main' into 1868_objective (angela97lin, Mar 14, 2021)
0ac490f  Merge branch 'main' into 1868_objective (angela97lin, Mar 15, 2021)
0bb3f88  Merge branch 'main' into 1868_objective (angela97lin, Mar 16, 2021)
6107697  Merge branch 'main' into 1868_objective (angela97lin, Mar 16, 2021)
e549258  cleanup, add time series (angela97lin, Mar 17, 2021)
11d70d4  Merge branch '1868_objective' of github.com:alteryx/evalml into 1868_… (angela97lin, Mar 17, 2021)
70df88d  fix tests for time series mocks (angela97lin, Mar 17, 2021)
cbcc457  trying subclassing (angela97lin, Mar 17, 2021)
d4eaa6c  testing mixin (angela97lin, Mar 17, 2021)
aa54e25  fix tests and cleanup (angela97lin, Mar 17, 2021)
6786b33  separate out time series test (angela97lin, Mar 17, 2021)
0187e34  clean up _score_all_objectives (angela97lin, Mar 17, 2021)
15656dc  remove commented out code (angela97lin, Mar 17, 2021)
a191d96  moved optimize_threshold to mixin (angela97lin, Mar 17, 2021)
cf4a911  clean up test for multiclass (angela97lin, Mar 17, 2021)
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -8,6 +8,7 @@ Release Notes
         * Updated ``make_pipeline`` to not add ``Imputer`` if input data does not have numeric or categorical columns :pr:`1967`
         * Added recommended actions for the output of data check's ``validate`` method :pr:`1968`
     * Fixes
+        * Updated binary classification pipelines to use objective decision function during scoring of custom objectives :pr:`1934`
     * Changes
         * Removed ``data_checks`` parameter, ``data_check_results`` and data checks logic from ``AutoMLSearch`` :pr:`1935`
     * Documentation Changes
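
To make the release note concrete: with this change, a custom objective's decision_function drives how probabilities become class predictions during scoring. The sketch below is illustrative only, assuming evalml's custom-objective interface from around this release (class attribute names and signatures may differ by version); the objective itself is hypothetical.

import pandas as pd

from evalml.objectives import BinaryClassificationObjective


class LenientFlagger(BinaryClassificationObjective):
    """Hypothetical custom objective with its own decision logic."""
    name = "Lenient Flagger"
    greater_is_better = True
    score_needs_proba = False
    perfect_score = 1.0

    def objective_function(self, y_true, y_predicted, X=None):
        # Illustrative score: plain accuracy of the flagged predictions.
        y_true = pd.Series(y_true).reset_index(drop=True)
        y_predicted = pd.Series(y_predicted).reset_index(drop=True)
        return (y_true == y_predicted).mean()

    def decision_function(self, ypred_proba, threshold=0.5, X=None):
        # Custom decision: flag borderline rows by relaxing the cutoff.
        # Before this PR, scoring ignored this method and compared probabilities
        # against the threshold directly; after it, scoring calls this method.
        return ypred_proba > max(threshold - 0.1, 0.0)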
22 changes: 7 additions & 15 deletions evalml/pipelines/binary_classification_pipeline.py
@@ -1,23 +1,17 @@
+from .binary_classification_pipeline_mixin import (
+    BinaryClassificationPipelineMixin
+)
+
 from evalml.objectives import get_objective
 from evalml.pipelines.classification_pipeline import ClassificationPipeline
 from evalml.problem_types import ProblemTypes
 from evalml.utils import infer_feature_types
 
 
-class BinaryClassificationPipeline(ClassificationPipeline):
+class BinaryClassificationPipeline(BinaryClassificationPipelineMixin, ClassificationPipeline):
     """Pipeline subclass for all binary classification pipelines."""
-    _threshold = None
     problem_type = ProblemTypes.BINARY
 
Review comment from angela97lin (Contributor Author) on the removed lines below: Moved all of this to BinaryClassificationPipelineMixin

-    @property
-    def threshold(self):
-        """Threshold used to make a prediction. Defaults to None."""
-        return self._threshold
-
-    @threshold.setter
-    def threshold(self, value):
-        self._threshold = value
-
     def _predict(self, X, objective=None):
         """Make predictions using selected features.
 
@@ -37,10 +31,8 @@ def _predict(self, X, objective=None):
         if self.threshold is None:
             return self._component_graph.predict(X)
         ypred_proba = self.predict_proba(X).to_dataframe()
-        ypred_proba = ypred_proba.iloc[:, 1]
-        if objective is None:
-            return infer_feature_types(ypred_proba > self.threshold)
-        return infer_feature_types(objective.decision_function(ypred_proba, threshold=self.threshold, X=X))
+        predictions = self._predict_with_objective(X, ypred_proba, objective)
+        return infer_feature_types(predictions)
 
     def predict_proba(self, X):
         """Make probability estimates for labels. Assumes that the column at index 1 represents the positive label case.
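
With the mixin wired in, _predict now routes thresholded predictions through the objective. A hedged usage sketch follows; pipeline construction, fitting, and data are elided, and FraudCost's amount_col value and the variable names are made up for illustration:

from evalml.objectives import FraudCost

fraud_cost = FraudCost(amount_col="amount")  # "amount" is a hypothetical column name

pipeline.fit(X_train, y_train)
pipeline.threshold = 0.7

# No objective: the mixin's _predict_with_objective returns ypred_proba > 0.7.
default_preds = pipeline.predict(X_test)

# With an objective: predictions come from FraudCost.decision_function, which
# can weigh per-row transaction amounts instead of comparing raw probabilities
# against the threshold.
fraud_preds = pipeline.predict(X_test, objective=fraud_cost)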
55 changes: 55 additions & 0 deletions evalml/pipelines/binary_classification_pipeline_mixin.py
@@ -0,0 +1,55 @@
import sys
import traceback
from collections import OrderedDict

from evalml.exceptions import PipelineScoreError


class BinaryClassificationPipelineMixin():
    _threshold = None

    @property
    def threshold(self):
        """Threshold used to make a prediction. Defaults to None."""
        return self._threshold

    @threshold.setter
    def threshold(self, value):
        self._threshold = value

    def _predict_with_objective(self, X, ypred_proba, objective):
        ypred_proba = ypred_proba.iloc[:, 1]
        if objective is None:
            return ypred_proba > self.threshold
        return objective.decision_function(ypred_proba, threshold=self.threshold, X=X)

    def _compute_predictions(self, X, y, objectives, time_series=False):
        """Compute predictions/probabilities based on objectives."""
        y_predicted = None
        y_predicted_proba = None
        if any(o.score_needs_proba for o in objectives) or self.threshold is not None:
            y_predicted_proba = self.predict_proba(X, y) if time_series else self.predict_proba(X)
        if any(not o.score_needs_proba for o in objectives) and self.threshold is None:
            y_predicted = self._predict(X, y, pad=True) if time_series else self._predict(X)
        return y_predicted, y_predicted_proba

    def _score_all_objectives(self, X, y, y_pred, y_pred_proba, objectives):
        scored_successfully = OrderedDict()
        exceptions = OrderedDict()
        for objective in objectives:
            try:
                if not objective.is_defined_for_problem_type(self.problem_type):
                    raise ValueError(f'Invalid objective {objective.name} specified for problem type {self.problem_type}')
                y_pred_to_use = y_pred
                if self.threshold is not None and not objective.score_needs_proba:
                    y_pred_to_use = self._predict_with_objective(X, y_pred_proba, objective)
                score = self._score(X, y, y_pred_proba if objective.score_needs_proba else y_pred_to_use, objective)
                scored_successfully.update({objective.name: score})
            except Exception as e:
                tb = traceback.format_tb(sys.exc_info()[2])
                exceptions[objective.name] = (e, tb)
        if exceptions:
            # If any objective failed, raise a PipelineScoreError
            raise PipelineScoreError(exceptions, scored_successfully)
        # No objectives failed, return the scores
        return scored_successfully
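
For context, how these mixin pieces cooperate during score on a thresholded pipeline: _compute_predictions computes probabilities once, then _score_all_objectives derives per-objective class predictions. A hedged walkthrough, assuming an already-fitted binary pipeline and evalml's built-in objective names (exact behavior may vary by version):

from evalml.objectives import FraudCost

pipeline.threshold = 0.7
fraud_cost = FraudCost(amount_col=0)  # mirrors the test further down: column index 0 holds amounts

# 'auc' has score_needs_proba=True, so it is scored on y_pred_proba directly.
# 'precision' has score_needs_proba=False; with a threshold set, it is scored on
# its own decision_function output (the base class default thresholds the
# probabilities). fraud_cost overrides decision_function, so its custom logic
# produces the predictions it is scored on.
scores = pipeline.score(X_test, y_test, objectives=["precision", "auc", fraud_cost])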
16 changes: 6 additions & 10 deletions evalml/pipelines/time_series_classification_pipelines.py
@@ -1,5 +1,10 @@
 
 import pandas as pd
 
+from .binary_classification_pipeline_mixin import (
+    BinaryClassificationPipelineMixin
+)
+
 from evalml.objectives import get_objective
 from evalml.pipelines.classification_pipeline import ClassificationPipeline
 from evalml.pipelines.pipeline_meta import TimeSeriesPipelineBaseMeta
@@ -169,17 +174,8 @@ def score(self, X, y, objectives):
                           objectives=objectives)
 
 
-class TimeSeriesBinaryClassificationPipeline(TimeSeriesClassificationPipeline, metaclass=TimeSeriesPipelineBaseMeta):
+class TimeSeriesBinaryClassificationPipeline(BinaryClassificationPipelineMixin, TimeSeriesClassificationPipeline, metaclass=TimeSeriesPipelineBaseMeta):
     problem_type = ProblemTypes.TIME_SERIES_BINARY
-    _threshold = None
-
-    @property
-    def threshold(self):
-        return self._threshold
-
-    @threshold.setter
-    def threshold(self, value):
-        self._threshold = value
 
     def _predict(self, X, y, objective=None, pad=False):
         features = self.compute_estimator_features(X, y)
@@ -4,6 +4,9 @@
 import pytest
 import woodwork as ww
 
+from evalml.exceptions import PipelineScoreError
+from evalml.objectives import FraudCost
+
 
 @patch('evalml.pipelines.ClassificationPipeline._decode_targets', return_value=[0, 1])
 @patch('evalml.objectives.BinaryClassificationObjective.decision_function', return_value=pd.Series([1, 0]))
@@ -66,3 +69,33 @@ def test_binary_predict_pipeline_objective_mismatch(mock_transform, X_y_binary,
     with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."):
         binary_pipeline.predict(X, "precision micro")
     mock_transform.assert_called()
+
+
+@pytest.mark.parametrize("is_time_series", [True, False])
+@patch('evalml.objectives.FraudCost.decision_function')
+def test_binary_predict_pipeline_use_objective(mock_decision_function, is_time_series,
+                                               X_y_binary, logistic_regression_binary_pipeline_class, time_series_binary_classification_pipeline_class):
+    X, y = X_y_binary
+    binary_pipeline = None
+    if is_time_series:
+        binary_pipeline = time_series_binary_classification_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1},
+                                                                                       "pipeline": {"gap": 0, "max_delay": 0}})
+        mock_decision_function.return_value = pd.Series([0] * 98)
+    else:
+        binary_pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
+        mock_decision_function.return_value = pd.Series([0] * 100)
+
+    binary_pipeline.threshold = 0.7
+    binary_pipeline.fit(X, y)
+    fraud_cost = FraudCost(amount_col=0)
+    binary_pipeline.score(X, y, ['precision', 'auc', fraud_cost])
+    mock_decision_function.assert_called()
+
+
+def test_binary_predict_pipeline_score_error(X_y_binary, logistic_regression_binary_pipeline_class):
+    X, y = X_y_binary
+    binary_pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
+    binary_pipeline.fit(X, y)
+    with pytest.raises(PipelineScoreError, match='Invalid objective MCC Multiclass specified for problem type binary'):
+        binary_pipeline.score(X, y, ['MCC Multiclass'])
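
The last test exercises the error path in _score_all_objectives: per-objective failures are collected and re-raised together as a PipelineScoreError. A sketch of handling it, assuming the constructor arguments (exceptions, scored_successfully) are exposed as attributes of the same names:

from evalml.exceptions import PipelineScoreError

try:
    scores = binary_pipeline.score(X, y, ['MCC Multiclass', 'AUC'])
except PipelineScoreError as e:
    print(e.exceptions.keys())       # objectives that failed, mapped to (exception, traceback)
    print(e.scored_successfully)     # objectives that scored before the raise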
14 changes: 11 additions & 3 deletions evalml/tests/pipeline_tests/test_time_series_baseline_pipeline.py
@@ -62,9 +62,11 @@ def test_time_series_baseline_predict_proba(pipeline_class, gap, X_none):
 @pytest.mark.parametrize("only_use_y", [True, False])
 @pytest.mark.parametrize("gap,max_delay", [(0, 0), (1, 0), (0, 2), (1, 1), (1, 2), (2, 2), (7, 3), (2, 4)])
 @patch("evalml.pipelines.RegressionPipeline._score_all_objectives")
-@patch("evalml.pipelines.ClassificationPipeline._score_all_objectives")
+@patch("evalml.pipelines.TimeSeriesClassificationPipeline._score_all_objectives")
+@patch("evalml.pipelines.TimeSeriesBinaryClassificationPipeline._score_all_objectives")
 @patch("evalml.pipelines.ClassificationPipeline._encode_targets", side_effect=lambda y: y)
-def test_time_series_baseline_score_offset(mock_encode, mock_classification_score, mock_regression_score, gap, max_delay,
+def test_time_series_baseline_score_offset(mock_encode, mock_binary_classification_score, mock_multiclass_classification_score,
+                                           mock_regression_score, gap, max_delay,
                                            only_use_y, pipeline_class, ts_data):
     X, y = ts_data
 
@@ -73,7 +75,13 @@
     expected_target = expected_target[1:]
     clf = pipeline_class(parameters={"pipeline": {"gap": gap, "max_delay": max_delay},
                                      "Time Series Baseline Estimator": {"gap": gap, "max_delay": max_delay}})
-    mock_score = mock_regression_score if pipeline_class == TimeSeriesBaselineRegressionPipeline else mock_classification_score
+    mock_score = None
+    if pipeline_class == TimeSeriesBaselineRegressionPipeline:
+        mock_score = mock_regression_score
+    elif pipeline_class == TimeSeriesBaselineBinaryPipeline:
+        mock_score = mock_binary_classification_score
+    else:
+        mock_score = mock_multiclass_classification_score
     if only_use_y:
         clf.fit(None, y)
         clf.score(X=None, y=y, objectives=['MCC Binary'])
6 changes: 4 additions & 2 deletions evalml/tests/pipeline_tests/test_time_series_pipeline.py
@@ -159,12 +159,14 @@ def mock_predict(df, y=None):
 @patch("evalml.pipelines.components.LogisticRegressionClassifier.predict")
 @patch("evalml.pipelines.TimeSeriesClassificationPipeline._encode_targets", side_effect=lambda y: y)
 @patch("evalml.pipelines.PipelineBase._score_all_objectives")
-def test_score_drops_nans(mock_score, mock_encode_targets,
+@patch("evalml.pipelines.TimeSeriesBinaryClassificationPipeline._score_all_objectives")
+def test_score_drops_nans(mock_binary_score, mock_score, mock_encode_targets,
                           mock_classifier_predict, mock_classifier_fit,
                           mock_regressor_predict, mock_regressor_fit,
                           pipeline_class,
                           estimator_name, gap, max_delay, include_delayed_features, only_use_y, ts_data):
 
+    if pipeline_class == TimeSeriesBinaryClassificationPipeline:
+        mock_score = mock_binary_score
     if only_use_y and (not include_delayed_features or (max_delay == 0 and gap == 0)):
         pytest.skip("This would result in an empty feature dataframe.")