Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update AutoML to use objective decision function during scoring for custom objectives #1934

Merged
merged 27 commits into from Mar 17, 2021
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
bf4b7a7
init
angela97lin Mar 5, 2021
7c751d3
Merge branch 'main' into 1868_objective
angela97lin Mar 5, 2021
5ba8267
Merge branch 'main' into 1868_objective
angela97lin Mar 6, 2021
6656538
Merge branch 'main' into 1868_objective
angela97lin Mar 8, 2021
ef32f72
Merge branch 'main' into 1868_objective
angela97lin Mar 9, 2021
1ba4628
wip
angela97lin Mar 9, 2021
edaf8a0
Merge branch 'main' into 1868_objective
angela97lin Mar 9, 2021
31667fc
this is kinda getting there
angela97lin Mar 9, 2021
386c192
Merge branch '1868_objective' of github.com:alteryx/evalml into 1868_…
angela97lin Mar 9, 2021
fcb4afb
release notes
angela97lin Mar 9, 2021
692532f
clean up
angela97lin Mar 10, 2021
3425491
add test for coverage
angela97lin Mar 10, 2021
f3096a0
Merge branch 'main' into 1868_objective
angela97lin Mar 14, 2021
0ac490f
Merge branch 'main' into 1868_objective
angela97lin Mar 15, 2021
0bb3f88
Merge branch 'main' into 1868_objective
angela97lin Mar 16, 2021
6107697
Merge branch 'main' into 1868_objective
angela97lin Mar 16, 2021
e549258
cleanup, add time series
angela97lin Mar 17, 2021
11d70d4
Merge branch '1868_objective' of github.com:alteryx/evalml into 1868_…
angela97lin Mar 17, 2021
70df88d
fix tests for time series mocks
angela97lin Mar 17, 2021
cbcc457
trying subclassing
angela97lin Mar 17, 2021
d4eaa6c
testing mixin
angela97lin Mar 17, 2021
aa54e25
fix tests and cleanup
angela97lin Mar 17, 2021
6786b33
separate out time series test
angela97lin Mar 17, 2021
0187e34
clean up _score_all_objectives
angela97lin Mar 17, 2021
15656dc
remove commented out code
angela97lin Mar 17, 2021
a191d96
moved optimize_threshold to mixin
angela97lin Mar 17, 2021
cf4a911
clean up test for multiclass
angela97lin Mar 17, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Expand Up @@ -7,6 +7,7 @@ Release Notes
* Added params to balanced classification data splitters for visibility :pr:`1966`
* Updated ``make_pipeline`` to not add ``Imputer`` if input data does not have numeric or categorical columns :pr:`1967`
* Fixes
* Updated binary classification pipelines to use objective decision function during scoring of custom objectives :pr:`1934`
* Changes
* Removed ``data_checks`` parameter, ``data_check_results`` and data checks logic from ``AutoMLSearch`` :pr:`1935`
* Documentation Changes
Expand Down
44 changes: 42 additions & 2 deletions evalml/pipelines/binary_classification_pipeline.py
@@ -1,3 +1,8 @@
import sys
import traceback
from collections import OrderedDict

from evalml.exceptions import PipelineScoreError
from evalml.objectives import get_objective
from evalml.pipelines.classification_pipeline import ClassificationPipeline
from evalml.problem_types import ProblemTypes
Expand Down Expand Up @@ -37,10 +42,14 @@ def _predict(self, X, objective=None):
if self.threshold is None:
return self._component_graph.predict(X)
ypred_proba = self.predict_proba(X).to_dataframe()
predictions = self._predict_with_objective(X, ypred_proba, objective)
return infer_feature_types(predictions)

def _predict_with_objective(self, X, ypred_proba, objective):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Helper function that takes in predict_proba, so we don't have to recalculate it for each objective.

ypred_proba = ypred_proba.iloc[:, 1]
if objective is None:
return infer_feature_types(ypred_proba > self.threshold)
return infer_feature_types(objective.decision_function(ypred_proba, threshold=self.threshold, X=X))
return ypred_proba > self.threshold
return objective.decision_function(ypred_proba, threshold=self.threshold, X=X)

def predict_proba(self, X):
"""Make probability estimates for labels. Assumes that the column at index 1 represents the positive label case.
Expand All @@ -60,3 +69,34 @@ def _score(X, y, predictions, objective):
if predictions.ndim > 1:
predictions = predictions.iloc[:, 1]
return ClassificationPipeline._score(X, y, predictions, objective)

def _compute_predictions(self, X, y, objectives, time_series=False):
"""Compute predictions/probabilities based on objectives."""
y_predicted = None
y_predicted_proba = None
if any(o.score_needs_proba for o in objectives) or (any(not o.score_needs_proba for o in objectives) and self.threshold is not None):
angela97lin marked this conversation as resolved.
Show resolved Hide resolved
y_predicted_proba = self.predict_proba(X, y) if time_series else self.predict_proba(X)
if any(not o.score_needs_proba for o in objectives) and self.threshold is None:
y_predicted = self._predict(X, y, pad=True) if time_series else self._predict(X)
angela97lin marked this conversation as resolved.
Show resolved Hide resolved
return y_predicted, y_predicted_proba

def _score_all_objectives(self, X, y, y_pred, y_pred_proba, objectives):
scored_successfully = OrderedDict()
exceptions = OrderedDict()
for objective in objectives:
try:
if not objective.is_defined_for_problem_type(self.problem_type):
raise ValueError(f'Invalid objective {objective.name} specified for problem type {self.problem_type}')
y_pred_to_use = y_pred
if self.threshold is not None and not objective.score_needs_proba:
y_pred_to_use = self._predict_with_objective(X, y_pred_proba, objective)
score = self._score(X, y, y_pred_proba if objective.score_needs_proba else y_pred_to_use, objective)
scored_successfully.update({objective.name: score})
except Exception as e:
tb = traceback.format_tb(sys.exc_info()[2])
exceptions[objective.name] = (e, tb)
if exceptions:
angela97lin marked this conversation as resolved.
Show resolved Hide resolved
# If any objective failed, throw an PipelineScoreError
raise PipelineScoreError(exceptions, scored_successfully)
# No objectives failed, return the scores
return scored_successfully
Expand Up @@ -4,6 +4,9 @@
import pytest
import woodwork as ww

from evalml.exceptions import PipelineScoreError
from evalml.objectives import FraudCost


@patch('evalml.pipelines.ClassificationPipeline._decode_targets', return_value=[0, 1])
@patch('evalml.objectives.BinaryClassificationObjective.decision_function', return_value=pd.Series([1, 0]))
Expand Down Expand Up @@ -66,3 +69,23 @@ def test_binary_predict_pipeline_objective_mismatch(mock_transform, X_y_binary,
with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."):
binary_pipeline.predict(X, "precision micro")
mock_transform.assert_called()


@patch('evalml.objectives.FraudCost.decision_function')
def test_binary_predict_pipeline_use_objective(mock_decision_function, X_y_binary, logistic_regression_binary_pipeline_class):
    """Scoring a thresholded binary pipeline with a custom objective calls its decision function."""
    mock_decision_function.return_value = pd.Series([0] * 100)
    X, y = X_y_binary
    pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
    pipeline.threshold = 0.7
    pipeline.fit(X, y)
    objectives = ['precision', 'auc', FraudCost(amount_col=0)]
    pipeline.score(X, y, objectives)
    mock_decision_function.assert_called()


def test_binary_predict_pipeline_score_error(X_y_binary, logistic_regression_binary_pipeline_class):
    """Scoring with an objective not defined for binary problems raises PipelineScoreError."""
    X, y = X_y_binary
    pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
    pipeline.fit(X, y)
    expected_message = 'Invalid objective MCC Multiclass specified for problem type binary'
    with pytest.raises(PipelineScoreError, match=expected_message):
        pipeline.score(X, y, ['MCC Multiclass'])