Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update AutoML to use objective decision function during scoring for custom objectives #1934

Merged
merged 27 commits into from Mar 17, 2021
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
bf4b7a7
init
angela97lin Mar 5, 2021
7c751d3
Merge branch 'main' into 1868_objective
angela97lin Mar 5, 2021
5ba8267
Merge branch 'main' into 1868_objective
angela97lin Mar 6, 2021
6656538
Merge branch 'main' into 1868_objective
angela97lin Mar 8, 2021
ef32f72
Merge branch 'main' into 1868_objective
angela97lin Mar 9, 2021
1ba4628
wip
angela97lin Mar 9, 2021
edaf8a0
Merge branch 'main' into 1868_objective
angela97lin Mar 9, 2021
31667fc
this is kinda getting there
angela97lin Mar 9, 2021
386c192
Merge branch '1868_objective' of github.com:alteryx/evalml into 1868_…
angela97lin Mar 9, 2021
fcb4afb
release notes
angela97lin Mar 9, 2021
692532f
clean up
angela97lin Mar 10, 2021
3425491
add test for coverage
angela97lin Mar 10, 2021
f3096a0
Merge branch 'main' into 1868_objective
angela97lin Mar 14, 2021
0ac490f
Merge branch 'main' into 1868_objective
angela97lin Mar 15, 2021
0bb3f88
Merge branch 'main' into 1868_objective
angela97lin Mar 16, 2021
6107697
Merge branch 'main' into 1868_objective
angela97lin Mar 16, 2021
e549258
cleanup, add time series
angela97lin Mar 17, 2021
11d70d4
Merge branch '1868_objective' of github.com:alteryx/evalml into 1868_…
angela97lin Mar 17, 2021
70df88d
fix tests for time series mocks
angela97lin Mar 17, 2021
cbcc457
trying subclassing
angela97lin Mar 17, 2021
d4eaa6c
testing mixin
angela97lin Mar 17, 2021
aa54e25
fix tests and cleanup
angela97lin Mar 17, 2021
6786b33
separate out time series test
angela97lin Mar 17, 2021
0187e34
clean up _score_all_objectives
angela97lin Mar 17, 2021
15656dc
remove commented out code
angela97lin Mar 17, 2021
a191d96
moved optimize_threshold to mixin
angela97lin Mar 17, 2021
cf4a911
clean up test for multiclass
angela97lin Mar 17, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
Expand Up @@ -7,6 +7,7 @@ Release Notes
* Added params to balanced classification data splitters for visibility :pr:`1966`
* Updated ``make_pipeline`` to not add ``Imputer`` if input data does not have numeric or categorical columns :pr:`1967`
* Fixes
* Updated binary classification pipelines to use objective decision function during scoring of custom objectives :pr:`1934`
* Changes
* Removed ``data_checks`` parameter, ``data_check_results`` and data checks logic from ``AutoMLSearch`` :pr:`1935`
* Documentation Changes
Expand Down
44 changes: 42 additions & 2 deletions evalml/pipelines/binary_classification_pipeline.py
@@ -1,3 +1,8 @@
import sys
import traceback
from collections import OrderedDict

from evalml.exceptions import PipelineScoreError
from evalml.objectives import get_objective
from evalml.pipelines.classification_pipeline import ClassificationPipeline
from evalml.problem_types import ProblemTypes
Expand Down Expand Up @@ -37,10 +42,14 @@ def _predict(self, X, objective=None):
if self.threshold is None:
return self._component_graph.predict(X)
ypred_proba = self.predict_proba(X).to_dataframe()
predictions = self._predict_with_objective(X, ypred_proba, objective)
return infer_feature_types(predictions)

def _predict_with_objective(self, X, ypred_proba, objective):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Helper function that takes in predict_proba, so we don't have to recalculate it for each objective.

ypred_proba = ypred_proba.iloc[:, 1]
if objective is None:
return infer_feature_types(ypred_proba > self.threshold)
return infer_feature_types(objective.decision_function(ypred_proba, threshold=self.threshold, X=X))
return ypred_proba > self.threshold
return objective.decision_function(ypred_proba, threshold=self.threshold, X=X)

def predict_proba(self, X):
"""Make probability estimates for labels. Assumes that the column at index 1 represents the positive label case.
Expand All @@ -60,3 +69,34 @@ def _score(X, y, predictions, objective):
if predictions.ndim > 1:
predictions = predictions.iloc[:, 1]
return ClassificationPipeline._score(X, y, predictions, objective)

def _compute_predictions(self, X, y, objectives, time_series=False):
"""Compute predictions/probabilities based on objectives."""
y_predicted = None
y_predicted_proba = None
if any(o.score_needs_proba for o in objectives) or (any(not o.score_needs_proba for o in objectives) and self.threshold is not None):
angela97lin marked this conversation as resolved.
Show resolved Hide resolved
y_predicted_proba = self.predict_proba(X, y) if time_series else self.predict_proba(X)
if any(not o.score_needs_proba for o in objectives) and self.threshold is None:
y_predicted = self._predict(X, y, pad=True) if time_series else self._predict(X)
angela97lin marked this conversation as resolved.
Show resolved Hide resolved
return y_predicted, y_predicted_proba

def _score_all_objectives(self, X, y, y_pred, y_pred_proba, objectives):
scored_successfully = OrderedDict()
exceptions = OrderedDict()
for objective in objectives:
try:
if not objective.is_defined_for_problem_type(self.problem_type):
raise ValueError(f'Invalid objective {objective.name} specified for problem type {self.problem_type}')
y_pred_to_use = y_pred
if self.threshold is not None and not objective.score_needs_proba:
y_pred_to_use = self._predict_with_objective(X, y_pred_proba, objective)
score = self._score(X, y, y_pred_proba if objective.score_needs_proba else y_pred_to_use, objective)
scored_successfully.update({objective.name: score})
except Exception as e:
tb = traceback.format_tb(sys.exc_info()[2])
exceptions[objective.name] = (e, tb)
if exceptions:
angela97lin marked this conversation as resolved.
Show resolved Hide resolved
# If any objective failed, throw an PipelineScoreError
raise PipelineScoreError(exceptions, scored_successfully)
# No objectives failed, return the scores
return scored_successfully
Expand Up @@ -4,6 +4,9 @@
import pytest
import woodwork as ww

from evalml.exceptions import PipelineScoreError
from evalml.objectives import FraudCost


@patch('evalml.pipelines.ClassificationPipeline._decode_targets', return_value=[0, 1])
@patch('evalml.objectives.BinaryClassificationObjective.decision_function', return_value=pd.Series([1, 0]))
Expand Down Expand Up @@ -66,3 +69,23 @@ def test_binary_predict_pipeline_objective_mismatch(mock_transform, X_y_binary,
with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."):
binary_pipeline.predict(X, "precision micro")
mock_transform.assert_called()


@patch('evalml.objectives.FraudCost.decision_function')
def test_binary_predict_pipeline_use_objective(mock_decision_function, X_y_binary, logistic_regression_binary_pipeline_class):
    """Scoring a thresholded binary pipeline with a custom objective calls its decision function."""
    mock_decision_function.return_value = pd.Series([0] * 100)
    X, y = X_y_binary
    pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
    pipeline.threshold = 0.7
    pipeline.fit(X, y)
    objectives = ['precision', 'auc', FraudCost(amount_col=0)]
    pipeline.score(X, y, objectives)
    mock_decision_function.assert_called()


def test_binary_predict_pipeline_score_error(X_y_binary, logistic_regression_binary_pipeline_class):
    """Scoring with an objective not defined for binary problems raises PipelineScoreError."""
    X, y = X_y_binary
    pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
    pipeline.fit(X, y)
    expected_message = 'Invalid objective MCC Multiclass specified for problem type binary'
    with pytest.raises(PipelineScoreError, match=expected_message):
        pipeline.score(X, y, ['MCC Multiclass'])