diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 741437f3ec..afc6af86a6 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -8,6 +8,7 @@ Release Notes
         * Updated ``make_pipeline`` to not add ``Imputer`` if input data does not have numeric or categorical columns :pr:`1967`
         * Added recommended actions for the output of data check's ``validate`` method :pr:`1968`
     * Fixes
+        * Updated binary classification pipelines to use objective decision function during scoring of custom objectives :pr:`1934`
     * Changes
         * Removed ``data_checks`` parameter, ``data_check_results`` and data checks logic from ``AutoMLSearch`` :pr:`1935`
     * Documentation Changes
diff --git a/evalml/pipelines/binary_classification_pipeline.py b/evalml/pipelines/binary_classification_pipeline.py
index 958329f226..8ebdf4dae2 100644
--- a/evalml/pipelines/binary_classification_pipeline.py
+++ b/evalml/pipelines/binary_classification_pipeline.py
@@ -1,23 +1,17 @@
+from .binary_classification_pipeline_mixin import (
+    BinaryClassificationPipelineMixin
+)
+
 from evalml.objectives import get_objective
 from evalml.pipelines.classification_pipeline import ClassificationPipeline
 from evalml.problem_types import ProblemTypes
 from evalml.utils import infer_feature_types
 
 
-class BinaryClassificationPipeline(ClassificationPipeline):
+class BinaryClassificationPipeline(BinaryClassificationPipelineMixin, ClassificationPipeline):
     """Pipeline subclass for all binary classification pipelines."""
-    _threshold = None
     problem_type = ProblemTypes.BINARY
 
-    @property
-    def threshold(self):
-        """Threshold used to make a prediction. Defaults to None."""
-        return self._threshold
-
-    @threshold.setter
-    def threshold(self, value):
-        self._threshold = value
-
     def _predict(self, X, objective=None):
         """Make predictions using selected features.
 
@@ -37,10 +31,8 @@ def _predict(self, X, objective=None):
         if self.threshold is None:
             return self._component_graph.predict(X)
         ypred_proba = self.predict_proba(X).to_dataframe()
-        ypred_proba = ypred_proba.iloc[:, 1]
-        if objective is None:
-            return infer_feature_types(ypred_proba > self.threshold)
-        return infer_feature_types(objective.decision_function(ypred_proba, threshold=self.threshold, X=X))
+        predictions = self._predict_with_objective(X, ypred_proba, objective)
+        return infer_feature_types(predictions)
 
     def predict_proba(self, X):
         """Make probability estimates for labels. Assumes that the column at index 1 represents the positive label case.
diff --git a/evalml/pipelines/binary_classification_pipeline_mixin.py b/evalml/pipelines/binary_classification_pipeline_mixin.py
new file mode 100644
index 0000000000..770b4555c5
--- /dev/null
+++ b/evalml/pipelines/binary_classification_pipeline_mixin.py
@@ -0,0 +1,49 @@
+
+class BinaryClassificationPipelineMixin():
+    _threshold = None
+
+    @property
+    def threshold(self):
+        """Threshold used to make a prediction. Defaults to None."""
+        return self._threshold
+
+    @threshold.setter
+    def threshold(self, value):
+        self._threshold = value
+
+    def _predict_with_objective(self, X, ypred_proba, objective):
+        ypred_proba = ypred_proba.iloc[:, 1]
+        if objective is None:
+            return ypred_proba > self.threshold
+        return objective.decision_function(ypred_proba, threshold=self.threshold, X=X)
+
+    def _compute_predictions(self, X, y, objectives, time_series=False):
+        """Compute predictions/probabilities based on objectives."""
+        y_predicted = None
+        y_predicted_proba = None
+        if any(o.score_needs_proba for o in objectives) or self.threshold is not None:
+            y_predicted_proba = self.predict_proba(X, y) if time_series else self.predict_proba(X)
+        if any(not o.score_needs_proba for o in objectives) and self.threshold is None:
+            y_predicted = self._predict(X, y, pad=True) if time_series else self._predict(X)
+        return y_predicted, y_predicted_proba
+
+    def _select_y_pred_for_score(self, X, y, y_pred, y_pred_proba, objective):
+        y_pred_to_use = y_pred
+        if self.threshold is not None and not objective.score_needs_proba:
+            y_pred_to_use = self._predict_with_objective(X, y_pred_proba, objective)
+        return y_pred_to_use
+
+    def optimize_threshold(self, X, y, y_pred_proba, objective):
+        """Optimize the pipeline threshold given the objective to use. Only used for binary problems with objectives whose thresholds can be tuned.
+
+        Arguments:
+            X (ww.DataTable): Input features
+            y (ww.DataColumn): Input target values
+            y_pred_proba (ww.DataColumn): The predicted probabilities of the target outputted by the pipeline
+            objective (ObjectiveBase): The objective to threshold with. Must have a tunable threshold.
+        """
+        if self.can_tune_threshold_with_objective(objective):
+            targets = self._encode_targets(y.to_series())
+            self.threshold = objective.optimize_threshold(y_pred_proba, targets, X)
+        else:
+            raise ValueError("Problem type must be binary and objective must be optimizable.")
diff --git a/evalml/pipelines/classification_pipeline.py b/evalml/pipelines/classification_pipeline.py
index f896a9c7a6..c5332ea224 100644
--- a/evalml/pipelines/classification_pipeline.py
+++ b/evalml/pipelines/classification_pipeline.py
@@ -136,18 +136,3 @@ def _compute_predictions(self, X, y, objectives, time_series=False):
         if any(not o.score_needs_proba for o in objectives):
             y_predicted = self._predict(X, y, pad=True) if time_series else self._predict(X)
         return y_predicted, y_predicted_proba
-
-    def optimize_threshold(self, X, y, y_pred_proba, objective):
-        """Optimize the pipeline threshold given the objective to use. Only used for binary problems with objectives whose thresholds can be tuned.
-
-        Arguments:
-            X (ww.DataTable): Input features
-            y (ww.DataColumn): Input target values
-            y_pred_proba (ww.DataColumn): The predicted probabilities of the target outputted by the pipeline
-            objective (ObjectiveBase): The objective to threshold with. Must have a tunable threshold.
-        """
-        if self.can_tune_threshold_with_objective(objective):
-            targets = self._encode_targets(y.to_series())
-            self.threshold = objective.optimize_threshold(y_pred_proba, targets, X)
-        else:
-            raise ValueError("Problem type must be binary and objective must be optimizable.")
diff --git a/evalml/pipelines/pipeline_base.py b/evalml/pipelines/pipeline_base.py
index 2689b66093..42d49b62b4 100644
--- a/evalml/pipelines/pipeline_base.py
+++ b/evalml/pipelines/pipeline_base.py
@@ -283,6 +283,7 @@ def _score_all_objectives(self, X, y, y_pred, y_pred_proba, objectives):
             try:
                 if not objective.is_defined_for_problem_type(self.problem_type):
                     raise ValueError(f'Invalid objective {objective.name} specified for problem type {self.problem_type}')
+                y_pred = self._select_y_pred_for_score(X, y, y_pred, y_pred_proba, objective)
                 score = self._score(X, y, y_pred_proba if objective.score_needs_proba else y_pred, objective)
                 scored_successfully.update({objective.name: score})
             except Exception as e:
@@ -294,6 +295,9 @@ def _score_all_objectives(self, X, y, y_pred, y_pred_proba, objectives):
         # No objectives failed, return the scores
         return scored_successfully
 
+    def _select_y_pred_for_score(self, X, y, y_pred, y_pred_proba, objective):
+        return y_pred
+
     @classproperty
     def model_family(cls):
         """Returns model family of this pipeline template"""
diff --git a/evalml/pipelines/time_series_classification_pipelines.py b/evalml/pipelines/time_series_classification_pipelines.py
index fbd7d32bea..f845e4335e 100644
--- a/evalml/pipelines/time_series_classification_pipelines.py
+++ b/evalml/pipelines/time_series_classification_pipelines.py
@@ -1,5 +1,10 @@
+
 import pandas as pd
 
+from .binary_classification_pipeline_mixin import (
+    BinaryClassificationPipelineMixin
+)
+
 from evalml.objectives import get_objective
 from evalml.pipelines.classification_pipeline import ClassificationPipeline
 from evalml.pipelines.pipeline_meta import TimeSeriesPipelineBaseMeta
@@ -169,17 +174,8 @@ def score(self, X, y, objectives):
                                            objectives=objectives)
 
 
-class TimeSeriesBinaryClassificationPipeline(TimeSeriesClassificationPipeline, metaclass=TimeSeriesPipelineBaseMeta):
+class TimeSeriesBinaryClassificationPipeline(BinaryClassificationPipelineMixin, TimeSeriesClassificationPipeline, metaclass=TimeSeriesPipelineBaseMeta):
     problem_type = ProblemTypes.TIME_SERIES_BINARY
-    _threshold = None
-
-    @property
-    def threshold(self):
-        return self._threshold
-
-    @threshold.setter
-    def threshold(self, value):
-        self._threshold = value
 
     def _predict(self, X, y, objective=None, pad=False):
         features = self.compute_estimator_features(X, y)
diff --git a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_binary_classification.py b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_binary_classification.py
index 5a2486459a..ad74be56fb 100644
--- a/evalml/tests/pipeline_tests/classification_pipeline_tests/test_binary_classification.py
+++ b/evalml/tests/pipeline_tests/classification_pipeline_tests/test_binary_classification.py
@@ -4,6 +4,9 @@
 import pytest
 import woodwork as ww
 
+from evalml.exceptions import PipelineScoreError
+from evalml.objectives import FraudCost, get_objective
+
 
 @patch('evalml.pipelines.ClassificationPipeline._decode_targets', return_value=[0, 1])
 @patch('evalml.objectives.BinaryClassificationObjective.decision_function', return_value=pd.Series([1, 0]))
@@ -66,3 +69,40 @@ def test_binary_predict_pipeline_objective_mismatch(mock_transform, X_y_binary,
     with pytest.raises(ValueError, match="You can only use a binary classification objective to make predictions for a binary classification pipeline."):
         binary_pipeline.predict(X, "precision micro")
     mock_transform.assert_called()
+
+
+@patch('evalml.objectives.FraudCost.decision_function')
+def test_binary_predict_pipeline_use_objective(mock_decision_function, X_y_binary, logistic_regression_binary_pipeline_class):
+    X, y = X_y_binary
+    binary_pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
+    mock_decision_function.return_value = pd.Series([0] * 100)
+
+    binary_pipeline.threshold = 0.7
+    binary_pipeline.fit(X, y)
+    fraud_cost = FraudCost(amount_col=0)
+    binary_pipeline.score(X, y, ['precision', 'auc', fraud_cost])
+    mock_decision_function.assert_called()
+
+
+def test_binary_predict_pipeline_score_error(X_y_binary, logistic_regression_binary_pipeline_class):
+    X, y = X_y_binary
+    binary_pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
+    binary_pipeline.fit(X, y)
+    with pytest.raises(PipelineScoreError, match='Invalid objective MCC Multiclass specified for problem type binary'):
+        binary_pipeline.score(X, y, ['MCC Multiclass'])
+
+
+@patch('evalml.pipelines.BinaryClassificationPipeline.fit')
+@patch('evalml.pipelines.BinaryClassificationPipeline.score')
+@patch('evalml.pipelines.BinaryClassificationPipeline.predict_proba')
+def test_pipeline_thresholding_errors(mock_binary_pred_proba, mock_binary_score, mock_binary_fit,
+                                      make_data_type, logistic_regression_binary_pipeline_class, X_y_binary):
+    X, y = X_y_binary
+    X = make_data_type('ww', X)
+    y = make_data_type('ww', pd.Series([f"String value {i}" for i in y]))
+    objective = get_objective("Log Loss Binary", return_instance=True)
+    pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
+    pipeline.fit(X, y)
+    pred_proba = pipeline.predict_proba(X, y).iloc[:, 1]
+    with pytest.raises(ValueError, match="Problem type must be binary and objective must be optimizable"):
+        pipeline.optimize_threshold(X, y, pred_proba, objective)
diff --git a/evalml/tests/pipeline_tests/test_pipelines.py b/evalml/tests/pipeline_tests/test_pipelines.py
index 2f30833c1e..6c8b385a2e 100644
--- a/evalml/tests/pipeline_tests/test_pipelines.py
+++ b/evalml/tests/pipeline_tests/test_pipelines.py
@@ -2009,37 +2009,3 @@ def test_binary_pipeline_string_target_thresholding(make_data_type, logistic_reg
     pred_proba = pipeline.predict_proba(X, y).iloc[:, 1]
     pipeline.optimize_threshold(X, y, pred_proba, objective)
     assert pipeline.threshold is not None
-
-
-@patch('evalml.pipelines.BinaryClassificationPipeline.fit')
-@patch('evalml.pipelines.BinaryClassificationPipeline.score')
-@patch('evalml.pipelines.BinaryClassificationPipeline.predict_proba')
-@patch('evalml.pipelines.MulticlassClassificationPipeline.fit')
-@patch('evalml.pipelines.MulticlassClassificationPipeline.score')
-@patch('evalml.pipelines.MulticlassClassificationPipeline.predict')
-def test_pipeline_thresholding_errors(mock_multi_predict, mock_multi_score, mock_multi_fit,
-                                      mock_binary_pred_proba, mock_binary_score, mock_binary_fit,
-                                      make_data_type, logistic_regression_binary_pipeline_class,
-                                      logistic_regression_multiclass_pipeline_class, X_y_multi, X_y_binary):
-    X, y = X_y_multi
-    X = make_data_type('ww', X)
-    y = make_data_type('ww', pd.Series([f"String value {i}" for i in y]))
-    objective = get_objective("F1 Macro", return_instance=True)
-    pipeline = logistic_regression_multiclass_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
-    pipeline.fit(X, y)
-    pred_proba = pipeline.predict(X, y)
-    with pytest.raises(ValueError, match="Problem type must be binary and objective must be optimizable"):
-        pipeline.optimize_threshold(X, y, pred_proba, objective)
-
-    objective = get_objective("Log Loss Multiclass")
-    with pytest.raises(ValueError, match="Problem type must be binary and objective must be optimizable"):
-        pipeline.optimize_threshold(X, y, pred_proba, objective)
-    X, y = X_y_binary
-    X = make_data_type('ww', X)
-    y = make_data_type('ww', pd.Series([f"String value {i}" for i in y]))
-    objective = get_objective("Log Loss Binary", return_instance=True)
-    pipeline = logistic_regression_binary_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1}})
-    pipeline.fit(X, y)
-    pred_proba = pipeline.predict_proba(X, y).iloc[:, 1]
-    with pytest.raises(ValueError, match="Problem type must be binary and objective must be optimizable"):
-        pipeline.optimize_threshold(X, y, pred_proba, objective)
diff --git a/evalml/tests/pipeline_tests/test_time_series_baseline_pipeline.py b/evalml/tests/pipeline_tests/test_time_series_baseline_pipeline.py
index 25791d7f05..d0538003c1 100644
--- a/evalml/tests/pipeline_tests/test_time_series_baseline_pipeline.py
+++ b/evalml/tests/pipeline_tests/test_time_series_baseline_pipeline.py
@@ -62,9 +62,11 @@ def test_time_series_baseline_predict_proba(pipeline_class, gap, X_none):
 @pytest.mark.parametrize("only_use_y", [True, False])
 @pytest.mark.parametrize("gap,max_delay", [(0, 0), (1, 0), (0, 2), (1, 1), (1, 2), (2, 2), (7, 3), (2, 4)])
 @patch("evalml.pipelines.RegressionPipeline._score_all_objectives")
-@patch("evalml.pipelines.ClassificationPipeline._score_all_objectives")
+@patch("evalml.pipelines.TimeSeriesClassificationPipeline._score_all_objectives")
+@patch("evalml.pipelines.TimeSeriesBinaryClassificationPipeline._score_all_objectives")
 @patch("evalml.pipelines.ClassificationPipeline._encode_targets", side_effect=lambda y: y)
-def test_time_series_baseline_score_offset(mock_encode, mock_classification_score, mock_regression_score, gap, max_delay,
+def test_time_series_baseline_score_offset(mock_encode, mock_binary_classification_score, mock_multiclass_classification_score,
+                                           mock_regression_score, gap, max_delay,
                                            only_use_y, pipeline_class, ts_data):
     X, y = ts_data
 
@@ -73,7 +75,13 @@ def test_time_series_baseline_score_offset(mock_encode, mock_classification_scor
         expected_target = expected_target[1:]
     clf = pipeline_class(parameters={"pipeline": {"gap": gap, "max_delay": max_delay},
                                      "Time Series Baseline Estimator": {"gap": gap, "max_delay": max_delay}})
-    mock_score = mock_regression_score if pipeline_class == TimeSeriesBaselineRegressionPipeline else mock_classification_score
+    mock_score = None
+    if pipeline_class == TimeSeriesBaselineRegressionPipeline:
+        mock_score = mock_regression_score
+    elif pipeline_class == TimeSeriesBaselineBinaryPipeline:
+        mock_score = mock_binary_classification_score
+    else:
+        mock_score = mock_multiclass_classification_score
     if only_use_y:
         clf.fit(None, y)
         clf.score(X=None, y=y, objectives=['MCC Binary'])
diff --git a/evalml/tests/pipeline_tests/test_time_series_pipeline.py b/evalml/tests/pipeline_tests/test_time_series_pipeline.py
index 866c098110..84453cb21a 100644
--- a/evalml/tests/pipeline_tests/test_time_series_pipeline.py
+++ b/evalml/tests/pipeline_tests/test_time_series_pipeline.py
@@ -7,7 +7,7 @@
 from pandas.testing import assert_frame_equal, assert_series_equal
 
 from evalml.exceptions import PipelineNotYetFittedError
-from evalml.objectives import get_objective
+from evalml.objectives import FraudCost, get_objective
 from evalml.pipelines import (
     TimeSeriesBinaryClassificationPipeline,
     TimeSeriesMulticlassClassificationPipeline,
@@ -159,12 +159,14 @@ def mock_predict(df, y=None):
 @patch("evalml.pipelines.components.LogisticRegressionClassifier.predict")
 @patch("evalml.pipelines.TimeSeriesClassificationPipeline._encode_targets", side_effect=lambda y: y)
 @patch("evalml.pipelines.PipelineBase._score_all_objectives")
-def test_score_drops_nans(mock_score, mock_encode_targets,
+@patch("evalml.pipelines.TimeSeriesBinaryClassificationPipeline._score_all_objectives")
+def test_score_drops_nans(mock_binary_score, mock_score, mock_encode_targets,
                           mock_classifier_predict, mock_classifier_fit,
                           mock_regressor_predict, mock_regressor_fit,
                           pipeline_class, estimator_name, gap, max_delay,
                           include_delayed_features, only_use_y, ts_data):
-
+    if pipeline_class == TimeSeriesBinaryClassificationPipeline:
+        mock_score = mock_binary_score
     if only_use_y and (not include_delayed_features or (max_delay == 0 and gap == 0)):
         pytest.skip("This would result in an empty feature dataframe.")
 
@@ -438,3 +440,16 @@ def test_ts_binary_pipeline_target_thresholding(make_data_type, time_series_bina
     pred_proba = binary_pipeline.predict_proba(X, y).iloc[:, 1]
     binary_pipeline.optimize_threshold(X, y, pred_proba, objective)
     assert binary_pipeline.threshold is not None
+
+
+@patch('evalml.objectives.FraudCost.decision_function')
+def test_binary_predict_pipeline_use_objective(mock_decision_function, X_y_binary, time_series_binary_classification_pipeline_class):
+    X, y = X_y_binary
+    binary_pipeline = time_series_binary_classification_pipeline_class(parameters={"Logistic Regression Classifier": {"n_jobs": 1},
+                                                                                   "pipeline": {"gap": 0, "max_delay": 0}})
+    mock_decision_function.return_value = pd.Series([0] * 98)
+    binary_pipeline.threshold = 0.7
+    binary_pipeline.fit(X, y)
+    fraud_cost = FraudCost(amount_col=0)
+    binary_pipeline.score(X, y, ['precision', 'auc', fraud_cost])
+    mock_decision_function.assert_called()
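
A minimal usage sketch of the scoring path this diff introduces, mirroring test_binary_predict_pipeline_use_objective above: once pipeline.threshold is set, _score_all_objectives routes non-probability objectives through _select_y_pred_for_score, which calls the objective's decision_function via _predict_with_objective instead of a plain probability cutoff. The pipeline subclass, the breast cancer demo data, and the amount_col choice below are illustrative assumptions, not part of the diff.

    # Hypothetical sketch; MyBinaryPipeline, load_breast_cancer, and
    # amount_col="mean radius" are assumptions for illustration only.
    from evalml.demos import load_breast_cancer
    from evalml.objectives import FraudCost
    from evalml.pipelines import BinaryClassificationPipeline


    class MyBinaryPipeline(BinaryClassificationPipeline):
        component_graph = ["Imputer", "Logistic Regression Classifier"]


    X, y = load_breast_cancer()
    pipeline = MyBinaryPipeline(parameters={})
    pipeline.fit(X, y)

    # With a threshold set, scoring a custom objective such as FraudCost now
    # goes through the objective's decision_function rather than thresholding
    # the positive-class probability directly.
    pipeline.threshold = 0.7
    fraud_cost = FraudCost(amount_col="mean radius")
    print(pipeline.score(X, y, objectives=["precision", "auc", fraud_cost]))

Threshold tuning itself is unchanged by this diff; optimize_threshold simply moves from ClassificationPipeline into the new BinaryClassificationPipelineMixin and still raises ValueError for objectives that cannot be tuned, as exercised by test_pipeline_thresholding_errors above.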