Percent better than baseline (#1050)
* Adding infrastructure to be able to compute percent difference between scores.

* Adding percent_better_than_baseline_column to AutoML search.

* Fixing broken automl tests and updating release notes for PR 1050.

* Adding is_percentage and perfect_score to CostBenefitMatrix.

* Removing is_percentage from ObjectiveBase.

* Renaming some test variables and making minor tweaks to AutoML search related to computing % better than baseline.

* Updating docstring in calculate_percent_difference.
freddyaboulton committed Aug 18, 2020
1 parent e00fa4b commit 88a5f1b
Showing 10 changed files with 171 additions and 8 deletions.
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -9,6 +9,7 @@ Release Notes
* Added new LSA component for text featurization :pr:`1022`
* Added guide on installing with conda :pr:`1041`
* Standardized error when calling transform/predict before fit for pipelines :pr:`1048`
* Added `percent_better_than_baseline` to AutoML search rankings and full rankings table :pr:`1050`
* Fixes
* Updated TextFeaturizer component to no longer require an internet connection to run :pr:`1022`
* Fixed non-deterministic element of TextFeaturizer transformations :pr:`1022`
5 changes: 4 additions & 1 deletion docs/source/user_guide/objectives.ipynb
@@ -66,7 +66,9 @@
"\n",
"* `score_needs_proba`: Only for classification objectives. `True` if the objective is intended to function with predicted probabilities as opposed to predicted values (example: cross entropy for classifiers).\n",
"\n",
"* `decision_function`: Only for binary classification objectives. This function takes predicted probabilities that were output from the model and a binary classification threshold, and returns predicted values. "
"* `decision_function`: Only for binary classification objectives. This function takes predicted probabilities that were output from the model and a binary classification threshold, and returns predicted values.\n",
"\n",
"* `perfect_score`: The score achieved by a perfect model on this objective."
]
},
{
@@ -93,6 +95,7 @@
" name = \"Fraud Cost\"\n",
" greater_is_better = False\n",
" score_needs_proba = False\n",
" perfect_score = 0.0\n",
"\n",
" def __init__(self, retry_percentage=.5, interchange_fee=.02,\n",
" fraud_payout_percentage=1.0, amount_col='amount'):\n",
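Each built-in objective now declares its perfect score as a class attribute. A quick illustration, not part of this diff, assuming the objective classes exported from evalml.objectives in this release:

from evalml.objectives import CostBenefitMatrix, FraudCost, LeadScoring

# perfect_score is a class attribute, so no instantiation is needed to inspect it.
print(FraudCost.perfect_score)          # 0.0 -- lower is better; a perfect model incurs no fraud cost
print(LeadScoring.perfect_score)        # inf -- higher is better, unbounded above
print(CostBenefitMatrix.perfect_score)  # inf -- higher is better, unbounded above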
7 changes: 6 additions & 1 deletion evalml/automl/automl_search.py
@@ -197,6 +197,7 @@ def __init__(self,
self.allowed_model_families = allowed_model_families
self._automl_algorithm = None
self._start = None
self._baseline_cv_score = None

self._validate_problem_type()

@@ -528,6 +529,7 @@ def _add_baseline_pipelines(self, X, y):
self._start)

baseline_results = self._compute_cv_scores(baseline, X, y)
self._baseline_cv_score = baseline_results["cv_score_mean"]
self._add_result(trained_pipeline=baseline,
parameters=baseline.parameters,
training_time=baseline_results['training_time'],
@@ -614,6 +616,7 @@ def _compute_cv_scores(self, pipeline, X, y):

def _add_result(self, trained_pipeline, parameters, training_time, cv_data, cv_scores):
cv_score = cv_scores.mean()
percent_better = self.objective.calculate_percent_difference(cv_score, self._baseline_cv_score)
# calculate high_variance_cv
# if the coefficient of variance is greater than .2
with warnings.catch_warnings():
@@ -634,6 +637,7 @@ def _add_result(self, trained_pipeline, parameters, training_time, cv_data, cv_scores):
"high_variance_cv": high_variance_cv,
"training_time": training_time,
"cv_data": cv_data,
"percent_better_than_baseline": percent_better
}
self._results['search_order'].append(pipeline_id)

@@ -780,7 +784,8 @@ def full_rankings(self):
if self.objective.greater_is_better:
ascending = False

full_rankings_cols = ["id", "pipeline_name", "score", "high_variance_cv", "parameters"]
full_rankings_cols = ["id", "pipeline_name", "score", "percent_better_than_baseline",
"high_variance_cv", "parameters"]
if not self.has_searched:
return pd.DataFrame(columns=full_rankings_cols)

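For context, a minimal sketch, not part of this diff, of how the new column surfaces to users. It assumes the AutoMLSearch signature of this release (max_pipelines, and search taking X and y directly):

import evalml
from evalml import AutoMLSearch

X, y = evalml.demos.load_breast_cancer()
automl = AutoMLSearch(problem_type="binary", max_pipelines=3)
automl.search(X, y)

# rankings and full_rankings now report how far each pipeline's mean CV score is
# from the baseline pipeline's mean CV score, expressed as a percentage.
print(automl.full_rankings[["pipeline_name", "score", "percent_better_than_baseline"]])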
1 change: 1 addition & 0 deletions evalml/objectives/cost_benefit_matrix.py
@@ -11,6 +11,7 @@ class CostBenefitMatrix(BinaryClassificationObjective):
name = "Cost Benefit Matrix"
greater_is_better = True
score_needs_proba = False
perfect_score = np.inf

def __init__(self, true_positive_cost, true_negative_cost, false_positive_cost, false_negative_cost):
"""Create instance of CostBenefitMatrix.
1 change: 1 addition & 0 deletions evalml/objectives/fraud_cost.py
@@ -8,6 +8,7 @@ class FraudCost(BinaryClassificationObjective):
name = "Fraud Cost"
greater_is_better = False
score_needs_proba = False
perfect_score = 0.0

def __init__(self, retry_percentage=.5, interchange_fee=.02,
fraud_payout_percentage=1.0, amount_col='amount'):
3 changes: 3 additions & 0 deletions evalml/objectives/lead_scoring.py
@@ -1,3 +1,5 @@
import math

import pandas as pd

from .binary_classification_objective import BinaryClassificationObjective
@@ -8,6 +10,7 @@ class LeadScoring(BinaryClassificationObjective):
name = "Lead Scoring"
greater_is_better = True
score_needs_proba = False
perfect_score = math.inf

def __init__(self, true_positives=1, false_positives=-1):
"""Create instance.
30 changes: 30 additions & 0 deletions evalml/objectives/objective_base.py
@@ -26,6 +26,12 @@ def score_needs_proba(cls):
"""Returns a boolean determining if the score() method needs probability estimates. This should be true for objectives which work with predicted probabilities, like log loss or AUC, and false for objectives which compare predicted class labels to the actual labels, like F1 or correlation.
"""

@property
@classmethod
@abstractmethod
def perfect_score(cls):
"""Returns the score obtained by evaluating this objective on a perfect model."""

@classmethod
@abstractmethod
def objective_function(cls, y_true, y_predicted, X=None):
@@ -89,3 +95,27 @@ def validate_inputs(self, y_true, y_predicted):
raise ValueError("y_predicted contains NaN or infinity")
if self.score_needs_proba and np.any([(y_predicted < 0) | (y_predicted > 1)]):
raise ValueError("y_predicted contains probability estimates not within [0, 1]")

@classmethod
def calculate_percent_difference(cls, score, baseline_score):
"""Calculate the percent difference between scores.
Arguments:
score (float): A score. Output of the score method of this objective.
baseline_score (float): A score. Output of the score method of this objective. In practice,
this is the score achieved on this objective with a baseline estimator.
Returns:
float: The percent difference between the scores. This will be the difference normalized by the
baseline score.
"""

if pd.isna(score) or pd.isna(baseline_score):
return np.nan

if baseline_score == 0:
return np.nan

difference = (baseline_score - score)
change = difference / baseline_score
return 100 * (-1) ** (cls.greater_is_better) * change
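A worked example of the sign convention, not part of this diff: the result is positive whenever the pipeline improves on the baseline, regardless of whether the objective is minimized or maximized. It assumes the LogLossBinary and AUC objective classes exported by evalml.objectives:

from evalml.objectives import AUC, LogLossBinary

# Log loss: lower is better. Improving from a baseline of 0.5 to 0.1 gives
# (0.5 - 0.1) / 0.5 = 0.8, reported as 80% better than baseline.
print(LogLossBinary.calculate_percent_difference(score=0.1, baseline_score=0.5))  # 80.0

# AUC: higher is better. Improving from a baseline of 0.5 to 0.75 gives
# (0.75 - 0.5) / 0.5 = 0.5, reported as 50% better than baseline.
print(AUC.calculate_percent_difference(score=0.75, baseline_score=0.5))  # 50.0

# A NaN score or a zero baseline score yields NaN, since the difference cannot be normalized.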
