
Percent better than baseline #1050

Merged · 7 commits · Aug 18, 2020
docs/source/release_notes.rst — 1 addition, 0 deletions

@@ -9,6 +9,7 @@ Release Notes
* Added new LSA component for text featurization :pr:`1022`
* Added guide on installing with conda :pr:`1041`
* Standardized error when calling transform/predict before fit for pipelines :pr:`1048`
* Added `percent_better_than_baseline` to AutoML search rankings and full rankings table :pr:`1050`
* Fixes
* Updated TextFeaturizer component to no longer require an internet connection to run :pr:`1022`
* Fixed non-deterministic element of TextFeaturizer transformations :pr:`1022`
docs/source/user_guide/objectives.ipynb — 4 additions, 1 deletion

@@ -66,7 +66,9 @@
"\n",
"* `score_needs_proba`: Only for classification objectives. `True` if the objective is intended to function with predicted probabilities as opposed to predicted values (example: cross entropy for classifiers).\n",
"\n",
"* `decision_function`: Only for binary classification objectives. This function takes predicted probabilities that were output from the model and a binary classification threshold, and returns predicted values. "
"* `decision_function`: Only for binary classification objectives. This function takes predicted probabilities that were output from the model and a binary classification threshold, and returns predicted values.\n",
"\n",
"* `perfect_score`: The score achieved by a perfect model on this objective."
]
},
{
@@ -93,6 +95,7 @@
" name = \"Fraud Cost\"\n",
" greater_is_better = False\n",
" score_needs_proba = False\n",
" perfect_score = 0.0\n",
"\n",
" def __init__(self, retry_percentage=.5, interchange_fee=.02,\n",
" fraud_payout_percentage=1.0, amount_col='amount'):\n",
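To tie together the four attributes described in the notebook cell above, here is a minimal sketch of a custom binary objective. It is not taken from this PR: the `SimpleAccuracy` class and its scoring logic are illustrative, and the import path and `objective_function` signature are assumed from the evalml API shown in this diff.

import numpy as np

from evalml.objectives import BinaryClassificationObjective


class SimpleAccuracy(BinaryClassificationObjective):
    """Illustrative objective: fraction of correctly predicted labels."""
    name = "Simple Accuracy"
    greater_is_better = True   # higher accuracy is better
    score_needs_proba = False  # compares class labels, not probabilities
    perfect_score = 1.0        # a perfect model labels every row correctly

    def objective_function(self, y_true, y_predicted, X=None):
        # Mean agreement between predicted and actual labels.
        return np.mean(np.asarray(y_true) == np.asarray(y_predicted))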
evalml/automl/automl_search.py — 6 additions, 1 deletion

@@ -197,6 +197,7 @@ def __init__(self,
self.allowed_model_families = allowed_model_families
self._automl_algorithm = None
self._start = None
self._baseline_cv_score = None

self._validate_problem_type()

@@ -528,6 +529,7 @@ def _add_baseline_pipelines(self, X, y):
self._start)

baseline_results = self._compute_cv_scores(baseline, X, y)
self._baseline_cv_score = baseline_results["cv_score_mean"]
self._add_result(trained_pipeline=baseline,
parameters=baseline.parameters,
training_time=baseline_results['training_time'],
@@ -614,6 +616,7 @@ def _compute_cv_scores(self, pipeline, X, y):

def _add_result(self, trained_pipeline, parameters, training_time, cv_data, cv_scores):
cv_score = cv_scores.mean()
percent_better = self.objective.calculate_percent_difference(cv_score, self._baseline_cv_score)
# calculate high_variance_cv
# if the coefficient of variance is greater than .2
with warnings.catch_warnings():
@@ -634,6 +637,7 @@ def _add_result(self, trained_pipeline, parameters, training_time, cv_data, cv_scores):
"high_variance_cv": high_variance_cv,
"training_time": training_time,
"cv_data": cv_data,
"percent_better_than_baseline": percent_better
}
self._results['search_order'].append(pipeline_id)

@@ -780,7 +784,8 @@ def full_rankings(self):
if self.objective.greater_is_better:
ascending = False

full_rankings_cols = ["id", "pipeline_name", "score", "high_variance_cv", "parameters"]
full_rankings_cols = ["id", "pipeline_name", "score", "percent_better_than_baseline",
"high_variance_cv", "parameters"]
if not self.has_searched:
return pd.DataFrame(columns=full_rankings_cols)

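For reviewers, a hypothetical usage sketch of the new column, assuming the `AutoMLSearch(problem_type=...)` constructor and `search(X, y)` call signature from this era of the API; the demo dataset is just a stand-in:

import evalml
from evalml.automl import AutoMLSearch

X, y = evalml.demos.load_breast_cancer()

automl = AutoMLSearch(problem_type="binary")
automl.search(X, y)

# Both rankings and full_rankings should now carry the new column,
# computed against the baseline pipeline's mean CV score.
print(automl.full_rankings[["id", "pipeline_name", "score",
                            "percent_better_than_baseline"]])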
evalml/objectives/cost_benefit_matrix.py — 1 addition, 0 deletions

@@ -11,6 +11,7 @@ class CostBenefitMatrix(BinaryClassificationObjective):
name = "Cost Benefit Matrix"
greater_is_better = True

Contributor:

@angela97lin I forgot to check this -- we should document why we chose this. I.e. is this score quantifying the cost (lower is better) or the benefit (higher is better)?

For this PR, we just need `perfect_score` here to align with `greater_is_better`, which it appears to do.

Contributor (reply):

Gotcha, I can document this as I work on #1026. It's a little difficult to document this directly on the attribute since we don't expose it, but I'll add it to the `CostBenefitMatrix` class docstring.

score_needs_proba = False
perfect_score = np.inf

def __init__(self, true_positive_cost, true_negative_cost, false_positive_cost, false_negative_cost):
"""Create instance of CostBenefitMatrix.
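A quick consistency check sketched from the comment thread above (illustrative, not part of the PR, and assuming all three classes are exported from `evalml.objectives`): `perfect_score` should sit at the "best" end of each objective's scale, as declared by `greater_is_better`.

from evalml.objectives import CostBenefitMatrix, FraudCost, LeadScoring

for objective in (CostBenefitMatrix, FraudCost, LeadScoring):
    direction = "maximize" if objective.greater_is_better else "minimize"
    print(f"{objective.name}: {direction}, perfect_score={objective.perfect_score}")

# CostBenefitMatrix and LeadScoring maximize, so their perfect_score is +inf;
# FraudCost minimizes a cost, so its perfect_score is 0.0.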
evalml/objectives/fraud_cost.py — 1 addition, 0 deletions

@@ -8,6 +8,7 @@ class FraudCost(BinaryClassificationObjective):
name = "Fraud Cost"
greater_is_better = False
score_needs_proba = False
perfect_score = 0.0

def __init__(self, retry_percentage=.5, interchange_fee=.02,
fraud_payout_percentage=1.0, amount_col='amount'):
evalml/objectives/lead_scoring.py — 3 additions, 0 deletions

@@ -1,3 +1,5 @@
import math

import pandas as pd

from .binary_classification_objective import BinaryClassificationObjective
@@ -8,6 +10,7 @@ class LeadScoring(BinaryClassificationObjective):
name = "Lead Scoring"
greater_is_better = True
score_needs_proba = False
perfect_score = math.inf

def __init__(self, true_positives=1, false_positives=-1):
"""Create instance.
evalml/objectives/objective_base.py — 30 additions, 0 deletions

@@ -26,6 +26,12 @@ def score_needs_proba(cls):
"""Returns a boolean determining if the score() method needs probability estimates. This should be true for objectives which work with predicted probabilities, like log loss or AUC, and false for objectives which compare predicted class labels to the actual labels, like F1 or correlation.
"""

@property
@classmethod
@abstractmethod
def perfect_score(cls):
"""Returns the score obtained by evaluating this objective on a perfect model."""

@classmethod
@abstractmethod
def objective_function(cls, y_true, y_predicted, X=None):
@@ -89,3 +95,27 @@ def validate_inputs(self, y_true, y_predicted):
raise ValueError("y_predicted contains NaN or infinity")
if self.score_needs_proba and np.any([(y_predicted < 0) | (y_predicted > 1)]):
raise ValueError("y_predicted contains probability estimates not within [0, 1]")

@classmethod
def calculate_percent_difference(cls, score, baseline_score):
"""Calculate the percent difference between scores.

Arguments:
score (float): A score. Output of the score method of this objective.
baseline_score (float): A score. Output of the score method of this objective. In practice,
this is the score achieved on this objective with a baseline estimator.

Returns:
float: The percent difference between the scores. This will be the difference normalized by the
baseline score.
"""

if pd.isna(score) or pd.isna(baseline_score):
return np.nan

if baseline_score == 0:
return np.nan

difference = (baseline_score - score)
change = difference / baseline_score
return 100 * (-1) ** (cls.greater_is_better) * change
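To make the sign convention of `calculate_percent_difference` concrete, a standalone replica with worked values (illustrative, not part of the PR): a positive result always means "better than baseline", regardless of the objective's direction.

import numpy as np

def percent_difference(score, baseline_score, greater_is_better):
    """Standalone replica of ObjectiveBase.calculate_percent_difference."""
    if np.isnan(score) or np.isnan(baseline_score) or baseline_score == 0:
        return np.nan
    change = (baseline_score - score) / baseline_score
    # (-1) ** True == -1 flips the sign for higher-is-better objectives.
    return 100 * (-1) ** greater_is_better * change

# Lower is better (e.g. log loss): improving from 0.50 to 0.40 is +20%.
print(percent_difference(0.40, 0.50, greater_is_better=False))  # 20.0
# Higher is better (e.g. AUC): improving from 0.50 to 0.60 is +20%.
print(percent_difference(0.60, 0.50, greater_is_better=True))   # 20.0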