Change score output to return one dictionary (#429)
angela97lin committed Mar 4, 2020
1 parent 5fdbdd7 commit 1ca6769
Showing 12 changed files with 61 additions and 45 deletions.
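As a quick orientation before the file-by-file diff: score() previously returned the first objective's value plus a separate dictionary of the remaining scores, and after this commit it returns a single ordered dictionary keyed by objective name. Below is a minimal sketch of the new calling pattern, reusing the constructor arguments that the tests in this commit use; X, y, and the resulting numbers are placeholders, not output from this change.

import numpy as np
from evalml.objectives import Precision, Recall
from evalml.pipelines import LogisticRegressionBinaryPipeline

X = np.random.random((100, 5))        # placeholder feature matrix
y = np.random.randint(2, size=100)    # placeholder binary labels

clf = LogisticRegressionBinaryPipeline(penalty='l2', C=1.0, impute_strategy='mean',
                                       number_features=X.shape[1], random_state=0)
clf.fit(X, y)

# Before this commit: a (score, other_scores) tuple
# score, other_scores = clf.score(X, y, objectives=[Precision(), Recall()])

# After this commit: one ordered dictionary keyed by objective name
scores = clf.score(X, y, objectives=[Precision(), Recall()])
precision = scores[Precision().name]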
2 changes: 2 additions & 0 deletions docs/source/changelog.rst
@@ -20,6 +20,7 @@ Changelog
* Downloaded demo and test datasets so unit tests can run offline :pr:`408`
* Remove `_needs_fitting` attribute from Components :pr:`398`
* Changed plot.feature_importance to show only non-zero feature importances by default, added optional parameter to show all :pr:`413`
* Changed the output of `score` to return one dictionary :pr:`429`
* Documentation Changes
* Update release.md with instructions to release to internal license key :pr:`354`
* Testing Changes
@@ -37,6 +38,7 @@ Changelog
* Pipelines will now no longer take an objective parameter during instantiation, and will no longer have an objective attribute.
* ``fit()`` and ``predict()`` now use an optional ``objective`` parameter, which is only used in binary classification pipelines to fit for a specific objective.
* ``score()`` will now use a required ``objectives`` parameter that is used to determine all the objectives to score on. This differs from the previous behavior, where the pipeline's objective was scored on regardless.
* ``score()`` will now return one dictionary of all objective scores.
**v0.6.0 Dec. 16, 2019**
* Enhancements
* Added ability to create a plot of feature importances :pr:`133`
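The breaking-change entries above correspond, roughly, to the calling convention sketched here; it continues from the X, y, and import lines in the snippet near the top of this page, and Precision is used as the fit/predict objective purely for illustration.

# Pipelines no longer take an objective at instantiation
clf = LogisticRegressionBinaryPipeline(penalty='l2', C=1.0, impute_strategy='mean',
                                       number_features=X.shape[1], random_state=0)

# fit() and predict() accept an optional objective, used only by binary classification pipelines
clf.fit(X, y, Precision())
y_pred = clf.predict(X, Precision())

# score() requires an objectives list and returns one ordered dictionary of all requested scores
scores = clf.score(X, y, objectives=[Precision(), Recall()])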
7 changes: 4 additions & 3 deletions evalml/automl/auto_base.py
@@ -259,18 +259,19 @@ def _do_iteration(self, X, y, pbar, raise_errors):
objectives_to_score = [self.objective] + self.additional_objectives
try:
pipeline.fit(X_train, y_train, self.objective)
score, other_scores = pipeline.score(X_test, y_test, objectives=objectives_to_score)
scores = pipeline.score(X_test, y_test, objectives=objectives_to_score)
score = scores[self.objective.name]
plot_data.append(pipeline.get_plot_data(X_test, y_test, self.plot_metrics))
except Exception as e:
if raise_errors:
raise e
if pbar:
pbar.write(str(e))
score = np.nan
other_scores = OrderedDict(zip([n.name for n in self.additional_objectives], [np.nan] * len(self.additional_objectives)))
scores = OrderedDict(zip([n.name for n in self.additional_objectives], [np.nan] * len(self.additional_objectives)))
ordered_scores = OrderedDict()
ordered_scores.update({self.objective.name: score})
ordered_scores.update(other_scores)
ordered_scores.update(scores)
ordered_scores.update({"# Training": len(y_train)})
ordered_scores.update({"# Testing": len(y_test)})
cv_data.append({"all_objective_scores": ordered_scores, "score": score})
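For reference, the OrderedDict assembled in _do_iteration above ends up with the primary objective first, then every score returned by pipeline.score(), then the fold sizes. A small runnable sketch of that shape; the objective names and numeric values are illustrative only.

from collections import OrderedDict

ordered_scores = OrderedDict()
ordered_scores.update({"Precision": 0.97})                    # primary objective (np.nan if the fold errored)
ordered_scores.update({"Precision": 0.97, "Recall": 0.95})    # full scores dict from pipeline.score()
ordered_scores.update({"# Training": 75})
ordered_scores.update({"# Testing": 25})
cv_data = [{"all_objective_scores": ordered_scores, "score": 0.97}]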
20 changes: 7 additions & 13 deletions evalml/pipelines/binary_classification_pipeline.py
@@ -85,7 +85,7 @@ def score(self, X, y, objectives):
objectives (list): list of objectives to score
Returns:
float, dict: score, ordered dictionary of other objective scores
dict: ordered dictionary of objective scores
"""
if not isinstance(X, pd.DataFrame):
X = pd.DataFrame(X)
@@ -97,7 +97,7 @@ def score(self, X, y, objectives):
y_predicted = None
y_predicted_proba = None

scores = []
scores = OrderedDict()
for objective in objectives:
if objective.score_needs_proba:
if y_predicted_proba is None:
@@ -110,15 +110,10 @@ def score(self, X, y, objectives):
y_predictions = y_predicted

if objective.uses_extra_columns:
scores.append(objective.score(y_predictions, y, X))
scores.update({objective.name: objective.score(y_predictions, y, X)})
else:
scores.append(objective.score(y_predictions, y))
if not objectives:
return scores[0], {}

other_scores = OrderedDict(zip([n.name for n in objectives[1:]], scores[1:]))

return scores[0], other_scores
scores.update({objective.name: objective.score(y_predictions, y)})
return scores

def get_plot_data(self, X, y, plot_metrics):
"""Generates plotting data for the pipeline for each specified plot metric
@@ -138,7 +133,7 @@ def get_plot_data(self, X, y, plot_metrics):
y = pd.Series(y)
y_predicted = None
y_predicted_proba = None
scores = []
scores = OrderedDict()
for plot_metric in plot_metrics:
if plot_metric.score_needs_proba:
if y_predicted_proba is None:
@@ -149,6 +144,5 @@ def get_plot_data(self, X, y, plot_metrics):
if y_predicted is None:
y_predicted = self.predict(X)
y_predictions = y_predicted
scores.append(plot_metric.score(y_predictions, y))
scores = OrderedDict(zip([n.name for n in plot_metrics], scores))
scores.update({plot_metric.name: plot_metric.score(y_predictions, y)})
return scores
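get_plot_data gets the same treatment as score(): results are keyed by each plot metric's name instead of being collected into a positional list. A rough sketch of the new return shape, reusing the fitted clf from the first snippet; Precision stands in for a real plot metric only for illustration, since any object exposing name, score, and score_needs_proba takes the same path through the loop above.

plot_data = clf.get_plot_data(X, y, plot_metrics=[Precision()])
value = plot_data[Precision().name]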
19 changes: 7 additions & 12 deletions evalml/pipelines/pipeline_base.py
@@ -178,7 +178,7 @@ def score(self, X, y, objectives):
objectives (list): Non-empty list of objectives to score on
Returns:
float, dict: score, ordered dictionary of other objective scores
dict: ordered dictionary of objective scores
"""
if not isinstance(X, pd.DataFrame):
X = pd.DataFrame(X)
@@ -190,7 +190,7 @@ def score(self, X, y, objectives):
y_predicted = None
y_predicted_proba = None

scores = []
scores = OrderedDict()
for objective in objectives:
if objective.score_needs_proba:
if y_predicted_proba is None:
@@ -202,15 +202,11 @@ def score(self, X, y, objectives):
y_predictions = y_predicted

if objective.uses_extra_columns:
scores.append(objective.score(y_predictions, y, X))
scores.update({objective.name: objective.score(y_predictions, y, X)})
else:
scores.append(objective.score(y_predictions, y))
if not objectives:
return scores[0], {}
scores.update({objective.name: objective.score(y_predictions, y)})

other_scores = OrderedDict(zip([n.name for n in objectives[1:]], scores[1:]))

return scores[0], other_scores
return scores

def get_plot_data(self, X, y, plot_metrics):
"""Generates plotting data for the pipeline for each specified plot metric
@@ -230,7 +226,7 @@ def get_plot_data(self, X, y, plot_metrics):
y = pd.Series(y)
y_predicted = None
y_predicted_proba = None
scores = []
scores = OrderedDict()
for plot_metric in plot_metrics:
if plot_metric.score_needs_proba:
if y_predicted_proba is None:
@@ -240,8 +236,7 @@ def get_plot_data(self, X, y, plot_metrics):
if y_predicted is None:
y_predicted = self.predict(X)
y_predictions = y_predicted
scores.append(plot_metric.score(y_predictions, y))
scores = OrderedDict(zip([n.name for n in plot_metrics], scores))
scores.update({plot_metric.name: plot_metric.score(y_predictions, y)})
return scores

@property
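The test updates that follow all make the same mechanical change: index the returned dictionary by the objective's name instead of unpacking a tuple or taking element [0]. A hedged sketch using the multiclass pipeline those tests exercise; the data is a placeholder, and PrecisionMicro is assumed to be importable from evalml.objectives, as the tests' usage suggests.

import numpy as np
from evalml.objectives import PrecisionMicro
from evalml.pipelines import LogisticRegressionMulticlassPipeline

X = np.random.random((120, 5))        # placeholder multiclass data
y = np.random.randint(3, size=120)    # three classes, as in the tests below

objective = PrecisionMicro()
clf = LogisticRegressionMulticlassPipeline(penalty='l2', C=1.0, impute_strategy='mean',
                                           number_features=X.shape[1], random_state=0)
clf.fit(X, y)
clf_scores = clf.score(X, y, [objective])
precision_micro = clf_scores[objective.name]    # formerly clf_score[0]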
@@ -33,11 +33,11 @@ def test_catboost_multi(X_y_multi):
clf = CatBoostMulticlassClassificationPipeline(impute_strategy='mean', n_estimators=1000, bootstrap_type='Bayesian',
number_features=X.shape[1], eta=0.03, max_depth=6, random_state=0)
clf.fit(X, y)
clf_score = clf.score(X, y, [objective])
clf_scores = clf.score(X, y, [objective])
y_pred = clf.predict(X)

assert((y_pred == sk_pipeline.predict(X)).all())
assert (sk_score == clf_score[0])
assert (sk_score == clf_scores[objective.name])
assert len(np.unique(y_pred)) == 3
assert len(clf.feature_importances) == len(X[0])
assert not clf.feature_importances.isnull().all().all()
@@ -43,10 +43,10 @@ def test_lor_multi(X_y_multi):
objective = PrecisionMicro()
clf = LogisticRegressionMulticlassPipeline(penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0]), random_state=0)
clf.fit(X, y)
clf_score = clf.score(X, y, [objective])
clf_scores = clf.score(X, y, [objective])
y_pred = clf.predict(X)
assert((y_pred == sk_pipeline.predict(X)).all())
assert (sk_score == clf_score[0])
assert (sk_score == clf_scores[objective.name])
assert len(np.unique(y_pred)) == 3
assert len(clf.feature_importances) == len(X[0])
assert not clf.feature_importances.isnull().all().all()
@@ -46,11 +46,11 @@ def test_rf_multi(X_y_multi):
objective = PrecisionMicro()
clf = RFMulticlassClassificationPipeline(n_estimators=10, max_depth=3, impute_strategy='mean', percent_features=1.0, number_features=len(X[0]))
clf.fit(X, y)
clf_score = clf.score(X, y, [objective])
clf_scores = clf.score(X, y, [objective])
y_pred = clf.predict(X)

assert((y_pred == sk_pipeline.predict(X)).all())
assert (sk_score == clf_score[0])
assert (sk_score == clf_scores[objective.name])
assert len(np.unique(y_pred)) == 3
assert len(clf.feature_importances) == len(X[0])
assert not clf.feature_importances.isnull().all().all()
@@ -47,11 +47,11 @@ def test_xg_multi(X_y_multi):
objective = PrecisionMicro()
clf = XGBoostMulticlassPipeline(eta=0.1, min_child_weight=1, max_depth=3, impute_strategy='mean', percent_features=1.0, number_features=len(X[0]), n_estimators=10)
clf.fit(X, y)
clf_score = clf.score(X, y, [objective])
clf_scores = clf.score(X, y, [objective])
y_pred = clf.predict(X)

assert((y_pred == sk_pipeline.predict(X)).all())
assert (sk_score == clf_score[0])
assert (sk_score == clf_scores[objective.name])
assert len(np.unique(y_pred)) == 3
assert len(clf.feature_importances) == len(X[0])
assert not clf.feature_importances.isnull().all().all()
@@ -29,11 +29,16 @@ def test_catboost_regression(X_y_reg):
clf = CatBoostRegressionPipeline(n_estimators=1000, eta=0.03, number_features=X.shape[1],
bootstrap_type='Bayesian', max_depth=6, impute_strategy='mean', random_state=0)
clf.fit(X, y)
clf_score = clf.score(X, y, [objective])
clf_scores = clf.score(X, y, [objective])
y_pred = clf.predict(X)

np.testing.assert_almost_equal(y_pred, sk_pipeline.predict(X), decimal=5)
np.testing.assert_almost_equal(sk_score, clf_score[0], decimal=5)
np.testing.assert_almost_equal(sk_score, clf_scores[objective.name], decimal=5)

# testing objective parameter passed in does not change results
clf.fit(X, y, objective)
y_pred_with_objective = clf.predict(X, objective)
np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5)

# testing objective parameter passed in does not change results
clf.fit(X, y, objective)
@@ -40,11 +40,11 @@ def test_linear_regression(X_y_categorical_regression):
random_state=0,
n_jobs=-1)
clf.fit(X, y)
clf_score = clf.score(X, y, [objective])
clf_scores = clf.score(X, y, [objective])
y_pred = clf.predict(X)

np.testing.assert_almost_equal(y_pred, sk_pipeline.predict(X), decimal=5)
np.testing.assert_almost_equal(sk_score, clf_score[0], decimal=5)
np.testing.assert_almost_equal(sk_score, clf_scores[objective.name], decimal=5)
assert not clf.feature_importances.isnull().all().all()

# testing objective parameter passed in does not change results
@@ -43,10 +43,15 @@ def test_rf_regression(X_y_categorical_regression):
clf = RFRegressionPipeline(n_estimators=10, max_depth=3, impute_strategy='mean', percent_features=1.0, number_features=X.shape[1])

clf.fit(X, y)
clf_score = clf.score(X, y, [objective])
clf_scores = clf.score(X, y, [objective])
y_pred = clf.predict(X)
np.testing.assert_almost_equal(y_pred, sk_pipeline.predict(X), decimal=5)
np.testing.assert_almost_equal(sk_score, clf_score[0], decimal=5)
np.testing.assert_almost_equal(sk_score, clf_scores[objective.name], decimal=5)

# testing objective parameter passed in does not change results
clf.fit(X, y, objective)
y_pred_with_objective = clf.predict(X, objective)
np.testing.assert_almost_equal(y_pred, y_pred_with_objective, decimal=5)

# testing objective parameter passed in does not change results
clf.fit(X, y, objective)
20 changes: 17 additions & 3 deletions evalml/tests/pipeline_tests/test_pipelines.py
@@ -1,9 +1,10 @@
import os

import numpy as np
import pytest

from evalml.model_types import ModelTypes
from evalml.objectives import FraudCost, Precision
from evalml.objectives import FraudCost, Precision, Recall
from evalml.pipelines import (
LogisticRegressionBinaryPipeline,
LogisticRegressionMulticlassPipeline,
@@ -181,8 +182,21 @@ def test_score_with_empty_list_of_objectives(X_y):
X, y = X_y
clf = LogisticRegressionBinaryPipeline(penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0]), random_state=0)
clf.fit(X, y)
with pytest.raises(IndexError):
clf.score(X, y, [])
scores = clf.score(X, y, [])
assert len(scores.values()) == 0


def test_score_with_list_of_multiple_objectives(X_y):
X, y = X_y
clf = LogisticRegressionBinaryPipeline(penalty='l2', C=1.0, impute_strategy='mean', number_features=len(X[0]), random_state=0)
clf.fit(X, y)
recall_name = Recall().name
precision_name = Precision().name
objective_names = [recall_name, precision_name]
scores = clf.score(X, y, objective_names)
assert len(scores.values()) == 2
assert all(name in scores.keys() for name in objective_names)
assert not any(np.isnan(val) for val in scores.values())


def test_n_jobs(X_y):
