Suppress UndefinedMetricWarning for F1/precision/recall (#671)
* Changed edge case results to 0.0 and added tests. Updated precision and F1 tests. Updated changelog.

* Remove old todo

* Ignore zero-div warnings for f1/precision/recall and return 0.0

* Rename tests

* Update tests, add precision_micro test

* Added tests for f1, precision and recall

* Changelog

* Lint

Co-authored-by: christopherbunn <chris.l.bunn@gmail.com>
dsherry and christopherbunn committed Apr 17, 2020
1 parent 9848eff commit 1273b44
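Context for the change: scikit-learn's F1/precision/recall scorers emit `UndefinedMetricWarning` and fall back to 0.0 whenever a denominator is zero (for example, a fold with no positive predictions). Passing `zero_division=0.0` keeps that 0.0 result but silences the warning. A minimal sketch of the edge case, assuming scikit-learn >= 0.22 (the release that introduced the `zero_division` parameter):

```python
# Illustrative sketch, not part of this commit.
import numpy as np
from sklearn import metrics

y_true = np.array([0, 1, 0, 1])
y_pred = np.array([0, 0, 0, 0])  # the positive class is never predicted

# Default behavior: returns 0.0 but emits UndefinedMetricWarning
# ("Precision is ill-defined and being set to 0.0 ...").
# With zero_division=0.0 the same 0.0 comes back silently:
print(metrics.precision_score(y_true, y_pred, zero_division=0.0))  # 0.0
print(metrics.f1_score(y_true, y_pred, zero_division=0.0))         # 0.0
```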
Showing 3 changed files with 232 additions and 39 deletions.
1 change: 1 addition & 0 deletions docs/source/changelog.rst
@@ -16,6 +16,7 @@ Changelog
         * Add error case for `normalize_confusion_matrix()` :pr:`640`
         * Update make_pipeline_graph to not accidentally create empty file when testing if path is valid :pr:`649`
         * Fix pip installation warning about docsutils version, from boto dependency :pr:`664`
+        * Removed zero division warning for F1/precision/recall metrics :pr:`671`
     * Changes
         * Updated default objective for binary/multiseries classification to log loss :pr:`613`
         * Created classification and regression pipeline subclasses and removed objective as an attribute of pipeline classes :pr:`405`
25 changes: 12 additions & 13 deletions evalml/objectives/standard_metrics.py
@@ -47,15 +47,14 @@ def objective_function(self, y_predicted, y_true, X=None):
         return metrics.balanced_accuracy_score(y_true, y_predicted)


-# todo does this need tuning?
 class F1(BinaryClassificationObjective):
     """F1 score for binary classification"""
     name = "F1"
     greater_is_better = True
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.f1_score(y_true, y_predicted)
+        return metrics.f1_score(y_true, y_predicted, zero_division=0.0)


 class F1Micro(MultiClassificationObjective):
@@ -65,7 +64,7 @@ class F1Micro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.f1_score(y_true, y_predicted, average='micro')
+        return metrics.f1_score(y_true, y_predicted, average='micro', zero_division=0.0)


 class F1Macro(MultiClassificationObjective):
@@ -75,7 +74,7 @@ class F1Macro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.f1_score(y_true, y_predicted, average='macro')
+        return metrics.f1_score(y_true, y_predicted, average='macro', zero_division=0.0)


 class F1Weighted(MultiClassificationObjective):
@@ -85,7 +84,7 @@ class F1Weighted(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.f1_score(y_true, y_predicted, average='weighted')
+        return metrics.f1_score(y_true, y_predicted, average='weighted', zero_division=0.0)


 class Precision(BinaryClassificationObjective):
@@ -95,7 +94,7 @@ class Precision(BinaryClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.precision_score(y_true, y_predicted)
+        return metrics.precision_score(y_true, y_predicted, zero_division=0.0)


 class PrecisionMicro(MultiClassificationObjective):
@@ -105,7 +104,7 @@ class PrecisionMicro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.precision_score(y_true, y_predicted, average='micro')
+        return metrics.precision_score(y_true, y_predicted, average='micro', zero_division=0.0)


 class PrecisionMacro(MultiClassificationObjective):
@@ -115,7 +114,7 @@ class PrecisionMacro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.precision_score(y_true, y_predicted, average='macro')
+        return metrics.precision_score(y_true, y_predicted, average='macro', zero_division=0.0)


 class PrecisionWeighted(MultiClassificationObjective):
@@ -125,7 +124,7 @@ class PrecisionWeighted(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.precision_score(y_true, y_predicted, average='weighted')
+        return metrics.precision_score(y_true, y_predicted, average='weighted', zero_division=0.0)


 class Recall(BinaryClassificationObjective):
@@ -135,7 +134,7 @@ class Recall(BinaryClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.recall_score(y_true, y_predicted)
+        return metrics.recall_score(y_true, y_predicted, zero_division=0.0)


 class RecallMicro(MultiClassificationObjective):
@@ -145,7 +144,7 @@ class RecallMicro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.recall_score(y_true, y_predicted, average='micro')
+        return metrics.recall_score(y_true, y_predicted, average='micro', zero_division=0.0)


 class RecallMacro(MultiClassificationObjective):
@@ -155,7 +154,7 @@ class RecallMacro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.recall_score(y_true, y_predicted, average='macro')
+        return metrics.recall_score(y_true, y_predicted, average='macro', zero_division=0.0)


 class RecallWeighted(MultiClassificationObjective):
@@ -165,7 +164,7 @@ class RecallWeighted(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.recall_score(y_true, y_predicted, average='weighted')
+        return metrics.recall_score(y_true, y_predicted, average='weighted', zero_division=0.0)


 class AUC(BinaryClassificationObjective):
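To illustrate the effect of the diff above, here is a hedged usage sketch (assuming the evalml API at this commit, where `score` takes `y_predicted` before `y_true`, as the tests below do): an all-negative prediction vector now scores 0.0 with no warning.

```python
# Illustrative only; argument order follows the tests in this commit.
import numpy as np
from evalml.objectives import F1, Precision, Recall

y_predicted = np.array([0, 0, 0, 0, 0, 0])
y_true = np.array([0, 0, 0, 0, 0, 0])

# Each denominator (TP + FP for precision, TP + FN for recall) is zero here;
# with zero_division=0.0 every objective returns the 0.0 edge-case value
# instead of emitting UndefinedMetricWarning.
for objective in [F1(), Precision(), Recall()]:
    print(objective.name, objective.score(y_predicted, y_true))  # prints 0.0 each
```

The multiclass variants (F1Micro, PrecisionMacro, and so on) behave the same way, since every call site in the diff now passes `zero_division=0.0`.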
245 changes: 219 additions & 26 deletions evalml/tests/objective_tests/test_standard_metrics.py
@@ -3,47 +3,240 @@

 from evalml.exceptions import DimensionMismatchError
 from evalml.objectives import (
+    F1,
     AccuracyBinary,
     BalancedAccuracyBinary,
-    BalancedAccuracyMulticlass
+    BalancedAccuracyMulticlass,
+    F1Macro,
+    F1Micro,
+    F1Weighted,
+    Precision,
+    PrecisionMacro,
+    PrecisionMicro,
+    PrecisionWeighted,
+    Recall,
+    RecallMacro,
+    RecallMicro,
+    RecallWeighted
 )

+EPS = 1e-5
+
-def test_accuracy():
-    acc = AccuracyBinary()
-    assert acc.score(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0])) == pytest.approx(0.0, 1e-5)
-    assert acc.score(np.array([0, 0, 1, 1]), np.array([0, 1, 0, 1])) == pytest.approx(0.5, 1e-5)
-    assert acc.score(np.array([0, 0, 1, 1]), np.array([0, 0, 1, 1])) == pytest.approx(1.0, 1e-5)
+def test_accuracy_binary():
+    obj = AccuracyBinary()
+    assert obj.score(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0])) == pytest.approx(0.0, EPS)
+    assert obj.score(np.array([0, 0, 1, 1]), np.array([0, 1, 0, 1])) == pytest.approx(0.5, EPS)
+    assert obj.score(np.array([0, 0, 1, 1]), np.array([0, 0, 1, 1])) == pytest.approx(1.0, EPS)

     with pytest.raises(ValueError, match="Length of inputs is 0"):
-        acc.score(y_predicted=[], y_true=[1])
+        obj.score(y_predicted=[], y_true=[1])
     with pytest.raises(ValueError, match="Length of inputs is 0"):
-        acc.score(y_predicted=[1], y_true=[])
+        obj.score(y_predicted=[1], y_true=[])
     with pytest.raises(DimensionMismatchError):
-        acc.score(y_predicted=[0], y_true=[1, 0])
+        obj.score(y_predicted=[0], y_true=[1, 0])
     with pytest.raises(DimensionMismatchError):
-        acc.score(y_predicted=np.array([0]), y_true=np.array([1, 0]))
+        obj.score(y_predicted=np.array([0]), y_true=np.array([1, 0]))

+def test_balanced_accuracy_binary():
+    obj = BalancedAccuracyBinary()
+    assert obj.score(np.array([0, 1, 0, 0, 1, 0]),
+                     np.array([0, 1, 0, 0, 0, 1])) == pytest.approx(0.625, EPS)
+
+    assert obj.score(np.array([0, 1, 0, 0, 1, 0]),
+                     np.array([0, 1, 0, 0, 1, 0])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([0, 1, 0, 0, 1, 0]),
+                     np.array([1, 0, 1, 1, 0, 1])) == pytest.approx(0.0, EPS)
+
+
+def test_balanced_accuracy_multi():
+    obj = BalancedAccuracyMulticlass()
+    assert obj.score(np.array([0, 0, 2, 0, 0, 2, 3]),
+                     np.array([0, 1, 2, 0, 1, 2, 3])) == pytest.approx(0.75, EPS)
+
+    assert obj.score(np.array([0, 1, 2, 0, 1, 2, 3]),
+                     np.array([0, 1, 2, 0, 1, 2, 3])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([1, 0, 3, 1, 2, 1, 0]),
+                     np.array([0, 1, 2, 0, 1, 2, 3])) == pytest.approx(0.0, EPS)
+
+
+def test_f1_binary():
+    obj = F1()
+    assert obj.score(np.array([0, 1, 0, 0, 1, 0]),
+                     np.array([0, 1, 0, 0, 0, 1])) == pytest.approx(0.5, EPS)
+
+    assert obj.score(np.array([0, 1, 0, 0, 1, 1]),
+                     np.array([0, 1, 0, 0, 1, 1])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 0, 1, 0]),
+                     np.array([0, 1, 0, 0, 0, 1])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([0, 0]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_f1_micro_multi():
+    obj = F1Micro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_f1_macro_multi():
+    obj = F1Macro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) \
+        == pytest.approx(2 * (1 / 3.0) * (1 / 9.0) / (1 / 3.0 + 1 / 9.0), EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_f1_weighted_multi():
+    obj = F1Weighted()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) \
+        == pytest.approx(2 * (1 / 3.0) * (1 / 9.0) / (1 / 3.0 + 1 / 9.0), EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_precision_binary():
+    obj = Precision()
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1]),
+                     np.array([1, 1, 1, 1, 1, 1])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([1, 1, 1, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1])) == pytest.approx(0.5, EPS)
+
+    assert obj.score(np.array([1, 1, 1, 1, 1, 1]),
+                     np.array([0, 0, 0, 0, 0, 0])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 0, 0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_precision_micro_multi():
+    obj = PrecisionMicro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_precision_macro_multi():
+    obj = PrecisionMacro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 9.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_precision_weighted_multi():
+    obj = PrecisionWeighted()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 9.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_recall_binary():
+    obj = Recall()
+    assert obj.score(np.array([1, 1, 1, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1]),
+                     np.array([1, 1, 1, 1, 1, 1])) == pytest.approx(0.5, EPS)
+
+
+def test_recall_micro_multi():
+    obj = RecallMicro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_recall_macro_multi():
+    obj = RecallMacro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)

-def test_binary_accuracy_binary():
-    baccb = BalancedAccuracyBinary()
-    assert baccb.score(np.array([0, 1, 0, 0, 1, 0]),
-                       np.array([0, 1, 0, 0, 0, 1])) == pytest.approx(0.625, 1e-5)
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)

-    assert baccb.score(np.array([0, 1, 0, 0, 1, 0]),
-                       np.array([0, 1, 0, 0, 1, 0])) == 1.000
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)

-    assert baccb.score(np.array([0, 1, 0, 0, 1, 0]),
-                       np.array([1, 0, 1, 1, 0, 1])) == 0.000

+def test_recall_weighted_multi():
+    obj = RecallWeighted()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)

-def test_binary_accuracy_multi():
-    baccm = BalancedAccuracyMulticlass()
-    assert baccm.score(np.array([0, 0, 2, 0, 0, 2, 3]),
-                       np.array([0, 1, 2, 0, 1, 2, 3])) == pytest.approx(0.75, 1e-5)
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)

-    assert baccm.score(np.array([0, 1, 2, 0, 1, 2, 3]),
-                       np.array([0, 1, 2, 0, 1, 2, 3])) == 1.000
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)

-    assert baccm.score(np.array([1, 0, 3, 1, 2, 1, 0]),
-                       np.array([0, 1, 2, 0, 1, 2, 3])) == 0.000
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)

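The new edge-case assertions (for example `obj.score(np.array([0, 0]), np.array([0, 0]))` in `test_f1_binary`) pin down the 0.0 fallback but do not assert on warnings. A hypothetical extra test, not part of this commit, could escalate `UndefinedMetricWarning` to an error to guard against regressions:

```python
# Hypothetical companion test, shown for illustration only; it assumes
# evalml's score(y_predicted, y_true) argument order used in this file.
import warnings

import numpy as np
from sklearn.exceptions import UndefinedMetricWarning

from evalml.objectives import F1

def test_f1_does_not_warn_on_zero_division():
    with warnings.catch_warnings():
        # Any UndefinedMetricWarning raised inside this block becomes an error.
        warnings.simplefilter("error", UndefinedMetricWarning)
        assert F1().score(np.array([0, 0]), np.array([0, 0])) == 0.0
```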