Suppress UndefinedMetricWarning for F1/precision/recall (#671)
* Changed edge case results to 0.0 and added tests. Updated precision and F1 tests. Updated changelog.

* Remove old todo

* Ignore zero-div warnings for f1/precision/recall and return 0.0

* Rename tests

* Update tests, add precision_micro test

* Added tests for f1, precision and recall

* Changelog

* Lint

Co-authored-by: christopherbunn <chris.l.bunn@gmail.com>
dsherry and christopherbunn committed Apr 17, 2020
1 parent 9848eff commit 1273b44
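Context for the change: scikit-learn's F1/precision/recall scorers emit `UndefinedMetricWarning` and fall back to 0.0 whenever a denominator is zero (for example, a fold with no positive predictions). Passing `zero_division=0.0` keeps that 0.0 result but silences the warning. A minimal sketch of the edge case, assuming scikit-learn >= 0.22 (the release that introduced the `zero_division` parameter):

```python
# Illustrative sketch, not part of this commit.
import numpy as np
from sklearn import metrics

y_true = np.array([0, 1, 0, 1])
y_pred = np.array([0, 0, 0, 0])  # the positive class is never predicted

# Default behavior: returns 0.0 but emits UndefinedMetricWarning
# ("Precision is ill-defined and being set to 0.0 ...").
# With zero_division=0.0 the same 0.0 comes back silently:
print(metrics.precision_score(y_true, y_pred, zero_division=0.0))  # 0.0
print(metrics.f1_score(y_true, y_pred, zero_division=0.0))         # 0.0
```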
Showing 3 changed files with 232 additions and 39 deletions.
1 change: 1 addition & 0 deletions docs/source/changelog.rst
@@ -16,6 +16,7 @@ Changelog
         * Add error case for `normalize_confusion_matrix()` :pr:`640`
         * Update make_pipeline_graph to not accidentally create empty file when testing if path is valid :pr:`649`
         * Fix pip installation warning about docsutils version, from boto dependency :pr:`664`
+        * Removed zero division warning for F1/precision/recall metrics :pr:`671`
     * Changes
         * Updated default objective for binary/multiseries classification to log loss :pr:`613`
         * Created classification and regression pipeline subclasses and removed objective as an attribute of pipeline classes :pr:`405`
25 changes: 12 additions & 13 deletions evalml/objectives/standard_metrics.py
@@ -47,15 +47,14 @@ def objective_function(self, y_predicted, y_true, X=None):
         return metrics.balanced_accuracy_score(y_true, y_predicted)


-# todo does this need tuning?
 class F1(BinaryClassificationObjective):
     """F1 score for binary classification"""
     name = "F1"
     greater_is_better = True
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.f1_score(y_true, y_predicted)
+        return metrics.f1_score(y_true, y_predicted, zero_division=0.0)


 class F1Micro(MultiClassificationObjective):
@@ -65,7 +64,7 @@ class F1Micro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.f1_score(y_true, y_predicted, average='micro')
+        return metrics.f1_score(y_true, y_predicted, average='micro', zero_division=0.0)


 class F1Macro(MultiClassificationObjective):
@@ -75,7 +74,7 @@ class F1Macro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.f1_score(y_true, y_predicted, average='macro')
+        return metrics.f1_score(y_true, y_predicted, average='macro', zero_division=0.0)


 class F1Weighted(MultiClassificationObjective):
@@ -85,7 +84,7 @@ class F1Weighted(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.f1_score(y_true, y_predicted, average='weighted')
+        return metrics.f1_score(y_true, y_predicted, average='weighted', zero_division=0.0)


 class Precision(BinaryClassificationObjective):
@@ -95,7 +94,7 @@ class Precision(BinaryClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.precision_score(y_true, y_predicted)
+        return metrics.precision_score(y_true, y_predicted, zero_division=0.0)


 class PrecisionMicro(MultiClassificationObjective):
@@ -105,7 +104,7 @@ class PrecisionMicro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.precision_score(y_true, y_predicted, average='micro')
+        return metrics.precision_score(y_true, y_predicted, average='micro', zero_division=0.0)


 class PrecisionMacro(MultiClassificationObjective):
@@ -115,7 +114,7 @@ class PrecisionMacro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.precision_score(y_true, y_predicted, average='macro')
+        return metrics.precision_score(y_true, y_predicted, average='macro', zero_division=0.0)


 class PrecisionWeighted(MultiClassificationObjective):
@@ -125,7 +124,7 @@ class PrecisionWeighted(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.precision_score(y_true, y_predicted, average='weighted')
+        return metrics.precision_score(y_true, y_predicted, average='weighted', zero_division=0.0)


 class Recall(BinaryClassificationObjective):
@@ -135,7 +134,7 @@ class Recall(BinaryClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.recall_score(y_true, y_predicted)
+        return metrics.recall_score(y_true, y_predicted, zero_division=0.0)


 class RecallMicro(MultiClassificationObjective):
@@ -145,7 +144,7 @@ class RecallMicro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.recall_score(y_true, y_predicted, average='micro')
+        return metrics.recall_score(y_true, y_predicted, average='micro', zero_division=0.0)


 class RecallMacro(MultiClassificationObjective):
@@ -155,7 +154,7 @@ class RecallMacro(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.recall_score(y_true, y_predicted, average='macro')
+        return metrics.recall_score(y_true, y_predicted, average='macro', zero_division=0.0)


 class RecallWeighted(MultiClassificationObjective):
@@ -165,7 +164,7 @@ class RecallWeighted(MultiClassificationObjective):
     score_needs_proba = False

     def objective_function(self, y_predicted, y_true, X=None):
-        return metrics.recall_score(y_true, y_predicted, average='weighted')
+        return metrics.recall_score(y_true, y_predicted, average='weighted', zero_division=0.0)


 class AUC(BinaryClassificationObjective):
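To illustrate the effect of the diff above, here is a hedged usage sketch (assuming the evalml API at this commit, where `score` takes `y_predicted` before `y_true`, as the tests below do): an all-negative prediction vector now scores 0.0 with no warning.

```python
# Illustrative only; argument order follows the tests in this commit.
import numpy as np
from evalml.objectives import F1, Precision, Recall

y_predicted = np.array([0, 0, 0, 0, 0, 0])
y_true = np.array([0, 0, 0, 0, 0, 0])

# Each denominator (TP + FP for precision, TP + FN for recall) is zero here;
# with zero_division=0.0 every objective returns the 0.0 edge-case value
# instead of emitting UndefinedMetricWarning.
for objective in [F1(), Precision(), Recall()]:
    print(objective.name, objective.score(y_predicted, y_true))  # prints 0.0 each
```

The multiclass variants (F1Micro, PrecisionMacro, and so on) behave the same way, since every call site in the diff now passes `zero_division=0.0`.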
245 changes: 219 additions & 26 deletions evalml/tests/objective_tests/test_standard_metrics.py
@@ -3,47 +3,240 @@

 from evalml.exceptions import DimensionMismatchError
 from evalml.objectives import (
+    F1,
     AccuracyBinary,
     BalancedAccuracyBinary,
-    BalancedAccuracyMulticlass
+    BalancedAccuracyMulticlass,
+    F1Macro,
+    F1Micro,
+    F1Weighted,
+    Precision,
+    PrecisionMacro,
+    PrecisionMicro,
+    PrecisionWeighted,
+    Recall,
+    RecallMacro,
+    RecallMicro,
+    RecallWeighted
 )

+EPS = 1e-5
+
-def test_accuracy():
-    acc = AccuracyBinary()
-    assert acc.score(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0])) == pytest.approx(0.0, 1e-5)
-    assert acc.score(np.array([0, 0, 1, 1]), np.array([0, 1, 0, 1])) == pytest.approx(0.5, 1e-5)
-    assert acc.score(np.array([0, 0, 1, 1]), np.array([0, 0, 1, 1])) == pytest.approx(1.0, 1e-5)
+def test_accuracy_binary():
+    obj = AccuracyBinary()
+    assert obj.score(np.array([0, 0, 1, 1]), np.array([1, 1, 0, 0])) == pytest.approx(0.0, EPS)
+    assert obj.score(np.array([0, 0, 1, 1]), np.array([0, 1, 0, 1])) == pytest.approx(0.5, EPS)
+    assert obj.score(np.array([0, 0, 1, 1]), np.array([0, 0, 1, 1])) == pytest.approx(1.0, EPS)

     with pytest.raises(ValueError, match="Length of inputs is 0"):
-        acc.score(y_predicted=[], y_true=[1])
+        obj.score(y_predicted=[], y_true=[1])
     with pytest.raises(ValueError, match="Length of inputs is 0"):
-        acc.score(y_predicted=[1], y_true=[])
+        obj.score(y_predicted=[1], y_true=[])
     with pytest.raises(DimensionMismatchError):
-        acc.score(y_predicted=[0], y_true=[1, 0])
+        obj.score(y_predicted=[0], y_true=[1, 0])
     with pytest.raises(DimensionMismatchError):
-        acc.score(y_predicted=np.array([0]), y_true=np.array([1, 0]))
+        obj.score(y_predicted=np.array([0]), y_true=np.array([1, 0]))

+def test_balanced_accuracy_binary():
+    obj = BalancedAccuracyBinary()
+    assert obj.score(np.array([0, 1, 0, 0, 1, 0]),
+                     np.array([0, 1, 0, 0, 0, 1])) == pytest.approx(0.625, EPS)
+
+    assert obj.score(np.array([0, 1, 0, 0, 1, 0]),
+                     np.array([0, 1, 0, 0, 1, 0])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([0, 1, 0, 0, 1, 0]),
+                     np.array([1, 0, 1, 1, 0, 1])) == pytest.approx(0.0, EPS)
+
+
+def test_balanced_accuracy_multi():
+    obj = BalancedAccuracyMulticlass()
+    assert obj.score(np.array([0, 0, 2, 0, 0, 2, 3]),
+                     np.array([0, 1, 2, 0, 1, 2, 3])) == pytest.approx(0.75, EPS)
+
+    assert obj.score(np.array([0, 1, 2, 0, 1, 2, 3]),
+                     np.array([0, 1, 2, 0, 1, 2, 3])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([1, 0, 3, 1, 2, 1, 0]),
+                     np.array([0, 1, 2, 0, 1, 2, 3])) == pytest.approx(0.0, EPS)
+
+
+def test_f1_binary():
+    obj = F1()
+    assert obj.score(np.array([0, 1, 0, 0, 1, 0]),
+                     np.array([0, 1, 0, 0, 0, 1])) == pytest.approx(0.5, EPS)
+
+    assert obj.score(np.array([0, 1, 0, 0, 1, 1]),
+                     np.array([0, 1, 0, 0, 1, 1])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 0, 1, 0]),
+                     np.array([0, 1, 0, 0, 0, 1])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([0, 0]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_f1_micro_multi():
+    obj = F1Micro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_f1_macro_multi():
+    obj = F1Macro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) \
+        == pytest.approx(2 * (1 / 3.0) * (1 / 9.0) / (1 / 3.0 + 1 / 9.0), EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_f1_weighted_multi():
+    obj = F1Weighted()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) \
+        == pytest.approx(2 * (1 / 3.0) * (1 / 9.0) / (1 / 3.0 + 1 / 9.0), EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_precision_binary():
+    obj = Precision()
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1]),
+                     np.array([1, 1, 1, 1, 1, 1])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([1, 1, 1, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1])) == pytest.approx(0.5, EPS)
+
+    assert obj.score(np.array([1, 1, 1, 1, 1, 1]),
+                     np.array([0, 0, 0, 0, 0, 0])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 0, 0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_precision_micro_multi():
+    obj = PrecisionMicro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_precision_macro_multi():
+    obj = PrecisionMacro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 9.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_precision_weighted_multi():
+    obj = PrecisionWeighted()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 9.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_recall_binary():
+    obj = Recall()
+    assert obj.score(np.array([1, 1, 1, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1]),
+                     np.array([1, 1, 1, 1, 1, 1])) == pytest.approx(0.5, EPS)
+
+
+def test_recall_micro_multi():
+    obj = RecallMicro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)
+
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)
+
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)
+
+
+def test_recall_macro_multi():
+    obj = RecallMacro()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)
+
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)

-def test_binary_accuracy_binary():
-    baccb = BalancedAccuracyBinary()
-    assert baccb.score(np.array([0, 1, 0, 0, 1, 0]),
-                       np.array([0, 1, 0, 0, 0, 1])) == pytest.approx(0.625, 1e-5)
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)

-    assert baccb.score(np.array([0, 1, 0, 0, 1, 0]),
-                       np.array([0, 1, 0, 0, 1, 0])) == 1.000
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)

-    assert baccb.score(np.array([0, 1, 0, 0, 1, 0]),
-                       np.array([1, 0, 1, 1, 0, 1])) == 0.000

+def test_recall_weighted_multi():
+    obj = RecallWeighted()
+    assert obj.score(np.array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1 / 3.0, EPS)

-def test_binary_accuracy_multi():
-    baccm = BalancedAccuracyMulticlass()
-    assert baccm.score(np.array([0, 0, 2, 0, 0, 2, 3]),
-                       np.array([0, 1, 2, 0, 1, 2, 3])) == pytest.approx(0.75, 1e-5)
+    assert obj.score(np.array([0, 0, 0, 1, 1, 1, 2, 2, 2]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(1.0, EPS)

-    assert baccm.score(np.array([0, 1, 2, 0, 1, 2, 3]),
-                       np.array([0, 1, 2, 0, 1, 2, 3])) == 1.000
+    assert obj.score(np.array([2, 2, 2, 0, 0, 0, 1, 1, 1]),
+                     np.array([0, 0, 0, 1, 1, 1, 2, 2, 2])) == pytest.approx(0.0, EPS)

-    assert baccm.score(np.array([1, 0, 3, 1, 2, 1, 0]),
-                       np.array([0, 1, 2, 0, 1, 2, 3])) == 0.000
+    assert obj.score(np.array([1, 2]),
+                     np.array([0, 0])) == pytest.approx(0.0, EPS)

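The new edge-case assertions (for example `obj.score(np.array([0, 0]), np.array([0, 0]))` in `test_f1_binary`) pin down the 0.0 fallback but do not assert on warnings. A hypothetical extra test, not part of this commit, could escalate `UndefinedMetricWarning` to an error to guard against regressions:

```python
# Hypothetical companion test, shown for illustration only; it assumes
# evalml's score(y_predicted, y_true) argument order used in this file.
import warnings

import numpy as np
from sklearn.exceptions import UndefinedMetricWarning

from evalml.objectives import F1

def test_f1_does_not_warn_on_zero_division():
    with warnings.catch_warnings():
        # Any UndefinedMetricWarning raised inside this block becomes an error.
        warnings.simplefilter("error", UndefinedMetricWarning)
        assert F1().score(np.array([0, 0]), np.array([0, 0])) == 0.0
```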