From 286385dc4600dceb57f9e3e564bcb35aed807cb6 Mon Sep 17 00:00:00 2001
From: Richard Decal
Date: Fri, 16 Feb 2024 19:36:23 -0500
Subject: [PATCH] Fix calculations

Signed-off-by: Richard Decal
---
 src/precision_recall_gain/_classification.py | 78 +++++++++++---------
 1 file changed, 42 insertions(+), 36 deletions(-)

diff --git a/src/precision_recall_gain/_classification.py b/src/precision_recall_gain/_classification.py
index ff236ff..fe234c5 100644
--- a/src/precision_recall_gain/_classification.py
+++ b/src/precision_recall_gain/_classification.py
@@ -2,6 +2,7 @@
 https://github.com/scikit-learn/scikit-learn/pull/24121
 """
 
+# ruff: noqa: E501
 import numpy as np
 from sklearn.metrics._classification import (
     _check_set_wise_labels,
@@ -430,7 +431,7 @@ def f1_gain_score(
 
     Examples
     --------
-    >>> from sklearn.metrics import f1_gain_score
+    >>> from precision_recall_gain import f1_gain_score
     >>> y_true = [0, 1, 2, 0, 1, 2, 2]
     >>> y_pred = [0, 2, 1, 0, 1, 1, 2]
     >>> f1_gain_score(y_true, y_pred, average='macro')
@@ -580,7 +581,7 @@ def fbeta_gain_score(
 
     Examples
    --------
-    >>> from sklearn.metrics import fbeta_gain_score
+    >>> from precision_recall_gain import fbeta_gain_score
     >>> y_true = [0, 1, 2, 0, 1, 2, 2]
     >>> y_pred = [0, 2, 1, 0, 1, 1, 2]
     >>> fbeta_gain_score(y_true, y_pred, average='macro', beta=0.5)
@@ -764,23 +765,16 @@ def precision_recall_fgain_score_support(
     Examples
     --------
     >>> import numpy as np
-    >>> from sklearn.metrics import precision_recall_fgain_score_support
+    >>> from precision_recall_gain import precision_recall_fgain_score_support
     >>> y_true = np.array(['cat', 'dog', 'pig', 'dog', 'cat', 'pig', 'pig'])
     >>> y_pred = np.array(['cat', 'pig', 'dog', 'dog', 'cat', 'dog', 'pig'])
-    >>> precision_recall_fgain_score_support(y_true, y_pred, average='macro')
-    (0.48..., 0.36..., 0.42..., None)
-    >>> precision_recall_fgain_score_support(y_true, y_pred, average='weighted')
-    (0.45..., 0.24..., 0.34..., None)
 
     It is possible to compute per-label precisions, recalls, F1-scores and
     supports instead of averaging:
 
     >>> precision_recall_fgain_score_support(y_true, y_pred, average=None,
     ... labels=['pig', 'dog', 'cat'])
-    (array([0.25, 0.2 , 1. ]),
-     array([-0.5, 0.6, 1. ]),
-     array([-0.125, 0.4 , 1. ]),
-     array([3, 2, 2]))
+    (array([0.25, 0.2 , 1. ]), array([-0.5, 0.6, 1. ]), array([-0.125, 0.4 , 1. ]), array([3, 2, 2]))
     """
     average_options = (None, "binary", "macro", "weighted")
     if average not in average_options:
@@ -918,24 +912,24 @@ def precision_gain_score(
 
     Examples
     --------
-    >>> from sklearn.metrics import precision_score
+    >>> from precision_recall_gain import precision_gain_score
     >>> y_true = [0, 1, 2, 0, 1, 2]
     >>> y_pred = [0, 2, 1, 0, 0, 1]
-    >>> precision_score(y_true, y_pred, average='macro')
-    0.22...
-    >>> precision_score(y_true, y_pred, average='weighted')
-    0.22...
-    >>> precision_score(y_true, y_pred, average=None)
-    array([0.66..., 0. , 0. ])
+    >>> int(precision_gain_score(y_true, y_pred, average='macro'))
+    -333333333333333
+    >>> int(precision_gain_score(y_true, y_pred, average='weighted'))
+    -333333333333333
+    >>> precision_gain_score(y_true, y_pred, average=None)
+    array([ 7.5e-01, -5.0e+14, -5.0e+14])
     >>> y_pred = [0, 0, 0, 0, 0, 0]
-    >>> precision_score(y_true, y_pred, average=None)
-    array([0.33..., 0. , 0. ])
-    >>> precision_score(y_true, y_pred, average=None, zero_division=1)
-    array([0.33..., 1. , 1. ])
+    >>> precision_gain_score(y_true, y_pred, average=None)
+    array([ 0.e+00, -5.e+14, -5.e+14])
+    >>> precision_gain_score(y_true, y_pred, average=None, zero_division=1)
+    array([0., 1., 1.])
     >>> # multilabel classification
     >>> y_true = [[0, 0, 0], [1, 1, 1], [0, 1, 1]]
     >>> y_pred = [[0, 0, 0], [1, 1, 1], [1, 1, 0]]
-    >>> precision_score(y_true, y_pred, average=None)
+    >>> precision_gain_score(y_true, y_pred, average=None)
     array([0.5, 1. , 1. ])
     """
     p, _, _, _ = precision_recall_fgain_score_support(
@@ -1071,25 +1065,25 @@ def recall_gain_score(
 
     Examples
     --------
-    >>> from sklearn.metrics import recall_score
+    >>> from precision_recall_gain import recall_gain_score
     >>> y_true = [0, 1, 2, 0, 1, 2]
     >>> y_pred = [0, 2, 1, 0, 0, 1]
-    >>> recall_score(y_true, y_pred, average='macro')
-    0.33...
-    >>> recall_score(y_true, y_pred, average='weighted')
-    0.33...
-    >>> recall_score(y_true, y_pred, average=None)
-    array([1., 0., 0.])
+    >>> int(recall_gain_score(y_true, y_pred, average='macro'))
+    -333333333333333
+    >>> int(recall_gain_score(y_true, y_pred, average='weighted'))
+    -333333333333333
+    >>> recall_gain_score(y_true, y_pred, average=None)
+    array([ 1.e+00, -5.e+14, -5.e+14])
     >>> y_true = [0, 0, 0, 0, 0, 0]
-    >>> recall_score(y_true, y_pred, average=None)
-    array([0.5, 0. , 0. ])
-    >>> recall_score(y_true, y_pred, average=None, zero_division=1)
-    array([0.5, 1. , 1. ])
+    >>> recall_gain_score(y_true, y_pred, average=None)
+    array([-inf, nan, nan])
+    >>> recall_gain_score(y_true, y_pred, average=None, zero_division=1)
+    array([-inf, 1., 1.])
     >>> # multilabel classification
     >>> y_true = [[0, 0, 0], [1, 1, 1], [0, 1, 1]]
     >>> y_pred = [[0, 0, 0], [1, 1, 1], [1, 1, 0]]
-    >>> recall_score(y_true, y_pred, average=None)
-    array([1. , 1. , 0.5])
+    >>> recall_gain_score(y_true, y_pred, average=None)
+    array([ 1., 1., -1.])
     """
     _, r, _, _ = precision_recall_fgain_score_support(
         y_true,
@@ -1130,4 +1124,16 @@ def prg_gain_transform(x, *, pi):
     """
     if x == pi == 1:
         return 1
+    elif x == pi == 0:
+        # if no positive class in true or predicted labels, return NaN
+        return np.nan
+    # note: if x == 0, then the metric value is -Inf
+    # and if x
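For reference, here is a minimal standalone sketch of the gain transform these updated doctests exercise, assuming the precision-recall-gain definition from Flach & Kull (2015) that the scikit-learn PR linked in the module docstring implements. The helper name `gain` is illustrative only and not part of this module, and the finite -5e+14 values in the new doctests are assumed to come from the library's zero-division handling rather than from the bare formula, which diverges to -Inf.

    import numpy as np

    def gain(x, pi):
        # Gain transform (assumed): map a precision or recall value x to
        # (x - pi) / ((1 - pi) * x), where pi is the positive-class prevalence.
        if x == pi == 1:
            return 1.0
        if x == pi == 0:
            return np.nan       # no positives in either y_true or y_pred
        if x == 0:
            return -np.inf      # zero precision/recall while positives exist
        return (x - pi) / ((1 - pi) * x)

    # Worked check against the updated doctests for class 0 of
    # y_true = [0, 1, 2, 0, 1, 2], y_pred = [0, 2, 1, 0, 0, 1]:
    # precision = 2/3, recall = 2/2, prevalence pi = 2/6.
    print(gain(2 / 3, 1 / 3))  # ~0.75 -> matches array([ 7.5e-01, ...]) for precision gain
    print(gain(1.0, 1 / 3))    # ~1.0  -> matches array([ 1.e+00, ...]) for recall gain

Classes 1 and 2 both have zero precision and recall on this example, so the bare transform gives -Inf; the doctests instead show -5.e+14, which is assumed to reflect the epsilon used by the zero_division handling in precision_recall_fgain_score_support.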