diff --git a/cyclops/evaluate/metrics/experimental/__init__.py b/cyclops/evaluate/metrics/experimental/__init__.py index 7fe2361f9..d2ed2a566 100644 --- a/cyclops/evaluate/metrics/experimental/__init__.py +++ b/cyclops/evaluate/metrics/experimental/__init__.py @@ -18,6 +18,11 @@ MultilabelFBetaScore, ) from cyclops.evaluate.metrics.experimental.metric_dict import MetricDict +from cyclops.evaluate.metrics.experimental.negative_predictive_value import ( + BinaryNPV, + MulticlassNPV, + MultilabelNPV, +) from cyclops.evaluate.metrics.experimental.precision_recall import ( BinaryPPV, BinaryPrecision, diff --git a/cyclops/evaluate/metrics/experimental/functional/__init__.py b/cyclops/evaluate/metrics/experimental/functional/__init__.py index f3d1b4ac3..7530f6cf8 100644 --- a/cyclops/evaluate/metrics/experimental/functional/__init__.py +++ b/cyclops/evaluate/metrics/experimental/functional/__init__.py @@ -17,6 +17,11 @@ multilabel_f1_score, multilabel_fbeta_score, ) +from cyclops.evaluate.metrics.experimental.functional.negative_predictive_value import ( + binary_npv, + multiclass_npv, + multilabel_npv, +) from cyclops.evaluate.metrics.experimental.functional.precision_recall import ( binary_ppv, binary_precision, diff --git a/cyclops/evaluate/metrics/experimental/functional/negative_predictive_value.py b/cyclops/evaluate/metrics/experimental/functional/negative_predictive_value.py new file mode 100644 index 000000000..81a1fd5ed --- /dev/null +++ b/cyclops/evaluate/metrics/experimental/functional/negative_predictive_value.py @@ -0,0 +1,444 @@ +"""Methods for computing the negative predictive value for classification tasks.""" +from typing import Literal, Optional, Tuple, Union + +import array_api_compat as apc + +from cyclops.evaluate.metrics.experimental.functional._stat_scores import ( + _binary_stat_scores_format_arrays, + _binary_stat_scores_update_state, + _binary_stat_scores_validate_args, + _binary_stat_scores_validate_arrays, + _multiclass_stat_scores_format_arrays, + _multiclass_stat_scores_update_state, + _multiclass_stat_scores_validate_args, + _multiclass_stat_scores_validate_arrays, + _multilabel_stat_scores_format_arrays, + _multilabel_stat_scores_update_state, + _multilabel_stat_scores_validate_arrays, +) +from cyclops.evaluate.metrics.experimental.utils.ops import ( + _adjust_weight_apply_average, + safe_divide, + squeeze_all, +) +from cyclops.evaluate.metrics.experimental.utils.types import Array + + +def _negative_predictive_value_compute( + average: Literal["micro", "macro", "weighted", "none"], + is_multilabel: bool, + *, + tp: Array, + fp: Array, + tn: Array, + fn: Array, +) -> Array: + xp = apc.array_namespace(tp, fp, tn) + if average == "micro": + tn = xp.sum(tn, axis=0) + fn = xp.sum(fn, axis=0) + return safe_divide(tn, tn + fn) + + score = safe_divide(tn, tn + fn) + return _adjust_weight_apply_average( + score, + average, + is_multilabel=is_multilabel, + tp=tp, + fp=fp, + fn=fn, + xp=xp, + ) + + +def _binary_negative_predictive_value_compute(*, fn: Array, tn: Array) -> Array: + return squeeze_all(safe_divide(tn, tn + fn)) + + +def binary_npv( + target: Array, + preds: Array, + threshold: float = 0.5, + ignore_index: Optional[int] = None, +) -> Array: + """Measure the proportion of negative predictions that are true negative. + + Parameters + ---------- + target : Array + An array object that is compatible with the Python array API standard + and contains the ground truth labels. The expected shape of the array + is `(N, ...)`, where `N` is the number of samples. 
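+    threshold : float, optional, default=0.5
+        The threshold used to convert probabilities to binary values.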
+ preds : Array + An array object that is compatible with the Python array API standard and + contains the predictions of a binary classifier. The expected shape of the + array is `(N, ...)` where `N` is the number of samples. If `preds` contains + floating point values that are not in the range `[0, 1]`, a sigmoid function + will be applied to each value before thresholding. + ignore_index : int, optional, default=None + Specifies a target class that is ignored when computing the negative predictive + value. Ignoring a target class means that the corresponding predictions do not + contribute to the negative predictive value. + + Returns + ------- + Array + An array API compatible object containing the negative predictive value. + + Raises + ------ + ValueError + If the arrays `target` and `preds` are not compatible with the Python + array API standard. + ValueError + If `target` or `preds` are empty. + ValueError + If `target` or `preds` are not numeric arrays. + ValueError + If `target` and `preds` have different shapes. + RuntimeError + If `target` contains values that are not in {0, 1}. + RuntimeError + If `preds` contains integer values that are not in {0, 1}. + ValueError + If `threshold` is not a float in the range [0, 1]. + ValueError + If `ignore_index` is not `None` or an integer. + + + Examples + -------- + >>> from cyclops.evaluate.metrics.experimental.functional import binary_npv + >>> import numpy.array_api as anp + >>> target = anp.asarray([1, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([1, 0, 1, 1, 0, 1]) + >>> binary_npv(target, preds) + Array(0.5, dtype=float32) + >>> binary_npv(target, preds, ignore_index=0) + Array(0., dtype=float32) + >>> target = anp.asarray([1, 1, 0, 1, 0, 1]) + >>> preds = anp.asarray([0.61, 0.22, 0.84, 0.73, 0.33, 0.92]) + >>> binary_npv(target, preds) + Array(0.5, dtype=float32) + >>> binary_npv(target, preds, threshold=0.8) + Array(0.25, dtype=float32) + + """ + _binary_stat_scores_validate_args( + threshold=threshold, + ignore_index=ignore_index, + ) + xp = _binary_stat_scores_validate_arrays( + target, + preds, + ignore_index=ignore_index, + ) + target, preds = _binary_stat_scores_format_arrays( + target, + preds, + threshold=threshold, + ignore_index=ignore_index, + xp=xp, + ) + tn, _, fn, _ = _binary_stat_scores_update_state(target, preds, xp=xp) + return _binary_negative_predictive_value_compute(fn=fn, tn=tn) + + +def multiclass_npv( + target: Array, + preds: Array, + num_classes: int, + top_k: int = 1, + average: Optional[Literal["micro", "macro", "weighted", "none"]] = "micro", + ignore_index: Optional[Union[int, Tuple[int]]] = None, +) -> Array: + """Measure the proportion of negative predictions that are true negative. + + Parameters + ---------- + target : Array + An array object that is compatible with the Python array API standard + and contains the ground truth labels. The expected shape of the array + is `(N, ...)`, where `N` is the number of samples. + preds : Array + An array object that is compatible with the Python array API standard and + contains the predictions of a classifier. If `preds` contains integer values + the expected shape of the array is `(N, ...)`, where `N` is the number of + samples. If `preds` contains floating point values the expected shape of the + array is `(N, C, ...)` where `N` is the number of samples and `C` is the + number of classes. + num_classes : int + The number of classes in the classification task. 
+ top_k : int, default=1 + The number of highest probability or logit score predictions to consider + when computing the negative predictive value. By default, only the top + prediction is considered. This parameter is ignored if `preds` contains + integer values. + average : {'micro', 'macro', 'weighted', 'none'}, optional, default='micro' + Specifies the type of averaging to apply to the negative predictive values. + Should be one of the following: + - `'micro'`: Compute the negative predictive value globally by considering all + predictions and all targets. + - `'macro'`: Compute the negative predictive value for each class individually + and then take the unweighted mean of the negative predictive values. + - `'weighted'`: Compute the negative predictive value for each class + individually and then take the mean of the negative predictive values + weighted by the support (the number of true positives + the number of + false negatives) for each class. + - `'none'` or `None`: Compute the negative predictive value for each class + individually and return the scores as an array. + ignore_index : int or tuple of int, optional, default=None + Specifies a target class that is ignored when computing the negative + predictive value. Ignoring a target class means that the corresponding + predictions do not contribute to the negative predictive value. + + + Returns + ------- + Array + An array API compatible object containing the negative predictive value(s). + + Raises + ------ + ValueError + If the arrays `target` and `preds` are not compatible with the Python + array API standard. + ValueError + If `target` or `preds` are empty. + ValueError + If `target` or `preds` are not numeric arrays. + ValueError + If `preds` has one more dimension than `target` but `preds` does not + contain floating point values. + ValueError + If `preds` has one more dimension than `target` and the second dimension + (first dimension, if `preds` is a scalar) of `preds` is not equal to + `num_classes`. In the multidimensional case (i.e., `preds` has more than + two dimensions), the rest of the dimensions must be the same for `target` + and `preds`. + ValueError + If `preds` and `target` have the same number of dimensions but not the + same shape. + RuntimeError + If `target` or `preds` contain values that are not in + {0, 1, ..., num_classes-1} or `target` contains more values than specified + in `ignore_index`. + ValueError + If `num_classes` is not a positive integer greater than two. + ValueError + If `top_k` is not a positive integer. + ValueError + If `top_k` is greater than the number of classes. + ValueError + If `average` is not one of {`'micro'`, `'macro'`, `'weighted'`, `'none'`, + `None`}. + ValueError + If `ignore_index` is not `None`, an integer, or a tuple of integers. + + + Examples + -------- + >>> from cyclops.evaluate.metrics.experimental.functional import ( + ... multiclass_npv + ... ) + >>> import numpy.array_api as anp + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray([2, 1, 0, 1]) + >>> multiclass_npv(target, preds, num_classes=3) + Array(0.875, dtype=float32) + >>> target = anp.asarray([2, 1, 0, 0]) + >>> preds = anp.asarray( + ... [[0.1, 0.1, 0.8], [0.2, 0.7, 0.1], [0.9, 0.1, 0.0], [0.4, 0.6, 0.0]], + ... ) + >>> multiclass_npv(target, preds, num_classes=3) + Array(0.875, dtype=float32) + >>> multiclass_npv(target, preds, num_classes=3, top_k=2) + Array(1., dtype=float32) + >>> multiclass_npv(target, preds, num_classes=3, average=None) + Array([0.6666667, 1. , 1. 
], dtype=float32) + >>> multiclass_npv(target, preds, num_classes=3, average="macro") + Array(0.88888896, dtype=float32) + >>> multiclass_npv(target, preds, num_classes=3, average="weighted") + Array(0.8333334, dtype=float32) + >>> multiclass_npv(target, preds, num_classes=3, ignore_index=0) + Array(1., dtype=float32) + >>> multiclass_npv( + ... target, preds, num_classes=3, average=None, ignore_index=(1, 2), + ... ) + Array([0., 1., 1.], dtype=float32) + + """ + _multiclass_stat_scores_validate_args( + num_classes, + top_k=top_k, + average=average, + ignore_index=ignore_index, + ) + xp = _multiclass_stat_scores_validate_arrays( + target, + preds, + num_classes, + top_k=top_k, + ignore_index=ignore_index, + ) + + target, preds = _multiclass_stat_scores_format_arrays( + target, + preds, + top_k=top_k, + xp=xp, + ) + tn, fp, fn, tp = _multiclass_stat_scores_update_state( + target, + preds, + num_classes, + top_k=top_k, + average=average, + ignore_index=ignore_index, + xp=xp, + ) + return _negative_predictive_value_compute( + average, # type: ignore[arg-type] + is_multilabel=False, + tp=tp, + fp=fp, + tn=tn, + fn=fn, + ) + + +def multilabel_npv( + target: Array, + preds: Array, + num_labels: int, + threshold: float = 0.5, + top_k: int = 1, + average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", + ignore_index: Optional[int] = None, +) -> Array: + """Measure the proportion of negative predictions that are true negative. + + Parameters + ---------- + target : Array + An array object that is compatible with the Python array API standard + and contains the ground truth labels. The expected shape of the array + is `(N, L, ...)`, where `N` is the number of samples and `L` is the + number of labels. + preds : Array + An array object that is compatible with the Python array API standard and + contains the predictions of a classifier. The expected shape of the array + is `(N, L, ...)`, where `N` is the number of samples and `L` is the + number of labels. If `preds` contains floating point values that are not + in the range `[0, 1]`, a sigmoid function will be applied to each value + before thresholding. + num_labels : int + The number of labels in the classification task. + threshold : float, optional, default=0.5 + The threshold used to convert probabilities to binary values. + top_k : int, optional, default=1 + The number of highest probability predictions to assign the value `1` + (all other predictions are assigned the value `0`). By default, only the + highest probability prediction is considered. This parameter is ignored + if `preds` does not contain floating point values. + average : {'micro', 'macro', 'weighted', 'none'}, optional, default='macro' + Specifies the type of averaging to apply to the negative predictive values. + Should be one of the following: + - `'micro'`: Compute the negative predictive value globally by considering all + predictions and all targets. + - `'macro'`: Compute the negative predictive value for each label individually + and then take the unweighted mean of the negative predictive values. + - `'weighted'`: Compute the negative predictive value for each label + individually and then take the mean of the negative predictive values + weighted by the support (the number of true positives + the number of false + negatives) for each label. + - `'none'` or `None`: Compute the negative predictive value for each label + individually and return the scores as an array. 
+ ignore_index : int, optional, default=None + Specifies value in `target` that is ignored when computing the negative + predictive value. + + Raises + ------ + ValueError + If the arrays `target` and `preds` are not compatible with the Python + array API standard. + ValueError + If `target` or `preds` are empty. + ValueError + If `target` or `preds` are not numeric arrays. + ValueError + If `target` and `preds` have different shapes. + ValueError + If the second dimension of `target` and `preds` is not equal to `num_labels`. + RuntimeError + If `target` contains values that are not in {0, 1} or not in `ignore_index`. + RuntimeError + If `preds` contains integer values that are not in {0, 1}. + ValueError + If `num_labels` is not a positive integer greater than two. + ValueError + If `threshold` is not a float in the range [0, 1]. + ValueError + If `top_k` is not a positive integer. + ValueError + If `top_k` is greater than the number of labels. + ValueError + If `average` is not one of {`'micro'`, `'macro'`, `'weighted'`, `'none'`, + `None`}. + ValueError + If `ignore_index` is not `None` or an integer. + + Examples + -------- + >>> from cyclops.evaluate.metrics.experimental.functional import ( + ... multilabel_npv + ... ) + >>> import numpy.array_api as anp + >>> target = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> preds = anp.asarray([[0, 0, 1], [1, 0, 1]]) + >>> multilabel_npv(target, preds, num_labels=3) + Array(0.5, dtype=float32) + >>> target = anp.asarray([[1, 0, 1, 0], [1, 1, 0, 1]]) + >>> preds = anp.asarray([[0.11, 0.58, 0.22, 0.84], [0.73, 0.47, 0.33, 0.92]]) + >>> multilabel_npv(target, preds, num_labels=4) + Array(0.125, dtype=float32) + >>> multilabel_npv(target, preds, num_labels=4, top_k=2) + Array(0.125, dtype=float32) + >>> multilabel_npv(target, preds, num_labels=4, threshold=0.7) + Array(0.25, dtype=float32) + >>> multilabel_npv(target, preds, num_labels=4, average=None) + Array([0. , 0. , 0.5, 0. ], dtype=float32) + >>> multilabel_npv(target, preds, num_labels=4, average="micro") + Array(0.25, dtype=float32) + >>> multilabel_npv(target, preds, num_labels=4, average="weighted") + Array(0.1, dtype=float32) + >>> multilabel_npv( + ... target, preds, num_labels=4, average=None, ignore_index=1, + ... ) + Array([0., 0., 1., 0.], dtype=float32) + + """ + xp = _multilabel_stat_scores_validate_arrays( + target, + preds, + num_labels, + ignore_index=ignore_index, + ) + target, preds = _multilabel_stat_scores_format_arrays( + target, + preds, + top_k=top_k, + threshold=threshold, + ignore_index=ignore_index, + xp=xp, + ) + tn, fp, fn, tp = _multilabel_stat_scores_update_state(target, preds, xp=xp) + return _negative_predictive_value_compute( + average, # type: ignore[arg-type] + is_multilabel=True, + tp=tp, + fp=fp, + tn=tn, + fn=fn, + ) diff --git a/cyclops/evaluate/metrics/experimental/functional/specificity.py b/cyclops/evaluate/metrics/experimental/functional/specificity.py index 4b5de6074..6fbd3e4b3 100644 --- a/cyclops/evaluate/metrics/experimental/functional/specificity.py +++ b/cyclops/evaluate/metrics/experimental/functional/specificity.py @@ -315,7 +315,7 @@ def multilabel_specificity( average: Optional[Literal["micro", "macro", "weighted", "none"]] = "macro", ignore_index: Optional[int] = None, ) -> Array: - """Measure the proportion of positive predictions that are true positive. + """Measure how well a classifier identifies negative samples. 
Parameters ---------- diff --git a/cyclops/evaluate/metrics/experimental/negative_predictive_value.py b/cyclops/evaluate/metrics/experimental/negative_predictive_value.py new file mode 100644 index 000000000..7a5f1e5ee --- /dev/null +++ b/cyclops/evaluate/metrics/experimental/negative_predictive_value.py @@ -0,0 +1,187 @@ +"""Classes for computing the negative predictive value for classification tasks.""" +from cyclops.evaluate.metrics.experimental._stat_scores import ( + _AbstractBinaryStatScores, + _AbstractMulticlassStatScores, + _AbstractMultilabelStatScores, +) +from cyclops.evaluate.metrics.experimental.functional.negative_predictive_value import ( + _binary_negative_predictive_value_compute, + _negative_predictive_value_compute, +) +from cyclops.evaluate.metrics.experimental.utils.types import Array + + +class BinaryNPV(_AbstractBinaryStatScores, registry_key="binary_npv"): + """The proportion of negative predictions that are true negatives. + + Parameters + ---------- + threshold : float, default=0.5 + Threshold for converting probabilities into binary values. + ignore_index : int, optional + Values in the target array to ignore when computing the metric. + **kwargs + Additional keyword arguments common to all metrics. + + Examples + -------- + >>> from cyclops.evaluate.metrics.experimental import BinaryNPV + >>> import numpy.array_api as anp + >>> target = anp.asarray([0, 1, 0, 1]) + >>> preds = anp.asarray([0, 1, 1, 1]) + >>> metric = BinaryNPV() + >>> metric(target, preds) + Array(1., dtype=float32) + >>> metric.reset() + >>> target = [[0, 1, 0, 1], [1, 0, 1, 0]] + >>> preds = [[0, 1, 1, 1], [1, 0, 1, 0]] + >>> for t, p in zip(target, preds): + ... metric.update(anp.asarray(t), anp.asarray(p)) + >>> metric.compute() + Array(1., dtype=float32) + + """ + + name: str = "Negative Predictive Value" + + def _compute_metric(self) -> Array: + """Compute the negative predictive value.""" + tn, _, fn, _ = self._final_state() + return _binary_negative_predictive_value_compute(fn=fn, tn=tn) + + +class MulticlassNPV( + _AbstractMulticlassStatScores, + registry_key="multiclass_npv", +): + """The proportion of negative predictions that are true negatives. + + Parameters + ---------- + num_classes : int + The number of classes in the classification task. + top_k : int, default=1 + The number of highest probability or logit score predictions to consider + when computing the negative predictive value. By default, only the top + prediction is considered. This parameter is ignored if `preds` contains + integer values. + average : {'micro', 'macro', 'weighted', 'none'}, optional, default='micro' + Specifies the type of averaging to apply to the negative predictive values. + Should be one of the following: + - `'micro'`: Compute the negative predictive value globally by considering all + predictions and all targets. + - `'macro'`: Compute the negative predictive value for each class individually + and then take the unweighted mean of the negative predictive values. + - `'weighted'`: Compute the negative predictive value for each class + individually and then take the mean of the negative predictive values + weighted by the support (the number of true positives + the number of + false negatives) for each class. + - `'none'` or `None`: Compute the negative predictive value for each class + individually and return the scores as an array. + ignore_index : int or tuple of int, optional, default=None + Specifies a target class that is ignored when computing the negative + predictive value. 
Ignoring a target class means that the corresponding + predictions do not contribute to the negative predictive value. + + Examples + -------- + >>> from cyclops.evaluate.metrics.experimental import MulticlassNPV + >>> import numpy.array_api as anp + >>> target = anp.asarray([0, 1, 2, 2, 2]) + >>> preds = anp.asarray([0, 0, 2, 2, 1]) + >>> metric = MulticlassNPV(num_classes=3) + >>> metric(target, preds) + Array(0.8, dtype=float32) + >>> metric.reset() + >>> target = [[0, 1, 2], [2, 1, 0]] + >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1], [0.2, 0.6, 0.2]], + ... [[0.1, 0.8, 0.1], [0.05, 0.95, 0], [0.2, 0.6, 0.2]]] + >>> for t, p in zip(target, preds): + ... metric.update(anp.asarray(t), anp.asarray(p)) + >>> metric.compute() + Array(0.6666667, dtype=float32) + + """ + + name: str = "Negative predictive value" + + def _compute_metric(self) -> Array: + """Compute the negative predictive value(s).""" + tn, fp, fn, tp = self._final_state() + return _negative_predictive_value_compute( + self.average, # type: ignore[arg-type] + is_multilabel=False, + tp=tp, + fp=fp, + tn=tn, + fn=fn, + ) + + +class MultilabelNPV( + _AbstractMultilabelStatScores, + registry_key="multilabel_npv", +): + """The proportion of negative predictions that are true negatives. + + Parameters + ---------- + num_labels : int + The number of labels in the classification task. + threshold : float, optional, default=0.5 + The threshold used to convert probabilities to binary values. + top_k : int, optional, default=1 + The number of highest probability predictions to assign the value `1` + (all other predictions are assigned the value `0`). By default, only the + highest probability prediction is considered. This parameter is ignored + if `preds` does not contain floating point values. + average : {'micro', 'macro', 'weighted', 'none'}, optional, default='macro' + Specifies the type of averaging to apply to the negative predictive values. + Should be one of the following: + - `'micro'`: Compute the negative predictive value globally by considering all + predictions and all targets. + - `'macro'`: Compute the negative predictive value for each label individually + and then take the unweighted mean of the negative predictive values. + - `'weighted'`: Compute the negative predictive value for each label + individually and then take the mean of the negative predictive values + weighted by the support (the number of true positives + the number of + false negatives) for each label. + - `'none'` or `None`: Compute the negative predictive value for each label + individually and return the scores as an array. + ignore_index : int, optional, default=None + Specifies a value in the target array(s) that is ignored when computing + the negative predictive value. + + Examples + -------- + >>> from cyclops.evaluate.metrics.experimental import MultilabelNPV + >>> import numpy.array_api as anp + >>> target = anp.asarray([[0, 1, 1], [1, 0, 0]]) + >>> preds = anp.asarray([[0, 1, 0], [1, 0, 1]]) + >>> metric = MultilabelNPV(num_labels=3) + >>> metric(target, preds) + Array(0.6666667, dtype=float32) + >>> metric.reset() + >>> target = [[[0, 1, 1], [1, 0, 0]], [[1, 0, 0], [0, 1, 1]]] + >>> preds = [[[0.05, 0.95, 0], [0.1, 0.8, 0.1]], + ... [[0.1, 0.8, 0.1], [0.05, 0.95, 0]]] + >>> for t, p in zip(target, preds): + ... 
metric.update(anp.asarray(t), anp.asarray(p)) + >>> metric.compute() + Array(0.33333334, dtype=float32) + + """ + + name: str = "Negative Predictive Value" + + def _compute_metric(self) -> Array: + """Compute the negative predictive value(s).""" + tn, fp, fn, tp = self._final_state() + return _negative_predictive_value_compute( + self.average, # type: ignore[arg-type] + is_multilabel=True, + tp=tp, + fp=fp, + tn=tn, + fn=fn, + ) diff --git a/cyclops/evaluate/metrics/f_beta.py b/cyclops/evaluate/metrics/f_beta.py index 59e6659de..575bacb7c 100644 --- a/cyclops/evaluate/metrics/f_beta.py +++ b/cyclops/evaluate/metrics/f_beta.py @@ -178,7 +178,7 @@ class MultilabelFbetaScore(MultilabelStatScores, registry_key="multilabel_fbeta_ num_labels : int Number of labels for the task. threshold : float, default=0.5 - Threshold for deciding the positive class if predicitions are logits + Threshold for deciding the positive class if predictions are logits or probability scores. Logits will be converted to probabilities using the sigmoid function. top_k : int, optional diff --git a/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py b/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py new file mode 100644 index 000000000..b07f2e7ea --- /dev/null +++ b/tests/cyclops/evaluate/metrics/experimental/test_negative_predicitve_value.py @@ -0,0 +1,541 @@ +"""Test negative predictive value.""" +from functools import partial +from typing import Literal, Optional + +import array_api_compat as apc +import array_api_compat.torch +import numpy as np +import numpy.array_api as anp +import pytest +import torch.utils.dlpack +from torch import Tensor +from torchmetrics.functional.classification.stat_scores import ( + _binary_stat_scores_arg_validation, + _binary_stat_scores_format, + _binary_stat_scores_tensor_validation, + _binary_stat_scores_update, + _multiclass_stat_scores_arg_validation, + _multiclass_stat_scores_format, + _multiclass_stat_scores_tensor_validation, + _multiclass_stat_scores_update, + _multilabel_stat_scores_arg_validation, + _multilabel_stat_scores_format, + _multilabel_stat_scores_tensor_validation, + _multilabel_stat_scores_update, +) +from torchmetrics.utilities.compute import _adjust_weights_safe_divide, _safe_divide + +from cyclops.evaluate.metrics.experimental.functional.negative_predictive_value import ( + binary_npv, + multiclass_npv, + multilabel_npv, +) +from cyclops.evaluate.metrics.experimental.negative_predictive_value import ( + BinaryNPV, + MulticlassNPV, + MultilabelNPV, +) +from cyclops.evaluate.metrics.experimental.utils.ops import to_int +from cyclops.evaluate.metrics.experimental.utils.validation import is_floating_point + +from ..conftest import NUM_CLASSES, NUM_LABELS, THRESHOLD +from .inputs import _binary_cases, _multiclass_cases, _multilabel_cases +from .testers import MetricTester, _inject_ignore_index + + +def _npv_reduce( + tp: Tensor, + fp: Tensor, + tn: Tensor, + fn: Tensor, + average: Optional[Literal["binary", "micro", "macro", "weighted", "none"]], + multilabel: bool = False, +) -> Tensor: + if average == "binary": + return _safe_divide(tn, tn + fn) + if average == "micro": + tn = tn.sum(dim=0) + fn = fn.sum(dim=0) + return _safe_divide(tn, tn + fn) + + npv_score = _safe_divide(tn, tn + fn) + return _adjust_weights_safe_divide(npv_score, average, multilabel, tp, fp, fn) + + +def _binary_npv_reference( + target, + preds, + threshold, + ignore_index, +) -> torch.Tensor: + """Compute binary negative predictive value 
using torchmetrics.""" + preds = torch.utils.dlpack.from_dlpack(preds) + target = torch.utils.dlpack.from_dlpack(target) + _binary_stat_scores_arg_validation(threshold, ignore_index=ignore_index) + _binary_stat_scores_tensor_validation(preds, target, ignore_index=ignore_index) + preds, target = _binary_stat_scores_format(preds, target, threshold, ignore_index) + tp, fp, tn, fn = _binary_stat_scores_update(preds, target) + return _npv_reduce(tp, fp, tn, fn, average="binary") + + +class TestBinaryNPV(MetricTester): + """Test binary negative predictive value metric class and function.""" + + @pytest.mark.parametrize("inputs", _binary_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_binary_npv_function_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test function for binary NPV using `numpy.array_api` arrays.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_function_implementation_test( + target, + preds, + metric_function=binary_npv, + metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index}, + reference_metric=partial( + _binary_npv_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.parametrize("inputs", _binary_cases(xp=anp)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_binary_npv_class_with_numpy_array_api_arrays( + self, + inputs, + ignore_index, + ) -> None: + """Test class for binary NPV using `numpy.array_api` arrays.""" + target, preds = inputs + + if ( + preds.ndim == 1 + and is_floating_point(preds) + and not anp.all(to_int((preds >= 0)) * to_int((preds <= 1))) + ): + pytest.skip( + "When using 0-D logits, batch result will be different from local " + "result because the `sigmoid` operation may not be applied to each " + "batch (some values may be in [0, 1] and some may not).", + ) + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=BinaryNPV, + metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index}, + reference_metric=partial( + _binary_npv_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.integration_test() # machine for integration tests has GPU + @pytest.mark.parametrize("inputs", _binary_cases(xp=array_api_compat.torch)) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_binary_npv_class_with_torch_tensors( + self, + inputs, + ignore_index, + ) -> None: + """Test binary negative predictive value class with torch tensors.""" + target, preds = inputs + + if ( + preds.ndim == 1 + and is_floating_point(preds) + and not torch.all(to_int((preds >= 0)) * to_int((preds <= 1))) + ): + pytest.skip( + "When using 0-D logits, batch result will be different from local " + "result because the `sigmoid` operation may not be applied to each " + "batch (some values may be in [0, 1] and some may not).", + ) + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + device = "cuda" if torch.cuda.is_available() else "cpu" + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=BinaryNPV, + metric_args={"threshold": THRESHOLD, "ignore_index": ignore_index}, + reference_metric=partial( + _binary_npv_reference, + threshold=THRESHOLD, + ignore_index=ignore_index, + ), + device=device, + use_device_for_ref=True, + ) + + +def 
_multiclass_npv_reference( + target, + preds, + num_classes=NUM_CLASSES, + top_k: int = 1, + average: Optional[Literal["micro", "macro", "weighted"]] = "micro", + ignore_index=None, +) -> torch.Tensor: + """Compute multiclass negative predictive value using torchmetrics.""" + if preds.ndim == 1 and is_floating_point(preds): + xp = apc.array_namespace(preds) + preds = xp.argmax(preds, axis=0) + + preds = torch.utils.dlpack.from_dlpack(preds) + target = torch.utils.dlpack.from_dlpack(target) + _multiclass_stat_scores_arg_validation( + num_classes, + top_k, + average, + ignore_index=ignore_index, + ) + _multiclass_stat_scores_tensor_validation( + preds, + target, + num_classes, + ignore_index=ignore_index, + ) + preds, target = _multiclass_stat_scores_format(preds, target, top_k) + tp, fp, tn, fn = _multiclass_stat_scores_update( + preds, + target, + num_classes, + top_k, + average, + ignore_index=ignore_index, + ) + return _npv_reduce(tp, fp, tn, fn, average=average) + + +class TestMulticlassNPV(MetricTester): + """Test multiclass negative predictive value metric class and function.""" + + atol = 2e-7 + + @pytest.mark.parametrize("inputs", _multiclass_cases(xp=anp)) + @pytest.mark.parametrize("top_k", [1, 2]) + @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) + @pytest.mark.parametrize("ignore_index", [None, 1, -1]) + def test_multiclass_npv_function_with_numpy_array_api_arrays( + self, + inputs, + top_k, + average, + ignore_index, + ) -> None: + """Test function for multiclass NPV using `numpy.array_api` arrays.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + if top_k > 1 and not is_floating_point(preds): + with pytest.raises(ValueError): + multiclass_npv( + target, + preds, + num_classes=NUM_CLASSES, + top_k=top_k, + average=average, + ignore_index=ignore_index, + ) + else: + self.run_metric_function_implementation_test( + target, + preds, + metric_function=multiclass_npv, + metric_args={ + "num_classes": NUM_CLASSES, + "top_k": top_k, + "average": average, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _multiclass_npv_reference, + num_classes=NUM_CLASSES, + top_k=top_k, + average=average, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.parametrize("inputs", _multiclass_cases(xp=anp)) + @pytest.mark.parametrize("top_k", [1, 2]) + @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) + @pytest.mark.parametrize("ignore_index", [None, 1, -1]) + def test_multiclass_npv_class_with_numpy_array_api_arrays( + self, + inputs, + top_k, + average, + ignore_index, + ) -> None: + """Test class for multiclass NPV using `numpy.array_api` arrays.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + if top_k > 1 and not is_floating_point(preds): + with pytest.raises(ValueError): + metric = MulticlassNPV( + num_classes=NUM_CLASSES, + top_k=top_k, + average=average, + ignore_index=ignore_index, + ) + metric(target, preds) + else: + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MulticlassNPV, + metric_args={ + "num_classes": NUM_CLASSES, + "top_k": top_k, + "average": average, + "ignore_index": ignore_index, + }, + reference_metric=partial( + _multiclass_npv_reference, + num_classes=NUM_CLASSES, + top_k=top_k, + average=average, + ignore_index=ignore_index, + ), + ) + + @pytest.mark.integration_test() # machine for integration tests has GPU + 
@pytest.mark.parametrize("inputs", _multiclass_cases(xp=array_api_compat.torch)) + @pytest.mark.parametrize("top_k", [1, 2]) + @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) + @pytest.mark.parametrize("ignore_index", [None, 1, -1]) + def test_multiclass_npv_class_with_torch_tensors( + self, + inputs, + top_k, + average, + ignore_index, + ) -> None: + """Test multiclass negative predictive value class with torch tensors.""" + target, preds = inputs + + if ignore_index is not None: + target = _inject_ignore_index(target, ignore_index) + + if top_k > 1 and not is_floating_point(preds): + with pytest.raises(ValueError): + metric = MulticlassNPV( + num_classes=NUM_CLASSES, + top_k=top_k, + average=average, + ignore_index=ignore_index, + ) + metric(target, preds) + else: + device = "cuda" if torch.cuda.is_available() else "cpu" + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MulticlassNPV, + reference_metric=partial( + _multiclass_npv_reference, + num_classes=NUM_CLASSES, + top_k=top_k, + average=average, + ignore_index=ignore_index, + ), + metric_args={ + "num_classes": NUM_CLASSES, + "top_k": top_k, + "average": average, + "ignore_index": ignore_index, + }, + device=device, + use_device_for_ref=True, + ) + + +def _multilabel_npv_reference( + target, + preds, + threshold, + num_labels=NUM_LABELS, + average: Optional[Literal["micro", "macro", "weighted"]] = "macro", + ignore_index=None, +) -> torch.Tensor: + """Compute multilabel negative predictive value using torchmetrics.""" + preds = torch.utils.dlpack.from_dlpack(preds) + target = torch.utils.dlpack.from_dlpack(target) + _multilabel_stat_scores_arg_validation( + num_labels, + threshold, + average, + ignore_index=ignore_index, + ) + _multilabel_stat_scores_tensor_validation( + preds, + target, + num_labels, + "global", + ignore_index=ignore_index, + ) + preds, target = _multilabel_stat_scores_format( + preds, + target, + num_labels, + threshold, + ignore_index=ignore_index, + ) + tp, fp, tn, fn = _multilabel_stat_scores_update(preds, target) + return _npv_reduce(tp, fp, tn, fn, average=average, multilabel=True) + + +class TestMultilabelNPV(MetricTester): + """Test multilabel negative predictive value function and class.""" + + atol = 6e-8 + + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multilabel_npv_with_numpy_array_api_arrays( + self, + inputs, + average, + ignore_index, + ) -> None: + """Test function for multilabel NPV with `numpy.array_api` arrays.""" + target, preds = inputs + + self.run_metric_function_implementation_test( + target, + preds, + metric_function=multilabel_npv, + reference_metric=partial( + _multilabel_npv_reference, + num_labels=NUM_LABELS, + threshold=THRESHOLD, + average=average, + ignore_index=ignore_index, + ), + metric_args={ + "threshold": THRESHOLD, + "num_labels": NUM_LABELS, + "average": average, + "ignore_index": ignore_index, + }, + ) + + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multilabel_npv_class_with_numpy_array_api_arrays( + self, + inputs, + average, + ignore_index, + ) -> None: + """Test class for multilabel NPV with `numpy.array_api` arrays.""" + target, preds = inputs + + self.run_metric_class_implementation_test( + 
target, + preds, + metric_class=MultilabelNPV, + reference_metric=partial( + _multilabel_npv_reference, + num_labels=NUM_LABELS, + threshold=THRESHOLD, + average=average, + ignore_index=ignore_index, + ), + metric_args={ + "threshold": THRESHOLD, + "num_labels": NUM_LABELS, + "average": average, + "ignore_index": ignore_index, + }, + ) + + @pytest.mark.integration_test() # machine for integration tests has GPU + @pytest.mark.parametrize("inputs", _multilabel_cases(xp=anp)) + @pytest.mark.parametrize("average", [None, "micro", "macro", "weighted"]) + @pytest.mark.parametrize("ignore_index", [None, 0, -1]) + def test_multilabel_npv_class_with_torch_tensors( + self, + inputs, + average, + ignore_index, + ) -> None: + """Test class for multilabel negative predictive value with torch tensors.""" + target, preds = inputs + + self.run_metric_class_implementation_test( + target, + preds, + metric_class=MultilabelNPV, + reference_metric=partial( + _multilabel_npv_reference, + num_labels=NUM_LABELS, + threshold=THRESHOLD, + average=average, + ignore_index=ignore_index, + ), + metric_args={ + "threshold": THRESHOLD, + "num_labels": NUM_LABELS, + "average": average, + "ignore_index": ignore_index, + }, + ) + + +def test_top_k_multilabel_npv(): + """Test top-k multilabel negative predictive value.""" + target = anp.asarray([[0, 1, 1, 0], [1, 0, 1, 0]]) + preds = anp.asarray([[0.1, 0.9, 0.8, 0.3], [0.9, 0.1, 0.8, 0.3]]) + expected_result = anp.asarray([1.0, 1.0, 0.0, 1.0], dtype=anp.float32) + + result = multilabel_npv(target, preds, num_labels=4, average=None, top_k=2) + assert np.allclose(result, expected_result) + + metric = MultilabelNPV(num_labels=4, average=None, top_k=2) + metric(target, preds) + class_result = metric.compute() + assert np.allclose(class_result, expected_result) + metric.reset() + + preds = anp.asarray( + [ + [[0.57, 0.63], [0.33, 0.55], [0.73, 0.55], [0.36, 0.66]], + [[0.78, 0.94], [0.47, 0.31], [0.14, 0.28], [0.35, 0.81]], + ], + ) + target = anp.asarray( + [[[0, 0], [1, 1], [0, 1], [0, 0]], [[0, 1], [0, 1], [1, 0], [0, 0]]], + ) + expected_result = anp.asarray([0.0, 0.0, 0.33333334, 1.0], dtype=anp.float32) + + result = multilabel_npv(target, preds, num_labels=4, average=None, top_k=2) + assert np.allclose(result, expected_result) + + class_result = metric(target, preds) + assert np.allclose(class_result, expected_result) diff --git a/tests/cyclops/evaluate/metrics/experimental/test_specificity.py b/tests/cyclops/evaluate/metrics/experimental/test_specificity.py index b905a15d2..035edbada 100644 --- a/tests/cyclops/evaluate/metrics/experimental/test_specificity.py +++ b/tests/cyclops/evaluate/metrics/experimental/test_specificity.py @@ -1,4 +1,4 @@ -"""Test specificity recall metrics.""" +"""Test specificity.""" from functools import partial from typing import Literal, Optional
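
A minimal sanity-check sketch of the new functional API (illustrative only, not part of the patch; it assumes `numpy.array_api` is importable, as in the doctests above). It spells out the definition the docstrings describe: the negative predictive value is TN / (TN + FN), i.e. the fraction of negative predictions that are truly negative.

import numpy.array_api as anp

from cyclops.evaluate.metrics.experimental.functional import binary_npv

target_list = [1, 1, 0, 1, 0, 1]
preds_list = [1, 0, 1, 1, 0, 1]

# Count true negatives and false negatives by hand: a "negative prediction" is
# preds == 0, and it is a true negative only when the target is also 0.
tn = sum(p == 0 and t == 0 for t, p in zip(target_list, preds_list))
fn = sum(p == 0 and t == 1 for t, p in zip(target_list, preds_list))

result = binary_npv(anp.asarray(target_list), anp.asarray(preds_list))
assert float(result) == tn / (tn + fn)  # 1 TN out of 2 negative predictions -> 0.5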
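
The `weighted` averaging described in the multiclass docstring can be checked the same way: compute the one-vs-rest NPV per class, then average with weights equal to each class's support (TP + FN). A sketch using the values from the `multiclass_npv` doctests, under the same `numpy.array_api` assumption:

import numpy.array_api as anp

from cyclops.evaluate.metrics.experimental.functional import multiclass_npv

target = [2, 1, 0, 0]
preds = [2, 1, 0, 1]
num_classes = 3

# One-vs-rest counts per class: TN = "neither predicted nor truly class c",
# FN = "truly class c but predicted otherwise"; the support (TP + FN) is just
# the number of samples whose true label is c.
per_class_npv, support = [], []
for c in range(num_classes):
    tn = sum(t != c and p != c for t, p in zip(target, preds))
    fn = sum(t == c and p != c for t, p in zip(target, preds))
    per_class_npv.append(tn / (tn + fn))
    support.append(sum(t == c for t in target))

weighted = sum(n * s for n, s in zip(per_class_npv, support)) / sum(support)
result = multiclass_npv(
    anp.asarray(target),
    anp.asarray(preds),
    num_classes=num_classes,
    average="weighted",
)
assert abs(float(result) - weighted) < 1e-6  # ~0.8333, matching the doctest above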