In [None]:
# | default_exp metrics/detection

# Imports

In [None]:
# | export


from typing import Literal

import torch
from torchmetrics import Metric

from vision_architectures.utils.bounding_boxes import get_tps_fps_fns, sort_by_first_column_descending

In [None]:
from monai.data.box_utils import convert_box_to_standard_mode

# Mean Average Precision

### Direct function

In [None]:
# | export


def mean_average_precision_mean_average_recall(
    pred_bboxes: list[torch.Tensor],
    pred_objectness_probabilities: list[torch.Tensor] | None,
    pred_class_probabilities: list[torch.Tensor],
    target_bboxes: list[torch.Tensor],
    target_classes: list[torch.Tensor],
    iou_thresholds: list[float] = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
    average_precision_num_points: int = 101,
    min_confidence_threshold: float = 0.0,
    max_bboxes_per_image: int | None = 100,
    return_intermediates: bool = False,
) -> tuple[float, float] | tuple[float, float, dict[float, dict[int, float]], dict[float, dict[int, float]]]:
    """Calculate the COCO mean average precision (mAP) for object detection.

    Args:
        pred_bboxes: A list of length B containing tensors of shape (NP, 4) or (NP, 6) containing the predicted bounding
            box parameters in xyxy or xyzxyz format.
        pred_objectness_probabilities: A list of length B containing tensors of shape (NP,) containing the predicted
            objectness probabilities for the corresponding bounding boxes. This can be set to None in which case only
            the class probabilities are considered.
        pred_class_probabilities: A list of length B containing tensors of shape (NP, num_classes) containing the
            predicted class probabilities for the corresponding bounding boxes.
        target_bboxes: A list of length B containing tensors of shape (NT, 4) or (NT, 6) containing the target bounding
            box parameters in xyxy or xyzxyz format.
        target_classes: A list of length B containing tensors of shape (NT,) containing the target class labels for the
            objects in the image.
        iou_thresholds: A list of IoU thresholds to use for calculating mAP and mAR.
        average_precision_num_points: Number of points over which to calculate average precision.
        min_confidence_threshold: Minimum confidence probability threshold to consider a prediction.
        max_bboxes_per_image: Maximum number of bounding boxes to consider per image. If more are present, only the top
            `max_bboxes_per_image` boxes based on confidence scores are considered. If set to None, all bounding boxes
            are considered.
        return_intermediates: If True, return intermediate values used to calculate mAP and mAR.

    Returns:
        The mean average precision (mAP) and mean average recall (mAR) across all classes and IoU thresholds for the
        entire dataset.
        If `return_intermediates` is True, also returns two dictionaries containing the average precision and average
        recall for each class at each IoU threshold.
    """
    # Set some globaly used variables
    B = len(pred_bboxes)
    num_classes = pred_class_probabilities[0].shape[-1]

    if pred_objectness_probabilities is None:
        pred_objectness_probabilities = [
            torch.ones_like(pred_class_probability[:, 0]) for pred_class_probability in pred_class_probabilities
        ]

    # Some basic tests
    assert (
        len(pred_bboxes)
        == len(pred_objectness_probabilities)
        == len(pred_class_probabilities)
        == len(target_bboxes)
        == len(target_classes)
        == B
    ), (
        f"All input lists must have the same length. Got lengths: {len(pred_bboxes)}, "
        f"{len(pred_objectness_probabilities)}, {len(pred_class_probabilities)}, {len(target_bboxes)}, "
        f"{len(target_classes)}."
    )
    assert all(
        pred_bbox.shape[0] == pred_objectness_probability.shape[0] == pred_class_probability.shape[0]
        for pred_bbox, pred_objectness_probability, pred_class_probability in zip(
            pred_bboxes, pred_objectness_probabilities, pred_class_probabilities
        )
    ), "Each prediction input list element must have the same number of bounding boxes."
    assert all(
        pred_bbox.shape[1] == 4 or pred_bbox.shape[1] == 6 for pred_bbox in pred_bboxes
    ), "Prediction bounding boxes must have shape (NP, 4) or (NP, 6)."
    assert all(
        pred_class_probability.shape[1] == num_classes for pred_class_probability in pred_class_probabilities
    ), "Prediction class probabilities must have shape (NP, num_classes)."
    assert all(
        target_bbox.shape[0] == target_class.shape[0]
        for target_bbox, target_class in zip(target_bboxes, target_classes)
    ), "Each target must have the same number of bounding boxes."

    # Split everything based on different classes. Calculate confidence scores as well.
    pred_bboxes_by_class = [[] for _ in range(num_classes)]
    pred_confidences_scores_by_class = [[] for _ in range(num_classes)]
    target_bboxes_by_class = [[] for _ in range(num_classes)]
    for b in range(B):
        pred_classes = torch.argmax(pred_class_probabilities[b], dim=-1)
        # (NP,)
        for c in range(num_classes):
            pred_classes_mask = pred_classes == c
            # (NP,)
            target_classes_mask = target_classes[b] == (c + 1)
            # (NT,)

            pred_bboxes_by_class[c].append(pred_bboxes[b][pred_classes_mask])
            pred_confidences_scores_by_class[c].append(
                pred_objectness_probabilities[b][pred_classes_mask]
                * pred_class_probabilities[b][pred_classes_mask][:, c]
            )
            # (NP,)

            target_bboxes_by_class[c].append(target_bboxes[b][target_classes_mask])
            # (NT,)

    # Limit number of bounding boxes per image if applicable
    if max_bboxes_per_image is not None:
        for b in range(B):
            _confidence_scores = []
            for c in range(num_classes):
                if pred_bboxes_by_class[c][b].numel() > 0:
                    _confidence_scores.append(
                        torch.stack(
                            [
                                pred_confidences_scores_by_class[c][b],
                                torch.arange(
                                    pred_confidences_scores_by_class[c][b].shape[0], device=pred_bboxes[b].device
                                ),
                                torch.full_like(pred_confidences_scores_by_class[c][b], c),
                            ],
                            dim=-1,
                        )
                    )
            if len(_confidence_scores) == 0:
                continue
            _confidence_scores = torch.cat(_confidence_scores, dim=0)
            # (NC, 3)

            if _confidence_scores.shape[0] > max_bboxes_per_image:
                _confidence_scores = sort_by_first_column_descending(_confidence_scores)
                topk_confidences = _confidence_scores[:max_bboxes_per_image]
                # (max_bboxes_per_image, 3)
                for c in range(num_classes):
                    class_mask = topk_confidences[:, 2] == c
                    # (max_bboxes_per_image,)
                    offsets_to_keep = topk_confidences[class_mask][:, 1].long()
                    pred_bboxes_by_class[c][b] = pred_bboxes_by_class[c][b][offsets_to_keep]
                    # (NP', 4) or (NP', 6)
                    pred_confidences_scores_by_class[c][b] = pred_confidences_scores_by_class[c][b][offsets_to_keep]
                    # (NP',)

    # For each IOU threshold, calculate average precision and average recall
    average_precisions = {}
    average_recalls = {}
    for iou_threshold in iou_thresholds:
        # For each class calculate average precision and average recall
        class_average_precisions = {}
        class_average_recalls = {}
        for c in range(num_classes):
            # If no target boxes for this class, skip it
            if all(target_bbox.numel() == 0 for target_bbox in target_bboxes_by_class[c]):
                class_average_precisions[c + 1] = float("nan")
                class_average_recalls[c + 1] = float("nan")
                continue

            _, _, _, intermediate_counts = get_tps_fps_fns(
                pred_bboxes=pred_bboxes_by_class[c],
                pred_confidence_scores=pred_confidences_scores_by_class[c],
                target_bboxes=target_bboxes_by_class[c],
                iou_threshold=iou_threshold,
                matching_method="coco",
                min_confidence_threshold=min_confidence_threshold,
                max_bboxes_per_image=max_bboxes_per_image,
                return_intermediate_counts=True,
            )
            intermediate_counts = torch.tensor(intermediate_counts, device=pred_bboxes[0].device, dtype=torch.float32)
            # (NC, 3) where the first column is TP, second is FP and third is FN for each prediction considered
            precisions = intermediate_counts[:, 0] / (intermediate_counts[:, 0] + intermediate_counts[:, 1] + 1e-8)
            recalls = intermediate_counts[:, 0] / (intermediate_counts[:, 0] + intermediate_counts[:, 2] + 1e-8)
            # (NC,), (NC,)

            # Precision envelope: P_interp(r) = max_{r' >= r} P(r')
            enveloped_precisions = precisions.clone()
            for i in range(len(enveloped_precisions) - 2, -1, -1):
                if enveloped_precisions[i] < enveloped_precisions[i + 1]:
                    enveloped_precisions[i] = enveloped_precisions[i + 1]

            # Calculate average precision using step-wise interpolation
            recall_samples = torch.linspace(0, 1, average_precision_num_points, device=recalls.device)
            idxs = torch.searchsorted(recalls, recall_samples, side="left")
            valid = idxs < enveloped_precisions.numel()
            enveloped_precisions_at_t = torch.zeros_like(recall_samples)
            enveloped_precisions_at_t[valid] = enveloped_precisions[idxs[valid]]
            class_average_precisions[c + 1] = enveloped_precisions_at_t.mean().item()

            # Calculate average recall i.e. maximum recall achieved at this IoU threshold
            class_average_recalls[c + 1] = recalls.max().item() if recalls.numel() > 0 else 0.0

        average_precisions[iou_threshold] = class_average_precisions
        average_recalls[iou_threshold] = class_average_recalls

    map_metric = torch.nanmean(
        torch.stack([torch.tensor(ap) for iou_aps in average_precisions.values() for ap in iou_aps.values()])
    ).item()
    mar_metric = torch.nanmean(
        torch.stack([torch.tensor(ar) for iou_ars in average_recalls.values() for ar in iou_ars.values()])
    ).item()

    if return_intermediates:
        return map_metric, mar_metric, average_precisions, average_recalls
    return map_metric, mar_metric


# Create aliases
map_mar = mean_average_precision_mean_average_recall
mean_average_precision_recall = mean_average_precision_mean_average_recall

In [None]:
# Random predicted and target boxes. Predictions and targets are unrelated, so a tiny IoU threshold is used below to
# get any matches at all.

torch.manual_seed(0)  # Fixed seed so that this cell gives the same numbers on every "Restart & Run All"

pred_bboxes = [convert_box_to_standard_mode(torch.rand(i + 10, 6) * 128, "cccwhd") for i in range(25)]
pred_objectness_probabilities = [torch.rand(i + 10) for i in range(25)]
pred_class_probabilities = [torch.rand(i + 10, 5) for i in range(25)]

target_bboxes = [convert_box_to_standard_mode(torch.rand(i + 1 + 10 * (i % 2), 6) * 128, "cccwhd") for i in range(25)]
target_classes = [torch.randint(1, 6, (i + 1 + 10 * (i % 2),)) for i in range(25)]

print([x.shape for x in pred_objectness_probabilities])
print([x.shape for x in target_classes])
map_mar(
    pred_bboxes,
    pred_objectness_probabilities,
    pred_class_probabilities,
    target_bboxes,
    target_classes,
    iou_thresholds=[0.001],
    return_intermediates=True,
)

[torch.Size([10]), torch.Size([11]), torch.Size([12]), torch.Size([13]), torch.Size([14]), torch.Size([15]), torch.Size([16]), torch.Size([17]), torch.Size([18]), torch.Size([19]), torch.Size([20]), torch.Size([21]), torch.Size([22]), torch.Size([23]), torch.Size([24]), torch.Size([25]), torch.Size([26]), torch.Size([27]), torch.Size([28]), torch.Size([29]), torch.Size([30]), torch.Size([31]), torch.Size([32]), torch.Size([33]), torch.Size([34])]
[torch.Size([1]), torch.Size([12]), torch.Size([3]), torch.Size([14]), torch.Size([5]), torch.Size([16]), torch.Size([7]), torch.Size([18]), torch.Size([9]), torch.Size([20]), torch.Size([11]), torch.Size([22]), torch.Size([13]), torch.Size([24]), torch.Size([15]), torch.Size([26]), torch.Size([17]), torch.Size([28]), torch.Size([19]), torch.Size([30]), torch.Size([21]), torch.Size([32]), torch.Size([23]), torch.Size([34]), torch.Size([25])]



[1m([0m
    [1;36m0.3377198874950409[0m,
    [1;36m0.5781774520874023[0m,
    [1m{[0m
        [1;36m0.001[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.3959692418575287[0m,
            [1;36m2[0m: [1;36m0.31459006667137146[0m,
            [1;36m3[0m: [1;36m0.34917694330215454[0m,
            [1;36m4[0m: [1;36m0.32185280323028564[0m,
            [1;36m5[0m: [1;36m0.30701035261154175[0m
        [1m}[0m
    [1m}[0m,
    [1m{[0m
        [1;36m0.001[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.5979381203651428[0m,
            [1;36m2[0m: [1;36m0.5476190447807312[0m,
            [1;36m3[0m: [1;36m0.6063829660415649[0m,
            [1;36m4[0m: [1;36m0.5600000023841858[0m,
            [1;36m5[0m: [1;36m0.5789473652839661[0m
        [1m}[0m
    [1m}[0m
[1m)[0m

In [None]:
# Target boxes are the predicted boxes shifted by 0.5 and carry the predicted (argmax) classes, i.e. precision should
# be high

torch.manual_seed(0)  # Fixed seed so that this cell gives the same numbers on every "Restart & Run All"

pred_bboxes = [convert_box_to_standard_mode(torch.rand(i, 6) * 128, "cccwhd") for i in range(25)]
pred_objectness_probabilities = [torch.rand(i) for i in range(25)]
pred_class_probabilities = [torch.rand(i, 5) for i in range(25)]

target_bboxes = [pred_bboxes[i] + 0.5 for i in range(25)]
# Target labels are 1-indexed, hence the + 1
target_classes = [pred_class_probabilities[i].argmax(dim=-1) + 1 for i in range(25)]

print([x.shape for x in pred_objectness_probabilities])
print([x.shape for x in target_classes])
map_mar(
    pred_bboxes,
    pred_objectness_probabilities,
    pred_class_probabilities,
    target_bboxes,
    target_classes,
    return_intermediates=True,
)

[torch.Size([0]), torch.Size([1]), torch.Size([2]), torch.Size([3]), torch.Size([4]), torch.Size([5]), torch.Size([6]), torch.Size([7]), torch.Size([8]), torch.Size([9]), torch.Size([10]), torch.Size([11]), torch.Size([12]), torch.Size([13]), torch.Size([14]), torch.Size([15]), torch.Size([16]), torch.Size([17]), torch.Size([18]), torch.Size([19]), torch.Size([20]), torch.Size([21]), torch.Size([22]), torch.Size([23]), torch.Size([24])]
[torch.Size([0]), torch.Size([1]), torch.Size([2]), torch.Size([3]), torch.Size([4]), torch.Size([5]), torch.Size([6]), torch.Size([7]), torch.Size([8]), torch.Size([9]), torch.Size([10]), torch.Size([11]), torch.Size([12]), torch.Size([13]), torch.Size([14]), torch.Size([15]), torch.Size([16]), torch.Size([17]), torch.Size([18]), torch.Size([19]), torch.Size([20]), torch.Size([21]), torch.Size([22]), torch.Size([23]), torch.Size([24])]



[1m([0m
    [1;36m0.7555274963378906[0m,
    [1;36m0.8376586437225342[0m,
    [1m{[0m
        [1;36m0.5[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.8506786227226257[0m,
            [1;36m2[0m: [1;36m0.9793864488601685[0m,
            [1;36m3[0m: [1;36m0.8891515731811523[0m,
            [1;36m4[0m: [1;36m0.9469391703605652[0m,
            [1;36m5[0m: [1;36m0.9487349987030029[0m
        [1m}[0m,
        [1;36m0.55[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.8506786227226257[0m,
            [1;36m2[0m: [1;36m0.9793864488601685[0m,
            [1;36m3[0m: [1;36m0.8891515731811523[0m,
            [1;36m4[0m: [1;36m0.9095678925514221[0m,
            [1;36m5[0m: [1;36m0.9487349987030029[0m
        [1m}[0m,
        [1;36m0.6[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.8506786227226257[0m,
            [1;36m2[0m: [1;36m0.9793864488601685[0m,
            [1;36m3[0m: [1;36m0.860744833946228[0m,
            [1;36m4[0m: 

In [None]:
# Predicted boxes are subset of target boxes but with random classes i.e. precision should be high

pred_bboxes = [convert_box_to_standard_mode(torch.rand(i, 6) * 128, "cccwhd") for i in range(25)]
pred_objectness_probabilities = [torch.rand(i) for i in range(25)]
pred_class_probabilities = [torch.rand(i, 5) for i in range(25)]

target_bboxes = [
    torch.cat([pred_bboxes[i], convert_box_to_standard_mode(torch.rand(i, 6) * 128, "cccwhd")]) for i in range(25)
]
target_classes = [
    torch.cat([pred_class_probabilities[i].argmax(dim=-1) + 1, torch.randint(1, 6, (i,))]) for i in range(25)
]

print([x.shape for x in pred_objectness_probabilities])
print([x.shape for x in target_classes])
map_mar(
    pred_bboxes,
    pred_objectness_probabilities,
    pred_class_probabilities,
    target_bboxes,
    target_classes,
    return_intermediates=True,
)

[torch.Size([0]), torch.Size([1]), torch.Size([2]), torch.Size([3]), torch.Size([4]), torch.Size([5]), torch.Size([6]), torch.Size([7]), torch.Size([8]), torch.Size([9]), torch.Size([10]), torch.Size([11]), torch.Size([12]), torch.Size([13]), torch.Size([14]), torch.Size([15]), torch.Size([16]), torch.Size([17]), torch.Size([18]), torch.Size([19]), torch.Size([20]), torch.Size([21]), torch.Size([22]), torch.Size([23]), torch.Size([24])]
[torch.Size([0]), torch.Size([2]), torch.Size([4]), torch.Size([6]), torch.Size([8]), torch.Size([10]), torch.Size([12]), torch.Size([14]), torch.Size([16]), torch.Size([18]), torch.Size([20]), torch.Size([22]), torch.Size([24]), torch.Size([26]), torch.Size([28]), torch.Size([30]), torch.Size([32]), torch.Size([34]), torch.Size([36]), torch.Size([38]), torch.Size([40]), torch.Size([42]), torch.Size([44]), torch.Size([46]), torch.Size([48])]



[1m([0m
    [1;36m0.499009907245636[0m,
    [1;36m0.4999324381351471[0m,
    [1m{[0m
        [1;36m0.5[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.5247524976730347[0m,
            [1;36m2[0m: [1;36m0.5445544719696045[0m,
            [1;36m3[0m: [1;36m0.5049505233764648[0m,
            [1;36m4[0m: [1;36m0.4455445408821106[0m,
            [1;36m5[0m: [1;36m0.4752475321292877[0m
        [1m}[0m,
        [1;36m0.55[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.5247524976730347[0m,
            [1;36m2[0m: [1;36m0.5445544719696045[0m,
            [1;36m3[0m: [1;36m0.5049505233764648[0m,
            [1;36m4[0m: [1;36m0.4455445408821106[0m,
            [1;36m5[0m: [1;36m0.4752475321292877[0m
        [1m}[0m,
        [1;36m0.6[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.5247524976730347[0m,
            [1;36m2[0m: [1;36m0.5445544719696045[0m,
            [1;36m3[0m: [1;36m0.5049505233764648[0m,
            [1;36m4[0m: 

In [None]:
# Target boxes are subset of prediction boxes but with random classes i.e. recall should be high

pred_bboxes = [convert_box_to_standard_mode(torch.rand(i + 10, 6) * 128, "cccwhd") for i in range(25)]
pred_objectness_probabilities = [torch.rand(i + 10) for i in range(25)]
pred_class_probabilities = [torch.rand(i + 10, 5) for i in range(25)]

target_bboxes = [pred_bboxes[i][: i + 1] for i in range(25)]
target_classes = [pred_class_probabilities[i][: i + 1].argmax(dim=-1) + 1 for i in range(25)]

print([x.shape for x in pred_objectness_probabilities])
print([x.shape for x in target_classes])
map_mar(
    pred_bboxes,
    pred_objectness_probabilities,
    pred_class_probabilities,
    target_bboxes,
    target_classes,
    return_intermediates=True,
)

[torch.Size([10]), torch.Size([11]), torch.Size([12]), torch.Size([13]), torch.Size([14]), torch.Size([15]), torch.Size([16]), torch.Size([17]), torch.Size([18]), torch.Size([19]), torch.Size([20]), torch.Size([21]), torch.Size([22]), torch.Size([23]), torch.Size([24]), torch.Size([25]), torch.Size([26]), torch.Size([27]), torch.Size([28]), torch.Size([29]), torch.Size([30]), torch.Size([31]), torch.Size([32]), torch.Size([33]), torch.Size([34])]
[torch.Size([1]), torch.Size([2]), torch.Size([3]), torch.Size([4]), torch.Size([5]), torch.Size([6]), torch.Size([7]), torch.Size([8]), torch.Size([9]), torch.Size([10]), torch.Size([11]), torch.Size([12]), torch.Size([13]), torch.Size([14]), torch.Size([15]), torch.Size([16]), torch.Size([17]), torch.Size([18]), torch.Size([19]), torch.Size([20]), torch.Size([21]), torch.Size([22]), torch.Size([23]), torch.Size([24]), torch.Size([25])]



[1m([0m
    [1;36m0.6625458002090454[0m,
    [1;36m1.0[0m,
    [1m{[0m
        [1;36m0.5[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.6671385765075684[0m,
            [1;36m2[0m: [1;36m0.6295832991600037[0m,
            [1;36m3[0m: [1;36m0.7016004920005798[0m,
            [1;36m4[0m: [1;36m0.6478681564331055[0m,
            [1;36m5[0m: [1;36m0.6787266731262207[0m
        [1m}[0m,
        [1;36m0.55[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.6545827388763428[0m,
            [1;36m2[0m: [1;36m0.6295832991600037[0m,
            [1;36m3[0m: [1;36m0.7016004920005798[0m,
            [1;36m4[0m: [1;36m0.6478681564331055[0m,
            [1;36m5[0m: [1;36m0.6787266731262207[0m
        [1m}[0m,
        [1;36m0.6[0m: [1m{[0m
            [1;36m1[0m: [1;36m0.6545827388763428[0m,
            [1;36m2[0m: [1;36m0.6295832991600037[0m,
            [1;36m3[0m: [1;36m0.7016004920005798[0m,
            [1;36m4[0m: [1;36m0.64786

### Lightning metrics

In [None]:
# | export


class MeanAveragePrecisionMeanAverageRecall(Metric):
    """Calculate the COCO mean average precision (mAP) and mean average recall (mAR) for object detection.

    The metric stores the per-image predictions and targets passed to `update` and evaluates
    `mean_average_precision_mean_average_recall` over everything stored so far in `compute`.
    """

    def __init__(
        self,
        iou_thresholds: list[float] = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95],
        average_precision_num_points: int = 101,
        min_confidence_threshold: float = 0.0,
        max_bboxes_per_image: int | None = 100,
    ):
        """Initialize the MeanAveragePrecisionMeanAverageRecall metric.

        Args:
            iou_thresholds: A list of IoU thresholds to use for calculating mAP and mAR.
            average_precision_num_points: Number of points over which to calculate average precision.
            min_confidence_threshold: Minimum confidence score threshold to consider a prediction.
            max_bboxes_per_image: Maximum number of bounding boxes to consider per image. If more are present, only the
                top `max_bboxes_per_image` boxes based on confidence scores are considered. If set to None, all
                bounding boxes are considered.
        """
        super().__init__()

        # Copy the thresholds so that instances never share the (mutable) default list object
        self.iou_thresholds = list(iou_thresholds)
        self.average_precision_num_points = average_precision_num_points
        self.min_confidence_threshold = min_confidence_threshold
        self.max_bboxes_per_image = max_bboxes_per_image

        # List states holding one tensor per image
        self.add_state("pred_bboxes", [], dist_reduce_fx=None, persistent=False)
        self.add_state("pred_objectness_probabilities", [], dist_reduce_fx=None, persistent=False)
        self.add_state("pred_class_probabilities", [], dist_reduce_fx=None, persistent=False)
        self.add_state("target_bboxes", [], dist_reduce_fx=None, persistent=False)
        self.add_state("target_classes", [], dist_reduce_fx=None, persistent=False)

    def update(
        self,
        pred_bboxes: list[torch.Tensor],
        pred_objectness_probabilities: list[torch.Tensor] | None,
        pred_class_probabilities: list[torch.Tensor],
        target_bboxes: list[torch.Tensor],
        target_classes: list[torch.Tensor],
    ):
        """Store the predictions and targets of a batch of images.

        Args:
            pred_bboxes: A list of length B containing the predicted bounding boxes of each image.
            pred_objectness_probabilities: A list of length B containing the predicted objectness probabilities of each
                image. Can be None, in which case an objectness of 1 is stored for every predicted box, mirroring
                `mean_average_precision_mean_average_recall`.
            pred_class_probabilities: A list of length B containing the predicted class probabilities of each image.
            target_bboxes: A list of length B containing the target bounding boxes of each image.
            target_classes: A list of length B containing the target class labels of each image.
        """
        if pred_objectness_probabilities is None:
            # Keep the state lists aligned image-by-image even when no objectness scores are available
            pred_objectness_probabilities = [
                torch.ones_like(pred_class_probability[:, 0]) for pred_class_probability in pred_class_probabilities
            ]

        self.pred_bboxes.extend(pred_bboxes)
        self.pred_objectness_probabilities.extend(pred_objectness_probabilities)
        self.pred_class_probabilities.extend(pred_class_probabilities)
        self.target_bboxes.extend(target_bboxes)
        self.target_classes.extend(target_classes)

    def compute(self):
        """Evaluate the stored predictions and targets and return the tuple `(mAP, mAR)`."""
        return mean_average_precision_mean_average_recall(
            self.pred_bboxes,
            self.pred_objectness_probabilities,
            self.pred_class_probabilities,
            self.target_bboxes,
            self.target_classes,
            iou_thresholds=self.iou_thresholds,
            average_precision_num_points=self.average_precision_num_points,
            min_confidence_threshold=self.min_confidence_threshold,
            max_bboxes_per_image=self.max_bboxes_per_image,
        )

    def forward(self, *args, return_metrics: Literal["map_only", "mar_only", "map_mar"] = "map_mar", **kwargs):
        """Run the parent `forward` on the inputs and select which of its two results to return.

        Args:
            *args: Positional arguments forwarded to the parent `forward` (see `update` for the expected inputs).
            return_metrics: "map_only" returns only mAP, "mar_only" returns only mAR and any other value returns the
                tuple `(mAP, mAR)`.
            **kwargs: Keyword arguments forwarded to the parent `forward`.

        Returns:
            mAP, mAR or the tuple `(mAP, mAR)` depending on `return_metrics`.
        """
        # Local names avoid shadowing the builtin `map`
        map_value, mar_value = super().forward(*args, **kwargs)
        if return_metrics == "map_only":
            return map_value
        elif return_metrics == "mar_only":
            return mar_value
        return map_value, mar_value


# Aliases
MeanAveragePrecisionRecall = MeanAveragePrecisionMeanAverageRecall

In [None]:
torch.manual_seed(0)  # Fixed seed so that this cell gives the same numbers on every "Restart & Run All"

test = MeanAveragePrecisionMeanAverageRecall(max_bboxes_per_image=100)

for _ in range(100):
    pred_bboxes = [convert_box_to_standard_mode(torch.rand(i + 5, 6) * 128, "cccwhd") for i in range(10)]
    pred_objectness_probabilities = [torch.rand(i + 5) for i in range(10)]
    pred_class_probabilities = [torch.rand(i + 5, 3) for i in range(10)]

    # Targets reuse the predicted boxes but get random classes
    target_bboxes = pred_bboxes
    target_classes = [torch.randint(1, 4, (i + 5,)) for i in range(10)]

    # Named batch_map / batch_mar so that the builtin `map` is not shadowed for the rest of the notebook
    batch_map, batch_mar = test(
        pred_bboxes,
        pred_objectness_probabilities,
        pred_class_probabilities,
        target_bboxes,
        target_classes,
    )
    print(batch_map, batch_mar)

# Number of stored per-image entries before and after resetting the metric
print(len(test.pred_bboxes))
test.reset()
print(len(test.pred_bboxes))

0.15267755091190338 0.34454983472824097
0.10040593892335892 0.2701754570007324
0.1908988356590271 0.3892964720726013
0.16495607793331146 0.377538800239563
0.18733376264572144 0.3967181146144867
0.12756767868995667 0.33449074625968933
0.17939788103103638 0.39404764771461487
0.08487001061439514 0.2425505369901657
0.19043144583702087 0.3594455122947693
0.11127053946256638 0.28465110063552856
0.17545171082019806 0.32136422395706177
0.17516739666461945 0.3145299255847931
0.1184687614440918 0.31231096386909485
0.14919836819171906 0.35726043581962585
0.16936486959457397 0.34703773260116577
0.1707777976989746 0.3692307770252228
0.14756280183792114 0.31684982776641846
0.20145398378372192 0.39315131306648254
0.16337981820106506 0.35255590081214905
0.08714329451322556 0.23314793407917023
0.13929112255573273 0.3063492178916931
0.12915953993797302 0.3164808452129364
0.14435245096683502 0.33920449018478394
0.13841845095157623 0.33492979407310486
0.17037607729434967 0.32706373929977417
0.143613189458

In [None]:
# | export


class MeanAveragePrecision(MeanAveragePrecisionMeanAverageRecall):
    """COCO mean average precision (mAP) for object detection.

    Only `forward` is narrowed to the mAP value; the inherited `compute` is not overridden here.
    """

    def forward(self, *args, **kwargs):
        """Forward all arguments to the parent metric and return only its mAP result."""
        map_only_result = super().forward(*args, return_metrics="map_only", **kwargs)
        return map_only_result

In [None]:
# | export


class MeanAverageRecall(MeanAveragePrecisionMeanAverageRecall):
    """COCO mean average recall (mAR) for object detection.

    Only `forward` is narrowed to the mAR value; the inherited `compute` is not overridden here.
    """

    def forward(self, *args, **kwargs):
        """Forward all arguments to the parent metric and return only its mAR result."""
        mar_only_result = super().forward(*args, return_metrics="mar_only", **kwargs)
        return mar_only_result

# nbdev

In [None]:
# Run nbdev's export command in a shell (presumably writes the `# | export` cells of this notebook to the module named
# by the `default_exp` directive at the top; confirm against the project's nbdev settings).
!nbdev_export