In [None]:
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

def get_model_instance_segmentation(num_classes, isTrain):
    """Build a Mask R-CNN (ResNet-50 FPN v2) whose heads predict ``num_classes`` classes.

    Args:
        num_classes: Number of output classes for the new box and mask heads.
        isTrain: When truthy the network is created with ``weights="DEFAULT"``;
            otherwise it is created without passing any weights (the caller is
            expected to load a state dict afterwards).

    Returns:
        The model with ``roi_heads.box_predictor`` and
        ``roi_heads.mask_predictor`` replaced by freshly initialised heads.
    """
    build = torchvision.models.detection.maskrcnn_resnet50_fpn_v2
    model = build(weights="DEFAULT") if isTrain else build()

    heads = model.roi_heads

    # Box head: keep the input width of the existing classifier, swap the layer.
    box_in_features = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in_features, num_classes)

    # Mask head: same idea, with a fixed 256-channel hidden layer.
    mask_in_channels = heads.mask_predictor.conv5_mask.in_channels
    mask_hidden_channels = 256
    heads.mask_predictor = MaskRCNNPredictor(
        mask_in_channels,
        mask_hidden_channels,
        num_classes,
    )

    return model

In [None]:
from torchvision.transforms import v2 as T
def get_transform():
    """Return the augmentation pipeline as a single ``T.Compose``.

    The pipeline converts to float (with scaling), applies colour jitter, and
    then randomly applies sharpening, an elastic warp, solarisation and a
    Gaussian blur before unwrapping to a pure tensor.
    """
    # NOTE(review): threshold=5.0 looks larger than the value range expected
    # after ToDtype(..., scale=True) -- confirm the intended solarize threshold.
    return T.Compose([
        T.ToDtype(torch.float, scale=True),
        T.ColorJitter(
            brightness=(.1, 1),
            contrast=(0, 10),
            saturation=(.1, 1),
            hue=(.1, .4),
        ),
        T.RandomApply([T.RandomAdjustSharpness(sharpness_factor=10)], p=0.8),
        T.RandomApply([T.ElasticTransform(alpha=250)], p=0.5),
        T.RandomApply([T.RandomSolarize(threshold=5.0)], p=0.5),
        T.RandomApply([T.GaussianBlur(kernel_size=(11, 21), sigma=(5, 50))], p=0.5),
        T.ToPureTensor(),
    ])

In [None]:
import numpy as np
import os
import torch
import torch.utils.data
import torchvision
from PIL import Image, ImageDraw
from pycocotools.coco import COCO
import cv2
import json
import matplotlib.pyplot as plt 

class CurrentDataset(torch.utils.data.Dataset):
    """COCO-annotated instance-segmentation dataset with one foreground class.

    Each item is a tuple ``(image, boxes, labels, masks)``:

    * ``image``  -- float32 tensor of shape (H, W, 3) with the raw pixel values
      in channels-last layout (callers permute it to channels-first).
    * ``boxes``  -- float32 tensor of shape (N, 4) in
      [xmin, ymin, xmax, ymax] order.
    * ``labels`` -- int64 tensor of N ones (every annotation is the target class).
    * ``masks``  -- uint8 tensor of shape (N, H, W), 1 inside each polygon.

    N may be 0; the tensors then keep their trailing dimensions so they can
    still be padded together with other samples.

    Args:
        root: Directory that contains the image files.
        annotation: Path to the COCO-style JSON annotation file.
        transforms: Stored on the instance; it is NOT applied by
            ``__getitem__``.
    """

    def __init__(self, root, annotation, transforms=None):
        self.root = root
        self.transforms = transforms
        self.coco = COCO(annotation)
        self.ids = sorted(self.coco.imgs.keys())

    @staticmethod
    def _xywh_to_xyxy(bboxes):
        """Convert COCO [xmin, ymin, width, height] boxes to an (N, 4) xyxy tensor."""
        corners = []
        for bbox in bboxes:
            xmin, ymin, width, height = bbox[:4]
            corners.append([xmin, ymin, xmin + width, ymin + height])
        # Explicit row count: an empty list must become (0, 4), not (0,).
        return torch.as_tensor(corners, dtype=torch.float32).reshape(len(corners), 4)

    @staticmethod
    def _polygon_to_mask(segmentation, height, width):
        """Rasterise one flat [x0, y0, x1, y1, ...] polygon into a uint8 0/1 mask."""
        mask = np.zeros((height, width), dtype=np.uint8)
        points = np.asarray(segmentation, dtype=np.float64).reshape(-1, 2)
        if len(points):
            # cv2.fillPoly only accepts int32 vertex arrays.
            cv2.fillPoly(mask, [np.rint(points).astype(np.int32)], color=1)
        return mask

    def __getitem__(self, index):
        coco = self.coco
        img_id = self.ids[index]
        # All annotation records that belong to this image.
        coco_annotation = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        path = coco.loadImgs(img_id)[0]['file_name']

        # Read the pixels inside a context manager so the file handle is closed.
        with Image.open(os.path.join(self.root, path)) as img:
            width, height = img.size
            # Force three channels so grayscale / RGBA files keep the (H, W, 3) contract.
            pixels = np.asarray(img.convert('RGB')).astype(np.float32)

        num_objs = len(coco_annotation)

        boxes = self._xywh_to_xyxy([ann['bbox'] for ann in coco_annotation])

        if num_objs:
            instance_masks = np.stack([
                self._polygon_to_mask(ann['segmentation'], height, width)
                for ann in coco_annotation
            ])
            masks = torch.from_numpy(instance_masks)
        else:
            masks = torch.zeros((0, height, width), dtype=torch.uint8)

        # Labels (only one class: target class or background)
        labels = torch.ones((num_objs,), dtype=torch.int64)

        return torch.from_numpy(pixels), boxes, labels, masks

    def __len__(self):
        return len(self.ids)

In [None]:
from torch.nn.utils.rnn import pad_sequence
import torch.nn.functional as F

def collate_fn(batch):
    """Collate ``(image, boxes, labels, masks)`` samples into a training batch.

    Boxes, labels and masks are zero-padded along the object dimension so every
    sample in the batch has the same number of entries. Padded (all-zero) box
    rows get ``xmax = ymax = 1e-9`` so they have a strictly positive width and
    height.

    Args:
        batch: Sequence of ``(image, boxes, labels, masks)`` tuples of tensors.
            All images must have the same shape.

    Returns:
        Tuple ``(images, targets)``: ``images`` is the stacked image tensor and
        ``targets`` is a list with one ``{'boxes', 'labels', 'masks'}`` dict
        per sample.
    """
    imgs, box, label, mask = zip(*batch)

    boxes = pad_sequence(box, batch_first=True, padding_value=0)
    # Rows that are entirely zero are padding; nudge their far corner.
    is_padding = (boxes == 0).all(dim=-1)
    boxes[..., 2:][is_padding] = 1e-9

    labels = pad_sequence(label, batch_first=True, padding_value=0)
    masks = pad_sequence(mask, batch_first=True, padding_value=0)

    res = [
        {'boxes': b, 'labels': l, 'masks': m}
        for b, l, m in zip(boxes, labels, masks)
    ]

    # Stack directly instead of converting every pixel to a Python float and back.
    return torch.stack(imgs), res

In [None]:
def collate_fnValid(batch):
    """Collate ``(image, boxes, labels, masks)`` samples into an evaluation batch.

    Works like the training collate function (zero-padding of boxes, labels and
    masks; padded box rows get ``xmax = ymax = 1e-9``) and additionally attaches
    a ``'scores'`` tensor of ones to every target, with one score per (padded)
    box. Each target owns its own scores tensor.

    Args:
        batch: Sequence of ``(image, boxes, labels, masks)`` tuples of tensors.
            All images must have the same shape.

    Returns:
        Tuple ``(images, targets)``: ``images`` is the stacked image tensor and
        ``targets`` is a list with one
        ``{'boxes', 'labels', 'masks', 'scores'}`` dict per sample.
    """
    imgs, box, label, mask = zip(*batch)

    boxes = pad_sequence(box, batch_first=True, padding_value=0)
    # Rows that are entirely zero are padding; nudge their far corner.
    is_padding = (boxes == 0).all(dim=-1)
    boxes[..., 2:][is_padding] = 1e-9

    labels = pad_sequence(label, batch_first=True, padding_value=0)
    masks = pad_sequence(mask, batch_first=True, padding_value=0)

    res = []
    for b, l, m in zip(boxes, labels, masks):
        res.append({
            'boxes': b,
            'labels': l,
            'masks': m,
            # Ground truth is fully confident: one score of 1.0 per box.
            'scores': torch.ones(b.shape[0]),
        })

    # Stack directly instead of converting every pixel to a Python float and back.
    return torch.stack(imgs), res

In [None]:
#from currentdataset import CurrentDataset
import numpy as np
# Paths to the image folders and their COCO annotation files
train_data_dir = '../data/sard_yolo/images/train'
train_coco = '../data/sard_yolo/ann_files/binary_masks/_train_annotations.coco.json'

valid_data_dir = '../data/sard_yolo/images/valid'
valid_coco = '../data/sard_yolo/ann_files/binary_masks/_valid_annotations.coco.json'

# Datasets. The transform pipeline is built here (get_transform is called, not
# passed as a bare function); note that CurrentDataset only stores it.
train_dataset = CurrentDataset(root=train_data_dir,
                               annotation=train_coco,
                               transforms=get_transform(),
                               )

valid_dataset = CurrentDataset(root=valid_data_dir,
                               annotation=valid_coco,
                               )

# Batch sizes
train_batch_size = 8
valid_batch_size = 4

# DataLoaders
trainData_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=train_batch_size,
                                               shuffle=True,
                                               num_workers=2,
                                               collate_fn=collate_fn,
                                               )
# Validation uses its own batch size (the loss is later divided by
# valid_batch_size) and a fixed order so runs are comparable.
validData_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=valid_batch_size,
                                               shuffle=False,
                                               num_workers=2,
                                               collate_fn=collate_fnValid,
                                               )

In [None]:

def calculate_iou(box1, box2, box_format="xyxy"):
    """Intersection-over-union of the FIRST box in ``box1`` and in ``box2``.

    Args:
        box1: Boxes as an (N, 4) tensor / nested sequence, or a single box of
            four values. Only the first box is used.
        box2: Same layout as ``box1``.
        box_format: ``"xyxy"`` (default) for [xmin, ymin, xmax, ymax] -- the
            layout produced by the dataset in this notebook -- or ``"xywh"``
            for [xmin, ymin, width, height].

    Returns:
        float: IoU in [0, 1]. 0.0 when either input holds no box, when the
        boxes do not overlap, or when the union has no area.

    Raises:
        ValueError: If ``box_format`` is not ``"xyxy"`` or ``"xywh"``.
    """
    if box_format not in ("xyxy", "xywh"):
        raise ValueError(f"Unsupported box_format: {box_format!r}")

    def _first_corners(boxes):
        # Returns (xmin, ymin, xmax, ymax) of the first box, or None if empty.
        rows = torch.as_tensor(boxes, dtype=torch.float32)
        if rows.numel() == 0:
            return None
        x0, y0, third, fourth = rows.reshape(-1, 4)[0].tolist()
        if box_format == "xywh":
            return x0, y0, x0 + third, y0 + fourth
        return x0, y0, third, fourth

    first = _first_corners(box1)
    second = _first_corners(box2)
    if first is None or second is None:
        return 0.0

    ax0, ay0, ax1, ay1 = first
    bx0, by0, bx1, by1 = second

    # Extent of the intersection rectangle (negative means no overlap).
    inter_w = min(ax1, bx1) - max(ax0, bx0)
    inter_h = min(ay1, by1) - max(ay0, by0)
    if inter_w < 0 or inter_h < 0:
        return 0.0
    intersection_area = inter_w * inter_h

    # Union by inclusion-exclusion.
    box1_area = (ax1 - ax0) * (ay1 - ay0)
    box2_area = (bx1 - bx0) * (by1 - by0)
    union_area = box1_area + box2_area - intersection_area
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area

def calculate_boxApAr(detections, annotations, iou_threshold=0.5):
    """
    Compute a simplified Average Precision / recall pair for one batch.

    Every entry of ``detections`` is treated as one prediction and every entry
    of ``annotations`` as one ground truth; ``calculate_iou`` measures their
    overlap from the ``'boxes'`` value of each entry. Predictions are processed
    in list order (they are not re-sorted by score). A prediction is a true
    positive when its best-overlapping ground truth reaches ``iou_threshold``
    and has not been claimed by an earlier prediction; otherwise it is a false
    positive. Entries without any box are skipped.

    NOTE(review): each prediction is compared with every annotation in the
    list, so it can be paired with the ground truth of another image of the
    batch -- confirm that this is intended.

    Args:
        detections (List[Dict]): Predictions; each dict has a ``'boxes'`` entry.
        annotations (List[Dict]): Ground truth; each dict has a ``'boxes'`` entry.
        iou_threshold (float): IoU needed to count a prediction as correct.

    Returns:
        Tuple[float, float]: ``(ap, ar)`` where ``ap`` is the area under the
        precision-recall steps (including the first prediction) and ``ar`` is
        the recall after all predictions. ``(0.0, 0.0)`` when either list is
        empty.
    """
    num_detections = len(detections)
    num_gt_bboxes = len(annotations)
    if num_detections == 0 or num_gt_bboxes == 0:
        return 0.0, 0.0

    true_positives = np.zeros(num_detections)
    false_positives = np.zeros(num_detections)
    used_gt_bboxes = np.zeros(num_gt_bboxes, dtype=bool)

    for i, detection in enumerate(detections):
        if len(detection['boxes']) == 0:
            # Nothing was predicted here: neither a hit nor a false alarm.
            continue

        max_iou = -1.0
        max_iou_idx = -1
        for j, annotation in enumerate(annotations):
            if len(annotation['boxes']) == 0:
                continue
            iou = calculate_iou(detection['boxes'], annotation['boxes'])
            if iou > max_iou:
                max_iou = iou
                max_iou_idx = j

        if max_iou >= iou_threshold and not used_gt_bboxes[max_iou_idx]:
            true_positives[i] = 1
            used_gt_bboxes[max_iou_idx] = True
        else:
            false_positives[i] = 1

    # Cumulative precision / recall in processing order.
    cum_tp = np.cumsum(true_positives)
    cum_fp = np.cumsum(false_positives)
    seen = cum_tp + cum_fp
    precision = np.divide(cum_tp, seen, out=np.zeros_like(cum_tp), where=seen > 0)
    recall = cum_tp / num_gt_bboxes

    # Prepend recall 0 so the first prediction contributes its step as well.
    recall_steps = np.diff(recall, prepend=0.0)
    ap = float(np.sum(recall_steps * precision))
    ar = float(recall[-1])
    return ap, ar


In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

import torchvision.transforms.functional as F


plt.rcParams["savefig.bbox"] = 'tight'


def show(imgs):
    """Draw one image tensor, or a list of them, side by side without axis ticks.

    Args:
        imgs: A single image tensor or a list of image tensors accepted by
            ``F.to_pil_image``.
    """
    images = imgs if isinstance(imgs, list) else [imgs]
    _, axes = plt.subplots(ncols=len(images), squeeze=False)
    for column, tensor in enumerate(images):
        # Detach from autograd before converting to a PIL image for display.
        pil_image = F.to_pil_image(tensor.detach())
        axis = axes[0, column]
        axis.imshow(np.asarray(pil_image))
        axis.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
        
from torchvision.utils import make_grid
from torchvision.io import read_image
from pathlib import Path
from torchvision.utils import draw_segmentation_masks

def makingMaskForData(img, data, proba_threshold=0.95, mask_threshold=0.5):
    """Overlay the confident instance masks of ``data`` on the image at ``img``.

    Args:
        img: Path of the image file to read.
        data: Dict with ``'masks'`` and ``'scores'`` entries of equal length
            (presumably one model output dict -- confirm against the caller).
        proba_threshold: Only instances whose score is above this value are drawn.
        mask_threshold: Mask values above this are treated as foreground.
    """
    data_int = read_image(str(img))

    # Keep confident instances only, then binarise their masks.
    keep = data['scores'] > proba_threshold
    masks = data['masks'][keep]
    if masks.dim() == 4:
        # (N, 1, H, W) -> (N, H, W)
        masks = masks.squeeze(1)
    # The image was read on the CPU, so bring the masks there too.
    data_bool_masks = (masks > mask_threshold).cpu()

    show(draw_segmentation_masks(data_int, data_bool_masks, alpha=0.9))


In [None]:
from typing import Tuple, List, Dict, Optional
import torch
from torch import Tensor
from collections import OrderedDict
from torchvision.models.detection.roi_heads import fastrcnn_loss
from torchvision.models.detection.rpn import concat_box_prediction_layers
def eval_forward(model, images, targets):
    """
    Run the detector in eval mode while still computing RPN and box-head losses.

    The function replays the steps of the RPN and of the box branch of the ROI
    heads by hand so that both the loss terms and the post-processed detections
    are available from a single pass. The mask branch is not evaluated here:
    no mask loss is computed and the detections carry no masks.

    Args:
        model: Detection model exposing ``transform``, ``backbone``, ``rpn`` and
            ``roi_heads``.
        images (list[Tensor]): images to be processed
        targets (list[Dict[str, Tensor]]): ground-truth boxes present in the
            image. Must not be None (asserted below).
    Returns:
        Tuple ``(losses, detections)``: ``losses`` is a dict with the keys
        ``loss_classifier``, ``loss_box_reg``, ``loss_objectness`` and
        ``loss_rpn_box_reg``; ``detections`` is a list with one dict per image
        holding ``boxes``, ``labels`` and ``scores``, rescaled to the original
        image sizes.
    Raises:
        ValueError: If a target contains a box without positive width and height.
    """
    model.eval()

    # Remember the (height, width) of every input so detections can be mapped back.
    original_image_sizes: List[Tuple[int, int]] = []
    for img in images:
        val = img.shape[-2:]
        assert len(val) == 2
        original_image_sizes.append((val[0], val[1]))

    images, targets = model.transform(images, targets)

    # Check for degenerate boxes
    # TODO: Move this to a function
    if targets is not None:
        for target_idx, target in enumerate(targets):
            boxes = target["boxes"]
            degenerate_boxes = boxes[:, 2:] <= boxes[:, :2]
            if degenerate_boxes.any():
                # print the first degenerate box
                bb_idx = torch.where(degenerate_boxes.any(dim=1))[0][0]
                degen_bb: List[float] = boxes[bb_idx].tolist()
                raise ValueError(
                    "All bounding boxes should have positive height and width."
                    f" Found invalid box {degen_bb} for target at index {target_idx}."
                )

    features = model.backbone(images.tensors)
    # A backbone that returns a single tensor is wrapped so the rest can treat
    # features as an ordered mapping.
    if isinstance(features, torch.Tensor):
        features = OrderedDict([("0", features)])
    # Flip only the RPN's training flag (restored at the end of the function).
    # NOTE(review): presumably so the RPN calls below use its training-time
    # settings -- confirm.
    model.rpn.training=True
    #model.roi_heads.training=True


    # ---- RPN, expanded by hand (replaces the call kept for reference below) ----
    #####proposals, proposal_losses = model.rpn(images, features, targets)
    features_rpn = list(features.values())
    objectness, pred_bbox_deltas = model.rpn.head(features_rpn)
    anchors = model.rpn.anchor_generator(images, features_rpn)

    num_images = len(anchors)
    num_anchors_per_level_shape_tensors = [o[0].shape for o in objectness]
    num_anchors_per_level = [s[0] * s[1] * s[2] for s in num_anchors_per_level_shape_tensors]
    objectness, pred_bbox_deltas = concat_box_prediction_layers(objectness, pred_bbox_deltas)
    # apply pred_bbox_deltas to anchors to obtain the decoded proposals
    # note that we detach the deltas because Faster R-CNN do not backprop through
    # the proposals
    proposals = model.rpn.box_coder.decode(pred_bbox_deltas.detach(), anchors)
    proposals = proposals.view(num_images, -1, 4)
    proposals, scores = model.rpn.filter_proposals(proposals, objectness, images.image_sizes, num_anchors_per_level)

    # RPN losses against the ground-truth targets.
    proposal_losses = {}
    assert targets is not None
    labels, matched_gt_boxes = model.rpn.assign_targets_to_anchors(anchors, targets)
    regression_targets = model.rpn.box_coder.encode(matched_gt_boxes, anchors)
    loss_objectness, loss_rpn_box_reg = model.rpn.compute_loss(
        objectness, pred_bbox_deltas, labels, regression_targets
    )
    proposal_losses = {
        "loss_objectness": loss_objectness,
        "loss_rpn_box_reg": loss_rpn_box_reg,
    }

    # ---- ROI heads (box branch only), expanded by hand ----
    #####detections, detector_losses = model.roi_heads(features, proposals, images.image_sizes, targets)
    image_shapes = images.image_sizes
    proposals, matched_idxs, labels, regression_targets = model.roi_heads.select_training_samples(proposals, targets)
    box_features = model.roi_heads.box_roi_pool(features, proposals, image_shapes)
    box_features = model.roi_heads.box_head(box_features)
    class_logits, box_regression = model.roi_heads.box_predictor(box_features)

    result: List[Dict[str, torch.Tensor]] = []
    detector_losses = {}
    loss_classifier, loss_box_reg = fastrcnn_loss(class_logits, box_regression, labels, regression_targets)
    detector_losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
    # The same head outputs are also turned into final detections.
    boxes, scores, labels = model.roi_heads.postprocess_detections(class_logits, box_regression, proposals, image_shapes)
    num_images = len(boxes)
    for i in range(num_images):
        result.append(
            {
                "boxes": boxes[i],
                "labels": labels[i],
                "scores": scores[i],
            }
        )
    detections = result
    # Map the boxes from the transformed image sizes back to the original ones.
    detections = model.transform.postprocess(detections, images.image_sizes, original_image_sizes)  # type: ignore[operator]
    # Put the sub-module flags back into eval state. These lines are skipped if
    # anything above raises.
    model.rpn.training=False
    model.roi_heads.training=False
    losses = {}
    losses.update(detector_losses)
    losses.update(proposal_losses)
    return losses, detections

In [None]:
# 2 classes; Only target class or background
num_classes = 2
# Number of fine-tuning epochs (the model starts from COCO-pretrained weights)
num_epochs = 25
model = get_model_instance_segmentation(num_classes, True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# move model to the right device
model.to(device)

# parameters
params = [p for p in model.parameters() if p.requires_grad]
# Try LR of 0.001
optimizer = torch.optim.SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0001)

len_trainDataloader = len(trainData_loader)
len_validDataloader = len(validData_loader)

# Per-batch histories accumulated over ALL epochs (read by the plotting cells).
training_loss = []
valid_losses = []
apAll = []
arAll = []
mAPAll = []

model_dir = 'checkpoints'
os.makedirs(model_dir, exist_ok=True)

# NOTE(review): images reach the model as raw 0-255 floats; torchvision's
# detection models document a 0-1 input range. If rescaling is added it has to
# be added to the test cell as well -- confirm before changing.
for epoch in range(num_epochs):
    model.train()
    epoch_train_losses = []
    for i, (imgs, annotations) in enumerate(trainData_loader, 1):
        # (B, H, W, C) -> (B, C, H, W) on the target device
        imgs = torch.as_tensor(imgs, device=device).permute(0, 3, 1, 2)

        for ann in annotations:
            for key in ('boxes', 'masks', 'labels'):
                ann[key] = ann[key].to(device)

        loss_dict = model(imgs, annotations)
        losses = sum(loss for loss in loss_dict.values()) / train_batch_size
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        batch_loss = losses.detach().cpu().item()
        training_loss.append(batch_loss)
        epoch_train_losses.append(batch_loss)
        if i % 29 == 0:
            print(f'TRAIN -- Iteration: {i}/{len_trainDataloader}, Loss: {losses}')

    # Mean over THIS epoch's batches only (the full history keeps growing).
    epoch_train_mean = sum(epoch_train_losses) / max(len(epoch_train_losses), 1)
    print(f'TRAINING -- Epoch: {epoch}, Loss: {epoch_train_mean}')

    model.eval()
    epoch_valid_losses = []
    with torch.no_grad():
        for j, (imgs, annotations) in enumerate(validData_loader, 1):
            imgs = torch.as_tensor(imgs, device=device).permute(0, 3, 1, 2)
            for ann in annotations:
                for key in ('boxes', 'masks', 'labels', 'scores'):
                    ann[key] = ann[key].to(device)

            losses, detections = eval_forward(model, imgs, annotations)
            val_loss = sum(loss for loss in losses.values()) / valid_batch_size
            batch_loss = val_loss.detach().cpu().item()
            valid_losses.append(batch_loss)
            epoch_valid_losses.append(batch_loss)

            ap, ar = calculate_boxApAr(detections, annotations)
            apAll.append(ap)
            arAll.append(ar)
            # Running mean of every AP value recorded so far.
            mAPAll.append(sum(apAll) / len(apAll))

            if j % 10 == 0:
                print(f'VALIDATION -- Iteration: {j}/{len_validDataloader}, Loss: {val_loss}')

    epoch_valid_mean = sum(epoch_valid_losses) / max(len(epoch_valid_losses), 1)
    print(f'VALIDATION -- Epoch: {epoch}, Loss: {epoch_valid_mean}')

    # One checkpoint per epoch, numbered from 1.
    torch.save(model.state_dict(), f'{model_dir}/ckpt-model-{epoch+1}.pt')


        

In [None]:
# Highest value in mAPAll, i.e. the best running-mean AP recorded over all
# validation batches. max() raises ValueError if mAPAll is still empty.
print(max(mAPAll))

In [None]:
# mAPAll holds one running-mean value per validation batch, computed from
# bounding boxes (calculate_boxApAr); spread the points evenly over the epochs.
bins_mAP = np.linspace(0, num_epochs, len(mAPAll))

plt.plot(bins_mAP, mAPAll, color='darkblue', label='box mAP at 50')

plt.xlabel('Epochs')
plt.ylabel('mAP')
plt.title('Bounding-box mAP')

plt.legend()
plt.show()

In [None]:
# Number of consecutive batches averaged into one plotted point.
window_train = 2
window_valid = 3

print(len(training_loss))
print(len(valid_losses))

# Non-overlapping window means (the last window may be shorter).
avg_train = [
    np.mean(training_loss[start:start + window_train])
    for start in range(0, len(training_loss), window_train)
]
avg_valid = [
    np.mean(valid_losses[start:start + window_valid])
    for start in range(0, len(valid_losses), window_valid)
]

# Spread both curves evenly across the epoch axis.
bins_train = np.linspace(0, num_epochs, len(avg_train))
bins_valid = np.linspace(0, num_epochs, len(avg_valid))

plt.plot(bins_train, avg_train, color='darkblue', label='Training Loss')
plt.plot(bins_valid, avg_valid, color='steelblue', label='Validation')

plt.title('Loss Functions for Training and Validation')
plt.xlabel('Epochs')
plt.ylabel('Loss')

plt.legend()
plt.show()

In [None]:
import numpy as np
import torch
import torchvision
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.datasets import CocoDetection
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader
from coco_eval import CocoEvaluator
import utils
import coco_utils

# Same device selection as the training cell, so this cell also runs without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the architecture without weights, then restore a saved checkpoint.
model = get_model_instance_segmentation(2, False).to(device)
# map_location lets a checkpoint written on a GPU be loaded on any device;
# weights_only restricts unpickling to tensors and plain containers.
model.load_state_dict(
    torch.load('checkpoints/ckpt-model-10.pt', map_location=device, weights_only=True)
)

test_dataset = CurrentDataset(root='../data/sard_yolo/images/test',
                              annotation='../data/sard_yolo/ann_files/binary_masks/_test_annotations.coco.json',
                              )

# Batch sizes
test_batch_size = 4

# DataLoader (fixed order for reproducible evaluation)
testData_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=test_batch_size,
                                              shuffle=False,
                                              num_workers=2,
                                              collate_fn=collate_fnValid,
                                              )

def calculate_iou(box1, box2, box_format="xyxy"):
    """Intersection-over-union of the FIRST box in ``box1`` and in ``box2``.

    NOTE: this redefines the ``calculate_iou`` declared in an earlier cell;
    keep the two definitions in sync or delete one of them.

    Args:
        box1: Boxes as an (N, 4) tensor / nested sequence, or a single box of
            four values. Only the first box is used.
        box2: Same layout as ``box1``.
        box_format: ``"xyxy"`` (default) for [xmin, ymin, xmax, ymax] -- the
            layout produced by the dataset in this notebook -- or ``"xywh"``
            for [xmin, ymin, width, height].

    Returns:
        float: IoU in [0, 1]. 0.0 when either input holds no box, when the
        boxes do not overlap, or when the union has no area.

    Raises:
        ValueError: If ``box_format`` is not ``"xyxy"`` or ``"xywh"``.
    """
    if box_format not in ("xyxy", "xywh"):
        raise ValueError(f"Unsupported box_format: {box_format!r}")

    def _first_corners(boxes):
        # Returns (xmin, ymin, xmax, ymax) of the first box, or None if empty.
        rows = torch.as_tensor(boxes, dtype=torch.float32)
        if rows.numel() == 0:
            return None
        x0, y0, third, fourth = rows.reshape(-1, 4)[0].tolist()
        if box_format == "xywh":
            return x0, y0, x0 + third, y0 + fourth
        return x0, y0, third, fourth

    first = _first_corners(box1)
    second = _first_corners(box2)
    if first is None or second is None:
        return 0.0

    ax0, ay0, ax1, ay1 = first
    bx0, by0, bx1, by1 = second

    # Extent of the intersection rectangle (negative means no overlap).
    inter_w = min(ax1, bx1) - max(ax0, bx0)
    inter_h = min(ay1, by1) - max(ay0, by0)
    if inter_w < 0 or inter_h < 0:
        return 0.0
    intersection_area = inter_w * inter_h

    # Union by inclusion-exclusion.
    box1_area = (ax1 - ax0) * (ay1 - ay0)
    box2_area = (bx1 - bx0) * (by1 - by0)
    union_area = box1_area + box2_area - intersection_area
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area

def calculate_boxApAr(detections, annotations, iou_threshold=0.5):
    """
    Compute a simplified Average Precision / recall pair for one batch.

    NOTE: this redefines the ``calculate_boxApAr`` declared in an earlier cell;
    keep the two definitions in sync or delete one of them.

    Every entry of ``detections`` is treated as one prediction and every entry
    of ``annotations`` as one ground truth; ``calculate_iou`` measures their
    overlap from the ``'boxes'`` value of each entry. Predictions are processed
    in list order (they are not re-sorted by score). A prediction is a true
    positive when its best-overlapping ground truth reaches ``iou_threshold``
    and has not been claimed by an earlier prediction; otherwise it is a false
    positive. Entries without any box are skipped.

    NOTE(review): each prediction is compared with every annotation in the
    list, so it can be paired with the ground truth of another image of the
    batch -- confirm that this is intended.

    Args:
        detections (List[Dict]): Predictions; each dict has a ``'boxes'`` entry.
        annotations (List[Dict]): Ground truth; each dict has a ``'boxes'`` entry.
        iou_threshold (float): IoU needed to count a prediction as correct.

    Returns:
        Tuple[float, float]: ``(ap, ar)`` where ``ap`` is the area under the
        precision-recall steps (including the first prediction) and ``ar`` is
        the recall after all predictions. ``(0.0, 0.0)`` when either list is
        empty.
    """
    num_detections = len(detections)
    num_gt_bboxes = len(annotations)
    if num_detections == 0 or num_gt_bboxes == 0:
        return 0.0, 0.0

    true_positives = np.zeros(num_detections)
    false_positives = np.zeros(num_detections)
    used_gt_bboxes = np.zeros(num_gt_bboxes, dtype=bool)

    for i, detection in enumerate(detections):
        if len(detection['boxes']) == 0:
            # Nothing was predicted here: neither a hit nor a false alarm.
            continue

        max_iou = -1.0
        max_iou_idx = -1
        for j, annotation in enumerate(annotations):
            if len(annotation['boxes']) == 0:
                continue
            iou = calculate_iou(detection['boxes'], annotation['boxes'])
            if iou > max_iou:
                max_iou = iou
                max_iou_idx = j

        if max_iou >= iou_threshold and not used_gt_bboxes[max_iou_idx]:
            true_positives[i] = 1
            used_gt_bboxes[max_iou_idx] = True
        else:
            false_positives[i] = 1

    # Cumulative precision / recall in processing order.
    cum_tp = np.cumsum(true_positives)
    cum_fp = np.cumsum(false_positives)
    seen = cum_tp + cum_fp
    precision = np.divide(cum_tp, seen, out=np.zeros_like(cum_tp), where=seen > 0)
    recall = cum_tp / num_gt_bboxes

    # Prepend recall 0 so the first prediction contributes its step as well.
    recall_steps = np.diff(recall, prepend=0.0)
    ap = float(np.sum(recall_steps * precision))
    ar = float(recall[-1])
    return ap, ar

def calculate_maskApAr(detections, annotations, iou_threshold=0.5):
    """
    Calculate a simplified Average Precision (AP) and recall (AR) for masks.

    Mirrors the box metric of this notebook: every entry of ``detections`` is
    one prediction and every entry of ``annotations`` one ground truth, each
    represented by the FIRST mask under its ``'masks'`` key. Masks are
    binarised at 0.5 and compared by intersection-over-union. A prediction is a
    true positive when its best-overlapping ground truth reaches
    ``iou_threshold`` and has not been claimed by an earlier prediction.

    Args:
        detections (List[Dict]): Dicts with a ``'masks'`` entry shaped
            (N, H, W) or (N, 1, H, W); tensors and arrays are accepted.
        annotations (List[Dict]): Dicts with a ``'masks'`` entry, same layouts.
        iou_threshold (float): IoU threshold to consider a detection as correct.

    Returns:
        Tuple[float, float]: ``(ap, ar)``; ``ar`` is the recall after all
        predictions. ``(0.0, 0.0)`` when either list is empty.
    """
    def _first_mask(entry):
        # First mask of the entry as a 2-D boolean array, or None if absent.
        masks = entry['masks']
        if hasattr(masks, 'detach'):
            masks = masks.detach().cpu().numpy()
        masks = np.asarray(masks)
        if masks.size == 0:
            return None
        first = masks[0]
        return first.reshape(first.shape[-2:]) > 0.5

    def _iou(mask_a, mask_b):
        if mask_a.shape != mask_b.shape:
            return 0.0
        union = np.logical_or(mask_a, mask_b).sum()
        if union == 0:
            return 0.0
        return float(np.logical_and(mask_a, mask_b).sum() / union)

    num_detections = len(detections)
    num_gt_masks = len(annotations)
    if num_detections == 0 or num_gt_masks == 0:
        return 0.0, 0.0

    gt_masks = [_first_mask(annotation) for annotation in annotations]

    true_positives = np.zeros(num_detections)
    false_positives = np.zeros(num_detections)
    used_gt_masks = np.zeros(num_gt_masks, dtype=bool)

    for i, detection in enumerate(detections):
        predicted = _first_mask(detection)
        if predicted is None:
            # Nothing was predicted here: neither a hit nor a false alarm.
            continue

        best_iou = -1.0
        best_idx = -1
        for j, gt_mask in enumerate(gt_masks):
            if gt_mask is None:
                continue
            iou = _iou(predicted, gt_mask)
            if iou > best_iou:
                best_iou = iou
                best_idx = j

        if best_iou >= iou_threshold and not used_gt_masks[best_idx]:
            true_positives[i] = 1
            used_gt_masks[best_idx] = True
        else:
            false_positives[i] = 1

    # Cumulative precision / recall in processing order.
    cum_tp = np.cumsum(true_positives)
    cum_fp = np.cumsum(false_positives)
    seen = cum_tp + cum_fp
    precision = np.divide(cum_tp, seen, out=np.zeros_like(cum_tp), where=seen > 0)
    recall = cum_tp / num_gt_masks

    # Prepend recall 0 so the first prediction contributes its step as well.
    ap = float(np.sum(np.diff(recall, prepend=0.0) * precision))
    ar = float(recall[-1])

    return ap, ar

# Helper function to calculate IoU between masks
def calculate_mask_iou(mask1, mask2):
    """Intersection-over-union of two masks of the same shape.

    Non-zero entries count as foreground.

    Returns:
        float: IoU in [0, 1]; 0.0 when both masks are empty (no union).
    """
    intersection = np.logical_and(mask1, mask2)
    union = np.logical_or(mask1, mask2)
    union_area = np.sum(union)
    if union_area == 0:
        # Avoid 0/0 (NaN) when neither mask has any foreground.
        return 0.0
    return float(np.sum(intersection) / union_area)


# Number of passes over the test set.
# NOTE(review): the model is fixed here, so extra passes mostly repeat the same
# measurements -- confirm that more than one pass is wanted.
num_epochs_test = 2
ap_all = []
mAP_all = []
ar_all = []

# Reserved for mask metrics (not filled by this loop).
maskAp_all = []
maskAr_all = []
maskmAP_all = []

# Test losses get their own list so they do not mix with the validation history.
test_losses = []
len_testDataloader = len(testData_loader)

model.eval()
with torch.no_grad():
    for epoch in range(num_epochs_test):
        for j, (imgs, annotations) in enumerate(testData_loader, 1):
            # (B, H, W, C) -> (B, C, H, W) on the target device
            imgs = torch.as_tensor(imgs, device=device).permute(0, 3, 1, 2)
            for ann in annotations:
                for key in ('boxes', 'masks', 'labels', 'scores'):
                    ann[key] = ann[key].to(device)

            losses, detections = eval_forward(model, imgs, annotations)
            val_loss = sum(loss for loss in losses.values()) / test_batch_size
            test_losses.append(val_loss.detach().cpu().item())

            ap, ar = calculate_boxApAr(detections, annotations)
            ap_all.append(ap)
            # Running mean of every AP value recorded so far.
            mAP_all.append(sum(ap_all) / len(ap_all))
            ar_all.append(ar)

            if j % 10 == 0:
                print(f'TEST -- Iteration: {j}/{len_testDataloader}, Loss: {val_loss}')

# Mean loss over every test batch that was evaluated.
print(f'TEST -- Epoch: {epoch}, Loss: {sum(test_losses)/max(len(test_losses), 1)}')
