In [1]:
# Basic python and ML Libraries
import os
import random
import numpy as np
import pandas as pd
# for ignoring warnings
import warnings
warnings.filterwarnings('ignore')

# We will be reading images using OpenCV
import cv2

# xml library for parsing xml files
from xml.etree import ElementTree as et

# matplotlib for visualization
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# torchvision libraries
import torch
import torchvision
from torchvision import transforms as torchtrans  
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# these are the helper libraries imported.
from engine import train_one_epoch, evaluate
import utils
import transforms as T

# for image augmentations
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

In [2]:
class IELDatasetTest(torch.utils.data.Dataset):

    def __init__(self,images_dir, width, height, transforms=None):
        self.transforms = transforms
        self.images_dir = images_dir
        self.label_dir = images_dir.replace("images","labels")
        self.height = height
        self.width = width
        
        # sorting the images for consistency
        # To get images, the extension of the filename is checked to be jpg
        self.imgs = [image for image in sorted(os.listdir(images_dir))
                        if image[-4:]=='.jpg']
        
        for text_file in sorted(os.listdir(self.label_dir)):
            l = 0
            with open(os.path.join(self.label_dir,text_file),'r') as f:
                for x in f:
                    l += 1
                    
            if l == 0:
                self.imgs.remove(text_file.replace('.txt','.jpg'))
        
        # classes: 0 index is reserved for background
#         self.classes = [_, 'Epithelial Nuclei','IEL']
        # self.classes = ['background', 'Inflammatory','Epithelial','Spindle']
        self.classes = ['background', 'Epithelial','Lymphocyte','Neutrophil','Macrophage']
        
    def __getitem__(self, idx):

        img_name = self.imgs[idx]
        image_path = os.path.join(self.images_dir, img_name)

        # reading the images and converting them to correct size and color    
        img = cv2.imread(image_path)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32)
        img_res = cv2.resize(img_rgb, (self.width, self.height), cv2.INTER_CUBIC)
        # diving by 255
        img_res /= 255.0
        
        boxes = []
        labels = []
        
        wt = img.shape[1]
        ht = img.shape[0]
        
        label_path = os.path.join(self.label_dir, img_name.replace('.jpg','.txt'))
        
        with open(label_path,'r') as f:
            for line in f:
                splits = line.split(' ')
                w = float(splits[3]) * wt
                h = float(splits[4]) * ht
                x1 = ((2 * float(splits[1]) * wt) - w)/2
                y1 = ((2 * float(splits[2]) * ht) - h)/2
                x2 = x1 + w
                y2 = y1 + h
                
                x1 = max(0,(x1/wt)*self.width)
                x2 = min(self.width-1,(x2/wt)*self.width)
                y1 = max(0,(y1/ht)*self.height)
                y2 = min(self.height-1,(y2/ht)*self.height)
                
                if x1 >= x2 or y1 >= y2:
                    continue
                
                boxes.append([x1,y1,x2,y2])
                labels.append(int(splits[0]) + 1)
                
        boxes = [box for box in boxes if len(box) == 4]
                
        # convert boxes into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        
        # getting the areas of the boxes
        if boxes.shape[0] == 0:
            area = boxes
        else:
            area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # suppose all instances are not crowd
        iscrowd = torch.zeros((boxes.shape[0],), dtype=torch.int64)
        
        labels = torch.as_tensor(labels, dtype=torch.int64)
        
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["area"] = area
        target["iscrowd"] = iscrowd
        # image_id
        image_id = torch.tensor([idx])
        target["image_id"] = image_id
        target["image_name"] = img_name


        if self.transforms:
            
            sample = self.transforms(image = img_res,
                                     bboxes = target['boxes'],
                                     labels = labels)
            
            img_res = sample['image']
            target['boxes'] = torch.Tensor(sample['bboxes'])
            
        return img_res , target
    
    def __len__(self):
        return len(self.imgs)

In [3]:
# the function takes the original prediction and the iou threshold.

def apply_nms(orig_prediction, iou_thresh=0.3):
    
    # torchvision returns the indices of the bboxes to keep
    keep = torchvision.ops.nms(orig_prediction['boxes'], orig_prediction['scores'], iou_thresh)
    
    final_prediction = orig_prediction
    final_prediction['boxes'] = final_prediction['boxes'][keep]
    final_prediction['scores'] = final_prediction['scores'][keep]
    final_prediction['labels'] = final_prediction['labels'][keep]
    
    return final_prediction

# function to convert a torchtensor back to PIL image
def torch_to_pil(img):
    return torchtrans.ToPILImage()(img).convert('RGB')

In [4]:
# to train on gpu if selected.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

num_classes = 4

# get the model using our helper function
#model = get_object_detection_model(num_classes)
# model = torch.load("/home/aayush/chirag/faster_rcnn/weights/faster_rcnn_MoNuSac_inc_dec_0001_Shape/faster_rcnn_49fasterrcnn_embed_monusac_shape_inc_balance_mean_embed_od0001_size_intensity_gmm_ep300.pt")
# model = torch.load("/home/aayush/chirag/faster_rcnn/weights/Celiac_kfold_weights/faster_rcnn_fasterrcnn_embed_kfold2_shape_inc_balance_mean_embed_size_intensity_gmm_combined.pt")
# model = torch.load("/home/aayush/chirag/faster_rcnn/weights/ConSep/faster-rcnn-consep.pt")
model = torch.load("/home/aayush/chirag/faster_rcnn/weights/faster_rcnn_ConSep/faster_rcnn_299fasterrcnn_embed_consep_shape_inc_balance_mean_embed_size_intensity_gmm_ep500.pt")
# model = torch.load("/home/aayush/chirag/faster_rcnn_monusac/faster_rcnn/weights/faster-rcnn-monusac_lr_005_ep100.pt")


# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

In [5]:
# Send train=True fro training transforms and False for val/test transforms
def get_transform(train):
    
    if train:
        return A.Compose([
                            A.HorizontalFlip(0.5),
                     # ToTensorV2 converts image to pytorch tensor without div by 255
                            ToTensorV2(p=1.0) 
                        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
    else:
        return A.Compose([
                            ToTensorV2(p=1.0)
                        ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

In [6]:
# pick one image from the test set
import numpy as np
# fold_dir = '/home/aayush/chirag/tensorflow/yolo_kfold/2/images/'
fold_dir = '/home/aayush/chirag/open_src_datasets/consep/yolo_format/images/'
# fold_dir = '/home/aayush/chirag/open_src_datasets/monusac/yolo_format/images/'
image_size = 500

dataset_val = IELDatasetTest(os.path.join(fold_dir,"test"), image_size, image_size, transforms= get_transform(train=False))
dataset_test = IELDatasetTest(os.path.join(fold_dir,"test"), image_size, image_size, transforms= get_transform(train=False))

# put the model in evaluation mode
model.eval()


# for iel_thres in [0.1,0.3,0.5]:
#     for epith_thres in [0.1,0.3,0.5]:
#         print(iel_thres , epith_thres)
#         print('---------------------------------------------')
        
#         target_dict = {}
#         pred_dict = {}

#         for i in range(dataset_test.__len__()):
#             img,target = dataset_test[i]
#             image_name = target["image_name"][:-6]

#             with torch.no_grad():
#                 prediction = model([img.to(device)])[0]

#             nms_prediction = apply_nms(prediction, iou_thresh=0.01)

#             target_labels = target["labels"].numpy()
#             pred_labels = list(nms_prediction["labels"].cpu())
#             pred_scores = list(nms_prediction["scores"].cpu())
#             num_iels = 0
#             num_epith = 0

#             for i in range(len(pred_labels)):
#                 if pred_labels[i] == 1:
#                     if pred_scores[i] >= epith_thres:
#                         num_epith += 1
#                 else:
#                     if pred_scores[i] >= iel_thres:
#                         num_iels += 1

#             if image_name in target_dict:
#                 target_dict[image_name]["iel"] += np.count_nonzero(target_labels == 2)
#                 target_dict[image_name]["epith"] += np.count_nonzero(target_labels == 1)
#                 pred_dict[image_name]["iel"] += num_iels
#                 pred_dict[image_name]["epith"] += num_epith

#             else:
#                 target_dict[image_name] = {}
#                 target_dict[image_name]["iel"] = np.count_nonzero(target_labels == 2)
#                 target_dict[image_name]["epith"] = np.count_nonzero(target_labels == 1)

#                 pred_dict[image_name] = {}
#                 pred_dict[image_name]["iel"] = num_iels
#                 pred_dict[image_name]["epith"] = num_epith

#         mae_iel = 0
#         mre_iel = 0
#         mae_epith = 0
#         mre_epith = 0
#         num_images = 0
#         mae_ratio = 0
#         mre_ratio = 0

#         for key in target_dict.keys():
#             num_images += 1
#             #print(key , target_dict[key]["iel"] , pred_dict[key]["iel"], target_dict[key]["epith"], pred_dict[key]["epith"] , target_dict[key]["iel"]/target_dict[key]["epith"])
#             mae_iel += abs(target_dict[key]["iel"] - pred_dict[key]["iel"])
#             mre_iel += abs(target_dict[key]["iel"] - pred_dict[key]["iel"])/target_dict[key]["iel"]
#             mae_epith += abs(target_dict[key]["epith"] - pred_dict[key]["epith"])
#             mre_epith += abs(target_dict[key]["epith"] - pred_dict[key]["epith"])/target_dict[key]["epith"]
            
#             if pred_dict[key]["epith"] != 0:
#                 print(key, (100*target_dict[key]["iel"])/target_dict[key]["epith"], (100*pred_dict[key]["iel"])/pred_dict[key]["epith"], target_dict[key]["iel"], pred_dict[key]["iel"], target_dict[key]["epith"], pred_dict[key]["epith"])
#                 mae_ratio += abs((100*target_dict[key]["iel"])/target_dict[key]["epith"] - (100*pred_dict[key]["iel"])/pred_dict[key]["epith"])
                
#                 if target_dict[key]["iel"] != 0:
#                     mre_ratio += ((abs((100*target_dict[key]["iel"])/target_dict[key]["epith"] - (100*pred_dict[key]["iel"])/pred_dict[key]["epith"])) / ((100*target_dict[key]["iel"])/target_dict[key]["epith"]))
#             else:
#                 print('Not nice :' ,key, target_dict[key]["iel"], pred_dict[key]["iel"], target_dict[key]["epith"], pred_dict[key]["epith"])

#         mae_iel /= num_images
#         mre_iel /= num_images
#         mae_epith /= num_images
#         mre_epith /= num_images
#         mae_ratio /= num_images
#         mre_ratio /= num_images
        
        
#         print(mae_iel , mre_iel , mae_epith , mre_epith , mae_ratio, mre_ratio)
#         print('----------------------------------------')


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [7]:

conf0, conf1, conf2 = 0.1, 0, 0.1

target_dict = {}
pred_dict = {}

for i in range(dataset_test.__len__()):
    img,target = dataset_test[i]
    image_name = target["image_name"][:-6]

    with torch.no_grad():
        prediction = model([img.to(device)])[0]

    nms_prediction = apply_nms(prediction, iou_thresh=0.01)

    target_labels = target["labels"].numpy()
    pred_labels = list(nms_prediction["labels"].cpu())
    pred_scores = list(nms_prediction["scores"].cpu())
    num_0 = 0
    num_1 = 0
    num_2 = 0

    for i in range(len(pred_labels)):
        if pred_labels[i] == 1 and pred_scores[i] >= conf0:
            num_0 += 1
        if pred_labels[i] == 2 and pred_scores[i] >= conf1:
            num_1 += 1
        if pred_labels[i] == 3 and pred_scores[i] >= conf2:
            num_2 += 1

    if image_name in target_dict:
        target_dict[image_name][1] += np.count_nonzero(target_labels == 1)
        target_dict[image_name][2] += np.count_nonzero(target_labels == 2)
        target_dict[image_name][3] += np.count_nonzero(target_labels == 3)
        pred_dict[image_name][1] += num_0
        pred_dict[image_name][2] += num_1
        pred_dict[image_name][3] += num_2
        

    else:
        target_dict[image_name] = {}
        target_dict[image_name][1] = np.count_nonzero(target_labels == 1)
        target_dict[image_name][2] = np.count_nonzero(target_labels == 2)
        target_dict[image_name][3] = np.count_nonzero(target_labels == 3)
        pred_dict[image_name] = {}
        pred_dict[image_name][1] = num_0
        pred_dict[image_name][2] = num_1
        pred_dict[image_name][3] = num_2

mae_1 = 0
mae_2 = 0
mae_3 = 0
num_images = 0

for key in target_dict.keys():
    num_images += 1
    #print(key , target_dict[key]["iel"] , pred_dict[key]["iel"], target_dict[key]["epith"], pred_dict[key]["epith"] , target_dict[key]["iel"]/target_dict[key]["epith"])
    print(key, target_dict[key][1], pred_dict[key][1], target_dict[key][2], pred_dict[key][2], target_dict[key][3], pred_dict[key][3] )
    mae_1 += abs(target_dict[key][1] - pred_dict[key][1])
    mae_2 += abs(target_dict[key][2] - pred_dict[key][2])
    mae_3 += abs(target_dict[key][3] - pred_dict[key][3])
    

mae_1 /= num_images
mae_2 /= num_images
mae_3 /= num_images

print(mae_1, mae_2, mae_3)

test_10 297 239 338 84 209 22
test_11 7 106 618 149 9 68
test_12 102 72 166 101 159 104
test_13 595 292 0 21 757 57
test_14 11 56 483 196 56 55
test_1 71 121 214 71 578 135
test_2 15 7 0 13 229 186
test_3 2 13 265 142 103 104
test_4 5 25 302 166 59 103
test_5 192 175 212 91 479 98
test_6 111 58 0 14 186 155
test_7 63 44 0 33 172 136
test_8 2 8 609 193 10 60
test_9 162 154 0 23 350 99
51.92857142857143 151.28571428571428 163.0


In [8]:

conf0, conf1, conf2, conf3 = 0.1, 0.1, 0.1, 0.1

target_dict = {}
pred_dict = {}

for i in range(dataset_test.__len__()):
    img,target = dataset_test[i]
    image_name = target["image_name"][:-6]

    with torch.no_grad():
        prediction = model([img.to(device)])[0]

    nms_prediction = apply_nms(prediction, iou_thresh=0.01)

    target_labels = target["labels"].numpy()
    pred_labels = list(nms_prediction["labels"].cpu())
    pred_scores = list(nms_prediction["scores"].cpu())
    num_0 = 0
    num_1 = 0
    num_2 = 0
    num_3 = 0

    for i in range(len(pred_labels)):
        if pred_labels[i] == 1 and pred_scores[i] >= conf0:
            num_0 += 1
        if pred_labels[i] == 2 and pred_scores[i] >= conf1:
            num_1 += 1
        if pred_labels[i] == 3 and pred_scores[i] >= conf2:
            num_2 += 1
        if pred_labels[i] == 4 and pred_scores[i] >= conf3:
            num_3 += 1

    if image_name in target_dict:
        target_dict[image_name][1] += np.count_nonzero(target_labels == 1)
        target_dict[image_name][2] += np.count_nonzero(target_labels == 2)
        target_dict[image_name][3] += np.count_nonzero(target_labels == 3)
        target_dict[image_name][4] += np.count_nonzero(target_labels == 4)
        pred_dict[image_name][1] += num_0
        pred_dict[image_name][2] += num_1
        pred_dict[image_name][3] += num_2
        pred_dict[image_name][4] += num_3
        

    else:
        target_dict[image_name] = {}
        target_dict[image_name][1] = np.count_nonzero(target_labels == 1)
        target_dict[image_name][2] = np.count_nonzero(target_labels == 2)
        target_dict[image_name][3] = np.count_nonzero(target_labels == 3)
        target_dict[image_name][4] = np.count_nonzero(target_labels == 4)
        pred_dict[image_name] = {}
        pred_dict[image_name][1] = num_0
        pred_dict[image_name][2] = num_1
        pred_dict[image_name][3] = num_2
        pred_dict[image_name][4] = num_3

mae_1 = 0
mae_2 = 0
mae_3 = 0
mae_4 = 0
num_images = 0

for key in target_dict.keys():
    num_images += 1
    #print(key , target_dict[key]["iel"] , pred_dict[key]["iel"], target_dict[key]["epith"], pred_dict[key]["epith"] , target_dict[key]["iel"]/target_dict[key]["epith"])
    print(key, target_dict[key][1], pred_dict[key][1], target_dict[key][2], pred_dict[key][2], target_dict[key][3], pred_dict[key][3], target_dict[key][4], pred_dict[key][4] )
    mae_1 += abs(target_dict[key][1] - pred_dict[key][1])
    mae_2 += abs(target_dict[key][2] - pred_dict[key][2])
    mae_3 += abs(target_dict[key][3] - pred_dict[key][3])
    mae_4 += abs(target_dict[key][4] - pred_dict[key][4])
    

mae_1 /= num_images
mae_2 /= num_images
mae_3 /= num_images
mae_4 /= num_images

print(mae_1, mae_2, mae_3, mae_4)

test_10 297 239 338 84 209 22 0 0
test_11 7 106 618 149 9 68 0 0
test_12 102 72 166 101 159 104 0 0
test_13 595 292 0 21 757 57 0 0
test_14 11 56 483 196 56 55 0 0
test_1 71 121 214 71 578 135 0 0
test_2 15 7 0 13 229 186 0 0
test_3 2 13 265 142 103 104 0 0
test_4 5 25 302 166 59 103 0 0
test_5 192 175 212 91 479 98 0 0
test_6 111 58 0 14 186 155 0 0
test_7 63 44 0 33 172 136 0 0
test_8 2 8 609 193 10 60 0 0
test_9 162 154 0 23 350 99 0 0
51.92857142857143 151.28571428571428 163.0 0.0


In [9]:
def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)

def process_batch(detections, labels, iouv):
    """
    Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
    Arguments:
        detections (Array[N, 6]), x1, y1, x2, y2, conf, class
        labels (Array[M, 5]), class, x1, y1, x2, y2
    Returns:
        correct (Array[N, 10]), for 10 IoU levels
    """
    correct = torch.zeros(detections.shape[0], iouv.shape[0], dtype=torch.bool, device=iouv.device)
    iou = box_iou(labels[:, 1:], detections[:, :4])
    x = torch.where((iou >= iouv[0]) & (labels[:, 0:1] == detections[:, 5]))  # IoU above threshold and classes match
    if x[0].shape[0]:
        matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy()  # [label, detection, iou]
        if x[0].shape[0] > 1:
            matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
            # matches = matches[matches[:, 2].argsort()[::-1]]
            matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
        matches = torch.Tensor(matches).to(iouv.device)
        correct[matches[:, 1].long()] = matches[:, 2:3] >= iouv
    return correct

In [10]:
def compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves
    # Arguments
        recall:    The recall curve (list)
        precision: The precision curve (list)
    # Returns
        Average precision, precision curve, recall curve
    """

    # Append sentinel values to beginning and end
    mrec = np.concatenate(([0.0], recall, [1.0]))
    mpre = np.concatenate(([1.0], precision, [0.0]))

    # Compute the precision envelope
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))

    # Integrate area under curve
    method = 'interp'  # methods: 'continuous', 'interp'
    if method == 'interp':
        x = np.linspace(0, 1, 101)  # 101-point interp (COCO)
        ap = np.trapz(np.interp(x, mrec, mpre), x)  # integrate
    else:  # 'continuous'
        i = np.where(mrec[1:] != mrec[:-1])[0]  # points where x axis (recall) changes
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])  # area under curve

    return ap, mpre, mrec

def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16):
    """ Compute the average precision, given the recall and precision curves.
    Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
    # Arguments
        tp:  True positives (nparray, nx1 or nx10).
        conf:  Objectness value from 0-1 (nparray).
        pred_cls:  Predicted object classes (nparray).
        target_cls:  True object classes (nparray).
        plot:  Plot precision-recall curve at mAP@0.5
        save_dir:  Plot save directory
    # Returns
        The average precision as computed in py-faster-rcnn.
    """

    # Sort by objectness
    i = np.argsort(-conf)
    tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]

    # Find unique classes
    unique_classes, nt = np.unique(target_cls, return_counts=True)
    nc = unique_classes.shape[0]  # number of classes, number of detections

    # Create Precision-Recall curve and compute AP for each class
    px, py = np.linspace(0, 1, 1000), []  # for plotting
    ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
    for ci, c in enumerate(unique_classes):
        i = pred_cls == c
        n_l = nt[ci]  # number of labels
        n_p = i.sum()  # number of predictions

        if n_p == 0 or n_l == 0:
            continue
        else:
            # Accumulate FPs and TPs
            fpc = (1 - tp[i]).cumsum(0)
            tpc = tp[i].cumsum(0)

            # Recall
            recall = tpc / (n_l + eps)  # recall curve
            r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0)  # negative x, xp because xp decreases

            # Precision
            precision = tpc / (tpc + fpc)  # precision curve
            p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1)  # p at pr_score

            # AP from recall-precision curve
            for j in range(tp.shape[1]):
                ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
                if plot and j == 0:
                    py.append(np.interp(px, mrec, mpre))  # precision at mAP@0.5

    # Compute F1 (harmonic mean of precision and recall)
    f1 = 2 * p * r / (p + r + eps)

    i = f1.mean(0).argmax()  # max F1 index
    p, r, f1 = p[:, i], r[:, i], f1[:, i]
    tp = (r * nt).round()  # true positives
    fp = (tp / (p + eps) - tp).round()  # false positives
    return tp, fp, p, r, f1, ap, unique_classes.astype('int32')

In [11]:
# pick one image from the test set
import numpy as np

dataset_val = IELDatasetTest(os.path.join(fold_dir,"test"), image_size, image_size, transforms= get_transform(train=False))
dataset_test = IELDatasetTest(os.path.join(fold_dir,"test"), image_size, image_size, transforms= get_transform(train=False))

# put the model in evaluation mode
model.eval()

mAPl = 0.5
mAPr = 0.95
iouv = torch.linspace(mAPl, mAPr, 10).to(device)  # iou vector for mAP@0.5:0.
niou = iouv.numel()

jdict, stats, ap, ap_class = [], [], [], []

for i in range(dataset_test.__len__()):
    img,target = dataset_test[i]
    image_name = target["image_name"][:-6]

    with torch.no_grad():
        prediction = model([img.to(device)])[0]

    nms_prediction = apply_nms(prediction, iou_thresh=0.01)
    
    labelsn = torch.cat((torch.reshape(target["labels"],(target["labels"].shape[0],1)), target["boxes"]), 1)
    labelsn = labelsn.to(device)
    
    pred_boxes = nms_prediction["boxes"]
    pred_labels = nms_prediction["labels"]
    pred_conf = nms_prediction["scores"]
    
    predn = torch.cat((pred_boxes, torch.reshape(pred_conf , (pred_conf.shape[0],1)) , torch.reshape(pred_labels, (pred_labels.shape[0],1))), 1)
    
    # print(labelsn.shape,predn.shape)
    
    correct = process_batch(predn, labelsn, iouv)
    
    stats.append((correct.cpu(), predn[:, 4].cpu(), predn[:, 5].cpu(), target["labels"].tolist()))  # (correct, conf, pcls, tcls)

stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
if len(stats) and stats[0].any():
    tp, fp, p, r, f1, ap, ap_class = ap_per_class(*stats)
    ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
    mp, mr, map50, map5095 = p.mean(), r.mean(), ap50.mean(), ap.mean()
    
print(mp, mr, map50, map5095)

0.5716106173277811 0.33113696145851645 0.45190920243186206 0.2202741202114309
