In [None]:
#!unzip PennFudanPed.zip


In [None]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
from torch.optim.lr_scheduler import StepLR
import math
          
class PennFudanDataset(torch.utils.data.Dataset):
    """Penn-Fudan pedestrian images paired with per-instance detection targets.

    ``root`` must contain a ``PNGImages`` and a ``PedMasks`` directory. The
    two directory listings are sorted and paired by position.
    """

    def __init__(self, root, transforms=None):
        """Record the dataset root and list the image and mask files.

        Args:
            root: directory holding ``PNGImages`` and ``PedMasks``.
            transforms: optional callable applied to the image only.
        """
        self.root = root
        self.transforms = transforms
        # Sort both listings so that entry i of one matches entry i of the other.
        self.imgs = sorted(os.listdir(os.path.join(root, "PNGImages")))
        self.masks = sorted(os.listdir(os.path.join(root, "PedMasks")))

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, target)``.

        ``target`` is a dict with the keys ``boxes`` (float32 rows of
        ``[xmin, ymin, xmax, ymax]``), ``labels`` (all ones, int64),
        ``masks`` (one uint8 binary mask per instance), ``image_id``,
        ``area`` and ``iscrowd`` (all zeros, int64).
        """
        img = Image.open(
            os.path.join(self.root, "PNGImages", self.imgs[idx])
        ).convert("RGB")
        # The mask is deliberately not converted to RGB: every distinct pixel
        # value identifies one instance, with 0 being the background.
        mask = np.array(
            Image.open(os.path.join(self.root, "PedMasks", self.masks[idx]))
        )

        # The first (smallest) id is taken to be the background and dropped.
        obj_ids = np.unique(mask)[1:]
        num_objs = len(obj_ids)

        # One boolean mask per instance id.
        masks = mask == obj_ids[:, None, None]

        # Tight bounding box around the set pixels of every instance mask.
        boxes = []
        for instance in masks:
            hits = np.where(instance)
            boxes.append([
                np.min(hits[1]),
                np.min(hits[0]),
                np.max(hits[1]),
                np.max(hits[0]),
            ])
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        target = {
            "boxes": boxes,
            # Single foreground class.
            "labels": torch.ones((num_objs,), dtype=torch.int64),
            "masks": torch.as_tensor(masks, dtype=torch.uint8),
            "image_id": torch.tensor([idx]),
            "area": (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]),
            # No instance is treated as a crowd.
            "iscrowd": torch.zeros((num_objs,), dtype=torch.int64),
        }

        # Only the image goes through the transforms; the target is untouched.
        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        """Return the number of image files found under ``PNGImages``."""
        return len(self.imgs)

# Dataset without any transforms; `dataset` is rebound further below with
# get_transform(train=False) once get_transform has been defined.
dataset = PennFudanDataset('PennFudanPed/')

import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import RetinaNet
from engine import train_one_epoch, evaluate
import utils
import torchvision.transforms as T



def get_transform(train):
    """Build the image transform pipeline.

    Args:
        train: when truthy, a random hue jitter is added after the tensor
            conversion; otherwise only the tensor conversion is applied.

    Returns:
        A ``T.Compose`` of the selected transforms.
    """
    # Always start by turning the PIL image into a tensor.
    steps = [T.ToTensor()]
    if train:
        # Colour-only augmentation. PennFudanDataset applies its transforms
        # to the image alone, so no geometric transform is used here.
        steps.append(T.ColorJitter(hue=(-0.4, 0.4)))
    return T.Compose(steps)

# Plain and augmented (hue-jittered) views of the same image folder.
dataset = PennFudanDataset('PennFudanPed', get_transform(train=False))
dataset111 = PennFudanDataset('PennFudanPed', get_transform(train=True))
dataset_test = PennFudanDataset('PennFudanPed', get_transform(train=False))

# Fixed seed so that the shuffled sample order is reproducible.
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()

# Training pool: the first 120 shuffled samples.
_pool_idx = indices[:120]
# Four folds of the pool; each fold should hold 90 images and leaves out a
# different block of 30.
_fold_idx = (
    indices[:90],
    indices[:60] + indices[90:120],
    indices[:30] + indices[60:120],
    indices[30:120],
)

# Subsets of the plain dataset.
dataset5 = torch.utils.data.Subset(dataset, _pool_idx)
dataset1, dataset2, dataset3, dataset4 = (
    torch.utils.data.Subset(dataset, _picked) for _picked in _fold_idx
)

# The same subsets taken from the augmented dataset.
dataset1111 = torch.utils.data.Subset(dataset111, _pool_idx)
dataset11, dataset21, dataset31, dataset41 = (
    torch.utils.data.Subset(dataset111, _picked) for _picked in _fold_idx
)

# Held-out evaluation set: the last 50 shuffled samples.
dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])


def _make_loader(data, batch_size=2, shuffle=True):
    """Wrap ``data`` in a DataLoader with the settings shared by all loaders."""
    return torch.utils.data.DataLoader(
        data, batch_size=batch_size, shuffle=shuffle, pin_memory=True,
        collate_fn=utils.collate_fn)


# Training loaders: whole pool (plain / augmented), then one per fold.
data_loader = _make_loader(dataset5)
data_loader22 = _make_loader(dataset1111)

data_loader1, data_loader2, data_loader3, data_loader4 = (
    _make_loader(_fold) for _fold in (dataset1, dataset2, dataset3, dataset4)
)
data_loader11, data_loader21, data_loader31, data_loader41 = (
    _make_loader(_fold) for _fold in (dataset11, dataset21, dataset31, dataset41)
)

# Evaluation loader: one image per batch, fixed order.
data_loader_test = _make_loader(dataset_test, batch_size=1, shuffle=False)

# Prefer the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two classes only: background and person.
num_classes = 2

In [None]:
# # load models
# model_retina_res = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=False)
# model_retina_res.load_state_dict(torch.load('models/retina_res_aug.pth'))

# model_faster_res = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
# model_faster_res.load_state_dict(torch.load('models/faster_res_aug.pth'))

# model_faster_mo = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=False)
# model_faster_mo.load_state_dict(torch.load('models/faster_mo_aug.pth'))

### RetinaNet with a ResNet-50 backbone (sub-models). To train with data augmentation, uncomment the four commented-out train_one_epoch calls inside the training loop.

In [None]:

# One pretrained RetinaNet (ResNet-50 FPN) per training fold.
model_retina_res1, model_retina_res2, model_retina_res3, model_retina_res4 = (
    torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)
    for _ in range(4)
)
_submodels = (model_retina_res1, model_retina_res2,
              model_retina_res3, model_retina_res4)

# Move every sub-model to the selected device.
for _net in _submodels:
    _net.to(device)

# Trainable parameters of each sub-model.
params1, params2, params3, params4 = (
    [_p for _p in _net.parameters() if _p.requires_grad] for _net in _submodels
)

# One SGD optimizer per sub-model, all with the same hyper-parameters.
optimizer1, optimizer2, optimizer3, optimizer4 = (
    torch.optim.SGD(_ps, lr=0.005, momentum=0.9, weight_decay=0.0005)
    for _ps in (params1, params2, params3, params4)
)
_optimizers = (optimizer1, optimizer2, optimizer3, optimizer4)

# Step schedulers: the learning rate is multiplied by 0.1 every 3 epochs.
lr_scheduler1, lr_scheduler2, lr_scheduler3, lr_scheduler4 = (
    torch.optim.lr_scheduler.StepLR(_opt, step_size=3, gamma=0.1)
    for _opt in _optimizers
)

_fold_loaders = (data_loader1, data_loader2, data_loader3, data_loader4)

for epoch in range(10):
    # Train each sub-model for one epoch on its own fold, logging every
    # 10 iterations.
    for _net, _opt, _loader in zip(_submodels, _optimizers, _fold_loaders):
        train_one_epoch(_net, _opt, _loader, device, epoch, print_freq=10)
#uncomment here to do data augmentation
#     train_one_epoch(model_retina_res1, optimizer1, data_loader11, device, epoch, print_freq=10)
#     train_one_epoch(model_retina_res2, optimizer2, data_loader21, device, epoch, print_freq=10)
#     train_one_epoch(model_retina_res3, optimizer3, data_loader31, device, epoch, print_freq=10)
#     train_one_epoch(model_retina_res4, optimizer4, data_loader41, device, epoch, print_freq=10)

    # Advance every learning-rate schedule once per epoch.
    for _sched in (lr_scheduler1, lr_scheduler2, lr_scheduler3, lr_scheduler4):
        _sched.step()

# Evaluate each sub-model on the test loader once, after all epochs.
evaluate(model_retina_res1, data_loader_test, device=device)
evaluate(model_retina_res2, data_loader_test, device=device)
evaluate(model_retina_res3, data_loader_test, device=device)
evaluate(model_retina_res4, data_loader_test, device=device)

### Faster R-CNN with a ResNet-50 backbone (sub-models). To train with data augmentation, uncomment the four commented-out train_one_epoch calls inside the training loop.

In [None]:

# One pretrained Faster R-CNN (ResNet-50 FPN) per training fold. The
# model_retina_res* names are reused so that later cells work unchanged.
model_retina_res1, model_retina_res2, model_retina_res3, model_retina_res4 = (
    torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    for _ in range(4)
)
_submodels = (model_retina_res1, model_retina_res2,
              model_retina_res3, model_retina_res4)

# Move every sub-model to the selected device.
for _net in _submodels:
    _net.to(device)

# Trainable parameters of each sub-model.
params1, params2, params3, params4 = (
    [_p for _p in _net.parameters() if _p.requires_grad] for _net in _submodels
)

# One SGD optimizer per sub-model, all with the same hyper-parameters.
optimizer1, optimizer2, optimizer3, optimizer4 = (
    torch.optim.SGD(_ps, lr=0.005, momentum=0.9, weight_decay=0.0005)
    for _ps in (params1, params2, params3, params4)
)
_optimizers = (optimizer1, optimizer2, optimizer3, optimizer4)

# Step schedulers: the learning rate is multiplied by 0.1 every 3 epochs.
lr_scheduler1, lr_scheduler2, lr_scheduler3, lr_scheduler4 = (
    torch.optim.lr_scheduler.StepLR(_opt, step_size=3, gamma=0.1)
    for _opt in _optimizers
)

_fold_loaders = (data_loader1, data_loader2, data_loader3, data_loader4)

for epoch in range(10):
    # Train each sub-model for one epoch on its own fold, logging every
    # 10 iterations.
    for _net, _opt, _loader in zip(_submodels, _optimizers, _fold_loaders):
        train_one_epoch(_net, _opt, _loader, device, epoch, print_freq=10)

#     train_one_epoch(model_retina_res1, optimizer1, data_loader11, device, epoch, print_freq=10)
#     train_one_epoch(model_retina_res2, optimizer2, data_loader21, device, epoch, print_freq=10)
#     train_one_epoch(model_retina_res3, optimizer3, data_loader31, device, epoch, print_freq=10)
#     train_one_epoch(model_retina_res4, optimizer4, data_loader41, device, epoch, print_freq=10)

    # Advance every learning-rate schedule once per epoch.
    for _sched in (lr_scheduler1, lr_scheduler2, lr_scheduler3, lr_scheduler4):
        _sched.step()

# Evaluate each sub-model on the test loader once, after all epochs.
evaluate(model_retina_res1, data_loader_test, device=device)
evaluate(model_retina_res2, data_loader_test, device=device)
evaluate(model_retina_res3, data_loader_test, device=device)
evaluate(model_retina_res4, data_loader_test, device=device)





### Faster R-CNN with a MobileNet backbone (sub-models). To train with data augmentation, uncomment the four commented-out train_one_epoch calls inside the training loop.

In [None]:

# One pretrained Faster R-CNN (MobileNetV3-Large FPN) per training fold. The
# model_retina_res* names are reused so that later cells work unchanged.
model_retina_res1, model_retina_res2, model_retina_res3, model_retina_res4 = (
    torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
    for _ in range(4)
)
_submodels = (model_retina_res1, model_retina_res2,
              model_retina_res3, model_retina_res4)

# Move every sub-model to the selected device.
for _net in _submodels:
    _net.to(device)

# Trainable parameters of each sub-model.
params1, params2, params3, params4 = (
    [_p for _p in _net.parameters() if _p.requires_grad] for _net in _submodels
)

# One SGD optimizer per sub-model, all with the same hyper-parameters.
optimizer1, optimizer2, optimizer3, optimizer4 = (
    torch.optim.SGD(_ps, lr=0.005, momentum=0.9, weight_decay=0.0005)
    for _ps in (params1, params2, params3, params4)
)
_optimizers = (optimizer1, optimizer2, optimizer3, optimizer4)

# Step schedulers: the learning rate is multiplied by 0.1 every 3 epochs.
lr_scheduler1, lr_scheduler2, lr_scheduler3, lr_scheduler4 = (
    torch.optim.lr_scheduler.StepLR(_opt, step_size=3, gamma=0.1)
    for _opt in _optimizers
)

_fold_loaders = (data_loader1, data_loader2, data_loader3, data_loader4)

for epoch in range(10):
    # Train each sub-model for one epoch on its own fold, logging every
    # 10 iterations.
    for _net, _opt, _loader in zip(_submodels, _optimizers, _fold_loaders):
        train_one_epoch(_net, _opt, _loader, device, epoch, print_freq=10)

#     train_one_epoch(model_retina_res1, optimizer1, data_loader11, device, epoch, print_freq=10)
#     train_one_epoch(model_retina_res2, optimizer2, data_loader21, device, epoch, print_freq=10)
#     train_one_epoch(model_retina_res3, optimizer3, data_loader31, device, epoch, print_freq=10)
#     train_one_epoch(model_retina_res4, optimizer4, data_loader41, device, epoch, print_freq=10)

    # Advance every learning-rate schedule once per epoch.
    for _sched in (lr_scheduler1, lr_scheduler2, lr_scheduler3, lr_scheduler4):
        _sched.step()

# Evaluate each sub-model on the test loader once, after all epochs.
evaluate(model_retina_res1, data_loader_test, device=device)
evaluate(model_retina_res2, data_loader_test, device=device)
evaluate(model_retina_res3, data_loader_test, device=device)
evaluate(model_retina_res4, data_loader_test, device=device)





Adjust IoU here

In [None]:
def nonMaximumSuppression(boxes, overlapThresh):
    """Greedy non-maximum suppression over ``[x1, y1, x2, y2]`` boxes.

    Boxes are visited in decreasing order of their bottom edge (``y2``).
    Each visited box is kept, and every remaining box whose intersection
    with it covers more than ``overlapThresh`` of that remaining box's own
    area is discarded.

    Args:
        boxes: array-like of shape ``(N, 4)``; may be empty.
        overlapThresh: overlap ratio above which a box is suppressed.

    Returns:
        ``numpy.ndarray`` with the kept rows of ``boxes``, in the order in
        which they were picked. An empty ``(0, 4)`` array for empty input.
    """
    boxes = np.asarray(boxes)
    # Nothing to suppress; the column indexing below would fail on an
    # empty one-dimensional array.
    if boxes.size == 0:
        return np.empty((0, 4), dtype=boxes.dtype)

    # Coordinates as floats so that the area/ratio arithmetic is not integer.
    x1 = boxes[:, 0].astype(float)
    y1 = boxes[:, 1].astype(float)
    x2 = boxes[:, 2].astype(float)
    y2 = boxes[:, 3].astype(float)

    # Inclusive pixel area of every box; candidates sorted by bottom edge.
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)

    pick = []
    while idxs.size > 0:
        # Keep the box with the largest remaining y2.
        last = idxs.size - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # Intersection of the kept box with every remaining candidate.
        w = np.maximum(0, np.minimum(x2[i], x2[rest]) - np.maximum(x1[i], x1[rest]) + 1)
        h = np.maximum(0, np.minimum(y2[i], y2[rest]) - np.maximum(y1[i], y1[rest]) + 1)

        # Fraction of each candidate's own area covered by the kept box.
        overlap = (w * h) / area[rest]

        # Drop the kept box itself plus every sufficiently covered candidate.
        suppressed = np.flatnonzero(overlap > overlapThresh)
        idxs = np.delete(idxs, np.append(suppressed, last))

    return boxes[pick]


def uneBoundingBoxes(boxesAllXmls, iou_thresh=0.8):
    """Group boxes that overlap strongly with a common anchor box.

    The first ungrouped box becomes the anchor of a new group. Every later
    ungrouped box whose IoU with the anchor exceeds ``iou_thresh`` joins
    that group. This repeats on what is left until every box is grouped.

    The input sequence is not modified (the grouping works on a copy).

    Args:
        boxesAllXmls: sequence of ``[x1, y1, x2, y2]`` boxes.
        iou_thresh: IoU above which a box joins the anchor's group.

    Returns:
        List of groups; each group is a list of boxes, anchor first.
    """
    remaining = list(boxesAllXmls)
    boundingBox = []

    while remaining:
        anchor, candidates = remaining[0], remaining[1:]
        group = [anchor]
        # Boxes that do not join this group stay, in order, for the next pass.
        remaining = []
        for box in candidates:
            if bb_intersection_over_union(anchor, box) > iou_thresh:
                group.append(box)
            else:
                remaining.append(box)
        boundingBox.append(group)

    return boundingBox


def bb_intersection_over_union(boxA, boxB):
    """Return the intersection-over-union of two ``[x1, y1, x2, y2]`` boxes.

    Widths and heights are measured inclusively (``x2 - x1 + 1``).
    """
    ax1, ay1, ax2, ay2 = boxA[0], boxA[1], boxA[2], boxA[3]
    bx1, by1, bx2, by2 = boxB[0], boxB[1], boxB[2], boxB[3]

    # Extent of the overlap rectangle, clamped to zero when the boxes
    # do not intersect.
    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1) + 1)
    overlap_h = max(0, min(ay2, by2) - max(ay1, by1) + 1)
    intersection = overlap_w * overlap_h

    # Individual box areas.
    area_a = (ax2 - ax1 + 1) * (ay2 - ay1 + 1)
    area_b = (bx2 - bx1 + 1) * (by2 - by1 + 1)

    # Union = sum of both areas minus the part counted twice.
    union = float(area_a + area_b - intersection)
    return intersection / union

def ensemble(boxes, option='consensus', num_models=4, nms_thresh=0.2):
    """Fuse the pooled boxes of several detectors by voting.

    Boxes are first grouped with ``uneBoundingBoxes``. A group is accepted
    depending on ``option`` and the number of boxes it contains:

    * ``'affirmative'`` - every group is accepted;
    * ``'consensus'``   - at least ``ceil(num_models / 2)`` boxes;
    * ``'unanimous'``   - exactly ``num_models`` boxes.

    Each accepted group is reduced with ``nonMaximumSuppression`` and the
    first surviving box represents the group.

    Args:
        boxes: list of ``[x1, y1, x2, y2]`` boxes pooled from all sources.
            The list is not modified.
        option: voting strategy, one of the three names above.
        num_models: number of prediction sources the votes are counted
            against.
        nms_thresh: overlap threshold passed to ``nonMaximumSuppression``.

    Returns:
        List with one ``[x1, y1, x2, y2]`` list per accepted group.

    Raises:
        ValueError: if ``option`` is not a known strategy.
    """
    if option not in ('consensus', 'unanimous', 'affirmative'):
        raise ValueError(
            "option must be 'consensus', 'unanimous' or 'affirmative', "
            "got %r" % (option,))

    result = []
    # Group on a copy so that the caller's list stays intact even if the
    # grouping routine consumes its argument.
    for group in uneBoundingBoxes(list(boxes)):
        votes = len(group)
        if option == 'consensus':
            accepted = votes >= math.ceil(num_models / 2)
        elif option == 'unanimous':
            accepted = votes == num_models
        else:  # 'affirmative'
            accepted = True
        if not accepted:
            continue

        pick = nonMaximumSuppression(np.array(group), nms_thresh)
        if len(pick) != 0:
            result.append(list(pick[0]))

    return result

### Adjust voting strategy here
Perform Bagging

In [None]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from pprint import pprint
import math

# Bagging only: fuse the predictions of the four sub-models on every test
# image and accumulate mean average precision over the test loader.
metric = MeanAveragePrecision()

# Move the sub-models to the right device and switch them to inference mode.
bagged_models = (model_retina_res1, model_retina_res2,
                 model_retina_res3, model_retina_res4)
for net in bagged_models:
    net.to(device).eval()

for i, (img, tar) in enumerate(data_loader_test):
    print(i)

    # The test loader yields one image per batch.
    img1 = img[0].to(device)
    tar1 = tar[0]
    with torch.no_grad():
        preds = [net([img1])[0] for net in bagged_models]

    # Pool boxes and scores of all sub-models, in the same order, so that
    # position k in `boxes` and in `scores` refers to the same detection.
    boxes = [b for p in preds for b in p['boxes'].cpu().numpy().tolist()]
    scores = np.concatenate([p['scores'].cpu().numpy() for p in preds], 0)

    # Adjust voting strategy here: 'consensus', 'affirmative' or 'unanimous'.
    # A copy is passed so that `boxes` stays intact for the score lookup.
    pick = ensemble(list(boxes), option='unanimous')

    # Position of every picked box in the pooled list, to recover its score.
    idx = []
    for chosen in pick:
        for k, candidate in enumerate(boxes):
            if chosen == candidate:
                idx.append(k)
                break

    # float32 matches the dtype of the dataset's target boxes; the reshape
    # keeps a (0, 4) shape when nothing was picked.
    pick = torch.as_tensor(np.array(pick, dtype=np.float32).reshape(-1, 4))

    # Only draw when there is at least one fused detection.
    if len(idx) > 0:
        print('am I here')
        canvas = torchvision.transforms.ConvertImageDtype(torch.uint8)(img1.cpu())
        result = torchvision.utils.draw_bounding_boxes(canvas, pick)
        out = Image.fromarray(result.permute(1, 2, 0).contiguous().numpy())
        # out.save('imgs/test_')
        out.show()

    # Images without detections are still reported (with empty predictions);
    # skipping them would drop their ground-truth boxes from the metric.
    prediction = [{
        'boxes': pick.to(device),
        'scores': torch.from_numpy(scores[np.asarray(idx, dtype=np.intp)]).to(device),
        # Integer class ids, matching the dtype of the target labels.
        'labels': torch.ones(len(pick), dtype=torch.int64).to(device),
    }]
    target = [{
        'boxes': tar1['boxes'].to(device),
        'labels': tar1['labels'].to(device),
    }]

    metric.update(prediction, target)
    pprint(metric.compute())

Bagging + TTA

In [None]:
# Test-time augmentation: one plain view of the held-out samples plus three
# views that go through the training (hue-jitter) transform.
_held_out_idx = indices[-50:]


def _held_out_view(train):
    """Return the held-out subset of a freshly built dataset."""
    return torch.utils.data.Subset(
        PennFudanDataset('PennFudanPed', get_transform(train=train)),
        _held_out_idx)


def _eval_loader(data):
    """One image per batch, fixed order, shared collate function."""
    return torch.utils.data.DataLoader(
        data, batch_size=1, shuffle=False, pin_memory=True,
        collate_fn=utils.collate_fn)


dataset_test = _held_out_view(train=False)
dataset_test1 = _held_out_view(train=True)
dataset_test2 = _held_out_view(train=True)
dataset_test3 = _held_out_view(train=True)

# Evaluation loaders for the three augmented views and the plain view.
data_loader_test1 = _eval_loader(dataset_test1)
data_loader_test2 = _eval_loader(dataset_test2)
data_loader_test3 = _eval_loader(dataset_test3)
data_loader_test = _eval_loader(dataset_test)

In [None]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from pprint import pprint
import math

# Bagging + TTA: every sub-model predicts on the plain view and on three
# augmented views of each test image; all predictions are pooled and fused.
metric = MeanAveragePrecision()

tta_models = (model_retina_res1, model_retina_res2,
              model_retina_res3, model_retina_res4)
# The models are called without targets below, so make sure they are in
# inference mode even when this cell is run on its own.
for net in tta_models:
    net.eval()

# View order: plain view first, then the augmented views.
tta_loaders = (data_loader_test, data_loader_test2,
               data_loader_test3, data_loader_test1)

# NOTE(review): ensemble() counts votes against 4 sources by default, while
# 16 prediction sets (4 views x 4 models) are pooled here, so 'unanimous'
# accepts groups of exactly 4 boxes - confirm that this is intended.
for i, batches in enumerate(zip(*tta_loaders)):
    print(i)

    # Each loader yields one (images, targets) pair with a single sample.
    views = [images[0].to(device) for images, _ in batches]
    img1 = views[0]
    tar1 = batches[0][1][0]

    with torch.no_grad():
        # Ordered view by view and, within a view, model by model.
        preds = [net([view])[0] for view in views for net in tta_models]

    # Boxes and scores come from the same prediction, in the same order, so
    # that position k in `boxes` and in `scores` refers to one detection.
    # (Previously the boxes of the fourth view were taken from the third
    # view's predictions while the scores came from the fourth.)
    boxes = [b for p in preds for b in p['boxes'].cpu().numpy().tolist()]
    scores = np.concatenate([p['scores'].cpu().numpy() for p in preds], 0)

    # Adjust voting strategy here: 'consensus', 'affirmative' or 'unanimous'.
    # A copy is passed so that `boxes` stays intact for the score lookup.
    pick = ensemble(list(boxes), option='unanimous')

    # Position of every picked box in the pooled list, to recover its score.
    idx = []
    for chosen in pick:
        for k, candidate in enumerate(boxes):
            if chosen == candidate:
                idx.append(k)
                break

    # float32 matches the dtype of the dataset's target boxes; the reshape
    # keeps a (0, 4) shape when nothing was picked.
    pick = torch.as_tensor(np.array(pick, dtype=np.float32).reshape(-1, 4))

    # Only draw when there is at least one fused detection.
    if len(idx) > 0:
        canvas = torchvision.transforms.ConvertImageDtype(torch.uint8)(img1.cpu())
        result = torchvision.utils.draw_bounding_boxes(canvas, pick)
        out = Image.fromarray(result.permute(1, 2, 0).contiguous().numpy())
        # out.save('imgs/test_')
        out.show()

    # Images without detections are still reported (with empty predictions);
    # skipping them would drop their ground-truth boxes from the metric.
    prediction = [{
        'boxes': pick.to(device),
        'scores': torch.from_numpy(scores[np.asarray(idx, dtype=np.intp)]).to(device),
        # Integer class ids, matching the dtype of the target labels.
        'labels': torch.ones(len(pick), dtype=torch.int64).to(device),
    }]
    target = [{
        'boxes': tar1['boxes'].to(device),
        'labels': tar1['labels'].to(device),
    }]

    metric.update(prediction, target)
    pprint(metric.compute())