# Training and testing of object detection network
Based on Faster-RCNN and this guide: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html


In [None]:
# %%shell
#pip install cython
# # Install pycocotools, the version by default in Colab
# # has a bug fixed in https://github.com/cocodataset/cocoapi/pull/354
#pip install pycocotools -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

## Importing libraries and custom scripts


In [2]:
import sys
import os
import numpy as np
import torch
import torch.utils.data

# Cloning git repo

# Fetch the project repository into the current working directory; later cells
# add its scripts_colab folder to sys.path and read the dataset from its data folder.
!git clone https://github.com/jonasbrondum/02456-project.git


In [6]:
import os
import sys


def add_sys_path(path):
    """Append ``path`` to ``sys.path`` unless it is already listed.

    Guarding the append keeps this cell idempotent: re-running it does not
    pile up duplicate entries in the module search path.

    Args:
        path: Directory that should become importable.
    """
    if path not in sys.path:
        sys.path.append(path)


# Make the cloned repository and its helper package importable.
add_sys_path(os.getcwd() + "/02456-project/scripts_colab")
add_sys_path(os.getcwd() + "/02456-project")


In [7]:
# Sanity check: show the module search path to confirm the repository folders were added.
print(sys.path)

['', '/content', '/env/python', '/usr/lib/python37.zip', '/usr/lib/python3.7', '/usr/lib/python3.7/lib-dynload', '/usr/local/lib/python3.7/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.7/dist-packages/IPython/extensions', '/root/.ipython', '/content/02456-project/scripts_colab', '/content/02456-project/scripts_colab', '/content/02456-project']


## Dataset class with new `__getitem__` function

In [8]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import scripts_colab.transforms as T






class CansDataset(torch.utils.data.Dataset):
    """Object-detection dataset of video frames with per-frame bounding-box files.

    The split directory (``<root>/video1/train/`` or ``<root>/video1/test/``)
    must contain a ``frames`` folder with images and a ``boundingboxes`` folder
    with text files. Frames and annotation files are paired by their position
    in the sorted directory listings.

    Each non-blank annotation line has the whitespace-separated form
    ``<class> <xmin> <ymin> <xmax> <ymax>``.
    """

    def __init__(self, root, train=True):
        """Index the frames and annotation files of one split.

        Args:
            root: Directory that contains the ``video1`` folder.
            train: Truthy selects the ``train`` split, falsy the ``test`` split.
        """
        if train:
            self.root = root + "/video1/train/"
        else:
            self.root = root + "/video1/test/"

        self.imgs = sorted(os.listdir(os.path.join(self.root, "frames")))
        self.bbox = sorted(os.listdir(os.path.join(self.root, "boundingboxes")))

    @staticmethod
    def _read_annotations(bbox_path):
        """Parse one annotation file into parallel lists of boxes and labels.

        Args:
            bbox_path: Path of the annotation text file.

        Returns:
            ``(boxes, labels)`` where ``boxes`` is a list of
            ``[xmin, ymin, xmax, ymax]`` floats and ``labels`` is a list of
            ints: 1 when the class field is ``'beer'``, 2 for any other class
            (0 is left for the background).

        Raises:
            ValueError: If a non-blank line does not carry four coordinates
                or a coordinate is not a number.
        """
        boxes = []
        labels = []
        with open(bbox_path, 'r') as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                xmin, ymin, xmax, ymax = (float(value) for value in fields[1:5])
                boxes.append([xmin, ymin, xmax, ymax])
                labels.append(1 if fields[0] == 'beer' else 2)
        return boxes, labels

    def __getitem__(self, idx):
        """Load the frame at ``idx`` together with its detection target.

        Args:
            idx: Integer position in the sorted frame listing.

        Returns:
            ``(img, target)``. ``img`` is a float tensor of shape
            ``(3, H, W)`` scaled to ``[0, 1]``. ``target`` is a dict with

            * ``boxes``: float32 tensor ``(N, 4)`` as ``[xmin, ymin, xmax, ymax]``
            * ``labels``: int64 tensor ``(N,)``
            * ``image_id``: int64 tensor ``[idx]``
            * ``area``: float32 tensor ``(N,)`` with the box areas
            * ``iscrowd``: int64 zeros ``(N,)``

            ``N`` is 0 for a frame whose annotation file lists no objects.
        """
        img_path = os.path.join(self.root, "frames", self.imgs[idx])
        bbox_path = os.path.join(self.root, "boundingboxes", self.bbox[idx])

        # HxWx3 uint8 image -> 3xHxW float tensor in [0, 1]
        img = Image.open(img_path).convert("RGB")
        img = torch.tensor(np.array(img)) / 255
        img = img.permute(2, 0, 1)

        box_list, label_list = self._read_annotations(bbox_path)

        # Boxes stay floating point so fractional coordinates are not truncated;
        # the explicit (N, 4) shape keeps frames without objects indexable as (0, 4).
        boxes = torch.as_tensor(box_list, dtype=torch.float32).reshape(len(box_list), 4)
        labels = torch.as_tensor(label_list, dtype=torch.int64)
        image_id = torch.tensor([idx], dtype=torch.int64)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # No annotation is ever marked as a crowd region.
        iscrowd = torch.zeros((len(box_list),), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        return img, target

    def __len__(self):
        """Return the number of frames in the split."""
        return len(self.imgs)

Test class

In [13]:

# Dataset folder inside the repository that was cloned into the working directory.
root = "".join([os.getcwd(), "/02456-project/data"])

## Adding a pretrained model and modifying the number of classes

In [10]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a Faster R-CNN (ResNet-50 FPN) model pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 3  # 2 object classes (beer, cola) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one sized for num_classes outputs
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [11]:
def get_instance_segmentation_model(num_classes):
    """Build a COCO-pretrained Mask R-CNN whose heads predict ``num_classes`` classes.

    Both the box predictor and the mask predictor of the pretrained model are
    replaced by freshly initialised heads sized for ``num_classes``.

    Args:
        num_classes: Number of output classes for the new heads (the caller
            decides whether this count includes the background class).

    Returns:
        The modified ``maskrcnn_resnet50_fpn`` model.
    """
    # Imported locally: the notebook's import cells never bring MaskRCNNPredictor
    # into scope, so without this the function fails with a NameError.
    from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model

In [14]:
from scripts_colab.engine import train_one_epoch, evaluate

import scripts_colab.utils
import scripts_colab.transforms as T
from typing import List, Tuple





# Smoke test: push one real batch through the model in training mode and
# random images through it in inference mode.
dataset = CansDataset(root, train=True)
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=2,
    collate_fn=scripts_colab.utils.collate_fn
)
# For Training
images, targets = next(iter(data_loader))
images = list(images)


print(images[0])

targets = [dict(t) for t in targets]

# This cell ends with model.eval(); switch back explicitly so that re-running
# the cell still exercises the training path instead of returning detections.
model.train()
output = model(images, targets)   # In training mode this returns a dict of losses
# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
with torch.no_grad():  # forward-only pass, no autograd graph needed
    predictions = model(x)       # Returns predictions
print(predictions)


tensor([[[0.3765, 0.3765, 0.3961,  ..., 0.3529, 0.3647, 0.3569],
         [0.3961, 0.3490, 0.3176,  ..., 0.3569, 0.3725, 0.3647],
         [0.3882, 0.3255, 0.3059,  ..., 0.3529, 0.3686, 0.3608],
         ...,
         [0.1020, 0.1020, 0.1020,  ..., 0.0706, 0.0706, 0.0706],
         [0.0980, 0.1020, 0.1020,  ..., 0.0745, 0.0745, 0.0745],
         [0.0980, 0.0980, 0.0980,  ..., 0.0745, 0.0745, 0.0745]],

        [[0.3255, 0.3255, 0.3373,  ..., 0.3020, 0.3137, 0.3098],
         [0.3373, 0.2902, 0.2627,  ..., 0.3059, 0.3216, 0.3176],
         [0.3294, 0.2667, 0.2510,  ..., 0.3020, 0.3176, 0.3137],
         ...,
         [0.1059, 0.1059, 0.1059,  ..., 0.0745, 0.0745, 0.0745],
         [0.1020, 0.1059, 0.1059,  ..., 0.0784, 0.0784, 0.0784],
         [0.1020, 0.1020, 0.1020,  ..., 0.0784, 0.0784, 0.0784]],

        [[0.2588, 0.2510, 0.2471,  ..., 0.1686, 0.1765, 0.1608],
         [0.2627, 0.2078, 0.1608,  ..., 0.1725, 0.1843, 0.1686],
         [0.2471, 0.1765, 0.1490,  ..., 0.1765, 0.1804, 0.

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[{'boxes': tensor([[  0.0000,  12.8552, 308.3311, 178.9292],
        [ 40.1656, 188.9036,  46.7233, 194.9113],
        [  0.0000,  35.4238, 184.1772, 188.6429],
        [342.6573,  12.8271, 400.0000, 195.7937],
        [  0.0000, 168.2077, 239.9594, 248.0885],
        [ 88.5754,  77.3201,  93.0437,  80.6575],
        [ 34.7917, 142.0704, 259.1571, 224.5171],
        [  0.0000,  59.8464, 348.2917, 235.1200],
        [297.9188,  61.5508, 302.5425,  66.1730],
        [ 30.3133, 107.6977, 263.2783, 184.9244],
        [200.0288,  28.2272, 400.0000, 126.4453],
        [150.5571,  15.5129, 383.5333, 163.4640],
        [309.8131, 224.5099, 314.1648, 228.6493],
        [ 92.0823, 151.8054, 325.4271, 239.1302],
        [282.3657,  95.0826, 286.2843, 101.3401],
        [  0.0000,  95.6831, 193.1662, 249.1653],
        [ 41.0164, 191.1950,  47.2054, 197.1384],
        [334.8720,  78.6036, 339.1789,  84.7026],
        [298.9394, 227.0254, 305.0697, 231.4228],
        [  9.6293,  15.1959, 101.3057, 

In [15]:
# Build both splits in full first, then report how many frames each holds.
full_train = CansDataset(root, train=True)
full_test = CansDataset(root, train=False)
print("Oriignal dataset lengths are:")
print("training:",len(full_train))
print("test:",len(full_test))


# Draw a reproducible random order for each split (train permutation first,
# then test) and keep only a small slice of it for quick experiments.
torch.manual_seed(1)
indices = torch.randperm(len(full_train)).tolist()
indices_test = torch.randperm(len(full_test)).tolist()

dataset = torch.utils.data.Subset(full_train, indices[:100])
dataset_test = torch.utils.data.Subset(full_test, indices_test[:10])

print("Augmented dataset lengths are:")
print("training:",len(dataset))
print("test:",len(dataset_test))

# Loader options shared by the training and the test loader.
shared_loader_options = dict(
    num_workers=4,
    collate_fn=scripts_colab.utils.collate_fn,
)

# Training batches are shuffled pairs; test frames are served one at a time in order.
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, **shared_loader_options)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, **shared_loader_options)


Oriignal dataset lengths are:
training: 1273
test: 319
Augmented dataset lengths are:
training: 100
test: 10


  cpuset_checked))


In [16]:
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device:",device)

# three classes in total: background, beer and coke
num_classes = 3

# The detector built in the earlier cell is reused; move it to the chosen device.
model.to(device)

# SGD over the parameters that are trainable
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Step schedule: multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

device: cuda


In [None]:
# train for num_epochs epochs (currently set to a single epoch)
from torch.optim.lr_scheduler import StepLR
num_epochs = 1

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate (stepped once per epoch)
    lr_scheduler.step()
    # per-epoch evaluation on the test dataset is disabled here
    #evaluate(model, data_loader_test, device=device)  

  


  cpuset_checked))


Epoch: [0]  [ 0/50]  eta: 0:01:39  lr: 0.000107  loss: 1.4859 (1.4859)  loss_classifier: 1.1622 (1.1622)  loss_box_reg: 0.3203 (0.3203)  loss_objectness: 0.0020 (0.0020)  loss_rpn_box_reg: 0.0013 (0.0013)  time: 1.9829  data: 0.4008  max mem: 2115
Epoch: [0]  [10/50]  eta: 0:00:54  lr: 0.001126  loss: 0.6184 (0.8710)  loss_classifier: 0.4242 (0.6000)  loss_box_reg: 0.2479 (0.2619)  loss_objectness: 0.0020 (0.0065)  loss_rpn_box_reg: 0.0017 (0.0027)  time: 1.3595  data: 0.0475  max mem: 2383
Epoch: [0]  [20/50]  eta: 0:00:39  lr: 0.002146  loss: 0.4289 (0.6348)  loss_classifier: 0.2032 (0.3968)  loss_box_reg: 0.2109 (0.2314)  loss_objectness: 0.0005 (0.0045)  loss_rpn_box_reg: 0.0017 (0.0021)  time: 1.3005  data: 0.0115  max mem: 2383
Epoch: [0]  [30/50]  eta: 0:00:26  lr: 0.003165  loss: 0.3613 (0.5214)  loss_classifier: 0.1114 (0.2958)  loss_box_reg: 0.2040 (0.2195)  loss_objectness: 0.0003 (0.0040)  loss_rpn_box_reg: 0.0016 (0.0021)  time: 1.3065  data: 0.0110  max mem: 2383
Epoch: [

In [None]:
# Run the evaluate helper imported from scripts_colab.engine on the test data loader.
evaluate(model, data_loader_test, device=device)

In [None]:
# Switch the network to inference behaviour before predicting.
model.eval()
# First sample of the test subset; its target dict is not needed here.
img, _ = dataset_test[0]
with torch.no_grad():  # forward pass only, no gradients required
    prediction = model([img.to(device)])

In [None]:
# Display the raw model output for the single test image.
prediction

In [None]:
# Plotting code
import matplotlib.pyplot as plt

import torchvision.transforms.functional as F


# Use a tight bounding box whenever a figure is saved.
plt.rcParams["savefig.bbox"] = 'tight'


def show(imgs):
    """Draw one or more images next to each other in a single row of axes.

    Args:
        imgs: A single image or a list of images. Each one is detached and
            converted with torchvision's ``to_pil_image`` before plotting.
    """
    images = imgs if isinstance(imgs, list) else [imgs]
    # squeeze=False keeps the axes array 2-D even for a single image.
    _, axes = plt.subplots(ncols=len(images), squeeze=False)
    for column, image in enumerate(images):
        pil_image = F.to_pil_image(image.detach())
        axis = axes[0, column]
        axis.imshow(np.asarray(pil_image))
        # Hide ticks and tick labels so only the picture is visible.
        axis.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

In [None]:


# Draw the confident detections on every test image:
# label 1 boxes in green, label 2 boxes in red.
score_threshold = .8
cans_with_boxes = []
model.eval()

# Alternative loop lengths; all are ints so any of them can be passed to range().
ALL_TEST_IMAGES = len(dataset_test)
HALF_TEST_IMAGES = ALL_TEST_IMAGES // 2
THREE_IMAGES = 3

for i in range(ALL_TEST_IMAGES):
    img, _ = dataset_test[i]

    with torch.no_grad():
        output = model([img.to(device)])[0] # prediction

    # Bring to 0-255 from 0-1 and convert to the uint8 type used for drawing
    img = img.mul(255).type(torch.uint8)

    # keep only detections above the score threshold, split by predicted label
    confident = output['scores'] > score_threshold
    valid_green_boxes = output['boxes'][(output['labels'] == 1) & confident]
    valid_red_boxes = output['boxes'][(output['labels'] == 2) & confident]

    # concat to one tensor: green boxes first, then red boxes
    boxes = torch.cat((valid_green_boxes, valid_red_boxes), 0)

    # one color per box, in the same order as the concatenated boxes
    colors = ['green'] * len(valid_green_boxes) + ['red'] * len(valid_red_boxes)

    # Prepare for plotting
    draw = torchvision.utils.draw_bounding_boxes(img, boxes, colors=colors, width=4)
    show(draw)
    cans_with_boxes.append(draw)

# all in one plot (skipped when there is nothing to draw, since a figure
# cannot be created with zero columns)
if cans_with_boxes:
    show(cans_with_boxes)

In [None]:
# If error with torch not releasing memory, delete model and empty cache. Start over
#del model
#torch.cuda.empty_cache()