# Training and testing of objection detection network
Based on Faster-RCNN and this guide: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html


In [1]:
# %%shell

# pip install cython
# # Install pycocotools, the version by default in Colab
# # has a bug fixed in https://github.com/cocodataset/cocoapi/pull/354
# pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

## Importing libraries and custom scripts


In [2]:
import sys
import os
import numpy as np
import torch
import torch.utils.data

sys.path.append(os.getcwd() + "/.." + "/scripts")


## Dataset class with new __getitem__ function

In [3]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import transforms as T






class CansDataset(torch.utils.data.Dataset):
    def __init__(self, root, train=True):
        if train is True:
            self.root = root + "/video1/train/"
        else:
            self.root = root +  "/video1/test/"

        self.imgs = list(sorted(os.listdir(os.path.join(self.root,"frames"))))
        self.bbox = list(sorted(os.listdir(os.path.join(self.root,"boundingboxes"))))
    
    def __getitem__(self, idx):
        # load images and bboxes
        img_path = os.path.join(self.root, "frames", self.imgs[idx])
        bbox_path = os.path.join(self.root, "boundingboxes", self.bbox[idx])
        img = Image.open(img_path).convert("RGB")
        img=np.array(img)
        img=torch.tensor(img)/255
        img=img.permute(2,0,1)

        bbox = []
        label = []
        lines = 0
        iscrowd = []
        with open(bbox_path, 'r') as f:
            for line in f:
                line = line.split(" ")
                id = line[0] # class label, 1=beer, 2=cola, 0=background
                id = 1 if id == 'beer' else 2
                xmin = float(line[1])
                ymin = float(line[2])
                xmax = float(line[3])
                ymax = float(line[4])
                bbox.append([xmin, ymin, xmax, ymax])
                label.append(id)
                lines += 1
                iscrowd.append(False)

        bbox = torch.as_tensor(bbox, dtype=torch.int64)
        labels = torch.as_tensor(label, dtype=torch.int64) #torch.ones((num_objs, ), dtype=torch.float32)
        image_id = torch.tensor([idx],dtype=torch.int64)
        area = (bbox[:, 3] - bbox[:, 1]) * (bbox[:, 2] - bbox[:, 0])
        iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)
        #iscrowd = torch.zeros((lines,), dtype=torch.int64)

        target = {}
        target["boxes"] = bbox
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd

        # if no boxes
        # torch.zeros((0,4), dtype=torch.float32)




        return img, target

    def __len__(self):
        return len(self.imgs)

Test class

In [4]:

#root = os.getcwd()+'data/'#os.getcwd() + '/..' + '/data/'
sys.path.append(os.getcwd() + "/.." + "/scripts")
print(os.getcwd())
os.chdir(os.getcwd()+"/..")

#print(root)
root = os.getcwd() + "/data"


/home/andreasgp/MEGAsync/DTU/9. Semester/Deep Learning/object-tracking-project/02456-project/notebooks


## Adding pretrained model and modify numbers of classes

In [5]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 3  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [6]:
def get_instance_segmentation_model(num_classes):
    # load an instance segmentation model pre-trained on COCO
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # get the number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # now get the number of input features for the mask classifier
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # and replace the mask predictor with a new one
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)

    return model

In [7]:

from engine import train_one_epoch, evaluate

import utils
import transforms as T
from typing import List, Tuple





dataset = CansDataset(root, train=True)
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn
)
# For Training
images,targets = next(iter(data_loader))
images = list(image for image in images)


print(images[0])

targets = [{k: v for k, v in t.items()} for t in targets]

output = model(images,targets)   # Returns losses and detections
# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)           # Returns predictions
print(predictions)


  bbox = torch.as_tensor(bbox, dtype=torch.int64)
  bbox = torch.as_tensor(bbox, dtype=torch.int64)
  bbox = torch.as_tensor(bbox, dtype=torch.int64)
  bbox = torch.as_tensor(bbox, dtype=torch.int64)


tensor([[[0.4039, 0.3804, 0.3922,  ..., 0.2706, 0.2706, 0.2667],
         [0.4039, 0.3412, 0.3098,  ..., 0.2784, 0.2784, 0.2745],
         [0.3922, 0.3294, 0.3176,  ..., 0.2824, 0.2824, 0.2784],
         ...,
         [0.1020, 0.1020, 0.1020,  ..., 0.0784, 0.0784, 0.0784],
         [0.1020, 0.1020, 0.1020,  ..., 0.0784, 0.0784, 0.0784],
         [0.1020, 0.1020, 0.1020,  ..., 0.0784, 0.0784, 0.0784]],

        [[0.3294, 0.3098, 0.3216,  ..., 0.2275, 0.2275, 0.2235],
         [0.3294, 0.2706, 0.2392,  ..., 0.2353, 0.2353, 0.2314],
         [0.3216, 0.2588, 0.2392,  ..., 0.2392, 0.2392, 0.2353],
         ...,
         [0.1059, 0.1059, 0.1059,  ..., 0.0824, 0.0824, 0.0824],
         [0.1059, 0.1059, 0.1059,  ..., 0.0824, 0.0824, 0.0824],
         [0.1059, 0.1059, 0.1059,  ..., 0.0824, 0.0824, 0.0824]],

        [[0.2627, 0.2314, 0.2275,  ..., 0.1176, 0.1176, 0.1059],
         [0.2627, 0.1922, 0.1451,  ..., 0.1255, 0.1176, 0.1137],
         [0.2431, 0.1725, 0.1412,  ..., 0.1294, 0.1216, 0.

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[{'boxes': tensor([[ 61.2084, 146.3186, 323.1326, 290.9955],
        [ 22.8236, 134.6014, 124.0131, 279.2125],
        [  0.0000,  69.2209,  51.6782, 280.1263],
        [ 77.9764,  65.7266,  81.8052,  70.2569],
        [293.8410, 118.9181, 297.5344, 124.0548],
        [  0.0000, 150.3896, 219.2576, 295.2558],
        [ 69.1974,  99.2678, 278.2643, 232.8902],
        [ 69.9732,  92.1933, 301.8970, 174.3298],
        [ 97.1194, 132.8947, 354.7691, 209.5895],
        [ 73.1813,  61.3903,  76.8940,  65.5782],
        [  1.8352,  66.2828, 147.8917, 196.8068],
        [ 67.7157,  72.5782, 322.2645, 146.5858],
        [126.6452, 217.8990, 130.9050, 222.9216],
        [ 78.6450,  64.4050,  82.1083,  69.0168],
        [290.7042, 195.6609, 294.6983, 201.7324],
        [160.4558, 146.6925, 165.0472, 153.2784],
        [213.8866, 107.9661, 218.0859, 113.6432],
        [292.0615, 121.4115, 295.7265, 126.4519],
        [295.3268, 117.8782, 299.1170, 123.3251],
        [ 74.2185,  60.8539,  77.9568, 

### To Do
Fix get item så den kører med Jonas' data\
Få vores data til at kører med den pretrænede model\
Test forward pass\
Benyt Holgers split til at træne med og opnå fuld model



In [8]:
# use our dataset and defined transformations
dataset = CansDataset(root, train=True)
dataset_test = CansDataset(root, train=False)




# split the dataset in train and test set
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-50])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-50:])

# define training and validation data loaders
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)


In [9]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("device:",device)

# our dataset has three classes only - background, beer and coke
num_classes = 3

# get the model using our helper function
#model = get_instance_segmentation_model(num_classes)
# move model to the right device
model.to(device)

# construct an optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005,
                            momentum=0.9, weight_decay=0.0005)

# and a learning rate scheduler which decreases the learning rate by
# 10x every 3 epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=3,
                                               gamma=0.1)

device: cuda


In [10]:
# let's train it for 10 epochs
from torch.optim.lr_scheduler import StepLR
num_epochs = 1

for epoch in range(num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the test dataset
    #evaluate(model, data_loader_test, device=device)  

  


  bbox = torch.as_tensor(bbox, dtype=torch.int64)
  bbox = torch.as_tensor(bbox, dtype=torch.int64)
  bbox = torch.as_tensor(bbox, dtype=torch.int64)
  bbox = torch.as_tensor(bbox, dtype=torch.int64)


Epoch: [0]  [  0/590]  eta: 0:16:15  lr: 0.000013  loss: 1.7305 (1.7305)  loss_classifier: 1.3977 (1.3977)  loss_box_reg: 0.3283 (0.3283)  loss_objectness: 0.0035 (0.0035)  loss_rpn_box_reg: 0.0010 (0.0010)  time: 1.6533  data: 0.2801  max mem: 2115
Epoch: [0]  [ 10/590]  eta: 0:13:10  lr: 0.000098  loss: 1.6003 (1.5146)  loss_classifier: 1.3451 (1.2340)  loss_box_reg: 0.2644 (0.2716)  loss_objectness: 0.0049 (0.0070)  loss_rpn_box_reg: 0.0013 (0.0020)  time: 1.3628  data: 0.0304  max mem: 2379


Traceback (most recent call last):
  File "/usr/lib/python3.9/multiprocessing/queues.py", line 251, in _feed
    send_bytes(obj)
  File "/usr/lib/python3.9/multiprocessing/connection.py", line 205, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/usr/lib/python3.9/multiprocessing/connection.py", line 416, in _send_bytes
    self._send(header + buf)
  File "/usr/lib/python3.9/multiprocessing/connection.py", line 373, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


KeyboardInterrupt: 

In [None]:
evaluate(model, data_loader_test, device=device)

In [None]:
# pick one image from the test set
img, _ = dataset_test[0]
# put the model in evaluation mode
model.eval()
with torch.no_grad():
    prediction = model([img.to(device)])

  bbox = torch.as_tensor(bbox, dtype=torch.int64)


AttributeError: 'list' object has no attribute 'tolist'