# Training and testing of objection detection network
Based on Faster-RCNN and this guide: https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html


In [1]:
# %%shell

# pip install cython
# # Install pycocotools, the version by default in Colab
# # has a bug fixed in https://github.com/cocodataset/cocoapi/pull/354
# pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

## Importing libraries and custom scripts


In [2]:
import sys
import os
import numpy as np
import torch
import torch.utils.data

sys.path.append(os.getcwd() + "/.." + "/scripts")


## Dataset class with new __getitem__ function

In [3]:
import os
import numpy as np
import torch
import torch.utils.data
from PIL import Image
import transforms as T






class CansDataset(torch.utils.data.Dataset):
    def __init__(self, root):
        self.root = root
        self.imgs = list(sorted(os.listdir(os.path.join(root,"video1/train/frames"))))
        self.bbox = list(sorted(os.listdir(os.path.join(root,"video1/train/boundingboxes"))))
    
    def __getitem__(self, idx):
        # load images and bboxes
        img_path = os.path.join(self.root, "video1/train/frames", self.imgs[idx])
        bbox_path = os.path.join(self.root, "video1/train/boundingboxes", self.bbox[idx])
        img = Image.open(img_path).convert("RGB")
        img=np.array(img)
        img=torch.tensor(img)/255
        img=img.permute(2,0,1)

        bbox = []
        label = []
        with open(bbox_path, 'r') as f:
            for line in f:
                line = line.split(" ")
                id = line[0] # class label, 1=beer, 2=cola, 0=background
                id = 1 if id == 'beer' else 2
                xmin = float(line[1])
                ymin = float(line[2])
                xmax = float(line[3])
                ymax = float(line[4])
                bbox.append([xmin, ymin, xmax, ymax])
                label.append(id)
        bbox = torch.as_tensor(bbox, dtype=torch.int64)
        labels = torch.as_tensor(label, dtype=torch.int64) #torch.ones((num_objs, ), dtype=torch.float32)
        image_id = torch.tensor([idx],dtype=torch.int64)
        area = (bbox[:, 3] - bbox[:, 1]) * (bbox[:, 2] - bbox[:, 0])

        target = {}
        target["boxes"] = bbox
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area


        #img = pil2tensor(images[0])
        return img, target

    def __len__(self):
        return len(self.imgs)

Test class

In [4]:

#root = os.getcwd()+'data/'#os.getcwd() + '/..' + '/data/'
sys.path.append(os.getcwd() + "/.." + "/scripts")
print(os.getcwd())
os.chdir(os.getcwd()+"/..")

#print(root)
root = os.getcwd() + "/data"
dataset = CansDataset(root)


# img,target = dataset.__getitem__(907)
# print("img",img)
# print("target",target)

/home/andreasgp/MEGAsync/DTU/9. Semester/Deep Learning/object-tracking-project/02456-project/notebooks


## Adding pretrained model and modify numbers of classes

In [5]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

# load a model pre-trained pre-trained on COCO
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

# replace the classifier with a new one, that has
# num_classes which is user-defined
num_classes = 3  # 1 class (person) + background
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


In [6]:

#from engine import train_one_epoch, evaluate

import utils
import transforms as T
from typing import List, Tuple





dataset = CansDataset(root)
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn
)
# For Training
images,targets = next(iter(data_loader))
images = list(image for image in images)


print(images[0])

targets = [{k: v for k, v in t.items()} for t in targets]

output = model(images,targets)   # Returns losses and detections
# For inference
model.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = model(x)           # Returns predictions
print(predictions)


  bbox = torch.as_tensor(bbox, dtype=torch.int64)
  bbox = torch.as_tensor(bbox, dtype=torch.int64)
  bbox = torch.as_tensor(bbox, dtype=torch.int64)
  bbox = torch.as_tensor(bbox, dtype=torch.int64)


tensor([[[0.3882, 0.3843, 0.3686,  ..., 0.2627, 0.2745, 0.2784],
         [0.4000, 0.3333, 0.3059,  ..., 0.2627, 0.2706, 0.2745],
         [0.3843, 0.3255, 0.3294,  ..., 0.2667, 0.2706, 0.2706],
         ...,
         [0.1059, 0.1059, 0.1059,  ..., 0.0745, 0.0784, 0.0824],
         [0.1059, 0.1059, 0.1098,  ..., 0.0745, 0.0784, 0.0824],
         [0.1059, 0.1098, 0.1098,  ..., 0.0745, 0.0784, 0.0824]],

        [[0.3490, 0.3451, 0.3333,  ..., 0.2157, 0.2275, 0.2314],
         [0.3608, 0.2941, 0.2627,  ..., 0.2157, 0.2235, 0.2275],
         [0.3255, 0.2706, 0.2745,  ..., 0.2196, 0.2235, 0.2235],
         ...,
         [0.1098, 0.1098, 0.1098,  ..., 0.0784, 0.0824, 0.0863],
         [0.1098, 0.1098, 0.1137,  ..., 0.0784, 0.0824, 0.0863],
         [0.1098, 0.1137, 0.1137,  ..., 0.0784, 0.0824, 0.0863]],

        [[0.2510, 0.2392, 0.2196,  ..., 0.1216, 0.1333, 0.1373],
         [0.2627, 0.1882, 0.1529,  ..., 0.1216, 0.1294, 0.1333],
         [0.2353, 0.1686, 0.1608,  ..., 0.1255, 0.1294, 0.

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


typing.List[typing.Tuple[int, int]]
tensor([[[0.1282, 0.5887, 0.2213,  ..., 0.3449, 0.0592, 0.3018],
         [0.0652, 0.8235, 0.9055,  ..., 0.6729, 0.5918, 0.0409],
         [0.0983, 0.7814, 0.8326,  ..., 0.8028, 0.3649, 0.2984],
         ...,
         [0.5545, 0.9740, 0.4885,  ..., 0.6818, 0.3860, 0.0414],
         [0.9502, 0.3671, 0.5939,  ..., 0.1750, 0.5009, 0.0874],
         [0.6049, 0.6741, 0.2731,  ..., 0.1026, 0.3659, 0.8614]],

        [[0.5350, 0.7905, 0.2743,  ..., 0.1225, 0.4954, 0.6784],
         [0.9291, 0.6579, 0.7402,  ..., 0.2481, 0.0947, 0.2566],
         [0.0177, 0.5821, 0.0965,  ..., 0.8593, 0.9256, 0.1965],
         ...,
         [0.2383, 0.5356, 0.3392,  ..., 0.5606, 0.4862, 0.5269],
         [0.8673, 0.6599, 0.7377,  ..., 0.3050, 0.7345, 0.9793],
         [0.3035, 0.4061, 0.4279,  ..., 0.5140, 0.4490, 0.7564]],

        [[0.2644, 0.2633, 0.9981,  ..., 0.5227, 0.5020, 0.2502],
         [0.8142, 0.6570, 0.3960,  ..., 0.6870, 0.5365, 0.5096],
         [0.2468, 0.96

### To Do
Fix get item så den kører med Jonas' data\
Få vores data til at kører med den pretrænede model\
Test forward pass\
Benyt Holgers split til at træne med og opnå fuld model

