# Pytorch Vision - Object Detection Finetuning
Notebook for following along with the [Pytorch Vision tutorials](https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html), with this one looking at finetuning a pretrained MASK R-CNN model. The training data can be downloaded [here](https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip).<br><br>

### Choices for data

<br>

### Libaries and Modules
Importing the necessary libaries and modules for the notebook.

In [12]:
#Import cell
import glob
import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import math
import numpy as np
import os
import pandas as pd
import pickle as pk
import random
import re
import string
import torchvision.transforms as T
import time
import torch
import torchvision

from detection import utils
from PIL import Image
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(1247) #setting seed value
print(f"Device: {device}. Cuda available: {torch.cuda.is_available()}")
print(f"Torch current seed = {torch.seed()}")
print("Imports complete")

ModuleNotFoundError: No module named 'detection'

<br>

### Data Loading and Manipulation Functions
<b>Functions:</b><br>
<ul>
    <li></li>
</ul>

In [2]:
#Data loading and manipulation function definition cell

print("Data loading and manipulation functions defined.")

Data loading and manipulation functions defined.


### Importing and preparing data sets
Importing and preparing the data for the models.

In [3]:
#Importing data sets

print("Datasets imported")

Datasets imported


<br>

### Class Definitions
<b>Classes:</b>
<ul>
    <li>PennFudanDataset - datset containing pairs of images and segmentation masks. Paths in __intit__() may need to be modified to meet local requirements.</li>
</ul><br>
<b>Model Definition Functions:</b>
<ul>
    <li>get_model_instance_segmentation - returns a torchvision maskrcnn_resnet50 model instance for an input number of classes.</li>
</ul>

In [4]:
#Class definition cell
class PennFudanDataset(torch.utils.data.Dataset):
    def __init__(self, root, transforms) -> None:
        self.PNGPath = os.path.join("data", root, "PNGImages")
        self.PedPath = os.path.join("data", root, "PedMasks")
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(self.PNGPath)))
        self.masks = list(sorted(os.listdir(self.PedPath)))
        return None
    
    
    def __get_item__(self, idx):
        img_path = os.path.join(self.PNGPath, self.imgs[idx])
        mask_path = os.path.join(self.PedPath, self.masks[idx])
        img = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path)
        mask = np.array(mask)
        obj_ids = np.unique(mask)
        obj_ids = obj_ids[1:] #first 1 is background, so remove it
        
        masks = mask==obks_ids[:, None, None] #split colour mask into binary set
        num_objs = len(obj_ids)
        boxes = []
        for i in range(num_objs):
            pos = np.where(masks[i])
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
            
        boxes = torch.as_tensor(boxes, dtype=torch.float32) #Tensor conversion
        labels = torch.ones((num_objs,), dtype=torch.int64) #only one class
        masks = torch.as_tensor(masks, dtype=torch.unit8)
        
        image_id = torch.tensor([idx])
        area = (boxes[:, 3]-boxes[:, 1])*(boxes[:, 2]-boxes[:, 0])
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["masks"] = masks
        target["image-id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        
        if self.transforms is not None:
            img, target = self.transforms(img, target)
            
        return img, target


    
def get_model_instance_segmentation(num_classes):
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(weights="DEFAULT")
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                      hidden_layer,
                                                      num_classes)
    return model
            
print("Classes defined.")

Classes defined.


<br>

### Calculation functions
<b>Functions:</b>
<ul>
    <li>get_transform - used to return the transforms for the data.</li>
</ul>

In [5]:
#Calculation functions cell
def get_transform(train):
    transforms = []
    transforms.append(T.PILToTensor())
    transforms.append(T.ConvertImageDtype(torch.float))
    if train: transforms.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(transforms)


print("Calculation functions defined.")

Calculation functions defined.


<br>

### Plotting functions
<b>Functions:</b>
<ul>
    <li></li>
</ul>

In [6]:
#Plotting functions Cell
%matplotlib inline

print("Plotting functions defined.")

Plotting functions defined.


<br>

### Training Functions
<b>Functions:</b>
<ul>
    <li></li>
</ul>

In [7]:
#Training Functions

print("Training functions defined.")

Training functions defined.


### Main code

#### Importing pretrained model

In [8]:
#Model import
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
num_classes = 2
in_features = model.roi_heads.box_predictor.cls_score.in_features 
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

#### Modifying the model to add a different backbone

In [9]:
#Backbone modification
backbone = torchvision.models.mobilenet_v2(weights="DEFAULT").features
backbone.out_channels = 1280
anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                    aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
model = FasterRCNN(backbone,
                  num_classes=2,
                  rpn_anchor_generator=anchor_generator,
                  box_roi_pool=roi_pooler)

In [10]:
model2 = get_model_instance_segmentation(2)

#### Testing `forward()` method (optional)

In [11]:
modelTest = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
dataset = PennFudanDataset('PennFudanPed', get_transform(train=True))
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

images, targets = next(iter(data_loader))
images = list(image for image in images)
targets = [{k: v for k, v in t.items()} for t in targets]

output = modelTest(images, targets)
modelTest.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictions = modelTest(x)

AttributeError: module 'utils' has no attribute 'collate_fn'

<br>

In [None]:
root = 'PennFudanPed'
PNGPath = os.path.join("data", root, "PNGImages")
PedPath = os.path.join("data", root, "PedMasks")
imgs = list(sorted(os.listdir(PNGPath)))

In [None]:
print(PNGPath)

In [None]:
os.listdir(os.path.join("data", "PennFudanPed"))