# research

> This notebook contains code for exploring existing research

In [1]:
#| default_exp research

In [2]:
#| hide
from nbdev.showdoc import *

# Survey of Existing Research
This notebook surveys existing research on methods for object detection and classification.

In [3]:
import torch; torch.__version__

'2.0.1'

In [4]:
import os
# Report the PYTORCH_ENABLE_MPS_FALLBACK setting; .get avoids a KeyError when it is unset.
print(os.environ.get('PYTORCH_ENABLE_MPS_FALLBACK', '<not set>'))

1


## Building a subset of the data

In [5]:
# Editable install of the local package; %pip (unlike !pip) installs into the running kernel's environment.
%pip install -e ../

Obtaining file:///Users/dhritimansagar/Dev/dog_breed_id
  Preparing metadata (setup.py) ... [?25ldone
[?25hInstalling collected packages: dog_breed_id
  Attempting uninstall: dog_breed_id
    Found existing installation: dog_breed_id 0.0.1
    Uninstalling dog_breed_id-0.0.1:
      Successfully uninstalled dog_breed_id-0.0.1
  Running setup.py develop for dog_breed_id
Successfully installed dog_breed_id-0.0.1


In [6]:
from dog_breed_id.data_preprocessing import read_csv_with_array_columns

In [7]:
#| export
import cv2
from pathlib import Path
from PIL import Image
import pandas as pd
import numpy as np
import glob
import os
from matplotlib import pyplot as plt
from miniai.datasets import show_images
import shutil

In [8]:
# Load the full annotation table; the helper is asked to treat 'bboxes' as an array column.
df = read_csv_with_array_columns('../data/dogs.csv', ['bboxes'])
# Fixed seed so a fresh kernel draws the same 10% sample every time.
subset = df.sample(frac=0.1, random_state=42)

def copy_files(path):
    """Copy `path` to its mirror location under 'datasubset'.

    The destination is the path string with every occurrence of the
    substring 'data' replaced by 'datasubset'. Missing destination folders
    are created first. Nothing is returned.
    """
    destination = str(path).replace('data', 'datasubset')
    Path(destination).parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(path, destination)

In [9]:
# Build the on-disk subset once; on later runs reload it so the same rows are used.
# The CSV is written last, so its presence (not the folder's) marks a complete subset:
# a folder left behind by an interrupted copy no longer makes the reload branch crash.
subset_dir = Path('../datasubset')
subset_csv = subset_dir / 'dogs.csv'
if not subset_csv.exists():
    # create the folder up front so writing the CSV works even if no image was copied
    subset_dir.mkdir(parents=True, exist_ok=True)
    subset['image'].apply(copy_files)
    subset.to_csv(str(subset_csv))
else:
    subset = read_csv_with_array_columns(str(subset_csv), ['bboxes'])

## Mask RCNN
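Mask R-CNN belongs to the R-CNN family of object detectors. The code in this section fine-tunes the closely related Faster R-CNN detector from torchvision. **TODO:** add more background here.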

In [10]:
#| export
import torch
import torchvision
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import functional as F

In [11]:
#| export
def get_classes_from_frame(df, column=None):
    """Return `['background']` followed by the unique class names found in `df[column]`.

    Args:
        df: DataFrame holding one class label per row.
        column: Name of the label column; 'category' is used when None.

    Returns:
        A list starting with 'background', then each distinct value of the
        column in order of first appearance.
    """
    if column is None: column = 'category'
    # Read from the frame that was passed in (not a notebook-level global) and honour `column`.
    return ['background'] + df[column].unique().tolist()

In [12]:
#| export
import os
import torch

from torchvision.io import read_image
from torchvision.ops.boxes import masks_to_boxes
from torchvision.transforms import functional as F
from sklearn.preprocessing import LabelEncoder

class DogsSubsetDataset(torch.utils.data.Dataset):
    """Detection dataset backed by a dataframe of dog images.

    Each row of `df` is read through its 'image', 'bboxes' and 'category'
    columns. Indexing yields `(image, target)` where `target` is a dict with
    the keys 'boxes', 'labels', 'image_id', 'area' and 'iscrowd'.
    """

    def __init__(self, df):
        super().__init__()
        self.df = df
        self.classes = get_classes_from_frame(df)
        # NOTE(review): LabelEncoder numbers classes by sorted name, so 'background'
        # only receives id 0 if it sorts before every category name. torchvision
        # detection heads reserve label 0 for background — confirm against the data.
        self.le = LabelEncoder()
        self.le.fit(self.classes)

    def _label2id(self, labels):
        """Encode an iterable of class names as integer ids."""
        return self.le.transform(labels)

    def _id2labels(self, ids):
        """Decode an iterable of integer ids back to class names."""
        # fixed: previously referenced the undefined name `labels`
        return self.le.inverse_transform(ids)

    def __getitem__(self, idx):
        """Load the image at position `idx` and build its detection target dict."""
        item = self.df.iloc[idx]
        # str() so a pathlib.Path stored in the frame is accepted as well as a plain string
        img = F.convert_image_dtype(read_image(str(item['image'])), torch.float)
        boxes = torch.as_tensor(item['bboxes'], dtype=torch.float32)
        # product of the two side lengths; assumes rows are (x1, y1, x2, y2) — TODO confirm
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        num_objs = boxes.shape[0]
        # every box in a row shares that row's single category
        labels  = torch.ones((num_objs,), dtype=torch.int64) * self._label2id([item['category']])[0]
        image_id = idx
        # no box is marked as a crowd region
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = image_id
        target['area'] = area
        target['iscrowd'] = iscrowd
        return img, target

    def __len__(self):
        """Number of rows in the underlying dataframe."""
        return self.df.shape[0]

def collate_fn(data):
    """Turn a list of `(image, target)` samples into `(images, targets)`.

    The images are stacked along a new leading dimension, so they must all
    share one shape. The targets come back as a plain list in sample order.
    """
    frames = []
    targets = []
    for sample in data:
        frames.append(sample[0])
        targets.append(sample[1])
    batch = torch.stack(frames, dim=0)
    return batch, targets

In [13]:
#| export
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
def get_fasterrcnn_model(num_classes):
    """Build a torchvision Faster R-CNN (ResNet-50 FPN, "DEFAULT" weights) with a fresh box head.

    The stock box predictor is replaced by a `FastRCNNPredictor` that keeps
    the same input feature width and outputs `num_classes` classes.
    """
    detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
    heads = detector.roi_heads
    feature_width = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(feature_width, num_classes)
    return detector

In [23]:
#| export
from tqdm.auto import tqdm
from dog_breed_id import engine
from dog_breed_id.engine import train_one_epoch, evaluate

def train_faster_rcnn(df, num_epochs=2, batch_size=64, test_batch_size=16,
                      test_size=50, lr=0.005, save_path='model-fasterrcnn.pt'):
    """Fine-tune a Faster R-CNN detector on `df` and save its weights.

    Args:
        df: DataFrame accepted by `DogsSubsetDataset`.
        num_epochs: Number of passes over the training split.
        batch_size: Batch size of the training loader.
        test_batch_size: Batch size of the evaluation loader.
        test_size: Number of randomly chosen samples held out for evaluation.
        lr: Learning rate handed to Adam.
        save_path: File the final `state_dict` is written to.

    Returns:
        The trained model, left on the device it was trained on.

    Raises:
        ValueError: If `test_size` is not strictly between 0 and the number
            of rows, which would leave one of the two splits empty.
    """
    # prefer CUDA, then the MPS backend, and fall back to the CPU;
    # always a torch.device so downstream code sees one consistent type
    if torch.cuda.is_available():
        device = torch.device('cuda')
    elif torch.backends.mps.is_available():
        device = torch.device('mps')
    else:
        device = torch.device('cpu')
    print('Using device ', device)

    # one output per class in the frame, including the leading 'background' entry
    num_classes = len(get_classes_from_frame(df))

    # train and test share the same underlying dataset; only the indices differ
    full_dataset = DogsSubsetDataset(df)
    num_samples = len(full_dataset)
    if not 0 < test_size < num_samples:
        raise ValueError(
            f"need 0 < test_size < number of rows ({num_samples}), got test_size={test_size}"
        )

    # random split: the last `test_size` shuffled indices are held out for evaluation
    indices = torch.randperm(num_samples).tolist()
    split = num_samples - test_size
    dataset = torch.utils.data.Subset(full_dataset, indices[:split])
    dataset_test = torch.utils.data.Subset(full_dataset, indices[split:])

    # define training and validation data loaders
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True,
        collate_fn=collate_fn)

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=test_batch_size, shuffle=False,
        collate_fn=collate_fn)

    # get the model using our helper function and move it to the chosen device
    model = get_fasterrcnn_model(num_classes)
    model.to(device)

    # optimise only the parameters that require gradients
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(params, lr=lr)
    # decay the learning rate by 10x every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=3,
                                                   gamma=0.1)

    for epoch in tqdm(range(num_epochs)):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
        # update the learning rate
        lr_scheduler.step()
        # evaluate on the held-out split
        evaluate(model, data_loader_test, device=device)
    torch.save(model.state_dict(), save_path)
    return model

In [17]:
# Keep the returned detector: the inference cell further down refers to it as `model`.
model = train_faster_rcnn(subset)

Using device  mps


  0%|                                                                                                                                                                                              | 0/10 [00:00<?, ?it/s]

Epoch: [0]  [  0/566]  eta: 1:48:49  lr: 0.000014  loss: 5.1205 (5.1205)  loss_classifier: 4.9846 (4.9846)  loss_box_reg: 0.1288 (0.1288)  loss_objectness: 0.0002 (0.0002)  loss_rpn_box_reg: 0.0070 (0.0070)  time: 11.5367  data: 0.0242
Epoch: [0]  [ 10/566]  eta: 1:13:12  lr: 0.000102  loss: 5.0822 (4.9635)  loss_classifier: 4.9337 (4.8175)  loss_box_reg: 0.1431 (0.1390)  loss_objectness: 0.0002 (0.0018)  loss_rpn_box_reg: 0.0050 (0.0052)  time: 7.9006  data: 0.0269
Epoch: [0]  [ 20/566]  eta: 1:11:03  lr: 0.000191  loss: 4.2258 (3.9367)  loss_classifier: 4.0823 (3.7909)  loss_box_reg: 0.1431 (0.1387)  loss_objectness: 0.0002 (0.0023)  loss_rpn_box_reg: 0.0043 (0.0049)  time: 7.6228  data: 0.0271
Epoch: [0]  [ 30/566]  eta: 1:09:55  lr: 0.000279  loss: 0.6968 (2.8527)  loss_classifier: 0.5480 (2.7024)  loss_box_reg: 0.1451 (0.1428)  loss_objectness: 0.0005 (0.0025)  loss_rpn_box_reg: 0.0045 (0.0050)  time: 7.7871  data: 0.0282
Epoch: [0]  [ 40/566]  eta: 1:08:24  lr: 0.000367  loss: 0.



creating index...
index created!
Test:  [0/4]  eta: 0:01:55  model_time: 28.7881 (28.7881)  evaluator_time: 0.0149 (0.0149)  time: 28.8311  data: 0.0163


 10%|█████████████████▌                                                                                                                                                             | 1/10 [1:24:12<12:37:48, 5052.08s/it]

Test:  [3/4]  eta: 0:00:18  model_time: 21.1218 (18.9122)  evaluator_time: 0.0099 (0.0093)  time: 18.9449  data: 0.0175
Test: Total time: 0:01:15 (18.9450 s / it)
Averaged stats: model_time: 21.1218 (18.9122)  evaluator_time: 0.0099 (0.0093)
Accumulating evaluation results...
DONE (t=0.04s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.112
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.147
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.135
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.112
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.182
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.183
 Average Recall     (AR) @[ IoU=0.50:0.95 | area

 10%|█████████████████▌                                                                                                                                                             | 1/10 [1:35:01<14:15:09, 5701.11s/it]


KeyboardInterrupt: 

In [18]:
# Dataset over the sampled subset; the next cell pulls a single example from it for inference.
ds = DogsSubsetDataset(subset)

In [21]:
# Smoke-test inference on one image.
# NOTE: expects `model` to hold a trained detector (e.g. the return value of train_faster_rcnn).
model.eval()
# The bare `model.to` was a no-op attribute access; instead send the input to
# whichever device the model's weights already live on.
model_device = next(model.parameters()).device
with torch.no_grad():  # forward pass only, no gradients needed
    preds = model(ds[0][0].unsqueeze(0).to(model_device))
preds

tensor([[[0.1373, 0.1255, 0.1255,  ..., 0.1922, 0.1804, 0.4549],
         [0.1412, 0.1333, 0.1255,  ..., 0.1765, 0.1608, 0.3725],
         [0.1333, 0.1294, 0.1137,  ..., 0.1804, 0.1725, 0.3176],
         ...,
         [0.8118, 0.7882, 0.7882,  ..., 0.2549, 0.2275, 0.2275],
         [0.8157, 0.7333, 0.6471,  ..., 0.2314, 0.2275, 0.2275],
         [0.8196, 0.8039, 0.8000,  ..., 0.2431, 0.2157, 0.2157]],

        [[0.1216, 0.1216, 0.1294,  ..., 0.2118, 0.2078, 0.4863],
         [0.1255, 0.1294, 0.1294,  ..., 0.1961, 0.1882, 0.4039],
         [0.1176, 0.1255, 0.1176,  ..., 0.2000, 0.2000, 0.3451],
         ...,
         [0.6902, 0.6667, 0.6667,  ..., 0.2431, 0.2235, 0.2235],
         [0.6941, 0.6118, 0.5255,  ..., 0.2157, 0.2157, 0.2157],
         [0.6980, 0.6824, 0.6863,  ..., 0.2196, 0.1922, 0.2000]],

        [[0.1098, 0.1059, 0.1098,  ..., 0.2863, 0.2824, 0.5608],
         [0.1137, 0.1137, 0.1098,  ..., 0.2706, 0.2627, 0.4784],
         [0.1059, 0.1098, 0.0980,  ..., 0.2745, 0.2745, 0.

In [36]:
#| hide
import nbdev; nbdev.nbdev_export()