# YoloV3 Model Implementation

The following implements a YoloV3 model using a pretrained ResNet (ResNet-18) as its base.

In [1]:
import json
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision as torchv
from PIL import Image

from yaml import load, dump
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper

In [2]:
# Directory added to the import search path so that modules stored there can
# be imported by later cells.
data_path = "../data"
# Register the directory only once: re-running this cell would otherwise keep
# appending duplicate entries to sys.path.
if data_path not in sys.path:
    sys.path.append(data_path)

In [3]:
from augment_data import augment_data

## Parameters

In [4]:
class Parameters:
    """Training hyper-parameters and run configuration.

    Every setting is looked up in an optional mapping and falls back to a
    built-in default when the key is absent.
    """

    def __init__(self, params=None):
        """Populate the settings from ``params``.

        Args:
            params: Optional mapping of setting name to value (for example the
                parsed contents of a YAML file). ``None`` means "use every
                default"; this is also what an empty YAML document parses to.
        """
        # Build a fresh dict per call instead of sharing one mutable default
        # object between all calls, and tolerate an explicit None.
        if params is None:
            params = {}

        self.num_epochs            = params.get("num_epochs", 100)                   # Number of epochs to train for
        self.epoch_start           = params.get("epoch_start", 0)                    # Start counting epochs from this number
        self.batch_size            = params.get("batch_size", 1)                     # Number of images in each batch
        self.checkpoint_step       = params.get("checkpoint_step", 2)                # How often to save checkpoints (epochs)
        self.validation_step       = params.get("validation_step", 2)                # How often to perform validation (epochs)
        self.num_validation        = params.get("num_validation", 1000)              # How many validation images to use
        self.num_workers           = params.get("num_workers", 4)                    # Number of workers
        self.learning_rate         = params.get("learning_rate", 0.045)              # Learning rate used for training
        self.cuda                  = params.get("cuda", "0")                         # GPU ids used for training
        self.use_gpu               = params.get("use_gpu", True)                     # Whether to use the GPU for training
        self.pretrained_model_path = params.get("pretrained_model_path", None)       # Path to pretrained model
        self.save_model_path       = params.get("save_model_path", "./.checkpoints") # Path to save model
        self.log_file              = params.get("log_file", "./train.log")           # Path to log file

        # A requested GPU is only honoured when CUDA is actually available.
        self.use_gpu = self.use_gpu and torch.cuda.is_available()
        
# Read overrides from params.yml when the file exists in the working
# directory; otherwise run with the built-in defaults.
if os.path.isfile("params.yml"):
    with open("params.yml") as file:
        # NOTE(review): `Loader` is PyYAML's full loader, which can construct
        # arbitrary Python objects. That is fine for a trusted local file, but
        # prefer a safe loader if this file can come from elsewhere.
        # An empty YAML document parses to None, so fall back to an empty
        # mapping rather than handing None to Parameters.
        params = Parameters(load(file, Loader=Loader) or {})
else:
    params = Parameters()

## Load Data

See the [data augmentation notebook](https://github.com/antoniojkim/WheresWaldo-YoloV3/blob/master/data/data.ipynb) for more details on how the data was curated and augmented.

In [5]:
# Seed NumPy's global RNG before generating the augmented data.
# NOTE(review): presumably augment_data draws from np.random, which would make
# the generated set reproducible — confirm in the augment_data module.
np.random.seed(20200422)
# `augment_times` is forwarded as-is; its exact meaning is defined by
# augment_data (not visible in this notebook).
data = augment_data(augment_times=1000)

In [6]:
class Dataset:
    """Map-style dataset yielding ``(image_tensor, labels)`` pairs.

    Each record in ``data`` is read through the keys ``"image"``, ``"crop"``
    (train mode only) and, when present, ``"box"``.
    """

    def __init__(self, data, mode="train"):
        """Store the records and the access mode.

        Args:
            data: Indexable collection of records (see class docstring).
            mode: ``"train"`` to return the record's crop window, ``"test"``
                to return the whole image.
        """
        assert mode in ("train", "test")
        self.data = data
        self.mode = mode

        self.to_tensor = torchv.transforms.ToTensor()

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(image_tensor, labels)`` for the record at ``index``.

        ``labels`` is a list with one ``(center_x, center_y, w, h)`` tuple when
        the record has a ``"box"`` entry, and an empty list otherwise.
        """
        record = self.data[index]
        image = record["image"]

        if self.mode == "train":
            # Train mode: cut the crop window described by the record.
            crop = record["crop"]
            x, y, w, h = crop['x'], crop['y'], crop['w'], crop['h']
            image = image[y:y+h, x:x+w]
        # Test mode previously sliced with x/y/w/h before they were assigned,
        # which raised NameError on every access.
        # NOTE(review): returning the full, uncropped image is inferred from
        # the train/test branch structure — confirm this is the intent.

        labels = []
        if "box" in record:
            # Read from this dataset's own records, not the notebook-level
            # `data` global, so the class works with any data passed in.
            box = record["box"]
            x, y, w, h = box['x'], box['y'], box['w'], box['h']
            # Convert the top-left (x, y) corner to the box centre.
            # NOTE(review): box coordinates are not shifted by the crop
            # offset — confirm which frame they are expressed in.
            labels.append((x + w // 2, y + h // 2, w, h))

        return self.to_tensor(image), labels

In [7]:
# Batches for training: the dataset is wrapped in "train" mode, the order is
# reshuffled, and a trailing incomplete batch is dropped.
train_dataloader = torch.utils.data.DataLoader(
    dataset=Dataset(data, mode="train"),
    batch_size=params.batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=params.num_workers,
)

In [8]:
# Batches for evaluation: the dataset is wrapped in "test" mode.
# NOTE(review): this loader is built from the same `data` object as the
# training loader, and it also shuffles and drops the last incomplete batch —
# confirm that a separate held-out split is not intended here.
test_dataloader = torch.utils.data.DataLoader(
    dataset=Dataset(data, mode="test"),
    batch_size=params.batch_size,
    shuffle=True,
    drop_last=True,
    num_workers=params.num_workers,
)

## Define Model

In [22]:
class Model(torch.nn.Module):
    """Wrapper around a pretrained torchvision ResNet-18 with frozen weights.

    At this stage the forward pass simply returns the ResNet output.
    """

    def __init__(self):
        super().__init__()
        self.resnet = torchv.models.resnet18(pretrained=True)
        # Freeze the pretrained weights. The attribute autograd checks is
        # `requires_grad`; assigning the misspelt `require_grad` only adds an
        # unused attribute and leaves every parameter trainable.
        for param in self.resnet.parameters():
            param.requires_grad = False

    def forward(self, x):
        """Run ``x`` through the ResNet and return its output."""
        return self.resnet(x)

    # `__call__` is intentionally not overridden: torch.nn.Module.__call__
    # already dispatches to forward() and additionally runs registered hooks,
    # which a hand-written override would skip.
    
# Instantiate the network once at notebook level; later cells use `model`.
model = Model()

In [23]:
# Move the model to the GPU when enabled. The isinstance guard keeps this cell
# idempotent: without it, re-running the cell would wrap an already wrapped
# model in another DataParallel layer.
# NOTE(review): params.cuda (the configured GPU ids) is not consulted here, so
# DataParallel uses its default device selection — confirm this is intended.
if params.use_gpu and not isinstance(model, torch.nn.DataParallel):
    model = torch.nn.DataParallel(model).cuda()

In [29]:
# Total number of elements across all model parameters (frozen ones included).
# Summing Python ints cannot overflow, unlike staging the counts in an int32
# NumPy buffer, whose sum can wrap on platforms where the default integer is
# 32 bits wide.
num_parameters = sum(p.numel() for p in model.parameters())
num_parameters

11689512

In [25]:
# Smoke test: pull a single batch and push it through the model.
# next(iter(...)) states "one batch" directly, replacing a loop that broke out
# after its first iteration; an empty loader now raises StopIteration instead
# of silently leaving `images` and `output` undefined.
images, labels = next(iter(train_dataloader))
print(images.shape)
# Call the module itself rather than .forward() so that torch.nn.Module's
# call machinery (including any registered hooks) is used.
output = model(images)

torch.Size([1, 3, 1447, 2287])


In [26]:
# Inspect the shape of the output produced by the forward pass above.
# NOTE(review): the second dimension presumably comes from the stock ResNet
# classification head — a detection head would still need to replace it.
output.shape

torch.Size([1, 1000])

In [31]:
%%timeit
with torch.no_grad():
    output = model.forward(images)

1.43 s ± 57.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
