## Imports

In [1]:
from __future__ import print_function, division
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.optim.lr_scheduler import StepLR
import pandas as pd
from skimage import io, transform
import scipy.io
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from PIL import Image, ImageDraw
from matplotlib.patches import Rectangle
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import time

import warnings
warnings.filterwarnings("ignore")

plt.ion() 

## Pre-process data

In [2]:
train_path = './data/cars_train/'
test_path = './data/cars_test/'
devkit_path = './data/cars_devkit'

train_annos_path = devkit_path + '/cars_train_annos.csv'
test_annos_path = devkit_path + '/cars_test_annos.csv'
cars_meta_path = devkit_path + '/cars_meta.csv'

train_annos = pd.read_csv(train_annos_path, header=None)
test_annos = pd.read_csv(test_annos_path, header=None)
cars_meta = pd.read_csv(cars_meta_path, header=None)

train_annos.columns = ['file', 'bbox_x1', 'bbox_y1', 'bbox_x2', 'bbox_y2', 'label']
test_annos.columns = ['file', 'bbox_x1', 'bbox_y1', 'bbox_x2', 'bbox_y2', 'label']
cars_meta.columns = ['label_details']
train_annos['label'] = train_annos['label']-1
test_annos['label'] = test_annos['label']-1

def label_details(row):
    index = row['label']
    return cars_meta.loc[index, 'label_details']

train_annos['label_details'] = train_annos.apply(lambda row: label_details(row), axis=1)
test_annos['label_details'] = test_annos.apply(lambda row: label_details(row), axis=1)

def rgb_images(data_path, df):
    for i, row in df.iterrows():
        file = row['file']
        image = Image.open(data_path + file)
        num_channel = len(image.split())
        if (num_channel != 3):
            df.loc[i, 'rgb'] = False
        else:
            df.loc[i, 'rgb'] = True
    df = df[df.rgb == True]
    df = df.drop('rgb', axis=1)
    return df

train_annos = rgb_images(train_path, train_annos)
test_annos = rgb_images(test_path, test_annos)

train_annos.to_csv('train_annos_cleaned.csv', index=False)
test_annos.to_csv('test_annos_cleaned.csv', index=False)

## Create the custom dataset

In [3]:
class CarsDataset(Dataset):

    def __init__(self, annos_path, data_dir, transform=None):
        """
        Args:
            annos_path (string): Path to the csv file with annotations.
            data_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.car_details = pd.read_csv(annos_path)
        self.car_details = np.array(self.car_details)

        self.data_dir = data_dir
        self.transform = transform

    def __len__(self):
        return len(self.car_details)

    def __getitem__(self, idx):
        img_name = os.path.join(self.data_dir, self.car_details[idx][0])
        image = Image.open(img_name)
        num_channel = len(image.split())
        car_class = self.car_details[idx][5]
        car_details = self.car_details[idx][6]
        x1, y1, x2, y2 = self.car_details[idx][1], self.car_details[idx][2], self.car_details[idx][3], self.car_details[idx][4]
        bounding_box = [[x1, y1, x2, y2]]
        
        # Pytorch's Faster RCNN recognizes labels of 0 as background
        if car_class == 0:
            car_class = 196
        
        target = {}
        target['boxes'] = torch.as_tensor(bounding_box, dtype=torch.float32)
        target['labels'] = torch.as_tensor(car_class, dtype=torch.int64)
        
        if self.transform:
            image = self.transform(image)
            
        return image, target, car_details

## Create the Dataloaders

In [4]:
def train_valid_split(train_dataset):
    np.random.random(2020)

    cars = {}
    for i in range(len(train_dataset)):
        label = train_dataset[i][2]
        if label in cars:
            cars[label].append(i)
        else:
            cars[label] = [i]

    subset_indices_train = []
    subset_indices_valid = []
    for car in cars:
        indices = cars[car]
        total = len(indices)
        np.random.shuffle(indices)
        subset_indices_valid.extend(indices[:int(0.15 * total)])
        subset_indices_train.extend(indices[int(0.15 * total):])

    assert (len(subset_indices_train) + len(subset_indices_valid) == len(train_dataset))
    
    np.save("train_indices.npy", subset_indices_train)
    np.save("valid_indices.npy", subset_indices_valid)
    
    return subset_indices_train, subset_indices_valid

transform = transforms.Compose([transforms.Resize((300,300)), transforms.ToTensor()])

cars_data_train = CarsDataset('train_annos_cleaned.csv', train_path, transform=transform)

subset_indices_train = np.load("train_indices.npy")
subset_indices_valid = np.load("valid_indices.npy")

# Collate function used for the Dataloader to keep data in the correct format
def collate_fn(batch):
    image = [item[0] for item in batch]
    target = [item[1] for item in batch]
    detail = [item[2] for item in batch]
    return [image, target, detail]

train_loader = torch.utils.data.DataLoader(cars_data_train, batch_size=64, sampler=SubsetRandomSampler(subset_indices_train), collate_fn = collate_fn)
val_loader = torch.utils.data.DataLoader(cars_data_train, batch_size=64, sampler=SubsetRandomSampler(subset_indices_valid), collate_fn = collate_fn)

dset_loaders = {}
dset_loaders['train'] = train_loader
dset_loaders['test'] = val_loader

## Set up the Faster RCNN Model

In [5]:
# load the pretrained Faster RCNN
model = models.detection.fasterrcnn_resnet50_fpn(pretrained = True)

# replace the classifier with a new one, that has
# num_classes which in the dataset
num_classes = 196
# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max')

# train on the GPU or on the CPU, if a GPU is not available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# move model to the right device
model.to(device)

FasterRCNN(
  (transform): GeneralizedRCNNTransform()
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequential(
            (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): FrozenBatchNorm2d()
          )
  

In [6]:
def train_model(model, criterion, optimizer, scheduler, n_epochs):
    model.train()
    all_losses = []
    all_accuracies = []
    for epoch in range(n_epochs):
        losses = []
        correct = 0
        for i, data in enumerate(train_loader, 0):
            inputs, targets, details = data
            
            # convert the intputs and targets to tensors to send to GPU
            inputs = [img.to(device) for img in inputs]
            targets = [{'boxes':d['boxes'].to(device), 'labels':d['labels']} for d in targets]
                
            optimizer.zero_grad()
            outputs = model(inputs, targets)
            _, predicted = torch.max(outputs.data, 1)
            loss = criterion(outputs, targets)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
            correct += (targets==predicted).sum().item() 
        accuracy = 100/64*correct/len(train_loader)
        print("Epoch %s, Training Accuracy: %.4f %%, Training Loss: %.4f" % (epoch+1, accuracy, np.average(losses)))
        all_losses.append(np.average(losses))
        all_accuracies.append(accuracy)
    return all_losses, all_accuracies

def test_model(model, val_loader):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0.0
    with torch.no_grad():
        for i, data in enumerate(val_loader, 0):
            inputs, targets, details = data
            
             # convert the intputs and targets to tensors to send to GPU
            inputs = [img.to(device) for img in inputs]
            targets = [{'boxes':d['boxes'].to(device), 'labels':d['labels']} for d in targets]
            
            outputs = model_ft(inputs, targets)
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    test_acc = 100.0 * correct / total
    print('Validation accuracy: %.4f %%' % (test_acc))
    return test_acc

In [7]:
# Alternative functions that trains and compute validation accuracy at the same time
def train_val_model(model, criterion, optimizer, lr_scheduler, num_epochs=25):
    since = time.time()

    best_model = model
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            for data in dset_loaders[phase]:
                # get the inputs
                inputs, targets, details = data
                
                # convert the inputs and labels from tuples to lists
                inputs = [img.to(device) for img in inputs]
                targets = [{'boxes':d['boxes'].to(device), 'labels':d['labels']} for d in targets]

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs, targets)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, targets)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.item()
                running_corrects += torch.sum(preds == targets).item()

            epoch_loss = running_loss / dset_sizes[phase]
            epoch_acc = running_corrects / dset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model = copy.deepcopy(model)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    return best_model

## Run the model

In [8]:
training_losses, training_accuracies = train_model(model, criterion, optimizer, lrscheduler, n_epochs=10)
test_accuracy = test_model(model_ft, val_loader)

RuntimeError: CUDA out of memory. Tried to allocate 2.44 GiB (GPU 0; 4.00 GiB total capacity; 701.81 MiB already allocated; 2.01 GiB free; 722.00 MiB reserved in total by PyTorch)