<a href="https://colab.research.google.com/github/ericae9/Autonomous-Vehicle-Object-Detection/blob/main/Faster_RCNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook contains the code to fine-tune the [PyTorch Faster R-CNN model](http://pytorch.org/vision/stable/models.html#object-detection-instance-segmentation-and-person-keypoint-detection).

In [None]:
import random
from PIL import Image
import cv2
import copy

Mount Google Drive

In [None]:
# Mount Google Drive at /content/drive/; the paths used by later cells
# (BASE_PATH and the labels file) live underneath this mount point.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
import os
# Edit the file path below to go to the folder containing the training, val, and
# test images folders.
BASE_PATH = '/content/drive/My Drive/CV_Project/'
if not os.path.exists(BASE_PATH):
    os.makedirs(BASE_PATH)

!pwd
!ls
os.chdir(BASE_PATH)
!pwd
!ls

Before running the cell below, place the file [pt_util.py](https://gist.github.com/pjreddie/e531394d779af2da9201096af0dba78a) from [CSE 543 Deep Learning](https://github.com/pjreddie/uwnet) in the folder defined by `BASE_PATH`.

In [None]:
import torch
import torch.nn as nn
import torchvision
from torchvision import datasets
from torchvision import transforms
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import numpy as np
import os
import torch.nn.functional as F
import torch.optim as optim
import random
import math
import h5py
import sys
sys.path.append(BASE_PATH)
import pt_util

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing on the first `.to(device)` call.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Create helper functions to save and load the model, which are from [CSE 543 Deep Learning](https://github.com/pjreddie/uwnet).

In [None]:
# The functions in this cell are from CSE 543 Deep Learning.
import glob
import re
import matplotlib.pyplot as plt
try:
    # For 2.7
    import cPickle as pickle
except:
    # For 3.x
    import pickle


def restore(net, save_file):
    """Restores the weights from a saved file

    This does more than the simple Pytorch restore. It checks that the names
    of variables match, and if they don't doesn't throw a fit. It is similar
    to how Caffe acts. This is especially useful if you decide to change your
    network architecture but don't want to retrain from scratch.

    Entries whose name is missing from the net, or whose shape differs from
    the net's tensor of the same name, are skipped and reported on stdout
    instead of raising.

    Args:
        net(torch.nn.Module): The net to restore
        save_file(str): The file path of a state dict written by torch.save
    Raises:
        Exception: Re-raises whatever copy_() raised for a tensor whose shape
            matched, after printing which parameter was being copied.
    """

    net_state_dict = net.state_dict()
    # Load onto the CPU first so a checkpoint written on a GPU machine can
    # still be restored on a CPU-only one. copy_() below moves every tensor to
    # whatever device the matching tensor of the net lives on.
    restore_state_dict = torch.load(save_file, map_location='cpu')

    restored_var_names = set()

    print('Restoring:')
    for var_name, restore_value in restore_state_dict.items():
        if var_name not in net_state_dict:
            # Reported below under "Did not restore".
            continue
        var_size = net_state_dict[var_name].size()
        restore_size = restore_value.size()
        if var_size != restore_size:
            print('Shape mismatch for var', var_name, 'expected', var_size, 'got', restore_size)
            continue
        if isinstance(net_state_dict[var_name], torch.nn.Parameter):
            # backwards compatibility for serialized parameters
            net_state_dict[var_name] = restore_value.data
        try:
            net_state_dict[var_name].copy_(restore_value)
            # The size estimate assumes 4 bytes per element.
            print(str(var_name) + ' -> \t' + str(var_size) + ' = ' + str(int(np.prod(var_size) * 4 / 10**6)) + 'MB')
            restored_var_names.add(var_name)
        except Exception:
            print('While copying the parameter named {}, whose dimensions in the model are'
                  ' {} and whose dimensions in the checkpoint are {}, ...'.format(
                      var_name, var_size, restore_size))
            # Bare raise keeps the original traceback intact.
            raise

    # Names present in the file but not copied, and names in the net left untouched.
    ignored_var_names = sorted(set(restore_state_dict.keys()) - restored_var_names)
    unset_var_names = sorted(set(net_state_dict.keys()) - restored_var_names)
    print('')
    if len(ignored_var_names) == 0:
        print('Restored all variables')
    else:
        print('Did not restore:\n\t' + '\n\t'.join(ignored_var_names))
    if len(unset_var_names) == 0:
        print('No new variables')
    else:
        print('Initialized but did not modify:\n\t' + '\n\t'.join(unset_var_names))

    print('Restored %s' % save_file)


def restore_latest(net, folder):
    """Restores the most recent weights in a folder

    "Most recent" means the `*.pt` file in `folder` with the newest
    modification time.

    Args:
        net(torch.nn.module): The net to restore
        folder(str): The folder path
    Returns:
        int: The last run of digits in the restored file's name, taken as the
            epoch. Returns 0 if the folder holds no `*.pt` file or the file
            name contains no digits.
    """

    checkpoints = sorted(glob.glob(os.path.join(folder, '*.pt')), key=os.path.getmtime)
    if not checkpoints:
        return 0

    latest = checkpoints[-1]
    restore(net, latest)
    # Only look at the file name: digits in the folder path (for example an
    # experiment version such as "exp-4/") must not be mistaken for the epoch.
    numbers = re.findall(r'\d+', os.path.basename(latest))
    return int(numbers[-1]) if numbers else 0


def save(net, file_name, num_to_keep=1):
    """Saves the net to file, creating folder paths if necessary.

    After saving, files in the same folder that share the new file's extension
    are ordered by modification time and all but the newest `num_to_keep` are
    deleted.

    Args:
        net(torch.nn.module): The network to save
        file_name(str): the path to save the file.
        num_to_keep(int): Specifies how many saved states to keep, counting the
            one just written. Defaults to 1. Specifying <= 0 will not remove
            any previous saves.
    """

    # A bare file name has an empty dirname; treat that as the current
    # directory so makedirs and the glob below stay inside it.
    folder = os.path.dirname(file_name) or '.'
    os.makedirs(folder, exist_ok=True)
    torch.save(net.state_dict(), file_name)
    extension = os.path.splitext(file_name)[1]
    checkpoints = sorted(glob.glob(os.path.join(folder, '*' + extension)), key=os.path.getmtime)
    print('Saved %s\n' % file_name)
    if num_to_keep > 0:
        # Everything before the last num_to_keep entries is an older save.
        for stale_file in checkpoints[:-num_to_keep]:
            os.remove(stale_file)

def write_log(filename, data):
    """Pickles and writes data to a file

    The parent folder is created when it does not exist. An existing file is
    overwritten.

    Args:
        filename(str): File name
        data(pickleable object): Data to save
    """

    folder = os.path.dirname(filename)
    # A bare file name has no folder component, and os.makedirs('') would raise.
    if folder:
        os.makedirs(folder, exist_ok=True)
    # The with-block closes the file even if pickling fails part-way.
    with open(filename, 'wb') as log_file:
        pickle.dump(data, log_file)

def read_log(filename, default_value=None):
    """Reads pickled data or returns the default value if none found

    Args:
        filename(str): File name
        default_value(anything): Value to return if no file is found
    Returns:
        unpickled file contents, or default_value when filename does not exist
    """

    if not os.path.exists(filename):
        return default_value
    # NOTE: unpickling can execute arbitrary code, so only read log files that
    # this code (or another trusted source) wrote.
    with open(filename, 'rb') as log_file:
        return pickle.load(log_file)

def show_images(images, titles=None, columns=5, max_rows=5):
    """Shows images in a tiled format

    Args:
        images(list[np.array]): Images to show
        titles(list[string]): Titles for each of the images. May be shorter
            than images; images without a matching title are shown untitled.
        columns(int): How many columns to use in the tiling
        max_rows(int): If there are more than columns * max_rows images, only the first n of them will be shown.
    """

    images = images[:min(len(images), max_rows * columns)]

    # plt.subplot needs an integer row count; true division would produce a
    # float under Python 3. Round up so a partially filled last row still fits.
    rows = (len(images) + columns - 1) // columns

    plt.figure(figsize=(20, 10))
    for ii, image in enumerate(images):
        plt.subplot(rows, columns, ii + 1)
        plt.axis('off')
        if titles is not None and ii < len(titles):
            plt.title(str(titles[ii]))
        plt.imshow(image)
    plt.show()

def plot(x_values, y_values, title, xlabel, ylabel):
    """Draws one line chart on a new figure and displays it

    Args:
        x_values(list or np.array): Horizontal coordinates of the points
        y_values(list or np.array): Vertical coordinates of the points
        title(str): Text shown above the chart
        xlabel(str): Caption of the horizontal axis
        ylabel(str): Caption of the vertical axis
    """

    figure_size = (20, 10)
    plt.figure(figsize=figure_size)

    # Draw the line first, then decorate the axes it created.
    plt.plot(x_values, y_values)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    plt.show()

def to_scaled_uint8(array):
    """Returns a normalized uint8 scaled to 0-255. This is useful for showing images especially of floats.

    The minimum of the input maps to 0 and the maximum to 255. A constant
    input (where every value equals the minimum) maps to all zeros, and an
    empty input is returned as an empty uint8 array.

    Args:
        array(np.array): The array to normalize
    Returns:
        np.array normalized and of type uint8
    """

    array = np.array(array, dtype=np.float32)
    if array.size == 0:
        # np.min / np.max raise on empty input; there is nothing to scale.
        return array.astype(np.uint8)
    array -= np.min(array)
    peak = np.max(array)
    # After subtracting the minimum, a peak of 0 means the input was constant;
    # skip the scaling to avoid dividing by zero.
    if peak > 0:
        array *= (255. / peak)
    array = array.astype(np.uint8)
    return array

In [None]:
# The functions in this cell are from CSE 543 Deep Learning.
def save_model(model, file_path, num_to_keep=1):
    """Saves `model` to `file_path` by delegating to `pt_util.save`.

    Args:
        model: The network to save; passed through unchanged.
        file_path: Destination path of the checkpoint; passed through unchanged.
        num_to_keep: Forwarded to `pt_util.save`. Presumably the number of
            saved states to retain, as in the `save` helper defined earlier in
            this notebook -- confirm against pt_util.py.
    """
    pt_util.save(model, file_path, num_to_keep)

def load_model(model, file_path):
    """Restores weights into `model` from `file_path` by delegating to `pt_util.restore`.

    Args:
        model: The network to restore into; passed through unchanged.
        file_path: Path of the saved weights; passed through unchanged.
    """
    pt_util.restore(model, file_path)

def load_last_model(model, dir_path):
    """Restores the latest weights in `dir_path` by delegating to `pt_util.restore_latest`.

    Args:
        model: The network to restore into; passed through unchanged.
        dir_path: Folder to search; passed through unchanged.
    Returns:
        Whatever `pt_util.restore_latest` returns. The training cell uses it as
        the epoch to resume from; presumably it is the epoch parsed from the
        checkpoint name, or 0 -- confirm against pt_util.py.
    """
    return pt_util.restore_latest(model, dir_path)

## Dataset class for the [Berkeley DeepDrive Dataset](https://arxiv.org/abs/1805.04687)

In [None]:
class DeepDriveDataset(torch.utils.data.Dataset):
    """Dataset that loads images from disk and pairs each one with its stored target.

    Args:
        image_data: Indexable collection whose items hold an image file path
            at index 0 and the target for that image at index 1. The rest of
            the notebook treats the target as a dict of tensors.
    """

    def __init__(self, image_data):
        super(DeepDriveDataset, self).__init__()

        self.image_data = image_data

    def __len__(self):
        """Returns the number of (image path, target) entries."""
        return len(self.image_data)

    def __getitem__(self, idx):
        """Loads the image at `idx` and returns it with its target.

        Returns:
            tuple: (image tensor produced by transforms.ToTensor from the RGB
                image, the stored target object unchanged)
        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        entry = self.image_data[idx]
        image_path = entry[0]
        bgr_image = cv2.imread(image_path)
        # cv2.imread signals a missing or unreadable file by returning None
        # rather than raising; fail here with the path instead of letting
        # cvtColor fail with an opaque assertion.
        if bgr_image is None:
            raise FileNotFoundError('Could not read image: {}'.format(image_path))
        # OpenCV decodes to BGR channel order; convert to RGB before ToTensor.
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        return (transforms.ToTensor()(rgb_image), entry[1])


## Clone the TorchVision repo

In [None]:
%%shell

# Only run this cell the very first time you run the notebook
# Fetch the torchvision sources and check out the v0.3.0 tag.
git clone https://github.com/pytorch/vision.git
cd vision
git checkout v0.3.0

# Copy the detection reference helpers into the parent folder (the folder the
# clone was started from). A later cell imports utils, coco_eval and coco_utils
# as top-level modules.
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/coco_utils.py ../

## Load the training, val, and test data

In [None]:
# Placeholder value; it is replaced by the unpickled labels below.
train_val_test_data = dict()
# Edit the file path below to go to the location of the labels file for Faster R-CNN
# NOTE(review): this path uses 'MyDrive' while BASE_PATH uses 'My Drive' --
# confirm both resolve to the same folder in your runtime.
# NOTE: unpickling can execute arbitrary code, so only load a labels file you
# created or trust. Later cells read the 'train_labels', 'val_labels' and
# 'test_labels' entries of the loaded object.
with open('/content/drive/MyDrive/CV_Project/faster_rcnn_labels.p', 'rb') as images_file:
    train_val_test_data = pickle.load(images_file)

## Train the model

In [None]:
# All the code in this cell, with some modifications by me (namely, the learning
# rate scheduler code and returning the total loss in train_one_epoch), is from
# https://github.com/pytorch/vision/blob/master/references/detection/engine.py
import math
import sys
import time
import torch

from coco_utils import get_coco_api_from_dataset
from coco_eval import CocoEvaluator
import utils


def train_one_epoch(model, optimizer, lr_scheduler, lr_change_thresh, data_loader, device, epoch, print_freq):
    """Trains `model` for one pass over `data_loader` and returns the summed loss.

    After every optimizer step the batch loss is compared with the previous
    batch's loss. Each batch whose loss did not decrease bumps a counter, and a
    decrease resets it. When the counter reaches `lr_change_thresh` the
    scheduler is stepped and the counter starts over.

    Args:
        model: Detection model; called as model(images, targets) and expected
            to return a dict of losses.
        optimizer: Optimizer stepped once per batch.
        lr_scheduler: Scheduler stepped when the loss has failed to decrease
            for `lr_change_thresh` consecutive batches.
        lr_change_thresh(int): Number of consecutive non-decreasing batches
            that triggers a scheduler step.
        data_loader: Iterable yielding (images, targets) batches.
        device: Device the images and target tensors are moved to.
        epoch: Epoch number, used only in the log header.
        print_freq: Logging frequency handed to the metric logger.
    Returns:
        The sum of the per-batch loss values seen during the epoch.
    """
    model.train()

    metric_logger = utils.MetricLogger(delimiter="  ")
    lr_meter = utils.SmoothedValue(window_size=1, fmt='{value:.6f}')
    metric_logger.add_meter('lr', lr_meter)
    header = 'Epoch: [{}]'.format(epoch)

    total_loss = 0
    prev_loss = None
    loss_increases = 0
    for images, targets in metric_logger.log_every(data_loader, print_freq, header):
        images = [image.to(device) for image in images]
        targets = [{key: value.to(device) for key, value in target.items()} for target in targets]

        loss_dict = model(images, targets)
        losses = sum(loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss_dict_reduced.values())

        loss_value = losses_reduced.item()
        total_loss += loss_value

        # A NaN or infinite loss cannot be recovered from; report and stop.
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        # The first batch has nothing to compare against, so it only records
        # its loss.
        if prev_loss is not None:
            if loss_value < prev_loss:
                loss_increases = 0
            else:
                loss_increases += 1
                if loss_increases >= lr_change_thresh:
                    lr_scheduler.step()
                    loss_increases = 0
        prev_loss = loss_value

        metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return total_loss

@torch.no_grad()
def evaluate(model, data_loader, device):
    """Runs `model` over `data_loader` and summarizes COCO bounding-box metrics.

    The torch thread count is set to 1 for the duration of the call and put
    back to its previous value afterwards, including when evaluation raises.

    Args:
        model: Detection model; switched to eval mode and called as
            model(images).
        data_loader: Loader yielding (images, targets) batches. Its `dataset`
            is handed to get_coco_api_from_dataset, and every target must hold
            an 'image_id' tensor.
        device: Device the images are moved to before inference.
    Returns:
        The CocoEvaluator after accumulate() and summarize() have been called.
    """
    n_threads = torch.get_num_threads()
    torch.set_num_threads(1)
    try:
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = 'Test:'

        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = ["bbox"]
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for images, targets in metric_logger.log_every(data_loader, 100, header):
            images = list(img.to(device) for img in images)

            # Wait for pending GPU work so the timing below covers only this batch.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(images)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            # Key each prediction by the image id of the target it belongs to.
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        return coco_evaluator
    finally:
        # Restore the caller's thread count even if evaluation failed.
        torch.set_num_threads(n_threads)

In [None]:
# Hyperparameters
# NOTE(review): TEST_BATCH_SIZE, MOMENTUM, SEED and PRINT_INTERVAL are not
# referenced by the code visible in this notebook (the optimizer is Adam, the
# test loaders use batch_size=5, and train_one_epoch is called with
# print_freq=10) -- confirm whether they are still needed.
BATCH_SIZE = 9
VAL_BATCH_SIZE = 5
TEST_BATCH_SIZE = 10
EPOCHS = 20
LEARNING_RATE = 0.00005
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0005
USE_CUDA = True
SEED = 0
PRINT_INTERVAL = 100
# Passed to train_one_epoch as lr_change_thresh.
LR_CHANGE_THRESH = 4
# Decay factor handed to the StepLR scheduler.
GAMMA = 0.95

# The lines below, up to creating kwargs, as well as the except and finally
# blocks at the end of this cell are from CSE 543 Deep Learning
# (https://github.com/pjreddie/uwnet/blob/master/hw1.ipynb).

EXPERIMENT_VERSION = '0.001' # increment this to start a new experiment
# Checkpoints and log.pkl for this experiment are written under this folder.
LOG_PATH = BASE_PATH + 'faster_rcnn_logs/' + EXPERIMENT_VERSION + '/'

use_cuda = USE_CUDA and torch.cuda.is_available()

print('Using device', device)
import multiprocessing
print('num cpus:', multiprocessing.cpu_count())

# NOTE(review): kwargs is built here but is not passed to any DataLoader in the
# visible code, so num_workers and pin_memory currently have no effect.
kwargs = {'num_workers': multiprocessing.cpu_count(),
          'pin_memory': True} if use_cuda else {}

data_train = DeepDriveDataset(train_val_test_data['train_labels'])
train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE,
                                          shuffle=True, collate_fn=utils.collate_fn)

# The code below for loading the model, creating the learning rate scheduler, and calling
# train_one_epoch and evaluate are from the PyTorch TorchVision Object Detection Fine
# Tuning Tutorial (https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html).
# To fine tune Faster R-CNN with a MobileNetV3 backbone instead of ResNet-50, comment
# out the first line below and uncomment the second line below.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
num_classes = 14
# Replace the pretrained box predictor with one sized for num_classes outputs.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model = model.to(device)

# Only parameters that require gradients are handed to the optimizer.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(params, lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=GAMMA)
# Resume from the newest checkpoint in LOG_PATH, if there is one.
start_epoch = load_last_model(model, LOG_PATH)

# Each log is a list of (epoch, value) tuples; all are empty on a fresh experiment.
train_losses, val_losses, val_mAPs = pt_util.read_log(LOG_PATH + 'log.pkl', ([], [], []))
# Seed the best-so-far mAP with the maximum over the whole log, not the last
# entry; otherwise a resumed run could overwrite a better checkpoint with a
# worse one. None means nothing has been logged yet.
max_val_mAP = max((logged_mAP for _, logged_mAP in val_mAPs), default=None)

# Give `epoch` a value up front so the checkpoint name built in `finally` is
# defined even if the loop body never runs (for example when start_epoch is
# already past EPOCHS).
epoch = start_epoch
try:
    for epoch in range(start_epoch, EPOCHS + 1):
        # Train the model for one epoch
        train_loss = train_one_epoch(model, optimizer, lr_scheduler, LR_CHANGE_THRESH, train_loader, device, epoch, print_freq=10)
        data_val_for_loss = DeepDriveDataset(copy.deepcopy(train_val_test_data['val_labels']))
        val_loader_for_loss = torch.utils.data.DataLoader(data_val_for_loss, batch_size=VAL_BATCH_SIZE,
                                                          shuffle=False, collate_fn=utils.collate_fn)
        # Compute loss on the validation set. The model is still in training
        # mode here (train_one_epoch called model.train()), so calling it with
        # targets returns the loss dict; no_grad keeps this pass from building
        # gradients.
        val_loss = 0
        with torch.no_grad():
            for images, targets in val_loader_for_loss:
                cur_images = list(image.to(device) for image in images)
                cur_targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                loss_dict = model(cur_images, cur_targets)
                cur_loss = sum(loss for loss in loss_dict.values())
                val_loss += cur_loss.item()

        # Evaluate the model on the validation set. It is important to make a copy
        # of the data passed to the evaluate function as this function appears to
        # modify the data in a way that can make the bounding boxes invalid for
        # passing to the model.
        data_val_for_evaluate = DeepDriveDataset(copy.deepcopy(train_val_test_data['val_labels']))
        val_loader_for_evaluate = torch.utils.data.DataLoader(data_val_for_evaluate, batch_size=VAL_BATCH_SIZE,
                                                              shuffle=False, collate_fn=utils.collate_fn)
        val_evaluator = evaluate(model, val_loader_for_evaluate, device=device)

        print('Total loss on validation set:', val_loss)

        # stats[0] is the first summary metric reported for the bbox evaluation.
        val_mAP = val_evaluator.coco_eval['bbox'].stats[0]
        train_losses.append((epoch, train_loss))
        val_losses.append((epoch, val_loss))
        val_mAPs.append((epoch, val_mAP))
        pt_util.write_log(LOG_PATH + 'log.pkl', (train_losses, val_losses, val_mAPs))
        # Checkpoint only when validation mAP improves on the best seen so far.
        if max_val_mAP is None or val_mAP > max_val_mAP:
            save_model(model, LOG_PATH + '%03d.pt' % epoch, 1)
            max_val_mAP = val_mAP
        print('Current learning rate:', optimizer.param_groups[0]['lr'])


except KeyboardInterrupt:
    print('Interrupted')
except Exception:
    # Report the failure but still fall through to `finally`, so the current
    # weights are saved and the curves are plotted.
    import traceback
    traceback.print_exc()
finally:
    save_model(model, LOG_PATH + '%03d.pt' % epoch, 0)
    # Skip logs that are still empty: zip(*[]) yields nothing to unpack.
    for log_entries, plot_title, y_label in ((train_losses, 'Train loss', 'Loss'),
                                             (val_losses, 'Val loss', 'Loss'),
                                             (val_mAPs, 'Val mAP', 'mAP')):
        if log_entries:
            ep, val = zip(*log_entries)
            pt_util.plot(ep, val, plot_title, 'Epoch', y_label)


## Evaluate the model on test data

In [None]:
# Change 'faster-rcnn-resnet-exp-4' to the name of the folder containing
# the logs for the model to evaluate
LOG_PATH = BASE_PATH + 'faster-rcnn-resnet-exp-4/'
# Comment out the first line below and uncomment the second line below if
# the model you want to evaluate has a MobileNetV3 backbone.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(pretrained=True)
# The box predictor must be rebuilt with the same num_classes used for training
# so the checkpoint's tensors match the model's shapes.
num_classes = 14
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model = model.to(device)

# Load the newest checkpoint found in LOG_PATH into the model.
start_epoch = load_last_model(model, LOG_PATH)

# Training/validation history saved alongside the checkpoint; each list
# defaults to empty when no log file exists.
train_losses, val_losses, val_mAPs = pt_util.read_log(LOG_PATH + 'log.pkl', ([], [], []))

In [None]:
# Build the test loader from a deep copy of the test labels, so anything that
# modifies the targets during evaluation does not touch train_val_test_data.
# NOTE(review): batch_size is hard-coded to 5 here although TEST_BATCH_SIZE is
# defined in the hyperparameters cell -- confirm which one is intended.
data_test = DeepDriveDataset(copy.deepcopy(train_val_test_data['test_labels']))
test_loader = torch.utils.data.DataLoader(data_test, batch_size=5,
                                          shuffle=False, collate_fn=utils.collate_fn)

In [None]:
# Run COCO-style bounding-box evaluation over the whole test set; evaluate()
# prints the summary and returns the evaluator object.
test_evaluator = evaluate(model, test_loader, device=device)

### Compute mAP on the test set for each object class

In [None]:
# For every class id, build a copy of the test set that keeps only the ground
# truth boxes of that class (dropping images without any), then run the
# standard evaluation on it so the printed summary is specific to that class.
for i in range(0, num_classes):
    print('Computing mAP for class', i)
    cur_class_test_data = []
    for test_image, test_label in train_val_test_data['test_labels']:
        # Select every box of class i in one indexing step instead of growing
        # tensors with torch.cat once per box. Assumes 'labels' is a 1-D tensor
        # aligned with the first dimension of 'boxes' and 'area'. The selected
        # tensors keep the dtype of the stored 'boxes' and 'area'.
        class_mask = test_label['labels'] == i
        num_boxes = int(class_mask.sum().item())
        if num_boxes > 0:
            cur_label = {
                'image_id': test_label['image_id'],
                'boxes': test_label['boxes'][class_mask],
                'area': test_label['area'][class_mask],
                'labels': torch.zeros((num_boxes,), dtype=torch.int64).fill_(i),
                'iscrowd': torch.zeros((num_boxes,), dtype=torch.int64)
            }
            cur_class_test_data.append((test_image, cur_label))
    print(len(cur_class_test_data), 'images for class', i)
    if len(cur_class_test_data) > 0:
        print('Finished creating dataset, starting evaluation')
        # Deep copy so evaluation cannot modify the per-class labels built above.
        cur_data_test = DeepDriveDataset(copy.deepcopy(cur_class_test_data))
        cur_test_loader = torch.utils.data.DataLoader(cur_data_test, batch_size=5,
                                                      shuffle=False, collate_fn=utils.collate_fn)
        cur_evaluator = evaluate(model, cur_test_loader, device=device)