In [1]:
import numpy as np
import cv2
from PIL import Image
import copy
import matplotlib.pyplot as plt
import os
from utils import helper
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from sklearn.model_selection import train_test_split

In [2]:
# Create dataset
# Columns: image_path, label_path, bboxes, classes
impath = 'data/kitti/augmented_test/images/'
labels_path = 'data/kitti/augmented_test/labels/'
imlabel_list = helper.imlabel(impath, labels_path)

In [3]:
imlabel_list[0][0]

'data/kitti/augmented_test/images/2-wheeler-10_resized_brightness.jpg'

In [4]:
df = pd.DataFrame(columns = ['image_path','label_path','class','bboxes'])
# df = pd.DataFrame()
# df['image_path'] = imlabel_list[:][0]
# df['label_path'] = imlabel_list[:][1]
# df.head()

for item in imlabel_list:
    #bboxes = helper.fetch_bboxes(item[1]).tolist()
    bboxes = helper.fetch_bboxes(item[1])
    classes = helper.fetch_classes(item[1])
    for i,cls_bbox in enumerate(zip(classes,bboxes)):
        # print(cls_bbox)
        #df.loc[i,'image_path'] = item[0]
        #df.loc[i,'label_path'] = item[1]
        #df.loc[i,'class'] = cls_bbox[0]
        
        #df.loc[i,'bboxes'] = cls_bbox[1]
        # df = df.append([item[0],item[1],cls_bbox[0],cls_bbox[1]])
        df = df.append({'image_path':item[0], 'label_path': item[1],
                        'class': cls_bbox[0], 'bboxes': cls_bbox[1][:4]}, ignore_index=True)
df.head() 

Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,pedestrian,"[862.2254025044723, 150.72, 926.4543828264759,..."
1,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,pedestrian,"[871.9570661896244, 111.36, 917.6958855098391,..."
2,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,2-wheeler,"[83.6923076923077, 131.51999999999998, 282.218..."
3,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,2-wheeler,"[295.84257602862255, 120.96, 476.8515205724508..."
4,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,2-wheeler,"[484.63685152057246, 121.92, 615.0411449016101..."


In [5]:
print(df['class'].unique())

class_dict = {'2-wheeler': 0, 
              'pedestrian': 1, 'car': 2,
              'dontcare': 3}

df['class'] = df['class'].apply(lambda x:  class_dict[x])
df.head()

['pedestrian' '2-wheeler' 'car' 'dontcare']


Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,1,"[862.2254025044723, 150.72, 926.4543828264759,..."
1,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,1,"[871.9570661896244, 111.36, 917.6958855098391,..."
2,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,0,"[83.6923076923077, 131.51999999999998, 282.218..."
3,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,0,"[295.84257602862255, 120.96, 476.8515205724508..."
4,data/kitti/augmented_test/images/2-wheeler-10_...,data/kitti/augmented_test/labels/2-wheeler-10_...,0,"[484.63685152057246, 121.92, 615.0411449016101..."


In [6]:
# Training Dataset Split
X = df.image_path
y = df[['bboxes', 'class']]
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
def normalize(im_arr):
    # Normalizes image with imagenet stats."""
    imagenet_stats = np.array([[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]])
    return (im_arr - imagenet_stats[0])/imagenet_stats[1]

class KittiDS(Dataset):
    def __init__(self, paths, bboxes, y):
        # self.transforms = transforms
        self.paths = paths.values
        self.bboxes = bboxes.values
        self.y = y.values
        
    def __len__(self):
        return len(self.paths)
    
    def __getitem__(self, idx):
        path = self.paths[idx]
        y_class = self.y[idx]
        y_bbox = self.bboxes[idx]
        # x, y_bb = transformsXY(path, self.bb[idx], self.transforms)
        x = cv2.cvtColor(cv2.imread(path).astype('float32'),
                         cv2.COLOR_BGR2RGB)/255
        x = normalize(x)
        x = np.rollaxis(x, 2)
        return x, y_bbox, y_class

In [8]:
train_kitti = KittiDS(X_train, y_train['bboxes'], y_train['class'])
val_kitti = KittiDS(X_val, y_val['bboxes'], y_val['class'])

In [9]:
# set batch size
batch_size = 8
# For autobatching and parallelizing data-loading
train_kitti_pt = DataLoader(train_kitti, batch_size=batch_size, shuffle=True, drop_last=True)
val_kitti_pt = DataLoader(val_kitti, batch_size=batch_size, drop_last=True)

In [10]:
# verify if GPU is being used with its name
print("Flexing my GPU ^_^ : ", torch.cuda.get_device_name(0))

Flexing my GPU ^_^ :  GeForce RTX 3080


In [11]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [12]:
def update_optimizer(optimizer, lr):
    for i, param_group in enumerate(optimizer.param_groups):
        param_group["lr"] = lr

In [13]:
class PyKitti_model(nn.Module):
    def __init__(self):
        super(PyKitti_model, self).__init__()
        resnet = models.resnet34(pretrained=True)
        # children returns immediate child modules
        layers = list(resnet.children())[:8]
        self.features1 = nn.Sequential(*layers[:6])
        self.features2 = nn.Sequential(*layers[6:])
        # classification network
        self.classifier = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))
        # bbox regressor network
        self.bbox = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))
        
    def forward(self, x):
        x = self.features1(x)
        # print("x shape after extracting features1: ", x.shape)
        x = self.features2(x)
        # print("x shape after extracting features2: ", x.shape)
        x = F.relu(x)
        x = nn.AdaptiveAvgPool2d((1,1))(x)
        # print("x shape before reshape: ", x.shape)
        # reshape tensor
        x = x.view(x.shape[0], -1)
        # print("x shape: ", x.shape)
        return self.classifier(x), self.bbox(x)

In [14]:
for x, y_bbox, y_class in train_kitti_pt:
    print(y_bbox)

tensor([[475.3294, 356.2667, 593.3570, 538.6667],
        [238.9167, 107.5000, 306.9167, 262.5000],
        [418.3905, 107.6384, 509.8268, 238.8227],
        [-37.5201, 245.0727,  63.3244, 441.9215],
        [341.9729, 201.8462, 526.1122, 421.7436],
        [140.4509,   4.1032, 187.5117,  75.2533],
        [567.3592, 135.3357, 621.1430, 283.1699],
        [215.1349, 193.6865, 275.6416, 340.7302]], dtype=torch.float64)
tensor([[384.8333, 155.0000, 445.7500, 331.2500],
        [632.4000, 107.5783, 727.6000, 171.3960],
        [461.4461,  59.6189, 596.4436, 274.1977],
        [571.7080, 157.4212, 811.8438, 287.9327],
        [105.4264, 249.1034, 144.0827, 467.5862],
        [357.5200,  91.1681, 496.2400, 290.8262],
        [284.9947, 264.9870, 370.7125, 476.8564],
        [358.5552, 211.5073, 391.9723, 273.2216]], dtype=torch.float64)
tensor([[ 695.0284,  284.6897,  718.2222,  391.4483],
        [ 424.6349,  222.4000,  608.5587,  409.6000],
        [ 357.1631,  229.1518,  541.0870,  416.3

tensor([[871.9571, 111.3600, 917.6959, 203.5200],
        [657.6914, 211.8857, 686.4457, 279.7714],
        [  2.5211, 275.1139,  93.2812, 493.3077],
        [111.5391, 249.8728, 150.1955, 468.3556],
        [ 19.6571, 192.8000, 257.9810, 480.0000],
        [100.0833, 158.7500, 170.9167, 338.7500],
        [595.2791, 262.4650, 680.9969, 474.3343],
        [799.5887,  85.1167, 836.4221, 206.3667]], dtype=torch.float64)
tensor([[329.5833,  83.7500, 423.0833, 342.5000],
        [675.3775, 151.6800, 721.1163, 312.9600],
        [526.3543, 227.6571, 562.1029, 256.4571],
        [176.2554, 122.6167, 262.6721, 355.1167],
        [860.6105, 303.4307, 891.3282, 368.3487],
        [357.2666,  10.4276, 414.4118,  84.7400],
        [183.4793, 314.6667, 433.4832, 435.2000],
        [ 48.0330, 231.5518, 164.6044, 379.5518]], dtype=torch.float64)
tensor([[ 641.3119,  144.0318,  709.3819,  287.1226],
        [ -58.5294,  116.2119,  -32.4779,  238.7483],
        [  35.4167,  152.5000,   86.4167,  270.0

tensor([[435.8333, 105.0000, 512.3333, 298.7500],
        [507.1200,  82.9630, 683.9200, 301.7664],
        [  0.0000, 125.2500,  37.7778, 370.5000],
        [915.2727,  35.7254, 947.2068, 110.8284],
        [853.1108, 116.0000, 946.5425, 353.6000],
        [286.0178,  29.0489, 419.0161, 115.8324],
        [298.6282,   1.5811, 325.5201,  74.3124],
        [332.6233,  16.0000, 516.1026, 187.7333]], dtype=torch.float64)
tensor([[356.6369, 151.6800, 510.3971, 252.4800],
        [111.4958, 292.6110, 165.9948, 483.4307],
        [389.2622, 123.7333, 530.7022, 371.2000],
        [295.8426, 120.9600, 476.8515, 530.8800],
        [232.4000, 107.5783, 327.6000, 171.3960],
        [551.5798,   7.9056, 608.7250,  82.2180],
        [636.9088,  98.8800, 908.4222, 367.6800],
        [249.5492, 233.6000, 411.8857, 373.6000]], dtype=torch.float64)
tensor([[ 463.7600,   91.1681,  602.4800,  290.8262],
        [ 856.6922,  175.5852,  933.1922,  350.5852],
        [ 435.6087,  166.0170,  492.7539,  305.9

tensor([[ 741.5036,  142.3003,  795.2873,  278.2761],
        [ 304.2481,  294.6207,  316.1964,  340.1379],
        [ 565.3468,   95.6495,  627.3215,  226.8545],
        [ 135.2997,  213.4505,  205.8908,  394.4881],
        [  68.6349,   38.5987,  196.9944,  397.7987],
        [ 992.4779,  116.2119, 1018.5294,  238.7483],
        [ 374.6025,   79.6244,  506.1266,  363.0673],
        [ 184.2440,    4.0000,  378.0931,  494.4000]], dtype=torch.float64)
tensor([[ 949.1621,  114.4000, 1035.6083,  344.0000],
        [ 251.6000,  105.7550,  278.8000,  178.6895],
        [ 909.6889,  132.7500,  965.6000,  393.0000],
        [ 183.1155,  100.0834,  296.7273,  236.9975],
        [  21.1556,   97.5000,  211.5556,  540.0000],
        [1408.9600,   96.6382, 1432.0800,  293.5613],
        [ 834.8800,  106.6667,  868.8800,  179.6011],
        [ 694.2170,  141.8667,  860.5286,  226.1333]], dtype=torch.float64)
tensor([[ 544.6692,    7.2654,  579.9648,   77.6250],
        [ 721.4594,  209.0108, 1052.91

In [15]:
# train model
def train(model, optimizer, train_kitti_pt, val_kitti_pt, epochs=10,C=1000):
    for i in range(epochs):
        # enables model training, grad compute
        model.train()
        total = 0
        sum_loss = 0
        correct = 0
        for x, y_bbox, y_class in train_kitti_pt:
            batch = y_class.shape[0]
            # fp32 precision
            # x = x.cuda().float()
            x = x.float().to(device)
            # print("x shape: ", x.shape)
            # y_class = y_class.cuda()
            y_class = y_class.long().to(device)
            # print("y_class: ", y_class, 'y_class shape: ', y_class.shape)
            # fp32 precision 
            # y_bbox = y_bbox.cuda().float()
            y_bbox = y_bbox.float().to(device, dtype=float)
            # print("y_bbox: ", y_bbox, "\ny_bbox shape", y_bbox.shape)
            out_class, out_bbox = model.forward(x)
            # print("out_bbox: ", out_bbox, "\nout_bbox shape", out_bbox.shape)
            # print("out_class: ", out_class, '\nout_class shape: ', out_class.shape)
            # compute classification loss: torch.max(outputs, 1)[1], torch.max(labels, 1)[1]
            clf_loss = F.cross_entropy(out_class, y_class, reduction="sum")
            # compute L1 loss
            bbox_reg_loss = F.l1_loss(out_bbox, y_bbox, reduction="none").sum(1)
            bbox_reg_loss = bbox_reg_loss.sum()
            # computing total loss
            loss = clf_loss + bbox_reg_loss/C
            # set gradients to 0
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total += batch
            sum_loss += loss.item()
            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()
        train_loss = sum_loss/total
        train_acc = correct/total
        val_loss, val_acc = val_metrics(model, val_kitti_pt, C)
        print("Epoch: ",i+1,"/",epochs,"\n----------------------------")
        print("Train_loss: %.3f, Train_acc: %.3f,\nVal_loss: %.3f, Val_acc: %.3f" % 
              (train_loss, train_acc,val_loss, val_acc))

In [16]:
# compute validation metrics
def val_metrics(model, val_kitti_pt, C=1000):
    # evaluation mode
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    # pair .eval() with no_grad
    # turn off grad computation
    with torch.no_grad():
        for x, y_bbox, y_class in val_kitti_pt:
            batch = y_class.shape[0]
            x = x.float().to(device)
            y_class = y_class.long().to(device)
            y_bbox = y_bbox.float().to(device, dtype=float)
            out_class, out_bbox = model.forward(x)
            clf_loss = F.cross_entropy(out_class, y_class, reduction="sum")
            bbox_reg_loss = F.l1_loss(out_bbox, y_bbox, reduction="none").sum(1)
            bbox_reg_loss = bbox_reg_loss.sum()
            loss = clf_loss + bbox_reg_loss/C
            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()
            sum_loss += loss.item()
            total += batch
    return sum_loss/total, correct/total

In [17]:
model1 = PyKitti_model().to(device)
parameters = filter(lambda p: p.requires_grad, model1.parameters())
optimizer = torch.optim.Adam(parameters, lr=0.006)

Layers of resnet to be used: 
 [Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False), BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True), ReLU(inplace=True), MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False), Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), str

In [18]:
%%time
train(model1, optimizer, train_kitti_pt, val_kitti_pt, epochs=10)

Epoch:  0 / 10 
----------------------------
Train_loss: 2.904, Train_acc: 0.468,
Val_loss: 23.700, Val_acc: 0.478
Epoch:  1 / 10 
----------------------------
Train_loss: 2.008, Train_acc: 0.589,
Val_loss: 1.830, Val_acc: 0.574
Epoch:  2 / 10 
----------------------------
Train_loss: 1.541, Train_acc: 0.680,
Val_loss: 1.905, Val_acc: 0.522
Epoch:  3 / 10 
----------------------------
Train_loss: 1.531, Train_acc: 0.680,
Val_loss: 2.652, Val_acc: 0.640
Epoch:  4 / 10 
----------------------------
Train_loss: 1.461, Train_acc: 0.713,
Val_loss: 1.602, Val_acc: 0.684
Epoch:  5 / 10 
----------------------------
Train_loss: 1.419, Train_acc: 0.745,
Val_loss: 3.568, Val_acc: 0.640
Epoch:  6 / 10 
----------------------------
Train_loss: 1.474, Train_acc: 0.709,
Val_loss: 1.417, Val_acc: 0.721
Epoch:  7 / 10 
----------------------------
Train_loss: 1.503, Train_acc: 0.718,
Val_loss: 1.765, Val_acc: 0.581
Epoch:  8 / 10 
----------------------------
Train_loss: 1.407, Train_acc: 0.734,
Val_l

In [19]:
!nvidia-smi

Tue Jun  8 17:19:27 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 3080    Off  | 00000000:09:00.0  On |                  N/A |
| 38%   65C    P2   131W / 320W |   5727MiB / 10014MiB |     21%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------