In [1]:
import copy
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from sklearn.model_selection import GroupShuffleSplit, train_test_split
from torch.utils.data import Dataset, DataLoader
from torchinfo import summary
from torchvision import models

from utils import helper

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

In [2]:
# Create dataset
# Columns: image_path, label_path, bboxes, classes
# Directories holding the resized training images and their label files.
impath, labels_path = (
    'data/kitti/integration/resized/training/images/',
    'data/kitti/integration/resized/training/labels/',
)
# The project helper builds the list that the following cells iterate over;
# each entry exposes an image path at [0] and a label path at [1].
imlabel_list = helper.imlabel(impath, labels_path)

In [3]:
# Sanity check: show the first field of the first entry (an image path).
imlabel_list[0][0]

'data/kitti/integration/resized/training/images/000000_resized.jpg'

In [4]:
# Build one row per annotated object. Rows are collected in a plain list and the
# DataFrame is created once at the end: DataFrame.append copied the whole frame
# on every call (quadratic in the number of rows) and was removed in pandas 2.0.
records = []
for item in imlabel_list:
    # item[0] is the image path, item[1] the path of its label file.
    bboxes = helper.fetch_bboxes(item[1])
    classes = helper.fetch_classes(item[1])
    for obj_class, obj_bbox in zip(classes, bboxes):
        records.append({
            'image_path': item[0],
            'label_path': item[1],
            'class': obj_class,
            # keep only the first four values of each box entry
            'bboxes': obj_bbox[:4],
        })

# Passing `columns` keeps the expected schema even when no records were found.
df = pd.DataFrame(records, columns=['image_path', 'label_path', 'class', 'bboxes'])
df.head()

Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Pedestrian,"[1047.4205405405405, 112.15686274509804, 1191...."
1,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Truck,"[869.5441066666667, 120.88888888888889, 913.55..."
2,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Car,"[562.32192, 140.3207729468599, 614.80704, 157...."
3,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Cyclist,"[981.5210666666668, 126.72463768115941, 999.48..."
4,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,DontCare,"[730.9764266666667, 131.17681159420292, 856.77..."


In [5]:
# Number of rows (annotated objects) per raw class label.
df['class'].value_counts()

Car               42505
DontCare          17015
Pedestrian         6686
Van                4283
Cyclist            2424
Truck              1614
Misc               1394
Tram                743
Person_sitting      304
2-wheeler           122
pedestrian           70
car                  50
dontcare              1
Name: class, dtype: int64

In [6]:
# Discard, in place, every row labelled with a class we do not need.
remove_classes = ['Truck', 'Misc', 'Tram']

for unwanted in remove_classes:
    # Index labels of the rows carrying this class; the remaining rows keep
    # their original index labels.
    unwanted_rows = df.index[df['class'] == unwanted]
    df.drop(index=unwanted_rows, inplace=True)

In [7]:
# Inspect the frame after the drop; surviving rows keep their original index labels.
df.head()

Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Pedestrian,"[1047.4205405405405, 112.15686274509804, 1191...."
2,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Car,"[562.32192, 140.3207729468599, 614.80704, 157...."
3,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Cyclist,"[981.5210666666668, 126.72463768115941, 999.48..."
4,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,DontCare,"[730.9764266666667, 131.17681159420292, 856.77..."
5,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,DontCare,"[741.7984000000001, 135.23478260869567, 765.67..."


In [8]:
# Clubbing similar categories: case variants and related labels collapse into
# four merged names.
merged_labels = {
    'car': ('Car', 'car', 'Van'),
    'pedestrian': ('Pedestrian', 'pedestrian', 'Person_sitting'),
    '2-wheeler': ('2-wheeler', 'Cyclist'),
    'dontcare': ('dontcare', 'DontCare'),
}
# Invert the grouping into a raw-label -> merged-label lookup.
class_dict = {raw: merged for merged, raws in merged_labels.items() for raw in raws}

# A label that is missing from class_dict raises KeyError rather than passing through.
df['class'] = df['class'].apply(lambda raw_label: class_dict[raw_label])
df.head()

Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,pedestrian,"[1047.4205405405405, 112.15686274509804, 1191...."
2,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,car,"[562.32192, 140.3207729468599, 614.80704, 157...."
3,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,2-wheeler,"[981.5210666666668, 126.72463768115941, 999.48..."
4,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,dontcare,"[730.9764266666667, 131.17681159420292, 856.77..."
5,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,dontcare,"[741.7984000000001, 135.23478260869567, 765.67..."


In [9]:
# Number of rows per class label at this point in the notebook.
df['class'].value_counts()

car           46838
dontcare      17016
pedestrian     7060
2-wheeler      2546
Name: class, dtype: int64

In [10]:
# Encode the merged class names as integer ids: the position in this list is the id.
# NOTE(review): this cell is not idempotent -- once the column holds integers,
# re-running it raises KeyError.
class_dict = {
    name: class_id
    for class_id, name in enumerate(['2-wheeler', 'pedestrian', 'car', 'dontcare'])
}

df['class'] = df['class'].apply(lambda class_name: class_dict[class_name])
df.head()

Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,1,"[1047.4205405405405, 112.15686274509804, 1191...."
2,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,2,"[562.32192, 140.3207729468599, 614.80704, 157...."
3,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,0,"[981.5210666666668, 126.72463768115941, 999.48..."
4,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,3,"[730.9764266666667, 131.17681159420292, 856.77..."
5,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,3,"[741.7984000000001, 135.23478260869567, 765.67..."


In [11]:
# Training Dataset Split
X = df.image_path
y = df[['bboxes', 'class']]
# Every row is one annotated object, so a single image contributes several rows.
# A row-wise random split places objects of the same image in both subsets and
# leaks validation images into training. Splitting by image path keeps each
# image entirely on one side; test_size is therefore a fraction of the images.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(splitter.split(X, y, groups=X))
X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

In [12]:
def normalize(im_arr):
    """Standardise an image with the ImageNet per-channel statistics.

    Args:
        im_arr: array whose last axis broadcasts against a length-3 vector
            (the caller in this file passes an H x W x 3 RGB array divided
            by 255).

    Returns:
        A new array equal to ``(im_arr - mean) / std``.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    return (im_arr - channel_mean) / channel_std

class KittiDS(Dataset):
    """Map-style dataset yielding one ``(image, bbox, class)`` sample per row.

    Args:
        paths: pandas object (its ``.values`` is read) holding image file paths.
        bboxes: pandas object holding the box target of each row.
        y: pandas object holding the class label of each row.
    """

    def __init__(self, paths, bboxes, y):
        self.paths = paths.values
        self.bboxes = bboxes.values
        self.y = y.values

    def __len__(self):
        """Return the number of samples (one per stored path)."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load and preprocess sample ``idx``.

        Returns:
            ``(x, y_bbox, y_class)`` where ``x`` is the RGB image divided by
            255, passed through ``normalize`` and moved to channel-first layout.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        path = self.paths[idx]
        y_class = self.y[idx]
        y_bbox = self.bboxes[idx]
        bgr = cv2.imread(path)
        # cv2.imread reports failure by returning None instead of raising; fail
        # here with the offending path rather than with an AttributeError on
        # the .astype call below.
        if bgr is None:
            raise FileNotFoundError(f"Could not read image: {path}")
        rgb = cv2.cvtColor(bgr.astype('float32'), cv2.COLOR_BGR2RGB) / 255
        x = normalize(rgb)
        # H x W x C -> C x H x W
        x = np.rollaxis(x, 2)
        return x, y_bbox, y_class

In [13]:
# Wrap each split in a KittiDS: image paths, box targets and class targets.
train_kitti = KittiDS(paths=X_train, bboxes=y_train['bboxes'], y=y_train['class'])
val_kitti = KittiDS(paths=X_val, bboxes=y_val['bboxes'], y=y_val['class'])

In [14]:
# set batch size
batch_size = 16
# Batched loaders. The training loader shuffles and drops the trailing partial batch.
train_kitti_pt = DataLoader(train_kitti, batch_size=batch_size, shuffle=True, drop_last=True)
# Validation keeps the final partial batch so the reported metrics cover every
# validation sample instead of silently skipping up to batch_size - 1 of them.
val_kitti_pt = DataLoader(val_kitti, batch_size=batch_size, drop_last=False)

In [15]:
# Report which GPU will be used. Guarded so the notebook still runs on a
# CPU-only machine: torch.cuda.get_device_name raises when CUDA is unavailable,
# while the device selection in the next cell explicitly supports a CPU fallback.
if torch.cuda.is_available():
    print("Flexing my GPU ^_^ : ", torch.cuda.get_device_name(0))
else:
    print("CUDA is not available; running on CPU")

Flexing my GPU ^_^ :  GeForce RTX 3080


In [16]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [17]:
def update_optimizer(optimizer, lr):
    """Set the learning rate of every parameter group of ``optimizer`` to ``lr``.

    The optimizer is modified in place; nothing is returned.
    """
    for group in optimizer.param_groups:
        group["lr"] = lr

In [18]:
class PyKitti_model(nn.Module):
    """Pretrained ResNet-34 feature extractor with two linear heads.

    ``forward`` returns ``(class_scores, bbox_values)``; both heads map the
    512 pooled features to 4 outputs.
    """

    def __init__(self):
        super().__init__()
        backbone = models.resnet34(pretrained=True)
        # Keep only the first eight immediate child modules of the backbone
        # (presumably everything before its own pooling/classifier -- confirm
        # against the torchvision ResNet definition).
        stages = list(backbone.children())[:8]
        self.features1 = nn.Sequential(*stages[:6])
        self.features2 = nn.Sequential(*stages[6:])
        # classification head
        self.classifier = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))
        # bounding-box regression head
        self.bbox = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))

    def forward(self, x):
        feature_map = F.relu(self.features2(self.features1(x)))
        # Global average pool down to 1 x 1, then flatten to (batch, features).
        global_pool = nn.AdaptiveAvgPool2d((1, 1))
        pooled = global_pool(feature_map)
        flat = pooled.view(pooled.shape[0], -1)
        return self.classifier(flat), self.bbox(flat)

In [19]:
# train model
def train(model, optimizer, train_kitti_pt, val_kitti_pt, epochs=10,C=1000):
    """Train ``model`` and print training/validation metrics after every epoch.

    Relies on the module-level ``device`` and on ``val_metrics``.

    Args:
        model: network returning ``(class_scores, bbox_predictions)``.
        optimizer: optimizer that updates the model's parameters.
        train_kitti_pt: iterable of ``(x, y_bbox, y_class)`` training batches.
        val_kitti_pt: iterable of validation batches, passed to ``val_metrics``.
        epochs: number of passes over ``train_kitti_pt``.
        C: divisor applied to the summed L1 box loss before it is added to the
            summed cross-entropy loss.

    Returns:
        None. Metrics are printed.
    """
    for i in range(epochs):
        # enables training behaviour (val_metrics switches the model to eval)
        model.train()
        total = 0
        sum_loss = 0
        correct = 0
        for x, y_bbox, y_class in train_kitti_pt:
            batch = y_class.shape[0]
            # fp32 inputs/targets on the training device
            x = x.float().to(device)
            y_class = y_class.long().to(device)
            y_bbox = y_bbox.float().to(device)
            # Call the module itself rather than .forward(): only __call__
            # runs registered forward hooks.
            out_class, out_bbox = model(x)
            # summed (not averaged) losses; normalised by sample count below
            clf_loss = F.cross_entropy(out_class, y_class, reduction="sum")
            bbox_reg_loss = F.l1_loss(out_bbox, y_bbox, reduction="none").sum(1)
            bbox_reg_loss = bbox_reg_loss.sum()
            loss = clf_loss + bbox_reg_loss/C
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total += batch
            sum_loss += loss.item()
            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()
        train_loss = sum_loss/total
        train_acc = correct/total
        val_loss, val_acc = val_metrics(model, val_kitti_pt, C)
        print("Epoch: ",i+1,"/",epochs,"\n----------------------------")
        print("Train_loss: %.3f, Train_acc: %.3f,\nVal_loss: %.3f, Val_acc: %.3f" % 
              (train_loss, train_acc,val_loss, val_acc))

In [20]:
# compute validation metrics
def val_metrics(model, val_kitti_pt, C=1000):
    """Evaluate ``model`` on ``val_kitti_pt`` without computing gradients.

    Relies on the module-level ``device``. The model is left in eval mode.

    Args:
        model: network returning ``(class_scores, bbox_predictions)``.
        val_kitti_pt: iterable of ``(x, y_bbox, y_class)`` batches.
        C: divisor applied to the summed L1 box loss before it is added to the
            summed cross-entropy loss.

    Returns:
        ``(mean_loss, accuracy)``, both averaged over the samples seen.
    """
    # evaluation mode, paired with no_grad to turn off gradient computation
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    with torch.no_grad():
        for x, y_bbox, y_class in val_kitti_pt:
            batch = y_class.shape[0]
            x = x.float().to(device)
            y_class = y_class.long().to(device)
            y_bbox = y_bbox.float().to(device)
            # Call the module itself rather than .forward(): only __call__
            # runs registered forward hooks.
            out_class, out_bbox = model(x)
            clf_loss = F.cross_entropy(out_class, y_class, reduction="sum")
            bbox_reg_loss = F.l1_loss(out_bbox, y_bbox, reduction="none").sum(1)
            bbox_reg_loss = bbox_reg_loss.sum()
            loss = clf_loss + bbox_reg_loss/C
            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()
            sum_loss += loss.item()
            total += batch
    return sum_loss/total, correct/total

In [None]:
# Instantiate the network on the selected device in fp32.
model1 = PyKitti_model().to(device, dtype=torch.float32)
# Hand only the parameters that require gradients to the optimizer.
parameters = (param for param in model1.parameters() if param.requires_grad)
optimizer = torch.optim.Adam(parameters, lr=0.006)
# Layer-by-layer summary for one batch of 3 x 544 x 960 inputs.
summary(model1, input_size=(batch_size, 3, 544, 960))

In [None]:
%%time
# Run 15 training epochs; the %%time cell magic reports how long the cell took.
train(model1, optimizer, train_kitti_pt, val_kitti_pt, epochs=15)

In [1]:
# Show the GPU status reported by the nvidia-smi command-line tool.
# NOTE(review): this cell carries execution count 1 although it is the last
# cell, so it was run out of order relative to the cells above.
!nvidia-smi

Sat Jun 12 14:27:36 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.73.01    Driver Version: 460.73.01    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  GeForce RTX 3080    Off  | 00000000:09:00.0  On |                  N/A |
|  0%   46C    P8    27W / 320W |    343MiB / 10014MiB |      4%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------------------------------------------------------