In [1]:
import numpy as np
import cv2
from PIL import Image
import copy
import matplotlib.pyplot as plt
import os, shutil
from utils import helper
import pandas as pd
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from torchinfo import summary
from sklearn.model_selection import train_test_split

In [2]:
# Locate the resized KITTI training images and their label files.
# The DataFrame built from them later has the columns:
# image_path, label_path, class, bboxes
impath = 'data/kitti/integration/resized/training/images/'
labels_path = 'data/kitti/integration/resized/training/labels/'
# helper.imlabel is a project utility; later cells index each entry as
# item[0] (image path) and item[1] (label path) -- presumably one pair per image.
imlabel_list = helper.imlabel(impath, labels_path)

In [3]:
# Sanity check: show the image path stored in the first entry.
imlabel_list[0][0]

'data/kitti/integration/resized/training/images/000000_resized.jpg'

In [4]:
# Build one row per annotated object: every (class, bbox) pair found in a label
# file becomes its own record pointing back at the image and label paths.
# Records are collected in a plain list and converted to a DataFrame once,
# because DataFrame.append copied the whole frame on every call (quadratic
# cost) and was removed in pandas 2.0.
records = []
for item in imlabel_list:
    image_path, label_path = item[0], item[1]
    bboxes = helper.fetch_bboxes(label_path)
    classes = helper.fetch_classes(label_path)
    for cls_name, bbox in zip(classes, bboxes):
        records.append({
            'image_path': image_path,
            'label_path': label_path,
            'class': cls_name,
            # keep only the first four values of each box entry
            'bboxes': bbox[:4],
        })

# Passing `columns` fixes the column order and keeps the schema even when
# no records were found.
df = pd.DataFrame(records, columns=['image_path', 'label_path', 'class', 'bboxes'])
df.head()

Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Pedestrian,"[1047.4205405405405, 112.15686274509804, 1191...."
1,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Truck,"[869.5441066666667, 120.88888888888889, 913.55..."
2,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Car,"[562.32192, 140.3207729468599, 614.80704, 157...."
3,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Cyclist,"[981.5210666666668, 126.72463768115941, 999.48..."
4,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,DontCare,"[730.9764266666667, 131.17681159420292, 856.77..."


In [5]:
# Count objects per raw class label (note the mixed-case duplicates such as 'Car' / 'car').
df['class'].value_counts()

Car               42505
DontCare          17015
Pedestrian         6686
Van                4283
Cyclist            2424
Truck              1614
Misc               1394
Tram                743
Person_sitting      304
2-wheeler           122
pedestrian           70
car                  50
dontcare              1
Name: class, dtype: int64

In [6]:
# Remove classes that we don't need.
# A single boolean mask replaces the per-class in-place drops: the column is
# scanned once, the surviving rows keep their original index labels, and
# .copy() yields an independent frame so later column assignments do not
# trigger SettingWithCopyWarning.
remove_classes = ['Truck', 'Misc', 'Tram']

df = df.loc[~df['class'].isin(remove_classes)].copy()

In [7]:
# Confirm the unwanted classes are gone (the index labels now have gaps where rows were dropped).
df.head()

Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Pedestrian,"[1047.4205405405405, 112.15686274509804, 1191...."
2,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Car,"[562.32192, 140.3207729468599, 614.80704, 157...."
3,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,Cyclist,"[981.5210666666668, 126.72463768115941, 999.48..."
4,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,DontCare,"[730.9764266666667, 131.17681159420292, 856.77..."
5,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,DontCare,"[741.7984000000001, 135.23478260869567, 765.67..."


In [8]:
# Merge raw labels that differ only in spelling or granularity into four
# canonical class names (for example 'Van' is counted as 'car' and
# 'Cyclist' as '2-wheeler').
class_dict = {
    'Car': 'car',
    'car': 'car',
    'Van': 'car',
    'Pedestrian': 'pedestrian',
    'pedestrian': 'pedestrian',
    'Person_sitting': 'pedestrian',
    'Cyclist': '2-wheeler',
    '2-wheeler': '2-wheeler',
    'DontCare': 'dontcare',
    'dontcare': 'dontcare',
}

# Plain dict lookup per element: a label missing from class_dict raises KeyError.
df['class'] = df['class'].map(class_dict.__getitem__)
df.head()

Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,pedestrian,"[1047.4205405405405, 112.15686274509804, 1191...."
2,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,car,"[562.32192, 140.3207729468599, 614.80704, 157...."
3,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,2-wheeler,"[981.5210666666668, 126.72463768115941, 999.48..."
4,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,dontcare,"[730.9764266666667, 131.17681159420292, 856.77..."
5,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,dontcare,"[741.7984000000001, 135.23478260869567, 765.67..."


In [9]:
# Re-count after merging: only the four canonical class names should remain.
df['class'].value_counts()

car           46838
dontcare      17016
pedestrian     7060
2-wheeler      2546
Name: class, dtype: int64

In [10]:
# Encode the merged class names as integer ids (the position in this list is the id).
class_dict = {
    name: idx
    for idx, name in enumerate(['2-wheeler', 'pedestrian', 'car', 'dontcare'])
}

# Plain dict lookup per element: a value missing from class_dict raises KeyError,
# which is also what happens if this cell is run a second time on the encoded ids.
df['class'] = df['class'].map(class_dict.__getitem__)
df.head()

Unnamed: 0,image_path,label_path,class,bboxes
0,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,1,"[1047.4205405405405, 112.15686274509804, 1191...."
2,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,2,"[562.32192, 140.3207729468599, 614.80704, 157...."
3,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,0,"[981.5210666666668, 126.72463768115941, 999.48..."
4,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,3,"[730.9764266666667, 131.17681159420292, 856.77..."
5,data/kitti/integration/resized/training/images...,data/kitti/integration/resized/training/labels...,3,"[741.7984000000001, 135.23478260869567, 765.67..."


In [11]:
# Hold out 20% of the rows for validation, with a fixed seed for repeatability.
# NOTE(review): df holds one row per annotated object, so rows that share an
# image can end up on both sides of this split -- confirm that is acceptable.
X, y = df['image_path'], df[['bboxes', 'class']]
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [12]:
def normalize(im_arr):
    """Standardise an image array with the ImageNet per-channel mean and std.

    The three-element statistics broadcast against the last axis of ``im_arr``,
    so that axis must hold the three colour channels.
    """
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])
    centred = im_arr - channel_mean
    return centred / channel_std

class KittiDS(Dataset):
    """Map-style dataset yielding one (image, bbox, class) sample per row.

    Args:
        paths: pandas Series of image file paths, one per sample.
        bboxes: pandas Series holding each sample's bounding-box values.
        y: pandas Series holding each sample's class label.

    Only the underlying ``.values`` arrays are kept, so samples are addressed
    by position (0 .. len-1) regardless of the index labels of the Series.
    """

    def __init__(self, paths, bboxes, y):
        self.paths = paths.values
        self.bboxes = bboxes.values
        self.y = y.values

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        """Load, normalise and return the sample at position ``idx``.

        Returns:
            Tuple ``(x, y_bbox, y_class)`` where ``x`` is the float32 RGB image
            scaled to [0, 1], standardised with ``normalize`` and laid out
            channels-first.

        Raises:
            FileNotFoundError: if the image path does not point to a file.
            OSError: if the file exists but OpenCV cannot decode it.
        """
        path = self.paths[idx]
        # cv2.imread signals failure by returning None instead of raising, which
        # previously surfaced as an opaque AttributeError; fail explicitly.
        if not os.path.isfile(path):
            raise FileNotFoundError("Image file not found: %s" % path)
        image = cv2.imread(path)
        if image is None:
            raise OSError("Could not decode image: %s" % path)

        x = cv2.cvtColor(image.astype('float32'), cv2.COLOR_BGR2RGB) / 255
        x = normalize(x)
        # Move the channel axis to the front (H, W, C -> C, H, W). normalize()
        # promotes to float64; casting back halves the batch memory and matches
        # the .float() conversion the training loops apply anyway.
        x = np.rollaxis(x, 2).astype(np.float32)
        return x, self.bboxes[idx], self.y[idx]

In [13]:
#train_kitti = KittiDS(X_train, y_train['bboxes'], y_train['class'])
#val_kitti = KittiDS(X_val, y_val['bboxes'], y_val['class'])

In [14]:
# set batch size
#batch_size = 16
# For autobatching and parallelizing data-loading
#train_kitti_pt = DataLoader(train_kitti, batch_size=batch_size, shuffle=True, drop_last=True)
#val_kitti_pt = DataLoader(val_kitti, batch_size=batch_size, drop_last=True)

In [15]:
# Verify whether a GPU is visible and report its name. The call is guarded so
# the notebook still runs top to bottom on CPU-only machines, where
# torch.cuda.get_device_name raises.
if torch.cuda.is_available():
    print("Flexing my GPU ^_^ : ", torch.cuda.get_device_name(0))
else:
    print("No CUDA device available; running on CPU")

Flexing my GPU ^_^ :  GeForce RTX 3080


In [16]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [17]:
def update_optimizer(optimizer, lr):
    """Overwrite the learning rate of every parameter group of ``optimizer`` with ``lr``."""
    for group in optimizer.param_groups:
        group["lr"] = lr

In [18]:
class PyKitti_model(nn.Module):
    """Pretrained ResNet-34 feature extractor with two linear heads.

    One head produces class scores, the other regresses four box values.

    Args:
        num_classes: number of outputs of the classification head
            (defaults to 4, the value that was previously hard-coded).
    """

    def __init__(self, num_classes=4):
        super().__init__()
        resnet = models.resnet34(pretrained=True)
        # children() yields the immediate sub-modules; only the first eight are
        # kept, which leaves out the backbone's own trailing layers.
        layers = list(resnet.children())[:8]
        self.features1 = nn.Sequential(*layers[:6])
        self.features2 = nn.Sequential(*layers[6:])
        # classification network
        self.classifier = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, num_classes))
        # bbox regressor network
        self.bbox = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))

    def forward(self, x):
        """Return ``(class_scores, bbox_values)`` for a batch of images ``x``."""
        x = self.features1(x)
        x = self.features2(x)
        x = F.relu(x)
        # Functional pooling avoids constructing a new nn.Module on every call.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        # Collapse the pooled 1x1 feature maps to one vector per sample.
        x = torch.flatten(x, 1)
        return self.classifier(x), self.bbox(x)

In [19]:
# train model
def train(model, optimizer, train_kitti_pt, val_kitti_pt, epochs=10, C=1000):
    """Train ``model`` in fp32 and print train/validation metrics after each epoch.

    Args:
        model: network returning ``(class_scores, bbox_values)`` for a batch.
        optimizer: optimizer that updates the model's parameters.
        train_kitti_pt: iterable of ``(x, y_bbox, y_class)`` training batches.
        val_kitti_pt: iterable of validation batches, passed to ``val_metrics``.
        epochs: number of passes over ``train_kitti_pt``.
        C: divisor applied to the summed L1 box loss before it is added to the
            classification loss.
    """
    for epoch in range(epochs):
        # val_metrics() switches the model to eval mode, so training mode is
        # re-enabled at the start of every epoch.
        model.train()
        total = 0
        sum_loss = 0
        correct = 0
        for x, y_bbox, y_class in train_kitti_pt:
            batch = y_class.shape[0]
            x = x.float().to(device)
            y_class = y_class.long().to(device)
            y_bbox = y_bbox.float().to(device)

            # Call the module itself rather than .forward() so registered
            # hooks are honoured.
            out_class, out_bbox = model(x)

            # Both losses are summed over the batch; the per-sample average is
            # taken once at the end of the epoch.
            clf_loss = F.cross_entropy(out_class, y_class, reduction="sum")
            bbox_reg_loss = F.l1_loss(out_bbox, y_bbox, reduction="none").sum(1)
            bbox_reg_loss = bbox_reg_loss.sum()
            loss = clf_loss + bbox_reg_loss/C

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total += batch
            sum_loss += loss.item()
            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()

        train_loss = sum_loss/total
        train_acc = correct/total
        val_loss, val_acc = val_metrics(model, val_kitti_pt, C)
        print("Epoch: ",epoch+1,"/",epochs,"\n----------------------------")
        print("Train_loss: %.3f, Train_acc: %.3f,\nVal_loss: %.3f, Val_acc: %.3f" %
              (train_loss, train_acc,val_loss, val_acc))

In [20]:
# compute validation metrics
def val_metrics(model, val_kitti_pt, C=1000):
    """Evaluate ``model`` on ``val_kitti_pt`` without computing gradients.

    Args:
        model: network returning ``(class_scores, bbox_values)`` for a batch.
        val_kitti_pt: iterable of ``(x, y_bbox, y_class)`` batches.
        C: divisor applied to the summed L1 box loss before it is added to the
            classification loss.

    Returns:
        Tuple ``(mean loss per sample, classification accuracy)``.

    The model is left in evaluation mode when the function returns.
    """
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    # eval() only changes layer behaviour; no_grad() is what disables autograd.
    with torch.no_grad():
        for x, y_bbox, y_class in val_kitti_pt:
            batch = y_class.shape[0]
            x = x.float().to(device)
            y_class = y_class.long().to(device)
            y_bbox = y_bbox.float().to(device)

            # Call the module itself rather than .forward() so registered
            # hooks are honoured.
            out_class, out_bbox = model(x)

            clf_loss = F.cross_entropy(out_class, y_class, reduction="sum")
            bbox_reg_loss = F.l1_loss(out_bbox, y_bbox, reduction="none").sum(1)
            bbox_reg_loss = bbox_reg_loss.sum()
            loss = clf_loss + bbox_reg_loss/C

            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()
            sum_loss += loss.item()
            total += batch
    return sum_loss/total, correct/total

In [21]:
# Instantiate the network on the selected device with fp32 parameters.
model1 = PyKitti_model().to(device, dtype=torch.float32)
# Hand only the trainable parameters to the optimizer. `filter` returns a
# one-shot iterator, so `parameters` should not be reused after this cell.
parameters = filter(lambda p: p.requires_grad, model1.parameters())
optimizer = torch.optim.Adam(parameters, lr=0.006)
# Print a per-layer summary for an input of shape (16, 3, 544, 960).
summary(model1, input_size=(16, 3, 544, 960))

Layer (type:depth-idx)                        Output Shape              Param #
PyKitti_model                                 --                        --
├─Sequential: 1-1                             [16, 128, 68, 120]        --
│    └─Conv2d: 2-1                            [16, 64, 272, 480]        9,408
│    └─BatchNorm2d: 2-2                       [16, 64, 272, 480]        128
│    └─ReLU: 2-3                              [16, 64, 272, 480]        --
│    └─MaxPool2d: 2-4                         [16, 64, 136, 240]        --
│    └─Sequential: 2-5                        [16, 64, 136, 240]        --
│    │    └─BasicBlock: 3-1                   [16, 64, 136, 240]        73,984
│    │    └─BasicBlock: 3-2                   [16, 64, 136, 240]        73,984
│    │    └─BasicBlock: 3-3                   [16, 64, 136, 240]        73,984
│    └─Sequential: 2-6                        [16, 128, 68, 120]        --
│    │    └─BasicBlock: 3-4                   [16, 128, 68, 120]        230,144

In [22]:
#%%time
#train(model1, optimizer, train_kitti_pt, val_kitti_pt, epochs=15)

## Automatic Mixed Precision Training

In [23]:
# Training Dataset Split (repeated with the same arguments and seed as the
# earlier split so this section can be run on its own)
X = df.image_path
y = df[['bboxes', 'class']]
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Wrap each split in the custom Dataset class. The datasets keep their own
# names instead of being overwritten by their loaders, so both stay inspectable.
train_ds_amp = KittiDS(X_train, y_train['bboxes'], y_train['class'])
val_ds_amp = KittiDS(X_val, y_val['bboxes'], y_val['class'])

batch_size = 16

# DataLoader handles batching and shuffling (no worker processes are configured).
# The training loader drops the last partial batch -- presumably to keep every
# training batch the same size for the BatchNorm1d heads; confirm.
train_kitti_amp = DataLoader(
    train_ds_amp, batch_size=batch_size, shuffle=True, drop_last=True
)
# Validation keeps the final partial batch so every held-out sample is scored.
val_kitti_amp = DataLoader(
    val_ds_amp, batch_size=batch_size, drop_last=False
)

In [None]:
def save_checkpoint(model, optimizer,best,epoch,loss,val_loss,path):
    """Write a training checkpoint to ``path`` and optionally copy it as a best model.

    Args:
        model: module whose ``state_dict()`` is stored.
        optimizer: optimizer whose ``state_dict()`` is stored.
        best: when truthy, the written file is also copied into
            ``model/best_model/`` under the same file name.
        epoch: value stored under the ``'epoch'`` key.
        loss: value stored under the ``'loss'`` key.
        val_loss: value stored under the ``'val_loss_min'`` key.
        path: destination file of the checkpoint.
    """
    # torch.save does not create missing directories, so make sure the target
    # directory exists first (a bare file name has no directory part).
    checkpoint_dir = os.path.dirname(path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
        'val_loss_min': val_loss,
    }, path)

    if best:
        best_fpath = 'model/best_model/'
        # makedirs also creates the missing 'model' parent and keeps the default
        # mode; the previous 0o666 mode produced a directory without the
        # execute bit, which cannot be entered.
        os.makedirs(best_fpath, exist_ok=True)
        # copy that checkpoint file to the best-model directory
        shutil.copyfile(path, os.path.join(best_fpath, os.path.basename(path)))
    
def load_model(model, optimizer, path, map_location=None):
    """Restore training state from a checkpoint file.

    The file is expected to contain the keys ``'model_state_dict'``,
    ``'optimizer_state_dict'``, ``'epoch'`` and ``'loss'``, which are the keys
    ``save_checkpoint`` writes.

    Args:
        model: module that receives the stored weights (updated in place).
        optimizer: optimizer that receives the stored state (updated in
            place); pass ``None`` to skip restoring optimizer state.
        path: checkpoint file to read.
        map_location: forwarded to ``torch.load`` to remap tensor devices.

    Returns:
        Tuple ``(model, optimizer, epoch, loss)``.
    """
    checkpoint = torch.load(path, map_location=map_location)
    # load_state_dict mutates its receiver and does not return it, so the
    # original objects are what gets handed back to the caller.
    model.load_state_dict(checkpoint['model_state_dict'])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    return model, optimizer, checkpoint['epoch'], checkpoint['loss']

In [32]:
# Automatic Mixed Precision
def train_amp(model, optimizer, train_kitti_amp, val_kitti_amp, start_epoch=0,
              epochs=10, C=1000, val_loss_min=np.inf, max_lr=0.006):
    """Train ``model`` with automatic mixed precision, checkpointing every epoch.

    Args:
        model: network returning ``(class_scores, bbox_values)`` for a batch.
        optimizer: optimizer that updates the model's parameters.
        train_kitti_amp: loader of ``(x, y_bbox, y_class)`` training batches;
            must support ``len()``.
        val_kitti_amp: loader of validation batches, passed to ``val_metrics_amp``.
        start_epoch: index of the first epoch to run (for resuming).
        epochs: index one past the last epoch to run.
        C: divisor applied to the summed L1 box loss before it is added to the
            classification loss.
        val_loss_min: best validation loss seen so far; an epoch whose
            validation loss is not larger is saved as the best model.
        max_lr: peak learning rate of the one-cycle schedule.

    Returns:
        The lowest validation loss observed, starting from ``val_loss_min``.

    Raises:
        ValueError: if ``train_kitti_amp`` yields no batches.

    Note:
        The one-cycle schedule is created fresh on every call and spans the
        epochs run by this call, so a resumed run restarts the cycle.
    """
    remaining_epochs = epochs - start_epoch
    if remaining_epochs <= 0:
        return val_loss_min
    steps_per_epoch = len(train_kitti_amp)
    if steps_per_epoch == 0:
        raise ValueError("train_kitti_amp yields no batches")

    # Size the schedule from the loader actually being iterated: the scheduler
    # is stepped once per batch and raises once its planned step count is exceeded.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr,
        cycle_momentum=False,
        epochs=remaining_epochs,
        steps_per_epoch=steps_per_epoch,
    )

    # set the init scale of gradscaler to 2^14 instead of ^16
    scaler = torch.cuda.amp.GradScaler(init_scale = 16384)

    for epoch in range(start_epoch, epochs):
        # val_metrics_amp() leaves the model in eval mode, so training mode has
        # to be restored at the start of every epoch, not just once.
        model.train()
        # Metrics are per epoch, so the accumulators are reset here.
        sum_loss = 0.0
        correct = 0
        total = 0

        for i, (x, y_bbox, y_class) in enumerate(train_kitti_amp):
            batch = y_class.shape[0]
            x = x.float().to(device)
            y_bbox = y_bbox.float().to(device)
            y_class = y_class.long().to(device)
            optimizer.zero_grad()

            # Forward pass and loss terms run under autocast.
            with torch.cuda.amp.autocast():
                out_class, out_bbox = model(x)
                clf_loss = F.cross_entropy(out_class, y_class, reduction="sum")
                # compute L1 loss
                bbox_reg_loss = F.l1_loss(out_bbox, y_bbox, reduction="none").sum(1)
            bbox_reg_loss = bbox_reg_loss.sum()
            # computing total loss
            loss = clf_loss + bbox_reg_loss/C

            # Scale the loss before backward; scaler.step() unscales the
            # gradients and skips the update if they contain inf/NaN.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()

            lv = loss.item()
            total += batch
            sum_loss += lv
            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()

            if i%100==0:
                # fractional epoch = completed epochs + progress through this one
                print("Epoch: %.3f, Batch: %d, Loss: %.3f" %
                      (epoch + i/steps_per_epoch,i,lv))

        train_loss = sum_loss/total
        train_acc = correct/total
        val_loss, val_acc = val_metrics_amp(model, val_kitti_amp, C)

        print("Epoch: ",epoch+1,"/",epochs,"\n----------------------------")
        print("Train_loss: %.3f, Train_acc: %.3f,\nVal_loss: %.3f, Val_acc: %.3f" %
              (train_loss, train_acc,val_loss, val_acc))

        # Track the best validation loss so later epochs are compared against it.
        is_best = val_loss <= val_loss_min
        if is_best:
            val_loss_min = val_loss

        # One save per epoch; save_checkpoint additionally copies the file to
        # the best-model directory when is_best is set. The stored loss is the
        # loss of the last training batch of the epoch.
        save_checkpoint(model, optimizer, is_best, epoch, lv, val_loss,
                        path = 'model/model_amp_epoch_' + str(epoch))

    return val_loss_min

In [31]:
# validation_amp
def val_metrics_amp(model, val_kitti_amp, C=1000):
    """Evaluate ``model`` under autocast without computing gradients.

    Args:
        model: network returning ``(class_scores, bbox_values)`` for a batch.
        val_kitti_amp: iterable of ``(x, y_bbox, y_class)`` batches.
        C: divisor applied to the summed L1 box loss before it is added to the
            classification loss.

    Returns:
        Tuple ``(mean loss per sample, classification accuracy)``.

    The model is left in evaluation mode when the function returns.
    """
    model.eval()
    total = 0
    sum_loss = 0.0
    correct = 0
    # eval() only changes layer behaviour; no_grad() is what disables autograd.
    with torch.no_grad():
        for x, y_bbox, y_class in val_kitti_amp:
            batch = y_class.shape[0]
            x = x.float().to(device)
            y_bbox = y_bbox.float().to(device)
            y_class = y_class.long().to(device)

            # Single forward pass, under autocast to mirror training.
            with torch.cuda.amp.autocast():
                out_class, out_bbox = model(x)
                clf_loss = F.cross_entropy(out_class, y_class, reduction="sum")
                # compute L1 loss
                bbox_reg_loss = F.l1_loss(out_bbox, y_bbox, reduction="none").sum(1)
            bbox_reg_loss = bbox_reg_loss.sum()
            # computing total loss
            loss = clf_loss + bbox_reg_loss/C

            # Each batch is counted exactly once; counting the sample total
            # twice would halve the reported accuracy.
            sum_loss += loss.item()
            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()
            total += batch
    return sum_loss/total, correct/total

In [33]:
%%time
# Run mixed-precision training with epochs=5; %%time reports how long the cell took.
train_amp(model1, optimizer, train_kitti_amp, val_kitti_amp, epochs=5)

Epoch: 0.000, Batch: 0, Loss: 20.541


KeyboardInterrupt: 

In [None]:
# Shell out to the NVIDIA command-line tool to inspect the current GPU state.
!nvidia-smi