In [1]:
# Colab-only setup: mount the user's Google Drive at /content/drive so that
# files stored on Drive can be reached through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Switch the working directory to the project folder on the mounted Drive.
# NOTE(review): the path is hard-coded; presumably this is where the local
# modules imported below (models, helper, data_loading, ...) live -- confirm.
%cd /content/drive/My\ Drive/DL\ Project/Submission

/content/drive/My Drive/DL Project/Submission


In [0]:
## import libraries
import numpy as np
import torch
from torch.autograd import Variable
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
import torch.optim as optim
import torch.nn.functional as F
from models import resnet18_encoderdecoder, resnet18_encoderdecoder_wbottleneck, resnet50_encoderdecoder
from models import resnet18_coach_vae
import torchvision
import torch.optim as optim
import warnings
warnings.filterwarnings('ignore')

## fix seeds
# One shared seed value for the torch (CPU + CUDA) and numpy generators.
SEED = 7
torch.cuda.manual_seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)

In [6]:
# kornia provides the warps, crop, normalisation and Dice loss used below.
# NOTE(review): the install is unpinned, while this notebook calls
# `kornia.warp_perspective` / `kornia.warp_affine` from the top-level package;
# a newer release may lay the API out differently -- consider pinning the
# version this notebook was developed against.
!pip install kornia
import kornia



In [0]:
### Homography transform matrices
# 6 x 3 x 3: order same as camera CAM_FRONT_LEFT, CAM_FRONT, CAM_FRONT_RIGHT, CAM_BACK_LEFT, CAM_BACK, CAM_BACK_RIGHT
# One 3x3 perspective matrix per camera; `stitch` passes M_matrices[i] to
# kornia.warp_perspective for view i with an output size of (219, 306).
M_matrices = torch.tensor([
    # CAM_FRONT_LEFT
    [[-6.92946073e-02, -1.17143003e+00,  1.64122408e+02],
        [-1.33781874e-14, -1.67019853e+00,  2.34084846e+02],
        [-7.00394603e-17, -7.63146706e-03,  1.00000000e+00]], 
    # CAM_FRONT
    [[-6.92636526e-02, -1.17089785e+00,  1.64264194e+02],
        [-1.12965193e-14, -1.66944201e+00,  2.34140507e+02],
        [-5.76795556e-17, -7.62799727e-03,  1.00000000e+00]],
    # CAM_FRONT_RIGHT
    [[-7.02452787e-02, -1.17762492e+00,  1.64369634e+02],
        [-2.27595720e-14, -1.67903365e+00,  2.34318471e+02],
        [-1.16009632e-16, -7.67182090e-03,  1.00000000e+00]],
    # CAM_BACK_LEFT
    [[-6.94775392e-02, -1.17675499e+00,  1.64135286e+02],
        [-1.19904087e-14, -1.67779415e+00,  2.34164782e+02],
        [-5.78963960e-17, -7.66615368e-03,  1.00000000e+00]],
    # CAM_BACK
    [[-6.82085369e-02, -1.16228084e+00,  1.64011808e+02],
        [-1.23234756e-14, -1.65715610e+00,  2.33912863e+02],
        [-6.39679282e-17, -7.57186452e-03,  1.00000000e+00]],
    # CAM_BACK_RIGHT
    [[-6.91003275e-02, -1.16814423e+00,  1.63997347e+02],
        [-1.59872116e-14, -1.66551463e+00,  2.34087152e+02],
        [-8.30498864e-17, -7.61006318e-03,  1.00000000e+00]]
        ])
# rotation matrices
# 6 x 2 x 3 affine matrices, one per camera in the same order as M_matrices.
# The left 2x2 part of each entry is a rotation by a multiple of 60 degrees
# (cos 60 = 0.5, sin 60 = 0.86603; the second entry is the identity) and the
# last column is a translation. `stitch` passes M_rotations[i] to
# kornia.warp_affine for view i.
M_rotations = torch.tensor([[[ 5.0000e-01,  8.6603e-01, -1.8330e+01],
      [-8.6603e-01,  5.0000e-01,  1.8725e+02]],

    [[ 1.0000e+00,  0.0000e+00,  0.0000e+00],
      [-0.0000e+00,  1.0000e+00,  0.0000e+00]],

    [[ 5.0000e-01, -8.6603e-01,  1.7133e+02],
      [ 8.6603e-01,  5.0000e-01, -7.7752e+01]],

    [[-5.0000e-01,  8.6603e-01,  1.3467e+02],
      [-8.6603e-01, -5.0000e-01,  2.9675e+02]],

    [[-1.0000e+00,  8.7423e-08,  3.0600e+02],
      [-8.7423e-08, -1.0000e+00,  2.1900e+02]],

    [[-5.0000e-01, -8.6603e-01,  3.2433e+02],
      [ 8.6603e-01, -5.0000e-01,  3.1748e+01]]])

#flip 90 degree to align car facing right
# 1 x 2 x 3 affine matrix: a 90-degree rotation (the ~1e-8 terms are rounding
# residue of cos 90) with a translation of 438, the side length of the final
# cropped image that `stitch` applies it to.
M_flip = torch.tensor([[[-4.3711e-08, -1.0000e+00,  4.3800e+02],
      [ 1.0000e+00, -4.3711e-08,  0.0000e+00]]])

# Move all three transform tensors to the GPU.
# NOTE(review): unconditional .cuda() -- this cell fails on a CPU-only runtime.
M_matrices = M_matrices.cuda()
M_rotations = M_rotations.cuda()
M_flip = M_flip.cuda()

In [0]:
import kornia.augmentation as K
#helper function to stitch 6 BEV views
def stitch(x, M_matrices,M_rotations, M_flip, label=True):
    """Warp six camera views to a common plane and merge them into one image.

    For every view the batch is warped with that view's perspective matrix,
    then with that view's affine rotation, both to a 219 x 306 output. The six
    results are pasted onto a zero canvas of twice that size: front and back
    views fill the horizontal centre of the top and bottom halves, and the
    four side views are merged in with an element-wise maximum so overlapping
    regions keep the brighter pixel. The canvas is then centre-cropped to
    438 x 438, rotated with `M_flip`, and colour-normalised.

    Args:
        x: tensor indexed as x[:, i, :, :, :], i.e. (batch, 6 views, 3, H, W).
        M_matrices: per-view 3x3 perspective matrices, indexed by view.
        M_rotations: per-view 2x3 affine matrices, indexed by view.
        M_flip: (1, 2, 3) affine matrix applied to the cropped canvas.
        label: selects which mean/std pair is used for normalisation
            (presumably statistics of the labeled vs. unlabeled images --
            TODO confirm).

    Returns:
        Normalised tensor of shape (batch, 3, 438, 438).
    """
    num_views = 6
    h, w = 219, 306   # size of each warped single-camera view
    pad = 55          # inset of the four side views from the canvas border
    out_size = 438    # side length of the final square image
    batch_size = len(x)

    #Preprocessing: image stitch
    views = []  # one (batch, 3, h, w) tensor per camera
    for i in range(num_views):
        #get a batch of *same* view images
        img_batch = x[:, i, :, :, :]
        homography = M_matrices[i].unsqueeze(0).repeat(batch_size, 1, 1)
        rotation = M_rotations[i].unsqueeze(0).repeat(batch_size, 1, 1)
        img_warp = kornia.warp_perspective(img_batch, homography, dsize=(h, w))
        views.append(kornia.warp_affine(img_warp, rotation, dsize=(h, w)))

    # Black canvas, twice the view height/width. Allocate it with the same
    # device and dtype as the warped views instead of "CUDA if available",
    # so it always matches the tensors pasted into it.
    canvas = views[0].new_zeros((batch_size, 3, 2 * h, 2 * w))

    #two bases: front and back view
    centre = slice(w - w // 2, w + w // 2)
    canvas[:, :, 0:h, centre] = views[1]
    canvas[:, :, h:, centre] = views[4]

    # Side views: element-wise max against what is already on the canvas.
    top, bottom = slice(pad, h + pad), slice(h - pad, 2 * h - pad)
    left, right = slice(pad, w + pad), slice(w - pad, 2 * w - pad)
    placements = (
        (0, top, left),      # CAM_FRONT_LEFT  -> top left
        (2, top, right),     # CAM_FRONT_RIGHT -> top right
        (3, bottom, left),   # CAM_BACK_LEFT   -> bottom left
        (5, bottom, right),  # CAM_BACK_RIGHT  -> bottom right
    )
    for view_idx, rows, cols in placements:
        canvas[:, :, rows, cols] = torch.max(views[view_idx], canvas[:, :, rows, cols])

    #center-crop
    crop_fn = kornia.augmentation.CenterCrop(size=out_size)
    canvas = crop_fn(canvas)

    #flip 90 degree
    canvas = kornia.warp_affine(canvas, M_flip.repeat(batch_size, 1, 1), dsize=(out_size, out_size))

    #Normalize color
    if label:
        mean, std = [0.698, 0.718, 0.730], [0.322, 0.313, 0.308]
    else:
        mean, std = [0.548, 0.597, 0.630], [0.339, 0.340, 0.342]
    normalize = K.Normalize(torch.tensor(mean, device=canvas.device),
                            torch.tensor(std, device=canvas.device))

    return normalize(canvas)


In [0]:
#inverse back for visualization
from torchvision import transforms
def inv_transform(x, label=True):
  """Undo the colour normalisation applied by `stitch`, for visualisation.

  Two `transforms.Normalize` steps are chained: the first divides by 1/std
  (i.e. multiplies by std), the second subtracts -mean (i.e. adds mean).

  Args:
    x: normalised image tensor accepted by `torchvision.transforms.Normalize`.
    label: pick the same mean/std pair that `stitch` uses for this flag.

  Returns:
    The de-normalised tensor.
  """
  if label:
    means, stds = (0.698, 0.718, 0.730), (0.322, 0.313, 0.308)
  else:
    means, stds = (0.548, 0.597, 0.630), (0.339, 0.340, 0.342)

  # (x - 0) / (1/std) == x * std
  rescale = transforms.Normalize(mean=[0., 0., 0.], std=[1/s for s in stds])
  # (x - (-mean)) / 1 == x + mean
  recentre = transforms.Normalize(mean=[-m for m in means], std=[1., 1., 1.])
  return transforms.Compose([rescale, recentre])(x)

___

## Train Final Stitched Model

In [0]:
# NOTE: this import rebinds `resnet18_encoderdecoder`, replacing the version
# imported from `models` at the top of the notebook.
from model_lane_res_stitch import Stitch_Classfier, resnet18_encoderdecoder

# NOTE(review): `device` is defined here but the lines below (and the rest of
# the notebook) call .cuda() directly, so the CPU fallback is never used.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Build the ResNet-18 encoder-decoder and wrap it in the 2-class classifier.
net = resnet18_encoderdecoder().cuda() 
net_segmentation = Stitch_Classfier(net, n_class=2).cuda()
# Drop the standalone name; the model stays reachable through net_segmentation.
del(net)

In [0]:
from helper import collate_fn, compute_ts_road_map
from data_loading import get_loaders

In [0]:
# Per-epoch loss history. train_segmentation / val_segmentation each append
# one entry per call and overwrite it with the epoch's mean batch loss.
# Re-running this cell clears the history.
train_seg_loss = []
val_seg_loss = []

In [0]:
# Train/validation loaders read as module-level globals by train_segmentation
# and val_segmentation; each batch is unpacked there as (sample, target, road_image).
# NOTE(review): the meaning of 'labeled' and visual=True is defined in
# data_loading.get_loaders, which is not shown here -- confirm before changing.
train_seg_loader, val_seg_loader = get_loaders('labeled', batch_size = 16,visual=True)


In [0]:
def train_segmentation(epoch, net_segmentation, seg_optimizer):
    """Train the segmentation network for one pass over `train_seg_loader`.

    Every batch is stitched into a single image with `stitch`, run through the
    network, and optimised against the road-map labels with kornia's Dice
    loss. A new entry is appended to the module-level `train_seg_loss` list
    and ends up holding the mean batch loss of this epoch.

    Args:
        epoch: index of the current epoch; not read inside the function.
        net_segmentation: model to train; called on the stitched batch.
        seg_optimizer: optimizer stepped once per batch.
    """
    progbar = tqdm_notebook(total=len(train_seg_loader), desc='Train')
    loss = kornia.losses.DiceLoss()
    net_segmentation.train()

    train_seg_loss.append(0)  # running sum for this epoch, averaged at the end
    seg_optimizer.zero_grad()
    # The former `hist = np.zeros((nClasses, nClasses))` was removed: `nClasses`
    # is not defined anywhere, so it raised NameError, and `hist` was never read.
    for batch_idx, data in enumerate(train_seg_loader):
        sample, target, road_image = data
        BEV_inputs = stitch(torch.stack(sample).cuda(), M_matrices, M_rotations, M_flip, label=True)
        road_image = torch.stack(road_image).long().cuda()
        outputs = net_segmentation(BEV_inputs)
        total_loss = loss(outputs, road_image)
        total_loss.backward()

        seg_optimizer.step()
        seg_optimizer.zero_grad()

        # Accumulate a graph-free copy of the loss so no autograd history is kept.
        train_seg_loss[-1] += total_loss.detach()

        progbar.set_description('Train (loss=%.4f)' % (train_seg_loss[-1]/(batch_idx+1)))
        progbar.update(1)

    train_seg_loss[-1] = train_seg_loss[-1]/len(train_seg_loader)
    
    
def val_segmentation(epoch, net_segmentation):
    """Evaluate the network on `val_seg_loader` and checkpoint on improvement.

    Appends the mean Dice loss of the pass to the module-level `val_seg_loss`
    list, stores the mean per-batch road-map score in the global `val_score`,
    and, when that score exceeds the global `best_score`, updates `best_score`
    and saves the model's state dict.

    Args:
        epoch: index of the current epoch; not read inside the function.
        net_segmentation: model to evaluate.

    NOTE(review): the checkpoint is always written to 'resnet18_stitch',
    whichever model is passed in -- confirm this is intended for the ResNet-50
    runs later in the notebook.
    """
    global best_score
    global val_score

    criterion = kornia.losses.DiceLoss()
    progbar = tqdm_notebook(total=len(val_seg_loader), desc='Val')
    net_segmentation.eval()

    val_seg_loss.append(0)  # running sum, turned into a mean after the loop
    n_batches = 0
    score_sum = 0
    with torch.no_grad():
        for sample, target, road_image in val_seg_loader:
            n_batches += 1
            BEV_inputs = stitch(torch.stack(sample).cuda(), M_matrices, M_rotations, M_flip, label=True)
            road_image = torch.stack(road_image).long().cuda()
            outputs = net_segmentation(BEV_inputs)
            val_seg_loss[-1] += criterion(outputs, road_image).data

            # arg-max over the class dimension (no need to normalize first)
            _, predicted_road_map = outputs.data.max(1)
            batch_score = compute_ts_road_map(predicted_road_map, road_image)
            score_sum += batch_score

            progbar.set_description('Val (loss=%.4f, Score=%.4f)' % (val_seg_loss[-1]/n_batches, batch_score))
            progbar.update(1)

        val_seg_loss[-1] = val_seg_loss[-1]/len(val_seg_loader)
        val_score = score_sum / n_batches
        print(f'Road Map Score: {val_score:.4}')

        if val_score > best_score:
            best_score = val_score
            print('Saving..')
            torch.save(net_segmentation.state_dict(), 'resnet18_stitch')

In [0]:
# Score a validation pass must exceed before val_segmentation saves a
# checkpoint; val_segmentation updates this global whenever it is beaten.
best_score = 0.63

In [0]:
###train from scratch
best_score = 0.63 #previous benchmark
learning_rate = 1e-3
num_epochs = 30
seg_optimizer = optim.SGD(net_segmentation.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-3)
progbar = tqdm_notebook(total=num_epochs, desc='Epochs')
for epoch in range(num_epochs):
  # The parentheses matter: `epoch+1 % 10` parses as `epoch + (1 % 10)`, which
  # is never 0 for epoch >= 0, so the decay below could never run. With them,
  # the rate is divided by 10 at the start of every 10th epoch
  # (epoch indices 9, 19, 29).
  if (epoch + 1) % 10 == 0:
    learning_rate /= 10 #lr decay
    print('lr decay...')
    for param_group in seg_optimizer.param_groups:
      param_group['lr'] = learning_rate
  print('starting epoch', epoch)
  train_segmentation(epoch, net_segmentation=net_segmentation, seg_optimizer=seg_optimizer)
  val_segmentation(epoch, net_segmentation=net_segmentation)
  progbar.update(1)

## Train ResNet50 with pretrained weights

In [0]:
from model_lane import Multi_Classfier, resnet50_encoderdecoder, warp_transform
# Build the ResNet-50 encoder-decoder and initialise it from a saved state dict.
net = resnet50_encoderdecoder().cuda() 
net.load_state_dict(torch.load('./47-DreamTeam-Round3/resnet50_bs16')) #pretrained weights
# Wrap it in the 2-class classifier; this rebinds `net_segmentation`, replacing
# the ResNet-18 model created earlier in the notebook.
net_segmentation = Multi_Classfier(net, n_class = 2).cuda()
# Drop the standalone name; the model stays reachable through net_segmentation.
del(net)

In [0]:
#Pretrain: freeze layers - first 8 layers before decoder; train for 10 epochs
# Turn off gradients for the first 8 children of `features` (indices 0-7).
for child in list(net_segmentation.features.children())[:8]:
  for param in child.parameters():
    param.requires_grad = False #freeze weight

# Only the parameters that are still trainable are handed to the optimizer.
trainable_params = [p for p in net_segmentation.parameters() if p.requires_grad]
seg_optimizer = optim.Adam(trainable_params, lr=1e-3, weight_decay=1e-4, betas=(0.9, 0.999))
print('num of trainable params:', sum(p.numel() for p in trainable_params))
print('num of total params:', sum(p.numel() for p in net_segmentation.parameters()))

# NOTE(review): `best_score` is not reset in this cell, so checkpointing
# compares against whatever value an earlier cell left behind.
progbar = tqdm_notebook(total=10, desc='Epochs')
for epoch in range(10):
  print('starting epoch', epoch)
  train_segmentation(epoch, net_segmentation=net_segmentation, seg_optimizer=seg_optimizer)
  val_segmentation(epoch, net_segmentation=net_segmentation)
  progbar.update(1)

In [0]:
#Unfreeze encoder layers for end-to-end training
# Re-enable gradients for the first 8 children of `features` (indices 0-7),
# the same ones that were frozen for the pretraining phase.
for child in list(net_segmentation.features.children())[:8]:
  for param in child.parameters():
    param.requires_grad = True

all_params = list(net_segmentation.parameters())
print('num of trainable params:', sum(p.numel() for p in all_params if p.requires_grad))
print('num of total params:', sum(p.numel() for p in all_params))

In [0]:
###End-to-end training

best_score = 0.63
learning_rate = 1e-3
num_epochs = 30
# A fresh optimizer over *all* parameters, now that the encoder is unfrozen.
seg_optimizer = optim.Adam(net_segmentation.parameters(), lr=learning_rate, weight_decay=1e-4, betas=(0.9, 0.999))
progbar = tqdm_notebook(total=num_epochs, desc='Epochs')
for epoch in range(num_epochs):
  # The parentheses matter: `epoch+1 % 10` parses as `epoch + (1 % 10)`, which
  # is never 0 for epoch >= 0, so the decay below could never run. With them,
  # the rate is divided by 10 at the start of every 10th epoch
  # (epoch indices 9, 19, 29).
  if (epoch + 1) % 10 == 0:
    learning_rate /= 10 #lr decay
    print('lr decay...')
    for param_group in seg_optimizer.param_groups:
      param_group['lr'] = learning_rate
  print('starting epoch', epoch)
  train_segmentation(epoch, net_segmentation=net_segmentation, seg_optimizer=seg_optimizer)
  val_segmentation(epoch, net_segmentation=net_segmentation)
  progbar.update(1)