## Import Packages

In [1]:
import zipfile as zf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.utils.data as data
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import StepLR
import cv2
import os
from tqdm.notebook import tqdm
from PIL import Image

import future

%load_ext autoreload
%autoreload 2

## Setup tensorboard

In [2]:
from torch.utils.tensorboard import SummaryWriter
# TensorBoard writer shared by the rest of the notebook; event files for this
# run are written under runs/experiment_1.
writer = SummaryWriter(log_dir='runs/experiment_1')

## Define the ENet model

We decided to implement the following residual blocks as separate classes, which are then used to build the ENet encoder and decoder:
    - Initial block
    - RDDNeck - class for regular, downsampling and dilated bottlenecks
    - ASNeck - class for asymmetric bottlenecks
    - UBNeck - class for upsampling bottlenecks

The ENet architecture is an autoencoder-based model and is divided into 5 sub-blocks. Please refer to the [ENET paper](https://arxiv.org/pdf/1606.02147.pdf) for details of each sub-block. The code for the ENet building blocks is taken from [here](https://github.com/iArunava/ENet-Real-Time-Semantic-Segmentation).

Fast scene understanding uses the first 2 sub-blocks as the encoder and the remaining 3 as the decoder. In this implementation, there is 1 shared encoder and 3 separate decoders for the 3 tasks (instance segmentation, semantic segmentation, depth estimation).

In [3]:
import os, sys

# Put the notebook's working directory on the import path (once) so the
# project-local packages imported further down can be resolved.
nb_dir = os.getcwd()
already_importable = nb_dir in sys.path
if not already_importable:
    sys.path.append(nb_dir)
print(nb_dir)

/home/lin/ECE6254_Project_Enet


In [4]:
from models.ENetDecoder import ENetDecoder
from models.ENetEncoder import ENetEncoder

class BranchedENet(nn.Module):
    """ENet variant with one shared encoder feeding three independent decoders.

    Args:
        C: number of classes; forwarded unchanged to the encoder and to each
           of the three decoders.
    """

    def __init__(self, C):
        super().__init__()

        # C - number of classes
        self.C = C

        # Shared feature extractor.
        self.enc = ENetEncoder(C)

        # One decoder head per branch; all heads are built with the same C.
        self.dec1 = ENetDecoder(C)
        self.dec2 = ENetDecoder(C)
        self.dec3 = ENetDecoder(C)

    def forward(self, x):
        """Encode ``x`` once and decode it with every head.

        Returns:
            A 3-tuple ``(dec1_out, dec2_out, dec3_out)``. The outputs are kept
            as a tuple rather than stacked into a single tensor.
        """
        # The encoder returns the encoded features plus two extra values
        # (i1, i2) that every decoder call needs.
        features, i1, i2 = self.enc(x)
        heads = (self.dec1, self.dec2, self.dec3)
        return tuple(head(features, i1, i2) for head in heads)

## Instantiate the ENet model

In [5]:
# Number of classes handed to the encoder and to every decoder branch.
num_classes = 35
enet = BranchedENet(num_classes)

In [6]:
# Run on the first CUDA device when one is visible, otherwise fall back to the
# CPU, and move the model's parameters there.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if use_cuda else 'cpu')
enet = enet.to(device)

## Define Dataloader

In [7]:
from data.cityscapes import Cityscapes as dataset

In [8]:
# Spatial size every sample is resized to before being fed to the network.
height, width = 512, 1024
dataset_dir = 'data/cityscape'

# Resize first, then convert to a tensor.
resize_to_input = transforms.Resize((height, width))
image_transform = transforms.Compose([resize_to_input, transforms.ToTensor()])

train_set = dataset(dataset_dir, transform=image_transform)

# One sample per batch, reshuffled on every pass, loaded by a single worker.
train_loader = data.DataLoader(
    train_set,
    batch_size=1,
    shuffle=True,
    num_workers=1,
)

In [9]:
# Shared iterator over the training loader; the training loop keeps pulling
# batches from this same object.
dataiter = iter(train_loader)
# Use the built-in next(): the iterator's own `.next()` method is not
# available in newer PyTorch releases.
img, label, inst, dpth = next(dataiter)

# Trace the model once with a real batch so its graph shows up in TensorBoard.
writer.add_graph(enet, img.to(device))
# Flush instead of close: the same writer is used again for the loss curves
# during training.
writer.flush()

data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/bremen/bremen_000282_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/bremen/bremen_000282_000019_gtFine_labelIds.png data/cityscape/gtFine_trainvaltest/gtFine/train/bremen/bremen_000282_000019_gtFine_instanceIds.png data/cityscape/disparity_trainvaltest/disparity/train/bremen/bremen_000282_000019_disparity.png
data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/erfurt/erfurt_000022_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/erfurt/erfurt_000022_000019_gtFine_labelIds.png data/cityscape/gtFine_trainvaltest/gtFine/train/erfurt/erfurt_000022_000019_gtFine_instanceIds.png data/cityscape/disparity_trainvaltest/disparity/train/erfurt/erfurt_000022_000019_disparity.png
data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/bremen/bremen_000267_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/bremen/bremen_000267_000019_gtFine_labelIds.png data/cityscape/g

Not within tolerance rtol=1e-05 atol=1e-05 at input[0, 19, 3, 489] (0.019528158009052277 vs. 0.009444912895560265) and 91505 other locations (0.00%)
  check_tolerance, _force_outplace, True, _module_class)
Not within tolerance rtol=1e-05 atol=1e-05 at input[0, 27, 24, 504] (-0.0011107708560302854 vs. 0.0064564417116343975) and 77003 other locations (0.00%)
  check_tolerance, _force_outplace, True, _module_class)
Not within tolerance rtol=1e-05 atol=1e-05 at input[0, 18, 55, 505] (0.015072399750351906 vs. 0.007174754049628973) and 86947 other locations (0.00%)
  check_tolerance, _force_outplace, True, _module_class)


data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/erfurt/erfurt_000077_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/erfurt/erfurt_000077_000019_gtFine_labelIds.png data/cityscape/gtFine_trainvaltest/gtFine/train/erfurt/erfurt_000077_000019_gtFine_instanceIds.png data/cityscape/disparity_trainvaltest/disparity/train/erfurt/erfurt_000077_000019_disparity.png
data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/strasbourg/strasbourg_000000_004112_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/strasbourg/strasbourg_000000_004112_gtFine_labelIds.png data/cityscape/gtFine_trainvaltest/gtFine/train/strasbourg/strasbourg_000000_004112_gtFine_instanceIds.png data/cityscape/disparity_trainvaltest/disparity/train/strasbourg/strasbourg_000000_004112_disparity.png
data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/jena/jena_000010_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/jena/jena_000010_000019_gtFine_label

In [10]:
## Need to add in training loop
# writer.add_scalar('training loss',running_loss / 1000,epoch * len(trainloader) + i)

# for n_iter in range(100):
#     writer.add_scalar('Loss/train', np.random.random(), n_iter)
#     writer.add_scalar('Loss/test', np.random.random(), n_iter)
#     writer.add_scalar('Accuracy/train', np.random.random(), n_iter)
#     writer.add_scalar('Accuracy/test', np.random.random(), n_iter)

## 3 - Losses(todo)
(1) Semantic Segmentation Loss

(2) Instance Segmentation Loss

(3) Depth Estimation Loss

In [11]:
# def compute_instance_cost(a_C, a_G):
#     M = {}
#     tools = require 'tools/tools'

#     in_margin = 0.5
#     out_margin = 1.5
#     Lnorm = 2

#     function norm(inp, L)
#         n
#         if (L == 1) then
#             n = torch.sum(torch.abs(inp), 1)
#         else
#             n = torch.sqrt(torch.sum(torch.pow(inp, 2), 1) + 1e-8)
#         end
#         return n
#     end

#     -- prediction: batchsize x nDim x h x w
#     -- labels: batchsize x classes x h x w

#     local lossf =
#         function(prediction, labels)
#         local batchsize = prediction:size(1)
#         local c = prediction:size(2)
#         local height = prediction:size(3)
#         local width = prediction:size(4)
#         local nInstanceMaps = labels:size(2)
#         local loss = 0

#         M.loss_dist = 0
#         M.loss_var = 0

#         for b = 1, batchsize do
#             local pred = prediction[b] -- c x h x w
#             local loss_var = 0
#             local loss_dist = 0

#             for h = 1, nInstanceMaps do
#                 local label = labels[b][h]:view(1, height, width) -- 1 x h x w
#                 local means = {}
#                 local loss_v = 0
#                 local loss_d = 0

#                 -- center pull force
#                 for j = 1, label:max() do
#                     local mask = label:eq(j)
#                     local mask_sum = mask:sum()
#                     if (mask_sum > 1) then
#                         local inst = pred[mask:expandAs(pred)]:view(c, -1, 1) -- c x -1 x 1

#                         -- Calculate mean of instance
#                         local mean = torch.mean(inst, 2) -- c x 1 x 1
#                         table.insert(means, mean)

#                         -- Calculate variance of instance
#                         local var = norm((inst - mean:expandAs(inst)), 2) -- 1 x -1 x 1
#                         var = torch.cmax(var - (in_margin), 0)
#                         local not_hinged = torch.sum(torch.gt(var, 0))

#                         var = torch.pow(var, 2)
#                         var = var:view(-1)

#                         var = torch.mean(var)
#                         loss_v = loss_v + var
#                     end
#                 end

#                 loss_var = loss_var + loss_v

#                 -- center push force
#                 if (#means > 1) then
#                     for j = 1, #means do
#                         local mean_A = means[j] -- c x 1 x 1
#                         for k = j + 1, #means do
#                             local mean_B = means[k] -- c x 1 x 1
#                             local d = norm(mean_A - mean_B, Lnorm) -- 1 x 1 x 1
#                             d = torch.pow(torch.cmax(-(d - 2 * out_margin), 0), 2)
#                             loss_d = loss_d + d[1][1][1]
#                         end
#                     end

#                     loss_dist = loss_dist + loss_d / ((#means - 1) + 1e-8)
#                 end
#             end

#             loss = loss + (loss_dist + loss_var)
#         end

#         loss = loss / batchsize + torch.sum(prediction) * 0

#         return loss
#     end

# return lossf


In [12]:
def inverse_huber_loss(out, target, eps=1e-12):
    """Inverse Huber (berHu) loss between a prediction and its target.

    With ``d = |out - target|`` and threshold ``C = 0.2 * max(d)``, every
    element contributes ``d`` when ``d <= C`` and ``(d**2 + C**2) / (2 * C)``
    otherwise; the result is the mean over all elements.

    Args:
        out: predicted tensor.
        target: ground-truth tensor, broadcastable against ``out``.
        eps: lower bound applied to ``C``. Without it a perfect prediction
            (``d == 0`` everywhere) makes ``C == 0`` and the quadratic branch
            evaluates ``0 / 0``, turning both the loss and its gradient
            into NaN.

    Returns:
        Scalar tensor holding the mean loss.
    """
    absdiff = torch.abs(out - target)
    # Threshold is 20% of the largest residual, floored so the division below
    # is always well defined.
    C = torch.clamp(0.2 * torch.max(absdiff), min=eps)
    quadratic = (absdiff * absdiff + C * C) / (2 * C)
    # `<=` instead of `<`: both branches equal C at d == C, so values are
    # unchanged, but an all-zero residual now selects the linear branch.
    return torch.mean(torch.where(absdiff <= C, absdiff, quadratic))

In [13]:
# local grad = require 'autograd'

# def lossfunction(lossf_name, weights):
#     if (lossf_name == 'softmaxLoss') then
#         lossfunction = cudnn.SpatialCrossEntropyCriterion(weights)
#     elseif (lossf_name == 'huberLoss') then
#         lossfunction = grad.nn.AutoCriterion('depthLoss_huber')(require 'lossf/myHuberLoss')
#     elseif (lossf_name == 'instanceLoss') then
#         lossfunction = grad.nn.AutoCriterion('instance_loss')(require 'lossf/myInstanceLoss')
#     else
#         assert(false, 'Cannot load lossfunction ' .. opts.lossf)
#     end

#     return lossfunction
# end

# return getLoss

# Steps 5 and 6 have been done in the dataloader


## 7 - Define the Hyperparameters(todo)

In [14]:
from data.utils import enet_weighing

# Optimisation settings.
lr = 5e-4
batch_size = 1

# Epoch intervals used by the training loop: a checkpoint is written every
# `print_every` epochs and the evaluation pass runs every `eval_every` epochs.
print_every = 5
eval_every = 5

# figure out enet_weighing issue
#criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor(enet_weighing(train_loader, 12)).to(device))
# Unweighted cross-entropy, used for both classification-style branches.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)

# Mean squared error, used for the depth branch.
criterion_dpth = nn.MSELoss(reduction='mean').to(device)

optimizer = torch.optim.Adam(enet.parameters(), lr=lr, weight_decay=2e-4)

## 8 - Training loop(todo)

In [15]:
# Number of passes of the outer training loop.
epochs = 100

# Per-epoch loss histories.
train_losses, eval_losses = [], []

# Mini-batches drawn per epoch for training and for validation.
# NOTE(review): 367 and 101 are hard-coded sample counts; confirm they match
# the dataset actually loaded by `train_loader`.
bc_train = 367 // batch_size
bc_eval = 101 // batch_size

In [16]:
# Train loop

# Label id kept as foreground for the instance branch ("car"); every other
# id is mapped to 0.
CAR_LABEL_ID = 26


def _next_batch():
    """Return the next batch, restarting the shared iterator once it is exhausted."""
    global dataiter
    try:
        return next(dataiter)
    except StopIteration:
        # A DataLoader iterator only covers one pass over the data; start a
        # fresh pass instead of crashing in the middle of training.
        dataiter = iter(train_loader)
        return next(dataiter)


def _prepare_batch(batch):
    """Move an (img, label, inst, dpth) batch to `device` and build the targets."""
    # assign data to cpu/gpu
    img, label, inst, dpth = (t.to(device) for t in batch)
    label = label.squeeze(1)
    inst = inst.squeeze(1)
    dpth = dpth.squeeze(1)

    # set non-car labels to 0 for inst
    inst[inst != CAR_LABEL_ID] = 0
    return img, label, inst, dpth


def _multitask_loss(out, label, inst, dpth):
    """Sum of the segmentation, instance and depth losses for one batch."""
    # The model returns a tuple with one tensor per decoder, so it is indexed
    # per element (a tuple cannot be sliced like a stacked tensor).
    label_out, inst_out, dpth_out = out[0], out[1], out[2]

    # get pixel-wise sum for depth
    dpth_out = torch.sum(dpth_out, dim=1)

    loss = criterion(label_out, label.long()).float()
    loss = loss + criterion(inst_out, inst.long()).float()
    loss = loss + criterion_dpth(dpth_out, dpth.float())
    return loss


for e in range(1, epochs+1):

    train_loss = 0
    print ('-'*15,'Epoch %d' % e, '-'*15)

    enet.train()

    for _ in tqdm(range(bc_train)):
        img, label, inst, dpth = _prepare_batch(_next_batch())

        optimizer.zero_grad()

        out = enet(img.float())
        loss = _multitask_loss(out, label, inst, dpth)
        loss.backward()

        # update weights
        optimizer.step()

        train_loss += loss.item()

    # Record the epoch total so the summary printed after training is meaningful.
    train_losses.append(train_loss)
    writer.add_scalar('Loss/train', train_loss, e)

    if e % eval_every == 0:
        enet.eval()
        eval_loss = 0

        with torch.no_grad():
            # Validation loop
            # NOTE(review): batches are still drawn from the training
            # iterator; no separate validation loader is defined in this
            # notebook - confirm and replace with a held-out split.
            for _ in tqdm(range(bc_eval)):
                img, label, inst, dpth = _prepare_batch(_next_batch())
                out = enet(img.float())
                eval_loss += _multitask_loss(out, label, inst, dpth).item()

        eval_losses.append(eval_loss)
        writer.add_scalar('Loss/test', eval_loss, e // eval_every)

    if e % print_every == 0:
        checkpoint = {
            'epochs' : e,
            'state_dict' : enet.state_dict()
        }
        # NOTE(review): absolute, environment-specific output directory;
        # confirm it exists on the machine running this notebook.
        torch.save(checkpoint, '/content/ckpt-enet-{}-{}.pth'.format(e, train_loss))
        print ('Model saved!')

print ('Epoch {}/{}...'.format(e, epochs),
       'Total Mean Loss: {:6f}'.format(sum(train_losses) / max(len(train_losses), 1)))

--------------- Epoch 1 ---------------


HBox(children=(FloatProgress(value=0.0, max=367.0), HTML(value='')))

Count
Count
Count
Count
Count
Count
Count
Count
Count
Count
Count
Count



KeyboardInterrupt: 

In [None]:
import os

In [None]:
for path, _, files in os.walk(folder):