## Import Packages

In [20]:
import zipfile as zf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torch.utils.data as data
import torchvision.transforms as transforms

import numpy as np
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import StepLR
import cv2
import os
from tqdm.notebook import tqdm
from PIL import Image

import future

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Setup tensorboard

In [21]:
from torch.utils.tensorboard import SummaryWriter
# TensorBoard event files for this run are written under runs/experiment_1.
writer = SummaryWriter('runs/experiment_1')

## Define the ENet model

We model the following residual blocks as separate classes, which are then used to build the ENet encoder and decoder:
    - Initial block
    - RDDNeck - class for regular, downsampling and dilated bottlenecks
    - ASNeck - class for asymmetric bottlenecks
    - UBNeck - class for upsampling bottlenecks

The ENet architecture is an autoencoder-style model divided into 5 sub-blocks. Please refer to the [ENet paper](https://arxiv.org/pdf/1606.02147.pdf) for details of each sub-block. The ENet building-block code is taken from [here](https://github.com/iArunava/ENet-Real-Time-Semantic-Segmentation).

Fast scene understanding uses the first 2 sub-blocks as the encoder and the remaining 3 as the decoder. In this implementation, there is 1 shared encoder and 3 separate decoders for the 3 tasks (instance segmentation, semantic segmentation, depth estimation).

In [22]:
import os, sys
# Make the notebook's working directory importable so that the local
# `models` and `data` packages used by later cells can be resolved.
nb_dir = os.getcwd()
already_on_path = nb_dir in sys.path
if not already_on_path:
    sys.path.append(nb_dir)

In [23]:
from models.ENetDecoder import ENetDecoder
from models.ENetEncoder import ENetEncoder

class BranchedENet(nn.Module):
    """ENet with one shared encoder feeding three independent decoders.

    Args:
        C: number of classes; forwarded to the encoder and to the first two
           decoders. The third decoder is built with a single output channel.

    The forward pass returns a 3-tuple ``(dec1_out, dec2_out, dec3_out)``.
    The training cell of this notebook consumes these as semantic
    segmentation, instance segmentation and depth predictions respectively.
    """

    def __init__(self, C):
        super().__init__()

        # Number of classes
        self.C = C

        # Shared feature extractor
        self.enc = ENetEncoder(C)

        # One decoder per task: two C-channel heads and one 1-channel head
        self.dec1 = ENetDecoder(C)
        self.dec2 = ENetDecoder(C)
        self.dec3 = ENetDecoder(1)

    def forward(self, x):
        # The encoder yields the encoded features plus two auxiliary values
        # that every decoder needs.
        # NOTE(review): i1/i2 are presumably max-pooling indices used for
        # unpooling — confirm in models.ENetEncoder.
        features, i1, i2 = self.enc(x)

        # Every decoder receives exactly the same encoder outputs.
        first_out = self.dec1(features, i1, i2)
        second_out = self.dec2(features, i1, i2)
        third_out = self.dec3(features, i1, i2)
        return (first_out, second_out, third_out)

## Instantiate the ENet model

In [24]:
# 20 is the class count C passed to the encoder and the two C-channel decoders.
# NOTE(review): presumably matches the label ids emitted by the dataloader — confirm.
enet = BranchedENet(20)

In [25]:
# Use the first CUDA device when one is available, otherwise fall back to the
# CPU, and move the model's parameters there.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
enet = enet.to(device)

## Define Dataloader

In [26]:
from data.cityscapes import Cityscapes as dataset

In [27]:
# Target resolution used by the Resize transform below.
height = 512
width = 1024
# Root folder of the dataset, relative to the notebook's working directory.
dataset_dir = 'data/cityscape'
# Resize to (height, width), then convert to a tensor.
image_transform = transforms.Compose(
        [transforms.Resize((height,width)),transforms.ToTensor()])
# NOTE(review): how `transform` is applied to the label / instance / depth
# targets is decided inside data.cityscapes — confirm there.
train_set = dataset(dataset_dir,transform=image_transform)

# Shuffled mini-batches of 2 samples, loaded by a single worker process.
batch_size=2
train_loader = data.DataLoader(train_set,batch_size=batch_size,shuffle=True,
        num_workers=1)

In [28]:
# Pull one sample batch so the model graph can be exported to TensorBoard.
dataiter = iter(train_loader)
# Use the built-in next(): the Python 2 style `.next()` method is not
# available on DataLoader iterators in current PyTorch releases.
img, label, inst, dpth = next(dataiter)

# add_graph traces `enet` on the sample images.
writer.add_graph(enet, img.to(device))
writer.close()

data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/krefeld/krefeld_000000_014146_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/krefeld/krefeld_000000_014146_gtFine_labelIds.png data/cityscape/gtFine_trainvaltest/gtFine/train/krefeld/krefeld_000000_014146_gtFine_instanceIds.png data/cityscape/disparity_trainvaltest/disparity/train/krefeld/krefeld_000000_014146_disparity.png
data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/darmstadt/darmstadt_000084_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/darmstadt/darmstadt_000084_000019_gtFine_labelIds.png data/cityscape/gtFine_trainvaltest/gtFine/train/darmstadt/darmstadt_000084_000019_gtFine_instanceIds.png data/cityscape/disparity_trainvaltest/disparity/train/darmstadt/darmstadt_000084_000019_disparity.png
data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/tubingen/tubingen_000040_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/tubingen/tubingen_000040_000

Not within tolerance rtol=1e-05 atol=1e-05 at input[0, 11, 31, 409] (0.010448428802192211 vs. -0.0019062859937548637) and 139116 other locations (0.00%)
  check_tolerance, _force_outplace, True, _module_class)
Not within tolerance rtol=1e-05 atol=1e-05 at input[1, 11, 47, 469] (-0.03282495588064194 vs. -0.014643089845776558) and 123184 other locations (0.00%)
  check_tolerance, _force_outplace, True, _module_class)
Not within tolerance rtol=1e-05 atol=1e-05 at input[1, 0, 47, 479] (0.0026244809851050377 vs. -0.018811669200658798) and 7897 other locations (0.00%)
  check_tolerance, _force_outplace, True, _module_class)


data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/tubingen/tubingen_000053_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/tubingen/tubingen_000053_000019_gtFine_labelIds.png data/cityscape/gtFine_trainvaltest/gtFine/train/tubingen/tubingen_000053_000019_gtFine_instanceIds.png data/cityscape/disparity_trainvaltest/disparity/train/tubingen/tubingen_000053_000019_disparity.png
data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/ulm/ulm_000050_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/ulm/ulm_000050_000019_gtFine_labelIds.png data/cityscape/gtFine_trainvaltest/gtFine/train/ulm/ulm_000050_000019_gtFine_instanceIds.png data/cityscape/disparity_trainvaltest/disparity/train/ulm/ulm_000050_000019_disparity.png
data/cityscape/leftImg8bit_trainvaltest/leftImg8bit/train/tubingen/tubingen_000130_000019_leftImg8bit.png data/cityscape/gtFine_trainvaltest/gtFine/train/tubingen/tubingen_000130_000019_gtFine_labelIds.png data/cityscape/g

## 3 - Losses(todo)
(1) Semantic Segmentation Loss

(2) Instance Segmentation Loss

(3) Depth Estimation Loss

In [29]:
def inverse_huber_loss(out, target):
    """Inverse Huber (berHu) loss averaged over all elements.

    With ``d = |out - target|`` and threshold ``C = 0.2 * max(d)``, each
    element contributes ``d`` when ``d < C`` and ``(d^2 + C^2) / (2C)``
    otherwise.

    Args:
        out: predicted tensor.
        target: ground-truth tensor, broadcastable against ``out``.

    Returns:
        A scalar tensor holding the mean loss. A perfect prediction
        (``out == target`` everywhere) yields exactly 0 instead of NaN.
    """
    absdiff = torch.abs(out - target)
    C = 0.2 * torch.max(absdiff)

    # When C == 0 every residual is 0, so the quadratic numerator is 0 as well.
    # Dividing by 2*C would give 0/0 = NaN (and NaN gradients), so substitute a
    # harmless denominator of 1 in that case. For C > 0 nothing changes.
    safe_C = torch.where(C > 0, C, torch.ones_like(C))
    quadratic = (absdiff * absdiff + C * C) / (2 * safe_C)

    return torch.mean(torch.where(absdiff < C, absdiff, quadratic))

In [30]:
def instance_loss(out, target):
    """Placeholder for the instance-segmentation loss (not implemented yet).

    Both arguments are currently ignored and the function returns ``None``.
    """
    return None

# Steps 5 and 6 have been done in the dataloader


## 7 - Define the Hyperparameters(todo)

In [31]:
from data.utils import enet_weighing
# Learning rate for the Adam optimizer below.
lr = 5e-4

# TODO: figure out enet_weighing issue (imported above but not used yet, so the
# cross-entropy below is unweighted).
criterion_label = nn.CrossEntropyLoss().to(device)
# The instance branch reuses the very same cross-entropy object for now;
# `instance_loss` is the intended replacement once it is implemented.
criterion_inst = criterion_label
#criterion_inst = instance_loss
# The depth branch uses the inverse Huber loss defined earlier in the notebook.
criterion_dpth = inverse_huber_loss
optimizer = torch.optim.Adam(enet.parameters(), 
                             lr=lr,
                             weight_decay=2e-4)

# In the training cell, `print_every` controls how often (in epochs) a
# checkpoint is saved and `eval_every` how often the evaluation pass runs.
print_every = 5
eval_every = 5

## 8 - Training loop(todo)

In [32]:
# Containers for the loss history of the training and evaluation passes.
train_losses = []
eval_losses = []

# Number of mini-batches drawn per training epoch / per evaluation pass.
# NOTE(review): 367 and 101 are hard-coded sample counts — confirm they match
# the dataset actually loaded (e.g. compare against len(train_set)).
bc_train = 367 // batch_size # mini_batch train
bc_eval = 101 // batch_size  # mini_batch validation

# Total number of training epochs.
epochs = 100

In [33]:
# Train loop

# Multi-task training loop.
#
# Each epoch draws up to `bc_train` mini-batches from a fresh pass over
# `train_loader`, sums the three task losses and takes one optimizer step per
# batch. Every `eval_every` epochs an evaluation pass of `bc_eval` batches is
# run without gradients; every `print_every` epochs a checkpoint is saved.

def _multitask_loss(batch):
    """Run one (img, label, inst, dpth) batch through `enet` and return the summed loss tensor."""
    # assign data to cpu/gpu
    img, label, inst, dpth = (t.to(device) for t in batch)
    label = label.squeeze(1)
    inst = inst.squeeze(1)

    # the network returns one prediction per task
    label_out, inst_out, dpth_out = enet(img.float())

    # class segmentation + instance segmentation + depth
    loss = criterion_label(label_out, label.long()).float()
    loss += criterion_inst(inst_out, inst.long()).float()
    loss += criterion_dpth(dpth_out, dpth.float())
    return loss


# Keep writing to the Colab scratch directory when it exists; otherwise fall
# back to a local folder so that saving does not fail after hours of training.
ckpt_dir = '/content' if os.path.isdir('/content') else 'checkpoints'
os.makedirs(ckpt_dir, exist_ok=True)

for e in range(1, epochs+1):

    train_loss = 0
    print ('-'*15,'Epoch %d' % e, '-'*15)

    enet.train()

    # Iterate the loader itself rather than one shared iterator: a shared
    # iterator is exhausted after a single pass over the dataset and then
    # raises StopIteration. Putting the bounded range first in zip() stops
    # after `bc_train` batches without fetching an extra, discarded batch.
    train_batches = zip(range(bc_train), train_loader)
    for _, batch in tqdm(train_batches, total=min(bc_train, len(train_loader))):
        optimizer.zero_grad()
        loss = _multitask_loss(batch)
        loss.backward()

        # update weights
        optimizer.step()

        train_loss += loss.item()

    # Record the epoch loss so the summary after the loop is meaningful.
    train_losses.append(train_loss)
    writer.add_scalar('Loss/train', train_loss, e)

    if e % eval_every == 0:
        enet.eval()
        with torch.no_grad():
            eval_loss = 0

            # Validation loop
            # NOTE(review): no separate validation loader is defined in this
            # notebook, so these batches still come from `train_loader`, as in
            # the original code — replace with a held-out split when available.
            eval_batches = zip(range(bc_eval), train_loader)
            for _, batch in tqdm(eval_batches, total=min(bc_eval, len(train_loader))):
                eval_loss += _multitask_loss(batch).item()

        eval_losses.append(eval_loss)
        writer.add_scalar('Loss/test', eval_loss, e // eval_every)

    if e % print_every == 0:
        checkpoint = {
            'epochs' : e,
            'state_dict' : enet.state_dict()
        }
        ckpt_name = 'ckpt-enet-{}-{}.pth'.format(e, train_loss)
        torch.save(checkpoint, os.path.join(ckpt_dir, ckpt_name))
        print ('Model saved!')

# Make sure all logged scalars reach disk.
writer.flush()

print ('Epoch {}/{}...'.format(e, epochs),
       'Total Mean Loss: {:6f}'.format(sum(train_losses) / max(len(train_losses), 1)))

--------------- Epoch 1 ---------------


HBox(children=(FloatProgress(value=0.0, max=183.0), HTML(value='')))




KeyboardInterrupt: 

In [None]:
import os

In [None]:
for path, _, files in os.walk(folder):