## Dependencies

In [43]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.models as models
import torch.backends.cudnn as cudnn

import imp
import os
import sys
import math
import utils.training as train_utils; imp.reload(train_utils)
import utils.plot as plot_utils; imp.reload(plot_utils)
import time
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [15]:
DATA_PATH='data/'
RESULTS_PATH='results/'
WEIGHTS_PATH='models/'
CAMVID_PATH=DATA_PATH+'CamVid/'
PROJECT_NAME='tiramisu'

## Design

**FirstConvLayer**

* 3x3 Conv2D (pad=, stride=, in_chans=3, out_chans=48)

**DenseLayer**

* BatchNorm
* ReLU
* 3x3 Conv2d (pad=, stride=, in_chans=, out_chans=) - "no resolution loss" - padding included
* Dropout (.2)

**DenseBlock**

* Input = FirstConvLayer, TransitionDown, or TransitionUp
* Loop to create L DenseLayers (L=n_layers)
* On TransitionDown we Concat(Input, FinalDenseLayerActivation)
* On TransitionUp we do not Concat with input, instead pass FinalDenseLayerActivation to TransitionUp block

**TransitionDown**

* BatchNorm
* ReLU
* 1x1 Conv2D (pad=, stride=, in_chans=, out_chans=)
* Dropout (0.2)
* 2x2 MaxPooling

**Bottleneck**

* DenseBlock (15 layers)

**TransitionUp**

* 3x3 Transposed Convolution (pad=, stride=2, in_chans=, out_chans=)
* Concat(PreviousDenseBlock, SkipConnection) - from cooresponding DenseBlock on transition down

**FinalBlock**

* 1x1 Conv2d (pad=, stride=, in_chans=256, out_chans=n_classes)
* Softmax

**FCDenseNet103 Architecture**

* input (in_chans=3 for RGB)
* 3x3 ConvLayer (out_chans=48)
* DB (4 layers) + TD
* DB (5 layers) + TD
* DB (7 layers) + TD
* DB (10 layers) + TD
* DB (12 layers) + TD
* Bottleneck (15 layers)
* TU + DB (12 layers)
* TU + DB (10 layers)
* TU + DB (7 layers)
* TU + DB (5 layers)
* TU + DB (4 layers)
* 1x1 ConvLayer (out_chans=n_classes) n_classes=11 for CamVid
* Softmax

**FCDenseNet56**

GrowthRate (k) = 12
4 layers per dense block
1 Conv Layer
5 DenseBlocks Downsample (20 layers)
5 TransitionDown
4 Bottleneck layers
5 Dense Blocks Upsample (20 layers)
5 TransitionUp
1 Conv Layer
1 Softmax layer (doesn't count)
56 Total layers

## Layers

In [16]:
class DenseLayer(nn.Module):
    def __init__(self, in_channels, growth_rate):
        super(DenseLayer, self).__init__()
        self.out_channels = in_channels + growth_rate
        self.add_module('norm', nn.BatchNorm2d(num_features=in_channels))
        self.add_module('relu', nn.ReLU(inplace=True))
        
        #author's impl - lasange 'same' pads with half filter size (rounded down) on "both" sides
        self.add_module('conv', nn.Conv2d(in_channels=in_channels, 
                out_channels=self.out_channels, kernel_size=3, stride=1, 
                  padding=3//2, bias=True))
        
        self.add_module('drop', nn.Dropout2d(0.2))

    def forward(self, x):
        out = self.features(x)
        return torch.cat([x, new_features], 1) # 1 = channel axis
    

class DenseBlock(nn.Module):
    def __init__(self, in_channels, growth_rate, n_layers):
        super(DenseBlock, self).__init__()
        
        n_channels = in_channels
        for i in range(n_layers):
            layer = DenseLayer(n_channels, growth_rate)
            self.add_module('denselayer%d' % (i + 1), layer)
            n_channels += growth_rate
    
    def forward(self, x):
        return self.features(x)

    
class TransitionDown(nn.Module):
    def __init__(self, in_channels):
        super(TransitionDown, self).__init__()
        self.add_module('norm', nn.BatchNorm2d(num_features=in_channels))
        self.add_module('relu', nn.ReLU(inplace=True))
        #what is out_channels?
        self.add_module('conv', nn.Conv2d(in_channels=in_channels, out_channels=in_channels,
                                          kernel_size=1, stride=1, padding=0, bias=True))
        self.add_module('drop', nn.Dropout2d(0.2))
        self.add_module('maxpool', nn.MaxPool2d(2))
        
    def forward(self, x):
        return self.forward(x)
    
class TransitionUp(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(TransitionUp, self).__init__()
        self.add_module('transpose', nn.ConvTranspose2d(in_channels=in_channels, 
                           out_channels=out_channels, kernel_size=3, stride=2, 
                            padding=0, bias=True))
        
    def forward(self, x, skip_connection):
        out = self.forward(x)
        #l = ConcatLayer([l, skip_connection], cropping=[None, None, 'center', 'center'])
        return torch.cat([out, skip_connection])

class Bottleneck(nn.Module):
    def __init__(self, in_channels, growth_rate, n_layers):
        super(Bottleneck, self).__init__()
        self.add_module('bottleneck', DenseBlock(in_channels, growth_rate, n_layers))

    def forward(self, x):
        return self.forward(x)

In [17]:
#https://github.com/Lasagne/Lasagne/blob/master/lasagne/init.py
def he_uniform(weights):
    pass

def weights_init(m):
    if isinstance(m, nn.Conv2d):
        xavier(m.weight.data)
        xavier(m.bias.data)

## Model

In [18]:
class FCDenseNet(nn.Module):
    def __init__(self, in_channels=3, n_blocks=5, layers_per_block=5, growth_rate=16, 
                 out_chans_first_conv=48, n_classes=11):
        super(FCDenseNet, self).__init__()
        self.n_blocks = n_blocks
        self.n_channels = 0
        
        #####################
        # First Convolution #
        #####################
        #W=HeUniform(gain='relu') ??
        #pad='same'
        self.firstConv = nn.Conv2d(in_channels=in_channels, 
                  out_channels=out_chans_first_conv, kernel_size=3, 
                  stride=1, padding=in_channels//2, bias=False)
        self.n_channels += out_chans_first_conv
        
        #####################
        # Downsampling path #
        #####################
        
        skipConnectChannels = []
        self.denseBlocksDown = []
        self.transDownBlocks = []
        for i in range(n_blocks):
            db = DenseBlock(self.n_channels, growth_rate, layers_per_block)
            self.add_module("DBDown%d" % (i+1), db)
            self.denseBlocksDown.append(db)
            self.n_channels += (growth_rate*layers_per_block)
            skipConnectChannels.insert(0, self.n_channels)
            
            td = TransitionDown(self.n_channels)
            self.transDownBlocks.append(td)
            self.add_module("TD%d" % (i+1), td)
            
        #####################
        #     Bottleneck    #
        #####################
        
        self.bottleneck = Bottleneck(self.n_channels, growth_rate, layers_per_block)
        prev_block_channels = growth_rate*layers_per_block
        self.n_channels += prev_block_channels 
        
        #######################
        #   Upsampling path   #
        #######################

        self.transUpBlocks = []
        self.denseBlocksUp = [] 
        for i in range(n_blocks):
            tu = TransitionUp(self.n_channels, prev_block_channels + skipConnectChannels[i])
            self.transUpBlocks.append(td)
            self.add_module("TU%d" % (i+1), tu)
            
            self.n_channels = prev_block_channels + skipConnectChannels[i]

            db = DenseBlock(self.n_channels, growth_rate, layers_per_block)
            self.denseBlocksUp.append(db)
            self.add_module("DBUp%d" % (i+1), db)

            prev_block_channels = growth_rate*layers_per_block
            self.n_channels += prev_block_channels
            
        #####################
        #      Softmax      #
        #####################
        
        self.finalConv = nn.Conv2d(in_channels=self.n_channels, out_channels=n_classes,
                       kernel_size=1, stride=1, padding=0, bias=True)
        self.softmax = nn.Softmax()
        
    def forward(self, x):
        out = self.firstConv(x)
        skip_connections = []
        for i in range(self.n_blocks):
            out = self.denseBlocksDown[i](out)
            skip_connections.insert(0, out)
            out = self.transDownBlocks[i](out)
            
        out = self.bottleneck(out)
        
        for i in range(self.n_blocks):
            out = self.transDownBlocks[i](out, skip_connections[i]) 
            out = self.denseBlocksUp[i](out)
        
        out = self.finalConv(out)
        # Reshape
        batch_size = out.size()[0]
        rows = out.size()[2]
        cols = out.size()[3]
        out = out.view(batch_size*row*cols, n_classes)
        
        out = self.softmax(out)
        return out

In [19]:
net = FCDenseNet()

In [20]:
print(net)

FCDenseNet (
  (firstConv): Conv2d(3, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (DBDown1): DenseBlock (
    (denselayer1): DenseLayer (
      (norm): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True)
      (relu): ReLU (inplace)
      (conv): Conv2d(48, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (drop): Dropout2d (p=0.2)
    )
    (denselayer2): DenseLayer (
      (norm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
      (relu): ReLU (inplace)
      (conv): Conv2d(64, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (drop): Dropout2d (p=0.2)
    )
    (denselayer3): DenseLayer (
      (norm): BatchNorm2d(80, eps=1e-05, momentum=0.1, affine=True)
      (relu): ReLU (inplace)
      (conv): Conv2d(80, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (drop): Dropout2d (p=0.2)
    )
    (denselayer4): DenseLayer (
      (norm): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True)
      (relu): ReLU (inplace)

## Metrics

* https://github.com/SimJeg/FC-DenseNet/blob/master/metrics.py

In [44]:
def IoU(y_pred, y_true, n_classes, void_labels):
    """
    Returns the intersection I and union U (to compute the jaccard I/U) and the accuracy.
    :param y_pred: nd.array of predictions. shape  (b*0*1, c) with c = n_classes
    :param y_true: groundtruth, shape  (b,0,1) or (b,c,0,1) with c=1
    :param n_classes: int
    :param void_labels: list of indexes of void labels
    :return: return nd.array I and U of size (n_classes), and scalar acc
    """

    # Put y_pred and y_true under the same shape
    y_pred = np.argmax(y_pred, axis=1)
    y_true = y_true.flatten()

    # We use not_void in case the prediction falls in the void class of the groundtruth
    not_void = ~ np.any([y_true == label for label in void_labels], axis=0)

    I = np.zeros(n_classes)
    U = np.zeros(n_classes)

    for i in range(n_classes):
        y_true_i = y_true == i
        y_pred_i = y_pred == i

        I[i] = np.sum(y_true_i & y_pred_i)
        U[i] = np.sum((y_true_i | y_pred_i) & not_void)

    accuracy = np.sum(I) / np.sum(not_void)
    return I, U, accuracy

def crossentropy(y_pred, y_true, void_labels):
    # Flatten y_true
    y_true = T.flatten(y_true)
    
    # Clip predictions

    # Create mask
    mask = T.ones_like(y_true)
    for el in void_labels:
        mask = T.switch(T.eq(y_true, el), np.int32(0), mask)

    # Modify y_true temporarily
    y_true_tmp = y_true * mask

    # Compute cross-entropy
    loss = T.nnet.categorical_crossentropy(y_pred, y_true_tmp)

    # Compute masked mean loss
    loss *= mask
    loss = T.sum(loss) / T.sum(mask).astype('float32')

    return loss

## Data

* https://github.com/SimJeg/FC-DenseNet/blob/master/data_loader.py

In [21]:
BATCH_SIZE=3
torch.cuda.manual_seed(1)

In [45]:
traindir = os.path.join(CAMVID_PATH, 'train')
valdir = os.path.join(CAMVID_PATH, 'val')
testdir = os.path.join(CAMVID_PATH, 'test')
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

train_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(traindir, transforms.Compose([
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        #normalize,
    ])),
    batch_size=BATCH_SIZE, shuffle=True,
    num_workers=4, pin_memory=True)

val_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(valdir, transforms.Compose([
        transforms.ToTensor(),
        #normalize
    ])),
    batch_size=BATCH_SIZE, shuffle=False,
    num_workers=4, pin_memory=True)

test_loader = torch.utils.data.DataLoader(
    datasets.ImageFolder(valdir, transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])),
    batch_size=BATCH_SIZE, shuffle=False,
    num_workers=4, pin_memory=True)

In [50]:
train_loader.dataset.classes

['0001TP_006690.png',
 '0001TP_006720.png',
 '0001TP_006750.png',
 '0001TP_006780.png',
 '0001TP_006810.png',
 '0001TP_006840.png',
 '0001TP_006870.png',
 '0001TP_006900.png',
 '0001TP_006930.png',
 '0001TP_006960.png',
 '0001TP_006990.png',
 '0001TP_007020.png',
 '0001TP_007050.png',
 '0001TP_007080.png',
 '0001TP_007110.png',
 '0001TP_007140.png',
 '0001TP_007170.png',
 '0001TP_007200.png',
 '0001TP_007230.png',
 '0001TP_007260.png',
 '0001TP_007290.png',
 '0001TP_007320.png',
 '0001TP_007350.png',
 '0001TP_007380.png',
 '0001TP_007410.png',
 '0001TP_007440.png',
 '0001TP_007470.png',
 '0001TP_007500.png',
 '0001TP_007530.png',
 '0001TP_007560.png',
 '0001TP_007590.png',
 '0001TP_007620.png',
 '0001TP_007650.png',
 '0001TP_007680.png',
 '0001TP_007710.png',
 '0001TP_007740.png',
 '0001TP_007770.png',
 '0001TP_007800.png',
 '0001TP_007830.png',
 '0001TP_007860.png',
 '0001TP_007890.png',
 '0001TP_007920.png',
 '0001TP_007950.png',
 '0001TP_007980.png',
 '0001TP_008010.png',
 '0001TP_0

## Visualize

* https://discuss.pytorch.org/t/convert-pixel-wise-class-tensor-to-image-segmentation/1268

In [22]:
Sky = [128,128,128]
Building = [128,0,0]
Pole = [192,192,128]
Road_marking = [255,69,0]
Road = [128,64,128]
Pavement = [60,40,222]
Tree = [128,128,0]
SignSymbol = [192,128,128]
Fence = [64,64,128]
Car = [64,0,128]
Pedestrian = [64,64,0]
Bicyclist = [0,128,192]
Unlabelled = [0,0,0]

label_colours = np.array([Sky, Building, Pole, Road, Pavement,
      Tree, SignSymbol, Fence, Car, Pedestrian, Bicyclist, Unlabelled])

def visualize(temp, plot=True):
    r = temp.copy()
    g = temp.copy()
    b = temp.copy()
    for l in range(0,11):
        r[temp==l]=label_colours[l,0]
        g[temp==l]=label_colours[l,1]
        b[temp==l]=label_colours[l,2]

    rgb = np.zeros((temp.shape[0], temp.shape[1], 3))
    rgb[:,:,0] = (r/255.0)#[:,:,0]
    rgb[:,:,1] = (g/255.0)#[:,:,1]
    rgb[:,:,2] = (b/255.0)#[:,:,2]
    if plot:
        plt.imshow(rgb)
    else:
        return rgb
    
def imshow(inp):
    """Imshow for Tensor."""
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    inp = std * inp + mean
    plt.imshow(inp)

In [None]:
# Get a batch of training data
inputs, classes = next(iter(dset_loaders['train']))

# Make a grid from batch
out = torchvision.utils.make_grid(inputs)

imshow(out)
plt.title([dset_classes[x] for x in classes])
plt.show()

## Train

* https://github.com/pytorch/examples/blob/master/imagenet/main.py

**Hyperparameters**

* WeightInitialization = HeUniform
* Optimizer = RMSProp
* LR = .001 with exponential decay of 0.995 after each epoch
* Data Augmentation = Random Crops, Vertical Flips
* ValidationSet with early stopping based on IoU or MeanAccuracy with patience of 100 (50 during finetuning)
* WeightDecay = .0001
* Finetune with full-size images, LR = .0001
* Dropout = 0.2
* BatchNorm "we use current batch stats at training, validation, and test time"

**CamVid**

* TrainingSet = 367 frames
* ValidationSet = 101 frames
* TestSet = 233 frames
* Images of resolution 360x480
* Images "Cropped" to 224x224 for training --- center crop?
* FullRes images used for finetuning
* NumberOfClasses = 11 (output)
* BatchSize = 3

**FCDenseNet103**

* GrowthRate = 16 (k, number of filters to each denselayer adds to the ever-growing concatenated output)
* No pretraining

In [37]:
def adjust_learning_rate(optimizer, epoch):
    """Decay LR by .995 every 1 epochs"""
    lr = optimizer.param_groups[0]['lr']
    lr *= (0.995 ** (epoch // 1))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [None]:
# Adjust patience
# https://github.com/SimJeg/FC-DenseNet/blob/master/train.py#L176

In [82]:
existing_weights_fpath=None
nEpochs=1

net = FCDenseNet(in_channels=3, n_blocks=5, layers_per_block=5, growth_rate=16, 
                 out_chans_first_conv=48, n_classes=11)
net = net.cuda()
cudnn.benchmark = True

optimizer = optim.RMSprop(net.parameters(), lr=.001, weight_decay=.0001)
criterion = nn.CrossEntropyLoss().cuda()

print('  + Number of params: {}'.format(
    sum([p.data.nelement() for p in net.parameters()])))

if existing_weights_fpath:
    startEpoch = train_utils.load_weights(net, existing_weights_fpath)
    endEpoch = startEpoch + nEpochs
    print ('Resume training at epoch: {}'.format(startEpoch))
    if os.path.exists(RESULTS_PATH+'train.csv'): #assume test.csv exists
        append_write = 'a' # append if already exists
    else:
        append_write = 'w' # make a new file if not
    trainF = open(os.path.join(RESULTS_PATH, 'train.csv'), append_write)
    testF = open(os.path.join(RESULTS_PATH, 'test.csv'), append_write)
else:
    print ("Training new model from scratch")
    startEpoch = 1
    endEpoch = nEpochs
    trainF = open(os.path.join(RESULTS_PATH, 'train.csv'), 'w')
    testF = open(os.path.join(RESULTS_PATH, 'test.csv'), 'w')


for epoch in tqdm(range(startEpoch, endEpoch+1)):
    since = time.time()
    train_utils.adjust_opt("sgd", optimizer, epoch)
    train_utils.train(epoch, net, trainLoader, optimizer, trainF)
    train_utils.test(epoch, net, testLoader, optimizer, testF)
    time_elapsed = time.time() - since  
    print('Time {:.0f}m {:.0f}s\n'.format(
        time_elapsed // 60, time_elapsed % 60))
    if epoch != 1:
        os.system('./utils/plot.py {} &'.format(RESULTS_PATH))

trainF.close()
testF.close()


  0%|          | 0/1 [00:00<?, ?it/s]

  + Number of params: 1059298
Training new model from scratch


[A



Epoch 1: Train - Loss: 1.370707	Error: 43.750000


100%|██████████| 1/1 [01:12<00:00, 72.19s/it]

Test - Loss: 1.3404, Error: 4590/10000 (46%)
Time 1m 12s




