## Imports

In [45]:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torchvision.models as models
import torch.backends.cudnn as cudnn
import torchvision
import torch.autograd as autograd

import imp
import os
import sys
import math
import time
import random
import shutil

from tqdm import tqdm
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
plt.style.use('bmh')

%matplotlib inline

## Numpy

In [7]:
##Initialize tensor
a = torch.ones(5)
print(a)


 1
 1
 1
 1
 1
[torch.FloatTensor of size 5]



In [8]:
#Convert tensor to numpy array
b = a.numpy()
print(b)

[ 1.  1.  1.  1.  1.]


In [9]:
#Convert numpy array to tensor
a = np.ones(5)
b = torch.from_numpy(a)
print(b)


 1
 1
 1
 1
 1
[torch.DoubleTensor of size 5]



In [10]:
# let us run this cell only if CUDA is available
x = torch.ones(5)
y = torch.ones(5)
if torch.cuda.is_available():
    x = x.cuda()
    y = y.cuda()
    x + y
    
def add():
    x + y
%timeit -n 1 add()

1 loop, best of 3: 6.96 µs per loop


A microsecond µs is 1 millionth of a second

### Tensor

http://pytorch.org/docs/tensors.html

In [11]:
#2D Tensor
x = torch.FloatTensor(
    [[1, 2, 3], 
     [4, 5, 6]]
)
print(x.size())
print(x)

torch.Size([2, 3])

 1  2  3
 4  5  6
[torch.FloatTensor of size 2x3]



In [12]:
#3D Tensor
x = torch.FloatTensor([
    [[1, 1, 1], 
     [2, 2, 2]],
    [[3, 3, 3], 
     [4, 4, 4]]
])
print(x.size())
print(x)

torch.Size([2, 2, 3])

(0 ,.,.) = 
  1  1  1
  2  2  2

(1 ,.,.) = 
  3  3  3
  4  4  4
[torch.FloatTensor of size 2x2x3]



In [13]:
x = torch.randn(2,2,3)
x


(0 ,.,.) = 
 -0.8209  0.0792 -0.0169
 -0.2988 -0.6930  0.7690

(1 ,.,.) = 
  0.8349 -1.4692  0.0718
 -1.7422  1.2717 -2.4279
[torch.FloatTensor of size 2x2x3]

## Autograd

In [14]:
from torch.autograd import Variable
from torch import Tensor

In [15]:
x = Variable(torch.ones(2,2), requires_grad=True)
print (x)
print (x.data)
print (x.creator)

Variable containing:
 1  1
 1  1
[torch.FloatTensor of size 2x2]


 1  1
 1  1
[torch.FloatTensor of size 2x2]

None


In [16]:
y = x + 2
print (y)
print (y.creator)

Variable containing:
 3  3
 3  3
[torch.FloatTensor of size 2x2]

<torch.autograd._functions.basic_ops.AddConstant object at 0x7ff051618668>


In [17]:
z = y*y*3
out = z.mean()
out

Variable containing:
 27
[torch.FloatTensor of size 1]

In [18]:
out.backward()

In [19]:
print(x.grad)

Variable containing:
 4.5000  4.5000
 4.5000  4.5000
[torch.FloatTensor of size 2x2]



## Neural Networks

In [20]:
import torch.nn as nn
import torch.nn.functional as F

In [21]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(1,6,5)
        self.conv2 = nn.Conv2d(6,16,5)
        self.fc1 = nn.Linear(16*5*5, 120) # an affine operation: y = Wx + b
        self.fc2 = nn.Linear(120,84)
        self.fc3 = nn.Linear(84,10)
        
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2)) #2x2 pool window
        x = F.max_pool2d(F.relu(self.conv2(x)), 2) #if shape is square, you only need to define one number
        x = x.view(-1, self.num_flat_features(x)) #Flatten()
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    ## Backward() function is automatically defined for you!!
    
    def num_flat_features(self, x):
        #basically counting parameters and flattening 
        size = x.size()[1:] # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [22]:
net = NeuralNetwork()
print (net)

NeuralNetwork (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)


In [23]:
X = Variable(torch.randn(1,1,32,32)) #nSamples x nChannels x Height x Width
yHat = net(X)
yHat

Variable containing:
-0.0078  0.0949  0.0414  0.0722  0.0060  0.0250 -0.1048 -0.0385 -0.1151 -0.1062
[torch.FloatTensor of size 1x10]

## Loss Function

http://pytorch.org/docs/nn.html#loss-functions

In [24]:
yHat = net(X)
target = Variable(torch.range(1, 10))  # a dummy target, for example
criterion = nn.MSELoss()

loss = criterion(yHat, target)
print(loss)

Variable containing:
 38.9785
[torch.FloatTensor of size 1]



In [25]:
print(loss.creator)  # MSELoss
print(loss.creator.previous_functions[0][0])  # Linear
print(loss.creator.previous_functions[0][0].previous_functions[0][0])  # ReLU

<torch.nn._functions.thnn.auto.MSELoss object at 0x7ff0907fdac8>
<torch.nn._functions.linear.Linear object at 0x7ff0907fd908>
<torch.nn._functions.thnn.auto.Threshold object at 0x7ff0907fd3c8>


## Backprop

In [26]:
#First clear the existing gradients!! 
net.zero_grad()     # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

#Backprop as simple as..
loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
None
conv1.bias.grad after backward
Variable containing:
1.00000e-02 *
 -4.9376
 -3.0339
 -4.2199
  3.5306
  6.0344
  8.6081
[torch.FloatTensor of size 6]



## Update Weights

In [27]:
#SGD
lr = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * lr)  #subtract gradient from weights...

In [28]:
#Other optimizers
import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.01)
criterion = nn.MSELoss()

In [29]:
#using the optimizers
#Zero the gradient
optimizer.zero_grad()
yHat = net(X)
loss = criterion(yHat,target)
loss.backward()
optimizer.step() #update the weights

## CNNs

* https://chsasank.github.io/pytorch-tutorials/beginner/blitz/cifar10_tutorial.html
* http://cs231n.github.io/convolutional-networks/

Accepts a volume of size 

W1×H1×D1

Requires four hyperparameters:

K - Number of filters K
F - their spatial extent F
S - the stride S
P - the amount of zero padding

Produces a volume of size W2×H2×D2 where:

W2 = (W1−F+2P)/S+1
H2 = (H1−F+2P)/S+1 (i.e. width and height are computed equally by symmetry)
D2 = K

With parameter sharing, it introduces F⋅F⋅D1 weights per filter, for a total of (F⋅F⋅D1)⋅K weights and K biases.

In the output volume, the d-th depth slice (of size W2×H2) is the result of performing a valid convolution of the d-th filter over the input volume with a stride of S and then offset by d-th bias.

### Concat

http://pytorch.org/docs/torch.html#torch.cat

In [30]:
#Does this work?
torch.manual_seed??
torch.cuda.manual_seed(1)

In [31]:
x = torch.randn(2,3)
x


 0.2895  0.0358 -0.3661
-1.4977 -0.0709 -0.2322
[torch.FloatTensor of size 2x3]

In [32]:
# 2D - combine new tensors as new rows stacked on stop of each other
torch.cat([x,x,x], 0) #0 = row = first dimension of input


 0.2895  0.0358 -0.3661
-1.4977 -0.0709 -0.2322
 0.2895  0.0358 -0.3661
-1.4977 -0.0709 -0.2322
 0.2895  0.0358 -0.3661
-1.4977 -0.0709 -0.2322
[torch.FloatTensor of size 6x3]

In [33]:
# 2D - combine new tensors as new columns stacked next to each other
torch.cat([x,x,x], 1) #1 = column = 2nd dimension of input


 0.2895  0.0358 -0.3661  0.2895  0.0358 -0.3661  0.2895  0.0358 -0.3661
-1.4977 -0.0709 -0.2322 -1.4977 -0.0709 -0.2322 -1.4977 -0.0709 -0.2322
[torch.FloatTensor of size 2x9]

In [34]:
# 2D - Stack - combine tensors along new dimension - 3D now
torch.stack([x,x,x],0)


(0 ,.,.) = 
  0.2895  0.0358 -0.3661
 -1.4977 -0.0709 -0.2322

(1 ,.,.) = 
  0.2895  0.0358 -0.3661
 -1.4977 -0.0709 -0.2322

(2 ,.,.) = 
  0.2895  0.0358 -0.3661
 -1.4977 -0.0709 -0.2322
[torch.FloatTensor of size 3x2x3]

In [35]:
#3D Tensor
x = torch.FloatTensor([
    [[1, 1, 1], 
     [2, 2, 2]],
    [[3, 3, 3], 
     [4, 4, 4]]
])
print(x.size())
print(x)

torch.Size([2, 2, 3])

(0 ,.,.) = 
  1  1  1
  2  2  2

(1 ,.,.) = 
  3  3  3
  4  4  4
[torch.FloatTensor of size 2x2x3]



In [36]:
out = torch.cat([x,x],0)
print(out.size())
print(out)

torch.Size([4, 2, 3])

(0 ,.,.) = 
  1  1  1
  2  2  2

(1 ,.,.) = 
  3  3  3
  4  4  4

(2 ,.,.) = 
  1  1  1
  2  2  2

(3 ,.,.) = 
  3  3  3
  4  4  4
[torch.FloatTensor of size 4x2x3]



In [37]:
out = torch.cat([x,x],1)
print(x.size())
print(out.size())
print(out)

torch.Size([2, 2, 3])
torch.Size([2, 4, 3])

(0 ,.,.) = 
  1  1  1
  2  2  2
  1  1  1
  2  2  2

(1 ,.,.) = 
  3  3  3
  4  4  4
  3  3  3
  4  4  4
[torch.FloatTensor of size 2x4x3]



## Conv Transpose

* http://deeplearning.net/software/theano/tutorial/conv_arithmetic.html
* no padding = o' = i' + (k - 1)
* w padding = o' = i' + (k - 1) - 2p
* w stride = o' = s (i' - 1) + k

In [38]:
layer = nn.ConvTranspose2d(in_channels=80, 
       out_channels=80, kernel_size=3, stride=2, padding=0, bias=True)
input = torch.randn(1, 80, 11, 15)
layer(Variable(input)).size()

torch.Size([1, 80, 23, 31])

In [39]:
layer = nn.ConvTranspose2d(in_channels=80, 
       out_channels=80, kernel_size=3, stride=2, padding=0, bias=True) #outputdim = s(i-1) + k
input = torch.randn(1, 80, 11, 15)
k = input.dim() - 2 #get num dimensions and subtract 2
min_sizes = [dim_size(layer, input, d) for d in range(k)] #This just gets the dimension sizes of output after transpose
#So it can't go any smaller, only adds padding - minimum output size is the output of the normal transpose
#min_size = output of normal transpose
#max_size = 
[min_sizes[d] + layer.stride[d] - 1 for d in range(k)]

NameError: name 'dim_size' is not defined

## Center Crop

In [None]:
#https://github.com/pytorch/pytorch/blob/master/torch/nn/_functions/conv.py#L59
#https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/merge.py#L162
#Author does a center crop which crops both inputs (skip and upsample) to size of minimum dimension on both w/h
#But does this get us back to the output image size then?
def center_crop(layer, max_height, max_width):
    print("maxwidth", max_width)
    print("maxheigth", max_height)

    batch_size, n_channels, layer_height, layer_width = layer.size()
    print('layer', batch_size, n_channels, layer_height, layer_width)
    xy1 = (layer_width - max_width) // 2
    xy2 = (layer_height - max_height) // 2
    print('xy1', xy1)
    print('xy2', xy2)
    return layer[:, :, xy2:(xy2 + max_height), xy1:(xy1 + max_width)]

In [None]:
t1 = Variable(torch.randn(1,80,11,15))
skip = Variable(torch.randn(1, 448, 22, 30))
layer = nn.ConvTranspose2d(in_channels=80, 
       out_channels=80, kernel_size=3, stride=2, padding=0, bias=True) #outputdim = s(i-1) + k
out = layer(t1)

## Model Definition

In [None]:
#add example architectures here...

## Config

In [None]:
DATA_PATH='data/'
RESULTS_PATH='results/'
WEIGHTS_PATH='models/'
IMAGE_PATH=DATA_PATH+'GET_SMALL_IMAGE_SAMPLE_HERE/'
PROJECT_NAME='cheatsheet'

In [None]:
train_crop_size = (224, 224) # None for full size
n_classes = 12 #11 + background
# Training
seed = 0

train_file = RESULTS_PATH+PROJECT_NAME+'-train.csv'
test_file = RESULTS_PATH+PROJECT_NAME+'-test.csv'
existing_weights_fpath=None#WEIGHTS_PATH+'latest.pth'
existing_optimizer_fpath=None #WEIGHTS_PATH+'latest-optim.pth'

#Finetune overrides
n_epochs=50
learning_rate=.00001
lr_sched_decay = 0.995 # Applied each epoch exponential - α=αe^−kt0,k are hyperparameters and 
weight_decay = 0.0001
num_epochs = 750
max_patience = 150

## Data Loader

In [None]:
batch_size = 2
seed = 1
torch.cuda.manual_seed(seed)
traindir = os.path.join(IMAGE_PATH, 'train')
valdir = os.path.join(IMAGE_PATH, 'val')
testdir = os.path.join(IMAGE_PATH, 'test')
IMAGE_MEAN=1.2
IMAGE_STD=1.2

In [None]:
normalize = transforms.Normalize(mean=IMAGE_MEAN, std=IMAGE_STD)
train_dset = camvid.CamVid(CAMVID_PATH, 'train',
      transform=transforms.Compose([
          transforms.RandomCrop(224),
          transforms.RandomHorizontalFlip()
          transforms.ToTensor(),
          normalize,
    ]))
train_loader = torch.utils.data.DataLoader(
    train_dset, batch_size=batch_size, shuffle=True)

In [None]:
# print(train_loader.dataset.classes)
# print(train_loader.dataset.class_weight)
# print(train_loader.dataset.imgs[:3])
# print(train_loader.dataset.mean)
# print(train_loader.dataset.std)
print("TrainImages: %d" %len(train_loader.dataset.imgs))
print("ValImages: %d" %len(val_loader.dataset.imgs))
print("TestImages: %d" %len(test_loader.dataset.imgs))
print("NumClasses: %d" % len(train_loader.dataset.classes))

example_inputs, example_targets = next(iter(train_loader))
print("InputsBatchSize: ", example_inputs.size())
print("TargetsBatchSize: ", example_targets.size())

#Inputs are tensors of normalized pixel values
print ("\nInput (size, max, min) ---")
i = example_inputs[0]
print (i.size())
print(i.max())
print(i.min())


#Targets are tensors of class labels from 0-11 (0 means background)
print ("Target (size, max, min) ---")
t = example_targets[0]
print(t.size())
print(t.max())
print(t.min())

## Visdom Visualizations

* https://github.com/facebookresearch/visdom
* https://github.com/facebookresearch/visdom/blob/master/example/demo.py
* http://73.223.178.63:8097

In [None]:
import visdom
viz = visdom.Visdom()

In [None]:
def viz_plot_tst_trn(window, epoch, tst_val, trn_val, name='loss', env='main'):
    if window is None:
        return viz.line(
            X=np.array([epoch]),
            Y=np.array([[tst_val, trn_val]]),
            opts=dict(
                xlabel='epoch',
                ylabel=name,
                title=env+' '+name,
                legend=['Validation', 'Train']
            ),
            env=env
        )
    return viz.line(
        X=np.ones((1, 2)) * epoch,
        Y=np.expand_dims([tst_val, trn_val],0),
        win=window,
        update='append',
        env=env
    )

def viz_plot_img(window, tensor, env='main', title='Image'):
    '''
    This function draws an img on your Visdom web app. 
    It takes as input an `CxHxW` tensor `img`
    The array values can be float in [0,1] or uint8 in [0, 255]'''
    np_img = decode_image(tensor)
    np_img = np.rollaxis(np_img, 2, 0)
    viz.image(
        np_img,
        opts=dict(title=title, caption='Silly image'),
        win=window,
        env=env
    )
    
def viz_plot_text(window, text, env='main'):
    if window is None:
        return viz.text(
            text,
            env=env
        )
    return viz.text(
        text,
        win=window,
        env=env
    )

def viz_plot_summary(window, epoch, tst_loss, trn_loss,
                       tst_err, trn_err, env='main'):
    txt = ("""Epoch: %d
        Train - Loss: %.3f Err: %.3f
        Test - Loss: %.3f Err: %.3f""" % (epoch, 
        trn_loss, trn_err, tst_loss, tst_err))
    return viz_plot_text(window, txt, env)

In [None]:
#Should plot one chart and update it
txt_chart = viz_plot_summary(None, 1, 2, 3, 4, 5)
txt_chart = viz_plot_summary(txt_chart, 5, 2, 3, 4, 5)
txt_chart = viz_plot_summary(txt_chart, 5, 3, 8, 7, 6)

In [None]:
#Should plot one chart and update it
sum_chart = viz_plot_text(None, 'Hello, world3!')
sum_chart = viz_plot_text(sum_chart, 'Hello, world4!')

In [None]:
#Should plot one chart and update it
#window, epoch, tst_val, trn_val, name='loss', env='main'
loss_chart = viz_plot_tst_trn(None, 9, 14, 27, 'loss')
loss_chart = viz_plot_tst_trn(loss_chart, 10, 18, 30, 'loss')
loss_chart = viz_plot_tst_trn(loss_chart, 11, 19, 32, 'loss')

In [None]:
#Should plot one chart and update it
#window, epoch, tst_val, trn_val, name='loss', env='main'
err_chart = viz_plot_tst_trn(None, 9, 14, 27, 'error')
err_chart = viz_plot_tst_trn(err_chart, 10, 18, 30, 'error')
err_chart = viz_plot_tst_trn(err_chart, 11, 19, 32, 'error')

In [40]:
inputs, targets = next(iter(train_loader))
img_chart = viz.image(
    np.random.rand(3,360,480),
    opts=dict(title="Image", caption='Silly random'),
)
viz_plot_img(img_chart, inputs[1])

NameError: name 'train_loader' is not defined

## Image Visualizations

In [None]:
Sky = [128,128,128]
Building = [128,0,0]
Pole = [192,192,128]
#Road_marking = [255,69,0] ???
Road = [128,64,128]
Pavement = [60,40,222]
Tree = [128,128,0]
SignSymbol = [192,128,128]
Fence = [64,64,128]
Car = [64,0,128]
Pedestrian = [64,64,0]
Bicyclist = [0,128,192]
Unlabelled = [0,0,0]

label_colours = np.array([Sky, Building, Pole, Road, Pavement,
      Tree, SignSymbol, Fence, Car, Pedestrian, Bicyclist, Unlabelled])

def view_annotated(tensor, plot=True):
    temp = tensor.numpy()
    r = temp.copy()
    g = temp.copy()
    b = temp.copy()
    for l in range(0,11):
        r[temp==l]=label_colours[l,0]
        g[temp==l]=label_colours[l,1]
        b[temp==l]=label_colours[l,2]

    rgb = np.zeros((temp.shape[0], temp.shape[1], 3))
    rgb[:,:,0] = (r/255.0)#[:,:,0]
    rgb[:,:,1] = (g/255.0)#[:,:,1]
    rgb[:,:,2] = (b/255.0)#[:,:,2]
    if plot:
        plt.imshow(rgb)
        plt.show()
    else:
        return rgb

def decode_image(tensor):
    inp = tensor.numpy().transpose((1, 2, 0))
    mean = np.array(camvid.mean)
    std = np.array(camvid.std)
    inp = std * inp + mean
    return inp

def view_image(tensor):
    """Imshow for Tensor."""
    inp = decode_image(tensor)
    plt.imshow(inp)
    plt.show()

In [None]:
# Get a batch of training data
inputs, targets = next(iter(train_loader))
#inputs, targets = next(iter(val_loader))
#inputs, targets = next(iter(test_loader))

# Plot Single Image
view_image(inputs[0])

# Plot Target Image
view_annotated(targets[0])

# Plot Grid of images
out = torchvision.utils.make_grid(inputs, nrow=3)
view_image(out)

## Train

In [None]:
def train(model, train_loader, optimizer, criterion, trainF, epoch, projectName):
    model.train()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = Variable(inputs.cuda()), Variable(targets.cuda())
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()
        pred = get_predictions(output)
        train_err = error(pred, targets.data.cpu())
        partialEpoch = epoch + batch_idx / len(train_loader) - 1
        trainF.write('{},{},{}\n'.format(partialEpoch, loss.data[0], train_err))
        trainF.flush()
    print('Epoch {:d}: Train - Loss: {:.4f}\tErr: {:.4f}'.format(epoch, loss.data[0], train_err))
    return loss.data[0], train_err


## Test

In [None]:
def test(model, test_loader, criterion, testF=None, epoch=1):
    model.eval()
    test_loss = 0
    test_error = 0
    for data, target in test_loader:
        data, target = Variable(data.cuda(), volatile=True), Variable(target.cuda())
        output = model(data)
        test_loss += criterion(output, target).data[0]
        pred = get_predictions(output)
        test_error += error(pred, target.data.cpu())
    test_loss /= len(test_loader) #n_batches
    test_error /= len(test_loader)
    print('Test - Loss: {:.4f}, Error: {:.4f}'.format(
        test_loss, test_error))
    if testF:
        testF.write('{},{},{}\n'.format(int(epoch), test_loss, test_error))
        testF.flush()
    return test_loss, test_error

In [None]:
criterion = nn.NLLLoss2d(weight=camvid.class_weight.cuda()).cuda()
test(model, test_loader, criterion)

## Serialization (Save/Load Weights)

In [None]:
def save_weights(model, epoch, loss, err, sessionName, isBest=False):
    weights_fname = sessionName+'-%d-%.3f-%.3f.pth' % (epoch, loss, err)
    weights_fpath = os.path.join(WEIGHTS_PATH, weights_fname)
    torch.save({
            'startEpoch': epoch+1,
            'loss':loss,
            'error': err,
            'sessionName': sessionName,
            'state_dict': model.state_dict()
        }, weights_fpath )
    shutil.copyfile(weights_fpath, WEIGHTS_PATH+'latest.pth')
    if isBest:
        shutil.copyfile(weights_fpath, WEIGHTS_PATH+'best.pth')

def load_weights(model, fpath):
    print("loading weights '{}'".format(fpath))
    state = torch.load(fpath)
    start_epoch = state['startEpoch']
    model.load_state_dict(state['state_dict'])
    print("loaded weights from session {} (lastEpoch {}, loss {}, error {})"
          .format(state['sessionName'], start_epoch-1, state['loss'],
                  state['error']))
    return state

def save_optimizer(optimizer, epoch, sessionName):
    optim_fname = sessionName+'-optim-%d.pth' % (epoch)
    optim_fpath = os.path.join(WEIGHTS_PATH, optim_fname)
    torch.save({
            'lastEpoch': epoch,
            'sessionName': sessionName,
            'state_dict': optimizer.state_dict()
        }, optim_fpath )
    shutil.copyfile(optim_fpath, WEIGHTS_PATH+'latest-optim.pth')

def load_optimizer(optimizer, fpath):
    print("loading optimizer '{}'".format(fpath))
    optim = torch.load(fpath)
    optimizer.load_state_dict(optim['state_dict'])
    print("loaded optimizer from session {}, lastEpoch {}"
          .format(optim['sessionName'], optim['lastEpoch']))
    
# Add patience function
# https://github.com/SimJeg/FC-DenseNet/blob/master/train.py#L176

## Run Training

In [None]:
# Initialize Model
model = FCDenseNet(in_channels=3, n_blocks=5, layers_per_block=5, growth_rate=16, 
                 out_chans_first_conv=48, n_classes=n_classes)
model = model.cuda()
print('  + Number of params: {}'.format(
    sum([p.data.nelement() for p in model.parameters()])))

In [None]:
if existing_weights_fpath:
    state = load_weights(model, existing_weights_fpath)
    start_epoch = state['startEpoch']
    endEpoch = state['startEpoch'] + n_epochs
    print ('Resume training at epoch: {}'.format(state['startEpoch']))
    if os.path.exists(train_file): #assume test.csv exists
        append_write = 'a' # append if already exists
    else:
        append_write = 'w' # make a new file if not
    trainF = open(os.path.join(train_file), append_write)
    testF = open(os.path.join(test_file), append_write)
else:
    print ("Training new model from scratch")
    model.apply(weights_init)
    start_epoch = 1
    endEpoch = n_epochs
    trainF = open(os.path.join(train_file), 'w')
    testF = open(os.path.join(test_file), 'w')

cudnn.benchmark = True # ????
optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
if existing_optimizer_fpath:
    print("Loading existing optimizer: ", existing_optimizer_fpath)
    load_optimizer(optimizer, existing_optimizer_fpath)
criterion = nn.NLLLoss2d(weight=camvid.class_weight.cuda()).cuda()

In [None]:
loss_chart, err_chart, txt_chart = None, None, None
for epoch in range(start_epoch, endEpoch+1):
    since = time.time()
    trn_loss, trn_err = train(model, train_loader, optimizer, criterion, trainF, epoch, PROJECT_NAME)
    tst_loss, tst_err = test(model, val_loader, criterion, testF, epoch)
    time_elapsed = time.time() - since  
    print('Time {:.0f}m {:.0f}s\n'.format(
        time_elapsed // 60, time_elapsed % 60))
    save_weights(model, epoch, tst_loss, tst_err, PROJECT_NAME)
    save_optimizer(optimizer, epoch, PROJECT_NAME)
    if visdom_enabled:
        loss_chart = viz_plot_tst_trn(loss_chart, epoch, tst_loss, trn_loss, 'loss', PROJECT_NAME)
        err_chart = viz_plot_tst_trn(err_chart, epoch, tst_err, trn_err, 'error', PROJECT_NAME)
        txt_chart = viz_plot_summary(txt_chart, epoch, tst_loss, trn_loss, tst_err, trn_err, PROJECT_NAME)

trainF.close()
testF.close()

## Predict

In [None]:
def predict(model, input_loader, n_batches=1):
    input_loader.batch_size = 1
    #Takes input_loader and returns array of prediction tensors
    predictions = []
    model.eval()
    for input, target in input_loader:
        data, label = Variable(input.cuda(), volatile=True), Variable(target.cuda())
        output = model(data)
        pred = get_predictions(output)
        predictions.append([input,target,pred])
    return predictions

#predictions = predict_all(model, test_loader, 1)
# for out in predictions[:1]:
#     view_image(out[0][0])
#     view_annotated(out[1][0])
#     view_annotated(out[2][0])

def view_sample_predictions(n):
    #torch.cuda.manual_seed(random.randint(0,10**7))
    test_loader = torch.utils.data.DataLoader(test_dset, batch_size=n, shuffle=True)
    inputs, targets = next(iter(test_loader))
    data, label = Variable(inputs.cuda(), volatile=True), Variable(targets.cuda())
    output = model(data)
    pred = get_predictions(output)
    batch_size = inputs.size(0)
    for i in range(batch_size):
        view_image(inputs[i])
        view_annotated(targets[i])
        view_annotated(pred[i])  

In [None]:
view_sample_predictions(3)