# Pytorch Cheatsheet

## Overview

In [None]:
* How does a Conv layer impact the size of output feature maps?
* How does a Conv Transpose layer impact the size of the output feature maps?
* How does padding / stride impact both of the above?

Normal Convolution
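* W2 = (W1 - KS + 2P)/S + 1, where W1/W2 are the input/output widths, KS is the kernel size, P the padding and S the stride (round down when the division is not exact)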

Convolution Transpose
* W2 = S(W1 - 1) + KS - 2P (assuming dilation 1 and no `output_padding`)

## Resources

In [None]:
* ss
* ss
* ss

## Imports

In [None]:
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torch.nn.functional as F
from torch.autograd import Variable
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.utils as torch_img_utils
from torch.utils.data import DataLoader
import torchvision.models as models
import torch.backends.cudnn as cudnn
import torchvision
import torch.autograd as autograd
from PIL import Image
import imp
import os
import sys
import math
import time
import random
import shutil
import cv2
import scipy.misc
from glob import glob
import sklearn
import logging

from tqdm import tqdm
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
plt.style.use('bmh')

%matplotlib inline

# Basics

## Tutorials

* [Official Pytorch Tutorials](http://pytorch.org/tutorials/)
* [Pytorch Documentation](http://pytorch.org/docs)

## Tensors

# Datasets

## File Management

In [None]:
# Seed Python's and PyTorch's random number generators before anything else runs.
random.seed(1)
torch.manual_seed(1)
# NOTE(review): machine-specific absolute path -- point this at your own data root.
# Every path below is built by plain string concatenation, so each piece
# must keep its trailing '/'.
DATA_PATH='/media/bfortuner/bigguy/data/'
CUB_PATH=DATA_PATH+'cub/'
CUB_IMAGES_PATH=CUB_PATH+'images/'
MNIST_PATH=DATA_PATH+'mnist/'
MNIST_WEIGHTS_PATH=MNIST_PATH+'weights/'
# Unlike the other paths, this one is relative (not rooted at DATA_PATH).
MNIST_RESULTS_PATH='results/'
CIFAR10_PATH=DATA_PATH+'cifar10/'
CIFAR10_IMGS_PATH=CIFAR10_PATH+'images/'
CIFAR10_RESULTS_PATH=CIFAR10_PATH+'results/'
CIFAR10_WEIGHTS_PATH=CIFAR10_PATH+'weights/'

In [None]:
def get_paths_to_files(dir_path):
    """Recursively list the non-hidden files below ``dir_path``.

    # Arguments
        dir_path: directory to walk (sub-directories are visited too)

    # Output
        (filepaths, fnames): two parallel lists holding, for every file whose
        name does not start with '.', its joined path and its bare file name.
    """
    filepaths, fnames = [], []
    for root, _subdirs, entries in os.walk(dir_path):
        for entry in entries:
            # Skip dot-files such as .DS_Store
            if entry[0] == '.':
                continue
            filepaths.append(os.path.join(root, entry))
            fnames.append(entry)
    return filepaths, fnames

def get_random_image_path(dir_path):
    """Return the path of one file picked at random from below ``dir_path``.

    Candidates come from `get_paths_to_files`, so hidden files are never
    returned. The pick uses the `random` module's global generator.
    """
    candidates, _ = get_paths_to_files(dir_path)
    pick = random.randrange(len(candidates))
    return candidates[pick]

## MNIST

In [None]:
# Batch size and the mean/std handed to transforms.Normalize for MNIST.
MNIST_BATCH_SIZE = 128
MNIST_MEAN = np.array([0.1307,])
MNIST_STD = np.array([0.3081,])
# Training loader: download=True fetches the data into MNIST_PATH if needed;
# batches are reshuffled.
mnist_train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(MNIST_PATH, train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize(MNIST_MEAN, MNIST_STD)
                   ])),
    batch_size=MNIST_BATCH_SIZE, shuffle=True)
# Test loader: same preprocessing, 8x larger batches. No download flag is
# passed here -- presumably it relies on the training loader above having
# fetched the files already.
mnist_test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(MNIST_PATH, train=False, transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize(MNIST_MEAN, MNIST_STD)
                   ])),
    batch_size=MNIST_BATCH_SIZE*8, shuffle=True)

# NOTE(review): `.train_labels` is the legacy torchvision attribute name
# (newer releases expose `.targets`) -- confirm against the installed version.
mnist_train_labels = mnist_train_loader.dataset.train_labels
# Sorted unique label values; used later as the class names for plots.
MNIST_CLASSES = np.unique(mnist_train_labels.numpy())
print("MNIST Train Samples:", len(mnist_train_loader.dataset))
print("MNIST Test Samples:", len(mnist_test_loader.dataset))

## CIFAR10

In [None]:
# Batch size and per-channel mean/std handed to transforms.Normalize for CIFAR-10.
CIFAR_BATCH_SIZE = 64
CIFAR_MEAN = np.array([0.49139968, 0.48215827, 0.44653124])
CIFAR_STD = np.array([0.24703233, 0.24348505, 0.26158768])
normTransform = transforms.Normalize(CIFAR_MEAN, CIFAR_STD)

# Training pipeline adds augmentation (padded random crop + horizontal flip)
# before tensor conversion and normalisation.
trainTransform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normTransform
])
# Test pipeline: no augmentation, only tensor conversion and normalisation.
testTransform = transforms.Compose([
    transforms.ToTensor(),
    normTransform
])

# Both loaders may download the dataset into CIFAR10_IMGS_PATH.
cifar_train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(CIFAR10_IMGS_PATH, train=True, download=True,
                 transform=trainTransform),
    batch_size=CIFAR_BATCH_SIZE, shuffle=True)
# Test batches are 16x larger and are not shuffled.
cifar_test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(CIFAR10_IMGS_PATH, train=False, download=True,
                 transform=testTransform),
    batch_size=CIFAR_BATCH_SIZE*16, shuffle=False)

# NOTE(review): `.train_labels` is the legacy torchvision attribute name
# (newer releases expose `.targets`) -- confirm against the installed version.
cifar_classes = cifar_train_loader.dataset.train_labels
# Sorted unique label values found in the training set.
CIFAR_CLASSES = np.unique(np.array(cifar_classes))
# Human-readable names; presumably indexed by the integer class label -- verify.
CIFAR_CLASS_NAMES = np.array(['airplane','automobile','bird','cat',
                          'deer','dog','frog', 'horse','ship','truck'])
print("CIFAR Train Samples:", len(cifar_train_loader.dataset))
print("CIFAR Test Samples:", len(cifar_test_loader.dataset))

## Swiss Roll

In [None]:
# `make_swiss_roll` is exported from `sklearn.datasets` itself; the private
# `samples_generator` module no longer exists in current scikit-learn.
from sklearn.datasets import make_swiss_roll

def get_swiss_roll(n_samples=100):
    """Sample a noisy 2-D swiss roll.

    # Arguments
        n_samples: number of points to draw

    # Output
        (X, t): `X` is a float32 array of shape (n_samples, 2) holding
        columns 0 and 2 of the 3-D roll; `t` is the second array returned
        by `make_swiss_roll`, passed through unchanged.
    """
    noise = 0.2
    # `noise` must be passed by keyword: recent scikit-learn releases make
    # every argument after `n_samples` keyword-only.
    X, t = make_swiss_roll(n_samples, noise=noise)
    X = X.astype('float32')[:, [0, 2]]
    return X, t

def plot_roll(data):
    """Scatter-plot a set of 2-D points.

    # Arguments
        data: (N, 2) numpy array, or an object with a `.numpy()` method
              (e.g. a torch tensor) that yields one
    """
    points = data if type(data) == np.ndarray else data.numpy()
    plt.scatter(points[:, 0], points[:, 1])

# Wrap 100 swiss-roll samples in a TensorDataset/DataLoader. The batch size
# equals the sample count, so the loader yields one batch.
SWISS_ROLL_BATCH_SIZE = 100
X, _ = get_swiss_roll(100)
swiss_roll_dataset = torch.utils.data.TensorDataset(torch.FloatTensor(X), torch.FloatTensor(_))
swiss_roll_loader = torch.utils.data.DataLoader(swiss_roll_dataset, batch_size=SWISS_ROLL_BATCH_SIZE, shuffle=True)
    
# Test
# Plot a fresh 1000-point roll, then one batch pulled through the loader.
data = get_swiss_roll(1000)[0]
plot_roll(data)
inputs,targets = next(iter(swiss_roll_loader))
print(inputs.size(),targets.size())
plot_roll(inputs)

# Image Handling

## Preprocessing

## Viewing

In [None]:
def imshow(inp, mean_arr, std_arr, title=None):
    """Display one normalised image tensor with matplotlib.

    # Arguments
        inp: image tensor whose first dimension is the channel count; a
             single-channel image is squeezed and drawn with a gray colormap,
             anything else is transposed to channels-last
        mean_arr, std_arr: statistics used to undo the normalisation
             (`std_arr * img + mean_arr`)
        title: optional plot title
    """
    pixels = inp.numpy()
    if inp.size(0) == 1:
        pixels, plot_opts = np.squeeze(pixels), {'cmap':'gray'}
    else:
        pixels, plot_opts = pixels.transpose((1, 2, 0)), {}
    plt.imshow(std_arr * pixels + mean_arr, **plot_opts)
    if title is not None:
        plt.title(title)
    # Short pause so that plots are updated
    plt.pause(0.001)

def visualize_preds(model, data_loader, class_names, mean_arr, std_arr, num_images=6):
    """Plot the first `num_images` samples of `data_loader` with predicted/actual labels.

    # Arguments
        model: nn.Module producing one score per class for each sample
        data_loader: iterable of (inputs, labels) batches
        class_names: sequence indexed by integer class id
        mean_arr, std_arr: normalisation statistics forwarded to `imshow`
        num_images: number of images to draw (two per row)

    # Output
        None: draws into a new matplotlib figure
    """
    if num_images < 1:
        return

    # Move batches to the GPU only when the model itself lives there, so the
    # helper also works for CPU models.
    first_param = next(model.parameters(), None)
    use_cuda = first_param is not None and first_param.is_cuda

    # Round the row count up so an odd `num_images` still fits in the grid.
    n_rows = (num_images + 1) // 2
    images_so_far = 0
    plt.figure()

    # Predict in eval mode (dropout off, batch-norm running statistics) and
    # restore whatever mode the model was in afterwards.
    was_training = model.training
    model.eval()
    try:
        for inputs, labels in data_loader:
            if use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()
            inputs, labels = Variable(inputs), Variable(labels)

            outputs = model(inputs)
            _, preds = torch.max(outputs.data, 1)
            # Flatten so indexing works whether or not torch.max kept the
            # reduced dimension (the behaviour differs between versions).
            preds = preds.cpu().numpy().reshape(-1)
            labels = labels.data.cpu().numpy().reshape(-1)
            images = inputs.data.cpu()
            for j in range(images.size(0)):
                images_so_far += 1
                ax = plt.subplot(n_rows, 2, images_so_far)
                ax.axis('off')
                ax.set_title('P: {}, A:{}'.format(class_names[preds[j]],
                                                  class_names[labels[j]]))
                imshow(images[j], mean_arr, std_arr)

                if images_so_far == num_images:
                    plt.tight_layout()
                    return
        plt.tight_layout()
    finally:
        model.train(was_training)

def plot_mnist_samples(arr, dim=(4,4), figsize=(6,6)):
    """Draw a batch of MNIST digits on a grid of subplots.

    # Arguments
        arr: numpy array or tensor reshapeable to (batch, 28, 28)
        dim: subplot grid passed to `plt.subplot`
        figsize: matplotlib figure size
    """
    if type(arr) is not np.ndarray:
        arr = arr.numpy()
    digits = arr.reshape(arr.shape[0], 28, 28)
    plt.figure(figsize=figsize)
    for position, digit in enumerate(digits, start=1):
        plt.subplot(*dim, position)
        plt.imshow(digit, cmap='gray')
        plt.axis('off')
    plt.tight_layout()

def denorm_meanstd(t, mean, std):
    """Undo a mean/std normalisation: returns `t * std + mean`."""
    rescaled = t * std
    return rescaled + mean

def plot_cifar_samples(arr, dim=(4,4), figsize=(6,6)):
    """Draw a batch of CIFAR images on a grid of subplots.

    # Arguments
        arr: a tensor, which is transposed with axes (0, 2, 3, 1) to move
             channels last; a numpy array is used as-is (presumably already
             channels-last -- verify against caller)
        dim: subplot grid passed to `plt.subplot`
        figsize: matplotlib figure size
    """
    if type(arr) is not np.ndarray:
        arr = arr.numpy().transpose((0, 2, 3, 1))
    # Undo the normalisation with the module-level CIFAR statistics.
    images = denorm_meanstd(arr, CIFAR_MEAN, CIFAR_STD)
    plt.figure(figsize=figsize)
    for position, image in enumerate(images, start=1):
        plt.subplot(*dim, position)
        plt.imshow(image, cmap='gray')
        plt.axis('off')
    plt.tight_layout()
    
def get_cifar_batch_of_size(bs):
    """Return the first `bs` (inputs, targets) of the first CIFAR test batch.

    Handy for quick tests that need fewer samples than a full loader batch.
    """
    batch_inputs, batch_targets = next(iter(cifar_test_loader))
    return batch_inputs[:bs], batch_targets[:bs]

def get_mnist_batch(bs):
    """Return one shuffled (inputs, targets) batch of `bs` MNIST training samples.

    A new dataset and loader are built on every call, using the same
    ToTensor + Normalize preprocessing as the module-level MNIST loaders.
    """
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(MNIST_MEAN, MNIST_STD)
    ])
    mnist_train = datasets.MNIST(MNIST_PATH, train=True, download=True,
                                 transform=preprocess)
    loader = torch.utils.data.DataLoader(mnist_train, batch_size=bs, shuffle=True)
    batch_inputs, batch_targets = next(iter(loader))
    return batch_inputs, batch_targets

# Smoke test: fetch 12 MNIST digits and draw them on the default 4x4 grid.
inps,targs = get_mnist_batch(12)
plot_mnist_samples(inps)

In [None]:
# Smoke test: fetch 12 CIFAR test images and draw them on the default 4x4 grid.
inputs,targets = get_cifar_batch_of_size(12)
plot_cifar_samples(inputs)

# Models

## Logging

In [None]:
#https://docs.python.org/3/howto/logging-cookbook.html

def get_logger(ch_log_level=logging.ERROR,
               fh_log_level=logging.INFO):
    """(Re)configure and return the shared "cheatsheet" logger.

    Every call discards the handlers installed by a previous call, so the
    function can be re-run (e.g. when a notebook cell is re-executed)
    without duplicating output.

    # Arguments
        ch_log_level: level for the console handler; a falsy value
                      (e.g. None) disables console logging
        fh_log_level: level for the 'cheatsheet.log' file handler; a falsy
                      value disables file logging

    # Output
        logging.Logger named "cheatsheet" with its own level set to DEBUG
    """
    logger = logging.getLogger("cheatsheet")
    logger.setLevel(logging.DEBUG)
    # Records are handled here only; do not forward them to root-logger handlers.
    logger.propagate = False

    # Remove and close handlers left by an earlier call. This replaces the
    # previous `logging.shutdown()` + module reload, which reset logging for
    # every library in the process and relied on the deprecated `imp` module.
    for stale in list(logger.handlers):
        logger.removeHandler(stale)
        stale.close()

    # Console Handler
    if ch_log_level:
        ch = logging.StreamHandler()
        ch.setLevel(ch_log_level)
        ch.setFormatter(logging.Formatter('%(message)s'))
        logger.addHandler(ch)

    # File Handler
    if fh_log_level:
        fh = logging.FileHandler('cheatsheet.log')
        fh.setLevel(fh_log_level)
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    return logger

#Test
# Default levels: console handler at ERROR, file handler at INFO.
logger = get_logger() #Singleton
logger.info("LOG TO FILE")
logger.error("LOG TO FILE and CONSOLE")

# Console-only configuration: DEBUG and above on the console, no log file.
logger = get_logger(ch_log_level=logging.DEBUG, 
               fh_log_level=None)
logger.debug("Init Console Logger Only")

## Linear

In [None]:
class LinearNetMNIST(nn.Module):
    """Three stacked linear layers (784 -> 500 -> 250 -> 10) followed by a softmax.

    There is no non-linearity between the linear layers. The input is
    flattened to (bs, 784) in `forward`, so any input with 784 values per
    sample is accepted.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in forward order: (bs, 784) -> (bs, 10)
        self.layer1 = nn.Linear(784, 500)
        self.layer2 = nn.Linear(500, 250)
        self.layer3 = nn.Linear(250, 10)
        self.softmax = nn.Softmax()

    def forward(self, x):
        """Return the softmax output, shape (bs, 10)."""
        flat = x.view(x.size(0), -1)
        for layer in (self.layer1, self.layer2, self.layer3):
            flat = layer(flat)
        return self.softmax(flat)
    
class LinearNetCIFAR(nn.Module):
    """Fully connected classifier: 3072 -> 512 -> 256 -> 10 with ReLUs and a softmax.

    A (bs, 3, 32, 32) input is flattened to (bs, 3*32*32) before the first
    linear layer.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(3072, 512)
        self.relu1 = nn.ReLU()
        self.layer2 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.layer3 = nn.Linear(256, 10)
        self.softmax = nn.Softmax()

    def forward(self, x):
        """Return the softmax output, shape (bs, 10)."""
        hidden = x.view(x.size(0), -1)  # (bs, 3*32*32)
        hidden = self.relu1(self.layer1(hidden))
        hidden = self.relu2(self.layer2(hidden))
        return self.softmax(self.layer3(hidden))

In [None]:
#MNIST
inputs, targets = get_mnist_batch(2)
net = LinearNetMNIST()
print(net(Variable(inputs)))

#CIFAR
inputs, targets = get_cifar_batch_of_size(2)
net2 = LinearNetCIFAR()
print(net2(Variable(inputs)))

## CNN

Tips and Guidelines
http://cs231n.github.io/convolutional-networks/

* The input layer (that contains the image) should be divisible by 2 many times
* The conv layers should use small filters (e.g. 3x3 or at most 5x5) with a stride of S=1 and, crucially, pad the input volume with zeros in such a way that the conv layer does not alter the spatial dimensions of the input. That is, when FS=3, using P=1 will retain the original size of the input; when FS=5, use P=2. For a general (odd) FS, P = (FS − 1)/2 preserves the input size.
* The pool layers are in charge of downsampling the spatial dimensions of the input. They introduce zero parameters since they simply compute a fixed function of the input. The most common setting is max-pooling with 2x2 receptive fields (i.e. FS=2) and a stride of 2 (i.e. S=2). The output dimensions are W2 = (W1 − FS)/S + 1.

In [None]:
class BasicCNN(nn.Module):
    """Two unpadded 2x2 convolutions followed by a linear classifier and a softmax.

    Sized for (bs, 3, 32, 32) inputs; with W2 = (W1 - KS + 2P) / S + 1 each
    convolution shrinks the feature map by one pixel per side length.
    When a `logger` is given, tensor sizes are logged at DEBUG level while
    the input moves through the network.
    """

    def __init__(self, logger=None):
        super().__init__()
        self.logger = logger
        # (bs, 3, 32, 32) -> (bs, 50, 31, 31)
        self.conv1 = nn.Conv2d(3, 50, kernel_size=2, stride=1, padding=0)
        # (bs, 50, 31, 31) -> (bs, 100, 30, 30)
        self.conv2 = nn.Conv2d(50, 100, kernel_size=2, stride=1, padding=0)
        # "affine" (linear) op: (bs, 100*30*30) -> (bs, 10)
        self.linear = nn.Linear(100 * 30 * 30, 10)
        self.softmax = nn.Softmax()

    def log(self, msg):
        """Send `msg` to the logger at DEBUG level, if a logger was supplied."""
        if not self.logger:
            return
        self.logger.debug(msg)

    def forward(self, x):
        """Return the softmax output, shape (bs, 10)."""
        self.log(x.size())
        for conv in (self.conv1, self.conv2):
            x = conv(x)
            self.log(x.size())
        x = x.view(x.size(0), -1)  # (bs, 100*30*30)
        self.log(x.size())
        x = self.linear(x)
        self.log(x.size())
        return self.softmax(x)
    
class DeeperCNN(nn.Module):
    """Two conv/batch-norm/ReLU stages, a 2x2 max-pool and a two-layer classifier head.

    Sized for (bs, 3, 32, 32) inputs. When a `logger` is given, tensor sizes
    are logged at DEBUG level while the input moves through the network.

    Shape formulas used in the comments below:
        conv: W2 = (W1 - FS + 2P) / S + 1
        pool: W2 = (W1 - FS) / S + 1
    """

    def __init__(self, logger=None):
        super().__init__()
        self.logger = logger
        # NOTE(review): in PyTorch, BatchNorm `momentum` is the weight given
        # to the newest batch statistic (library default 0.1) -- confirm
        # that 0.9 is intended.
        # (bs, 3, 32, 32) -> (bs, 64, 32, 32)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(64, momentum=0.9)
        self.relu1 = nn.ReLU()
        # (bs, 64, 32, 32) -> (bs, 128, 32, 32)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(128, momentum=0.9)
        self.relu2 = nn.ReLU()
        # Halves height and width: (bs, 128, 32, 32) -> (bs, 128, 16, 16)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # (bs, 128*16*16) -> (bs, 512)
        self.linear1 = nn.Linear(128 * 16 * 16, 512)
        self.dropout = nn.Dropout(p=0.5)
        # (bs, 512) -> (bs, 10)
        self.linear2 = nn.Linear(512, 10)
        self.softmax = nn.Softmax()

    def log(self, msg):
        """Send `msg` to the logger at DEBUG level, if a logger was supplied."""
        if not self.logger:
            return
        self.logger.debug(msg)

    def forward(self, x):
        """Return the softmax output, shape (bs, 10)."""
        self.log(x.size())

        conv_stages = (
            (self.conv1, self.bn1, self.relu1),
            (self.conv2, self.bn2, self.relu2),
        )
        for conv, bn, relu in conv_stages:
            x = relu(bn(conv(x)))
            self.log(x.size())

        x = self.pool(x)
        self.log(x.size())

        # Flatten for the linear head
        x = x.view(x.size(0), -1)
        self.log(x.size())

        x = self.linear1(x)
        self.log(x.size())

        # Dropout output size is not logged
        x = self.linear2(self.dropout(x))
        self.log(x.size())

        return self.softmax(x)

In [None]:
# Smoke test: run two CIFAR training images through each (untrained) CNN on
# the CPU with DEBUG logging enabled, so the size logged at each stage is shown.
logger = get_logger(logging.DEBUG, logging.DEBUG)
net = BasicCNN(logger)
inputs,targets = next(iter(cifar_train_loader))
inputs = Variable(inputs[:2])
net(inputs)

net = DeeperCNN(logger)
inputs,targets = next(iter(cifar_train_loader))
inputs = Variable(inputs[:2])
net(inputs)

## GAN

# Training

## Helpers

In [None]:
def train(net, dataloader, criterion, optimizer, epoch=1):
    """Run one pass over `dataloader`, updating `net` after every batch.

    # Arguments
        net: model to train (nn.Module)
        dataloader: sized iterable of (inputs, targets) batches
        criterion: loss function called as `criterion(outputs, targets)`
        optimizer: optimizer holding the parameters of `net`
        epoch: accepted for API symmetry; not used by the loop

    # Output
        Mean of the per-batch loss values over the pass
    """
    # Move batches to the GPU only when the model itself lives there, so the
    # same loop also runs on CPU-only machines.
    first_param = next(net.parameters(), None)
    use_cuda = first_param is not None and first_param.is_cuda

    n_batches = len(dataloader)
    total_loss = 0
    for inputs, targets in dataloader:
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs), Variable(targets)

        ## Forward Pass
        out = net(inputs)

        ## Clear Gradients
        net.zero_grad()

        ## Get Loss
        loss = criterion(out, targets)

        ## Backprop
        loss.backward()
        optimizer.step()

        # `loss.data[0]` raises on the 0-dim loss tensors of newer PyTorch;
        # prefer `.item()` when it exists and fall back to the legacy form.
        total_loss += loss.item() if hasattr(loss, 'item') else loss.data[0]

    return total_loss / n_batches

def predict(net, inputs):
    """Return the arg-max class index per sample as a flat CPU tensor.

    # Arguments
        net: callable mapping `inputs` to per-class scores
        inputs: batch accepted by `net`
    """
    scores = net(inputs)
    _, best = torch.max(scores, dim=1)
    return best.data.cpu().view(-1)

def get_metrics(net, test_loader, criterion):
    """Compute loss and accuracy of `net` on the FIRST batch of `test_loader`.

    Only one batch is evaluated, so the numbers describe the whole test set
    only when the loader's batch size covers it.

    # Arguments
        net: model to evaluate (nn.Module)
        test_loader: iterable of (inputs, targets) batches
        criterion: loss function called as `criterion(outputs, targets)`

    # Output
        (loss, accuracy): loss value of the batch and the fraction of
        samples whose arg-max prediction equals the target
    """
    # Move the batch to the GPU only when the model itself lives there.
    first_param = next(net.parameters(), None)
    use_cuda = first_param is not None and first_param.is_cuda

    data, targets = next(iter(test_loader))
    if use_cuda:
        inputs, labels = Variable(data.cuda()), Variable(targets.cuda())
    else:
        inputs, labels = Variable(data), Variable(targets)

    # Evaluate in eval mode (dropout off, batch-norm running statistics) and
    # restore the previous mode afterwards so a surrounding training loop is
    # unaffected.
    was_training = net.training
    net.eval()
    try:
        outputs = net(inputs)
        batch_loss = criterion(outputs, labels)
    finally:
        net.train(was_training)

    # `.data[0]` raises on the 0-dim loss tensors of newer PyTorch; prefer
    # `.item()` when it exists and fall back to the legacy form.
    loss = batch_loss.item() if hasattr(batch_loss, 'item') else batch_loss.data[0]

    #Flatten and Get ArgMax to compute accuracy
    val, idx = torch.max(outputs, dim=1)
    predictions = idx.data.cpu().view(-1).numpy()
    # Flatten the targets too so the comparison never broadcasts.
    targets = targets.view(-1).numpy()
    correct = np.sum(predictions == targets)
    accuracy = correct / len(targets)

    return loss, accuracy

In [None]:
def adjust_learning_rate(lr, optimizer, cur_epoch, decay=.995, n_epochs=1):
    """Apply a step-wise exponential decay to the optimizer's learning rate.

    The new rate is `lr * decay ** (cur_epoch // n_epochs)` and is written to
    every parameter group.

    # Arguments
        lr: global learning rate defined at start of training
        optimizer: optimizer exposing `param_groups`
        cur_epoch: current epoch in training
        decay: multiplicative factor applied once per step (.995)
        n_epochs: number of epochs per new application of `decay`

    # Output
        None: Updates optimizer in-place
    """
    steps_taken = cur_epoch // n_epochs
    decayed_lr = lr * (decay ** steps_taken)
    for group in optimizer.param_groups:
        group['lr'] = decayed_lr

## MNIST

In [None]:
# Linear Model
# Model, loss and optimizer for the MNIST run; model and criterion are moved to the GPU.
mnistnet = LinearNetMNIST().cuda()

N_EPOCHS = 5
# NOTE(review): nn.CrossEntropyLoss applies log-softmax itself, while
# LinearNetMNIST already ends in nn.Softmax -- confirm the double softmax is intended.
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(mnistnet.parameters(), lr=1e-4)

In [None]:
# Per-epoch metrics collected for the plots in the following cells.
train_loss_history = []
test_loss_history = []
test_acc_history = []
for epoch in range(1, N_EPOCHS+1):
    # One full pass over the training data, then metrics from get_metrics
    # (which evaluates the first test batch only).
    train_loss = train(mnistnet, mnist_train_loader, criterion, optimizer, epoch)
    test_loss, test_acc = get_metrics(mnistnet, mnist_test_loader, criterion)
    print('Epoch %d, TrainLoss: %.3f, TestLoss: %.3f, TestAcc: %.3f' % (
        epoch, train_loss, test_loss, test_acc))
    train_loss_history.append(train_loss)
    test_loss_history.append(test_loss)
    test_acc_history.append(test_acc)

In [None]:
# Train and test loss per epoch; stacking on axis 1 gives one line per column.
plt.plot(np.stack([train_loss_history, test_loss_history],1))

In [None]:
# Test accuracy per epoch.
plt.plot(test_acc_history)

In [None]:
# Show six test digits with predicted (P) and actual (A) labels.
visualize_preds(mnistnet, mnist_test_loader, MNIST_CLASSES, MNIST_MEAN, MNIST_STD, 6)

## CIFAR

In [None]:
# Three candidate models are built in turn and each assignment overwrites
# the previous one, so only the last (DeeperCNN) is trained as written.
# Presumably meant as a pick-one list -- keep only the line you want.
# Linear Model
cifarnet = LinearNetCIFAR().cuda()

# Basic CNN
cifarnet = BasicCNN().cuda()

# Deeper CNN
cifarnet = DeeperCNN().cuda()

N_EPOCHS = 5
# NOTE(review): nn.CrossEntropyLoss applies log-softmax itself, while the
# models above already end in nn.Softmax -- confirm the double softmax is intended.
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(cifarnet.parameters(), lr=1e-4)

# Total number of scalar parameters in the selected model.
print('  + Number of params: {}'.format(
    sum([p.data.nelement() for p in cifarnet.parameters()])))

In [None]:
# Per-epoch metrics collected for the plots in the following cells
# (this rebinds the history lists used by the MNIST run).
train_loss_history = []
test_loss_history = []
test_acc_history = []
for epoch in range(1, N_EPOCHS+1):
    # One full pass over the training data, then metrics from get_metrics
    # (which evaluates the first test batch only).
    train_loss = train(cifarnet, cifar_train_loader, criterion, optimizer, epoch)
    test_loss, test_acc = get_metrics(cifarnet, cifar_test_loader, criterion)
    print('Epoch %d, TrainLoss: %.3f, TestLoss: %.3f, TestAcc: %.3f' % (
        epoch, train_loss, test_loss, test_acc))
    train_loss_history.append(train_loss)
    test_loss_history.append(test_loss)
    test_acc_history.append(test_acc)

In [None]:
# Train and test loss per epoch; stacking on axis 1 gives one line per column.
plt.plot(np.stack([train_loss_history, test_loss_history],1))

In [None]:
# Test accuracy per epoch.
plt.plot(test_acc_history)

In [None]:
# Show six test images with predicted (P) and actual (A) class names.
visualize_preds(cifarnet, cifar_test_loader, CIFAR_CLASS_NAMES, CIFAR_MEAN, CIFAR_STD, 6)

# Experiments

## Load/Save Weights

In [None]:
def save_weights(model, weights_dir, trn_loss, val_loss, trn_err,
                 val_err, expr_name, epoch, is_best_loss=False):
    """Write a checkpoint of `model` and refresh the 'latest'/'best' copies.

    The checkpoint goes to
    `<weights_dir>/<expr_name>-weights-<epoch>-<trn_loss>-<trn_err>-<val_loss>-<val_err>.pth`
    and is then copied to 'latest.pth' (and to 'best.pth' when
    `is_best_loss` is set); both copies are relative paths, i.e. they land
    in the current working directory.

    # Arguments
        model: module whose `state_dict()` is saved
        weights_dir: existing directory receiving the checkpoint file
        trn_loss, val_loss, trn_err, val_err: metrics stored in the
            checkpoint and encoded in the file name
        expr_name: experiment name, used as the file-name prefix
        epoch: epoch number of this checkpoint
        is_best_loss: also copy the checkpoint to 'best.pth'
    """
    # The prefix previously came from an undefined name (`name`), which
    # raised NameError on every call; `expr_name` is the intended value.
    weights_fname = expr_name+'-weights-%d-%.3f-%.3f-%.3f-%.3f.pth' % (
        epoch, trn_loss, trn_err, val_loss, val_err)
    weights_fpath = os.path.join(weights_dir, weights_fname)
    torch.save({
            'last_epoch': epoch,
            'trn_loss': trn_loss,
            'val_loss': val_loss,
            'trn_err': trn_err,
            'val_err': val_err,
            'experiment': expr_name,
            'state_dict': model.state_dict()
        }, weights_fpath )
    shutil.copyfile(weights_fpath, 'latest.pth')
    if is_best_loss:
        shutil.copyfile(weights_fpath, 'best.pth')

def load_weights(self, model, fpath, expr_name):
    """Load a checkpoint written by `save_weights` into `model`.

    NOTE(review): this is a module-level function, yet its first parameter
    is named `self` and is never used -- callers must pass a placeholder.
    Confirm whether it can be dropped.

    # Arguments
        self: unused
        model: module receiving the saved `state_dict`
        fpath: path of the checkpoint file
        expr_name: experiment name, only used in the printed summary

    # Output
        (model, state): the same model object and the loaded checkpoint dict
    """
    print("loading weights '{}'".format(fpath))
    checkpoint = torch.load(fpath)
    model.load_state_dict(checkpoint['state_dict'])
    trn_err, val_loss, val_err = (
        checkpoint['trn_err'], checkpoint['val_loss'], checkpoint['val_err'])
    print (trn_err, val_loss, val_err)
    summary = (expr_name, checkpoint['last_epoch'], checkpoint['trn_loss'],
               trn_err, val_loss, val_err)
    print("loaded weights from experiment %s (last_epoch %d, trn_loss %s, trn_err %s, val_loss %s, val_err %s)" % summary)
    return model, checkpoint

def save_optimizer(optimizer, epoch, sessionName, weights_dir=None):
    """Write the optimizer state to disk and refresh 'latest-optim.pth'.

    # Arguments
        optimizer: optimizer whose `state_dict()` is saved
        epoch: epoch number, encoded in the file name
        sessionName: session name, used as the file-name prefix
        weights_dir: directory receiving both files. When omitted, the
            module-level `WEIGHTS_PATH` is used; that name is not defined in
            this notebook, so pass `weights_dir` unless you define it yourself.
    """
    if weights_dir is None:
        # Legacy behaviour: fall back to the global the original code relied on.
        weights_dir = WEIGHTS_PATH
    optim_fname = sessionName+'-optim-%d.pth' % (epoch)
    optim_fpath = os.path.join(weights_dir, optim_fname)
    torch.save({
            'lastEpoch': epoch,
            'sessionName': sessionName,
            'state_dict': optimizer.state_dict()
        }, optim_fpath )
    # Join instead of string concatenation so a directory without a trailing
    # separator still yields a path inside it.
    shutil.copyfile(optim_fpath, os.path.join(weights_dir, 'latest-optim.pth'))

def load_optimizer(optimizer, fpath):
    """Restore `optimizer` in place from a file written by `save_optimizer`.

    # Arguments
        optimizer: optimizer receiving the saved state
        fpath: path of the saved optimizer file

    # Output
        None: Updates optimizer in-place
    """
    print("loading optimizer '{}'".format(fpath))
    saved = torch.load(fpath)
    optimizer.load_state_dict(saved['state_dict'])
    session, last_epoch = saved['sessionName'], saved['lastEpoch']
    print("loaded optimizer from session {}, lastEpoch {}"
          .format(session, last_epoch))

## Visdom Web Server

In [None]:
# https://github.com/facebookresearch/visdom
import visdom
# Module-level Visdom client used by the viz_plot_* helpers below.
# Presumably requires a running Visdom server -- verify before running.
viz = visdom.Visdom()

In [None]:
def viz_plot_tst_trn(window, epoch, tst_val, trn_val, name='loss', env='main'):
    """Create or extend a two-line (validation/train) Visdom chart.

    # Arguments
        window: handle of an existing chart, or None to create a new one
        epoch: x-coordinate of the new point
        tst_val, trn_val: y-values for the 'Validation' and 'Train' lines
        name: metric name, used as y-label and in the title
        env: Visdom environment

    # Output
        Whatever `viz.line` returns; callers pass it back in as `window`
    """
    if window is not None:
        # Append one point per line to the existing chart
        return viz.line(
            X=np.ones((1, 2)) * epoch,
            Y=np.expand_dims([tst_val, trn_val],0),
            win=window,
            update='append',
            env=env
        )
    chart_opts = dict(
        xlabel='epoch',
        ylabel=name,
        title=env+' '+name,
        legend=['Validation', 'Train']
    )
    return viz.line(
        X=np.array([epoch]),
        Y=np.array([[tst_val, trn_val]]),
        opts=chart_opts,
        env=env
    )

def viz_plot_img(window, arr, mean, std, env='main', title='Image'):
    '''
    Draw a de-normalised image in a Visdom window.

    A non-numpy `arr` (e.g. a `CxHxW` tensor) is converted with `.numpy()`
    and moved to channels-last; a numpy array is used as-is (presumably
    already channels-last -- verify against caller). After `mean`/`std` are
    undone the image is transposed back to channels-first for `viz.image`.
    '''
    if type(arr) is not np.ndarray:
        arr = arr.numpy().transpose((1, 2, 0))
    channels_first = denorm_meanstd(arr, mean, std).transpose((2, 0, 1))
    image_opts = dict(title=title, caption='Silly image')
    viz.image(channels_first, opts=image_opts, win=window, env=env)
    
def viz_plot_text(window, text, env='main'):
    """Show `text` in a Visdom text pane.

    # Arguments
        window: handle of an existing pane, or None to create a new one
        text: content to display
        env: Visdom environment

    # Output
        Whatever `viz.text` returns; callers pass it back in as `window`
    """
    call_kwargs = {'env': env}
    if window is not None:
        call_kwargs['win'] = window
    return viz.text(text, **call_kwargs)

def viz_plot_summary(window, epoch, tst_loss, trn_loss,
                       tst_err, trn_err, env='main'):
    """Show a text summary of train/test loss and error for `epoch`.

    Delegates to `viz_plot_text` and returns its result, so `window` may be
    None (new pane) or the handle from a previous call.
    """
    template = """Epoch: %d
        Train - Loss: %.3f Err: %.3f
        Test - Loss: %.3f Err: %.3f"""
    values = (epoch, trn_loss, trn_err, tst_loss, tst_err)
    return viz_plot_text(window, template % values, env)

In [None]:
#Visit http://localhost:8097 to view plots

# Each group below creates a pane with window=None and then feeds the
# returned handle back in.

#Should plot one chart and update it
txt_chart = viz_plot_summary(None, 1, 2, 3, 4, 5)
txt_chart = viz_plot_summary(txt_chart, 5, 2, 3, 4, 5)
txt_chart = viz_plot_summary(txt_chart, 5, 3, 8, 7, 6)

#Should plot one chart and update it
sum_chart = viz_plot_text(None, 'Hello, world3!')
sum_chart = viz_plot_text(sum_chart, 'Hello, world4!')

#Should plot one chart and update it
#window, epoch, tst_val, trn_val, name='loss', env='main'
loss_chart = viz_plot_tst_trn(None, 9, 14, 27, 'loss')
loss_chart = viz_plot_tst_trn(loss_chart, 10, 18, 30, 'loss')
loss_chart = viz_plot_tst_trn(loss_chart, 11, 19, 32, 'loss')

#Should plot one chart and update it
#window, epoch, tst_val, trn_val, name='loss', env='main'
err_chart = viz_plot_tst_trn(None, 9, 14, 27, 'error')
err_chart = viz_plot_tst_trn(err_chart, 10, 18, 30, 'error')
err_chart = viz_plot_tst_trn(err_chart, 11, 19, 32, 'error')

# Plot Image
# Create a pane with random pixels, then pass its handle to viz_plot_img
# together with the first image of a CIFAR training batch.
inputs, targets = next(iter(cifar_train_loader))
img_chart = viz.image(
    np.random.rand(3,100,100),
    opts=dict(title="Image", caption='Silly random'),
)
viz_plot_img(img_chart, inputs[0], CIFAR_MEAN, CIFAR_STD)

## Experiment Class

In [None]:
import numpy as np
import os
import torch
import visdom
import shutil
import sys
from pathlib import Path


class Experiment():
    """Bookkeeping for one named training run: directories, metric history,
    checkpoints and Visdom plots.

    Everything lives under `<root>/<name>/` in `weights`, `history` and
    `results` sub-directories. Constructing an instance opens a Visdom
    connection and creates placeholder plots; call `init()` for a new run or
    `resume()` to continue from saved checkpoints.

    `self.epoch` is never advanced by this class -- presumably the training
    loop increments it once per epoch (verify against caller).
    """

    def __init__(self, name, root):
        """
        # Arguments
            name: experiment name (directory name, file prefix, Visdom env)
            root: parent directory that holds all experiments
        """
        self.name = name
        self.root = os.path.join(root,name)
        self.epoch = 1
        self.best_val_loss = sys.maxsize
        self.best_val_loss_epoch = 1
        self.weights_dir = os.path.join(self.root, 'weights')
        self.history_dir = os.path.join(self.root, 'history')
        self.results_dir = os.path.join(self.root, 'results')
        self.latest_weights = os.path.join(self.weights_dir, 'latest_weights.pth')
        self.latest_optimizer = os.path.join(self.weights_dir, 'latest_optim.pth')
        # Until a best checkpoint is recorded these point at the 'latest' copies.
        self.best_weights_path = self.latest_weights
        self.best_optimizer_path = self.latest_optimizer
        self.train_history_fpath = os.path.join(self.history_dir, 'train.csv')
        self.val_history_fpath = os.path.join(self.history_dir, 'val.csv')
        self.test_history_fpath = os.path.join(self.history_dir, 'test.csv')
        # One value per epoch and dataset split
        self.loss_history = {
            'train': np.array([]),
            'val': np.array([]),
            'test': np.array([])
        }
        self.error_history = {
            'train': np.array([]),
            'val': np.array([]),
            'test': np.array([])
        }
        self.viz = visdom.Visdom()
        self.visdom_plots = self.init_visdom_plots()

    def init(self):
        """Create the directory tree and empty history files for a new experiment."""
        print("Creating new experiment")
        self.init_dirs()
        self.init_history_files()

    def resume(self, model, optim, weights_fpath=None, optim_path=None):
        """Restore model, optimizer and bookkeeping from saved checkpoints.

        # Arguments
            model: module receiving the saved weights
            optim: optimizer receiving the saved state
            weights_fpath: checkpoint to load (default: latest weights)
            optim_path: optimizer file to load (default: latest optimizer)

        # Output
            (model, optim): the same objects, updated in place
        """
        print("Resuming existing experiment")
        if weights_fpath is None:
            weights_fpath = self.latest_weights
        if optim_path is None:
            optim_path = self.latest_optimizer

        model, state = self.load_weights(model, weights_fpath)
        optim = self.load_optimizer(optim, optim_path)

        self.best_val_loss = state['best_val_loss']
        self.best_val_loss_epoch = state['best_val_loss_epoch']
        # Continue with the epoch after the one that was saved
        self.epoch = state['last_epoch']+1
        self.load_history_from_file('train')
        self.load_history_from_file('val')

        return model, optim

    def init_dirs(self):
        """Create the weights/history/results directories (raises if one already exists)."""
        os.makedirs(self.weights_dir)
        os.makedirs(self.history_dir)
        os.makedirs(self.results_dir)

    def init_history_files(self):
        """Create the train/val/test history CSV files if they do not exist yet."""
        Path(self.train_history_fpath).touch()
        Path(self.val_history_fpath).touch()
        Path(self.test_history_fpath).touch()

    def init_visdom_plots(self):
        """Create the loss, error and summary panes; returns their handles keyed by name."""
        loss = self.init_viz_train_plot('loss')
        error = self.init_viz_train_plot('error')
        summary = self.init_viz_txt_plot('summary')
        return {
            'loss':loss,
            'error':error,
            'summary':summary
        }

    def init_viz_train_plot(self, title):
        """Create a two-line (train/validation) chart seeded with a dummy point."""
        return self.viz.line(
            X=np.array([1]),
            Y=np.array([[1, 1]]),
            opts=dict(
                xlabel='epoch',
                ylabel=title,
                title=self.name+' '+title,
                legend=['Train', 'Validation']
            ),
            env=self.name
        )

    def init_viz_txt_plot(self, title):
        """Create a text pane with a placeholder message."""
        return self.viz.text(
            "Initializing.. " + title,
            env=self.name
        )

    def viz_epochs(self):
        """Return the x-coordinates 1..self.epoch, duplicated for both plotted lines."""
        epochs = np.arange(1,self.epoch+1)
        return np.stack([epochs, epochs],1)

    def update_viz_loss_plot(self):
        """Redraw the loss chart from the full train/val loss history."""
        loss = np.stack([self.loss_history['train'],
                         self.loss_history['val']],1)
        window = self.visdom_plots['loss']
        return self.viz.line(
            X=self.viz_epochs(),
            Y=loss,
            win=window,
            env=self.name,
            opts=dict(
                xlabel='epoch',
                ylabel='loss',
                title=self.name+' '+'loss',
                legend=['Train', 'Validation']
            ),
        )

    def update_viz_error_plot(self):
        """Redraw the error chart from the full train/val error history."""
        error = np.stack([self.error_history['train'],
                         self.error_history['val']], 1)
        window = self.visdom_plots['error']
        return self.viz.line(
            X=self.viz_epochs(),
            Y=error,
            win=window,
            env=self.name,
            opts=dict(
                xlabel='epoch',
                ylabel='error',
                title=self.name+' '+'error',
                legend=['Train', 'Validation']
            )
        )

    def update_viz_summary_plot(self):
        """Replace the summary pane's text with the latest train/val metrics."""
        trn_loss = self.loss_history['train'][-1]
        val_loss = self.loss_history['val'][-1]
        trn_err = self.error_history['train'][-1]
        val_err = self.error_history['val'][-1]
        # The second line shows the validation metrics read above; the
        # previous code referenced undefined `tst_loss`/`tst_err` here.
        txt = ("""Epoch: %d
            Train - Loss: %.3f Err: %.3f
            Test - Loss: %.3f Err: %.3f""" % (self.epoch,
            trn_loss, trn_err, val_loss, val_err))
        window = self.visdom_plots['summary']
        return self.viz.text(
            txt,
            win=window,
            env=self.name
        )

    def load_history_from_file(self, dset_type):
        """Load the loss/error history for `dset_type` from its CSV file.

        Rows are `epoch,loss,error`; columns 1 and 2 are read.
        """
        fpath = os.path.join(self.history_dir, dset_type+'.csv')
        data = np.loadtxt(fpath, delimiter=',').reshape(-1, 3)
        self.loss_history[dset_type] = data[:,1]
        self.error_history[dset_type] = data[:,2]

    def append_history_to_file(self, dset_type, loss, error):
        """Append one `epoch,loss,error` row to the CSV file of `dset_type`."""
        fpath = os.path.join(self.history_dir, dset_type+'.csv')
        with open(fpath, 'a') as f:
            f.write('{},{},{}\n'.format(self.epoch, loss, error))

    def save_history(self, dset_type, loss, error):
        """Record `loss`/`error` for the current epoch in memory and on disk.

        For the 'val' split this also updates the best validation loss and
        the epoch at which it occurred.
        """
        self.loss_history[dset_type] = np.append(
            self.loss_history[dset_type], loss)
        self.error_history[dset_type] = np.append(
            self.error_history[dset_type], error)
        self.append_history_to_file(dset_type, loss, error)

        if dset_type == 'val' and self.is_best_loss(loss):
            self.best_val_loss = loss
            self.best_val_loss_epoch = self.epoch

    def is_best_loss(self, loss):
        """True when `loss` is strictly lower than the best validation loss so far."""
        return loss < self.best_val_loss

    def save_weights(self, model, trn_loss, val_loss, trn_err, val_err):
        """Write a checkpoint for the current epoch and refresh the 'latest' copy.

        Also points `best_weights_path` at the new file when `val_loss` is
        the best seen so far.
        """
        # Uses self.epoch; the previous code referenced an undefined `epoch`.
        weights_fname = self.name+'-weights-%d-%.3f-%.3f-%.3f-%.3f.pth' % (
            self.epoch, trn_loss, trn_err, val_loss, val_err)
        weights_fpath = os.path.join(self.weights_dir, weights_fname)
        torch.save({
                'last_epoch': self.epoch,
                'trn_loss': trn_loss,
                'val_loss': val_loss,
                'trn_err': trn_err,
                'val_err': val_err,
                'best_val_loss': self.best_val_loss,
                'best_val_loss_epoch': self.best_val_loss_epoch,
                'experiment': self.name,
                'state_dict': model.state_dict()
            }, weights_fpath )
        shutil.copyfile(weights_fpath, self.latest_weights)
        # `<=` rather than is_best_loss(): if save_history('val', ...) already
        # ran for this epoch, best_val_loss equals val_loss and a strict
        # comparison would never mark this checkpoint as best.
        if val_loss <= self.best_val_loss:
            self.best_weights_path = weights_fpath

    def load_weights(self, model, fpath):
        """Load the checkpoint at `fpath` into `model`.

        # Output
            (model, state): the same model object and the loaded checkpoint dict
        """
        print("loading weights '{}'".format(fpath))
        state = torch.load(fpath)
        model.load_state_dict(state['state_dict'])
        print (state['trn_err'], state['val_loss'], state['val_err'])
        print("loaded weights from experiment %s (last_epoch %d, trn_loss %s, trn_err %s, val_loss %s, val_err %s)" % (
                  self.name, state['last_epoch'], state['trn_loss'],
                    state['trn_err'], state['val_loss'], state['val_err']))
        return model, state

    def save_optimizer(self, optimizer, val_loss):
        """Write the optimizer state for the current epoch and refresh the 'latest' copy.

        Also points `best_optimizer_path` at the new file when `val_loss` is
        the best seen so far.
        """
        # Uses self.epoch; the previous code referenced an undefined `epoch`.
        optim_fname = self.name+'-optim-%d.pth' % (self.epoch)
        optim_fpath = os.path.join(self.weights_dir, optim_fname)
        torch.save({
                'last_epoch': self.epoch,
                'experiment': self.name,
                'state_dict': optimizer.state_dict()
            }, optim_fpath )
        shutil.copyfile(optim_fpath, self.latest_optimizer)
        # Same best-or-tied rule as save_weights; the previous code assigned
        # an undefined `optim_path` here.
        if val_loss <= self.best_val_loss:
            self.best_optimizer_path = optim_fpath

    def load_optimizer(self, optimizer, fpath):
        """Restore `optimizer` from the file at `fpath`.

        # Output
            The same optimizer object, with its state loaded. (The previous
            code returned the raw checkpoint dict, which `resume` then
            handed back to the caller as the optimizer.)
        """
        print("loading optimizer '{}'".format(fpath))
        saved = torch.load(fpath)
        optimizer.load_state_dict(saved['state_dict'])
        print("loaded optimizer from session {}, last_epoch {}"
              .format(saved['experiment'], saved['last_epoch']))
        return optimizer

# Examples

Links To Pytorch Implementations of cool new models

## CNNs

 * [VGG](https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py)
 * [ResNet](https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py)
 * [InceptionNet](https://github.com/pytorch/vision/blob/master/torchvision/models/inception.py)
 * [SqueezeNet](https://github.com/pytorch/vision/blob/master/torchvision/models/squeezenet.py)
 * [DenseNet](https://github.com/pytorch/vision/blob/master/torchvision/models/densenet.py)
 * [FCDenseNet (Tiramisu)](https://github.com/bfortuner/pytorch_tiramisu/blob/master/tiramisu-pytorch.ipynb)
 * [Sub-pixel CNN (superresolution)](https://github.com/pytorch/examples/tree/master/super_resolution)

## GANs

* [DCGAN](https://github.com/pytorch/examples/tree/master/dcgan)
* [Wasserstein GAN](https://github.com/martinarjovsky/WassersteinGAN)

## RNNs

## Other

* [Pytorch Examples](https://github.com/pytorch/examples)