# IFT 6135 A2

In [None]:
import csv
import torch.cuda
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from tensorboardX import SummaryWriter
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import transforms, datasets
from os.path import dirname, abspath, join
from torch import norm
import config
import numpy as np


In [None]:
# Query CUDA availability once; the training and evaluation functions in this
# file read this flag to decide whether to move batches to the GPU.
cuda_available = torch.cuda.is_available()
# Bare expression: shows the flag as the cell output when run in a notebook.
cuda_available

In [None]:
# Build the path to config.yaml two directory levels above the path passed
# to abspath().
# NOTE(review): abspath() receives the literal string '__file__', not the
# __file__ variable, so the path is resolved against the current working
# directory -- presumably intentional for notebook use; confirm.
parent_dir = dirname(dirname(abspath('__file__')))
yaml_file = join(parent_dir, 'config.yaml')
# NOTE(review): this rebinds the name `config` from the imported module to
# the Configuration instance. Running this cell a second time would look up
# `Configuration` on that instance instead of on the module -- verify.
config = config.Configuration('Q1_1', yaml_file)

print(config)

## Load Data

In [None]:
def load_dataset(config):
    """Build the MNIST train, test and validation data loaders.

    The training loader shuffles the full MNIST training set in batches of
    ``config.batch_size``. The held-out MNIST test set is shared by the test
    and validation loaders, each restricted to its own sampler returned by
    ``test_valid_split`` and using a fixed batch size of 64.

    Args:
        config: Object providing ``batch_size``; also forwarded to
            ``test_valid_split``.

    Returns:
        Tuple ``(trainloader, testloader, validloader)``.
    """
    common = dict(root='./data', download=True)
    train_set = datasets.MNIST(train=True, transform=transforms.ToTensor(), **common)
    heldout_set = datasets.MNIST(train=False, transform=transforms.ToTensor(), **common)

    # Both evaluation loaders read from the same held-out dataset; the
    # samplers decide which indices each one sees.
    test_sampler, valid_sampler = test_valid_split(heldout_set, config)

    trainloader = DataLoader(
        train_set,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=2,
    )
    eval_kwargs = dict(batch_size=64, num_workers=2)
    testloader = DataLoader(heldout_set, sampler=test_sampler, **eval_kwargs)
    validloader = DataLoader(heldout_set, sampler=valid_sampler, **eval_kwargs)

    return trainloader, testloader, validloader

def test_valid_split(test, config):
    num_test = len(test[0])
    indices = list(range(num_test))
    split = int(np.floor(num_test / 2))

    # split test set into validation and test set
    valid_idx, test_idx = indices[split:], indices[:split]

    valid_sampler = SubsetRandomSampler(valid_idx)
    test_sampler = SubsetRandomSampler(test_idx)

    return test_sampler, valid_sampler

## Define model

In [None]:
class MLPb(nn.Module):
    """MLP 784 -> 600 -> 200 -> 10 with dropout on the second hidden layer.

    Dropout (p=0.5) is applied to the output of the second linear layer,
    before its ReLU. The final softmax turns the 10 scores of each sample
    into a probability distribution.

    NOTE(review): ``build_model`` pairs this network with
    ``nn.CrossEntropyLoss``, which applies log-softmax itself, while the CNN
    models return raw scores. Consider dropping the softmax here so the
    normalisation is not applied twice -- confirm the intended output.
    """

    def __init__(self):
        super(MLPb, self).__init__()
        # Keeps a handle on the module-level configuration object.
        self.config = config
        self.model = nn.Sequential(
            nn.Linear(784, 600),
            nn.ReLU(),
            nn.Linear(600, 200),
            nn.Dropout(p=0.5),  # last hidden layer dropout
            nn.ReLU(),
            nn.Linear(200, 10),
            # Normalise over the class dimension (last axis). dim=0 would
            # normalise each class column across the samples of the batch.
            nn.Softmax(dim=-1))

    def forward(self, x):
        """Return per-sample class probabilities for inputs of width 784."""
        output = self.model(x)
        return output

In [None]:
class MLPa(nn.Module):
    """Plain MLP 784 -> 600 -> 200 -> 10 with ReLU activations.

    The final softmax turns the 10 scores of each sample into a probability
    distribution.

    NOTE(review): ``build_model`` pairs this network with
    ``nn.CrossEntropyLoss``, which applies log-softmax itself, while the CNN
    models return raw scores. Consider dropping the softmax here so the
    normalisation is not applied twice -- confirm the intended output.
    """

    def __init__(self):
        super(MLPa, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 600),
            nn.ReLU(),
            nn.Linear(600, 200),
            nn.ReLU(),
            nn.Linear(200, 10),
            # Normalise over the class dimension (last axis). dim=0 would
            # normalise each class column across the samples of the batch.
            nn.Softmax(dim=-1))

    def forward(self, x):
        """Return per-sample class probabilities for inputs of width 784."""
        output = self.model(x)
        return output

In [None]:
class CNNa(nn.Module):
    """Four-block CNN with dropout, followed by a linear classifier.

    Each block is: 3x3 convolution (padding 1) -> Dropout(p=0.5) -> ReLU ->
    2x2 max-pool with stride 2. Channels grow 1 -> 16 -> 32 -> 64 -> 128.
    For 1x28x28 inputs the four poolings reduce the feature map to 1x1, so
    each sample yields 128 features for the final ``Linear(128, 10)``.
    The output is raw class scores (no softmax).
    """

    def __init__(self):
        super(CNNa, self).__init__()
        self.conv = nn.Sequential(
            # Layer 1
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=1),
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            # Layer 2
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=1),
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            # Layer 3
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), padding=1),
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            # Layer 4
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), padding=1),
            nn.Dropout(p=0.5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        )
        # Logistic Regression
        self.clf = nn.Linear(128, 10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for (batch, 1, 28, 28) input."""
        features = self.conv(x)
        # Flatten per sample while keeping the batch dimension. A bare
        # squeeze() would also remove the batch axis when the batch holds a
        # single sample, producing a (10,) output instead of (1, 10).
        return self.clf(features.view(features.size(0), -1))

In [None]:
class CNNb(nn.Module):
    """Four-block CNN with batch normalisation and dropout, plus a classifier.

    Each block is: 3x3 convolution (padding 1) -> ReLU -> BatchNorm2d ->
    Dropout(p=0.5) -> 2x2 max-pool with stride 2. Channels grow
    1 -> 16 -> 32 -> 64 -> 128. For 1x28x28 inputs the four poolings reduce
    the feature map to 1x1, so each sample yields 128 features for the final
    ``Linear(128, 10)``. The output is raw class scores (no softmax).
    """

    def __init__(self):
        super(CNNb, self).__init__()
        self.conv = nn.Sequential(
            # Layer 1
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(p=0.5),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            # Layer 2
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            nn.Dropout(p=0.5),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            # Layer 3
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.Dropout(p=0.5),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2),

            # Layer 4
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.Dropout(p=0.5),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        )
        # Logistic Regression
        self.clf = nn.Linear(128, 10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for (batch, 1, 28, 28) input."""
        features = self.conv(x)
        # Flatten per sample while keeping the batch dimension. A bare
        # squeeze() would also remove the batch axis when the batch holds a
        # single sample, producing a (10,) output instead of (1, 10).
        return self.clf(features.view(features.size(0), -1))

In [None]:
def build_model():
    """Create the model selected by the global ``config``, with loss and optimizer.

    ``config.model_type`` selects ``MLPa``, ``MLPb`` or, for ``'CNN'``, either
    ``CNNb`` (when ``config.batch_norm`` is truthy) or ``CNNa``. The model is
    moved to the GPU when CUDA is available.

    Returns:
        Tuple ``(model, criterion, optimizer)`` where ``criterion`` is
        ``nn.CrossEntropyLoss`` and ``optimizer`` is SGD with learning rate
        ``config.lr0`` and weight decay
        ``config.weight_decay / config.batch_size``.

    Raises:
        ValueError: If ``config.model_type`` is not one of the known names.
    """
    model_type = config.model_type
    if model_type == 'MLPa':
        model = MLPa()
    elif model_type == 'MLPb':
        model = MLPb()
    elif model_type == 'CNN':
        model = CNNb() if config.batch_norm else CNNa()
    else:
        # Fail with a clear message instead of an UnboundLocalError on the
        # first use of `model` below.
        raise ValueError(
            "Unknown model_type {!r}; expected 'MLPa', 'MLPb' or 'CNN'".format(model_type))

    if torch.cuda.is_available():
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(),
        lr=config.lr0,
        weight_decay=config.weight_decay / config.batch_size,
    )
    return model, criterion, optimizer

## Train model

In [None]:
# USES TENSORBOARDX http://tensorboard-pytorch.readthedocs.io/en/latest/tensorboard.html

def train_model(config, model, criterion, optimizer):
    """Train ``model`` with mini-batch updates and log the learning curve.

    After every optimisation step the training loss of the batch and the
    loss returned by ``find_testloss`` on the test loader are written to
    TensorBoard under ``'learning_curve'``. At the end of every epoch the
    norm of each model parameter is recorded and a summary line is printed.

    Args:
        config: Object providing ``num_epochs`` and ``model_type``; it is
            also forwarded to ``load_dataset``.
        model: The network to train.
        criterion: Loss function called as ``criterion(outputs, targets)``.
        optimizer: Optimizer already bound to ``model``'s parameters.

    Returns:
        None. The collected losses and parameter norms are not persisted yet.
    """
    losses = []
    parameter_norms = []
    writer = SummaryWriter('isaacsultan/IFT6135/Assignments/Assignment2/logs')
    # The validation loader is built by load_dataset but not used here.
    trainloader, testloader, _validloader = load_dataset(config)

    is_mlp = config.model_type in ('MLPa', 'MLPb')
    # Monotonic step counter so curves from successive epochs do not overlap
    # on the same x positions (batch_idx restarts at 0 every epoch).
    global_step = 0
    test_loss = float('nan')

    try:
        for epoch in range(config.num_epochs):
            for batch_idx, (inputs, targets) in enumerate(trainloader):
                # find_testloss() switches the model to eval mode; switch
                # back so dropout / batch-norm behave as in training.
                model.train()

                # Tensors are autograd-aware since PyTorch 0.4, so no
                # Variable wrapper is needed.
                if is_mlp:
                    inputs = inputs.view(-1, 784)
                    targets = targets.view(-1)
                elif config.model_type == 'CNN':
                    inputs = inputs.view(-1, 1, 28, 28)
                    targets = targets.view(-1)
                if cuda_available:
                    inputs, targets = inputs.cuda(), targets.cuda()

                optimizer.zero_grad()
                # compute loss
                loss = criterion(model(inputs), targets)
                loss.backward()
                optimizer.step()

                train_loss = loss.item()
                losses.append(train_loss)
                # NOTE(review): this evaluates the whole test loader after
                # every training batch, which dominates the run time.
                test_loss = find_testloss(model, testloader)
                writer.add_scalars(
                    'learning_curve',
                    {'train_loss': train_loss, 'test_loss': test_loss},
                    global_step)
                global_step += 1

            # Snapshot the parameter norms once per epoch as plain floats so
            # no autograd graph is kept alive.
            with torch.no_grad():
                parameter_norms.append(
                    [norm(param).item() for param in model.parameters()])

            # print the results for this epoch
            print("Epoch {0} \n Train Loss : {1:.3f} \n Test Loss : {2:.3f}".format(
                epoch, np.mean(losses), test_loss))
    finally:
        # Flush pending events and release the log file handle.
        writer.close()
    # TODO: dump `losses` and `parameter_norms` (currently discarded).

In [None]:
def find_testloss(model,test_loader):
    """Return the mean per-batch loss of ``model`` over ``test_loader``.

    Uses the module-level ``criterion``, ``config`` and ``cuda_available``.
    The model is evaluated in eval mode without gradient tracking, and its
    previous train/eval mode is restored before returning.

    Args:
        model: Network to evaluate.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        The unweighted mean of the batch losses as a Python float, or
        ``nan`` when the loader yields no batches.
    """
    was_training = model.training
    model.eval()  # important when using dropout
    batch_losses = []
    try:
        # no_grad replaces the removed `volatile=True` flag: no autograd
        # graph is built during evaluation.
        with torch.no_grad():
            for inputs, targets in test_loader:
                if config.model_type == 'MLPa' or config.model_type == 'MLPb':
                    inputs = inputs.view(-1, 784)
                    targets = targets.view(-1)
                elif config.model_type == 'CNN':
                    inputs = inputs.view(-1, 1, 28, 28)
                    targets = targets.view(-1)
                if cuda_available:
                    inputs, targets = inputs.cuda(), targets.cuda()

                outputs = model(inputs)
                # Store plain floats so the mean can be taken with NumPy.
                batch_losses.append(criterion(outputs, targets).item())
    finally:
        # Leave the model in the mode the caller had it in, so a training
        # loop does not silently continue in eval mode.
        model.train(was_training)

    if not batch_losses:
        return float('nan')
    return float(np.mean(batch_losses))

In [None]:
# Build the model, loss and optimizer selected by the global `config`, then
# run the training loop with them.
model, criterion, optimizer = build_model()
train_model(config, model, criterion, optimizer)
