# A SMALL CNN IN PYTORCH

### Imports

In [1]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import DataParallel
import torchvision
from torchvision import datasets, transforms
import torch.optim as optim

import os
import pandas as pd
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

### Hyperparameters

In [2]:
# Training configuration -- every value a reader might want to tune lives here.
seed = 42               # fixed RNG seed so repeated runs start from the same state
torch.manual_seed(seed) # seeds PyTorch's global generator (weight init, loader shuffling)

n_epochs = 5            # number of full passes over the training set
batch_size = 100        # samples per mini-batch, used by both data loaders
learning_rate = 0.001   # step size handed to the Adam optimizer

### Get training data - MNIST

In [3]:
# Both MNIST splits share one storage directory and one image->tensor transform.
data_root = '../data/'
to_tensor = transforms.ToTensor()

# Only the training split requests a download; the test split is read from data_root.
train_dataset = datasets.MNIST(root=data_root, train=True, transform=to_tensor, download=True)
test_dataset = datasets.MNIST(root=data_root, train=False, transform=to_tensor)

# Data Loader (Input Pipeline): training batches are shuffled, test batches keep dataset order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


### Define model architecture

In [4]:
class CNN(nn.Module):
    """
    Small convolutional classifier: two conv blocks followed by a linear head.

    Each block is Conv2d (5x5 kernel, padding 2) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2). The head is a single Linear layer with 7*7*32 input
    features, which matches 1-channel 28x28 inputs (each pooling step halves
    the spatial size: 28 -> 14 -> 7).

    Args:
        num_classes (int): number of scores produced per input by the linear head.
    """

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Return one Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d stage."""
        stage = [
            nn.Conv2d(in_channels, out_channels, kernel_size=5, padding=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        return nn.Sequential(*stage)

    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.num_classes = num_classes

        # Submodules are registered as block1, block2, fc (in that order).
        self.block1 = self._conv_block(1, 16)
        self.block2 = self._conv_block(16, 32)
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    def forward(self, x):
        """Run a batch through both blocks and the linear head; returns (batch, num_classes) scores."""
        features = self.block2(self.block1(x))
        # Collapse channel and spatial dims into one feature vector per sample.
        flattened = features.view(features.size(0), -1)
        return self.fc(flattened)

### Build network

In [5]:
# Cross-entropy objective for the multi-class problem.
criterion = nn.CrossEntropyLoss()

# Instantiate the 10-class network and let Adam optimise all of its parameters.
cnn = CNN(num_classes=10)
optimizer = optim.Adam(params=cnn.parameters(), lr=learning_rate)

### Train loop

In [None]:
%%time

# Train the Model
for epoch in range(n_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images)
        labels = Variable(labels)
        
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f' 
                   %(epoch+1, n_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

### Let's try again... but this time using data parallelization across both of my GPUs

In [None]:
# Fresh, untrained network moved onto the GPU.
cnn = CNN(num_classes=10).cuda()

# Wrapping the model in DataParallel is the one extra step needed to use GPUs 0 and 1 together.
cnn = DataParallel(cnn, device_ids=[0, 1])

# Loss (also placed on the GPU) and Optimizer over the wrapped model's parameters
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(params=cnn.parameters(), lr=learning_rate)

In [None]:
%%time

# Train the Model
for epoch in range(n_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images).cuda()
        labels = Variable(labels).cuda()
        
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f' 
                   %(epoch+1, n_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

In [None]:
# Test the Model
cnn.eval()  # Change model to 'eval' mode b/c BN uses moving mean/var
correct = 0
total = 0
with torch.no_grad():  # inference only: do not record operations for backprop
    for images, labels in test_loader:
        outputs = cnn(images)
        # Index of the largest score along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        # labels come straight from the loader (CPU), so predictions are brought to the CPU
        # before comparing; .item() keeps `correct` a plain Python number instead of a tensor.
        correct += (predicted.cpu() == labels).sum().item()

print('Test Accuracy of the model on the 10000 test images: %d %%' % (100 * correct / total))

In [None]:
# Save the Trained Model
model_path = '../models/cnn.pkl'

# torch.save does not create missing directories, so make sure the target folder exists.
model_dir = os.path.dirname(model_path)
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

# A DataParallel wrapper prefixes every state_dict key with 'module.'; save the wrapped
# model's weights instead so the checkpoint loads directly into a plain CNN.
model_to_save = cnn.module if isinstance(cnn, DataParallel) else cnn
torch.save(model_to_save.state_dict(), model_path)