In [1]:
import math
import torch
from torch.autograd import Variable
from torch.optim import Adam
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import mnist
from torchvision import transforms
from torch.utils.data import DataLoader

In [2]:
NUM_CLASSES = 10
BATCH_SIZE = 20
LEARING_RATE = 0.001

In [15]:
import numpy as np
import collections

class GaussianNoise(object):
    """
    Add gaussian noise to a numpy.ndarray (H x W x C)
    """
    def __init__(self, mean, sigma, random_state=np.random):
        self.sigma = sigma
        self.mean = mean
        self.random_state = random_state

    def __call__(self, image):
        row, col, ch = image.shape
        gauss = self.random_state.normal(self.mean, self.sigma, (row, col, ch))
        gauss = gauss.reshape(row, col, ch)
        image += torch.from_numpy(gauss).float()
        return image

In [21]:
# train set
train_transforms = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    GaussianNoise(0.01, 0.001),
])
dataset = mnist.MNIST('./data/', train=True, download=True, transform=train_transforms)
loader = DataLoader(dataset, batch_size=BATCH_SIZE)

# validation set
validation_dataset = mnist.MNIST('./data/', train=False, download=True, transform=transforms.ToTensor())
validation_loader = DataLoader(dataset, batch_size=BATCH_SIZE)

In [6]:
# input_size
data, _ = next(iter(loader))
x_size = len(data[0][0][0])
y_size = len(data[0][0])
input_size = x_size * y_size  # flatten 28x28 tensor to 1x784 tensor
print(input_size)

784


In [None]:
HIDDEN_SIZE = 50

class Model2Linear(nn.Module):
    def __init__(self):
        super().__init__()
        self.h1 = nn.Linear(input_size, HIDDEN_SIZE)
        self.h2 = nn.Linear(HIDDEN_SIZE, HIDDEN_SIZE)
        self.h3 = nn.Linear(HIDDEN_SIZE, NUM_CLASSES)
    
    def forward(self, x):
        x = x.data.view(-1, input_size)
        x = self.h1(x)
        x = F.relu(x)
        x = self.h2(x)
        x = F.relu(x)
        x = self.h3(x)
        x = F.softmax(x, dim=1)
        return x

In [None]:
HIDDEN_SIZE = 500

class Model1Linear(nn.Module):
    def __init__(self):
        super().__init__()
        self.h1 = nn.Linear(input_size, HIDDEN_SIZE)
        self.h2 = nn.Linear(HIDDEN_SIZE, NUM_CLASSES)
    
    def forward(self, x):
        x = x.data.view(-1, input_size)
        x = self.h1(x)
        x = F.relu(x)
        x = self.h2(x)
        x = F.softmax(x, dim=1)
        return x

In [None]:
HIDDEN_SIZE = 100

class Model1LinearDropout(nn.Module):
    def __init__(self):
        super().__init__()
        self.h1 = nn.Linear(input_size, HIDDEN_SIZE)
        self.h2 = nn.Linear(HIDDEN_SIZE, NUM_CLASSES)
    
    def forward(self, x):
        x = x.data.view(-1, input_size)
        x = self.h1(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.h2(x)
        x = F.softmax(x, dim=1)
        return x

In [7]:
class ModelConv1(nn.Module):
    def __init__(self, kernel_size=5, conv_out_channels=5, linear_size=50):
        super().__init__()
        if kernel_size % 2 != 1:
            raise Exception('Only odd kernel_size are supported')
        self.conv_out_channels = conv_out_channels
        self.conv1 = nn.Conv2d(1, conv_out_channels, kernel_size=kernel_size)
        # convolution kernels are not applied on the border of the image, because the kernel would be outside the image
        conv_layer_output_size = int(x_size - (kernel_size - 1))
        self.pooled_pixels = int(conv_layer_output_size / 2)
        self.h1 = nn.Linear(self.pooled_pixels * self.pooled_pixels  * conv_out_channels, linear_size)
        self.h2 = nn.Linear(linear_size, NUM_CLASSES)
    
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, self.pooled_pixels * self.pooled_pixels * self.conv_out_channels)
        x = self.h1(x)
        x = F.relu(x)
        x = self.h2(x)
        x = F.softmax(x, dim=1)
        return x

In [None]:
class ModelConv2(nn.Module):
    def __init__(self, kernel_size=5, conv_out_channels1=5, conv_out_channels2=5, linear_size=50):
        super().__init__()
        if kernel_size % 2 != 1:
            raise Exception('Only odd kernel_size are supported')

        self.conv1 = nn.Conv2d(1, conv_out_channels1, kernel_size=kernel_size)
        conv_out_channels1 = conv_out_channels1
        conv_layer_output_size1 = int(x_size - (kernel_size - 1))
        pooled_pixels1 = int(conv_layer_output_size1 / 2)
        
        self.conv2 = nn.Conv2d(conv_out_channels1, conv_out_channels2, kernel_size=kernel_size)
        self.conv_out_channels2 = conv_out_channels2
        conv_layer_output_size2 = int(pooled_pixels1 - (kernel_size - 1))
        self.pooled_pixels2 = int(conv_layer_output_size2 / 2)

        self.h1 = nn.Linear(self.pooled_pixels2 * self.pooled_pixels2  * conv_out_channels2, linear_size)
        self.h2 = nn.Linear(linear_size, NUM_CLASSES)
    
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2, 2)
        
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2, 2)
        
        x = x.view(-1, self.pooled_pixels2 * self.pooled_pixels2 * self.conv_out_channels2)
        x = self.h1(x)
        x = F.relu(x)
        x = self.h2(x)
        x = F.softmax(x, dim=1)
        return x

In [8]:
def evalulate(model):
    model.eval()
    loss = 0.0
    for data, labels in validation_loader:
        predictions_per_class = model(data.cuda())
        _, highest_prediction_class = predictions_per_class.max(1)
        loss += F.nll_loss(predictions_per_class, labels.cuda())
    return loss/len(validation_loader)

In [9]:
def learn(model, epochs=30):
    optimizer = Adam(params=model.parameters(), lr=LEARING_RATE)

    for epoch in range(epochs):
        model.train()
        total_loss = 0.0
        for data, labels in loader:
            predictions_per_class = model(data.cuda())
            highest_prediction, highest_prediction_class = predictions_per_class.max(1)

            # how good are we? compare output with the target classes
            loss = F.nll_loss(predictions_per_class, labels.cuda())
            total_loss += loss.item()

            model.zero_grad()
            loss.backward()
            optimizer.step()
        
        train_loss = total_loss/len(loader)
        validation_loss = evalulate(model)
        print(f'Epoch: {epoch}, Train Loss: {train_loss}, Validation Loss: {validation_loss.item()}')
        
    return model

In [None]:
model = Model1Linear().cuda()
learn(model, 100)

In [22]:
# 1 convolution layer
configs = [
    #{'kernel_size': 5, 'conv_out_channels': 1, 'linear_size': 50},
    {'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 500},
    #{'kernel_size': 3, 'conv_out_channels': 2, 'linear_size': 500},
    #{'kernel_size': 3, 'conv_out_channels': 2, 'linear_size': 300},
    #{'kernel_size': 3, 'conv_out_channels': 2, 'linear_size': 200},
    #{'kernel_size': 5, 'conv_out_channels': 2, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 50},
    #{'kernel_size': 9, 'conv_out_channels': 5, 'linear_size': 500},
    #{'kernel_size': 7, 'conv_out_channels': 5, 'linear_size': 500},
    #{'kernel_size': 11, 'conv_out_channels': 5, 'linear_size': 500},
]

for config in configs:
    print(config)
    torch.cuda.empty_cache()
    model = ModelConv1(**config).cuda()
    try:
        learn(model, 100)
    except Exception as ex:
        print(ex)

{'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 500}
Epoch: 0, Train Loss: -0.8230898158053557, Validation Loss: -0.9288526773452759
Epoch: 1, Train Loss: -0.95513512301445, Validation Loss: -0.9639253616333008
Epoch: 2, Train Loss: -0.9642195931871732, Validation Loss: -0.9621785283088684
Epoch: 3, Train Loss: -0.9716175346374512, Validation Loss: -0.972173810005188
Epoch: 4, Train Loss: -0.9739205879370372, Validation Loss: -0.9746850728988647
Epoch: 5, Train Loss: -0.9769066201249759, Validation Loss: -0.9768523573875427
Epoch: 6, Train Loss: -0.9779947476784389, Validation Loss: -0.9776179790496826
Epoch: 7, Train Loss: -0.9787428175806999, Validation Loss: -0.9810363054275513
Epoch: 8, Train Loss: -0.9806776974995931, Validation Loss: -0.982674241065979
Epoch: 9, Train Loss: -0.9806296016772588, Validation Loss: -0.979028582572937
Epoch: 10, Train Loss: -0.9812834273775418, Validation Loss: -0.9834086894989014
Epoch: 11, Train Loss: -0.9818317886789639, Validation Loss: 

In [None]:
# 2 convolution layers
configs = [
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 5, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 5, 'linear_size': 300},
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 5, 'linear_size': 500},
    
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 10, 'linear_size': 50},
    {'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 10, 'linear_size': 300},
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 10, 'linear_size': 500},
    
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 10, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 10, 'linear_size': 300},
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 10, 'linear_size': 500},
    
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 20, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 20, 'linear_size': 300},
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 20, 'linear_size': 500},
]

for config in configs:
    print(config)
    torch.cuda.empty_cache()
    model = ModelConv2(**config).cuda()
    try:
        learn(model, 100)
    except Exception as ex:
        print(ex)