In [1]:
import math
import torch
from torch.autograd import Variable
from torch.optim import Adam
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import mnist
from torchvision import transforms
from torch.utils.data import DataLoader

In [2]:
# Number of target classes (the ten MNIST digits).
NUM_CLASSES = 10
# Samples per mini-batch used by the data loaders.
BATCH_SIZE = 20
# Step size passed to the Adam optimizer.
LEARNING_RATE = 0.001
# Misspelled alias kept because existing cells still reference this name.
LEARING_RATE = LEARNING_RATE

In [3]:
# train set
# ToTensor turns each image into a float tensor so batches can be fed directly to the models.
dataset = mnist.MNIST('./data/', train=True, download=True, transform=transforms.ToTensor())
loader = DataLoader(dataset, batch_size=BATCH_SIZE)

# validation set (the split selected with train=False)
validation_dataset = mnist.MNIST('./data/', train=False, download=True, transform=transforms.ToTensor())
# Wrap the held-out split; wrapping `dataset` here would score the model on
# the very samples it was trained on.
validation_loader = DataLoader(validation_dataset, batch_size=BATCH_SIZE)

In [4]:
# Probe a single batch to learn the image dimensions.
data, _ = next(iter(loader))
# Indexing [0][0] walks past the batch and channel axes, so axis 2 holds the
# rows (height) and axis 3 the columns (width).
y_size = data.shape[2]
x_size = data.shape[3]
# The linear models consume each 28x28 image as one flat 784-element vector.
input_size = y_size * x_size
print(input_size)

784


In [5]:
HIDDEN_SIZE = 50


class Model2Linear(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    Args:
        hidden_size: Width of both hidden layers. The default is bound when the
            class is defined, so a later reassignment of the global
            ``HIDDEN_SIZE`` does not silently resize this model.
        in_features: Number of values per flattened sample. ``None`` falls back
            to the notebook-level ``input_size``.
        num_classes: Number of output classes. ``None`` falls back to the
            notebook-level ``NUM_CLASSES``.
    """

    def __init__(self, hidden_size=HIDDEN_SIZE, in_features=None, num_classes=None):
        super().__init__()
        if in_features is None:
            in_features = input_size
        if num_classes is None:
            num_classes = NUM_CLASSES
        self.h1 = nn.Linear(in_features, hidden_size)
        self.h2 = nn.Linear(hidden_size, hidden_size)
        self.h3 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities, one row per sample."""
        # Flatten each sample into a vector; going through ``.data`` is not needed.
        x = x.reshape(-1, self.h1.in_features)
        x = F.relu(self.h1(x))
        x = F.relu(self.h2(x))
        # F.nll_loss expects log-probabilities, so emit log_softmax, not softmax.
        return F.log_softmax(self.h3(x), dim=1)

In [6]:
HIDDEN_SIZE = 100


class Model1Linear(nn.Module):
    """Fully connected classifier with a single hidden ReLU layer.

    Args:
        hidden_size: Width of the hidden layer. The default is bound when the
            class is defined, so later changes to the global ``HIDDEN_SIZE``
            do not affect it.
        in_features: Number of values per flattened sample. ``None`` falls back
            to the notebook-level ``input_size``.
        num_classes: Number of output classes. ``None`` falls back to the
            notebook-level ``NUM_CLASSES``.
    """

    def __init__(self, hidden_size=HIDDEN_SIZE, in_features=None, num_classes=None):
        super().__init__()
        if in_features is None:
            in_features = input_size
        if num_classes is None:
            num_classes = NUM_CLASSES
        self.h1 = nn.Linear(in_features, hidden_size)
        self.h2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities, one row per sample."""
        # Flatten each sample into a vector; going through ``.data`` is not needed.
        x = x.reshape(-1, self.h1.in_features)
        x = F.relu(self.h1(x))
        # F.nll_loss expects log-probabilities, so emit log_softmax, not softmax.
        return F.log_softmax(self.h2(x), dim=1)

In [7]:
HIDDEN_SIZE = 100


class Model1LinearDropout(nn.Module):
    """Single-hidden-layer classifier with dropout after the hidden ReLU.

    Args:
        hidden_size: Width of the hidden layer. The default is bound when the
            class is defined, so later changes to the global ``HIDDEN_SIZE``
            do not affect it.
        in_features: Number of values per flattened sample. ``None`` falls back
            to the notebook-level ``input_size``.
        num_classes: Number of output classes. ``None`` falls back to the
            notebook-level ``NUM_CLASSES``.
    """

    def __init__(self, hidden_size=HIDDEN_SIZE, in_features=None, num_classes=None):
        super().__init__()
        if in_features is None:
            in_features = input_size
        if num_classes is None:
            num_classes = NUM_CLASSES
        self.h1 = nn.Linear(in_features, hidden_size)
        self.h2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities, one row per sample."""
        # Flatten each sample into a vector; going through ``.data`` is not needed.
        x = x.reshape(-1, self.h1.in_features)
        x = F.relu(self.h1(x))
        # Dropout is only active in training mode; eval() turns it into a no-op.
        x = F.dropout(x, training=self.training)
        # F.nll_loss expects log-probabilities, so emit log_softmax, not softmax.
        return F.log_softmax(self.h2(x), dim=1)

In [8]:
class ModelConv1(nn.Module):
    """One unpadded convolution with 2x2 max-pooling, followed by two linear layers.

    Args:
        kernel_size: Side length of the square convolution kernel; must be odd.
        conv_out_channels: Number of feature maps produced by the convolution.
        linear_size: Width of the hidden linear layer.
        image_size: Side length of the square, single-channel input image.
            ``None`` falls back to the notebook-level ``x_size``.
        num_classes: Number of output classes. ``None`` falls back to the
            notebook-level ``NUM_CLASSES``.

    Raises:
        ValueError: If ``kernel_size`` is even, or so large that nothing is
            left of the image after convolution and pooling.
    """

    def __init__(self, kernel_size=5, conv_out_channels=5, linear_size=50,
                 image_size=None, num_classes=None):
        super().__init__()
        if kernel_size % 2 != 1:
            raise ValueError('Only odd kernel_size are supported')
        if image_size is None:
            image_size = x_size
        if num_classes is None:
            num_classes = NUM_CLASSES

        # convolution kernels are not applied on the border of the image,
        # because the kernel would be outside the image
        conv_layer_output_size = int(image_size - (kernel_size - 1))
        # max_pool2d(x, 2, 2) halves each side, rounding down.
        pooled_pixels = conv_layer_output_size // 2
        if pooled_pixels < 1:
            raise ValueError(
                f'kernel_size {kernel_size} is too large for a {image_size}x{image_size} image'
            )

        self.conv_out_channels = conv_out_channels
        self.pooled_pixels = pooled_pixels
        self.conv1 = nn.Conv2d(1, conv_out_channels, kernel_size=kernel_size)
        self.h1 = nn.Linear(pooled_pixels * pooled_pixels * conv_out_channels, linear_size)
        self.h2 = nn.Linear(linear_size, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities, one row per sample."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        # Flatten the pooled feature maps into one vector per sample.
        x = x.view(-1, self.h1.in_features)
        x = F.relu(self.h1(x))
        # F.nll_loss expects log-probabilities, so emit log_softmax, not softmax.
        return F.log_softmax(self.h2(x), dim=1)

In [9]:
def evalulate(model, data_loader=None):
    """Return the mean per-batch NLL loss of ``model`` over a data loader.

    The model is switched to eval mode and is left in that mode on return.

    Args:
        model: Module whose forward pass returns per-class log-probabilities,
            which is what ``F.nll_loss`` expects.
        data_loader: Sized iterable of ``(data, labels)`` batches. ``None``
            falls back to the notebook-level ``validation_loader``.

    Returns:
        A 0-dim tensor holding the average of the batch losses.
    """
    if data_loader is None:
        data_loader = validation_loader

    # Run on whatever device the model lives on instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    total_loss = 0.0
    # Scoring needs no gradients; without no_grad the running sum would keep
    # the autograd graph of every batch alive until the loop finishes.
    with torch.no_grad():
        for data, labels in data_loader:
            predictions_per_class = model(data.to(device))
            total_loss += F.nll_loss(predictions_per_class, labels.to(device))
    return total_loss / len(data_loader)

In [10]:
def learn(model, epochs=30):
    """Train ``model`` with Adam and print the validation loss after each epoch.

    Batches come from the notebook-level ``loader``; the learning rate is the
    notebook-level ``LEARING_RATE``. The loss is ``F.nll_loss``, which expects
    the model's forward pass to return per-class log-probabilities.

    Args:
        model: The module to optimise in place.
        epochs: Number of full passes over ``loader``.

    Returns:
        The same ``model`` instance, after training.
    """
    optimizer = Adam(params=model.parameters(), lr=LEARING_RATE)
    # Feed batches to whatever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    for epoch in range(epochs):
        # evalulate() leaves the model in eval mode, so switch back every epoch.
        model.train()
        for data, labels in loader:
            predictions_per_class = model(data.to(device))

            # how good are we? compare output with the target classes
            loss = F.nll_loss(predictions_per_class, labels.to(device))

            # backward() adds into .grad, so gradients from the previous step
            # must be cleared before computing new ones.
            optimizer.zero_grad()
            loss.backward()  # backpropagate
            optimizer.step()

        validation_loss = evalulate(model)
        print(f'Epoch: {epoch}, Loss: {validation_loss.item()}')

    return model

In [None]:
# Hand unused cached GPU memory back before allocating a fresh model.
torch.cuda.empty_cache()

# Settings for this single run; an alternative that can be swapped in:
# {'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 100}
single_run_config = {'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 50}
model = ModelConv1(**single_run_config).cuda()
learn(model)

In [None]:
# Hyper-parameter sweep for ModelConv1. Commented-out entries are combinations
# that are currently switched off; uncomment one to include it in the run.
configs = [
    #{'kernel_size': 5, 'conv_out_channels': 10, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels': 1, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 500},
    #{'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 100},
    #{'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 200},

    #{'kernel_size': 3, 'conv_out_channels': 5, 'linear_size': 100},
    #{'kernel_size': 3, 'conv_out_channels': 5, 'linear_size': 200},

    #{'kernel_size': 5, 'conv_out_channels': 2, 'linear_size': 500},
    #{'kernel_size': 5, 'conv_out_channels': 2, 'linear_size': 300},
    #{'kernel_size': 5, 'conv_out_channels': 2, 'linear_size': 200},

    #{'kernel_size': 3, 'conv_out_channels': 2, 'linear_size': 500},
    #{'kernel_size': 3, 'conv_out_channels': 2, 'linear_size': 300},
    #{'kernel_size': 3, 'conv_out_channels': 2, 'linear_size': 200},
    #{'kernel_size': 5, 'conv_out_channels': 2, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 50},

    {'kernel_size': 9, 'conv_out_channels': 5, 'linear_size': 500},
    #{'kernel_size': 7, 'conv_out_channels': 5, 'linear_size': 300},
    #{'kernel_size': 7, 'conv_out_channels': 5, 'linear_size': 200},
    #{'kernel_size': 7, 'conv_out_channels': 5, 'linear_size': 100},

    {'kernel_size': 7, 'conv_out_channels': 5, 'linear_size': 500},
    #{'kernel_size': 9, 'conv_out_channels': 5, 'linear_size': 300},
    #{'kernel_size': 9, 'conv_out_channels': 5, 'linear_size': 200},
    #{'kernel_size': 9, 'conv_out_channels': 5, 'linear_size': 100},

    {'kernel_size': 11, 'conv_out_channels': 5, 'linear_size': 500},
]

for config in configs:
    print(config)
    # Drop the previous model first: while it is still referenced, its GPU
    # memory is in use and empty_cache() cannot hand it back.
    model = None
    torch.cuda.empty_cache()
    try:
        # Construction sits inside the try so that one invalid configuration
        # is reported instead of aborting the remaining runs.
        model = ModelConv1(**config).cuda()
        learn(model, 100)
    except Exception as ex:
        # Best-effort sweep: report the failure, including its type, and move on.
        print(f'{type(ex).__name__}: {ex}')