In [1]:
import math
import torch
from torch.autograd import Variable
from torch.optim import Adam
from torch import nn
import torch.nn.functional as F
from torchvision.datasets import mnist
from torchvision import transforms
from torch.utils.data import DataLoader

In [3]:
NUM_CLASSES = 10
BATCH_SIZE = 20
LEARING_RATE = 0.001

In [5]:
# train set
dataset = mnist.MNIST('./data/', train=True, download=True, transform=transforms.ToTensor())
loader = DataLoader(dataset, batch_size=BATCH_SIZE)

# validation set
validation_dataset = mnist.MNIST('./data/', train=False, download=True, transform=transforms.ToTensor())
validation_loader = DataLoader(dataset, batch_size=BATCH_SIZE)

In [7]:
# input_size
data, _ = next(iter(loader))
x_size = len(data[0][0][0])
y_size = len(data[0][0])
input_size = x_size * y_size  # flatten 28x28 tensor to 1x784 tensor
print(input_size)

784


In [8]:
HIDDEN_SIZE = 50

class Model2Linear(nn.Module):
    def __init__(self):
        super().__init__()
        self.h1 = nn.Linear(input_size, HIDDEN_SIZE)
        self.h2 = nn.Linear(HIDDEN_SIZE, HIDDEN_SIZE)
        self.h3 = nn.Linear(HIDDEN_SIZE, NUM_CLASSES)
    
    def forward(self, x):
        x = x.data.view(-1, input_size)
        x = self.h1(x)
        x = F.relu(x)
        x = self.h2(x)
        x = F.relu(x)
        x = self.h3(x)
        x = F.softmax(x, dim=1)
        return x

In [9]:
HIDDEN_SIZE = 500

class Model1Linear(nn.Module):
    def __init__(self):
        super().__init__()
        self.h1 = nn.Linear(input_size, HIDDEN_SIZE)
        self.h2 = nn.Linear(HIDDEN_SIZE, NUM_CLASSES)
    
    def forward(self, x):
        x = x.data.view(-1, input_size)
        x = self.h1(x)
        x = F.relu(x)
        x = self.h2(x)
        x = F.softmax(x, dim=1)
        return x

In [10]:
HIDDEN_SIZE = 100

class Model1LinearDropout(nn.Module):
    def __init__(self):
        super().__init__()
        self.h1 = nn.Linear(input_size, HIDDEN_SIZE)
        self.h2 = nn.Linear(HIDDEN_SIZE, NUM_CLASSES)
    
    def forward(self, x):
        x = x.data.view(-1, input_size)
        x = self.h1(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.h2(x)
        x = F.softmax(x, dim=1)
        return x

In [11]:
class ModelConv1(nn.Module):
    def __init__(self, kernel_size=5, conv_out_channels=5, linear_size=50):
        super().__init__()
        if kernel_size % 2 != 1:
            raise Exception('Only odd kernel_size are supported')
        self.conv_out_channels = conv_out_channels
        self.conv1 = nn.Conv2d(1, conv_out_channels, kernel_size=kernel_size)
        # convolution kernels are not applied on the border of the image, because the kernel would be outside the image
        conv_layer_output_size = int(x_size - (kernel_size - 1))
        self.pooled_pixels = int(conv_layer_output_size / 2)
        self.h1 = nn.Linear(self.pooled_pixels * self.pooled_pixels  * conv_out_channels, linear_size)
        self.h2 = nn.Linear(linear_size, NUM_CLASSES)
    
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, self.pooled_pixels * self.pooled_pixels * self.conv_out_channels)
        x = self.h1(x)
        x = F.relu(x)
        x = self.h2(x)
        x = F.softmax(x, dim=1)
        return x

In [12]:
class ModelConv2(nn.Module):
    def __init__(self, kernel_size=5, conv_out_channels1=5, conv_out_channels2=5, linear_size=50):
        super().__init__()
        if kernel_size % 2 != 1:
            raise Exception('Only odd kernel_size are supported')

        self.conv1 = nn.Conv2d(1, conv_out_channels1, kernel_size=kernel_size)
        conv_out_channels1 = conv_out_channels1
        conv_layer_output_size1 = int(x_size - (kernel_size - 1))
        pooled_pixels1 = int(conv_layer_output_size1 / 2)
        
        self.conv2 = nn.Conv2d(conv_out_channels1, conv_out_channels2, kernel_size=kernel_size)
        self.conv_out_channels2 = conv_out_channels2
        conv_layer_output_size2 = int(pooled_pixels1 - (kernel_size - 1))
        self.pooled_pixels2 = int(conv_layer_output_size2 / 2)

        self.h1 = nn.Linear(self.pooled_pixels2 * self.pooled_pixels2  * conv_out_channels2, linear_size)
        self.h2 = nn.Linear(linear_size, NUM_CLASSES)
    
    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2, 2)
        
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2, 2)
        
        x = x.view(-1, self.pooled_pixels2 * self.pooled_pixels2 * self.conv_out_channels2)
        x = self.h1(x)
        x = F.relu(x)
        x = self.h2(x)
        x = F.softmax(x, dim=1)
        return x

In [13]:
def evalulate(model):
    model.eval()
    loss = 0.0
    for data, labels in validation_loader:
        predictions_per_class = model(data.cuda())
        _, highest_prediction_class = predictions_per_class.max(1)
        loss += F.nll_loss(predictions_per_class, labels.cuda())
    return loss/len(validation_loader)

In [14]:
def learn(model, epochs=30):
    optimizer = Adam(params=model.parameters(), lr=LEARING_RATE)

    for epoch in range(epochs):
        model.train()
        for data, labels in loader:
            predictions_per_class = model(data.cuda())
            highest_prediction, highest_prediction_class = predictions_per_class.max(1)

            # how good are we? compare output with the target classes
            loss = F.nll_loss(predictions_per_class, labels.cuda())

            model.zero_grad() # ???
            loss.backward() # backpropagate
            optimizer.step()
        
        validation_loss = evalulate(model)
        print(f'Epoch: {epoch}, Loss: {validation_loss.item()}')
        
    return model

In [None]:
model = Model1Linear().cuda()
learn(model, 100)

In [None]:
# 1 convolution layer
configs = [
    #{'kernel_size': 5, 'conv_out_channels': 1, 'linear_size': 50},
    {'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 500},
    #{'kernel_size': 3, 'conv_out_channels': 2, 'linear_size': 500},
    #{'kernel_size': 3, 'conv_out_channels': 2, 'linear_size': 300},
    #{'kernel_size': 3, 'conv_out_channels': 2, 'linear_size': 200},
    #{'kernel_size': 5, 'conv_out_channels': 2, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 50},
    #{'kernel_size': 9, 'conv_out_channels': 5, 'linear_size': 500},
    #{'kernel_size': 7, 'conv_out_channels': 5, 'linear_size': 500},
    #{'kernel_size': 11, 'conv_out_channels': 5, 'linear_size': 500},
]

for config in configs:
    print(config)
    torch.cuda.empty_cache()
    model = ModelConv1(**config).cuda()
    try:
        learn(model, 100)
    except Exception as ex:
        print(ex)

{'kernel_size': 5, 'conv_out_channels': 5, 'linear_size': 500}
Epoch: 0, Loss: -0.9600119590759277
Epoch: 1, Loss: -0.9807263612747192
Epoch: 2, Loss: -0.9830785989761353
Epoch: 3, Loss: -0.985017716884613
Epoch: 4, Loss: -0.9821376800537109
Epoch: 5, Loss: -0.9865939617156982
Epoch: 6, Loss: -0.9883762001991272
Epoch: 7, Loss: -0.9888348579406738
Epoch: 8, Loss: -0.9915488362312317
Epoch: 9, Loss: -0.9904070496559143
Epoch: 10, Loss: -0.9911392331123352
Epoch: 11, Loss: -0.9909926652908325
Epoch: 12, Loss: -0.9933447241783142
Epoch: 13, Loss: -0.987438440322876
Epoch: 14, Loss: -0.992839515209198
Epoch: 15, Loss: -0.9918549060821533
Epoch: 16, Loss: -0.9927961230278015
Epoch: 17, Loss: -0.9918797612190247
Epoch: 18, Loss: -0.9921971559524536
Epoch: 19, Loss: -0.9942952394485474
Epoch: 20, Loss: -0.9922376275062561
Epoch: 21, Loss: -0.9938608407974243
Epoch: 22, Loss: -0.9941400289535522
Epoch: 23, Loss: -0.993842601776123
Epoch: 24, Loss: -0.9933632612228394
Epoch: 25, Loss: -0.994740

In [None]:
# 2 convolution layers
configs = [
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 5, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 5, 'linear_size': 300},
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 5, 'linear_size': 500},
    
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 10, 'linear_size': 50},
    {'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 10, 'linear_size': 300},
    #{'kernel_size': 5, 'conv_out_channels1': 5, 'conv_out_channels2': 10, 'linear_size': 500},
    
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 10, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 10, 'linear_size': 300},
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 10, 'linear_size': 500},
    
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 20, 'linear_size': 50},
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 20, 'linear_size': 300},
    #{'kernel_size': 5, 'conv_out_channels1': 10, 'conv_out_channels2': 20, 'linear_size': 500},
]

for config in configs:
    print(config)
    torch.cuda.empty_cache()
    model = ModelConv2(**config).cuda()
    try:
        learn(model, 100)
    except Exception as ex:
        print(ex)