In [1]:
from fastai.data.all import untar_data, URLs
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torch import nn
from matplotlib import pyplot as plt

import torch
import os
import torch.optim as optim

In [2]:
# Load both MNIST splits (downloaded on first use) from the shared data directory
mnist_root = os.path.join("..", "storage", "data", "mnist")
mnist_train = MNIST(mnist_root, download=True)
mnist_test = MNIST(mnist_root, train=False, download=True)

# Images: insert a channel axis at dim 1 and cast to float
x_train = mnist_train.data.unsqueeze(1).float()
x_test = mnist_test.data.unsqueeze(1).float()

# Labels: keep the integer class ids and also expand them into 10-wide one-hot rows
# (scatter_ writes a 1 into the column given by each label and returns the same tensor)
y_train_int = mnist_train.targets
y_train = torch.zeros(len(y_train_int), 10).scatter_(1, y_train_int.unsqueeze(-1), 1)

y_test_int = mnist_test.targets
y_test = torch.zeros(len(y_test_int), 10).scatter_(1, y_test_int.unsqueeze(-1), 1)

# Mean over dims 0 and 1 of the *training* images (dim 1 is the inserted channel
# axis), leaving one mean value per pixel position. The same training statistic
# is subtracted from both splits.
# Not applied: std scaling, i.e. train_std = x_train.std((0, 1))
train_mean = x_train.mean(dim=(0, 1))

# Centre first, then divide by 255 (presumably the 8-bit pixel range)
x_train = (x_train - train_mean) / 255.
x_test = (x_test - train_mean) / 255.

In [3]:
# Materialise each split as an indexable tuple of (image, one-hot label) pairs
train_ds = tuple((image, label) for image, label in zip(x_train, y_train))
test_ds = tuple((image, label) for image, label in zip(x_test, y_test))

In [4]:
# Training loader: reshuffled mini-batches of 100 samples
train_dl = DataLoader(dataset=train_ds, batch_size=100, shuffle=True)
# Test loader: the whole test split delivered as one batch, in its stored order
test_dl = DataLoader(dataset=test_ds, batch_size=len(test_ds))

In [5]:
# Create my own architecture
class DenseNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages and a linear head.

    Despite the name, only the final layer (``fc7``) is dense; the feature
    extractor is convolutional.

    Both convolutions use 3x3 kernels with stride 1 and padding 1, so they keep
    the spatial size; each 2x2 max-pool halves it. The linear head expects
    ``16*7*7`` features, i.e. inputs whose spatial size pools down to 7x7
    (for example 1x28x28 images).
    """

    def __init__(self):
        super().__init__()

        # unique linear transformation layers
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8,
                               kernel_size=(3, 3), stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16,
                               kernel_size=(3, 3), stride=1, padding=1)
        self.fc7 = nn.Linear(in_features=16*7*7, out_features=10)

        # singleton layers (stateless, so one instance of each is reused)
        self.relu = nn.ReLU()
        self.max_pool = nn.MaxPool2d(kernel_size=(2, 2))

    def forward(self, inputs):
        """Compute the 10 raw class scores for a batch of single-channel images.

        Args:
            inputs: tensor of shape ``(N, 1, H, W)``; a single unbatched
                ``(1, H, W)`` image is also accepted and treated as a batch of one.

        Returns:
            Tensor of shape ``(N, 10)`` with unnormalised scores (no softmax is applied).

        Raises:
            RuntimeError: if the pooled feature map does not flatten to
                ``fc7.in_features`` values per sample (wrong image size).
        """
        # Keep accepting a lone (C, H, W) image, which previously came out as (1, 10)
        if inputs.dim() == 3:
            inputs = inputs.unsqueeze(0)

        x = self.conv1(inputs)
        x = self.relu(x)
        x = self.max_pool(x)

        x = self.conv2(x)
        x = self.relu(x)
        x = self.max_pool(x)

        # Flatten per sample rather than with view(-1, in_features): the latter lets
        # the batch dimension absorb a feature-size mismatch and silently returns
        # the wrong number of rows for wrongly sized images.
        x = self.fc7(x.flatten(start_dim=1))

        return x

In [6]:
# Instantiate the network defined above. Its weights are freshly initialised
# here, so re-running this cell discards any training done so far.
# NOTE(review): no RNG seed is set in the cells shown, so runs are not repeatable.
model = DenseNN()

In [7]:
# Adam over every parameter of the model, with a learning rate of 1e-4
optimizer = optim.Adam(params=model.parameters(), lr=1e-4)

# Ad-hoc loss function
def loss_fn(pred, target):
    """Return the batch mean of the per-sample summed absolute error.

    The element-wise L1 distance between `pred` and `target` is summed along
    dim 1 (the class axis for 2-D inputs) and the resulting per-sample values
    are averaged into a single scalar tensor.
    """
    elementwise_l1 = nn.L1Loss(reduction='none')
    per_sample = elementwise_l1(pred, target).sum(dim=1)
    return per_sample.mean()
# Alternative (disabled): total absolute error over the whole batch,
# i.e. nn.L1Loss(reduction='sum')(pred, target)

In [12]:
# Training loop: one optimisation pass over the training batches per epoch,
# with train/test metrics reported every `report_every` epochs.
max_epochs = 1000
report_every = 10

for epoch in range(max_epochs):
    # ---- train ----
    # Mode switching is a no-op for the current layers, but keeps the loop
    # correct if mode-dependent layers (dropout, batch norm) are added later.
    model.train()

    train_loss = 0.0
    total_samples = 0
    correctly_predicted = 0

    for inputs, targets in train_dl:
        preds = model(inputs)
        loss = loss_fn(preds, targets)

        # Bookkeeping on plain Python numbers; .item() detaches the values, so
        # nothing here is tracked by autograd. Targets are one-hot, hence argmax.
        train_loss += loss.item()
        total_samples += len(targets)
        correctly_predicted += (targets.argmax(dim=1) == preds.argmax(dim=1)).sum().item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    train_accuracy = correctly_predicted/total_samples

    # The evaluation results are only ever printed on reporting epochs, so the
    # full test pass is skipped on all others.
    if ((epoch+1) % report_every) != 0:
        continue

    # ---- test ----
    model.eval()

    test_loss = 0.0
    total_samples = 0
    correctly_predicted = 0

    with torch.no_grad():
        for inputs, targets in test_dl:
            preds = model(inputs)

            # Accumulate a float (not a tensor), consistent with train_loss
            test_loss += loss_fn(preds, targets).item()

            # accuracy
            total_samples += len(targets)
            correctly_predicted += (targets.argmax(dim=1) == preds.argmax(dim=1)).sum().item()

    test_accuracy = correctly_predicted/total_samples

    # Losses are averaged over the number of batches in each loader
    print('Epoch: {}'.format(epoch+1))
    print('Training loss: {:6.5f}'.format(train_loss/len(train_dl)))
    print('Testing loss:  {:6.5f}'.format(test_loss/len(test_dl)))
    print('Training accuracy: {:6.2f}%'.format(train_accuracy*100))
    print('Testing accuracy: {:6.2f}%'.format(test_accuracy*100))
    print()

Epoch: 10
Training loss: 0.40049
Testing loss:  0.39830
Training accuracy:  89.84%
Testing accuracy:  90.19%

Epoch: 20
Training loss: 0.40053
Testing loss:  0.39767
Training accuracy:  89.87%
Testing accuracy:  90.48%

Epoch: 30
Training loss: 0.40032
Testing loss:  0.39891
Training accuracy:  89.90%
Testing accuracy:  90.31%

Epoch: 40
Training loss: 0.40034
Testing loss:  0.39726
Training accuracy:  89.74%
Testing accuracy:  90.47%

Epoch: 50
Training loss: 0.40023
Testing loss:  0.39706
Training accuracy:  89.81%
Testing accuracy:  90.16%

Epoch: 60
Training loss: 0.40012
Testing loss:  0.39754
Training accuracy:  89.78%
Testing accuracy:  90.44%

Epoch: 70
Training loss: 0.40001
Testing loss:  0.39736
Training accuracy:  89.82%
Testing accuracy:  90.31%

Epoch: 80
Training loss: 0.39989
Testing loss:  0.39693
Training accuracy:  89.82%
Testing accuracy:  90.27%

Epoch: 90
Training loss: 0.39982
Testing loss:  0.39659
Training accuracy:  89.84%
Testing accuracy:  90.49%

Epoch: 100

KeyboardInterrupt: 

In [9]:
# TODO: compare the exact same architecture as above, but trained with a negative log-likelihood loss (nn.NLLLoss on log-softmax outputs, i.e. nn.CrossEntropyLoss)

In [10]:
# Create new cnn_learner using resnet
# Train on the MNIST dataset
# Produce targets using the resnet

In [11]:
# Generate new training and testing datasets from the resnet outputs
# Create another DenseNN (identical architecture)
# Train the new DenseNN using the resnet predictions as targets (instead of the original targets)
# Compare the two DenseNNs and the resnet in terms of top 1, 2, 3 accuracy