In [135]:
import random
from pathlib import Path
from random import randint

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

In [167]:
# Global run configuration: RNG seed, compute device and default float dtype.
SEED = 0  # fixed so that random draws made by torch and `random` repeat from run to run

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
dtype = torch.float32
torch.set_default_dtype(dtype)

# Seed both generators this notebook draws from: torch's global generator and the
# stdlib `random` module (used by RandomOneHot via `randint`).
torch.manual_seed(SEED)
random.seed(SEED)

In [5]:
# Directory handed to the MNIST dataset constructors as their root folder.
dataset_dir = Path("data")
# Create it in one call: `exist_ok` makes re-running the cell a no-op without a
# separate exists() check, and `parents` keeps this working if the path is later nested.
dataset_dir.mkdir(parents=True, exist_ok=True)

In [149]:
class OneHot(object):
    """Target transform that turns an integer class label into a one-hot vector.

    Args:
        classes: Length of the produced vector (number of classes).
        dtype: torch dtype the one-hot vector is cast to.
    """
    def __init__(self, classes, dtype):
        self.classes = classes
        self.dtype = dtype

    def __call__(self, label):
        """Return the one-hot encoding of ``label``.

        ``label`` may be a Python int or an integer tensor (scalar or batched);
        the result has one extra trailing dimension of size ``classes``.
        """
        # as_tensor accepts plain ints and existing tensors alike, whereas
        # torch.tensor() on a tensor emits a copy-construct UserWarning.
        label = torch.as_tensor(label, dtype=torch.long)
        return F.one_hot(label, self.classes).to(self.dtype)
    
class RandomOneHot(object):
    """Target transform that discards the real label and one-hot encodes a random class.

    Args:
        classes: Number of classes; the random class is drawn from ``0 .. classes-1``.
        dtype: torch dtype the one-hot vector is cast to.
    """
    def __init__(self, classes, dtype):
        self.classes = classes
        self.dtype = dtype

    def __call__(self, label):
        """Return a one-hot vector for a uniformly drawn class; ``label`` is ignored."""
        # randint is inclusive on both ends, hence the ``- 1``.
        drawn_class = randint(0, self.classes - 1)
        class_index = torch.tensor(drawn_class, dtype=torch.long)
        encoded = F.one_hot(class_index, self.classes)
        return encoded.to(self.dtype)

In [238]:
# Image pipeline shared by both splits: convert to a tensor, then reshape it
# into a flat 784-element vector.
flatten_pipeline = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda img: img.reshape((784,))),
])

# Training split: labels are converted to 10-class one-hot vectors of `dtype`.
training_data = torchvision.datasets.MNIST(
    dataset_dir,
    train=True,
    download=True,
    transform=flatten_pipeline,
    target_transform=OneHot(10, dtype),
)

# Test split: no target_transform, so labels stay as the dataset provides them.
testing_data = torchvision.datasets.MNIST(
    dataset_dir,
    train=False,
    download=True,
    transform=flatten_pipeline,
)

# Mini-batches of 15 samples; only the training split is shuffled.
training_loader = torch.utils.data.DataLoader(
    training_data,
    batch_size=15,
    shuffle=True,
)
testing_loader = torch.utils.data.DataLoader(
    testing_data,
    batch_size=15,
    shuffle=False,
)

In [239]:
# Four bias-free fully connected layers (784 -> 50 -> 30 -> 30 -> 10), each followed
# by a sigmoid.
# The model is moved to `device` because the training loop sends every batch there;
# left on the CPU it would fail with a device-mismatch error whenever CUDA is selected.
# Doing the move here also means the optimizer is built from the already-moved parameters.
network = nn.Sequential(
    nn.Linear(784, 50, bias=False),
    nn.Sigmoid(),
    nn.Linear(50, 30, bias=False),
    nn.Sigmoid(),
    nn.Linear(30, 30, bias=False),
    nn.Sigmoid(),
    nn.Linear(30, 10, bias=False),
    nn.Sigmoid()
).to(device)
# Direct handles to the four Linear layers; the activations in between are discarded.
layer1, _, layer2, _, layer3, _, layer4, _ = network

In [240]:
# Plain SGD over every parameter of the network, learning rate 0.1.
# Alternative: torch.optim.Adam(network.parameters(), lr=1e-3)
optimizer = torch.optim.SGD(params=network.parameters(), lr=0.1)
# Squared error summed (not averaged) over all elements of the batch.
loss_fn = nn.MSELoss(reduction="sum")

In [241]:
# Ten epochs; after each one, report the summed training loss and the test accuracy.
for epoch in range(10):
    # Training pass: one optimizer step per mini-batch.
    epoch_loss = 0
    for sample, target in training_loader:
        optimizer.zero_grad()
        prediction = network(sample.to(device))
        batch_loss = loss_fn(prediction, target.to(device))
        batch_loss.backward()
        optimizer.step()
        epoch_loss += batch_loss.item()

    # Evaluation pass: count correct predictions with gradient tracking disabled.
    accuracy = 0
    outof = 0
    with torch.no_grad():
        for sample, labels in testing_loader:
            output = network(sample.to(device))
            # The predicted class is the index of the largest output unit.
            pred = output.argmax(dim=1)
            hits = pred == labels.to(device)
            accuracy += hits.sum().item()
            outof += labels.size(0)
    accuracy /= outof
    print("Epoch: {0:2d} | Loss: {1:.3f} | Accuracy: {2:.3f}".format(epoch, epoch_loss, accuracy))

Epoch:  0 | Loss: 29082.028 | Accuracy: 0.904
Epoch:  1 | Loss: 7256.468 | Accuracy: 0.937
Epoch:  2 | Loss: 5230.797 | Accuracy: 0.952
Epoch:  3 | Loss: 4217.021 | Accuracy: 0.952
Epoch:  4 | Loss: 3661.362 | Accuracy: 0.962
Epoch:  5 | Loss: 3295.837 | Accuracy: 0.961
Epoch:  6 | Loss: 3066.118 | Accuracy: 0.962
Epoch:  7 | Loss: 2797.924 | Accuracy: 0.962
Epoch:  8 | Loss: 2664.039 | Accuracy: 0.967
Epoch:  9 | Loss: 2457.844 | Accuracy: 0.965
