In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

In [2]:
from torch.autograd import Variable
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.utils.data.sampler import SubsetRandomSampler

In [3]:
# Data-loading hyper-parameters (tune here, then re-run the cells below).
batch_size = 32       # mini-batch size handed to the training loader
test_size = 10_000    # batch size for the test loader; equals the 10000-image MNIST test split
valid_size = 0.2      # not referenced in the visible cells; presumably a validation fraction -- TODO confirm

In [4]:
# Fetch the MNIST train/test splits into ./data (downloaded on first run);
# the ToTensor transform is applied to every image when a sample is read.
train_data = datasets.MNIST('data', train = True, transform = ToTensor(), download = True)
test_data = datasets.MNIST('data', train = False, transform = ToTensor(), download = True)

# Report the dimensions of the raw image tensors of each split.
print(train_data.data.shape)
print(test_data.data.shape)

torch.Size([60000, 28, 28])
torch.Size([10000, 28, 28])


In [5]:
def choose_batch(batch_size_trn, batch_size_tst, train_set = None, test_set = None):
    """Build the training and test ``DataLoader`` objects.

    Args:
        batch_size_trn: batch size for the training loader.
        batch_size_tst: batch size for the test loader.
        train_set: dataset to train on; defaults to the notebook-level
            ``train_data`` when omitted.
        test_set: dataset to evaluate on; defaults to the notebook-level
            ``test_data`` when omitted.

    Returns:
        A ``(train_loader, test_loader)`` tuple.
    """
    if train_set is None:
        train_set = train_data
    if test_set is None:
        test_set = test_data

    # Training batches are reshuffled every epoch.
    train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size_trn, shuffle = True)
    # Evaluation does not benefit from shuffling; a fixed order keeps the
    # test batches reproducible from run to run.
    test_loader = torch.utils.data.DataLoader(test_set, batch_size = batch_size_tst, shuffle = False)
    return train_loader, test_loader

In [6]:
# Build both loaders from the batch sizes configured at the top of the notebook.
train_loader, test_loader = choose_batch(batch_size_trn = batch_size,
                                         batch_size_tst = test_size)

In [7]:
# Pull the first batch (and its index) from each loader to sanity-check the
# tensor shapes. After this cell, exmpl_data / exmpl_target hold the test batch.
batch_index, first_batch = next(enumerate(train_loader))
exmpl_data, exmpl_target = first_batch
print("Train batch is ", exmpl_data.size(), exmpl_target.size())

batch_index, first_batch = next(enumerate(test_loader))
exmpl_data, exmpl_target = first_batch
print("Test batch is ", exmpl_data.size(), exmpl_target.size())

Train batch is  torch.Size([32, 1, 28, 28]) torch.Size([32])
Test batch is  torch.Size([10000, 1, 28, 28]) torch.Size([10000])


In [8]:
class MLP(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    Args:
        Win: number of input features per sample; every input is flattened
            to ``(-1, Win)`` before the first layer.
        W1: width of the first hidden layer.
        W2: width of the second hidden layer.
        Wout: number of output units (one per class).
    """

    def __init__(self, Win, W1, W2, Wout):
        super().__init__()
        self.Win = Win
        self.layer1 = nn.Linear(Win, W1)
        self.layer2 = nn.Linear(W1, W2)
        self.layerout = nn.Linear(W2, Wout)

    def forward(self, x):
        """Return per-class log-probabilities.

        Args:
            x: tensor whose element count is a multiple of ``Win``; it is
                flattened to ``(N, Win)``.

        Returns:
            Tensor of shape ``(N, Wout)`` holding ``log_softmax`` over dim 1.
        """
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # permuted tensors; for contiguous inputs it is still a no-copy view.
        x = x.reshape(-1, self.Win)
        lr1 = nn.functional.relu(self.layer1(x))
        lr2 = nn.functional.relu(self.layer2(lr1))
        return nn.functional.log_softmax(self.layerout(lr2), dim = 1)

In [9]:
def train(network, learning_rate, epochs):
    losses, accuracies = dict(train = [], val = []), dict(train = [], val = [])
    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(network.parameters(), learning_rate)

    for epoch in range(epochs):
        network.train()
        tr_loss = 0.0
        tr_corr = 0
        n_samples = 0
        for data, label in train_loader:
            n_samples += data.shape[0]
            optimizer.zero_grad()
            output = network(data)
            _, preds = torch.max(output, 1)
            loss = loss_func(output, label)
            loss.backward()
            optimizer.step()
            tr_loss += loss.item() * data.size(0)
            tr_corr += torch.sum(preds == label.data)
        
        train_loss = tr_loss / n_samples
        train_acc = tr_corr.double() / n_samples
        losses['train'].append(train_loss)
        accuracies['train'].append(train_acc)

        network.eval()

        val_loss = 0.0
        val_corr = 0
        n_samples = 0
        for data, label in test_loader:
            n_samples += data.shape[0]
            output = network(data)
            _, preds = torch.max(output, 1)
            loss = loss_func(output, label)
            val_loss += loss.item() * data.size(0)
            val_corr += torch.sum(preds == label.data)

        valid_loss = val_loss / n_samples
        valid_acc = val_corr.double() / n_samples
        losses['val'].append(valid_loss)
        accuracies['val'].append(valid_acc)
