In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

import seaborn as sns

from tqdm.notebook import tqdm

In [3]:
# MNIST training split, stored under ./data; download=True lets torchvision
# fetch the files when they are not on disk yet. Images are converted to tensors.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)

# MNIST test split, read from the same ./data directory. No download is
# requested here because the call above has already populated the directory.
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=False,
    transform=transforms.ToTensor(),
)

In [6]:
def create_model(layer1, layer2, dropout1, dropout2, lr=1e-3):
    """Build a fully connected classifier together with its optimizer and loss.

    The network flattens its input to a 784-element vector and passes it through
    two hidden ``Linear -> ReLU -> Dropout`` stages followed by a 10-way linear
    output layer (raw class scores, no softmax).

    Args:
        layer1: Number of units in the first hidden layer.
        layer2: Number of units in the second hidden layer.
        dropout1: Dropout probability applied after the first hidden layer.
        dropout2: Dropout probability applied after the second hidden layer.
        lr: Learning rate handed to the Adam optimizer. Defaults to ``1e-3``,
            the value that used to be hard-coded.

    Returns:
        A tuple ``(model, optimizer, loss_fn)`` where ``model`` is an
        ``nn.Sequential``, ``optimizer`` is an Adam optimizer over the model's
        parameters and ``loss_fn`` is an ``nn.CrossEntropyLoss`` instance.
    """
    model = nn.Sequential(
        # In problem 2, we don't use the 2D structure of an image at all. Our network
        # takes in a flat vector of the pixel values as input.
        nn.Flatten(),
        nn.Linear(784, layer1),
        nn.ReLU(),
        nn.Dropout(dropout1),
        nn.Linear(layer1, layer2),
        nn.ReLU(),
        nn.Dropout(dropout2),
        # Output layer produces unnormalised scores; CrossEntropyLoss applies
        # the log-softmax itself.
        nn.Linear(layer2, 10)
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    return model, optimizer, loss_fn

In [4]:
def train(model, optimizer, loss_fn, n_epochs, batch_size, record_losses=False):
    """Train ``model`` on ``train_dataset`` and optionally record loss curves.

    Args:
        model: Network to optimise; it is switched to training mode and is left
            in training mode when the function returns.
        optimizer: Optimizer already bound to ``model``'s parameters.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar tensor.
            If it exposes a ``reduction`` attribute equal to ``'sum'`` its value
            is treated as a per-batch sum; otherwise it is treated as a
            per-batch mean (the ``nn.CrossEntropyLoss`` default).
        n_epochs: Number of full passes over the training set.
        batch_size: Mini-batch size for both the training and the test loader.
        record_losses: When true, every 250 training batches the mean
            per-sample training loss over that window and the mean per-sample
            loss over the whole test set are recorded.

    Returns:
        ``(losses, testlosses)``: two equally long lists of floats holding the
        recorded training and test losses. Both are empty when
        ``record_losses`` is false.
    """
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    # Sample order has no effect on an averaged loss, so the evaluation loader
    # does not need to shuffle.
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Some layers, such as Dropout, behave differently during training
    model.train()

    # A mean-reduced loss must be multiplied by the batch size before batches
    # are added up, otherwise dividing by the sample count under-reports the
    # loss by roughly a factor of batch_size.
    loss_is_summed = getattr(loss_fn, 'reduction', 'mean') == 'sum'

    losses = []
    testlosses = []

    # Number of training batches per recorded point
    loss_n = 250

    for epoch in tqdm(range(n_epochs)):
        running_loss = 0.0
        running_samples = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            # Erase accumulated gradients
            optimizer.zero_grad()

            # Forward pass
            output = model(data)

            # Calculate loss
            loss = loss_fn(output, target)

            # Backward pass
            loss.backward()

            # Weight update
            optimizer.step()

            if record_losses:
                batch_loss = loss.item()
                running_loss += batch_loss if loss_is_summed else batch_loss * len(data)
                running_samples += len(data)

                if batch_idx % loss_n == loss_n - 1:
                    # Mean per-sample training loss over the last loss_n batches
                    losses.append(running_loss / running_samples)
                    running_loss = 0.0
                    running_samples = 0

                    # Evaluate with Dropout disabled so the test loss is
                    # deterministic and reflects the full network.
                    model.eval()
                    test_loss = 0.0
                    with torch.no_grad():
                        for test_data, test_target in test_loader:
                            test_batch_loss = loss_fn(model(test_data), test_target).item()
                            if not loss_is_summed:
                                test_batch_loss *= len(test_data)
                            test_loss += test_batch_loss
                    # Back to training mode for the remaining batches
                    model.train()

                    testlosses.append(test_loss / len(test_loader.dataset))

    return losses, testlosses

In [8]:
def test(model, loss_fn, batch_size=32):
    """Evaluate ``model`` on ``test_dataset``, print a summary and return metrics.

    Args:
        model: Network to evaluate; it is switched to evaluation mode and is
            left in that mode when the function returns.
        loss_fn: Callable ``loss_fn(output, target)`` returning a scalar tensor.
            If it exposes a ``reduction`` attribute equal to ``'sum'`` its value
            is treated as a per-batch sum; otherwise it is treated as a
            per-batch mean (the ``nn.CrossEntropyLoss`` default).
        batch_size: Mini-batch size used for evaluation. Defaults to 32, the
            value that used to be hard-coded.

    Returns:
        ``(test_loss, accuracy)``: the mean per-sample loss over the test set
        and the fraction (0..1) of correctly classified samples. Note that the
        printed accuracy is a percentage while the returned one is a fraction.
    """
    # Aggregate metrics do not depend on sample order, so no shuffling is needed
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Putting layers like Dropout into evaluation mode
    model.eval()

    # A mean-reduced loss has to be weighted by the batch size before summing;
    # otherwise dividing by the dataset size below reports a value that is too
    # small by roughly a factor of batch_size.
    loss_is_summed = getattr(loss_fn, 'reduction', 'mean') == 'sum'

    test_loss = 0.0
    correct = 0

    # Turning off automatic differentiation
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            batch_loss = loss_fn(output, target).item()
            test_loss += batch_loss if loss_is_summed else batch_loss * len(data)
            pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max class score
            correct += pred.eq(target.view_as(pred)).sum().item()

    n_samples = len(test_loader.dataset)
    test_loss /= n_samples

    print('Test set: Average loss: %.4f, Accuracy: %d/%d (%.4f)' %
        (test_loss, correct, n_samples,
        100. * correct / n_samples))

    return test_loss, correct / n_samples

In [9]:
n_epochs = 10
batch_size = 32
# Number of freshly initialised models to train and evaluate
n_runs = 10

# One (test_loss, accuracy) tuple per run, kept so the repeats can be compared
# or averaged afterwards instead of only being printed.
run_results = []

for run_idx in range(n_runs):
    # A new model and optimizer for every run, so no weights carry over
    model, opt, loss_fn = create_model(140, 60, 0.2, 0.2)
    # record_losses is left at its default (False), so both returned lists are empty
    trainlosses, testlosses = train(model, opt, loss_fn, n_epochs, batch_size)
    run_results.append(test(model, loss_fn))

  0%|          | 0/10 [00:00<?, ?it/s]

KeyboardInterrupt: 