In [13]:
import torch
import numpy as np
from torch.autograd import Variable
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

In [29]:
# Load data
train_input, train_target, test_input, test_target = \
    prologue.load_data(one_hot_labels = True, normalize = True, flatten = False)

# Define network module
class Net(nn.Module):
    def __init__(self, hidden=200):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.fc1 = nn.Linear(256, hidden)
        self.fc2 = nn.Linear(hidden, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        x = F.relu(F.max_pool2d(self.conv2(x), kernel_size=2, stride=2))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

# Transform inputs and targets into autograd variables
train_input, train_target = Variable(train_input), Variable(train_target)

* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples


In [25]:
# Define a train function
def train_model(model, train_input, train_target, mini_batch_size, print_progress=False):
    # Set hypterparameters
    criterion = nn.MSELoss()
    eta = 1e-1
    max_iter = 25
    print_step = 5
    
    for step in range(max_iter):
        sum_loss = 0
        # Iterate data in mini-batches
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size))
            sum_loss += loss.item()
            # Reset gradients for the parameters
            model.zero_grad()
            # Propagate gradients through the network
            loss.backward()
            # Update parameters
            for p in model.parameters():
                p.data.sub_(eta * p.grad.data)
        # Print progress
        if print_progress and (step % print_step == 0 or step == max_iter - 1):
            print(f'Step {step}/{max_iter-1}:\tLoss {sum_loss}')

In [23]:
# Define the test function
def compute_nb_errors(model, input, target):
    output = model(input)
    _, preds = output.max(1)
    _, target = target.max(1)
    return (preds != target).sum()

In [28]:
mini_batch_size = 100
num_runs = 10

errs = []
for _ in range(num_runs):
    model = Net()
    # Train model
    train_model(model, train_input, train_target, mini_batch_size)
    # Test model
    errs.append(compute_nb_errors(model, test_input, test_target))
# Compute the average number of errors
avg_err = 100 * np.mean(errs) / test_input.size(0)
print(f'Test error {avg_err}')

Test error 15.06


In [31]:
# Test several hidden layer sizes
hiddens = [10, 50, 200, 500, 1000]

for hidden in hiddens:
    model = Net(hidden)
    train_model(model, train_input, train_target, mini_batch_size)
    err = 100 * compute_nb_errors(model, test_input, test_target) / test_input.size(0)
    print(f'Hidden {hidden}\tTest error {err}')

Hidden 10	Test error 17
Hidden 50	Test error 18
Hidden 200	Test error 15
Hidden 500	Test error 13
Hidden 1000	Test error 11


In [35]:
# Define a new network
class Net2(nn.Module):
    def __init__(self, hidden=200):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3)
        self.fc1 = nn.Linear(256, hidden)
        self.fc2 = nn.Linear(hidden, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), kernel_size=3, stride=3))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.fc1(x.view(-1, 256)))
        x = self.fc2(x)
        return x

In [36]:
mini_batch_size = 100
num_runs = 10

errs = []
for _ in range(num_runs):
    model = Net2()
    # Train model
    train_model(model, train_input, train_target, mini_batch_size)
    # Test model
    errs.append(compute_nb_errors(model, test_input, test_target))
# Compute the average number of errors
avg_err = 100 * np.mean(errs) / test_input.size(0)
print(f'Test error {avg_err}')

Test error 22.98
