In [1]:
from dlc_practical_prologue import *

In [2]:
def normalize(train_input, test_input):
    mu, std = train_input.mean(), train_input.std()
    train_input.sub_(mu).div_(std)
    test_input.sub_(mu).div_(std)
    return train_input, test_input

## Load datasets

In [3]:
train_input, train_target, train_classes, test_input, test_target, test_classes = generate_pair_sets(1000)

Name | Tensor dimension | Type | Content
-----|-----|-----|-----
`train_input` | N × 2 × 14 × 14 | float32 | Images
`train_target` | N | int64 | Class to predict ∈ {0, 1}
`train_classes` | N × 2 | int64 | Classes of the two digits ∈ {0, . . . , 9}
`test_input` | N × 2 × 14 × 14 | float32 | Images
`test_target` | N | int64 | Class | to predict ∈ {0, 1}
`test_classes` | N × 2 | int64 | Classes of the two digits ∈ {0, . . . , 9}

## Data preprocessing

In [4]:
train_input = train_input.clone().reshape(train_input.size(0), 2, -1)
test_input = test_input.clone().reshape(test_input.size(0), 2, -1)

In [5]:
train_input1 = train_input[:, 0]
train_input2 = train_input[:, 1]

test_input1 = test_input[:, 0]
test_input2 = test_input[:, 1]

In [6]:
train_classes1 = train_classes[:,0]
train_classes2 = train_classes[:,1]

test_classes1 = test_classes[:,0]
test_classes2 = test_classes[:,1]

In [7]:
train_input1 = 0.9*train_input1
train_input2 = 0.9*train_input2

test_input1 = 0.9*test_input1
test_input2 = 0.9*test_input2

In [8]:
train_classes1 = convert_to_one_hot_labels(train_input1, train_classes1)
train_classes2 = convert_to_one_hot_labels(train_input2, train_classes2)

test_classes1 = convert_to_one_hot_labels(test_input1, test_classes1)
test_classes2 = convert_to_one_hot_labels(test_input2, test_classes2)

In [9]:
train_input1, test_classes1 = normalize(train_input1, test_classes1)
train_input2, test_classes2 = normalize(train_input2, test_classes2)

## Architecture Lab 5

In [10]:
import torch
import math

from torch import optim
from torch import Tensor
from torch import nn
from torch.autograd import Variable

In [11]:
mini_batch_size = 100

In [12]:
def train_model(model, train_input, train_target):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr = 1e-1)
    nb_epochs = 250

    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            # max needed if train_target is one-hot encoded
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size).max(1)[1])
            model.zero_grad()
            loss.backward()
            optimizer.step()

In [13]:
def compute_nb_errors(model, data_input, data_target):

    nb_data_errors = 0

    for b in range(0, data_input.size(0), mini_batch_size):
        output = model(data_input.narrow(0, b, mini_batch_size))
        _, predicted_classes = torch.max(output.data, 1)
        for k in range(mini_batch_size):
            # max needed if one-hot encoded
            if data_target.data[b + k].max(0)[1] != predicted_classes[k]:
                nb_data_errors = nb_data_errors + 1

    return nb_data_errors

In [14]:
def create_shallow_model():
    return nn.Sequential(
        nn.Linear(196, 50),
        nn.ReLU(),
        nn.Linear(50, 10)
    )

In [15]:
def create_deep_model():
    return nn.Sequential(
        nn.Linear(196, 4),
        nn.ReLU(),
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 16),
        nn.ReLU(),
        nn.Linear(16, 32),
        nn.ReLU(),
        nn.Linear(32, 64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, 10)
    )

In [16]:
train_input1, train_classes1 = Variable(train_input1), Variable(train_classes1)
test_input1, test_classes1 = Variable(test_input1), Variable(test_classes1)

In [17]:
for std in [ -1, 1e-3, 1e-2, 1e-1, 1e-0, 1e1 ]:

    for m in [ create_shallow_model, create_deep_model ]:

        model = m()

        if std > 0:
            for p in model.parameters(): 
                p.data.normal_(0, std)

        train_model(model, train_input1, train_classes1)

        print('std {:s} {:f} train_error {:.02f}% test_error {:.02f}%'.format(
            m.__name__,
            std,
            compute_nb_errors(model, train_input1, train_classes1) / train_input1.size(0) * 100,
            compute_nb_errors(model, test_input1, test_classes1) / test_input1.size(0) * 100
        )
        )

std create_shallow_model -1.000000 train_error 0.00% test_error 15.60%
std create_deep_model -1.000000 train_error 20.40% test_error 56.70%
std create_shallow_model 0.001000 train_error 0.00% test_error 20.40%
std create_deep_model 0.001000 train_error 87.00% test_error 88.60%
std create_shallow_model 0.010000 train_error 0.00% test_error 18.30%
std create_deep_model 0.010000 train_error 87.00% test_error 88.60%
std create_shallow_model 0.100000 train_error 0.00% test_error 17.60%
std create_deep_model 0.100000 train_error 22.20% test_error 55.40%
std create_shallow_model 1.000000 train_error 0.00% test_error 45.90%
std create_deep_model 1.000000 train_error 87.00% test_error 88.60%
std create_shallow_model 10.000000 train_error 0.00% test_error 40.40%
std create_deep_model 10.000000 train_error 91.80% test_error 90.50%
