In [52]:
from dlc_practical_prologue import *
from helper_functions import *
from torch import nn
from torch.autograd import Variable

## Load datasets

In [2]:
# Draw the train and test pair sets; the tensor layouts are summarised in the
# table that follows this cell.
(train_input, train_target, train_classes,
 test_input, test_target, test_classes) = generate_pair_sets(1000)

Name | Tensor dimension | Type | Content
-----|-----|-----|-----
`train_input` | N × 2 × 14 × 14 | float32 | Images
`train_target` | N | int64 | Class to predict ∈ {0, 1}
`train_classes` | N × 2 | int64 | Classes of the two digits ∈ {0, . . . , 9}
`test_input` | N × 2 × 14 × 14 | float32 | Images
`test_target` | N | int64 | Class to predict ∈ {0, 1}
`test_classes` | N × 2 | int64 | Classes of the two digits ∈ {0, . . . , 9}

## Preprocess Data

In [3]:
# preprocess_data returns eight tensors: inputs and classes, for train and
# test, each in a `1` and a `2` variant.
# NOTE(review): presumably `1`/`2` are the first/second digit of each pair —
# confirm in helper_functions.
(
    train_input1, train_input2,
    train_classes1, train_classes2,
    test_input1, test_input2,
    test_classes1, test_classes2,
) = preprocess_data(train_input, train_classes, test_input, test_classes)

## Architecture Lab 5

In [4]:
# Wrap the `*1` tensors in autograd Variables before handing them to training.
# NOTE(review): torch.autograd.Variable has been deprecated since PyTorch 0.4
# (plain tensors work with autograd directly) — confirm the torch version in
# use before dropping this cell.
train_input1 = Variable(train_input1)
train_classes1 = Variable(train_classes1)
test_input1 = Variable(test_input1)
test_classes1 = Variable(test_classes1)

In [44]:
def create_shallow_model():
    """Build a one-hidden-layer MLP: 196 inputs -> 50 hidden units -> 10 outputs.

    The hidden linear layer is followed by batch normalisation and a ReLU.

    Returns:
        nn.Sequential: a newly constructed model (fresh parameters per call).
    """
    # Layers are created in forward order so parameter initialisation draws
    # from the RNG in the same sequence as the network is laid out.
    hidden = nn.Linear(196, 50)
    hidden_norm = nn.BatchNorm1d(50)
    activation = nn.ReLU()
    output = nn.Linear(50, 10)
    return nn.Sequential(hidden, hidden_norm, activation, output)

In [43]:
def create_deep_model(in_features=196,
                      hidden_sizes=(4, 8, 16, 32, 64, 128),
                      out_features=10):
    """Build a deep MLP of Linear -> BatchNorm1d -> ReLU stages plus a final Linear.

    Called with no arguments this yields the original architecture:
    196 -> 4 -> 8 -> 16 -> 32 -> 64 -> 128 -> 10 (19 modules in total).

    Args:
        in_features: size of each flattened input sample.
        hidden_sizes: width of every hidden stage, in forward order. An empty
            sequence produces a single Linear(in_features, out_features).
        out_features: number of output units of the final linear layer.

    Returns:
        nn.Sequential: a newly constructed model (fresh parameters per call).
    """
    layers = []
    width = in_features
    for next_width in hidden_sizes:
        # Each hidden stage is the same triple; modules are instantiated in
        # forward order, so RNG consumption matches the hand-written version.
        layers += [
            nn.Linear(width, next_width),
            nn.BatchNorm1d(next_width),
            nn.ReLU(),
        ]
        width = next_width
    # Output layer: no normalisation or activation after it.
    layers.append(nn.Linear(width, out_features))
    return nn.Sequential(*layers)

In [7]:
# Model factories, not instances: the functions themselves are stored here and
# later handed to compute_errors as `m`.
models = [
    create_shallow_model,
    create_deep_model,
]

In [45]:
# Weight-initialisation standard deviations to sweep over.
# NOTE(review): -1 looks like a sentinel interpreted inside compute_errors
# (possibly "keep PyTorch's default init") — confirm in helper_functions.
stds = [-1, 0.001, 0.01, 0.1, 1.0, 10.0]

# Evaluate every architecture against the full list of standard deviations.
for model in models:
    compute_errors(
        m=model,
        train_input=train_input1,
        train_classes=train_classes1,
        test_input=test_input1,
        test_classes=test_classes1,
        stds=stds,
    )

std -1.000000 create_shallow_model train_error 0.00% test_error 18.80%
std 0.001000 create_shallow_model train_error 0.00% test_error 19.90%
std 0.010000 create_shallow_model train_error 0.00% test_error 17.80%
std 0.100000 create_shallow_model train_error 0.00% test_error 15.60%
std 1.000000 create_shallow_model train_error 0.00% test_error 32.80%
std 10.000000 create_shallow_model train_error 0.00% test_error 35.60%
std -1.000000 create_deep_model train_error 7.80% test_error 50.20%
std 0.001000 create_deep_model train_error 87.00% test_error 88.60%
std 0.010000 create_deep_model train_error 87.00% test_error 88.60%
std 0.100000 create_deep_model train_error 14.80% test_error 50.70%
std 1.000000 create_deep_model train_error 91.80% test_error 90.50%
std 10.000000 create_deep_model train_error 91.80% test_error 90.50%


In [53]:
# Xavier initialization: no explicit list of standard deviations is supplied.
# NOTE(review): stds=None apparently makes compute_errors derive the standard
# deviation itself (see the message it prints) — confirm in helper_functions.
for model in models:
    compute_errors(
        m=model,
        train_input=train_input1,
        train_classes=train_classes1,
        test_input=test_input1,
        test_classes=test_classes1,
        stds=None,
    )

Computed standard deviation according to 'Xavier initialization': 0.183
std 0.182574 create_shallow_model train_error 0.00% test_error 17.90%
Computed standard deviation according to 'Xavier initialization': 0.120
std 0.120386 create_deep_model train_error 39.20% test_error 58.80%
