In [1]:
from dlc_practical_prologue import *

In [2]:
def convert_to_one_hot_labels(input, target):
    tmp = input.new_zeros(target.size(0), target.max() + 1)
    tmp.scatter_(1, target.view(-1, 1), 1.0)
    return tmp

def normalize(train_input, test_input):
    mu, std = train_input.mean(), train_input.std()
    train_input.sub_(mu).div_(std)
    test_input.sub_(mu).div_(std)
    return train_input, test_input

def backward_pass(w1, b1, w2, b2,
                  t,
                  x, s1, x1, s2, x2,
                  dl_dw1, dl_db1, dl_dw2, dl_db2):
    x0 = x
    dl_dx2 = dloss(x2, t)
    dl_ds2 = dsigma(s2) * dl_dx2
    dl_dx1 = w2.t().mv(dl_ds2)
    dl_ds1 = dsigma(s1) * dl_dx1

    dl_dw2.add_(dl_ds2.view(-1, 1).mm(x1.view(1, -1)))
    dl_db2.add_(dl_ds2)
    dl_dw1.add_(dl_ds1.view(-1, 1).mm(x0.view(1, -1)))
    dl_db1.add_(dl_ds1)
    
def forward_pass(w1, b1, w2, b2, x):
    x0 = x
    s1 = w1.mv(x0) + b1
    x1 = sigma(s1)
    s2 = w2.mv(x1) + b2
    x2 = sigma(s2)
    

    return x0, s1, x1, s2, x2

def sigma(x):
    return torch.tanh(x)

def dsigma(x):
    return 4 * (x.exp() + x.mul(-1).exp()).pow(-2)

def loss(v, t):
    return (v - t).pow(2).sum()

def dloss(v, t):
    return 2 * (v - t)

## Load datasets

In [3]:
train_input, train_target, train_classes, test_input, test_target, test_classes = generate_pair_sets(1000)

Name | Tensor dimension | Type | Content
-----|-----|-----|-----
`train_input` | N × 2 × 14 × 14 | float32 | Images
`train_target` | N | int64 | Class to predict ∈ {0, 1}
`train_classes` | N × 2 | int64 | Classes of the two digits ∈ {0, . . . , 9}
`test_input` | N × 2 × 14 × 14 | float32 | Images
`test_target` | N | int64 | Class | to predict ∈ {0, 1}
`test_classes` | N × 2 | int64 | Classes of the two digits ∈ {0, . . . , 9}

## Data preprocessing

In [4]:
train_input = train_input.clone().reshape(train_input.size(0), 2, -1)
test_input = test_input.clone().reshape(test_input.size(0), 2, -1)

In [5]:
train_input1 = train_input[:, 0]
train_input2 = train_input[:, 1]

test_input1 = test_input[:, 0]
test_input2 = test_input[:, 1]

In [6]:
train_classes1 = train_classes[:,0]
train_classes2 = train_classes[:,1]

test_classes1 = test_classes[:,0]
test_classes2 = test_classes[:,1]

In [7]:
train_input1 = 0.9*train_input1
train_input2 = 0.9*train_input2

test_input1 = 0.9*test_input1
test_input2 = 0.9*test_input2

In [8]:
train_classes1 = convert_to_one_hot_labels(train_input1, train_classes1)
train_classes2 = convert_to_one_hot_labels(train_input2, train_classes2)

test_classes1 = convert_to_one_hot_labels(test_input1, test_classes1)
test_classes2 = convert_to_one_hot_labels(test_input2, test_classes2)

train_input1, test_classes1 = normalize(train_input1, test_classes1)
train_input2, test_classes2 = normalize(train_input2, test_classes2)

In [9]:
print(test_classes1.shape)

torch.Size([1000, 10])


## Architecture 1 (Lab 3)

In [10]:
def initialize_params(nb_hidden, epsilon, size, nb_classes):
    w1 = torch.empty(nb_hidden, size).normal_(0, epsilon)
    b1 = torch.empty(nb_hidden).normal_(0, epsilon)
    w2 = torch.empty(nb_classes, nb_hidden).normal_(0, epsilon)
    b2 = torch.empty(nb_classes).normal_(0, epsilon)
    
    dl_dw1 = torch.empty(w1.size())
    dl_db1 = torch.empty(b1.size())
    dl_dw2 = torch.empty(w2.size())
    dl_db2 = torch.empty(b2.size())
    
    return w1, b1, w2, b2, dl_dw1, dl_db1, dl_dw2, dl_db2

In [11]:
def train_model(w1, b1, w2, b2, dl_dw1, dl_db1, dl_dw2, dl_db2, train_input, train_target, acc_loss):
    
    for k in range(nb_epochs): 
        # Back-prop
        nb_train_errors = 0

        dl_dw1.zero_()
        dl_db1.zero_()
        dl_dw2.zero_()
        dl_db2.zero_()

        for n in range(nb_train_samples):
            x0, s1, x1, s2, x2 = forward_pass(w1, b1, w2, b2, train_input[n])

            pred = x2.max(0)[1].item()
            if train_target[n, pred] < 0.5: 
                nb_train_errors = nb_train_errors + 1
            acc_loss = acc_loss + loss(x2, train_target[n])

            backward_pass(w1, b1, w2, b2,
                          train_target[n],
                          x0, s1, x1, s2, x2,
                          dl_dw1, dl_db1, dl_dw2, dl_db2)

        # Gradient step

        w1 = w1 - eta * dl_dw1
        b1 = b1 - eta * dl_db1
        w2 = w2 - eta * dl_dw2
        b2 = b2 - eta * dl_db2
    
    return w1, b1, w2, b2, nb_train_errors

In [12]:
def test_errors(w1, b1, w2, b2, train_input, test_input, test_target, acc_loss, nb_train_errors):
    
    for k in range(nb_epochs):
        nb_test_errors = 0
        output = []
    
        print('test_input',test_input[0].shape)
        for n in range(test_input.size(0)):
            _, _, _, _, x2 = forward_pass(w1, b1, w2, b2, test_input[n])
            output.append(x2)
    
            pred = x2.max(0)[1].item()
            if test_target[n, pred] < 0.5: 
                nb_test_errors = nb_test_errors + 1

        print('{:d} acc_train_loss {:.02f} acc_train_error {:.02f}% test_error {:.02f}%'
              .format(k,
                      acc_loss,
                      (100 * nb_train_errors) / train_input.size(0),
                      (100 * nb_test_errors) / test_input.size(0)))
    return output

In [13]:
eta = 0.1
epsilon = 1**-6
nb_hidden = 50

In [14]:
nb_train_samples = train_input.size(0)
nb_hidden = 50
eta = 1e-1 / nb_train_samples
epsilon = 1e-6
nb_classes = 10

img1_w1, img1_b1, img1_w2, img1_b2, img1_dl_dw1, img1_dl_db1, img1_dl_dw2, img1_dl_db2 = \
initialize_params(nb_hidden, epsilon, train_input1.size(1), nb_classes)

img2_w1, img2_b1, img2_w2, img2_b2, img2_dl_dw1, img2_dl_db1, img2_dl_dw2, img2_dl_db2 = \
initialize_params(nb_hidden, epsilon, train_input2.size(1), nb_classes)

In [15]:
# nb_epochs = 25
# acc_loss = 0
# print("training of first image")

# img1_w1, img1_b1, img1_w2, img1_b2, nb_train_errors_1 = train_model(img1_w1, img1_b1, img1_w2, 
#                                                      img1_b2, img1_dl_dw1, img1_dl_db1, img1_dl_dw2, img1_dl_db2,
#                                                      train_input1, train_classes1, acc_loss)

# img1_output = test_errors(img1_w1, img1_b1, img1_w2, img1_b2, 
#                               train_input1, test_input1, test_classes1, acc_loss, nb_train_errors_1)

# print("training second image")


# img2_w1, img2_b1, img2_w2, img2_b2, nb_train_errors_2 = train_model(img2_w1, img2_b1, img2_w2, img2_b2, 
#                                                      img2_dl_dw1, img2_dl_db1, img2_dl_dw2, img2_dl_db2,
#                                                      train_input2, train_classes2, acc_loss)

# img2_output = test_errors(img2_w1, img2_b1, img2_w2, img2_b2,
#             train_input2, test_input2, test_classes2, acc_loss, nb_train_errors_2)


## Architecture Lab 5

In [16]:
import torch
import math

from torch import optim
from torch import Tensor
from torch import nn
from torch.autograd import Variable

In [17]:
mini_batch_size = 100

In [18]:
def train_model(model, train_input, train_target):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr = 1e-1)
    nb_epochs = 250

    for e in range(nb_epochs):
        for b in range(0, train_input.size(0), mini_batch_size):
            output = model(train_input.narrow(0, b, mini_batch_size))
            # max needed if train_target is one-hot encoded
            loss = criterion(output, train_target.narrow(0, b, mini_batch_size).max(1)[1])
            model.zero_grad()
            loss.backward()
            optimizer.step()

In [41]:
def compute_nb_errors(model, data_input, data_target):

    nb_data_errors = 0

    for b in range(0, data_input.size(0), mini_batch_size):
        output = model(data_input.narrow(0, b, mini_batch_size))
        _, predicted_classes = torch.max(output.data, 1)
        for k in range(mini_batch_size):
            # max needed if one-hot encoded
            if data_target.data[b + k].max(0)[1] != predicted_classes[k]:
                nb_data_errors = nb_data_errors + 1

    return nb_data_errors

In [20]:
def create_shallow_model():
    return nn.Sequential(
        nn.Linear(196, 50),
        nn.ReLU(),
        nn.Linear(50, 10)
    )

In [21]:
def create_deep_model():
    return nn.Sequential(
        nn.Linear(196, 4),
        nn.ReLU(),
        nn.Linear(4, 8),
        nn.ReLU(),
        nn.Linear(8, 16),
        nn.ReLU(),
        nn.Linear(16, 32),
        nn.ReLU(),
        nn.Linear(32, 64),
        nn.ReLU(),
        nn.Linear(64, 128),
        nn.ReLU(),
        nn.Linear(128, 10)
    )

In [22]:
print(train_input1.shape)

torch.Size([1000, 196])


In [23]:
train_input1, train_classes1 = Variable(train_input1), Variable(train_classes1)
test_input1, test_classes1 = Variable(test_input1), Variable(test_classes1)

In [42]:
for std in [ -1, 1e-3, 1e-2, 1e-1, 1e-0, 1e1 ]:

    for m in [ create_shallow_model, create_deep_model ]:

        model = m()

        if std > 0:
            for p in model.parameters(): 
                p.data.normal_(0, std)

        train_model(model, train_input1, train_classes1)

        print('std {:s} {:f} train_error {:.02f}% test_error {:.02f}%'.format(
            m.__name__,
            std,
            compute_nb_errors(model, train_input1, train_classes1) / train_input1.size(0) * 100,
            compute_nb_errors(model, test_input1, test_classes1) / test_input1.size(0) * 100
        )
        )

std create_shallow_model -1.000000 train_error 0.00% test_error 16.90%
std create_deep_model -1.000000 train_error 13.80% test_error 51.40%
std create_shallow_model 0.001000 train_error 0.00% test_error 21.50%
std create_deep_model 0.001000 train_error 87.00% test_error 88.60%
std create_shallow_model 0.010000 train_error 0.00% test_error 18.80%
std create_deep_model 0.010000 train_error 87.00% test_error 88.60%
std create_shallow_model 0.100000 train_error 0.00% test_error 17.40%
std create_deep_model 0.100000 train_error 18.30% test_error 54.00%
std create_shallow_model 1.000000 train_error 0.00% test_error 42.40%
std create_deep_model 1.000000 train_error 91.80% test_error 90.50%
std create_shallow_model 10.000000 train_error 0.00% test_error 41.40%
std create_deep_model 10.000000 train_error 91.80% test_error 90.50%
