In [1]:
import torch
from torch import optim
from torch.nn import functional as F
from torch import nn

import dlc_practical_prologue as prologue

In [2]:
def compute_nb_errors(model, data_input, data_target, mini_batch_size):
    """Count how many samples of ``data_input`` the model misclassifies.

    Args:
        model: callable returning a 3-tuple; only the third element (a 2-D
            tensor of per-class scores, one row per sample) is used here.
        data_input: tensor whose first dimension indexes the samples.
        data_target: tensor of expected class indices, one per sample.
        mini_batch_size: maximum number of samples fed to the model at once.

    Returns:
        int: number of samples whose highest-scoring class differs from the
        corresponding entry of ``data_target``.
    """
    nb_data_errors = 0
    nb_samples = data_input.size(0)

    # Pure evaluation: no need to record the graph for backpropagation.
    with torch.no_grad():
        for b in range(0, nb_samples, mini_batch_size):
            # The last batch is shorter when nb_samples is not a multiple of
            # mini_batch_size; narrow() would raise on an out-of-range length.
            batch_size = min(mini_batch_size, nb_samples - b)

            _, _, output = model(data_input.narrow(0, b, batch_size))
            _, predicted_classes = torch.max(output, 1)

            # Flatten so that a (N, 1) target compares element-wise instead of
            # broadcasting against the (batch,) predictions.
            targets = data_target.narrow(0, b, batch_size).reshape(-1)
            nb_data_errors += (predicted_classes != targets).sum().item()

    return nb_data_errors

In [3]:
def train_model(model, train_input, train_target, train_classes, nb_epochs, mini_batch_size):
    """Train ``model`` in place with Adam on the final comparison output only.

    Args:
        model: ``nn.Module`` whose forward pass returns a 3-tuple; only the
            third element (class scores for the target) contributes to the loss.
        train_input: tensor whose first dimension indexes the samples.
        train_target: tensor of target class indices, one per sample.
        train_classes: accepted for signature compatibility; unused because
            no auxiliary digit loss is applied, so it may be ``None``.
        nb_epochs: number of passes over the training data.
        mini_batch_size: maximum number of samples per optimisation step.

    Returns:
        None. The parameters of ``model`` are updated in place.
    """
    criterion = nn.CrossEntropyLoss()
    eta = 1e-3
    optimizer = optim.Adam(model.parameters(), lr=eta)
    nb_samples = train_input.size(0)

    for e in range(nb_epochs):
        acc_loss = 0

        for b in range(0, nb_samples, mini_batch_size):
            # The last batch is shorter when nb_samples is not a multiple of
            # mini_batch_size; narrow() would raise on an out-of-range length.
            batch_size = min(mini_batch_size, nb_samples - b)

            # The two per-digit outputs are ignored: only the comparison
            # result is trained on.
            _, _, result = model(train_input.narrow(0, b, batch_size))
            loss_total = criterion(result, train_target.narrow(0, b, batch_size))

            acc_loss = acc_loss + loss_total.item()
            optimizer.zero_grad()
            loss_total.backward()
            optimizer.step()

        # Uncomment to monitor the accumulated loss per epoch.
        #print(e, acc_loss)

In [4]:
class MLP_NoWS_NoAL(nn.Module):
    """MLP comparing two digit images, with one independent branch per digit.

    ``layers1`` and ``layers2`` have the same architecture but separate
    parameters. Each maps a flattened 14*14 slice to 10 log-probabilities.
    ``layers_comp`` maps the 20 concatenated branch outputs to 2 raw scores.
    """

    def __init__(self):
        super(MLP_NoWS_NoAL, self).__init__()

        hidden_units = 100
        pixels_per_digit = 14 * 14

        # Two separately instantiated branches: their weights are not shared.
        self.layers1 = self._make_digit_branch(pixels_per_digit, hidden_units)
        self.layers2 = self._make_digit_branch(pixels_per_digit, hidden_units)

        # Comparison head fed with both branch outputs (10 + 10 values).
        self.layers_comp = nn.Sequential(
            nn.Linear(20, 200),
            nn.ReLU(),
            nn.Linear(200, 2000),
            nn.ReLU(),
            nn.Linear(2000, 2),
        )

    @staticmethod
    def _make_digit_branch(in_features, hidden_units):
        """Build one digit branch: three hidden ReLU layers, 10 log-probabilities."""
        return nn.Sequential(
            nn.Linear(in_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 10),
            # Redundant with nn.CrossEntropyLoss, which applies log-softmax itself.
            nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        """Return ``(first_digit, second_digit, result)`` for a batch ``x``.

        ``x`` is indexed at positions 0 and 1 along dimension 1; each slice is
        flattened to one row per sample before entering its branch.
        """
        batch_size = x.size(0)

        first_digit = self.layers1(x[:, 0].reshape(batch_size, -1))
        second_digit = self.layers2(x[:, 1].reshape(batch_size, -1))

        both_digits = torch.cat((first_digit, second_digit), dim=1)
        result = self.layers_comp(both_digits)

        return first_digit, second_digit, result

In [10]:
# Draw the training and test splits from the course helper module. The argument
# is presumably the number of digit pairs per split (the saved output of this
# cell reports 1000 training samples) -- confirm against dlc_practical_prologue.
train_input, train_target, train_classes, test_input, test_target, test_classes = prologue.generate_pair_sets(1000)    

def get_tests(n):
    """Generate ``n`` test sets by calling ``prologue.generate_pair_sets(1000)``.

    Each call yields six values; the first three are discarded and the last
    three are kept.

    Returns:
        list: ``n`` entries, each a list ``[test_input, test_target, test_classes]``.
    """
    def draw_test_set():
        _, _, _, inputs, targets, classes = prologue.generate_pair_sets(1000)
        return [inputs, targets, classes]

    return [draw_test_set() for _ in range(n)]

# Train a single model on the training pairs generated earlier in this cell,
# then report its error on the training set and on 100 generated test sets.
nb_epochs, mini_batch_size = 100, 100
MLP = MLP_NoWS_NoAL()

train_model(MLP, train_input, train_target, train_classes, nb_epochs, mini_batch_size)

# Training error: misclassified pairs over the training set.
nb_train_errors = compute_nb_errors(MLP, train_input, train_target, mini_batch_size)
print(
    'train error MLP without weight sharing or auxiliary loss {:0.2f}% {:f}/{:f}'.format(
        (100 * nb_train_errors) / train_input.size(0),
        nb_train_errors,
        train_target.size(0),
    )
)

# Each entry of L is [test_input, test_target, test_classes].
L = get_tests(100)
average_nb_test_error = 0  # running total; divided by len(L) when reported
for k in range(len(L)):
    nb_test_errors = compute_nb_errors(MLP, L[k][0], L[k][1], mini_batch_size)
    average_nb_test_error += nb_test_errors
    print(
        'test error MLP {:0.2f}% {:f}/{:f}'.format(
            (100 * nb_test_errors) / L[k][0].size(0),
            nb_test_errors,
            L[k][0].size(0),
        )
    )

# Mean error count over all test sets, relative to the size of the first one.
print(
    'Average test error MLP without weight sharing or auxiliary loss {:0.2f}% {:0.1f}/{:d}'.format(
        (100*average_nb_test_error/len(L)) / L[0][0].size(0),
        average_nb_test_error/len(L),
        L[0][0].size(0),
    )
)

train error MLP without weight sharing or auxiliary loss 0.00% 0.000000/1000.000000
test error MLP 19.30% 193.000000/1000.000000
test error MLP 18.70% 187.000000/1000.000000
test error MLP 21.00% 210.000000/1000.000000
test error MLP 18.80% 188.000000/1000.000000
test error MLP 19.50% 195.000000/1000.000000
test error MLP 19.40% 194.000000/1000.000000
test error MLP 21.10% 211.000000/1000.000000
test error MLP 22.80% 228.000000/1000.000000
test error MLP 16.20% 162.000000/1000.000000
test error MLP 19.90% 199.000000/1000.000000
test error MLP 20.00% 200.000000/1000.000000
test error MLP 20.70% 207.000000/1000.000000
test error MLP 19.70% 197.000000/1000.000000
test error MLP 21.10% 211.000000/1000.000000
test error MLP 20.30% 203.000000/1000.000000
test error MLP 18.70% 187.000000/1000.000000
test error MLP 19.20% 192.000000/1000.000000
test error MLP 21.70% 217.000000/1000.000000
test error MLP 20.10% 201.000000/1000.000000
test error MLP 18.80% 188.000000/1000.000000
test error MLP 1