## Imports

In [245]:
import torch
from torch import nn
from torch.nn import functional as F
import time
from math import sqrt

import matplotlib.pyplot as plt

import dlc_practical_prologue

## Useful functions

In [246]:
def mean(x):
    return sum(x)/len(x)

def var(x):
    u = mean(x)
    return sum([(v-u)**2 for v in x])/len(x)

In [247]:
def number_parameters(model):   # Returns the number of parameter of a given model
    return sum(p.numel() for p in model.parameters())

## Load data

In [248]:
N = 1000

x_train, y_train, y_train_classes, x_test, y_test, y_test_classes = \
dlc_practical_prologue.generate_pair_sets(N)

assert x_train.shape == torch.Size([N, 2, 14, 14])    # float32
assert y_train.shape == torch.Size([N])               # int64
assert y_train_classes.shape == torch.Size([N, 2])    # int64

assert x_test.shape == torch.Size([N, 2, 14, 14])
assert y_test.shape == torch.Size([N])
assert y_test_classes.shape == torch.Size([N, 2])

In [249]:
if torch.cuda.is_available():  
    dev = "cuda:0"
else:  
    dev = "cpu"  
device = torch.device(dev)
# x_train = x_train.to(device)
# y_train = y_train.to(device)
# y_train_classes = y_train_classes.to(device)
# x_test = x_test.to(device)
# y_test = y_test.to(device)
# y_test_classes = y_test_classes.to(device)

## Normalize input

In [250]:
tmp_list = torch.flatten(x_train).tolist()

x_train = x_train.sub(mean(tmp_list)).div(sqrt(var(tmp_list)))
x_test = x_test.sub(mean(tmp_list)).div(sqrt(var(tmp_list)))

## -----------------------------------------------------------------------------------------------

## Define models 

# Model 0
Is a naive MLP

In [151]:
## Is a naive Multi Layer Perceptron, composed of fully connected linear layers.

class model_0(nn.Module):   
    def __init__(self, ns = [392,256,64,2]):
        super().__init__()
        layers = [nn.Linear(n_in, n_out) for n_in, n_out in zip(ns[:-1], ns[1:])]   
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        x = torch.flatten(x,1)          # flatten the 2 images in one vector (but keeping batch size)
        for layer in self.layers[:-1]:
            x = layer(x).relu()
            
        x = self.layers[-1](x) 
        return x

model_test = model_0()
model_test(x_train[0:2])    
print(number_parameters(model_0()))

117186


# MODEL 1

Is a simple CNN of **2 Conv Layers** -- 
4 versions of it are implemented:
 * straightforward CNN (model_1)
 * **Finally, I think this one is to simple to justify an intermediate loss without digit prediction** -> Not present
 * with the two images composing the input treated identically and independently (model_1_ws)
 * with the two images composing the input treated identically and independently + Digit predictions for aux loss (model_1_ws_aux2)

In [217]:
## Is a convolutional neural network poorly paramztrized    

class model_1(nn.Module):                 
    def __init__(self):
        super(model_1, self).__init__()
        self.conv1 = nn.Conv2d(2, 16, kernel_size=2, padding=1)        
        self.conv2 = nn.Conv2d(16, 32, kernel_size=2)

        self.dense1 = nn.Linear(1568, 16)
        self.dense2 = nn.Linear(16, 2)
        
    def forward(self, x):
        x = F.leaky_relu(self.conv1(x))
        x = F.leaky_relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2) 
        
        x = torch.flatten(x, 1)
        
        x = F.leaky_relu(self.dense1(x))
        x = self.dense2(x)
        return x

model_test = model_1()
model_test(x_train[0:2])
print(number_parameters(model_test))

27362


In [None]:
model_list = [('Model_1 ', model_1)]

for model_params in model_list :
    results.append((experiment_model(*model_params), model_params))

In [230]:
## Is a convolutional neural network poorly paramztrized    

class model_1_ws(nn.Module):                 
    def __init__(self):
        super(model_1_ws, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=2, padding=1)        
        self.conv2 = nn.Conv2d(16, 32, kernel_size=2)

        self.dense1 = nn.Linear(1152, 16)
        self.dense_digits = nn.Linear(16, 10)
        self.dense2 = nn.Linear(20, 2)
        
    def forward(self, x):
        d1 = F.leaky_relu(self.conv1(x[:, 0:1, ...]))
        d1 = F.max_pool2d(d1, kernel_size=2) 
        d1 = F.leaky_relu(self.conv2(d1))
        d1 = torch.flatten(d1, 1)
        d1 = F.leaky_relu(self.dense1(d1))
        d1 = self.dense_digits(d1)
        
        d2 = F.leaky_relu(self.conv1(x[:, 1:2, ...]))
        d2 = F.max_pool2d(d2, kernel_size=2) 
        d2 = F.leaky_relu(self.conv2(d2))
        d2 = torch.flatten(d2, 1)
        d2 = F.leaky_relu(self.dense1(d2))
        d2 = self.dense_digits(d2)
        
        x = F.leaky_relu(torch.cat((torch.softmax(d1, -1), torch.softmax(d2, -1)), 1))
        x = torch.flatten(x, 1)
        
        x = F.leaky_relu(self.dense2(x))
        
        return x

model_test = model_1_ws()
model_test(x_train[0:2])
print(number_parameters(model_test))

20820


In [231]:
model_list = [('Model_1 ', model_1_ws)]

for model_params in model_list :
    results.append((experiment_model(*model_params), model_params))

Starting 10 training of 25 epochs, with model 'Model_1 ' containing 20820 parameters. 

Attempt 1 - accuracy 53.80%
Epoch 12 : loss 8.701

KeyboardInterrupt: 

In [236]:
## Is a convolutional neural network poorly paramztrized    

class model_1_ws_aux2(nn.Module):                 
    def __init__(self):
        super(model_1_ws_aux2, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=2, padding=1)        
        self.conv2 = nn.Conv2d(16, 32, kernel_size=2)

        self.dense1 = nn.Linear(1152, 16)
        self.dense_digits = nn.Linear(16, 10)
        self.dense2 = nn.Linear(20, 2)
        
    def forward(self, x):
        d1 = F.leaky_relu(self.conv1(x[:, 0:1, ...]))
        d1 = F.max_pool2d(d1, kernel_size=2) 
        d1 = F.leaky_relu(self.conv2(d1))
        d1 = torch.flatten(d1, 1)
        d1 = F.leaky_relu(self.dense1(d1))
        d1 = self.dense_digits(d1)
        
        d2 = F.leaky_relu(self.conv1(x[:, 1:2, ...]))
        d2 = F.max_pool2d(d2, kernel_size=2) 
        d2 = F.leaky_relu(self.conv2(d2))
        d2 = torch.flatten(d2, 1)
        d2 = F.leaky_relu(self.dense1(d2))
        d2 = self.dense_digits(d2)
        
        x = F.leaky_relu(torch.cat((torch.softmax(d1, -1), torch.softmax(d2, -1)), 1))
        x = torch.flatten(x, 1)
        
        x = F.leaky_relu(self.dense2(x))
        
        return x,d1,d2

model_test = model_1_ws()
model_test(x_train[0:2])
print(number_parameters(model_test))

20820


# MODEL 2

Is a more complex CNN, of **3 conv Layers** -- 
4 versions of it are implemented:
 * straightforward CNN (model_2)
 * !!NOT WORKING!! with intermediate output for auxiliary loss (model_2_aux1)
 * with the two images composing the input treated identically and independently (model_2_ws)
 * with the two images composing the input treated identically and independently + Digit predictions for aux loss (model_2_ws_aux2) **currently our best model**

In [207]:
class model_2(nn.Module):
    def __init__(self):
        super(model_2, self).__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=2)        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=2) # NEW size
        self.conv3 = nn.Conv2d(64, 128, kernel_size=2)
        
        self.dense1 = nn.Linear(3200, 64)
        self.dense2 = nn.Linear(64, 12)
        self.dense3 = nn.Linear(12, 2) 

        
    def forward(self, x):
        x = F.leaky_relu(self.conv1(x))
        x = F.leaky_relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2) 
        x = self.conv3(x)
        
        x = torch.flatten(x, 1)
        x = F.leaky_relu(self.dense1(x))
        x = self.dense2(x)
        x = F.dropout(x)
        x = self.dense3(x)

        return x

model_test = model_2()
model_test(x_train[0:2])
print(number_parameters(model_test))

247110


In [None]:
model_list = [('Model_2 ', model_2)]

for model_params in model_list :
    results.append((experiment_model(*model_params), model_params))

In [186]:
# Early output for an auxiliary loss (without digit prediction)

class model_2_aux1(nn.Module): # NOT WORKING MODEL -> NO TRAIN-MODEL() WITH INTERMEDIATE RESULTS (AND NOT DIGITS)
    def __init__(self):
        super(model_2_aux1, self).__init__()
        self.conv1 = nn.Conv2d(2, 32, kernel_size=2)        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=2) 
        self.conv3 = nn.Conv2d(64, 128, kernel_size=2)
        
        self.dense1 = nn.Linear(3200, 64)
        self.dense1_intermediate = nn.Linear(9216, 64)
        self.dense2 = nn.Linear(64, 20)
        self.dense3 = nn.Linear(20, 2)

        
    def forward(self, x):
        x = F.leaky_relu(self.conv1(x))
        x = F.leaky_relu(self.conv2(x))
                         
        x_ = torch.flatten(x, 1)
        x_ = self.dense1_intermediate(x_)
        x_ = F.dropout(x_)  
        x_ = self.dense2(x_)   # For intermediate output   
        x_ = F.dropout(x_)
        x_ = self.dense3(x_)                 
                         
        x = F.max_pool2d(x, kernel_size=2) 
        x = self.conv3(x)
                         
        x = torch.flatten(x, 1)
        x = F.leaky_relu(self.dense1(x))
        x = F.dropout(x)
        x = self.dense2(x)
        x = F.dropout(x) 
        x = self.dense3(x)


        return x,x_

model_test = model_2_aux1()
model_test(x_train[0:2])
print(number_parameters(model_test))

837534


In [187]:
model_list = [('Model_2 + aux 1', model_2_aux1)]

for model_params in model_list :
    results.append((experiment_model(*model_params), model_params))

Starting 10 training of 25 epochs, with model 'Model_2 ws + aux 2' containing 837534 parameters. 



ValueError: not enough values to unpack (expected 3, got 2)

In [197]:
class model_2_ws(nn.Module):  
    def __init__(self):
        super(model_2_ws, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=2)        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=2) 
        self.conv3 = nn.Conv2d(64, 32, kernel_size=2)
        
        self.dense1 = nn.Linear(800, 64)
        self.dense2 = nn.Linear(64, 10)
        
        self.dense3 = nn.Linear(20, 12)
        self.dense4 = nn.Linear(12, 2) 

        
    def forward(self, x):
        d1 = F.leaky_relu(self.conv1(x[:, 0:1, ...]))
        d1 = F.leaky_relu(self.conv2(d1))
        d1 = F.max_pool2d(d1, kernel_size=2) 
        d1 = self.conv3(d1)
        d1 = torch.flatten(d1, 1)
        d1 = F.leaky_relu(self.dense1(d1))
        d1 = self.dense2(d1)

        d2 = F.leaky_relu(self.conv1(x[:, 1:2, ...]))
        d2 = F.leaky_relu(self.conv2(d2))
        d2 = F.max_pool2d(d2, kernel_size=2)
        d2 = self.conv3(d2)
        d2 = torch.flatten(d2, 1)
        d2 = F.leaky_relu(self.dense1(d2))
        d2 = self.dense2(d2)
        
        x = F.leaky_relu(torch.cat((torch.softmax(d1, -1), torch.softmax(d2, -1)), 1))
        x = self.dense3(x)
        x = F.dropout(x)  
        x = self.dense4(x) 
        return x

model_test = model_2_ws()
model_test(x_train[0:2])
print(number_parameters(model_test))

68832


In [None]:
model_list = [('Model_2 ws', model_2_ws)]

for model_params in model_list :
    results.append((experiment_model(*model_params), model_params))

In [179]:
class model_2_ws_aux2(nn.Module):  # Previously :  model BEST
    def __init__(self):
        super(model_2_ws_aux2, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=2)        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=2) # NEW size
        self.conv3 = nn.Conv2d(64, 128, kernel_size=2)
        
        self.dense1 = nn.Linear(3200, 64)
        self.dense2 = nn.Linear(64, 10)
        
        self.dense3 = nn.Linear(20, 12)
        self.dense4 = nn.Linear(12, 2) # NEW

        
    def forward(self, x):
        d1 = F.leaky_relu(self.conv1(x[:, 0:1, ...]))
        d1 = F.leaky_relu(self.conv2(d1))
        d1 = F.max_pool2d(d1, kernel_size=2) #NEW
        d1 = self.conv3(d1)
        d1 = torch.flatten(d1, 1)
        d1 = F.leaky_relu(self.dense1(d1))
        d1 = self.dense2(d1)

        d2 = F.leaky_relu(self.conv1(x[:, 1:2, ...]))
        d2 = F.leaky_relu(self.conv2(d2))
        d2 = F.max_pool2d(d2, kernel_size=2) #NEW
        d2 = self.conv3(d2)
        d2 = torch.flatten(d2, 1)
        d2 = F.leaky_relu(self.dense1(d2))
        d2 = self.dense2(d2)
        
        x = F.leaky_relu(torch.cat((torch.softmax(d1, -1), torch.softmax(d2, -1)), 1))
        x = self.dense3(x)
        x = F.dropout(x)  #NEW
        x = self.dense4(x) #NEW
        return x, d1, d2

model_test = model_2_ws_aux2()
model_test(x_train[0:2])
print(number_parameters(model_test))

247104


In [None]:
model_list = [('Model_2 ws + aux 2', model_2_ws_aux2)]

for model_params in model_list :
    results.append((experiment_model(*model_params), model_params))

## Training/Evaluation function

In [42]:
def train_model(model, train_input, train_target, digit_target,
                 epochs, optimizer,
                 loss_function = nn.CrossEntropyLoss(), digit_loss = nn.CrossEntropyLoss(),
                 batch_size=10,
                 auxiliary = False,
                 verbose = True):
    
    # Inspired by exercise corrige 
    model.train()
    losses = []
    
    for e in range(epochs):
        sum_loss = 0
        
        if auxiliary:
            sum_loss_digit1 = 0
            sum_loss_digit2 = 0
        
        for x_batch, y_batch, y_digit_batch in zip(train_input.split(batch_size),
                                                    train_target.split(batch_size),
                                                    digit_target.split(batch_size)):
            
            if auxiliary :
                
                output, d1, d2 = model(x_batch)

                loss_digit1 = digit_loss(d1, y_digit_batch[..., 0])
                loss_digit2 = digit_loss(d2, y_digit_batch[..., 1])
                loss_final  = loss_function(output, y_batch)

                loss_total = loss_final + 0.4*(loss_digit1 + loss_digit2)
                
                sum_loss_digit1 += loss_digit1.item()
                sum_loss_digit2 += loss_digit2.item()
                
            else :
                output = model(x_batch)
                loss_total = loss_function(output, y_batch)
                
            optimizer.zero_grad()
            loss_total.backward()  
            optimizer.step()

            sum_loss += loss_total.item()
            
        losses.append(sum_loss)
        
        if verbose : 
            if auxiliary :
                print("Epoch %i: loss %.3f --- loss_d1 %.2f --- loss_d2 %.2f" \
                      % (e,sum_loss, sum_loss_digit1, sum_loss_digit2), end="\r")  
            else :
                print("Epoch %i : loss %.3f" % (e, sum_loss), end = "\r")
    return losses

In [43]:
def evaluate_model(model, test_input, test_target):
    model.eval()
    
    with torch.no_grad():
        out = model(test_input)   
        preds = torch.argmax(out, -1)
        accuracy = (preds.float() == test_target.float()).sum().item()/preds.size(0)
    return accuracy * 100

In [44]:
def evaluate_model_aux(model, test_input, test_target, test_digits = None):
    model.eval()
    
    with torch.no_grad():
        out = model(test_input) 
        
        preds = torch.argmax(torch.softmax(out[0], -1), axis = -1)
        d1 = torch.argmax(torch.softmax(out[1], -1), axis = -1)
        d2 = torch.argmax(torch.softmax(out[2], -1), axis = -1)
        
        accuracy = (preds.float() == test_target.float()).sum().item()/preds.size(0)
        accuracy_d1 = (d1.float() == test_digits[:, 0].float()).sum().item()/preds.size(0)
        accuracy_d2 = (d2.float() == test_digits[:, 1].float()).sum().item()/preds.size(0)
    return accuracy * 100, accuracy_d1 * 100, accuracy_d2 * 100

## GridSearch

In [332]:
models = [model_1, model_best]
models_names = ["model 1", "model best"]
models_best_cfg = []

lrs = dict()

# lrs["Adam"] = [0.003, 0.002, 0.001, 0.0005]
# lrs["SGD"] = [0.1, 0.05, 0.01]
# batchsizes = [16, 32, 64]

lrs["Adam"] = [0.002]
lrs["SGD"] = [0.2]
batchsizes = [16]

epochs = 25
number_training = 2

gridsearch_number = len(lrs["Adam"]) * len(batchsizes) + len(lrs["SGD"]) * len(batchsizes)

model_accuracies = []
model_vars = []
model_parameters = []

nfold = 5
perm = torch.randperm(len(x_train))
folds = perm.split(int(len(x_train) / nfold))

for build_model, model_name in zip(models, models_names):
    
    dummy_model = build_model()
    auxiliary = (type(dummy_model(x_train[0:1])) == tuple)
    print("Starting %i parameter grid search %i times each over %i epochs, with model '%s' containing %i parameters. \n" % 
      (gridsearch_number, number_training, epochs, model_name, number_parameters(dummy_model)))
    del dummy_model
    
    mean_accuracies = []
    var_accuracies = []
    parameters = []
    
    i_param = 0
    for optim in [(torch.optim.Adam, "Adam"), (torch.optim.SGD, "SGD")]:
        for lr in lrs[optim[1]]:
            for batchsize in batchsizes :               
                exp_accuracies = []
                
                print("\rGrid Search %i/%i : " % (i_param, gridsearch_number))
                
                for i_fold in range(nfold):
                    print("Fold %i/%i" % (i_fold+1, nfold), end = "")
                    i_valid = folds[i_fold]
                    i_train = torch.cat((*folds[:i_fold], *folds[i_fold+1:]))
                    
                    for i, rnd_exp in enumerate(range(number_training)):

                        model = build_model()
                        optimizer = optim[0](model.parameters(), lr)

                        if torch.cuda.is_available():
                            model.cuda()

                        train_loss = train_model(model, x_train[i_train].to(device),
                                     y_train[i_train].to(device),
                                     y_train_classes[i_train].to(device),
                                     epochs = epochs, optimizer=optimizer, batch_size=batchsize, 
                                                 auxiliary = auxiliary, verbose = False)


                        if auxiliary :
                            accuracy, accuracy_d1, accuracy_d2 = evaluate_model_aux(model, x_train[i_valid].to(device),
                                                                                    y_train[i_valid].to(device),
                                                                                    y_train_classes[i_valid].to(device))
                        else :
                            accuracy = evaluate_model(model,
                                                      x_train[i_valid].to(device),
                                                      y_train[i_valid].to(device))

                        exp_accuracies.append(accuracy)
                    
                    print(": %.2f%% mean accuracy" % (mean(exp_accuracies[-number_training:])))
                
                print("All folds : %.2f%% mean accuracy" % (mean(exp_accuracies)))
                mean_accuracies.append(mean(exp_accuracies))
                var_accuracies.append(var(exp_accuracies))
                parameters.append((optim[1], lr, batchsize))
    
    print()
    model_accuracies.append(mean_accuracies)
    model_vars.append(var_accuracies)
    model_parameters.append(parameters)


Starting 2 parameter grid search 2 times each over 25 epochs, with model 'model 1' containing 33552 parameters. 

Grid Search 1/2 : 
Fold 1/5: 79.250% mean accuracy
Fold 2/5: 81.100% mean accuracy
Fold 3/5: 80.350% mean accuracy
Fold 4/5: 81.300% mean accuracy
Fold 5/5: 80.800% mean accuracy
All folds : 80.560% mean accuracy
Grid Search 2/2 : 
Fold 1/5: 77.050% mean accuracy
Fold 2/5: 80.100% mean accuracy
Fold 3/5: 80.450% mean accuracy
Fold 4/5: 81.550% mean accuracy
Fold 5/5: 80.200% mean accuracy
All folds : 79.870% mean accuracy

Starting 2 parameter grid search 2 times each over 25 epochs, with model 'model best' containing 506788 parameters. 

Grid Search 1/2 : 
Fold 1/5

KeyboardInterrupt: 

## Experiment parametrization

In [176]:
def experiment_model(model_name, build_model):
    ################################ TRAINING PARAMETERS ################################################ 

    number_training = 10
    epochs = 25

    accuracies = []
    times = []
    
    dummy_model = build_model()
    auxiliary = (type(dummy_model(x_train[0:1])) == tuple)

    print("Starting %i training of %i epochs, with model '%s' containing %i parameters. \n" % 
      (number_training, epochs, model_name, number_parameters(dummy_model)))

    del dummy_model
    
    ################################ TRAINING  ################################################ 
    
    for i_train in range(number_training):  
        start = time.time()

        model = build_model()
        if torch.cuda.is_available():
            model.cuda()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
        #optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=.9, weight_decay=0)
        indices_shuffle = torch.randperm(N)
#         indices_shuffle = list(range(N))
        
        train_loss = train_model(model, x_train[indices_shuffle].to(device),
                                 y_train[indices_shuffle].to(device),
                                 y_train_classes[indices_shuffle].to(device),
                                 epochs = epochs, optimizer=optimizer, batch_size = 64, auxiliary = auxiliary)
                
        if auxiliary :
            accuracy, accuracy_d1, accuracy_d2 = evaluate_model_aux(model, x_test.to(device),
                                                                    y_test.to(device),
                                                                    y_test_classes.to(device))
            print("Attempt", i_train + 1, "- accuracy %.2f%%"%accuracy,
                  " -- d1 : %.2f%% , d2 : %.2f%%" % (accuracy_d1, accuracy_d2))
        else :
            accuracy = evaluate_model(model, x_test.to(device), y_test.to(device))
            print("Attempt", i_train + 1, "- accuracy %.2f%%"%accuracy)

        accuracies.append(accuracy)

        times.append(time.time() - start)

    print("\nExperiment results :")
    print("Accuracy mean : %.2f%% (%.1f)" % (mean(accuracies), var(accuracies)))
    print("Average training time : %.1f seconds (%.1f)\n\n" % (mean(times), var(times)))
    
    return accuracies

## Run experiments

In [46]:
results = []
model_list = [('Simple Net', model_0),
             ('Auxiliary Loss', model_auxiliary_loss),
             ('Weight Sharing', model_weight_sharing),
             ('Auxiliary Loss + Weight Sharing', model_weight_sharing_with_auxiliary_loss)]

In [54]:
model_list = [('Model simple', model_1_aux)]

for model_params in model_list :
    results.append((experiment_model(*model_params), model_params))

Starting 10 training of 25 epochs, with model 'Model simple' containing 71845 parameters. 



RuntimeError: Assertion `cur_target >= 0 && cur_target < n_classes' failed.  at /Users/distiller/project/conda/conda-bld/pytorch_1565272526878/work/aten/src/THNN/generic/ClassNLLCriterion.c:94

In [53]:
model = model_best()
# indices_shuffle = torch.randperm(N)
# indices_shuffle = list(range(999, -1, -1))
indices_shuffle = list(range(1000))
epochs = 25
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
train_model(model, x_train[indices_shuffle], y_train[indices_shuffle], y_train_classes[indices_shuffle],
                    epochs = 25, optimizer=optimizer, batch_size = 64, auxiliary = True)
evaluate_model_aux(model, x_test, y_test, y_test_classes)

Epoch 7: loss 10.562 --- loss_d1 1.74 --- loss_d2 1.5917

KeyboardInterrupt: 

In [100]:
x, d1, d2 = model(x_test[:10])
print(torch.argmax(x, -1).tolist())
print(y_test[:10].tolist())
print()
print(torch.argmax(d1, -1).tolist())
print(y_test_classes[:10, 0].tolist())
print()
print(torch.argmax(d2, -1).tolist())
print(y_test_classes[:10, 1].tolist())

[1, 0, 0, 1, 1, 0, 1, 1, 1, 1]
[1, 0, 0, 1, 0, 1, 0, 1, 1, 1]

[0, 4, 8, 2, 5, 0, 7, 1, 6, 1]
[0, 4, 8, 2, 5, 0, 7, 1, 6, 1]

[7, 1, 7, 9, 4, 0, 6, 5, 8, 0]
[7, 1, 7, 9, 4, 0, 6, 9, 8, 7]


## Results comparision between models

In [None]:
for i, (result, model_params) in enumerate(results):
    plt.plot(result, label = model_params[0] + "_" + str(i))
    plt.legend()
    
plt.show()