# DEEP LEARNING PROJECT 1
---

In [1]:
# Google Colab only: mount the user's Google Drive under /content/drive so the
# notebook can read and write files stored there.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
%cd /content/drive/My Drive/deep

/content/drive/My Drive/deep


In [0]:
# Global configuration for the notebook. Many of these values are bound as
# default arguments of the functions/classes defined in later cells, so this
# cell must run before those definitions.

# Shape of one input sample (2 channels of 14x14 pixels).
# NOTE(review): not referenced by any of the visible cells.
INPUT_SIZE = (2, 14, 14)
# Number of times each hyper-parameter search is repeated (results are averaged over rounds).
rounds = 10

import torch.nn as nn
import numpy as np

# Number of digit pairs drawn for the training set and for the test set.
NB_SAMPLES = 1000
# Root directory under which the MNIST files are downloaded.
DATA_DIR = './data'

# Number of digit classes predicted by the inner (per-image) network.
NUMBER_OF_CLASSES = 10

# Side length of one down-sampled image, and the derived flattened sizes.
WIDTH_HEIGHT = 14
SINGLE_IMAGE_SIZE = WIDTH_HEIGHT * WIDTH_HEIGHT
DOUBLE_IMAGE_SIZE = 2 * SINGLE_IMAGE_SIZE

# ----Train Config-----#
# Initial learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.001
TRAIN_BATCH_SIZE = 5
# Loss applied to the raw digit logits of each sub-network (auxiliary loss).
SUB_CRITERION = nn.CrossEntropyLoss()
# Loss applied to the sigmoid output of the comparison network (main loss).
FINAL_CRITERION = nn.BCELoss()
EPOCHS = 20

# ----AuxLoss Config-----#
# Weight of the main loss; each of the two digit losses receives (1 - ALPHA) / 2.
ALPHA = 0.5

# NOTE(review): not referenced by any of the visible cells.
BEST_ALPHA_O = 0

# ----Search Config-----#
# Candidate hidden-layer widths and hidden-layer counts explored by the searches.
FCNEURONS = [32,64,128, 256,512]
NB_LAYERS = [1, 2]
# 10 evenly spaced values from 0 to 1 inclusive, i.e. a step of 1/9 (0, 0.111, 0.222, ...).
ALPHAS = np.linspace(0, 1, 10)

#----Test Config-----#
# The whole test set (NB_SAMPLES pairs) is evaluated as a single batch.
TEST_BATCH_SIZE = NB_SAMPLES

#----OscarNet Config-----#
# Defaults for the inner, per-image digit classifier.
OSCAR_NET_NAME = "oscar_net"
OSCAR_NET_HIDDEN_LAYER = 64
OSCAR_NET_NB_HIDDEN = 1

# NOTE(review): not referenced by any of the visible cells.
OSCAR_BEST_HIDDEN = 0
OSCAR_BEST_NB = 0

#----DesmondNet Config-----#
# Defaults for the outer comparison network.
DESMOND_NET_NAME = "desmond_net"
DESMOND_NET_HIDDEN_LAYER = 64
DESMOND_NET_NB_HIDDEN = 1

# NOTE(review): not referenced by any of the visible cells.
DESMOND_BEST_HIDDEN = 0
DESMOND_BEST_NB = 0

# Helpers

In [0]:
############################################# HELPERS ###############################

import torch
from torchvision import datasets
import os
import time
from pathlib import Path


######################################################################
# The data

def convert_to_one_hot_labels(input, target):
    """Encode integer class labels as one-hot rows.

    Args:
        input: tensor used only as a template; the result is created with
            ``input.new_zeros`` and therefore shares its dtype and device.
        target: 1-D integer tensor of class indices.

    Returns:
        Tensor with ``target.size(0)`` rows and ``target.max() + 1`` columns,
        holding 1.0 at column ``target[i]`` of row ``i`` and 0 elsewhere.
    """
    n_rows = target.size(0)
    n_classes = target.max() + 1
    column_index = target.view(-1, 1)
    one_hot = input.new_zeros(n_rows, n_classes)
    # scatter_ writes in place and returns the same tensor
    return one_hot.scatter_(1, column_index, 1.0)

def load_data(cifar = None, one_hot_labels = False, normalize = False, flatten = True,
              nb_samples = 1000, data_dir = './data'):
    """Load a reduced MNIST or CIFAR-10 train/test split as tensors.

    Args:
        cifar: when truthy, load CIFAR-10; otherwise (including None) load MNIST.
        one_hot_labels: when True, convert both target tensors to one-hot
            rows with ``convert_to_one_hot_labels``.
        normalize: when True, standardise both inputs in place using the mean
            and standard deviation of the (reduced) training input.
        flatten: when True, reshape every sample to a 1-D vector.
        nb_samples: number of leading samples kept from each split
            (default 1000).
        data_dir: root directory used for downloading/caching the datasets
            (default './data').

    Returns:
        Tuple ``(train_input, train_target, test_input, test_target)``.
    """

    if cifar:
        print('* Using CIFAR')
        cifar_train_set = datasets.CIFAR10(data_dir + '/cifar10/', train = True, download = True)
        cifar_test_set = datasets.CIFAR10(data_dir + '/cifar10/', train = False, download = True)

        # The raw arrays are (N, H, W, C); the two transposes move channels to dim 1 -> (N, C, H, W)
        train_input = torch.from_numpy(cifar_train_set.data)
        train_input = train_input.transpose(3, 1).transpose(2, 3).float()
        train_target = torch.tensor(cifar_train_set.targets, dtype = torch.int64)

        test_input = torch.from_numpy(cifar_test_set.data)
        test_input = test_input.transpose(3, 1).transpose(2, 3).float()
        test_target = torch.tensor(cifar_test_set.targets, dtype = torch.int64)

    else:
        print('* Using MNIST')
        mnist_train_set = datasets.MNIST(data_dir + '/mnist/', train = True, download = True)
        mnist_test_set = datasets.MNIST(data_dir + '/mnist/', train = False, download = True)

        train_input = mnist_train_set.data.view(-1, 1, 28, 28).float()
        train_target = mnist_train_set.targets
        test_input = mnist_test_set.data.view(-1, 1, 28, 28).float()
        test_target = mnist_test_set.targets

    if flatten:
        train_input = train_input.clone().reshape(train_input.size(0), -1)
        test_input = test_input.clone().reshape(test_input.size(0), -1)

    # Keep only the first nb_samples samples of each split
    train_input = train_input.narrow(0, 0, nb_samples)
    train_target = train_target.narrow(0, 0, nb_samples)
    test_input = test_input.narrow(0, 0, nb_samples)
    test_target = test_target.narrow(0, 0, nb_samples)

    print('** Use {:d} train and {:d} test samples'.format(train_input.size(0), test_input.size(0)))

    if one_hot_labels:
        train_target = convert_to_one_hot_labels(train_input, train_target)
        test_target = convert_to_one_hot_labels(test_input, test_target)

    if normalize:
        # Statistics come from the training input only and are applied to both splits
        mu, std = train_input.mean(), train_input.std()
        train_input.sub_(mu).div_(std)
        test_input.sub_(mu).div_(std)

    return train_input, train_target, test_input, test_target

######################################################################

def mnist_to_pairs(nb, input, target):
    """Build ``nb`` random image pairs with a "first <= second" label.

    Args:
        nb: number of pairs to draw; ``2 * nb`` distinct samples are used, so
            ``input`` must contain at least that many samples.
        input: image tensor of shape (N, C, H, W); it is down-sampled by a
            factor 2 with average pooling before pairing.
        target: 1-D tensor with the class of each of the N images.

    Returns:
        Tuple ``(pairs, target, classes)`` where ``pairs`` concatenates the two
        pooled images of each pair along dim 1, ``classes`` has shape (nb, 2)
        and holds the class of each image, and ``target`` is 1 where
        ``classes[:, 0] <= classes[:, 1]`` and 0 otherwise (int64).
    """
    # Use the public functional API rather than the incidental `torch.functional.F` alias
    input = torch.nn.functional.avg_pool2d(input, kernel_size = 2)
    # Random permutation of the sample indices, cut to 2*nb and arranged as nb index pairs
    a = torch.randperm(input.size(0))
    a = a[:2 * nb].view(nb, 2)
    input = torch.cat((input[a[:, 0]], input[a[:, 1]]), 1)
    classes = target[a]
    target = (classes[:, 0] <= classes[:, 1]).long()
    return input, target, classes

######################################################################

def generate_pair_sets(nb):
    """Create ``nb`` random digit pairs from each MNIST split.

    The MNIST files are downloaded under ``DATA_DIR + '/mnist/'`` if needed.

    Returns:
        A 6-tuple: the ``(input, target, classes)`` triple produced by
        ``mnist_to_pairs`` for the training split, followed by the same triple
        for the test split.
    """
    mnist_root = DATA_DIR + '/mnist/'

    pair_sets = ()
    # Training split first, then test split, so the result keeps the (train..., test...) layout
    for is_train in (True, False):
        split = datasets.MNIST(mnist_root, train = is_train, download = True)
        images = split.data.view(-1, 1, 28, 28).float()
        pair_sets += mnist_to_pairs(nb, images, split.targets)

    return pair_sets

######################################################################

######################################################################

def save_model(model, epoch=None, loss=None, save_dir=None, specific_name=None):
    """Save ``model.state_dict()`` inside ``save_dir``.

    The file name is chosen as follows:

    * when both ``epoch`` and ``loss`` are given, a timestamped name
      ``<time>_<model.model_name>_epoch_<epoch>_loss_<loss>.pt`` is used
      (``specific_name`` is not needed and is ignored in that case);
    * otherwise, when ``specific_name`` is given, that name is used as is.

    Nothing is written when ``save_dir`` is missing or when neither naming
    scheme applies. ``save_dir`` is created (with parents) if it does not exist.

    Args:
        model: module to save; must expose ``model_name`` for the timestamped form.
        epoch: epoch number embedded in the timestamped file name.
        loss: loss value embedded in the timestamped file name (3 decimals).
        save_dir: destination directory.
        specific_name: explicit file name for the non-timestamped form.
    """
    if not save_dir:
        return

    # Compare against None so that legitimate values such as epoch=0 or loss=0.0 are honoured
    if epoch is not None and loss is not None:
        timestr = time.strftime("%Y%m%d-%H%M%S")
        file_name = f"{timestr}_{model.model_name}_epoch_{epoch}_loss_{loss:03.3f}.pt"
    elif specific_name:
        file_name = specific_name
    else:
        return

    target_dir = Path(save_dir)
    # Both naming schemes need the directory to exist before torch.save is called
    target_dir.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), str(target_dir / file_name))

# Modules

In [0]:
import torch.nn as nn
import torch
from torch.nn import functional as F
import math

class OscarNet(nn.Module):
    """Fully connected digit classifier applied to a single image.

    The network is a stack of ``nb_hidden_layers`` blocks
    (Linear -> LeakyReLU -> Dropout(0.2)), each ``hidden_layer`` units wide,
    followed by a linear layer producing ``NUMBER_OF_CLASSES`` logits. The
    first block takes ``SINGLE_IMAGE_SIZE`` input features.
    """

    def __init__(self, nb_hidden_layers= OSCAR_NET_NB_HIDDEN, hidden_layer = OSCAR_NET_HIDDEN_LAYER):
        super().__init__()
        self.model_name = OSCAR_NET_NAME

        if nb_hidden_layers < 1:
            raise Exception("Minimum 1 hidden layers for " + self.model_name)

        def make_block(in_features):
            # One hidden block: Linear -> LeakyReLU -> Dropout
            return nn.Sequential(nn.Linear(in_features, hidden_layer), nn.LeakyReLU(), nn.Dropout(p=0.2))

        # The hidden-to-hidden blocks are instantiated before the input block so
        # that parameter initialisation consumes random numbers in the same order
        # as inserting the input block at position 0 afterwards.
        deeper_blocks = [make_block(hidden_layer) for _ in range(nb_hidden_layers - 1)]
        input_block = make_block(SINGLE_IMAGE_SIZE)
        self.hiddens = nn.ModuleList([input_block] + deeper_blocks)

        self.output = nn.Linear(hidden_layer, NUMBER_OF_CLASSES)

    def forward(self, x):
        """Return ``(softmax probabilities, raw logits)`` for a batch of images."""
        # Flatten everything but the batch dimension
        features = x.view(x.size(0), -1)

        for layer in self.hiddens:
            features = layer(features)

        logits = self.output(features)
        return F.softmax(logits, dim=1), logits


class DesmondNet(nn.Module):
    """Siamese comparison network built on top of per-image digit classifiers.

    Each of the two input channels is passed through a sub-network that
    returns ``(softmax probabilities, logits)``. The two outputs are
    concatenated and fed through ``nb_hidden_layers`` blocks
    (Linear -> LeakyReLU -> Dropout(0.2)) and a final linear layer with a
    single output, squashed by a sigmoid.

    Args:
        left_net: sub-network applied to the first channel.
        right_net: sub-network applied to the second channel; when None the
            weights are shared, i.e. ``left_net`` processes both channels.
        nb_hidden_layers: number of hidden blocks in the comparison head (>= 0).
        hidden_layer: width of each hidden block.
        soft: when True the head receives the softmax probabilities of the
            sub-networks, otherwise their raw logits.
    """

    def __init__(self, left_net, right_net = None, nb_hidden_layers = DESMOND_NET_NB_HIDDEN, hidden_layer=DESMOND_NET_HIDDEN_LAYER, soft = False):
        super(DesmondNet, self).__init__()
        self.model_name = DESMOND_NET_NAME

        if nb_hidden_layers < 0:
            raise Exception("Minimum 0 hidden layers for " + self.model_name)

        self.subnets = nn.ModuleList([left_net])

        self.soft = soft

        # Identity check: a missing right network means both channels share left_net
        self.weight_sharing = right_net is None
        if not self.weight_sharing:
            self.subnets.append(right_net)

        self.hiddens = nn.ModuleList()

        if nb_hidden_layers > 0:
            self.hiddens = nn.ModuleList([nn.Sequential(nn.Linear(hidden_layer, hidden_layer), nn.LeakyReLU(), nn.Dropout(p=0.2)) for i in range(nb_hidden_layers-1)])

            # First block consumes the two concatenated class vectors
            self.hiddens.insert(0,nn.Sequential(nn.Linear(NUMBER_OF_CLASSES*2, hidden_layer), nn.LeakyReLU(), nn.Dropout(p=0.2)))

            self.output = nn.Linear(hidden_layer, 1)
        else:
            # No hidden block: map the concatenated class vectors straight to the output
            self.output = nn.Linear(NUMBER_OF_CLASSES*2, 1)

    def forward(self, x):
        """Return ``(sigmoid output, left logits, right logits)`` for a batch of pairs."""
        # Split x along the channel dimension, keeping that dimension (size 1) in each half
        input1 = x[:,0:1,:,:]
        input2 = x[:,1:2,:,:]

        lefted, lefted_no = self.subnets[0](input1)
        right_subnet = self.subnets[0] if self.weight_sharing else self.subnets[1]
        righted, righted_no = right_subnet(input2)

        # Concatenate the two per-image outputs along the feature dimension
        if self.soft:
            hid = torch.cat((lefted, righted),1)
        else:
            hid = torch.cat((lefted_no, righted_no),1)

        for block in self.hiddens:
            hid = block(hid)

        out = self.output(hid)

        return torch.sigmoid(out), lefted_no, righted_no

# Train & Predict

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.optim.lr_scheduler import StepLR

def train_siamese(model, 
                  dataloader, 
                  test_dataloader,
                  epochs = EPOCHS,
                  final_criterion = FINAL_CRITERION, 
                  learning_rate = LEARNING_RATE,
                  aux_loss = False,
                  sub_criterion = SUB_CRITERION, 
                  alpha = ALPHA):
    """Train a siamese model and evaluate it on the test loader after every epoch.

    Optimisation uses Adam with a learning rate multiplied by 0.9 after each
    epoch. The model is moved to CUDA when it is available.

    Args:
        model: network returning ``(output, left_logits, right_logits)``.
        dataloader: training loader yielding dicts with the keys "images",
            "bool_labels" and "digit_labels".
        test_dataloader: loader passed to ``predict_siamese`` after each epoch.
        epochs: number of training epochs.
        final_criterion: loss applied to the flattened model output and the
            flattened float pair labels.
        learning_rate: initial learning rate.
        aux_loss: when True the optimised loss is
            ``alpha * main + (1 - alpha) / 2 * left + (1 - alpha) / 2 * right``;
            otherwise only the main loss is optimised.
        sub_criterion: loss applied to each sub-network's logits and the
            matching column of the digit labels.
        alpha: weight of the main loss when ``aux_loss`` is True.

    Returns:
        Eight lists with one entry per epoch: training loss, training accuracy,
        training left-digit loss, training right-digit loss, then the same four
        quantities on the test loader (in the order loss, accuracy, left, right).
        Losses are averaged over batches.
    """
    cuda = torch.cuda.is_available()
    if cuda:
        model = model.to(device="cuda")

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # gamma is the decaying factor, after every 1 epoch new_lr = lr*gamma 
    scheduler = StepLR(optimizer, step_size=1, gamma = 0.9)

    training_losses = []
    training_acc = []
    training_losses_l = []
    training_losses_r = []

    test_losses = []
    test_acc = []
    test_losses_l = []
    test_losses_r = []

    for epoch in range(1, epochs+1):
        # predict_siamese leaves the model in eval mode, so re-enable training mode every epoch
        model.train()

        sum_loss_epoch = 0
        sum_loss_epoch_l = 0
        sum_loss_epoch_r = 0
        total = 0
        correct = 0

        for sample_batched in dataloader:

            images = sample_batched["images"]
            labels = sample_batched["bool_labels"].unsqueeze(1)
            digit_labels = sample_batched["digit_labels"]

            if cuda:
                images = images.to(device="cuda")
                labels = labels.to(device="cuda")
                digit_labels = digit_labels.to(device="cuda")

            optimizer.zero_grad()

            output, lefted, righted = model(images)

            loss = final_criterion(output.flatten(), labels.float().flatten())
            # The digit losses are always computed so they can be reported,
            # even when they do not contribute to the optimised loss.
            loss_left = sub_criterion(lefted, digit_labels[:,0])
            loss_right = sub_criterion(righted, digit_labels[:,1])

            if aux_loss:
                loss = alpha * loss + ((1-alpha)/2) * loss_left + ((1-alpha)/2) * loss_right

            loss.backward()
            optimizer.step()

            # Update the accuracy counters with plain Python numbers
            total += images.size(0)
            correct += (output.round() == labels).sum().item()

            # Add the losses of this batch to the running totals of the epoch
            sum_loss_epoch = sum_loss_epoch + loss.item()
            sum_loss_epoch_l = sum_loss_epoch_l + loss_left.item()
            sum_loss_epoch_r = sum_loss_epoch_r + loss_right.item()

        scheduler.step()

        # Average over the number of batches to obtain the losses of this epoch
        nb_batches = float(len(dataloader))
        training_losses.append(sum_loss_epoch / nb_batches)
        training_losses_l.append(sum_loss_epoch_l / nb_batches)
        training_losses_r.append(sum_loss_epoch_r / nb_batches)
        training_acc.append(float(correct) / float(total))

        test_loss, test_accuracy, test_loss_l, test_loss_r = predict_siamese(model,
                                                                     test_dataloader,
                                                                     final_criterion,
                                                                     aux_loss,
                                                                     sub_criterion,
                                                                     alpha)

        test_losses.append(test_loss)
        test_acc.append(test_accuracy)
        test_losses_l.append(test_loss_l)
        test_losses_r.append(test_loss_r)

    return training_losses, training_acc, training_losses_l, training_losses_r, test_losses, test_acc, test_losses_l, test_losses_r


def predict_siamese(model, 
            dataloader,
            final_criterion = FINAL_CRITERION,
            aux_loss = False,
            sub_criterion = SUB_CRITERION, 
            alpha = ALPHA):
    """Evaluate a siamese model on ``dataloader`` without updating it.

    The model is switched to eval mode (and left in that mode) and moved to
    CUDA when it is available. Gradient tracking is disabled for the whole
    evaluation.

    Args:
        model: network returning ``(output, left_logits, right_logits)``.
        dataloader: loader yielding dicts with the keys "images",
            "bool_labels" and "digit_labels".
        final_criterion: loss applied to the flattened model output and the
            flattened float pair labels.
        aux_loss: when True the reported loss is
            ``alpha * main + (1 - alpha) / 2 * left + (1 - alpha) / 2 * right``.
        sub_criterion: loss applied to each sub-network's logits and the
            matching column of the digit labels.
        alpha: weight of the main loss when ``aux_loss`` is True.

    Returns:
        Tuple ``(mean_loss, accuracy, mean_loss_left, mean_loss_right)``;
        losses are averaged over batches, accuracy over samples.
    """
    model.eval()

    cuda = torch.cuda.is_available()
    if cuda:
        model = model.to(device="cuda")

    sum_loss = 0
    sum_loss_l = 0
    sum_loss_r = 0
    total = 0
    correct = 0

    # No backward pass happens here, so skip building the autograd graph
    with torch.no_grad():
        for sample_batched in dataloader:

            images = sample_batched["images"]
            labels = sample_batched["bool_labels"]
            digit_labels = sample_batched["digit_labels"]

            if cuda:
                images = images.to(device="cuda")
                labels = labels.to(device="cuda")
                digit_labels = digit_labels.to(device="cuda")

            output, lefted, righted = model(images)

            # Give the labels a trailing dimension so they compare element-wise with the output
            labels = labels.unsqueeze(1)

            loss = final_criterion(output.flatten(), labels.float().flatten())
            loss_left = sub_criterion(lefted, digit_labels[:,0])
            loss_right = sub_criterion(righted, digit_labels[:,1])

            if aux_loss:
                loss = alpha * loss + ((1-alpha)/2) * loss_left + ((1-alpha)/2) * loss_right

            # Update the accuracy counters with plain Python numbers
            total += images.size(0)
            correct += (output.round() == labels).sum().item()

            # Add the losses of this batch to the running totals
            sum_loss = sum_loss + loss.item()
            sum_loss_l = sum_loss_l + loss_left.item()
            sum_loss_r = sum_loss_r + loss_right.item()

    # Average over the number of batches
    mean_loss = sum_loss / float(len(dataloader))
    mean_loss_l = sum_loss_l / float(len(dataloader))
    mean_loss_r = sum_loss_r / float(len(dataloader))

    accuracy = float(correct) / float(total)

    return mean_loss, accuracy, mean_loss_l, mean_loss_r

# Data

In [0]:
######## DATA #####################################################
import torch.utils.data as data
import matplotlib.pyplot as plt
import numpy as np

from torch.utils.data import Dataset


class PairDataset(Dataset):
    """Dataset of image pairs with a pair-level label and optional digit labels.

    Args:
        data: indexable collection of pair images.
        bool_labels: indexable collection with one label per pair; its length
            defines the dataset length.
        digit_labels: optional indexable collection with the digit classes of
            each pair. When omitted, samples carry no "digit_labels" entry.
    """

    def __init__(self, data, bool_labels, digit_labels = None):
        self.images = data
        self.bool_labels = bool_labels
        # Always define the attribute (possibly as None) so that __getitem__
        # can test it without raising AttributeError.
        self.digit_labels = digit_labels

    def __len__(self):
        # override the class method. return the length of data
        return len(self.bool_labels)

    def __getitem__(self, idx):
        # override the class method. return the item at the index(idx)
        sample = {"images" : self.images[idx],
                  "bool_labels" : self.bool_labels[idx]}

        if self.digit_labels is not None:
            sample["digit_labels"] = self.digit_labels[idx]

        return sample
    
class SingleDataset(Dataset):
    """Dataset of single images, each paired with its digit label.

    Samples are returned as dicts with the keys "images" and "digit_labels".
    """

    def __init__(self, data, digit_labels):
        self.images, self.digit_labels = data, digit_labels

    def __len__(self):
        # The number of labels defines the dataset length
        return len(self.digit_labels)

    def __getitem__(self, idx):
        # Build the sample dict for position idx
        return dict(images=self.images[idx], digit_labels=self.digit_labels[idx])



# Draw NB_SAMPLES random digit pairs from each MNIST split. The returned tuple is
# (train_input, train_target, train_classes, test_input, test_target, test_classes).
pairs = generate_pair_sets(NB_SAMPLES)

# Training loader: small shuffled mini-batches of TRAIN_BATCH_SIZE pairs.
train_dataset = PairDataset(pairs[0], pairs[1], pairs[2])
train_dataloader = data.DataLoader(dataset=train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)

# Test loader: TEST_BATCH_SIZE equals NB_SAMPLES, so the whole test set forms one batch.
test_dataset = PairDataset(pairs[3], pairs[4], pairs[5])
test_dataloader = data.DataLoader(dataset=test_dataset, batch_size=TEST_BATCH_SIZE, shuffle=True)

##############################################################

# FCNEURONS (inner network) search

In [8]:
# Grid search over the inner network (OscarNet): every combination of
# NB_LAYERS x FCNEURONS is trained from scratch, with weight sharing and the
# auxiliary loss enabled, while the outer DesmondNet keeps its default head.
# The whole grid is repeated `rounds` times.

# Only reports GPU availability; train_siamese performs its own device check.
cuda = torch.cuda.is_available()
if cuda:
    print("CUDA available")
else:
    print("NO CUDA")

# Indexed as [round][nb_layers index][neurons index][metric][epoch]
round_results_nb_neurons_inner = [] #3D

for i in range(rounds):
    results = [] #training_losses, training_acc, test_losses, test_acc
    
    print('round {0} start'.format(i+1))
    
    for ind1, nb in enumerate(NB_LAYERS):
        results_tmp = []
        
        for ind2, n in enumerate(FCNEURONS):
        
            # Fresh, randomly initialised networks for every configuration
            classifier = OscarNet(nb_hidden_layers = nb, hidden_layer = n)

            model = DesmondNet(classifier)

            # The per-digit loss curves returned by train_siamese are discarded here
            training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(model = model,
                                         dataloader = train_dataloader,
                                         test_dataloader = test_dataloader,
                                         epochs = EPOCHS,
                                         final_criterion = FINAL_CRITERION, 
                                         learning_rate = LEARNING_RATE,
                                         aux_loss = True,
                                         sub_criterion = SUB_CRITERION, 
                                         alpha = ALPHA)

            # Progress counter: position of this configuration within the grid
            print('{0}/{1}'.format(ind1 * len(FCNEURONS) + ind2 + 1, len(NB_LAYERS) * len(FCNEURONS)))
            print('With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : {0}/{1}/{2}/{3}/{4}'.format(nb,
                                                                                                                   n,
                                                                                                                   DESMOND_NET_NB_HIDDEN,
                                                                                                                   DESMOND_NET_HIDDEN_LAYER,
                                                                                                                   ALPHA))
            # Metrics after the last epoch
            final_test_loss, final_test_loss_acc = test_losses[-1], test_acc[-1]
            print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss,final_test_loss_acc))

            results_tmp.append([training_losses, training_acc, test_losses, test_acc])
    
        results.append(results_tmp)
    
    print('round {0} end'.format(i+1))
    round_results_nb_neurons_inner.append(results)

CUDA available
round 1 start
1/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 1/32/1/64/0.5
On the test set we obtain a loss of 0.45 and an accuracy of 0.86
2/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 1/64/1/64/0.5
On the test set we obtain a loss of 0.44 and an accuracy of 0.85
3/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 1/128/1/64/0.5
On the test set we obtain a loss of 0.40 and an accuracy of 0.86
4/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 1/256/1/64/0.5
On the test set we obtain a loss of 0.46 and an accuracy of 0.88
5/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 1/512/1/64/0.5
On the test set we obtain a loss of 0.53 and an accuracy of 0.88
6/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/32/1/64/0.5
On the test set we obtain a loss of 0.35 and an accuracy of 0.88
7/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/h

In [9]:
# Persist the raw curves of the inner-network search (np.savez appends ".npz").
np.savez("results-fcns-nb_fcneurons_inner_search",round_results_nb_neurons_inner)

# Axes: (round, nb_layers index, neurons index, metric, epoch)
copy_of = np.array(round_results_nb_neurons_inner).copy()

# Metric 3 is the test accuracy; keep only its value after the last epoch.
# NOTE(review): the configuration is therefore selected on test-set accuracy —
# confirm this is intended, since no separate validation split is used here.
last_accs_only = copy_of[:, :, :, 3, EPOCHS-1]

# Mean and standard deviation over the rounds, one value per configuration
means_fcni = last_accs_only.mean(axis=0)
stds_fcni = last_accs_only.std(axis=0)

print(means_fcni)
print(stds_fcni)

# Flat index of the best mean accuracy ...
raveled_i_max = np.argmax(means_fcni)

print(raveled_i_max)

# ... converted back to (nb_layers index, neurons index)
unraveled_i_max = np.unravel_index(raveled_i_max, means_fcni.shape)

print(unraveled_i_max)

# Best inner-network hyper-parameters, reused by the following searches
best_nb_hidden_inner = NB_LAYERS[unraveled_i_max[0]]
best_hidden_layer_inner = FCNEURONS[unraveled_i_max[1]]

print(best_nb_hidden_inner)
print(best_hidden_layer_inner)

[[0.8545 0.864  0.8675 0.8722 0.8776]
 [0.8896 0.8978 0.8983 0.8983 0.8969]]
[[0.00941541 0.00781025 0.00824924 0.00778203 0.00631189]
 [0.00971802 0.00713863 0.00436005 0.00760329 0.00450444]]
8
(1, 3)
2
256


# FCNEURONS (outer network) search

In [10]:
# Grid search over the outer network (DesmondNet head): every combination of
# NB_LAYERS x FCNEURONS is trained from scratch on top of an OscarNet built
# with the best inner hyper-parameters found by the previous search.
# The whole grid is repeated `rounds` times.

# Only reports GPU availability; train_siamese performs its own device check.
cuda = torch.cuda.is_available()
if cuda:
    print("CUDA available")
else:
    print("NO CUDA")

# Indexed as [round][nb_layers index][neurons index][metric][epoch]
round_results_nb_neurons_outer = [] #3D

for i in range(rounds):
    results = [] #training_losses, training_acc, test_losses, test_acc
    
    print('round {0} start'.format(i+1))
    
    for ind1, nb in enumerate(NB_LAYERS):
        results_tmp = []
        
        for ind2, n in enumerate(FCNEURONS):
        
            # Fresh, randomly initialised networks for every configuration
            classifier = OscarNet(nb_hidden_layers = best_nb_hidden_inner, hidden_layer = best_hidden_layer_inner)

            model = DesmondNet(classifier, nb_hidden_layers = nb, hidden_layer = n)

            # The per-digit loss curves returned by train_siamese are discarded here
            training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(model = model,
                                         dataloader = train_dataloader,
                                         test_dataloader = test_dataloader,
                                         epochs = EPOCHS,
                                         final_criterion = FINAL_CRITERION, 
                                         learning_rate = LEARNING_RATE,
                                         aux_loss = True,
                                         sub_criterion = SUB_CRITERION, 
                                         alpha = ALPHA)

            # Progress counter: position of this configuration within the grid
            print('{0}/{1}'.format(ind1 * len(FCNEURONS) + ind2 + 1, len(NB_LAYERS) * len(FCNEURONS)))
            print('With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : {0}/{1}/{2}/{3}/{4}'.format(best_nb_hidden_inner,
                                                                                                                   best_hidden_layer_inner,
                                                                                                                   nb,
                                                                                                                   n,
                                                                                                                   ALPHA))
            # Metrics after the last epoch
            final_test_loss, final_test_loss_acc = test_losses[-1], test_acc[-1]
            print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss,final_test_loss_acc))

            results_tmp.append([training_losses, training_acc, test_losses, test_acc])
    
        results.append(results_tmp)
    
    print('round {0} end'.format(i+1))
    round_results_nb_neurons_outer.append(results)

CUDA available
round 1 start
1/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/1/32/0.5
On the test set we obtain a loss of 0.41 and an accuracy of 0.90
2/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/1/64/0.5
On the test set we obtain a loss of 0.43 and an accuracy of 0.90
3/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/1/128/0.5
On the test set we obtain a loss of 0.41 and an accuracy of 0.90
4/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/1/256/0.5
On the test set we obtain a loss of 0.37 and an accuracy of 0.90
5/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/1/512/0.5
On the test set we obtain a loss of 0.46 and an accuracy of 0.92
6/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/2/32/0.5
On the test set we obtain a loss of 0.44 and an accuracy of 0.89
7/10
With parameters nb_hid_inner/hid_inner/nb_hid

In [11]:
# Persist the raw curves of the outer-network search (np.savez appends ".npz").
np.savez("results-fcns-nb_fcneurons_outer_search",round_results_nb_neurons_outer)

# Axes: (round, nb_layers index, neurons index, metric, epoch)
copy_of = np.array(round_results_nb_neurons_outer).copy()

# Metric 3 is the test accuracy; keep only its value after the last epoch.
# NOTE(review): the configuration is therefore selected on test-set accuracy —
# confirm this is intended, since no separate validation split is used here.
last_accs_only = copy_of[:, :, :, 3, EPOCHS-1]

# Mean and standard deviation over the rounds, one value per configuration
means_fcno = last_accs_only.mean(axis=0)
stds_fcno = last_accs_only.std(axis=0)

print(means_fcno)
print(stds_fcno)

# Flat index of the best mean accuracy ...
raveled_i_max = np.argmax(means_fcno)

print(raveled_i_max)

# ... converted back to (nb_layers index, neurons index)
unraveled_i_max = np.unravel_index(raveled_i_max, means_fcno.shape)

print(unraveled_i_max)

# Best outer-network hyper-parameters, reused by the alpha search
best_nb_hidden_outer = NB_LAYERS[unraveled_i_max[0]]
best_hidden_layer_outer = FCNEURONS[unraveled_i_max[1]]

print(best_nb_hidden_outer)
print(best_hidden_layer_outer)

[[0.8946 0.9038 0.9028 0.9086 0.9108]
 [0.8933 0.9047 0.911  0.9138 0.9182]]
[[0.00407922 0.00663023 0.00575847 0.0051225  0.00754718]
 [0.00549636 0.00679779 0.00471169 0.00437721 0.00552811]]
9
(1, 4)
2
512


# ALPHA search

In [12]:
# Search over the auxiliary-loss weight: for every value in ALPHAS a fresh model
# with the best inner/outer hyper-parameters is trained with aux_loss enabled.
# With alpha = 0 the optimised loss contains only the two digit losses.
# The whole sweep is repeated `rounds` times.

# Only reports GPU availability; train_siamese performs its own device check.
cuda = torch.cuda.is_available()
if cuda:
    print("CUDA available")
else:
    print("NO CUDA")

# Indexed as [round][alpha index][metric][epoch]
round_results_alpha = [] #3D

for i in range(rounds):
    results = [] #training_losses, training_acc, test_losses, test_acc
    
    print('round {0} start'.format(i+1))
    
    for ind, a in enumerate(ALPHAS):
        
        # Fresh, randomly initialised networks for every alpha
        classifier = OscarNet(nb_hidden_layers = best_nb_hidden_inner, hidden_layer = best_hidden_layer_inner)

        model = DesmondNet(classifier, nb_hidden_layers = best_nb_hidden_outer, hidden_layer = best_hidden_layer_outer)
        
        # The per-digit loss curves returned by train_siamese are discarded here
        training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(model = model,
                                     dataloader = train_dataloader,
                                     test_dataloader = test_dataloader,
                                     epochs = EPOCHS,
                                     final_criterion = FINAL_CRITERION, 
                                     learning_rate = LEARNING_RATE,
                                     aux_loss = True,
                                     sub_criterion = SUB_CRITERION, 
                                     alpha = a)
        
        print('{0}/{1}'.format(ind+1, len(ALPHAS)))
        print('With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : {0}/{1}/{2}/{3}/{4}'.format(best_nb_hidden_inner,
                                                                                                                best_hidden_layer_inner,
                                                                                                                best_nb_hidden_outer,
                                                                                                                best_hidden_layer_outer,
                                                                                                                round(a, 2)))
        # Metrics after the last epoch
        final_test_loss, final_test_loss_acc = test_losses[-1], test_acc[-1]
        print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss,final_test_loss_acc))
        
        results.append([training_losses, training_acc, test_losses, test_acc])
    
    
    print('round {0} end'.format(i+1))
    round_results_alpha.append(results)

CUDA available
round 1 start
1/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/2/512/0.0
On the test set we obtain a loss of 0.34 and an accuracy of 0.55
2/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/2/512/0.11
On the test set we obtain a loss of 0.32 and an accuracy of 0.92
3/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/2/512/0.22
On the test set we obtain a loss of 0.33 and an accuracy of 0.92
4/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/2/512/0.33
On the test set we obtain a loss of 0.35 and an accuracy of 0.92
5/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/2/512/0.44
On the test set we obtain a loss of 0.48 and an accuracy of 0.91
6/10
With parameters nb_hid_inner/hid_inner/nb_hid_out/hid_outer/alpha : 2/256/2/512/0.56
On the test set we obtain a loss of 0.38 and an accuracy of 0.92
7/10
With parameters nb_hid_inner/hid_inne

In [13]:
# Persist the raw curves of the alpha search (np.savez appends ".npz").
# The file name is the plain literal: it contains no placeholder, so it is
# not formatted with any configuration value.
np.savez("results-alpha", round_results_alpha)

# Axes: (round, alpha index, metric, epoch); np.array already builds a new array
copy_of = np.array(round_results_alpha)

# Metric 3 is the test accuracy; keep only its value after the last epoch.
# NOTE(review): alpha is therefore selected on test-set accuracy — confirm this
# is intended, since no separate validation split is used here.
last_accs_only = copy_of[:, :, 3, EPOCHS-1]

# Mean and standard deviation over the rounds, one value per alpha
means_alpha = last_accs_only.mean(axis=0)
stds_alpha = last_accs_only.std(axis=0)

print(means_alpha)
print(stds_alpha)

# Index of the alpha with the best mean final accuracy
i_max = np.argmax(means_alpha)

print(i_max)

best_alpha = ALPHAS[i_max]

print(best_alpha)

[0.5075 0.9295 0.9243 0.9222 0.9209 0.9184 0.9174 0.9158 0.9073 0.873 ]
[0.03854154 0.00520096 0.00695773 0.00561783 0.0047     0.00553534
 0.00465188 0.00602993 0.00511957 0.00913236]
1
0.1111111111111111
