# DEEP LEARNING PROJECT 1
---

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# %cd /content/drive/My Drive/

In [None]:
import torch.nn as nn
import numpy as np

# Number of image pairs requested from generate_pair_sets for the train set and for the test set.
NB_SAMPLES = 1000
# Root folder handed to the torchvision MNIST loader in generate_pair_sets.
DATA_DIR = './data'

# Size of the output layer of the digit classifier.
NUMBER_OF_CLASSES = 10

# Side length of one image once mnist_to_pairs has average-pooled the 28x28 MNIST digits by 2.
WIDTH_HEIGHT = 14
# Number of pixels in one image and in a pair of images.
SINGLE_IMAGE_SIZE = WIDTH_HEIGHT * WIDTH_HEIGHT
DOUBLE_IMAGE_SIZE = 2 * SINGLE_IMAGE_SIZE

# ----Train Config-----#
# Default arguments of train_siamese / predict_siamese.
LEARNING_RATE = 0.001  # initial learning rate given to Adam
TRAIN_BATCH_SIZE = 5
SUB_CRITERION = nn.CrossEntropyLoss()  # applied to the raw scores of each digit classifier
FINAL_CRITERION = nn.BCELoss()  # applied to the sigmoid output of the comparison network
EPOCHS = 20

# ----AuxLoss Config-----#
# Weight of the comparison loss when the auxiliary loss is enabled;
# each of the two digit losses is weighted by (1 - ALPHA) / 2.
ALPHA = 0.5

# ----Search Config-----#
# Candidate values swept one hyper-parameter at a time by the search cells.
KERNEL_SIZES = [3,5]
NB_CHANNELS = [4,8,16,24,48]
FCNEURONS = [32,64,128, 256,512]
NB_LAYERS = [1, 2, 3]  # NOTE(review): not referenced in the visible part of the notebook
ALPHAS = np.linspace(0, 1, 10)

#----Test Config-----#
# One batch covers the NB_SAMPLES test pairs.
TEST_BATCH_SIZE = NB_SAMPLES

#----BasicNet Config-----#
# NOTE(review): the network using these values is not defined in the visible part of the notebook.
BASIC_NET_NAME = "basic_net"
BASIC_NET_HIDDEN_LAYER = 128
BASIC_NET_NB_HIDDEN = 1

#----OscarNet Config-----#
# NOTE(review): the network using these values is not defined in the visible part of the notebook.
OSCAR_NET_NAME = "oscar_net"
OSCAR_NET_HIDDEN_LAYER = 128
OSCAR_NET_NB_HIDDEN = 1

#----RobertNet Config-----#
# NOTE(review): the network using these values is not defined in the visible part of the notebook.
ROBERT_NET_NAME = "robert_net"
ROBERT_NET_HIDDEN_LAYER = 128
ROBERT_NET_BASE_CHANNEL_SIZE = 8
ROBERT_NET_NB_HIDDEN = 1

#----MaryJaneNet Config-----#
# Defaults of the MaryJaneNet constructor (convolutional digit classifier).
MARYJANE_NET_NAME = "maryjane_net"
MARYJANE_NET_HIDDEN_LAYER = 128
MARYJANE_NET_BASE_CHANNEL_SIZE = 8
MARYJANE_NET_NB_HIDDEN = 1
MARYJANE_NET_KERNEL_SIZE = 3

#----DesmondNet Config-----#
# NOTE(review): the network using these values is not defined in the visible part of the notebook.
DESMOND_NET_NAME = "desmond_net"
DESMOND_NET_HIDDEN_LAYER = 256
DESMOND_NET_NB_HIDDEN = 2

#----LeonardNet Config-----#
# Defaults of the LeonardNet constructor (comparison network built on a digit classifier).
LEONARD_NET_NAME = "leonard_net"
LEONARD_NET_HIDDEN_LAYER = 256
LEONARD_NET_NB_HIDDEN = 2

# Helpers

In [None]:
############################################# HELPERS ###############################

import torch
from torchvision import datasets
import os
import time
from pathlib import Path


######################################################################
# The data

def convert_to_one_hot_labels(input, target):
    """Encode integer class labels as one-hot rows.

    Args:
        input: tensor used only as a template; the result is created with
            ``input.new_zeros`` and therefore shares its dtype and device.
        target: 1-D integer tensor of class indices.

    Returns:
        A ``(target.size(0), target.max() + 1)`` tensor holding 1.0 at
        ``[i, target[i]]`` and 0 elsewhere.
    """
    nb_rows = target.size(0)
    nb_classes = target.max() + 1
    one_hot = input.new_zeros(nb_rows, nb_classes)
    # Write a single 1.0 per row, in the column given by the label.
    column_index = target.view(-1, 1)
    one_hot.scatter_(1, column_index, 1.0)
    return one_hot

def load_data(cifar = None, one_hot_labels = False, normalize = False, flatten = True):
    """Load the first 1000 train and test samples of MNIST or CIFAR-10.

    The selected dataset is downloaded under ``./data`` through torchvision.

    Args:
        cifar: truthy to use CIFAR-10; ``None`` or falsy to use MNIST.
        one_hot_labels: if true, targets are converted with
            ``convert_to_one_hot_labels``.
        normalize: if true, both inputs are shifted and scaled in place using
            the mean and standard deviation of the (truncated) train input.
        flatten: if true, every sample is reshaped to a 1-D vector.

    Returns:
        ``(train_input, train_target, test_input, test_target)``.
    """
    data_dir = './data'
    nb_kept = 1000

    if cifar:
        print('* Using CIFAR')
        cifar_root = data_dir + '/cifar10/'
        cifar_train_set = datasets.CIFAR10(cifar_root, train = True, download = True)
        cifar_test_set = datasets.CIFAR10(cifar_root, train = False, download = True)

        # Swap axes 1<->3 then 2<->3, i.e. move the last axis to position 1
        # (presumably channels-last -> channels-first; confirm against torchvision).
        train_input = torch.from_numpy(cifar_train_set.data)
        train_input = train_input.transpose(3, 1).transpose(2, 3).float()
        train_target = torch.tensor(cifar_train_set.targets, dtype = torch.int64)

        test_input = torch.from_numpy(cifar_test_set.data).float()
        test_input = test_input.transpose(3, 1).transpose(2, 3).float()
        test_target = torch.tensor(cifar_test_set.targets, dtype = torch.int64)
    else:
        print('* Using MNIST')
        mnist_root = data_dir + '/mnist/'
        mnist_train_set = datasets.MNIST(mnist_root, train = True, download = True)
        mnist_test_set = datasets.MNIST(mnist_root, train = False, download = True)

        train_input = mnist_train_set.data.view(-1, 1, 28, 28).float()
        train_target = mnist_train_set.targets
        test_input = mnist_test_set.data.view(-1, 1, 28, 28).float()
        test_target = mnist_test_set.targets

    if flatten:
        train_input = train_input.clone().reshape(train_input.size(0), -1)
        test_input = test_input.clone().reshape(test_input.size(0), -1)

    # Keep only the leading samples of each split.
    train_input = train_input.narrow(0, 0, nb_kept)
    train_target = train_target.narrow(0, 0, nb_kept)
    test_input = test_input.narrow(0, 0, nb_kept)
    test_target = test_target.narrow(0, 0, nb_kept)

    print('** Use {:d} train and {:d} test samples'.format(train_input.size(0), test_input.size(0)))

    if one_hot_labels:
        train_target = convert_to_one_hot_labels(train_input, train_target)
        test_target = convert_to_one_hot_labels(test_input, test_target)

    if normalize:
        # Statistics come from the train split only and are applied to both splits.
        mu = train_input.mean()
        std = train_input.std()
        for split in (train_input, test_input):
            split.sub_(mu).div_(std)

    return train_input, train_target, test_input, test_target

######################################################################

def mnist_to_pairs(nb, input, target):
    """Build ``nb`` random image pairs and their comparison labels.

    Args:
        nb: number of pairs to build; ``2 * nb`` distinct samples are drawn.
        input: image tensor accepted by ``avg_pool2d`` with one channel per
            sample (the pair is stacked along dimension 1).
        target: class label of every sample in ``input``.

    Returns:
        ``(pairs, target, classes)`` where ``pairs`` holds the two pooled
        images of each pair concatenated along dimension 1, ``classes`` holds
        the two labels of each pair, and ``target`` is 1 when the first label
        is lower than or equal to the second, else 0.
    """
    # Halve the spatial resolution of every image.
    pooled = torch.nn.functional.avg_pool2d(input, kernel_size = 2)

    # Draw 2 * nb distinct sample indices and group them two by two.
    shuffled = torch.randperm(pooled.size(0))
    pair_idx = shuffled[:2 * nb].view(nb, 2)
    first_idx, second_idx = pair_idx[:, 0], pair_idx[:, 1]

    paired_images = torch.cat((pooled[first_idx], pooled[second_idx]), 1)
    classes = target[pair_idx]
    is_ordered = classes[:, 0] <= classes[:, 1]
    return paired_images, is_ordered.long(), classes

######################################################################

def generate_pair_sets(nb):
    """Download MNIST and build ``nb`` train pairs and ``nb`` test pairs.

    Args:
        nb: number of pairs to draw from each of the train and test splits.

    Returns:
        A 6-tuple ``(train_input, train_target, train_classes,
        test_input, test_target, test_classes)``: the result of
        ``mnist_to_pairs`` on the train split followed by its result on the
        test split.
    """
    mnist_root = DATA_DIR + '/mnist/'

    def _load_split(train):
        # Images are reshaped to (-1, 1, 28, 28) floats; targets are returned unchanged.
        split = datasets.MNIST(mnist_root, train = train, download = True)
        return split.data.view(-1, 1, 28, 28).float(), split.targets

    train_input, train_target = _load_split(True)
    test_input, test_target = _load_split(False)

    train_triplet = mnist_to_pairs(nb, train_input, train_target)
    test_triplet = mnist_to_pairs(nb, test_input, test_target)
    return train_triplet + test_triplet

######################################################################

def compute_accuracy(tensor1, tensor2):
    """Return the fraction of positions at which two tensors are equal.

    The denominator is the number of compared elements, so the result is a
    proper ratio in [0, 1] for tensors of any size.

    Args:
        tensor1: first tensor (for example predictions).
        tensor2: second tensor, broadcastable against ``tensor1``.

    Returns:
        Number of equal positions divided by the number of compared positions,
        as a Python float; 0.0 when there is nothing to compare.
    """
    matches = tensor1 == tensor2
    nb_compared = matches.numel()
    if nb_compared == 0:
        # Avoid a division by zero on empty inputs.
        return 0.0
    return matches.sum().item() / nb_compared

######################################################################

def save_model(model, epoch=None, loss=None, save_dir=None, specific_name=None):
    """Write ``model.state_dict()`` to a file inside ``save_dir``.

    The file name is chosen as follows:

    * if both ``epoch`` and ``loss`` are given (``0`` and ``0.0`` count as
      given), a name is generated from the current time, ``model.model_name``,
      the epoch and the loss;
    * otherwise, if ``specific_name`` is given, that name is used.

    Nothing is written when ``save_dir`` is missing or when neither naming
    scheme applies. ``save_dir`` is created if it does not exist.

    Args:
        model: module exposing ``state_dict()`` (and ``model_name`` for the
            generated file name).
        epoch: epoch number embedded in the generated file name.
        loss: loss value embedded in the generated file name.
        save_dir: destination directory.
        specific_name: explicit file name, used when epoch/loss are not given.
    """
    if not save_dir:
        return

    if epoch is not None and loss is not None:
        model_name = model.model_name
        timestr = time.strftime("%Y%m%d-%H%M%S")
        file_name = f"{timestr}_{model_name}_epoch_{epoch}_loss_{loss:03.3f}.pt"
    elif specific_name:
        file_name = specific_name
    else:
        return

    target_dir = Path(save_dir)
    # Both naming schemes need the directory to exist before torch.save runs.
    target_dir.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), str(target_dir / file_name))

# Modules

In [None]:
import torch.nn as nn
import torch
from torch.nn import functional as F
import math

class MaryJaneNet(nn.Module):
    """Convolutional digit classifier for single-channel 14x14 images.

    Architecture: Conv2d -> LeakyReLU -> MaxPool2d(2) -> Dropout -> Conv2d ->
    LeakyReLU -> Dropout, followed by ``nb_hidden_layers`` fully connected
    blocks (Linear -> LeakyReLU -> Dropout) and a final Linear layer with
    ``NUMBER_OF_CLASSES`` outputs.

    Args:
        nb_hidden_layers: number of fully connected hidden blocks (>= 0).
        base_channel_size: channels of the first convolution; the second
            convolution outputs twice as many.
        hidden_layer: width of every fully connected hidden block.
        kernel_size: kernel size shared by both convolutions.

    Raises:
        Exception: if ``nb_hidden_layers`` is negative.
        ValueError: if ``kernel_size`` leaves no spatial output for a 14x14 input.
    """

    def __init__(self, nb_hidden_layers = MARYJANE_NET_NB_HIDDEN, base_channel_size = MARYJANE_NET_BASE_CHANNEL_SIZE, hidden_layer = MARYJANE_NET_HIDDEN_LAYER, kernel_size = MARYJANE_NET_KERNEL_SIZE):

        super(MaryJaneNet, self).__init__()
        self.model_name = MARYJANE_NET_NAME

        if nb_hidden_layers < 0:
            raise Exception("Minimum 0 hidden layers for " + self.model_name)

        self.base_channel_size = base_channel_size

        conv_channel_size = self.base_channel_size * 2

        # Spatial size after each stage, for an unpadded stride-1 convolution
        # (out = in - k + 1) and a 2x2 max pooling that floors odd sizes.
        # k = 1, 3, 5 gives 14/12/10 -> 7/6/5 -> 7/4/1.
        input_side = 14
        side_after_pool = (input_side - kernel_size + 1) // 2
        side_after_conv = side_after_pool - kernel_size + 1
        if side_after_conv < 1:
            raise ValueError("kernel_size {0} is too large for a {1}x{1} input".format(kernel_size, input_side))

        self.conv_net = nn.Sequential(nn.Conv2d(1, self.base_channel_size, kernel_size = kernel_size),
                                      nn.LeakyReLU(),
                                      nn.MaxPool2d(kernel_size=2, stride=2),
                                      nn.Dropout(p=0.2),
                                      nn.Conv2d(self.base_channel_size, conv_channel_size,
                                                kernel_size = kernel_size),
                                      nn.LeakyReLU(),
                                      nn.Dropout(p=0.2))

        # Integer arithmetic keeps this equal to the real flattened size even
        # when the pooling has to floor (even kernel sizes).
        fcn_input_size = side_after_conv ** 2 * conv_channel_size

        self.fc_net = nn.ModuleList()

        if nb_hidden_layers > 0:
            # The hidden->hidden blocks are created first and the input block is
            # inserted in front afterwards.
            self.fc_net = nn.ModuleList([self._fc_block(hidden_layer, hidden_layer) for _ in range(nb_hidden_layers - 1)])
            self.fc_net.insert(0, self._fc_block(fcn_input_size, hidden_layer))
            self.output = nn.Linear(hidden_layer, NUMBER_OF_CLASSES)
        else:
            # No hidden block: classify directly from the flattened feature maps.
            self.output = nn.Linear(fcn_input_size, NUMBER_OF_CLASSES)

    @staticmethod
    def _fc_block(in_features, out_features):
        """Return one fully connected block: Linear -> LeakyReLU -> Dropout(0.2)."""
        return nn.Sequential(nn.Linear(in_features, out_features), nn.LeakyReLU(), nn.Dropout(p=0.2))

    def forward(self, x):
        """Classify a batch of images.

        Args:
            x: batch of single-channel images (the flattened-size computation
                in ``__init__`` assumes 14x14 inputs).

        Returns:
            ``(probabilities, scores)``: the softmax over dimension 1 of the
            output layer, and the raw output of that layer.
        """
        conved = self.conv_net(x)

        # Flatten everything but the batch dimension.
        hid = conved.view(conved.size(0), -1)

        for block in self.fc_net:
            hid = block(hid)

        out = self.output(hid)

        return F.softmax(out, dim=1), out


class LeonardNet(nn.Module):
    """Comparison network applying one shared digit classifier to both images of a pair.

    The two images of a pair are sent through the same ``image_net``; the two
    probability vectors it returns are concatenated and passed through
    ``nb_hidden_layers`` fully connected blocks (Linear -> LeakyReLU ->
    Dropout) and a single-output Linear layer followed by a sigmoid.

    Args:
        image_net: module whose forward returns a pair
            ``(probabilities, scores)``, each with ``NUMBER_OF_CLASSES``
            columns.
        nb_hidden_layers: number of fully connected hidden blocks (>= 1).
        hidden_layer: width of every hidden block.

    Raises:
        Exception: if ``nb_hidden_layers`` is lower than 1.
    """

    def __init__(self, image_net, nb_hidden_layers= LEONARD_NET_NB_HIDDEN, hidden_layer=LEONARD_NET_HIDDEN_LAYER):
        super(LeonardNet, self).__init__()
        self.model_name = LEONARD_NET_NAME

        if nb_hidden_layers < 1 :
            raise Exception("Minimum 1 hidden layer")

        self.image_net = image_net

        # At least one hidden block exists past the guard above. The
        # hidden->hidden blocks are created first and the input block is
        # inserted in front afterwards.
        self.hiddens = nn.ModuleList([nn.Sequential(nn.Linear(hidden_layer, hidden_layer), nn.LeakyReLU(), nn.Dropout(p=0.2)) for _ in range(nb_hidden_layers - 1)])

        self.hiddens.insert(0, nn.Sequential(nn.Linear(NUMBER_OF_CLASSES*2, hidden_layer), nn.LeakyReLU(), nn.Dropout(p=0.2)))

        self.output = nn.Linear(hidden_layer, 1)

    def forward(self, x):
        """Compare the two images of every pair in the batch.

        Args:
            x: batch of pairs; channel 0 holds the first image and channel 1
                the second one (size [N, 2, 14, 14] in this notebook).

        Returns:
            ``(prediction, left_scores, right_scores)``: the sigmoid of the
            output layer, and the raw classifier scores of the first and of
            the second image.
        """
        # Slice (rather than index) to keep the channel dimension: [N, 1, H, W].
        input1 = x[:, 0:1, :, :]
        input2 = x[:, 1:2, :, :]

        lefted, lefted_no = self.image_net(input1)

        righted, righted_no = self.image_net(input2)

        # The comparison head works on the two probability vectors side by side.
        hid = torch.cat((lefted, righted), 1)

        for block in self.hiddens:
            hid = block(hid)

        out = self.output(hid)

        return torch.sigmoid(out), lefted_no, righted_no

# Train & Predict

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
from torch.optim.lr_scheduler import StepLR

def train_siamese(model,
                  dataloader,
                  test_dataloader,
                  epochs = EPOCHS,
                  final_criterion = FINAL_CRITERION,
                  learning_rate = LEARNING_RATE,
                  aux_loss = False,
                  sub_criterion = SUB_CRITERION,
                  alpha = ALPHA):
    """Train a pair-comparison model and evaluate it after every epoch.

    The model is optimised with Adam; the learning rate is multiplied by 0.9
    after every epoch. Training runs on CUDA when it is available.

    Args:
        model: module whose forward returns ``(output, left_scores,
            right_scores)``.
        dataloader: training batches, each a dict with the keys ``"images"``,
            ``"bool_labels"`` and ``"digit_labels"``.
        test_dataloader: batches passed to ``predict_siamese`` after each epoch.
        epochs: number of training epochs.
        final_criterion: loss applied to the flattened model output and the
            flattened boolean labels.
        learning_rate: initial learning rate.
        aux_loss: if true, the optimised loss is
            ``alpha * final + (1 - alpha) / 2 * (left + right)``; otherwise
            only the final loss is optimised.
        sub_criterion: loss applied to each digit score against its digit label.
        alpha: weight of the final loss when ``aux_loss`` is true.

    Returns:
        Eight lists with one value per epoch: ``training_losses``,
        ``training_acc``, ``training_losses_l``, ``training_losses_r``,
        ``test_losses``, ``test_acc``, ``test_losses_l``, ``test_losses_r``.
        Losses are means over batches; the reported loss is the combined one
        when ``aux_loss`` is true.
    """
    cuda = torch.cuda.is_available()
    if cuda:
        model = model.to(device="cuda")

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # gamma is the decaying factor, after every 1 epoch new_lr = lr*gamma
    scheduler = StepLR(optimizer, step_size=1, gamma = 0.9)

    training_losses = []
    training_acc = []
    training_losses_l = []
    training_losses_r = []

    test_losses = []
    test_acc = []
    test_losses_l = []
    test_losses_r = []

    for epoch in range(1, epochs+1):
        # predict_siamese leaves the model in eval mode, so switch back every epoch.
        model.train()

        sum_loss_epoch = 0
        sum_loss_epoch_l = 0
        sum_loss_epoch_r = 0
        total = 0
        correct = 0

        for sample_batched in dataloader:

            images = sample_batched["images"]
            labels = sample_batched["bool_labels"].unsqueeze(1)
            digit_labels = sample_batched["digit_labels"]

            if cuda:
                images = images.to(device="cuda")
                labels = labels.to(device="cuda")
                digit_labels = digit_labels.to(device="cuda")

            optimizer.zero_grad()

            output, lefted, righted = model(images)

            loss = final_criterion(output.flatten(), labels.float().flatten())
            loss_left = sub_criterion(lefted, digit_labels[:,0])
            loss_right = sub_criterion(righted, digit_labels[:,1])

            if aux_loss:
                loss = alpha * loss + ((1-alpha)/2) * loss_left + ((1-alpha)/2) * loss_right

            loss.backward()
            optimizer.step()

            # update the accuracy; .item() keeps the counter a plain Python number
            total += images.size(0)
            correct += (output.round() == labels).sum().item()

            # add the losses of this batch to the totals of the epoch
            sum_loss_epoch = sum_loss_epoch + loss.item()
            sum_loss_epoch_l = sum_loss_epoch_l + loss_left.item()
            sum_loss_epoch_r = sum_loss_epoch_r + loss_right.item()

        scheduler.step()

        # per-batch means give the losses of this epoch
        nb_batches = float(len(dataloader))
        training_losses.append(sum_loss_epoch / nb_batches)
        training_acc.append(float(correct) / float(total))
        training_losses_l.append(sum_loss_epoch_l / nb_batches)
        training_losses_r.append(sum_loss_epoch_r / nb_batches)

        print('epoch {0}/{1}'.format(epoch, epochs))

        test_loss, test_accuracy, test_loss_l, test_loss_r = predict_siamese(model,
                                                                             test_dataloader,
                                                                             final_criterion,
                                                                             aux_loss,
                                                                             sub_criterion,
                                                                             alpha)

        test_losses.append(test_loss)
        test_acc.append(test_accuracy)
        test_losses_l.append(test_loss_l)
        test_losses_r.append(test_loss_r)

    return training_losses, training_acc, training_losses_l, training_losses_r, test_losses, test_acc, test_losses_l, test_losses_r


def predict_siamese(model,
            dataloader,
            final_criterion = FINAL_CRITERION,
            aux_loss = False,
            sub_criterion = SUB_CRITERION,
            alpha = ALPHA):
    """Evaluate a pair-comparison model on every batch of ``dataloader``.

    The model is put in eval mode (and moved to CUDA when available); it is
    left in eval mode on return. No gradients are tracked during evaluation.

    Args:
        model: module whose forward returns ``(output, left_scores,
            right_scores)``.
        dataloader: batches, each a dict with the keys ``"images"``,
            ``"bool_labels"`` and ``"digit_labels"``.
        final_criterion: loss applied to the flattened model output and the
            flattened boolean labels.
        aux_loss: if true, the reported loss is
            ``alpha * final + (1 - alpha) / 2 * (left + right)``.
        sub_criterion: loss applied to each digit score against its digit label.
        alpha: weight of the final loss when ``aux_loss`` is true.

    Returns:
        ``(mean_loss, accuracy, mean_loss_l, mean_loss_r)``: losses are means
        over batches; accuracy is the fraction of samples whose rounded output
        equals the boolean label.
    """
    model.eval()

    cuda = torch.cuda.is_available()
    if cuda:
        model = model.to(device="cuda")

    sum_loss = 0
    sum_loss_l = 0
    sum_loss_r = 0
    total = 0
    correct = 0

    # Evaluation never calls backward, so skip building the autograd graph.
    with torch.no_grad():
        for sample_batched in dataloader:

            images = sample_batched["images"]
            labels = sample_batched["bool_labels"]
            digit_labels = sample_batched["digit_labels"]

            if cuda:
                images = images.to(device="cuda")
                labels = labels.to(device="cuda")
                digit_labels = digit_labels.to(device="cuda")

            output, lefted, righted = model(images)

            # Give the labels the same [N, 1] layout as the model output.
            labels = labels.unsqueeze(1)

            loss = final_criterion(output.flatten(), labels.float().flatten())
            loss_left = sub_criterion(lefted, digit_labels[:,0])
            loss_right = sub_criterion(righted, digit_labels[:,1])

            if aux_loss:
                loss = alpha * loss + ((1-alpha)/2) * loss_left + ((1-alpha)/2) * loss_right

            # update the accuracy; .item() keeps the counter a plain Python number
            total += images.size(0)
            correct += (output.round() == labels).sum().item()

            # add the losses of this batch to the running totals
            sum_loss = sum_loss + loss.item()
            sum_loss_l = sum_loss_l + loss_left.item()
            sum_loss_r = sum_loss_r + loss_right.item()

    # per-batch means
    nb_batches = float(len(dataloader))
    mean_loss = sum_loss / nb_batches
    mean_loss_l = sum_loss_l / nb_batches
    mean_loss_r = sum_loss_r / nb_batches

    accuracy = float(correct) / float(total)

    return mean_loss, accuracy, mean_loss_l, mean_loss_r

# Data

In [None]:
######## DATA #####################################################
import torch.utils.data as data
import matplotlib.pyplot as plt
import numpy as np

from torch.utils.data import Dataset


class PairDataset(Dataset):
    """Dataset of image pairs with a boolean label and optional digit labels.

    Args:
        data: indexable collection of image pairs.
        bool_labels: indexable collection with one comparison label per pair;
            its length defines the dataset length.
        digit_labels: optional indexable collection with the digit labels of
            every pair.
    """

    def __init__(self, data, bool_labels, digit_labels = None):
        self.images = data
        self.bool_labels = bool_labels
        # Always define the attribute so __getitem__ can test it against None.
        self.digit_labels = digit_labels

    def __len__(self):
        # override the class method. return the length of data
        return len(self.bool_labels)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a dict.

        The dict has the keys ``"images"`` and ``"bool_labels"``, plus
        ``"digit_labels"`` when digit labels were provided.
        """
        sample = {"images" : self.images[idx],
                  "bool_labels" : self.bool_labels[idx]}

        if self.digit_labels is not None:
            sample["digit_labels"] = self.digit_labels[idx]

        return sample
    
class SingleDataset(Dataset):
    """Dataset pairing every image with its digit label.

    Args:
        data: indexable collection of images.
        digit_labels: indexable collection with one label per image; its
            length defines the dataset length.
    """

    def __init__(self, data, digit_labels):
        self.images, self.digit_labels = data, digit_labels

    def __len__(self):
        # The labels decide how many samples the dataset exposes.
        nb_samples = len(self.digit_labels)
        return nb_samples

    def __getitem__(self, idx):
        # One sample is a dict holding the image and its label.
        image = self.images[idx]
        digit = self.digit_labels[idx]
        return {"images" : image, "digit_labels" : digit}



# pairs = (train_input, train_target, train_classes, test_input, test_target, test_classes)
pairs = generate_pair_sets(NB_SAMPLES)
train_images, train_bool_labels, train_digit_labels = pairs[:3]
test_images, test_bool_labels, test_digit_labels = pairs[3:]

# Training batches: small and reshuffled at every epoch.
train_dataset = PairDataset(train_images, train_bool_labels, train_digit_labels)
train_dataloader = data.DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)

# Test batches: batch size is TEST_BATCH_SIZE.
test_dataset = PairDataset(test_images, test_bool_labels, test_digit_labels)
test_dataloader = data.DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE, shuffle=True)

##############################################################

# BASE_CHANNEL_SIZE search

In [None]:
# Report whether a GPU is visible (train_siamese performs its own check).
cuda = torch.cuda.is_available()
print("CUDA available" if cuda else "NO CUDA")

rounds = 10
# round_results_nb_channels[round][candidate] =
#     [training_losses, training_acc, test_losses, test_acc], one value per epoch.
round_results_nb_channels = []

for round_no in range(1, rounds + 1):
    print('round {0} start'.format(round_no))
    results = []

    for position, c in enumerate(NB_CHANNELS, start=1):
        # Fresh, untrained networks for every candidate channel count.
        model = LeonardNet(image_net = MaryJaneNet(base_channel_size = c))

        history = train_siamese(model = model,
                                dataloader = train_dataloader,
                                test_dataloader = test_dataloader,
                                epochs = EPOCHS,
                                final_criterion = FINAL_CRITERION,
                                learning_rate = LEARNING_RATE,
                                aux_loss = True,
                                sub_criterion = SUB_CRITERION,
                                alpha = ALPHA)
        # Keep the global loss/accuracy curves; the per-digit losses are not used here.
        training_losses, training_acc = history[0], history[1]
        test_losses, test_acc = history[4], history[5]

        print('{0}/{1}'.format(position, len(NB_CHANNELS)))
        tried_parameters = (MARYJANE_NET_KERNEL_SIZE, c, MARYJANE_NET_HIDDEN_LAYER, LEONARD_NET_HIDDEN_LAYER, ALPHA)
        print('With parameters kernel_size/base_channel/nb_hid_neur_inner/nb_hid_neur_out/alpha : {0}/{1}/{2}/{3}/{4}'.format(*tried_parameters))
        print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(test_losses[-1], test_acc[-1]))

        results.append([training_losses, training_acc, test_losses, test_acc])

    print('round {0} end'.format(round_no))
    round_results_nb_channels.append(results)

In [None]:
# Persist the raw curves of the channel search.
np.savez("results-batch{0}-nb_channels_search".format(TRAIN_BATCH_SIZE),round_results_nb_channels)

# The results are nested Python lists; an ndarray is needed for .shape and for
# multi-axis indexing. Axes: (round, candidate, metric, epoch).
copy_of = np.array(round_results_nb_channels)

print('Shape of "copy_of" : {0}'.format(copy_of.shape))

# Metric 3 is the test accuracy; -1 selects the last epoch whatever EPOCHS is.
last_accs_only = copy_of[:, :, 3, -1]

print('Shape of "last_accs_only" : {0}'.format(last_accs_only.shape))

# Statistics over the rounds, one value per candidate.
means_ch = last_accs_only.mean(axis=0)
stds_ch = last_accs_only.std(axis=0)

print(means_ch)
print(stds_ch)

# Keep the candidate with the best mean final test accuracy.
i_max = np.argmax(means_ch)

print(i_max)

best_ch_size = NB_CHANNELS[i_max]

# FCNEURONS (inner network) search

In [None]:
# Report whether a GPU is visible (train_siamese performs its own check).
cuda = torch.cuda.is_available()
if cuda:
    print("CUDA available")
else:
    print("NO CUDA")

rounds = 10
# round_results_nb_neurons_inner[round][candidate] =
#     [training_losses, training_acc, test_losses, test_acc], one value per epoch.
round_results_nb_neurons_inner = []

for i in range(rounds):
    results = []

    print('round {0} start'.format(i+1))

    for ind, n in enumerate(FCNEURONS):

        # Only the hidden width of the digit classifier varies; the channel
        # count comes from the previous search.
        classifier = MaryJaneNet(base_channel_size = best_ch_size, hidden_layer = n)

        model = LeonardNet(image_net = classifier)

        training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(model = model,
                                     dataloader = train_dataloader,
                                     test_dataloader = test_dataloader,
                                     epochs = EPOCHS,
                                     final_criterion = FINAL_CRITERION,
                                     learning_rate = LEARNING_RATE,
                                     aux_loss = True,
                                     sub_criterion = SUB_CRITERION,
                                     alpha = ALPHA)

        # Progress is counted against the list being swept.
        print('{0}/{1}'.format(ind+1, len(FCNEURONS)))
        print('With parameters kernel_size/base_channel/nb_hid_neur_inner/nb_hid_neur_out/alpha : {0}/{1}/{2}/{3}/{4}'.format(MARYJANE_NET_KERNEL_SIZE,
                                                                                                                              best_ch_size,
                                                                                                                              n,
                                                                                                                              LEONARD_NET_HIDDEN_LAYER,
                                                                                                                              ALPHA))
        final_test_loss, final_test_loss_acc = test_losses[-1], test_acc[-1]
        print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss,final_test_loss_acc))

        results.append([training_losses, training_acc, test_losses, test_acc])


    print('round {0} end'.format(i+1))
    round_results_nb_neurons_inner.append(results)

In [3]:
# Persist the raw curves of the inner hidden-width search.
np.savez("results-batch{0}-nb_fcneurons_inner_search".format(TRAIN_BATCH_SIZE),round_results_nb_neurons_inner)

# The results are nested Python lists; an ndarray is needed for .shape and for
# multi-axis indexing. Axes: (round, candidate, metric, epoch).
copy_of = np.array(round_results_nb_neurons_inner)

print('Shape of "copy_of" : {0}'.format(copy_of.shape))

# Metric 3 is the test accuracy; -1 selects the last epoch whatever EPOCHS is.
last_accs_only = copy_of[:, :, 3, -1]

print('Shape of "last_accs_only" : {0}'.format(last_accs_only.shape))

# Statistics over the rounds, one value per candidate.
means_fcni = last_accs_only.mean(axis=0)
stds_fcni = last_accs_only.std(axis=0)

print(means_fcni)
print(stds_fcni)

# Keep the candidate with the best mean final test accuracy.
i_max = np.argmax(means_fcni)

print(i_max)

best_fcni = FCNEURONS[i_max]

(10, 5, 4, 20)
(10, 5)
(5,)
(5,)


# KERNEL_SIZE (inner network) search

In [None]:
# Report whether a GPU is visible (train_siamese performs its own check).
cuda = torch.cuda.is_available()
if cuda:
    print("CUDA available")
else:
    print("NO CUDA")

rounds = 10
# round_results_kernel[round][candidate] =
#     [training_losses, training_acc, test_losses, test_acc], one value per epoch.
round_results_kernel = []

for i in range(rounds):
    results = []

    print('round {0} start'.format(i+1))

    for ind, k in enumerate(KERNEL_SIZES):

        # Only the kernel size varies; channel count and hidden width come
        # from the previous searches.
        classifier = MaryJaneNet(base_channel_size = best_ch_size, hidden_layer = best_fcni, kernel_size=k)

        model = LeonardNet(image_net = classifier)

        training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(model = model,
                                     dataloader = train_dataloader,
                                     test_dataloader = test_dataloader,
                                     epochs = EPOCHS,
                                     final_criterion = FINAL_CRITERION,
                                     learning_rate = LEARNING_RATE,
                                     aux_loss = True,
                                     sub_criterion = SUB_CRITERION,
                                     alpha = ALPHA)

        # Progress is counted against the list being swept.
        print('{0}/{1}'.format(ind+1, len(KERNEL_SIZES)))
        print('With parameters kernel_size/base_channel/nb_hid_neur_inner/nb_hid_neur_out/alpha : {0}/{1}/{2}/{3}/{4}'.format(k,
                                                                                                                              best_ch_size,
                                                                                                                              best_fcni,
                                                                                                                              LEONARD_NET_HIDDEN_LAYER,
                                                                                                                              ALPHA))
        final_test_loss, final_test_loss_acc = test_losses[-1], test_acc[-1]
        print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss,final_test_loss_acc))

        results.append([training_losses, training_acc, test_losses, test_acc])


    print('round {0} end'.format(i+1))
    round_results_kernel.append(results)

In [None]:
# Persist the raw curves of the kernel-size search.
np.savez("results-batch{0}-nb_kernel_search".format(TRAIN_BATCH_SIZE),round_results_kernel)

# The results are nested Python lists; an ndarray is needed for .shape and for
# multi-axis indexing. Axes: (round, candidate, metric, epoch).
copy_of = np.array(round_results_kernel)

print('Shape of "copy_of" : {0}'.format(copy_of.shape))

# Metric 3 is the test accuracy; -1 selects the last epoch whatever EPOCHS is.
last_accs_only = copy_of[:, :, 3, -1]

print('Shape of "last_accs_only" : {0}'.format(last_accs_only.shape))

# Statistics over the rounds, one value per candidate.
means_kernel = last_accs_only.mean(axis=0)
stds_kernel = last_accs_only.std(axis=0)

# Show the statistics of this search, not those of the previous one.
print(means_kernel)
print(stds_kernel)

# Keep the candidate with the best mean final test accuracy.
i_max = np.argmax(means_kernel)

print(i_max)

best_kernel = KERNEL_SIZES[i_max]

# FCNEURONS (outer network) search

In [None]:
# Report whether a GPU is visible (train_siamese performs its own check).
cuda = torch.cuda.is_available()
if cuda:
    print("CUDA available")
else:
    print("NO CUDA")

rounds = 10
# round_results_nb_neurons_outer[round][candidate] =
#     [training_losses, training_acc, test_losses, test_acc], one value per epoch.
round_results_nb_neurons_outer = []

for i in range(rounds):
    results = []

    print('round {0} start'.format(i+1))

    for ind, n in enumerate(FCNEURONS):

        # The digit classifier is fixed to the values selected so far; only
        # the hidden width of the comparison network varies.
        classifier = MaryJaneNet(base_channel_size = best_ch_size, hidden_layer = best_fcni, kernel_size = best_kernel)

        model = LeonardNet(image_net = classifier, hidden_layer = n)

        training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(model = model,
                                     dataloader = train_dataloader,
                                     test_dataloader = test_dataloader,
                                     epochs = EPOCHS,
                                     final_criterion = FINAL_CRITERION,
                                     learning_rate = LEARNING_RATE,
                                     aux_loss = True,
                                     sub_criterion = SUB_CRITERION,
                                     alpha = ALPHA)

        # Progress is counted against the list being swept.
        print('{0}/{1}'.format(ind+1, len(FCNEURONS)))
        print('With parameters kernel_size/base_channel/nb_hid_neur_inner/nb_hid_neur_out/alpha : {0}/{1}/{2}/{3}/{4}'.format(best_kernel,
                                                                                                                              best_ch_size,
                                                                                                                              best_fcni,
                                                                                                                              n,
                                                                                                                              ALPHA))
        final_test_loss, final_test_loss_acc = test_losses[-1], test_acc[-1]
        print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss,final_test_loss_acc))

        results.append([training_losses, training_acc, test_losses, test_acc])


    print('round {0} end'.format(i+1))
    round_results_nb_neurons_outer.append(results)

In [3]:
# Persist the raw curves of the outer hidden-width search.
np.savez("results-batch{0}-nb_fcneurons_outer_search".format(TRAIN_BATCH_SIZE),round_results_nb_neurons_outer)

# The results are nested Python lists; an ndarray is needed for .shape and for
# multi-axis indexing. Axes: (round, candidate, metric, epoch).
copy_of = np.array(round_results_nb_neurons_outer)

print('Shape of "copy_of" : {0}'.format(copy_of.shape))

# Metric 3 is the test accuracy; -1 selects the last epoch whatever EPOCHS is.
last_accs_only = copy_of[:, :, 3, -1]

print('Shape of "last_accs_only" : {0}'.format(last_accs_only.shape))

# Statistics over the rounds, one value per candidate.
means_fcno = last_accs_only.mean(axis=0)
stds_fcno = last_accs_only.std(axis=0)

print(means_fcno)
print(stds_fcno)

# Keep the candidate with the best mean final test accuracy.
i_max = np.argmax(means_fcno)

print(i_max)

best_fcno = FCNEURONS[i_max]

(10, 5, 4, 20)
(10, 5)
(5,)
(5,)


# ALPHA search

In [None]:
# Report whether a GPU is visible (train_siamese performs its own check).
cuda = torch.cuda.is_available()
if cuda:
    print("CUDA available")
else:
    print("NO CUDA")

rounds = 10
# round_results_alpha[round][candidate] =
#     [training_losses, training_acc, test_losses, test_acc, test_losses_l, test_losses_r],
# one value per epoch.
round_results_alpha = []

for i in range(rounds):
    results = []

    print('round {0} start'.format(i+1))

    for ind, a in enumerate(ALPHAS):

        # Both networks use the values selected by the previous searches;
        # only the loss weighting varies.
        classifier = MaryJaneNet(base_channel_size = best_ch_size, hidden_layer = best_fcni, kernel_size = best_kernel)

        model = LeonardNet(image_net = classifier, hidden_layer = best_fcno)

        training_losses, training_acc, _, _, test_losses, test_acc, test_losses_l, test_losses_r = train_siamese(model = model,
                                     dataloader = train_dataloader,
                                     test_dataloader = test_dataloader,
                                     epochs = EPOCHS,
                                     final_criterion = FINAL_CRITERION,
                                     learning_rate = LEARNING_RATE,
                                     aux_loss = True,
                                     sub_criterion = SUB_CRITERION,
                                     alpha = a)

        # Progress is counted against the list being swept.
        print('{0}/{1}'.format(ind+1, len(ALPHAS)))
        print('With parameters kernel_size/base_channel/nb_hid_neur_inner/nb_hid_neur_out/alpha : {0}/{1}/{2}/{3}/{4}'.format(best_kernel,
                                                                                                                              best_ch_size,
                                                                                                                              best_fcni,
                                                                                                                              best_fcno,
                                                                                                                              a))
        final_test_loss, final_test_loss_acc = test_losses[-1], test_acc[-1]
        print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss,final_test_loss_acc))

        # The per-digit test losses are kept as well for this search.
        results.append([training_losses, training_acc, test_losses, test_acc, test_losses_l, test_losses_r])


    print('round {0} end'.format(i+1))
    round_results_alpha.append(results)

In [3]:
# Persist the raw curves of the alpha search under their own file name, so the
# results of the outer hidden-width search are not overwritten.
np.savez("results-batch{0}-alpha_search".format(TRAIN_BATCH_SIZE),round_results_alpha)

# The results are nested Python lists; an ndarray is needed for .shape and for
# multi-axis indexing. Axes: (round, candidate, metric, epoch).
copy_of = np.array(round_results_alpha)

print('Shape of "copy_of" : {0}'.format(copy_of.shape))

# Metric 3 is the test accuracy; -1 selects the last epoch whatever EPOCHS is.
last_accs_only = copy_of[:, :, 3, -1]

print('Shape of "last_accs_only" : {0}'.format(last_accs_only.shape))

# Statistics over the rounds, one value per candidate.
means_alpha = last_accs_only.mean(axis=0)
stds_alpha = last_accs_only.std(axis=0)

print(means_alpha)
print(stds_alpha)

# Keep the candidate with the best mean final test accuracy.
i_max = np.argmax(means_alpha)

print(i_max)

best_alpha = ALPHAS[i_max]

(10, 5, 4, 20)
(10, 5)
(5,)
(5,)


In [None]:
# Rebuild the array from the raw results so this cell does not depend on the
# type of "copy_of". Axes: (round, candidate, metric, epoch).
alpha_results = np.array(round_results_alpha)

# Metrics 4 and 5 are the test losses of the left and right digit classifiers;
# -1 selects the last epoch whatever EPOCHS is.
last_loss_l_only = alpha_results[:, :, 4, -1]
last_loss_r_only = alpha_results[:, :, 5, -1]

# Statistics over the rounds, one value per alpha.
means_alpha_loss_l = last_loss_l_only.mean(axis=0)
stds_alpha_loss_l = last_loss_l_only.std(axis=0)

means_alpha_loss_r = last_loss_r_only.mean(axis=0)
stds_alpha_loss_r = last_loss_r_only.std(axis=0)

# Show the statistics computed in this cell.
print(means_alpha_loss_l)
print(stds_alpha_loss_l)
print(means_alpha_loss_r)
print(stds_alpha_loss_r)