# Hyperparameter tuning of the CNN
---

In [0]:
import helpers
import torch.nn as nn
import numpy as np
import config
from models.CNN import CNN
from models.SiameseNet import SiameseNet
from train import train_siamese
from predict import predict_siamese
import torch.utils.data as data
import matplotlib.pyplot as plt
from datasets import PairDataset
import torch

In [0]:
# Number of independent repetitions of every hyper-parameter search in this notebook.
# Each search cell loops `range(rounds)` and the analysis cells average over that axis.
rounds = 10

In [0]:
# Generate the data once; the same loaders are reused by every search cell.
# NOTE(review): entries 0-2 of `pairs` feed the training split and entries 3-5 the
# test split; presumably each triple is (inputs, targets, classes) — confirm against
# helpers.generate_pair_sets and PairDataset.
pairs = helpers.generate_pair_sets(config.NB_SAMPLES)

train_dataset = PairDataset(pairs[0], pairs[1], pairs[2])
# Training batches are reshuffled every epoch.
train_dataloader = data.DataLoader(dataset=train_dataset, batch_size=config.TRAIN_BATCH_SIZE, shuffle=True)

test_dataset = PairDataset(pairs[3], pairs[4], pairs[5])
# Evaluation gains nothing from shuffling; a fixed order keeps the test batches
# identical from one evaluation to the next.
# NOTE(review): this loader is also what the searches below use to pick the best
# hyper-parameters, so the reported "best" accuracies are optimistic. Consider a
# separate validation split for model selection.
test_dataloader = data.DataLoader(dataset=test_dataset, batch_size=config.TEST_BATCH_SIZE, shuffle=False)

# FCNEURONS (inner network) search

In [6]:
# Report whether a GPU is visible. The flag is not read again inside this cell.
cuda = torch.cuda.is_available()
print("CUDA available" if cuda else "NO CUDA")

# One entry per round. Each entry is nested as [nb_layers][fc_neurons][metric],
# with the metrics stored in the order
# (training_losses, training_acc, test_losses, test_acc).
round_results_nb_neurons_inner = []

# Size of the (nb_layers x fc_neurons) grid, used only for the progress counter.
nb_combinations = len(config.NB_LAYERS) * len(config.FCNEURONS)

for round_idx in range(rounds):
    print('round {0} start'.format(round_idx + 1))

    per_round = []

    for layer_pos, nb_layers in enumerate(config.NB_LAYERS):
        per_depth = []

        for width_pos, nb_units in enumerate(config.FCNEURONS):
            # A new subnet and a new Siamese wrapper are built for every grid point.
            subnet = CNN(nb_hidden_layers=nb_layers, hidden_layer=nb_units)
            model = SiameseNet(subnet)

            # Only the main-task train/test curves are kept; the other four
            # return values are discarded.
            training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(
                model=model,
                dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                epochs=config.EPOCHS,
                learning_rate=config.LEARNING_RATE,
                aux_loss=True,
                alpha=config.ALPHA,
            )

            # 1-based position of this grid point in row-major order.
            progress = layer_pos * len(config.FCNEURONS) + width_pos + 1
            print('{0}/{1}'.format(progress, nb_combinations))

            # The values not being searched are reported from config
            # (presumably the defaults CNN/SiameseNet fall back to — verify).
            reported_params = (
                nb_layers,
                nb_units,
                config.CNN_BASE_CHANNEL_SIZE,
                config.CNN_KERNEL_SIZE,
                config.SIAMESE_NET_NB_HIDDEN,
                config.SIAMESE_NET_HIDDEN_LAYER,
                config.ALPHA,
            )
            print('With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : {0}/{1}/{2}/{3}/{4}/{5}/{6}'.format(*reported_params))

            final_test_loss = test_losses[-1]
            final_test_loss_acc = test_acc[-1]
            print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss, final_test_loss_acc))

            per_depth.append([training_losses, training_acc, test_losses, test_acc])

        per_round.append(per_depth)

    print('round {0} end'.format(round_idx + 1))
    round_results_nb_neurons_inner.append(per_round)

CUDA available
round 1 start
1/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 1/32/4/3/2/128/0.5
On the test set we obtain a loss of 0.24 and an accuracy of 0.90
2/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 1/64/4/3/2/128/0.5
On the test set we obtain a loss of 0.23 and an accuracy of 0.91
3/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 1/128/4/3/2/128/0.5
On the test set we obtain a loss of 0.25 and an accuracy of 0.90
4/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 1/256/4/3/2/128/0.5
On the test set we obtain a loss of 0.23 and an accuracy of 0.91
5/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 1/512/4/3/2/128/0.5
On the test set we obtain a loss of 0.22 and an accuracy of 0.89
6/10
With parameters nb_hid_inner/hid_inner/base_channel/k

In [7]:
# Persist the raw histories of the inner-network search. np.savez appends ".npz"
# to the name and stores the single positional array under the key "arr_0".
np.savez("results-cnn-nb_fcneurons_inner_search",round_results_nb_neurons_inner)

# np.array already builds a fresh array from the nested lists, so no extra copy is needed.
copy_of = np.array(round_results_nb_neurons_inner)

# Axes: [round, nb_layers, fc_neurons, metric, epoch]. Metric 3 is test accuracy
# (order in which the search cell appends the curves); the last index picks the
# final epoch, assuming train_siamese returns one value per epoch.
last_accs_only = copy_of[:, :, :, 3, config.EPOCHS-1]

# Aggregate over the rounds axis -> one value per (nb_layers, fc_neurons) grid point.
means_fcni = last_accs_only.mean(axis=0)
stds_fcni = last_accs_only.std(axis=0)

# Report the number of rounds actually present in the data rather than a hard-coded 10.
print("Mean over {0} rounds changing the Number of hidden layers and number of units per hidden layer of the CNN subnet : ".format(len(round_results_nb_neurons_inner)))
print(means_fcni)

print("Standard deviation over {0} rounds changing the Number of hidden layers and number of units per hidden layer of the CNN subnet : ".format(len(round_results_nb_neurons_inner)))
print(stds_fcni)

# argmax works on the flattened grid; convert back to (layer index, neuron index).
raveled_i_max = np.argmax(means_fcni)

unraveled_i_max = np.unravel_index(raveled_i_max, means_fcni.shape)

# NOTE(review): selection is based on test accuracy, so best_acc is an optimistic estimate.
best_nb_hidden_inner = config.NB_LAYERS[unraveled_i_max[0]]
best_hidden_layer_inner = config.FCNEURONS[unraveled_i_max[1]]
best_acc = np.max(means_fcni)

print("Best score with number of hidden layer {0} and units per hidden layer {1} with accuracy {2}".format(best_nb_hidden_inner, best_hidden_layer_inner, best_acc))

Mean over 10 rounds changing the Number of hidden layers and number of units per hidden layer if the CNN subnet : 
[[0.9037 0.8986 0.9046 0.9015 0.9079]
 [0.9086 0.9146 0.9234 0.9278 0.9298]]
Standard deviation over 10 rounds changing the Number of hidden layers and number of units per hidden layer if the CNN subnet : 
[[0.01006032 0.01369087 0.00976934 0.01245191 0.01511589]
 [0.00671118 0.01233856 0.00824864 0.01221311 0.0079975 ]]
Best score with number if hidden layer 2 and units per hidden layer 512 with accuracy 0.9298


# BASE_CHANNEL_SIZE search

In [8]:
# Report whether a GPU is visible. The flag is not read again inside this cell.
cuda = torch.cuda.is_available()
print("CUDA available" if cuda else "NO CUDA")

# One entry per round. Each entry is nested as [base_channel_size][metric],
# with metrics ordered (training_losses, training_acc, test_losses, test_acc).
round_results_nb_channels = []

for round_idx in range(rounds):
    print('round {0} start'.format(round_idx + 1))

    per_round = []

    # `position` is the 1-based progress counter printed after each training run.
    for position, base_channels in enumerate(config.NB_CHANNELS, start=1):
        # Inner FC settings come from the previous search; only the base channel size varies.
        subnet = CNN(
            nb_hidden_layers=best_nb_hidden_inner,
            hidden_layer=best_hidden_layer_inner,
            base_channel_size=base_channels,
        )
        model = SiameseNet(subnet)

        training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(
            model=model,
            dataloader=train_dataloader,
            test_dataloader=test_dataloader,
            epochs=config.EPOCHS,
            learning_rate=config.LEARNING_RATE,
            aux_loss=True,
            alpha=config.ALPHA,
        )

        print('{0}/{1}'.format(position, len(config.NB_CHANNELS)))

        reported_params = (
            best_nb_hidden_inner,
            best_hidden_layer_inner,
            base_channels,
            config.CNN_KERNEL_SIZE,
            config.SIAMESE_NET_NB_HIDDEN,
            config.SIAMESE_NET_HIDDEN_LAYER,
            config.ALPHA,
        )
        print('With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : {0}/{1}/{2}/{3}/{4}/{5}/{6}'.format(*reported_params))

        final_test_loss = test_losses[-1]
        final_test_loss_acc = test_acc[-1]
        print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss, final_test_loss_acc))

        per_round.append([training_losses, training_acc, test_losses, test_acc])

    print('round {0} end'.format(round_idx + 1))
    round_results_nb_channels.append(per_round)

CUDA available
round 1 start
1/5
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/4/3/2/128/0.5
On the test set we obtain a loss of 0.18 and an accuracy of 0.94
2/5
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/8/3/2/128/0.5
On the test set we obtain a loss of 0.16 and an accuracy of 0.95
3/5
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/16/3/2/128/0.5
On the test set we obtain a loss of 0.19 and an accuracy of 0.95
4/5
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/2/128/0.5
On the test set we obtain a loss of 0.17 and an accuracy of 0.96
5/5
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/48/3/2/128/0.5
On the test set we obtain a loss of 0.15 and an accuracy of 0.96
round 1 end
round 2 start
1/5
With parameters nb_hid_inner

In [9]:
# Persist the raw histories of the base-channel search. np.savez appends ".npz"
# to the name and stores the single positional array under the key "arr_0".
np.savez("results-nb_channels_search",round_results_nb_channels)

# np.array already builds a fresh array from the nested lists, so no extra copy is needed.
copy_of = np.array(round_results_nb_channels)

# Axes: [round, base_channel_size, metric, epoch]. Metric 3 is test accuracy;
# the last index picks the final epoch, assuming one value per epoch.
last_accs_only = copy_of[:, :, 3, config.EPOCHS-1]

# Aggregate over the rounds axis -> one value per candidate channel size.
means_ch = last_accs_only.mean(axis=0)
stds_ch = last_accs_only.std(axis=0)

# Report the number of rounds actually present in the data rather than a hard-coded 10.
print("Mean over {0} rounds changing the base channel size of the CNN subnet : ".format(len(round_results_nb_channels)))
print(means_ch)

print("Standard deviation over {0} rounds changing the base channel size of the CNN subnet : ".format(len(round_results_nb_channels)))
print(stds_ch)

# Index of the candidate with the highest mean final test accuracy.
i_max = np.argmax(means_ch)

best_ch_size = config.NB_CHANNELS[i_max]

best_acc = np.max(means_ch)

print("Best score with base channel size of {0} with accuracy {1}".format(best_ch_size, best_acc))

Mean over 10 rounds changing the base channel size of the CNN subnet : 
[0.9309 0.9513 0.9583 0.9638 0.9637]
Standard deviation over 10 rounds changing the base channel size of the CNN subnet : 
[0.00587282 0.00453982 0.00429069 0.00493559 0.00300167]
Best score with base channel size of 24 with accuracy 0.9638


# KERNEL_SIZE (inner network) search

In [10]:
# Report whether a GPU is visible. The flag is not read again inside this cell.
cuda = torch.cuda.is_available()
print("CUDA available" if cuda else "NO CUDA")

# One entry per round. Each entry is nested as [kernel_size][metric],
# with metrics ordered (training_losses, training_acc, test_losses, test_acc).
round_results_kernel = []

for round_idx in range(rounds):
    print('round {0} start'.format(round_idx + 1))

    per_round = []

    # `position` is the 1-based progress counter printed after each training run.
    for position, kernel in enumerate(config.KERNEL_SIZES, start=1):
        # Everything except the kernel size is fixed to the winners of the earlier searches.
        subnet = CNN(
            nb_hidden_layers=best_nb_hidden_inner,
            hidden_layer=best_hidden_layer_inner,
            base_channel_size=best_ch_size,
            kernel_size=kernel,
        )
        model = SiameseNet(subnet)

        training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(
            model=model,
            dataloader=train_dataloader,
            test_dataloader=test_dataloader,
            epochs=config.EPOCHS,
            learning_rate=config.LEARNING_RATE,
            aux_loss=True,
            alpha=config.ALPHA,
        )

        print('{0}/{1}'.format(position, len(config.KERNEL_SIZES)))

        reported_params = (
            best_nb_hidden_inner,
            best_hidden_layer_inner,
            best_ch_size,
            kernel,
            config.SIAMESE_NET_NB_HIDDEN,
            config.SIAMESE_NET_HIDDEN_LAYER,
            config.ALPHA,
        )
        print('With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : {0}/{1}/{2}/{3}/{4}/{5}/{6}'.format(*reported_params))

        final_test_loss = test_losses[-1]
        final_test_loss_acc = test_acc[-1]
        print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss, final_test_loss_acc))

        per_round.append([training_losses, training_acc, test_losses, test_acc])

    print('round {0} end'.format(round_idx + 1))
    round_results_kernel.append(per_round)

CUDA available
round 1 start
1/2
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/2/128/0.5
On the test set we obtain a loss of 0.16 and an accuracy of 0.96
2/2
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/5/2/128/0.5
On the test set we obtain a loss of 0.14 and an accuracy of 0.96
round 1 end
round 2 start
1/2
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/2/128/0.5
On the test set we obtain a loss of 0.19 and an accuracy of 0.97
2/2
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/5/2/128/0.5
On the test set we obtain a loss of 0.23 and an accuracy of 0.96
round 2 end
round 3 start
1/2
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/2/128/0.5
On the test set we obtain a loss of 0.14 and an accuracy of 0.96
2/2


In [11]:
# Persist the raw histories of the kernel-size search. np.savez appends ".npz"
# to the name and stores the single positional array under the key "arr_0".
np.savez("results-kernel_search",round_results_kernel)

# np.array already builds a fresh array from the nested lists, so no extra copy is needed.
copy_of = np.array(round_results_kernel)

# Axes: [round, kernel_size, metric, epoch]. Metric 3 is test accuracy;
# the last index picks the final epoch, assuming one value per epoch.
last_accs_only = copy_of[:, :, 3, config.EPOCHS-1]

# Aggregate over the rounds axis -> one value per candidate kernel size.
means_kernel = last_accs_only.mean(axis=0)
stds_kernel = last_accs_only.std(axis=0)

# Report the number of rounds actually present in the data rather than a hard-coded 10.
print("Mean over {0} rounds changing the kernel size of the CNN subnet : ".format(len(round_results_kernel)))
print(means_kernel)

print("Standard deviation over {0} rounds changing the kernel size of the CNN subnet : ".format(len(round_results_kernel)))
print(stds_kernel)

# Index of the candidate with the highest mean final test accuracy.
i_max = np.argmax(means_kernel)

best_kernel = config.KERNEL_SIZES[i_max]

best_acc = np.max(means_kernel)

print("Best score with kernel size of {0} with accuracy {1}".format(best_kernel, best_acc))

Mean over 10 rounds changing the kernel size of the CNN subnet : 
[0.9614 0.9575]
Standard deviation over 10 rounds changing the kernel size of the CNN subnet : 
[0.00387814 0.00603738]
Best score with kernel size of 3 with accuracy 0.9613999999999997


# FCNEURONS (outer network) search

In [12]:
# Report whether a GPU is visible. The flag is not read again inside this cell.
cuda = torch.cuda.is_available()
print("CUDA available" if cuda else "NO CUDA")

# One entry per round. Each entry is nested as [nb_layers][fc_neurons][metric],
# with metrics ordered (training_losses, training_acc, test_losses, test_acc).
round_results_nb_neurons_outer = []

# Size of the (nb_layers x fc_neurons) grid, used only for the progress counter.
nb_combinations = len(config.NB_LAYERS) * len(config.FCNEURONS)

for round_idx in range(rounds):
    print('round {0} start'.format(round_idx + 1))

    per_round = []

    for layer_pos, nb_layers in enumerate(config.NB_LAYERS):
        per_depth = []

        for width_pos, nb_units in enumerate(config.FCNEURONS):
            # The CNN subnet is fixed to the winners of the earlier searches;
            # only the Siamese head's depth and width vary here.
            subnet = CNN(
                nb_hidden_layers=best_nb_hidden_inner,
                hidden_layer=best_hidden_layer_inner,
                base_channel_size=best_ch_size,
                kernel_size=best_kernel,
            )
            model = SiameseNet(subnet, nb_hidden_layers=nb_layers, hidden_layer=nb_units)

            training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(
                model=model,
                dataloader=train_dataloader,
                test_dataloader=test_dataloader,
                epochs=config.EPOCHS,
                learning_rate=config.LEARNING_RATE,
                aux_loss=True,
                alpha=config.ALPHA,
            )

            # 1-based position of this grid point in row-major order.
            progress = layer_pos * len(config.FCNEURONS) + width_pos + 1
            print('{0}/{1}'.format(progress, nb_combinations))

            reported_params = (
                best_nb_hidden_inner,
                best_hidden_layer_inner,
                best_ch_size,
                best_kernel,
                nb_layers,
                nb_units,
                config.ALPHA,
            )
            print('With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : {0}/{1}/{2}/{3}/{4}/{5}/{6}'.format(*reported_params))

            final_test_loss = test_losses[-1]
            final_test_loss_acc = test_acc[-1]
            print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss, final_test_loss_acc))

            per_depth.append([training_losses, training_acc, test_losses, test_acc])

        per_round.append(per_depth)

    print('round {0} end'.format(round_idx + 1))
    round_results_nb_neurons_outer.append(per_round)

CUDA available
round 1 start
1/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/1/32/0.5
On the test set we obtain a loss of 0.29 and an accuracy of 0.93
2/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/1/64/0.5
On the test set we obtain a loss of 0.18 and an accuracy of 0.95
3/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/1/128/0.5
On the test set we obtain a loss of 0.21 and an accuracy of 0.95
4/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/1/256/0.5
On the test set we obtain a loss of 0.20 and an accuracy of 0.96
5/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/1/512/0.5
On the test set we obtain a loss of 0.15 and an accuracy of 0.97
6/10
With parameters nb_hid_inner/hid_inner/base_chan

In [13]:
# Persist the raw histories of the outer-network search. np.savez appends ".npz"
# to the name and stores the single positional array under the key "arr_0".
np.savez("results-cnn-nb_fcneurons_outer_search",round_results_nb_neurons_outer)

# np.array already builds a fresh array from the nested lists, so no extra copy is needed.
copy_of = np.array(round_results_nb_neurons_outer)

# Axes: [round, nb_layers, fc_neurons, metric, epoch]. Metric 3 is test accuracy;
# the last index picks the final epoch, assuming one value per epoch.
last_accs_only = copy_of[:, :, :, 3, config.EPOCHS-1]

# Aggregate over the rounds axis -> one value per (nb_layers, fc_neurons) grid point.
means_fcno = last_accs_only.mean(axis=0)
stds_fcno = last_accs_only.std(axis=0)

# Report the number of rounds actually present in the data rather than a hard-coded 10.
print("Mean over {0} rounds changing the Number of hidden layers and number of units per hidden layer of the Siamese net : ".format(len(round_results_nb_neurons_outer)))
print(means_fcno)

print("Standard deviation over {0} rounds changing the Number of hidden layers and number of units per hidden layer of the Siamese net : ".format(len(round_results_nb_neurons_outer)))
print(stds_fcno)

# argmax works on the flattened grid; convert back to (layer index, neuron index).
raveled_i_max = np.argmax(means_fcno)

unraveled_i_max = np.unravel_index(raveled_i_max, means_fcno.shape)

best_nb_hidden_outer = config.NB_LAYERS[unraveled_i_max[0]]
best_hidden_layer_outer = config.FCNEURONS[unraveled_i_max[1]]
best_acc = np.max(means_fcno)

print("Best score with number of hidden layer {0} and units per hidden layer {1} with accuracy {2}".format(best_nb_hidden_outer, best_hidden_layer_outer, best_acc))

Mean over 10 rounds changing the Number of hidden layers and number of units per hidden layer of the Siamese net : 
[[0.945  0.9554 0.9563 0.9579 0.9651]
 [0.9498 0.956  0.9608 0.9645 0.9676]]
Standard deviation over 10 rounds changing the Number of hidden layers and number of units per hidden layer of the Siamese net : 
[[0.00513809 0.0050636  0.00645058 0.00585577 0.00266271]
 [0.0031241  0.00352136 0.00299333 0.00393065 0.00392938]]
Best score with number of hidden layer 2 and units per hidden layer 512 with accuracy 0.9676


# ALPHA search

In [14]:
# Report whether a GPU is visible. The flag is not read again inside this cell.
cuda = torch.cuda.is_available()
if cuda:
    print("CUDA available")
else:
    print("NO CUDA")

# One entry per round. Each entry is nested as [alpha][metric], with metrics
# ordered (training_losses, training_acc, test_losses, test_acc).
round_results_alpha = []

for i in range(rounds):
    results = [] #training_losses, training_acc, test_losses, test_acc
    
    print('round {0} start'.format(i+1))
    
    for ind, a in enumerate(config.ALPHAS):
        
        # Architecture is fixed to the winners of the earlier searches;
        # only the auxiliary-loss weight `a` varies.
        subnet = CNN(nb_hidden_layers = best_nb_hidden_inner,
                                 hidden_layer = best_hidden_layer_inner,
                                 base_channel_size = best_ch_size,
                                 kernel_size = best_kernel)

        # Single assignment (the original had an accidental `model = model = ...`).
        model = SiameseNet(subnet, nb_hidden_layers = best_nb_hidden_outer, hidden_layer = best_hidden_layer_outer)
        
        # Only the main-task train/test curves are kept; the other four
        # return values are discarded.
        training_losses, training_acc, _, _, test_losses, test_acc, _, _ = train_siamese(
            model = model, dataloader = train_dataloader, test_dataloader = test_dataloader,
            epochs = config.EPOCHS, learning_rate = config.LEARNING_RATE, aux_loss = True, 
            alpha = a)
        
        print('{0}/{1}'.format(ind+1, len(config.ALPHAS)))
        # alpha is rounded for display only; the unrounded value is what gets trained with.
        print('With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : {0}/{1}/{2}/{3}/{4}/{5}/{6}'.format(best_nb_hidden_inner,
                                                                                                                   best_hidden_layer_inner,
                                                                                                                   best_ch_size,
                                                                                                                   best_kernel,
                                                                                                                   best_nb_hidden_outer,
                                                                                                                   best_hidden_layer_outer,
                                                                                                                   round(a, 2)))
        final_test_loss, final_test_loss_acc = test_losses[-1], test_acc[-1]
        print("On the test set we obtain a loss of {:.2f} and an accuracy of {:.2f}".format(final_test_loss,final_test_loss_acc))
        
        results.append([training_losses, training_acc, test_losses, test_acc])
    
    
    print('round {0} end'.format(i+1))
    round_results_alpha.append(results)

CUDA available
round 1 start
1/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/2/512/0.0
On the test set we obtain a loss of 0.14 and an accuracy of 0.46
2/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/2/512/0.11
On the test set we obtain a loss of 0.13 and an accuracy of 0.97
3/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/2/512/0.22
On the test set we obtain a loss of 0.13 and an accuracy of 0.97
4/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/2/512/0.33
On the test set we obtain a loss of 0.17 and an accuracy of 0.96
5/10
With parameters nb_hid_inner/hid_inner/base_channel/kernel_size/nb_hid_out/hid_outer/alpha : 2/512/24/3/2/512/0.44
On the test set we obtain a loss of 0.12 and an accuracy of 0.98
6/10
With parameters nb_hid_inner/hid_inner/bas

In [15]:
# Persist the raw histories of the alpha search. np.savez appends ".npz"
# to the name and stores the single positional array under the key "arr_0".
np.savez("results-cnn-alpha",round_results_alpha)

# np.array already builds a fresh array from the nested lists, so no extra copy is needed.
copy_of = np.array(round_results_alpha)

# Axes: [round, alpha, metric, epoch]. Metric 3 is test accuracy;
# the last index picks the final epoch, assuming one value per epoch.
last_accs_only = copy_of[:, :, 3, config.EPOCHS-1]

# Aggregate over the rounds axis -> one value per candidate alpha.
means_alpha = last_accs_only.mean(axis=0)
stds_alpha = last_accs_only.std(axis=0)

# Report the number of rounds actually present in the data rather than a hard-coded 10.
print("Mean over {0} rounds changing alpha of the aux loss : ".format(len(round_results_alpha)))
print(means_alpha)

print("Standard deviation over {0} rounds changing alpha of the aux loss : ".format(len(round_results_alpha)))
print(stds_alpha)

# Index of the candidate with the highest mean final test accuracy.
i_max = np.argmax(means_alpha)

best_alpha = config.ALPHAS[i_max]

best_acc = np.max(means_alpha)

print("Best score with alpha of {0} with accuracy {1}".format(best_alpha, best_acc))

Mean over 10 rounds changing alpha of the aux loss : 
[0.5633 0.9699 0.9712 0.968  0.9721 0.9661 0.9636 0.9588 0.9487 0.8598]
Standard deviation over 10 rounds changing alpha of the aux loss : 
[0.05894922 0.00298161 0.00381576 0.00583095 0.00413401 0.00492849
 0.00467333 0.00442267 0.00555068 0.10909794]
Best score with alpha of 0.4444444444444444 with accuracy 0.9721
