# **Hyperparameters tuning for Convolutional Neural Network based on Global Optimization** 

---

![Global Optimization](http://www.globaloptimization.org/wp-content/uploads/2017/09/GKLS-e1507106893485-300x216.png) 

# **Requirements**

Here we install the libraries required by the project (bayesian-optimization, optunity) and define a sampler class and a loader function for the PyTorch datasets.


## ***Pip install libraries***

In [0]:
pip install bayesian-optimization

Collecting bayesian-optimization
  Downloading https://files.pythonhosted.org/packages/72/0c/173ac467d0a53e33e41b521e4ceba74a8ac7c7873d7b857a8fbdca88302d/bayesian-optimization-1.0.1.tar.gz
Building wheels for collected packages: bayesian-optimization
  Building wheel for bayesian-optimization (setup.py) ... [?25l[?25hdone
  Created wheel for bayesian-optimization: filename=bayesian_optimization-1.0.1-cp36-none-any.whl size=10032 sha256=bd258ef537cccd0a1e4b85bd6e538cb4944210b628414cee851c436ae4bd7618
  Stored in directory: /root/.cache/pip/wheels/1d/0d/3b/6b9d4477a34b3905f246ff4e7acf6aafd4cc9b77d473629b77
Successfully built bayesian-optimization
Installing collected packages: bayesian-optimization
Successfully installed bayesian-optimization-1.0.1


In [0]:
pip install optunity


Collecting optunity
[?25l  Downloading https://files.pythonhosted.org/packages/32/4d/d49876a49e105b56755eb5ba06a4848ee8010f7ff9e0f11a13aefed12063/Optunity-1.1.1.tar.gz (4.6MB)
[K     |████████████████████████████████| 4.6MB 1.4MB/s 
[?25hBuilding wheels for collected packages: optunity
  Building wheel for optunity (setup.py) ... [?25l[?25hdone
  Created wheel for optunity: filename=Optunity-1.1.1-cp36-none-any.whl size=72024 sha256=d04ef9f4e68d9d8ec9806e411e93966613f51590e7c0922eed5ba62a5b5198b0
  Stored in directory: /root/.cache/pip/wheels/c9/e2/80/d3794524ae0042e147e035e132ec5fac09b8794b4acd94f046
Successfully built optunity
Installing collected packages: optunity
Successfully installed optunity-1.1.1


## ***Dataset Management***


In [0]:
import torch
import torchvision
from torch.utils.data import sampler
import torchvision.transforms as transforms


class ChunkSampler(sampler.Sampler):
    """Sampler that walks a contiguous run of dataset indices in order.

    Arguments:
        num_samples: how many indices the sampler yields
        start: first index of the run (defaults to the beginning)
    """

    def __init__(self, num_samples, start=0):
        self.start = start
        self.num_samples = num_samples

    def __len__(self):
        return self.num_samples

    def __iter__(self):
        # indices start, start + 1, ..., start + num_samples - 1
        first = self.start
        stop = first + self.num_samples
        return iter(range(first, stop))


def getDataset(validation=False, dataset_name='mnist', batch_size=64, num_workers=4):
    """Build the data loaders for the MNIST or CIFAR10 dataset.

    The datasets are downloaded to ``./data`` when needed and every image is
    converted with ``transforms.ToTensor()``. Samples are read sequentially
    (no shuffling) through ``ChunkSampler``.

    Arguments:
        validation: when True, the last 10000 samples of the training data
            are served by a separate validation loader and removed from the
            training loader
        dataset_name: 'mnist' or 'cifar10'
        batch_size: number of samples per batch for every loader
        num_workers: number of worker processes used by every loader

    Returns:
        ``(train_loader, test_loader)`` or, when ``validation`` is True,
        ``(train_loader, validation_loader, test_loader)``.

    Raises:
        ValueError: if ``dataset_name`` is not a supported dataset.
    """
    # number of training samples used for each dataset when no validation
    # split is requested
    full_train_sizes = {'mnist': 60000, 'cifar10': 50000}
    if dataset_name not in full_train_sizes:
        raise ValueError(
            "Unsupported dataset_name %r; expected one of %s"
            % (dataset_name, sorted(full_train_sizes)))

    # the validation samples are taken right after the training ones
    validation_size = 10000 if validation else 0
    train_size = full_train_sizes[dataset_name] - validation_size

    if dataset_name == 'mnist':
        dataset_cls = torchvision.datasets.MNIST
    else:
        dataset_cls = torchvision.datasets.CIFAR10
    transform = transforms.ToTensor()

    # FIXME: find a way to remove the line 'Files already downloaded and
    # verified' that the CIFAR10 dataset prints to the console log
    train_set = dataset_cls(root='./data', train=True, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=False, num_workers=num_workers,
        sampler=ChunkSampler(train_size, 0))

    test_set = dataset_cls(root='./data', train=False, download=True, transform=transform)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    if validation:
        # same underlying training set, but starting where the training chunk ends
        validation_loader = torch.utils.data.DataLoader(
            train_set, batch_size=batch_size, shuffle=False, num_workers=num_workers,
            sampler=ChunkSampler(validation_size, train_size))
        return train_loader, validation_loader, test_loader

    return train_loader, test_loader

# ***`Parameters `***   

1.   Number of evaluations
2.   Number of initial points for the Bayesian optimizer
3.   Maximum number of epochs for each neural network evaluation
4.   Name of output file
5.   Hyperparameters domains
6.   Dataset name



In [0]:
# ---- experiment configuration ----
# CSV file that collects the results
output_file = 'result.csv'
# number of evaluations
evaluations = 25
# number of initial points for the Bayesian optimizer
init_points = 5
# maximum number of training epochs for each network evaluation
max_epochs = 50
# dataset to use: 'mnist' or 'cifar10'
dataset_name = 'cifar10'
# id of the GPU to use (on Colab or on a local machine)
gpu = 0

# search domain (lower bound, upper bound) of each hyperparameter
hyperparameters = dict(
    learning_rate=(0.0001, 0.1),
    weight_decay=(0, 0.001),
)


# **Neural Network**

In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable


class Net(nn.Module):
    """Small CNN classifier (two convolutions + one linear layer, 10 classes).

    The module owns its SGD optimizer, its loss function and its training
    and evaluation loops, so a single instance represents one complete
    evaluation of a (learning_rate, weight_decay) pair.

    Arguments:
        learning_rate: learning rate of the SGD optimizer
        weight_decay: weight decay of the SGD optimizer
        epochs: number of passes over the training loader done by ``fit``
        gpu: index of the CUDA device to use, or None to stay on the CPU
        dataset_name: 'cifar10' selects 3 input channels and 32x32 images;
            any other value selects the MNIST layout (1 channel, 28x28)
    """

    def __init__(self, learning_rate, weight_decay, epochs, gpu, dataset_name):
        super(Net, self).__init__()

        # Layer sizes depend on the image format: after the two conv+pool
        # stages a 3x32x32 image is reduced to 20x5x5 = 500 features and a
        # 1x28x28 image to 20x4x4 = 320 features.
        is_cifar10 = dataset_name == 'cifar10'
        in_channels = 3 if is_cifar10 else 1
        flat_features = 500 if is_cifar10 else 320

        self.conv1 = nn.Conv2d(in_channels, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(flat_features, 10)

        self.optimizer = optim.SGD(self.parameters(), lr=learning_rate, weight_decay=weight_decay)
        self.max_epochs = epochs

        # indicates whether the network has been trained through fit()
        self.fitted = False

        # set criterion (forward() returns log-probabilities)
        self.criterion = F.nll_loss

        # Selection of the device to use. The decision is taken once so that
        # self.device and self.gpu can never disagree: self.gpu is None
        # whenever the network runs on the CPU.
        use_cuda = torch.cuda.is_available() and gpu is not None
        self.device = torch.device("cuda:" + str(gpu) if use_cuda else "cpu")
        self.gpu = gpu if use_cuda else None

    def forward(self, x):
        """Return the log-softmax class scores for a batch of images."""
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))
        x = F.relu(self.mp(self.conv2(x)))
        x = x.view(in_size, -1)  # flatten the tensor
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

    # FIXME: possibly re-check this initialization
    def init_net(self, m):
        """Re-initialize a single sub-module; meant to be used with ``apply``."""
        # reset all parameters of a Conv2d layer to its default initialization
        if isinstance(m, nn.Conv2d):
            m.reset_parameters()
        # set every weight and bias of a Linear layer to the constant 0.01
        if isinstance(m, nn.Linear):
            m.weight.data.fill_(0.01)
            m.bias.data.fill_(0.01)

    def reset_parameters(self):
        """Re-initialize every layer of the network through ``init_net``."""
        self.apply(self.init_net)

    def fit(self, train_loader):
        """Train the network for ``max_epochs`` passes over ``train_loader``.

        Arguments:
            train_loader: iterable of ``(inputs, targets)`` batches

        Returns:
            The loss tensor of the last processed batch, or None when no
            batch was processed (zero epochs or an empty loader).
        """
        # same initialization procedure for every trained network
        self.reset_parameters()
        self.train()
        # move the model to the very same device the batches are sent to
        self.to(self.device)

        train_loss = None
        for _ in range(self.max_epochs):
            for x, y in train_loader:
                x, y = x.to(self.device), y.to(self.device)
                self.optimizer.zero_grad()
                output = self(x)
                train_loss = self.criterion(output, y)
                train_loss.backward()
                self.optimizer.step()
        self.fitted = True
        return train_loss

    def validation(self, validation_loader):
        """Evaluate the trained network on ``validation_loader``.

        Arguments:
            validation_loader: iterable of ``(inputs, targets)`` batches

        Returns:
            ``(loss, accuracy)``: the loss averaged over the batches and the
            fraction of correctly classified samples.

        Raises:
            RuntimeError: if the network has not been trained with ``fit``.
            ZeroDivisionError: if the loader yields no samples.
        """
        if not self.fitted:
            raise RuntimeError("validation() called before fit(): the network is not trained")

        # evaluation mode; has no effect on the current conv/pool/linear layers
        self.eval()

        correct = 0
        total = 0
        loss = 0.0
        num_batches = 0
        with torch.no_grad():
            for x, y in validation_loader:
                x, y = x.to(self.device), y.to(self.device)

                # predicted class = index of the largest log-probability
                outputs = self(x)
                _, predicted = torch.max(outputs, 1)

                # loss update
                loss += self.criterion(outputs, y).item()
                num_batches += 1

                # update the numbers of total and correct predictions
                total += y.size(0)
                correct += (predicted == y).sum().item()

        accuracy = correct / total
        loss /= num_batches
        return loss, accuracy


## Print the network with *`torchsummary`*

In [0]:
from torchsummary import summary

# Build a throw-away model only to print its layer-by-layer summary.
# NOTE(review): the device is kept at "cuda:0" because torchsummary presumably
# creates its dummy input on the default CUDA device -- confirm before changing.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# the model must be built for the selected dataset, otherwise its input
# channels do not match the input shape given to summary() below
model = Net(learning_rate=0.0001, weight_decay=0.01, epochs=max_epochs, gpu=0, dataset_name=dataset_name).to(device)

# input shape (channels, height, width) of the selected dataset
if dataset_name == 'mnist':
    summary(model, (1, 28, 28))
elif dataset_name == 'cifar10':
    summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 28, 28]             760
         MaxPool2d-2           [-1, 10, 14, 14]               0
            Conv2d-3           [-1, 20, 10, 10]           5,020
         MaxPool2d-4             [-1, 20, 5, 5]               0
            Linear-5                   [-1, 10]           5,010
Total params: 10,790
Trainable params: 10,790
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/backward pass size (MB): 0.09
Params size (MB): 0.04
Estimated Total Size (MB): 0.15
----------------------------------------------------------------


## ***Evaluation***

Here we define the method used to evaluate the network.

In [0]:
import torch
import optunity
import csv

def evaluate(learning_rate, weight_decay):
    """Train a fresh network with the given hyperparameters and score it.

    The network is trained on the training loader and scored on the
    validation loader; the test loader is not touched, so the test set does
    not influence the hyperparameter search. One row with the outcome is
    appended to ``output_file``.

    Arguments:
        learning_rate: learning rate passed to the network optimizer
        weight_decay: weight decay passed to the network optimizer

    Returns:
        The negated average validation loss, so that an optimizer that
        maximizes this function minimizes the loss.
    """
    use_cuda = torch.cuda.is_available() and gpu is not None
    device = torch.device('cuda:' + str(gpu) if use_cuda else 'cpu')
    model = Net(learning_rate, weight_decay, max_epochs, gpu, dataset_name=dataset_name).to(device)

    train_loader, validation_loader, _ = getDataset(validation=True, dataset_name=dataset_name)
    model.fit(train_loader)
    best_val_loss, validation_accuracy = model.validation(validation_loader)

    # Save results in csv. The first column is the literal text 'iter':
    # no iteration counter is tracked here.
    fields = ('iter', best_val_loss, learning_rate, weight_decay, validation_accuracy)
    my_csv_row = ','.join(str(field) for field in fields) + '\n'
    with open(output_file, 'a') as file:
        file.write(my_csv_row)

    return -best_val_loss

# **Test**


In [0]:
from __future__ import print_function
# from optimization import evaluate_BAY
from bayes_opt import BayesianOptimization
import optunity
import csv
from datetime import datetime

def bayesian():
    """Maximize ``evaluate`` with the Bayesian optimizer.

    The search runs over the ``hyperparameters`` domains, using
    ``init_points`` initial points and ``evaluations - init_points``
    further iterations.

    Returns:
        A printable string containing the optimizer's ``max`` attribute.
    """
    optimizer = BayesianOptimization(f=evaluate, pbounds=hyperparameters)
    remaining_iterations = evaluations - init_points
    optimizer.maximize(init_points=init_points, n_iter=remaining_iterations)
    best_found = str(optimizer.max)
    return '\nResults with Bayesian optimizer: ' + best_found + '\n'

def quasi_random():
    """Maximize ``evaluate`` with optunity's 'sobol' solver.

    The search domains are taken from ``hyperparameters`` so that this
    search always explores the same box as the Bayesian one.

    Returns:
        A printable string containing the value returned by
        ``optunity.maximize``.
    """
    # optunity receives each domain as a keyword argument name=[lower, upper]
    box_constraints = {name: list(bounds) for name, bounds in hyperparameters.items()}
    outcome = optunity.maximize(f=evaluate, num_evals=evaluations, solver_name='sobol', **box_constraints)
    return '\nResult with quasiRandom optimizer: ' + str(outcome) + '\n'

if __name__ == '__main__':

    # ---- Bayesian run: write the section header, search, store the result ----
    now = datetime.now()
    with open(output_file, 'a', newline='') as results:
        results.writelines([
            '\nBayesian ' + now.isoformat() + '\n',
            '\nIter, Loss, Learning Rate, Weight Decay, Accuracy Validation\n',
        ])

    result_bay = bayesian()
    print(result_bay)

    with open(output_file, 'a') as results:
        results.write(result_bay + '\n')

    print('------------')

    # ---- quasi-random run: same steps with the sobol search ----
    now = datetime.now()
    with open(output_file, 'a') as results:
        results.writelines([
            'QuasiRandom ' + now.isoformat() + '\n',
            '\nIter, Loss, Learning Rate, Weight Decay, Accuracy Validation\n',
        ])

    result_qr = quasi_random()
    print(result_qr)

    with open(output_file, 'a') as results:
        results.write(result_qr + '\n')


0it [00:00, ?it/s]

|   iter    |  target   | learni... | weight... |
-------------------------------------------------
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


170500096it [00:06, 26984529.29it/s]                               


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
