# Bayesian Optimisation for Convolutional Networks

In [15]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

In [31]:
import numpy as np
import os
import matplotlib.pyplot as plt

In [33]:
os.chdir('/Users/matthewkeys/Desktop/BigDataTopicsII/Hrvoje/fashionMNIST-bayesian-optimization')

In [17]:
# Hyper parameters
num_epochs = 1
num_classes = 10
batch_size = 100
learning_rate = 0.001

In [19]:
# MNIST dataset
train_dataset = torchvision.datasets.FashionMNIST(root='/Users/matthewkeys/Desktop/BigDataTopicsII/Hrvoje/Group Assignment',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.FashionMNIST(root='/Users/matthewkeys/Desktop/BigDataTopicsII/Hrvoje/Group Assignment',
                                          train=False,
                                          transform=transforms.ToTensor())

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [20]:
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [22]:
# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    def __init__(self, num_classes=10, num_channels_1 = 16, num_channels_2 = 32):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, num_channels_1, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(num_channels_1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(num_channels_1, num_channels_2, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(num_channels_2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.fc = nn.Linear(7*7*num_channels_2, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

In [23]:
def test_model(num_channels_1, num_channels_2, learning_rate, num_epochs = 1):
    model = ConvNet(num_classes, num_channels_1, num_channels_2)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

    # Test the model
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

    return correct / total


In [66]:
error = test_model(15, 5, 0.1)

Epoch [1/1], Step [100/600], Loss: 0.5638
Epoch [1/1], Step [200/600], Loss: 0.5203
Epoch [1/1], Step [300/600], Loss: 0.5636
Epoch [1/1], Step [400/600], Loss: 0.4894
Epoch [1/1], Step [500/600], Loss: 0.5056
Epoch [1/1], Step [600/600], Loss: 0.8197
Test Accuracy of the model on the 10000 test images: 83.09 %


###### Bayesian Optimisation with bayes_opt

In [42]:
from bayes_opt import BayesianOptimization
from sklearn.cross_validation import cross_val_score, StratifiedKFold, StratifiedShuffleSplit
from sklearn.metrics import log_loss, matthews_corrcoef, roc_auc_score
from sklearn.preprocessing import MinMaxScaler
import contextlib
from tqdm import tqdm

In [48]:
# Set the ranges of the hyperparamaters we want to explore 
ConvNet_param_bounds = {
    'num_channels_1': (1,3),
    'num_channels_2': (1,3),
    'learning_rate':  (0.05, 0.95)
}

# Set the GP Parameters 
gp_params = {'kernel': None,
             'alpha': 1e-5}

# Initialize BO object 
bo = BayesianOptimization(test_model, ConvNet_param_bounds)

In [None]:
# Initialize 
bo.explore

In [50]:
bo.maximize(init_points=5, n_iter=15, acq='ucb', kappa=2)

[31mInitialization[0m
[94m-------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   learning_rate |   num_channels_1 |   num_channels_2 | 


ValueError: out_channels must be divisible by groups

In [None]:
print(bo.res['max'])

###### Bayesian Optimisation with pyGPGO
Matern32 covariance:



In [52]:
import numpy as np
from pyGPGO.covfunc import matern32
from pyGPGO.acquisition import Acquisition
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.GPGO import GPGO

In [62]:
cov = matern32() # other kernel types: sqdExp, matern, matern52, gammaExp, rationalQuadratic
gp = GaussianProcess(cov)
acq = Acquisition(mode='ExpectedImprovement')  # other modes: UCB, ProbabilityImprovement, loads more
param = {'num_channels_1': ('int', [1, 5]),
         'num_channels_2': ('int', [1, 10]),
         'learning_rate': ('int', [0.1,1])
        }

In [63]:
np.random.seed(1337)
gpgo = GPGO(gp, acq, test_model, param)
gpgo.run(max_iter=10)

Evaluation 	 Proposed point 	  Current eval. 	 Best eval.
Epoch [1/1], Step [100/600], Loss: 2.4442
Epoch [1/1], Step [200/600], Loss: 2.3542
Epoch [1/1], Step [300/600], Loss: 2.3591
Epoch [1/1], Step [400/600], Loss: 2.3459
Epoch [1/1], Step [500/600], Loss: 2.3538
Epoch [1/1], Step [600/600], Loss: 2.3192
Test Accuracy of the model on the 10000 test images: 6.58 %
Epoch [1/1], Step [100/600], Loss: 2.3471
Epoch [1/1], Step [200/600], Loss: 2.3704
Epoch [1/1], Step [300/600], Loss: 2.3869
Epoch [1/1], Step [400/600], Loss: 2.3263
Epoch [1/1], Step [500/600], Loss: 2.3982
Epoch [1/1], Step [600/600], Loss: 2.3833
Test Accuracy of the model on the 10000 test images: 10.03 %
Epoch [1/1], Step [100/600], Loss: 2.3673
Epoch [1/1], Step [200/600], Loss: 2.3318
Epoch [1/1], Step [300/600], Loss: 2.4298
Epoch [1/1], Step [400/600], Loss: 2.3635
Epoch [1/1], Step [500/600], Loss: 2.3975
Epoch [1/1], Step [600/600], Loss: 2.3059
Test Accuracy of the model on the 10000 test images: 10.81 %
init

ValueError: out_channels must be divisible by groups