# Importing relevant libraries

In [0]:
#Requirements
#pip install bayesian-optimization

In [0]:
import numpy as np
import pandas as pd
import gzip
import matplotlib.pyplot as plt
from matplotlib import gridspec
from scipy.interpolate import BSpline as spline
from time import time
from sklearn.model_selection import train_test_split
import keras
import tensorflow as tf
from tensorflow.keras import backend as K
import keras.layers as layers
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from tensorflow.keras.callbacks import TensorBoard
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Dropout, BatchNormalization, MaxPooling2D, Flatten, Activation, AveragePooling2D
from tensorflow.python.keras.optimizer_v2 import rmsprop
from bayes_opt import BayesianOptimization
from bayes_opt import BayesianOptimization
from bayes_opt import UtilityFunction
from functools import partial
%matplotlib inline

Using TensorFlow backend.


# Reading SVHN Cropped image data

In [0]:
from scipy.io import loadmat
# Load the SVHN cropped-digit MATLAB files from the current working directory.
# loadmat returns a dict; later cells read the 'X' (images) and 'y' (labels) entries.
# NOTE: despite the names, `x` is the whole training file and `y` the whole test file
# (not features/targets). Both names are rebound by the CSV-loading cell further down,
# so this cell must be re-run before re-running the splitting cell.
x = loadmat('train_32x32.mat')
y = loadmat('test_32x32.mat')

# Subsampling and splitting into training and testing data

In [0]:
NUM_CLASSES = 10
input_shape = (32, 32, 3)

# The .mat files store images with the sample index on the LAST axis; move it to the
# front so the arrays are indexed as (sample, row, column, channel).
train_features = np.transpose(x['X'], (3, 0, 1, 2))
train_labels = x['y']
test_features = np.transpose(y['X'], (3, 0, 1, 2))
test_labels = y['y']

# Stage 1 (subsampling): split off a 20% hold-out of the training file. Only that
# hold-out (`validation_*`) is used afterwards; the 80% part is overwritten in stage 2.
train_features, validation_features, train_labels, validation_labels = train_test_split(
    train_features,
    train_labels,
    test_size=0.2,
    random_state=0,
)
# Stage 2: split the 20% subsample again into the final 80/20 train/validation sets.
train_features, validation_features, train_labels, validation_labels = train_test_split(
    validation_features,
    validation_labels,
    test_size=0.2,
    random_state=0,
)

# Flatten the label columns from (n, 1) to (n,).
train_labels = train_labels.reshape(-1)
validation_labels = validation_labels.reshape(-1)

# Converting data into relevant data format

In [0]:
use_bfloat16=False
img_rows, img_cols = 32, 32

cast_dtype = tf.bfloat16 if use_bfloat16 else tf.float32

x_test = validation_features
y_test = validation_labels

x_train = train_features
y_train = train_labels


def _one_hot_labels(labels):
    """One-hot encode integer labels into exactly NUM_CLASSES columns.

    The labels are expected to lie in 1..NUM_CLASSES (SVHN stores digit 0 as
    label 10 - TODO confirm against the dataset description), so column 0 of the
    encoding is unused and dropped. `num_classes` is passed explicitly: without
    it, to_categorical sizes the matrix from the largest label present, and a
    split lacking the highest label would come out narrower than the softmax.
    """
    return tf.keras.utils.to_categorical(labels, num_classes=NUM_CLASSES + 1)[:, 1:]


def _make_dataset(features, one_hot, batch_size=64):
    """Build an endlessly repeating, unshuffled, batched tf.data pipeline.

    Images are cast to `cast_dtype`; labels pass through unchanged. Incomplete
    trailing batches are dropped so every batch has exactly `batch_size` items.
    """
    ds = tf.data.Dataset.from_tensor_slices((features, one_hot))
    ds = ds.repeat()
    ds = ds.map(lambda image, label: (tf.cast(image, cast_dtype), label))
    return ds.batch(batch_size, drop_remainder=True)


# convert class vectors to binary class matrices
y_train = _one_hot_labels(y_train)
y_test = _one_hot_labels(y_test)

# train dataset (shuffling is deliberately left off, as in the original pipeline)
train_ds = _make_dataset(x_train, y_train)

# eval dataset (built from the validation split)
eval_ds = _make_dataset(x_test, y_test)

(11721, 32, 32, 3) (11721,)
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


# Function which holds the given CNN model

In [0]:
def get_model(input_shape, dropout1_rate=0.25, dropout2_rate=0.5):
    """Assemble the (uncompiled) Sequential CNN used to classify SVHN digits.

    Architecture: three ReLU convolutions (32, 64, 128 filters) -> flatten ->
    two 1024-unit ReLU dense layers, each followed by dropout -> softmax over
    NUM_CLASSES.

    Args:
      input_shape: shape of one input image. For SVHN, it is (32,32,3).
      dropout1_rate: float between 0 and 1. Drop rate of the dropout layer
        placed after `dense_1`.
      dropout2_rate: float between 0 and 1. Drop rate of the dropout layer
        placed after `dense_2`.
    Returns:
      a Keras Sequential model; the caller is responsible for compiling it.
    """
    # Note: tf.keras.backend.clear_session() is not called here.

    # Settings shared by every convolution / hidden dense layer.
    conv_common = dict(strides=1, activation='relu',
                       kernel_initializer="he_uniform", bias_initializer="zeros")
    dense_common = dict(activation='relu',
                        kernel_initializer="glorot_uniform", bias_initializer="zeros")

    layer_stack = [
        Conv2D(32, kernel_size=(5, 5), padding='valid',
               input_shape=input_shape, name="conv2d_1", **conv_common),
        Conv2D(filters=64, kernel_size=(3, 3), padding='valid',
               name="conv2d_2", **conv_common),
        Conv2D(filters=128, kernel_size=(3, 3), padding='same',
               name="conv2d_3", **conv_common),
        Flatten(name="flatten"),
        Dense(units=1024, name="dense_1", **dense_common),
        Dropout(dropout1_rate, name="dropout_1"),
        Dense(units=1024, name="dense_2", **dense_common),
        Dropout(dropout2_rate, name="dropout_2"),
        Dense(NUM_CLASSES, activation='softmax', name="dense_3"),
    ]
    return Sequential(layer_stack)

# Function which builds the CNN with the given hyperparameters, trains it for one epoch, and returns its validation accuracy

In [0]:
def fit_with(input_shape, verbose, dropout1_rate, dropout2_rate, lr, batch_size, decay_rate):
    """Train a fresh CNN for one epoch and return its accuracy on `eval_ds`.

    This is the black-box objective handed to the Bayesian optimizer.

    Args:
      input_shape: shape of one input image, forwarded to `get_model`.
      verbose: Keras verbosity used for `model.fit`.
      dropout1_rate: drop rate of the first dropout layer.
      dropout2_rate: drop rate of the second dropout layer.
      lr: Adam learning rate.
      batch_size: number of training steps run in the single epoch. Despite the
        name it does NOT change the batch size: `train_ds` is already batched
        (64 per batch), so this value is used as `steps_per_epoch`. The
        optimizer samples it as a float, so it is rounded to a positive int.
      decay_rate: learning-rate decay passed to Adam.
    Returns:
      float accuracy measured on 10 batches of `eval_ds`.
    """
    # Create the model using the specified hyperparameters.
    model = get_model(input_shape, dropout1_rate, dropout2_rate)

    adam = tf.keras.optimizers.Adam(lr=lr, decay=decay_rate)
    model.compile(loss=tf.keras.losses.categorical_crossentropy,
                  optimizer=adam,
                  metrics=['accuracy'])

    # Keras expects an integer step count; the search space yields floats such as 227.2.
    steps_per_epoch = max(1, int(round(batch_size)))

    # Train the model with the train dataset.
    model.fit(x=train_ds, epochs=1, steps_per_epoch=steps_per_epoch, verbose=verbose)

    # Evaluate the model with the eval dataset (only the first 10 batches are scored).
    score = model.evaluate(eval_ds, steps=10, verbose=0)
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])

    # score[0] is the loss, score[1] the accuracy; the optimizer maximizes accuracy.
    return score[1]

# Keras verbosity used while training inside the objective (0 = silent).
verbose = 0
# Bind the two fixed leading arguments so that the optimizer only has to supply the
# five hyperparameters (dropout1_rate, dropout2_rate, lr, batch_size, decay_rate) as keywords.
fit_with_partial = partial(fit_with, input_shape, verbose)

# Manual smoke test of the objective (uncomment to run one training round):
# fit_with_partial(dropout1_rate=0.25, dropout2_rate=0.5, lr=0.001, batch_size=512, decay_rate=0.5)

# Creating the optimizer object which maximizes the target function

In [0]:
# Search space: (lower, upper) bound for every hyperparameter of the objective.
# Note that `batch_size` is consumed by fit_with as the number of training steps.
pbounds = dict(
    dropout1_rate=(0, 0.5),
    dropout2_rate=(0, 0.5),
    lr=(1e-6, 1e-1),
    batch_size=(32, 512),
    decay_rate=(0, 0.5),
)

# Optimizer that searches `pbounds` for the maximum of the black-box objective.
# verbose = 0 is silent
# verbose = 1 prints only when a maximum is observed
# verbose = 2 (used here) logs every evaluated point
optimizer = BayesianOptimization(f=fit_with_partial, pbounds=pbounds, verbose=2, random_state=1)

# After optimizer.maximize(...) has run, the best point is available as:
# print(optimizer.max)

CPU times: user 279 µs, sys: 943 µs, total: 1.22 ms
Wall time: 1.03 ms


In [0]:
# Load a recorded one-dimensional sweep: each row's `params` string describes the
# configuration and `accuracy` holds the measured result. Swap the file name to look
# at a different hyperparameter.
df = pd.read_csv("lr.csv")
# df = pd.read_csv("decayrate.csv")
# df = pd.read_csv("dropout.csv")
# df = pd.read_csv("DROPOUT_P1.csv")

# Strip the constant part of the `params` string so that only the swept value is left
# (the values stay strings). Use the pattern that matches the loaded file.
# LR sweep:
df['params'] = df['params'].replace(
    to_replace="DROPOUT_P1=0.2, DROPOUT_P2=0.2, batch_size=128, decay=1e-06, lr=",
    value="",
    regex=True,
)
# DECAY sweep:
# df['params'] = df.params.replace({"DROPOUT_P1=0.2, DROPOUT_P2=0.2, batch_size=128, decay=":"", ", lr=0.02":""},regex=True)
# DROPOUT 2 sweep:
# df['params'] = df.params.replace({"DROPOUT_P1=0.2, DROPOUT_P2=":"", ", batch_size=128, decay=1e-06, lr=0.02":""},regex=True)

# NOTE: this rebinds `x` and `y`, which previously held the loaded .mat files.
x = np.array(df['params'])
y = np.array(df['accuracy'])

# GP Regression and Plotting the results

In [0]:
# Refits the optimizer's Gaussian process on the observations and evaluates it on a grid
def posterior(optimizer, x_obs, y_obs, x):
    """Fit the optimizer's internal GP and return its prediction on `x`.

    Args:
      optimizer: object exposing the Gaussian-process regressor as `_gp`.
      x_obs: observed inputs the GP is fitted on.
      y_obs: observed targets matching `x_obs`.
      x: inputs at which the fitted GP is evaluated.
    Returns:
      (mean, std) tuple as produced by `predict(..., return_std=True)`.

    Side effect: the GP stored on `optimizer` is refit in place.
    """
    gp = optimizer._gp
    gp.fit(x_obs, y_obs)
    mean, std = gp.predict(x, return_std=True)
    return mean, std

# Function which plots the target function, the mean and confidence, and the sampled points
def plot_gp(optimizer, x, y, param_name="decay_rate", kappa=5, xlim=(0, 50)):
    """Plot the recorded target, the GP posterior and the UCB utility; save to fig.png.

    Args:
      optimizer: BayesianOptimization instance that has already evaluated points.
        Its `space`, `res` and `_gp` are read, and `_gp` is refit (via `posterior`)
        on the observations of `param_name` only.
      x: 1-D array of hyperparameter values at which the target was recorded.
        Plotted as given; a float copy is fed to the GP.
      y: 1-D array of recorded target values matching `x`.
      param_name: key of the hyperparameter in each `res["params"]` used as the
        observed input. Must name the same hyperparameter that `x` holds.
      kappa: exploration weight of the plotted UCB utility.
      xlim: x-limits applied to the prediction axes and to the utility panel.
    Returns:
      None. The figure is written to "fig.png" in the working directory.
    """
    fig = plt.figure(figsize=(16, 10))
    steps = len(optimizer.space)
    fig.suptitle(
        'Gaussian Process and Utility Function After {} Steps'.format(steps),
        fontdict={'size':30}
    )

    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
    acq = plt.subplot(gs[1])
    plt.subplots_adjust(hspace=1)

    # One observed input column (the selected hyperparameter) and the matching targets.
    x_obs = np.array([[res["params"][param_name]] for res in optimizer.res])
    y_obs = np.array([res["target"] for res in optimizer.res])

    # The GP needs a numeric (n, 1) column; `x` may hold numeric strings.
    x_grid = np.asarray(x, dtype=float).reshape(-1, 1)

    # Fitting the gaussian, and sampling from it
    mu, sigma = posterior(optimizer, x_obs, y_obs, x_grid)

    # Three axes are stacked on the same grid cell: target, prediction, observations.
    # NOTE(review): each axes keeps its own scale, so the layers are only aligned when
    # their ranges happen to agree - confirm this is intended.
    ax = fig.add_subplot(gs[0], label="1")
    ax2 = fig.add_subplot(gs[0], label="2", frame_on=False)
    ax3 = fig.add_subplot(gs[0], label="3", frame_on=False)

    ax.plot(x, y, linewidth=3, label='Target')
    ax2.plot(x, mu, '--', color='k', label='Prediction')
    ax2.set_xticklabels([])
    ax2.set_yticklabels([])
    # A two-sided 95% interval of a Gaussian is mean +/- 1.96 standard deviations.
    half_width = 1.96 * sigma
    ax2.fill(np.concatenate([x, x[::-1]]),
             np.concatenate([mu - half_width, (mu + half_width)[::-1]]),
             alpha=.6, fc='c', ec='None', label='95% confidence interval')
    ax2.set_xlim(xlim)
    ax2.set_ylim((None, None))
    ax2.set_ylabel('f(x)', fontdict={'size':20})
    ax3.plot(x_obs, y_obs, 'D', markersize=8, label=u'Observations', color='r')
    ax3.set_xticklabels([])
    ax3.set_yticklabels([])
    plt.sca(ax)
    plt.xticks(rotation='vertical')

    # Calculates the utility or acquisition function, and the next best guess
    utility_function = UtilityFunction(kind="ucb", kappa=kappa, xi=0)
    utility = utility_function.utility(x_grid, optimizer._gp, 0)
    acq.plot(x, utility, label='Utility Function', color='purple')
    acq.plot(x[np.argmax(utility)], np.max(utility), '*', markersize=15,
             label=u'Next Best Guess', markerfacecolor='gold', markeredgecolor='k', markeredgewidth=1)
    acq.set_xlim(xlim)
    acq.set_ylim((0, np.max(utility) + 0.5))
    acq.set_ylabel('Utility', fontdict={'size':20})
    acq.set_xlabel('x', fontdict={'size':20})
    plt.sca(acq)
    plt.xticks(rotation='vertical')

    # Top panel: a single legend gathering the entries of all three stacked axes,
    # attached to the top-most one so it is not painted over.
    handles, labels = [], []
    for layer in (ax, ax2, ax3):
        layer_handles, layer_labels = layer.get_legend_handles_labels()
        handles.extend(layer_handles)
        labels.extend(layer_labels)
    ax3.legend(handles, labels, loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)
    # Bottom panel: legend of the utility curve and the next-guess marker.
    acq.legend(loc=2, bbox_to_anchor=(1.01, 1), borderaxespad=0.)

    # Saving the image
    plt.savefig("fig.png")

In [0]:
%%time
# Run the Bayesian search: 10 initial points followed by 10 further iterations, with
# kappa=10 passed to maximize(). Every evaluated point trains a new CNN, so this cell is slow.
optimizer.maximize(init_points=10, n_iter=10,kappa=10)
# Draw the GP / utility figure. `x` and `y` are the arrays produced by the CSV cell.
# NOTE(review): plot_gp fits the GP on the optimizer's "decay_rate" observations and uses
# kappa=5 for the plotted utility - confirm both match the loaded sweep and the search above.
plot_gp(optimizer, x, y)

|   iter    |  target   | batch_... | decay_... | dropou... | dropou... |    lr     |
-------------------------------------------------------------------------------------
Test loss: 1.6039140939712524
Test accuracy: 0.5359375
| [0m 1       [0m | [0m 0.5359  [0m | [0m 227.2   [0m | [0m 0.3602  [0m | [0m 5.719e-0[0m | [0m 0.1512  [0m | [0m 0.01468 [0m |
Test loss: 2.2409221172332763
Test accuracy: 0.20625
| [0m 2       [0m | [0m 0.2062  [0m | [0m 75.21   [0m | [0m 0.09313 [0m | [0m 0.1728  [0m | [0m 0.1984  [0m | [0m 0.05388 [0m |
Test loss: 2.1750051259994505
Test accuracy: 0.2859375
| [0m 3       [0m | [0m 0.2859  [0m | [0m 228.2   [0m | [0m 0.3426  [0m | [0m 0.1022  [0m | [0m 0.4391  [0m | [0m 0.00274 [0m |
Test loss: 1.7480199098587037
Test accuracy: 0.4484375
| [0m 4       [0m | [0m 0.4484  [0m | [0m 345.8   [0m | [0m 0.2087  [0m | [0m 0.2793  [0m | [0m 0.07019 [0m | [0m 0.01981 [0m |
Test loss: 2.2399300575256347
Test accurac

# Plotting reference:
https://github.com/fmfn/BayesianOptimization/blob/master/examples/visualization.ipynb