# Deep Learning Short Project 
#### Author - Shaurya Pathak. 

This script implements a basic 2d convolutional neural network classifier which finds the best parameters for number of layers and number of filters through a random search algorithm, after which it uses ensemble averaging which involves a combination of neural networks training on subsets of data and voting on the class. Finally this script implements bayesian optimisation using the hyperopt library.

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, Activation, Input, Conv2D, MaxPooling2D, Flatten, Softmax
from keras import optimizers, regularizers
from sklearn import metrics
import seaborn as sns
import keras
import random
from hyperopt import fmin, tpe, hp

In [2]:
# Defining a neural network class that is hard coded to work for this assignment
class nn:
    def __init__(self, n_classes, n_layers, n_filters, filter_size, stride_length, d_prob, epsilon):

        '''
        Instantiates a sequential neural network based on the input parameters, it is also hard coded to accommodate inputs from this assignment assuming input size of (98,50), pool-size of (2,2), relu activation, same padding, adam optimiser and categorical-cross-entropy loss
        
        Args: 
            n_classes (int): The number of classes
            n_layers (int): The number of layers, in this context a layer is a convolutional layer + batch normalisation layer + activation + max pool. This does not include the input layer or the dense softmax layer.
            n_filters (int): The number of filters added to each convolutional layer
            filter_size (tuple (int)): The size of each filter. Should be in format (i, j), i.e. a tuple
            stride_length (tuple (int)): The rate at which each filter will convolve over the input. Should be in format (i, j), i.e. a tuple
            d_prob (float .00): The dropout probability of the dropout layer. Ideally between (0.00:0.30)
            epsilon (float .000): The learning rate of the model
        '''
        # Initialise the model
        self.model = Sequential()
        
        # Input Layer
        self.model.add(Input(shape=(98,50,1)))

        # Convolutional Layers: nothing special here just skips adding a pooling layer at whatever the last convolutional layer is to accommdate for the dropout
        for i in range(n_layers):
            self.model.add(Conv2D(n_filters, kernel_size = filter_size, padding='same'))
            self.model.add(BatchNormalization())
            self.model.add(Activation('relu'))
            if i != n_layers-1:
                self.model.add(MaxPooling2D(pool_size =(2, 2), strides=stride_length, padding= 'same'))
        
        self.model.add(Dropout((d_prob)))
        self.model.add(MaxPooling2D(pool_size =(12, 1), strides=(1, 1), padding = 'same'))
        # Final Layer
        self.model.add(Flatten())
        self.model.add(Dense(n_classes))
        self.model.add(Softmax())

        # Set the optimization options and compile the model
        opt = optimizers.Adam(learning_rate=epsilon)
        self.model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])
        
    def fit(self, x_train, y_train):
        ''' 
        Fits the designed model as it was first initialised
        '''
        callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=0)
        self.history = self.model.fit(x_train, y_train, batch_size=16, epochs=20, validation_split=0.2, callbacks=callback, verbose=0)

    def evaluate(self, x_val, y_val):
        '''
        Stores the keras evaluation score list and also prints the validation accuracy as a percentage
        '''
        self.score = self.model.evaluate(x_val, y_val, verbose=0)
        print([f'Validation Accuracy: {np.round(self.score[1]*100, 2)}%'])

In [3]:
# Image dataset loading code
train_ds = tf.keras.utils.image_dataset_from_directory(
    directory='speechImageData/TrainData',
    labels='inferred',
    color_mode="grayscale",
    label_mode='categorical',
    batch_size=128,
    image_size=(98, 50)
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    directory='speechImageData/ValData',
    labels='inferred',
    color_mode="grayscale",
    label_mode='categorical',
    batch_size=128,
    image_size=(98, 50)
)

Found 2001 files belonging to 12 classes.
Found 1171 files belonging to 12 classes.


In [4]:
# Extract the  training input images and output class labels
x_train = []
y_train = []
for images, labels in train_ds.take(-1):
    x_train.append(images.numpy())
    y_train.append(labels.numpy())

x_train = np.concatenate(x_train, axis=0)
y_train = np.concatenate(y_train, axis=0)

# Extract the validation input images and output class labels
x_val = []
y_val = []
for images, labels in val_ds.take(-1):
    x_val.append(images.numpy())
    y_val.append(labels.numpy())

x_val = np.concatenate(x_val, axis=0)
y_val = np.concatenate(y_val, axis=0)

In [5]:
y_val.shape

(1171, 12)

In [6]:
# This cell performs the random search algorithm, by randomly selecting points from two pre-defined arrays containing hyperparameters for number of layers and number of filters 

# Seeding for reproducability
np.random.seed(0)

# Defining the number of random points to investigate variable
rndm_pts = 9

# These two just create the range of hyperparameters our for loop is going to use while randomly sampling
layers = np.arange(1, 10)
filters = np.power(2, np.arange(9))

# At this stage I don't save the instantiated models in a list because we're just not interested in that yet, just the final performance
for i in range(rndm_pts):

    # Randomly selecting hyperparameters from the structures defined outside of the loop
    l = np.random.choice(layers)
    f = np.random.choice(filters)

    # Instantiating a temporary model
    model = nn(n_classes= y_train.shape[1], 
                        n_layers= l,
                        n_filters= f,
                        filter_size= (3,3),
                        stride_length= (2,2),
                        d_prob= 0.2,
                        epsilon= 0.001)
    
    # Printing our found results
    model.fit(x_train, y_train)
    print('layers:', l,'filters: ', f)
    model.evaluate(x_val, y_val)
    # Printing empty line for neatness :)
    print('')

layers: 6 filters:  1
['Validation Accuracy: 14.18%']



KeyboardInterrupt: 

Over multiple iterations of running the random search algorithm it is determined that on average 6 layers and 128 filters performs the best

In [None]:
# This cell performs model averaging, by instantiating multiple models using the best hyperparameters found through the random search algorithm.

# Seeding for reproducability
random.seed(0)

# Defining the number of models variable and constructing a list to store said models 
n_models = 6
model_list = list()

# Following block of code creates subsets of data based on the number of models we decide we want to average across
nsamples = len(x_train)

# Create a data index
data_index = list(range(1,nsamples))

# Following section simply trains the models with their respective subsets
# Printing the best parameters discovered beforehand 
        
l = 6
f = 128

print('layers:', l,'filters: ', f,'\n')

for i in range(n_models):

    model = nn(n_classes= y_train.shape[1], 
                        n_layers= l,
                        n_filters= f,
                        filter_size= (3,3),
                        stride_length= (2,2),
                        d_prob= 0.2,
                        epsilon= 0.001)
    
    # create random index using sampling with replacement
    rndx = random.choices(data_index, k=nsamples)

    # initialise data set 1
    tx = np.zeros([nsamples,98,50,1])
    ty = np.zeros([nsamples,12])
    
    # resample training data with replacement
    for j in range(nsamples):
        tx[j] = x_train[rndx[j],:,:,:]
        ty[j] = y_train[rndx[j],:]

    model.fit(tx, ty)
    model.evaluate(x_val, y_val)
    # Printing empty line for neatness :)
    print('')
    
    model_list.append(model)

# Getting model predictions from the appended list of models and storing them in this huge list
predictions = [model.model.predict(x_val, verbose=0) for model in model_list]

# Summing and converting our predictions into integers 
prediction_ensemble = np.sum(predictions, axis=0)
yhats = np.argmax(prediction_ensemble, axis=1)
ys = np.argmax(y_val, axis=1)

print('Ensemble Accuracy: ', metrics.accuracy_score(ys, yhats))

layers: 6 filters:  128 

['Validation Accuracy: 71.39%']

['Validation Accuracy: 65.76%']

['Validation Accuracy: 70.62%']

['Validation Accuracy: 71.14%']

['Validation Accuracy: 67.04%']

['Validation Accuracy: 71.56%']

Ensemble Accuracy:  0.762596071733561


In [None]:
# This cell performs Bayesian optimisation using the hyperopt library, reusing code from the neural network class

def bayesian_function(params):

    '''
    Instantiates a sequential neural network based on the input parameters, it is also hard coded to accommodate inputs from this assignment assuming input size of (98,50), pool-size of (2,2), relu activation, same padding, adam optimiser and categorical-cross-entropy loss
    
    Args: 
        n_classes (int): The number of classes
        n_layers (int): The number of layers, in this context a layer is a convolutional layer + batch normalisation layer + activation + max pool. This does not include the input layer or the dense softmax layer.
        n_filters (int): The number of filters added to each convolutional layer
        filter_size (tuple (int)): The size of each filter. Should be in format (i, j), i.e. a tuple
        stride_length (tuple (int)): The rate at which each filter will convolve over the input. Should be in format (i, j), i.e. a tuple
        d_prob (float .00): The dropout probability of the dropout layer. Ideally between (0.00:0.30)
        epsilon (float .000): The learning rate of the model
    '''
    # Initialise the model
    model = Sequential()
    
    # Input Layer
    model.add(Input(shape=(98,50,1)))

    # Convolutional Layers: nothing special here just skips adding a pooling layer at whatever the last convolutional layer is to accommdate for the dropout
    for i in range(params['layers']):
        model.add(Conv2D(params['filters'], kernel_size = (3,3), padding='same'))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        if i != params['layers']-1:
            model.add(MaxPooling2D(pool_size =(2, 2), strides=(2, 2), padding= 'same'))
    
    model.add(Dropout((0.2)))
    model.add(MaxPooling2D(pool_size =(12, 1), strides=(1, 1), padding = 'same'))
    # Final Layer
    model.add(Flatten())
    model.add(Dense(12))
    model.add(Softmax())

    # Set the optimization options and compile the model
    opt = optimizers.Adam(learning_rate=0.001)
    model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

    # Training the model and evaluating on the testing set 
    model.fit(x_train, y_train, epochs=20, batch_size=16, verbose=0)
    loss, accuracy = model.evaluate(x_val, y_val, verbose=0)

    return {'loss': -accuracy, 'status': 'ok'}


hyperparams = {'layers' : hp.choice('layers', np.arange(1, 10)), 'filters' : hp.choice('filters', np.power(2, np.arange(9)))}

bayesian_params = fmin(fn=bayesian_function, space=hyperparams, algo=tpe.suggest, max_evals=20)
print('Best parameters determined by Bayesian Optimisation are: ', bayesian_params)
                                                                                         
bayesian_accuracy = bayesian_function(bayesian_params)
print('Validation accuracy with Bayesian parameters is: ', bayesian_accuracy)

100%|██████████| 20/20 [09:54<00:00, 29.70s/trial, best loss: -0.7352690100669861]
Best parameters determined by Bayesian Optimisation are:  {'filters': 8, 'layers': 6}
Validation accuracy with Bayesian parameters is:  {'loss': -0.5046968460083008, 'status': 'ok'}


Curiously the optimal parameters determined by Bayesian Optimisation are far from the best and the random search algorithm performed better in comparison