# Import Libraries

In [None]:
import tensorflow
import pandas as pd
import time
import numpy as np
import matplotlib.pyplot as plt

# tensorflow libraries
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

# sklearn libraries are useful for preprocessing, performance measures, etc.
from sklearn import preprocessing
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

# Read Data

In [None]:
# Load the feature CSV from the working directory and preview its first rows.
df = pd.read_csv('./features_30_sec.csv')
df.head()

In [None]:
# Number of rows per class label.
df['label'].value_counts()

## Split and scale dataset

In [None]:
# Columns removed from the model inputs by prepare_dataset: the target ('label') plus 'filename' and 'length'.
columns_to_drop = ['label', 'filename', 'length']


def prepare_dataset(df, columns_to_drop, test_size, random_state):
    """Encode the labels, split into train/test and standardise the input features.

    The caller's ``df`` is left untouched: the integer-encoded labels are written
    to a new frame, so cells that displayed ``df`` earlier stay valid.

    Args:
        df: DataFrame containing a 'label' column and the feature columns.
        columns_to_drop: columns removed from the inputs (must include 'label').
        test_size: forwarded to ``train_test_split``.
        random_state: forwarded to ``train_test_split`` to make the split reproducible.

    Returns:
        Tuple ``(x_train_scaled, y_train, x_test_scaled, y_test)`` of numpy arrays.
    """
    # Encode the labels from 0 to n_classes-1 on a copy of the frame
    label_encoder = preprocessing.LabelEncoder()
    df_encoded = df.assign(label=label_encoder.fit_transform(df['label']))

    # divide data into train and test
    df_train, df_test = train_test_split(df_encoded, test_size=test_size, random_state=random_state)

    # scale the training inputs
    x_train = df_train.drop(columns_to_drop, axis=1)
    y_train = df_train['label'].to_numpy()

    standard_scaler = preprocessing.StandardScaler()
    x_train_scaled = standard_scaler.fit_transform(x_train)

    # scale the test inputs with the statistics fitted on the training inputs only,
    # so no information from the test partition leaks into the scaler
    x_test = df_test.drop(columns_to_drop, axis=1)
    x_test_scaled = standard_scaler.transform(x_test)
    y_test = df_test['label'].to_numpy()

    return x_train_scaled, y_train, x_test_scaled, y_test

In [None]:
# 70:30 train/test split; random_state is fixed so the partition is the same on every run.
X_train, y_train, X_test, y_test = prepare_dataset(df, columns_to_drop, test_size=0.3, random_state=0)

# Show the shapes of the scaled inputs and encoded labels for both partitions.
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

## Question 1

Design a feedforward deep neural network (DNN) which consists of an input layer, one hidden layer of 16 neurons with ReLU activation function, and an output softmax layer. Use stochastic gradient descent with the ‘adam’ optimizer with default parameters, and batch size = 1. Apply dropout of probability 0.3 to the hidden layer. Divide the dataset into a 70:30 ratio for training and testing. Use appropriate scaling of input features. We assume that there are only two datasets here: training and test. We will look into validation from Question 2 onwards.

In [None]:
class Question1():
    """Baseline network for Question 1: one ReLU hidden layer, dropout 0.3, 10-unit softmax output.

    Stores the train/test arrays and wraps building, training and plotting of a Keras model.
    The test arrays are passed to ``fit`` as ``validation_data``, so the ``val_*`` entries
    of the training history are test-set metrics.
    """

    def __init__(self, X_train, y_train, X_test, y_test, epochs=50, num_hidden_neurons=16, batch_size=1):
        """Store data and hyperparameters and seed numpy and TensorFlow with a fixed seed (0)."""
        self.epochs = epochs
        self.num_hidden_neurons = num_hidden_neurons
        self.batch_size = batch_size
        self.seed = 0
        self.history = None  # set by train_model()

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

        np.random.seed(self.seed)
        tf.random.set_seed(self.seed)

    def create_model(self, num_hidden_neurons=None):
        """Build and compile ``self.model``.

        Args:
            num_hidden_neurons: hidden-layer width; falls back to the constructor value when omitted.
        """
        num_hidden_neurons = num_hidden_neurons or self.num_hidden_neurons
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(units=num_hidden_neurons, activation='relu'),
            tf.keras.layers.Dropout(rate=0.3, seed=self.seed),
            tf.keras.layers.Dense(units=10, activation='softmax')
        ])

        # Sparse loss: labels are expected as integer class ids, not one-hot vectors.
        self.model.compile(optimizer='adam',
                           loss='sparse_categorical_crossentropy',
                           metrics=['accuracy'])

    def summarize_model(self):
        """Print the Keras layer summary of ``self.model``."""
        # summary() prints by itself and returns None; wrapping it in print() would emit a stray "None".
        self.model.summary()

    def train_model(self, batch_size=None):
        """Fit ``self.model`` on the training data and keep the Keras History in ``self.history``.

        Args:
            batch_size: mini-batch size; falls back to the constructor value when omitted.
        """
        batch_size = batch_size or self.batch_size
        self.history = self.model.fit(x=self.X_train, y=self.y_train,
                                      batch_size=batch_size,
                                      epochs=self.epochs,
                                      verbose=1,
                                      validation_data=(self.X_test, self.y_test))

    def plot_model(self, variable='loss'):
        """Plot the training and test curves of ``variable`` ('loss' or 'accuracy') against epochs.

        Raises:
            RuntimeError: if train_model() has not been called yet.
        """
        if self.history is None:
            raise RuntimeError('No training history available; call train_model() first.')

        plt.plot(self.history.history[variable], label='Model training {}'.format(variable))
        plt.plot(self.history.history['val_{}'.format(variable)], label='Model testing {}'.format(variable))
        plt.title('Model {}'.format(variable))
        plt.ylabel('{}'.format(variable))
        plt.xlabel('epoch')
        plt.legend(loc="best")
        plt.show()

a) Use the training dataset to train the model for 50 epochs. Note: Use 50 epochs for subsequent experiments.

In [None]:
# Build and train the Question 1 network with the class defaults (50 epochs, 16 hidden neurons, batch size 1).
question1 = Question1(X_train, y_train, X_test, y_test)
question1.create_model()
question1.train_model()

b) Plot accuracies on training and test data against training epochs and
comment on the plots.

In [None]:
# Training vs. test accuracy per epoch.
question1.plot_model('accuracy')

c) Plot the losses on training and test data against training epochs. State the approximate number of epochs where the test error begins to converge.

In [None]:
# Training vs. test loss per epoch.
question1.plot_model('loss')

## Question 2

In this question, we will compare the performance of the model using stochastic gradient descent and mini-batch gradient descent, as well as determining the optimal batch size for mini-batch gradient descent. Find the optimal batch size for mini-batch gradient descent by training the neural network and evaluating the performances for different batch sizes. Note: Use 3-fold cross-validation on training partition to perform parameter selection.

In [None]:
class TimeTakenPerEpochCallback(tf.keras.callbacks.Callback):
    """Keras callback that records how long every training epoch takes.

    After ``fit`` returns, ``times`` holds one duration in seconds per completed epoch.
    """

    def __init__(self):
        # Let the Keras base class initialise its own state before adding ours.
        super().__init__()
        self.times = []
        self.epoch_begin_time = None

    def on_epoch_begin(self, epoch, logs=None):
        # perf_counter is monotonic, so a system clock adjustment mid-epoch cannot skew the measurement.
        self.epoch_begin_time = time.perf_counter()

    def on_epoch_end(self, epoch, logs=None):
        self.times.append(time.perf_counter() - self.epoch_begin_time)


class Question2():
    """Batch-size study for Question 2.

    Runs repeated 3-fold cross-validation on the training partition for each batch size in
    ``self.batch_sizes``, aggregating validation accuracy per epoch and the time taken per epoch.
    It can also train and plot a final model on the full train/test split.
    """

    def __init__(self, X_train, y_train, X_test, y_test, epochs=50, num_hidden_neurons=16, batch_size=1,
                 num_experiments=10):
        """Store data and hyperparameters and seed numpy and TensorFlow with a fixed seed (0).

        Args:
            num_experiments: how many times the shuffled 3-fold cross-validation is repeated.
        """
        self.epochs = epochs
        self.num_hidden_neurons = num_hidden_neurons
        self.batch_size = batch_size
        self.num_experiments = num_experiments
        self.batch_sizes = [1, 4, 8, 16, 32, 64]
        self.num_folds = 3
        self.seed = 0
        self.history = None  # set by train_model()
        # Both are filled in by cross_validate_model().
        self.mean_of_experiment_mean_accuracies = None
        self.experiment_median_times = None

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

        np.random.seed(self.seed)
        tf.random.set_seed(self.seed)

    def _build_model(self, num_hidden_neurons):
        """Return a freshly compiled network with one ReLU hidden layer, dropout 0.3 and a 10-unit softmax."""
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(units=num_hidden_neurons, activation='relu'),
            tf.keras.layers.Dropout(rate=0.3, seed=self.seed),
            tf.keras.layers.Dense(units=10, activation='softmax')
        ])

        # Sparse loss: labels are expected as integer class ids, not one-hot vectors.
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    def create_model(self, num_hidden_neurons=None):
        """Build and compile ``self.model``; the width falls back to the constructor value when omitted."""
        num_hidden_neurons = num_hidden_neurons or self.num_hidden_neurons
        self.model = self._build_model(num_hidden_neurons)

    def cross_validate_run_experiment(self, experiment_X_train, experiment_y_train):
        """Run one 3-fold cross-validation pass over every batch size.

        Args:
            experiment_X_train: (already shuffled) training inputs.
            experiment_y_train: labels aligned with ``experiment_X_train``.

        Returns:
            Tuple ``(experiment_accuracy, experiment_time)`` where
            ``experiment_accuracy`` maps batch_size_key -> num_fold_key -> epoch_key -> val_accuracy and
            ``experiment_time`` maps batch_size_key -> num_fold_key -> list of seconds per epoch.
        """
        # Folds have equal size; when the row count is not divisible by num_folds the trailing
        # remainder rows are never held out and always stay in the training part.
        size_fold = len(experiment_X_train) // self.num_folds
        experiment_accuracy = {}
        experiment_time = {}

        for num_fold in range(self.num_folds):
            start, end = num_fold * size_fold, (num_fold + 1) * size_fold
            # Everything outside [start, end) is used for training, the slice itself for validation.
            cv_X_train = np.concatenate((experiment_X_train[:start], experiment_X_train[end:]), axis=0)
            cv_y_train = np.concatenate((experiment_y_train[:start], experiment_y_train[end:]), axis=0)
            cv_X_test, cv_y_test = experiment_X_train[start:end], experiment_y_train[start:end]

            for batch_size in self.batch_sizes:
                cv_model = self._build_model(self.num_hidden_neurons)
                callback = TimeTakenPerEpochCallback()

                cv_history = cv_model.fit(x=cv_X_train, y=cv_y_train,
                                          batch_size=batch_size,
                                          epochs=self.epochs,
                                          verbose=0,
                                          callbacks=[callback],
                                          validation_data=(cv_X_test, cv_y_test))

                batch_size_key = "batch_size: {}".format(batch_size)
                num_fold_key = "num_fold: {}".format(num_fold)

                experiment_accuracy.setdefault(batch_size_key, {})[num_fold_key] = {
                    "epoch: {}".format(epoch): val_accuracy
                    for epoch, val_accuracy in enumerate(cv_history.history['val_accuracy'])
                }
                experiment_time.setdefault(batch_size_key, {})[num_fold_key] = callback.times

        return (experiment_accuracy, experiment_time)

    def cross_validate_model(self):
        """Repeat the cross-validation ``num_experiments`` times on reshuffled data and aggregate.

        Results are stored on the instance: ``mean_of_experiment_mean_accuracies``
        (batch_size_key -> epoch_key -> accuracy) and ``experiment_median_times``
        (batch_size_key -> median seconds per epoch).
        """
        size_X_train = len(self.X_train)
        index = np.arange(size_X_train)
        # experiment_key -> batch_size_key -> num_fold_key -> epoch_key -> val_accuracy
        experiment_accuracies = {}
        # experiment_key -> batch_size_key -> num_fold_key -> time_taken_per_epoch_list
        experiment_times = {}

        for experiment in range(self.num_experiments):
            # In-place shuffle: every experiment sees a new permutation of the training rows.
            np.random.shuffle(index)
            experiment_X_train, experiment_y_train = self.X_train[index], self.y_train[index]
            experiment_accuracy, experiment_time = self.cross_validate_run_experiment(experiment_X_train,
                                                                                      experiment_y_train)
            experiment_key = "experiment: {}".format(experiment)
            experiment_accuracies[experiment_key] = experiment_accuracy
            experiment_times[experiment_key] = experiment_time

        experiment_mean_accuracies = self.cross_validate_get_experiment_mean_accuracies(experiment_accuracies)
        self.mean_of_experiment_mean_accuracies = self.cross_validate_get_mean_of_experiment_mean_accuracies(
            experiment_mean_accuracies)

        self.experiment_median_times = self.cross_validate_get_median_of_experiment_times(experiment_times)

    def get_mean_of_experiment_mean_accuracies(self):
        """Return batch_size_key -> epoch_key -> accuracy (None before cross_validate_model())."""
        return self.mean_of_experiment_mean_accuracies

    def get_experiment_median_times(self):
        """Return batch_size_key -> median seconds per epoch (None before cross_validate_model())."""
        return self.experiment_median_times

    @staticmethod
    def cross_validate_get_experiment_mean_accuracies(experiment_accuracies):
        """Average the validation accuracy over the folds of each experiment.

        Args:
            experiment_accuracies: experiment_key -> batch_size_key -> num_fold_key -> epoch_key -> val_accuracy.

        Returns:
            experiment_key -> batch_size_key -> epoch_key -> mean_val_accuracy.
        """
        experiment_mean_accuracies = {}

        for experiment_key, experiment_accuracy in experiment_accuracies.items():
            experiment_mean_accuracy = {}

            for batch_size_key, folds in experiment_accuracy.items():
                # Collect the per-fold values of every epoch, then reduce each list to its mean.
                per_epoch = {}
                for fold_accuracies in folds.values():
                    for epoch_key, val_accuracy in fold_accuracies.items():
                        per_epoch.setdefault(epoch_key, []).append(val_accuracy)

                experiment_mean_accuracy[batch_size_key] = {
                    epoch_key: np.mean(values) for epoch_key, values in per_epoch.items()
                }

            experiment_mean_accuracies[experiment_key] = experiment_mean_accuracy

        return experiment_mean_accuracies

    @staticmethod
    def cross_validate_get_mean_of_experiment_mean_accuracies(experiment_mean_accuracies):
        """Average the per-experiment mean accuracies over all experiments.

        Args:
            experiment_mean_accuracies: experiment_key -> batch_size_key -> epoch_key -> mean_val_accuracy.

        Returns:
            batch_size_key -> epoch_key -> mean of the per-experiment means.
        """
        collected = {}

        for experiment_mean_accuracy in experiment_mean_accuracies.values():
            for batch_size_key, epoch_means in experiment_mean_accuracy.items():
                per_epoch = collected.setdefault(batch_size_key, {})
                for epoch_key, mean_val_accuracy in epoch_means.items():
                    per_epoch.setdefault(epoch_key, []).append(mean_val_accuracy)

        # Reduce over the keys that were actually collected (not over whichever experiment
        # happened to be visited last), so every epoch entry ends up as a scalar.
        return {
            batch_size_key: {epoch_key: np.mean(values) for epoch_key, values in per_epoch.items()}
            for batch_size_key, per_epoch in collected.items()
        }

    @staticmethod
    def cross_validate_get_median_of_experiment_times(experiment_times):
        """Compute the median time per epoch for every batch size, pooled over experiments and folds.

        Args:
            experiment_times: experiment_key -> batch_size_key -> num_fold_key -> list of seconds per epoch.

        Returns:
            batch_size_key -> median seconds per epoch.
        """
        pooled_times = {}

        for experiment_time in experiment_times.values():
            for batch_size_key, folds in experiment_time.items():
                for fold_times in folds.values():
                    pooled_times.setdefault(batch_size_key, []).extend(fold_times)

        return {batch_size_key: np.median(values) for batch_size_key, values in pooled_times.items()}

    def summarize_model(self):
        """Print the Keras layer summary of ``self.model``."""
        # summary() prints by itself and returns None; wrapping it in print() would emit a stray "None".
        self.model.summary()

    def train_model(self, batch_size=None):
        """Fit ``self.model`` on the full training partition, validating on the test partition.

        Args:
            batch_size: mini-batch size; falls back to the constructor value when omitted.
        """
        batch_size = batch_size or self.batch_size
        self.history = self.model.fit(x=self.X_train, y=self.y_train,
                                      batch_size=batch_size,
                                      epochs=self.epochs,
                                      verbose=1,
                                      validation_data=(self.X_test, self.y_test))

    def plot_model(self, variable='loss', epoch_start=1, epoch_end=None):
        """Plot the training and test curves of ``variable`` for epochs ``epoch_start``..``epoch_end``.

        Args:
            variable: history key to plot ('loss' or 'accuracy').
            epoch_start: first epoch shown (1-based).
            epoch_end: last epoch shown (1-based, inclusive); None plots up to the final epoch.

        Raises:
            RuntimeError: if train_model() has not been called yet.
        """
        if self.history is None:
            raise RuntimeError('No training history available; call train_model() first.')

        train_epochs = self.history.history[variable][epoch_start - 1:epoch_end]
        val_epochs = self.history.history['val_{}'.format(variable)][epoch_start - 1:epoch_end]
        epoch_axis = range(epoch_start, epoch_start + len(train_epochs))

        plt.plot(epoch_axis, train_epochs, label='Model training {}'.format(variable))
        plt.plot(epoch_axis, val_epochs, label='Model testing {}'.format(variable))
        plt.title('Model {}'.format(variable))
        plt.ylabel('{}'.format(variable))
        plt.xlabel('epoch')
        plt.legend(loc='best')
        plt.show()

    def cross_validate_plot_results(self, epoch_start=1, epoch_end=None):
        """Plot the mean cross-validation accuracy per epoch, one line per batch size.

        Reads the results stored on the instance by cross_validate_model().

        Args:
            epoch_start: first epoch shown (1-based).
            epoch_end: last epoch shown (1-based, inclusive); None plots up to the final epoch.

        Raises:
            RuntimeError: if cross_validate_model() has not been called yet.
        """
        if self.mean_of_experiment_mean_accuracies is None:
            raise RuntimeError('No cross-validation results available; call cross_validate_model() first.')

        for batch_size_key, epoch_accuracies in self.mean_of_experiment_mean_accuracies.items():
            val_epochs = [epoch_accuracies['epoch: {}'.format(epoch)] for epoch in range(self.epochs)]
            # Slice the values as well as the axis so both always have the same length.
            val_epochs = val_epochs[epoch_start - 1:epoch_end]
            plt.plot(range(epoch_start, epoch_start + len(val_epochs)), val_epochs, label=batch_size_key)

        plt.title('Model mean cross-validation accuracy')
        plt.ylabel('mean cross-validation accuracy')
        plt.xlabel('epoch')
        plt.legend(loc='best')
        plt.show()

a) Plot mean cross-validation accuracies over the training epochs for different batch sizes. Limit search space to batch sizes {1,4,8,16,32,64}.

In [None]:
# NOTE(review): epochs=20 and num_experiments=2 override the class defaults (50 and 10), while the task
# text asks for 50 epochs -- presumably reduced to shorten the run; confirm before reporting results.
question2 = Question2(X_train, y_train, X_test, y_test, epochs=20, num_experiments=2)
question2.cross_validate_model()
# batch_size_key -> epoch_key -> mean cross-validation accuracy
mean_of_experiment_mean_accuracies = question2.get_mean_of_experiment_mean_accuracies()

In [None]:
# Mean cross-validation accuracy per epoch, one line per batch size.
question2.cross_validate_plot_results()

In [None]:
# Compare batch sizes by their mean cross-validation accuracy at the final epoch.
final_epoch_key = "epoch: {}".format(question2.epochs - 1)
print("\n".join("{}\t{}".format(k, v[final_epoch_key]) for k, v in
                mean_of_experiment_mean_accuracies.items()))

optimal_batch_size = None
optimal_batch_size_experiment_value = None

for batch_size_key, epoch_accuracies in mean_of_experiment_mean_accuracies.items():
    final_accuracy = epoch_accuracies[final_epoch_key]
    # Compare against None explicitly: a legitimate accuracy of 0.0 must not be treated as "unset".
    if optimal_batch_size_experiment_value is None or final_accuracy > optimal_batch_size_experiment_value:
        optimal_batch_size_experiment_value = final_accuracy
        # Keys look like "batch_size: 16"; recover the integer after the separator.
        optimal_batch_size = int(batch_size_key.split(": ")[-1])

b) Create a table of median time taken to train the network for one epoch against different batch sizes. (Hint: Introduce a callback)

In [None]:
# Median seconds per epoch for each batch size, pooled over all experiments and folds.
experiment_median_times = question2.get_experiment_median_times()
print("\n".join("{}\t{}".format(k, v) for k, v in experiment_median_times.items()))

c) Select the optimal batch size and state reasons for your selection.

d) What is the difference between mini-batch gradient descent and stochastic gradient descent and what does this mean for model training?

e) Plot the train and test accuracies against epochs for the optimal batch size. 

In [None]:
# Batch size with the highest final-epoch mean cross-validation accuracy.
print(optimal_batch_size)

In [None]:
# Retrain on the full training partition with the selected batch size; the test partition is the validation data.
question2.create_model()
question2.train_model(batch_size=optimal_batch_size)

In [None]:
# Training vs. test accuracy per epoch for the selected batch size.
question2.plot_model('accuracy')

# Question 3
Find the optimal number of hidden neurons for the 2-layer network (i.e., one hidden layer) designed in Question 1 and 2.

In [None]:
class Question3():
    """Hidden-layer width study for Question 3.

    Runs repeated 3-fold cross-validation on the training partition for each width in
    ``self.num_hidden_neurons_list`` and aggregates the final-epoch validation accuracy.
    It can also train and plot a final model on the full train/test split.
    """

    def __init__(self, X_train, y_train, X_test, y_test, epochs=50, num_hidden_neurons=16, batch_size=1,
                 num_experiments=10):
        """Store data and hyperparameters and seed numpy and TensorFlow with a fixed seed (0).

        Args:
            num_experiments: how many times the shuffled 3-fold cross-validation is repeated.
        """
        self.epochs = epochs
        self.num_hidden_neurons = num_hidden_neurons
        self.batch_size = batch_size
        self.num_experiments = num_experiments
        self.num_hidden_neurons_list = [8, 16, 32, 64]
        self.num_folds = 3
        self.seed = 0
        self.history = None  # set by train_model()

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

        np.random.seed(self.seed)
        tf.random.set_seed(self.seed)

    def _build_model(self, num_hidden_neurons):
        """Return a freshly compiled network with one ReLU hidden layer, dropout 0.3 and a 10-unit softmax."""
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(units=num_hidden_neurons, activation='relu'),
            tf.keras.layers.Dropout(rate=0.3, seed=self.seed),
            tf.keras.layers.Dense(units=10, activation='softmax')
        ])

        # Sparse loss: labels are expected as integer class ids, not one-hot vectors.
        model.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    def create_model(self, num_hidden_neurons=None):
        """Build and compile ``self.model``; the width falls back to the constructor value when omitted."""
        num_hidden_neurons = num_hidden_neurons or self.num_hidden_neurons
        self.model = self._build_model(num_hidden_neurons)

    def cross_validate_run_experiment(self, experiment_X_train, experiment_y_train):
        """Run one 3-fold cross-validation pass over every candidate hidden-layer width.

        Args:
            experiment_X_train: (already shuffled) training inputs.
            experiment_y_train: labels aligned with ``experiment_X_train``.

        Returns:
            num_hidden_neurons_key -> num_fold_key -> validation accuracy after the last epoch.
        """
        # Folds have equal size; when the row count is not divisible by num_folds the trailing
        # remainder rows are never held out and always stay in the training part.
        size_fold = len(experiment_X_train) // self.num_folds
        experiment_accuracy = {}

        for num_fold in range(self.num_folds):
            start, end = num_fold * size_fold, (num_fold + 1) * size_fold
            # Everything outside [start, end) is used for training, the slice itself for validation.
            cv_X_train = np.concatenate((experiment_X_train[:start], experiment_X_train[end:]), axis=0)
            cv_y_train = np.concatenate((experiment_y_train[:start], experiment_y_train[end:]), axis=0)
            cv_X_test, cv_y_test = experiment_X_train[start:end], experiment_y_train[start:end]

            for num_hidden_neurons in self.num_hidden_neurons_list:
                cv_model = self._build_model(num_hidden_neurons)

                cv_history = cv_model.fit(x=cv_X_train, y=cv_y_train,
                                          batch_size=self.batch_size,
                                          epochs=self.epochs,
                                          verbose=0,
                                          validation_data=(cv_X_test, cv_y_test))

                num_hidden_neurons_key = "num_hidden_neurons: {}".format(num_hidden_neurons)
                num_fold_key = "num_fold: {}".format(num_fold)

                # Only the accuracy reached at the final epoch is kept for model selection.
                final_val_accuracy = cv_history.history['val_accuracy'][self.epochs - 1]
                experiment_accuracy.setdefault(num_hidden_neurons_key, {})[num_fold_key] = final_val_accuracy

        return experiment_accuracy

    def cross_validate_model(self):
        """Repeat the cross-validation ``num_experiments`` times on reshuffled data.

        Returns:
            num_hidden_neurons_key -> mean (over experiments) of the mean (over folds) final accuracy.
        """
        size_X_train = len(self.X_train)
        index = np.arange(size_X_train)
        # experiment_key -> num_hidden_neurons_key -> num_fold_key -> val_accuracy
        experiment_accuracies = {}

        for experiment in range(self.num_experiments):
            # In-place shuffle: every experiment sees a new permutation of the training rows.
            np.random.shuffle(index)
            experiment_X_train, experiment_y_train = self.X_train[index], self.y_train[index]
            experiment_accuracy = self.cross_validate_run_experiment(experiment_X_train, experiment_y_train)
            experiment_key = "experiment: {}".format(experiment)
            experiment_accuracies[experiment_key] = experiment_accuracy

        experiment_mean_accuracies = self.cross_validate_get_experiment_mean_accuracies(experiment_accuracies)
        mean_of_experiment_mean_accuracies = self.cross_validate_get_mean_of_experiment_mean_accuracies(
            experiment_mean_accuracies)

        return mean_of_experiment_mean_accuracies

    @staticmethod
    def cross_validate_get_experiment_mean_accuracies(experiment_accuracies):
        """Average the final validation accuracy over the folds of each experiment.

        Args:
            experiment_accuracies: experiment_key -> num_hidden_neurons_key -> num_fold_key -> val_accuracy.

        Returns:
            experiment_key -> num_hidden_neurons_key -> mean_val_accuracy.
        """
        return {
            experiment_key: {
                num_hidden_neurons_key: np.mean(list(fold_accuracies.values()))
                for num_hidden_neurons_key, fold_accuracies in experiment_accuracy.items()
            }
            for experiment_key, experiment_accuracy in experiment_accuracies.items()
        }

    @staticmethod
    def cross_validate_get_mean_of_experiment_mean_accuracies(experiment_mean_accuracies):
        """Average the per-experiment mean accuracies over all experiments.

        Args:
            experiment_mean_accuracies: experiment_key -> num_hidden_neurons_key -> mean_val_accuracy.

        Returns:
            num_hidden_neurons_key -> mean of the per-experiment means.
        """
        collected = {}

        for experiment_mean_accuracy in experiment_mean_accuracies.values():
            for num_hidden_neurons_key, mean_val_accuracy in experiment_mean_accuracy.items():
                collected.setdefault(num_hidden_neurons_key, []).append(mean_val_accuracy)

        return {num_hidden_neurons_key: np.mean(values) for num_hidden_neurons_key, values in collected.items()}

    def summarize_model(self):
        """Print the Keras layer summary of ``self.model``."""
        # summary() prints by itself and returns None; wrapping it in print() would emit a stray "None".
        self.model.summary()

    def train_model(self, batch_size=None):
        """Fit ``self.model`` on the full training partition, validating on the test partition.

        Args:
            batch_size: mini-batch size; falls back to the constructor value when omitted.
        """
        batch_size = batch_size or self.batch_size
        self.history = self.model.fit(x=self.X_train, y=self.y_train,
                                      batch_size=batch_size,
                                      epochs=self.epochs,
                                      verbose=1,
                                      validation_data=(self.X_test, self.y_test))

    def plot_model(self, variable='loss'):
        """Plot the training and test curves of ``variable`` ('loss' or 'accuracy') against epochs.

        Raises:
            RuntimeError: if train_model() has not been called yet.
        """
        if self.history is None:
            raise RuntimeError('No training history available; call train_model() first.')

        plt.plot(self.history.history[variable], label='Model training {}'.format(variable))
        plt.plot(self.history.history['val_{}'.format(variable)], label='Model testing {}'.format(variable))
        plt.title('Model {}'.format(variable))
        plt.ylabel('{}'.format(variable))
        plt.xlabel('epoch')
        plt.legend(loc="best")
        plt.show()

a) Plot the cross-validation accuracies against training epochs for different numbers of hidden-layer neurons. Limit the search space of the number of neurons to {8, 16, 32, 64}. Continue using 3-fold cross validation on training dataset.

In [None]:
# Batch size carried over from the Question 2 cells.
print(optimal_batch_size)

In [None]:
# Uses the class defaults for epochs (50) and num_experiments (10) together with the selected batch size.
question3 = Question3(X_train, y_train, X_test, y_test, batch_size=optimal_batch_size)
# Rebinds the notebook-level name (previously the Question 2 results) to
# num_hidden_neurons_key -> mean cross-validation accuracy.
mean_of_experiment_mean_accuracies = question3.cross_validate_model()

In [None]:
print(mean_of_experiment_mean_accuracies)

# Key (e.g. "num_hidden_neurons: 16") whose mean cross-validation accuracy is highest.
best_mean_of_experiment_mean_accuracies = max(mean_of_experiment_mean_accuracies,
                                              key=mean_of_experiment_mean_accuracies.get)
print(best_mean_of_experiment_mean_accuracies)

# Recover the integer width that follows the ": " separator in the key.
optimal_num_hidden_neurons = int(best_mean_of_experiment_mean_accuracies.split(": ")[-1])
print(optimal_num_hidden_neurons)

b) Select the optimal number of neurons for the hidden layer. State the rationale for your selection.

c) Plot the train and test accuracies against training epochs with the optimal number of neurons.

In [None]:
# Retrain on the full training partition with the selected width; train_model() falls back to
# the batch size given to the constructor.
question3.create_model(num_hidden_neurons=optimal_num_hidden_neurons)
question3.train_model()

In [None]:
# Training vs. test accuracy per epoch for the selected width.
question3.plot_model('accuracy')

d) What other parameters could possibly be tuned?

# Question 4
After you are done with the 2-layer network, design a 3-layer network with two hidden layers with ReLU activation, each consisting of the optimal number of neurons you obtained in Question 3 (apply a dropout with a probability of 0.3 for each hidden layer), and train it with a batch size of 1.

In [None]:
class Question4():
    """Deeper network for Question 4: two ReLU hidden layers (each followed by dropout 0.3), 10-unit softmax.

    Stores the train/test arrays and wraps building, training and plotting of a Keras model.
    The test arrays are passed to ``fit`` as ``validation_data``, so the ``val_*`` entries
    of the training history are test-set metrics.
    """

    def __init__(self, X_train, y_train, X_test, y_test, epochs=50, num_hidden_neurons=16, batch_size=1):
        """Store data and hyperparameters and seed numpy and TensorFlow with a fixed seed (0)."""
        self.epochs = epochs
        self.num_hidden_neurons = num_hidden_neurons
        self.batch_size = batch_size
        self.seed = 0
        self.history = None  # set by train_model()

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

        np.random.seed(self.seed)
        tf.random.set_seed(self.seed)

    def create_model(self, num_hidden_neurons=None):
        """Build and compile ``self.model``; both hidden layers share the same width.

        Args:
            num_hidden_neurons: width of each hidden layer; falls back to the constructor value when omitted.
        """
        num_hidden_neurons = num_hidden_neurons or self.num_hidden_neurons
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(units=num_hidden_neurons, activation='relu'),
            tf.keras.layers.Dropout(rate=0.3, seed=self.seed),
            tf.keras.layers.Dense(units=num_hidden_neurons, activation='relu'),
            tf.keras.layers.Dropout(rate=0.3, seed=self.seed),
            tf.keras.layers.Dense(units=10, activation='softmax')
        ])

        # Sparse loss: labels are expected as integer class ids, not one-hot vectors.
        self.model.compile(optimizer='adam',
                           loss='sparse_categorical_crossentropy',
                           metrics=['accuracy'])

    def summarize_model(self):
        """Print the Keras layer summary of ``self.model``."""
        # summary() prints by itself and returns None; wrapping it in print() would emit a stray "None".
        self.model.summary()

    def train_model(self, batch_size=None):
        """Fit ``self.model`` on the training data and keep the Keras History in ``self.history``.

        Args:
            batch_size: mini-batch size; falls back to the constructor value when omitted.
        """
        batch_size = batch_size or self.batch_size
        self.history = self.model.fit(x=self.X_train, y=self.y_train,
                                      batch_size=batch_size,
                                      epochs=self.epochs,
                                      verbose=1,
                                      validation_data=(self.X_test, self.y_test))

    def plot_model(self, variable='loss'):
        """Plot the training and test curves of ``variable`` ('loss' or 'accuracy') against epochs.

        Raises:
            RuntimeError: if train_model() has not been called yet.
        """
        if self.history is None:
            raise RuntimeError('No training history available; call train_model() first.')

        plt.plot(self.history.history[variable], label='Model training {}'.format(variable))
        plt.plot(self.history.history['val_{}'.format(variable)], label='Model testing {}'.format(variable))
        plt.title('Model {}'.format(variable))
        plt.ylabel('{}'.format(variable))
        plt.xlabel('epoch')
        plt.legend(loc="best")
        plt.show()

a) Plot the train and test accuracy of the 3-layer network against training epochs.

In [None]:
# Hidden-layer width carried over from the Question 3 cells.
print(optimal_num_hidden_neurons)

In [None]:
# Two-hidden-layer network with the selected width; epochs (50) and batch size (1) are the class defaults.
question4 = Question4(X_train, y_train, X_test, y_test, num_hidden_neurons=optimal_num_hidden_neurons)
question4.create_model()
question4.train_model()

In [None]:
# Training vs. test accuracy per epoch for the 3-layer network.
question4.plot_model('accuracy')

b) Compare and comment on the performances of the optimal 2-layer network from your hyperparameter tuning in Question 2 and 3 and the 3-layer network.

## Question 5 (let’s dig deeper!)
We are going to dissect the purpose of dropout in the model.

In [None]:
class Question5():
    """Dropout ablation for Question 5: one ReLU hidden layer and a 10-unit softmax, with no dropout layer.

    Stores the train/test arrays and wraps building, training and plotting of a Keras model.
    The test arrays are passed to ``fit`` as ``validation_data``, so the ``val_*`` entries
    of the training history are test-set metrics.
    """

    def __init__(self, X_train, y_train, X_test, y_test, epochs=50, num_hidden_neurons=16, batch_size=1):
        """Store data and hyperparameters and seed numpy and TensorFlow with a fixed seed (0)."""
        self.epochs = epochs
        self.num_hidden_neurons = num_hidden_neurons
        self.batch_size = batch_size
        self.seed = 0
        self.history = None  # set by train_model()

        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test

        np.random.seed(self.seed)
        tf.random.set_seed(self.seed)

    def create_model(self):
        """Build and compile ``self.model`` using the hidden-layer width given to the constructor."""
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(units=self.num_hidden_neurons, activation='relu'),
            tf.keras.layers.Dense(units=10, activation='softmax')
        ])

        # Sparse loss: labels are expected as integer class ids, not one-hot vectors.
        self.model.compile(optimizer='adam',
                           loss='sparse_categorical_crossentropy',
                           metrics=['accuracy'])

    def summarize_model(self):
        """Print the Keras layer summary of ``self.model``."""
        # summary() prints by itself and returns None; wrapping it in print() would emit a stray "None".
        self.model.summary()

    def train_model(self, batch_size=None):
        """Fit ``self.model`` on the training data and keep the Keras History in ``self.history``.

        Args:
            batch_size: mini-batch size; falls back to the constructor value when omitted.
        """
        batch_size = batch_size or self.batch_size
        self.history = self.model.fit(x=self.X_train, y=self.y_train,
                                      batch_size=batch_size,
                                      epochs=self.epochs,
                                      verbose=1,
                                      validation_data=(self.X_test, self.y_test))

    def plot_model(self, variable='loss'):
        """Plot the training and test curves of ``variable`` ('loss' or 'accuracy') against epochs.

        Raises:
            RuntimeError: if train_model() has not been called yet.
        """
        if self.history is None:
            raise RuntimeError('No training history available; call train_model() first.')

        plt.plot(self.history.history[variable], label='Model training {}'.format(variable))
        plt.plot(self.history.history['val_{}'.format(variable)], label='Model testing {}'.format(variable))
        plt.title('Model {}'.format(variable))
        plt.ylabel('{}'.format(variable))
        plt.xlabel('epoch')
        plt.legend(loc="best")
        plt.show()

a) Why do we add dropouts? Investigate the purpose of dropouts by removing dropouts from your original 2-layer network (before changing the batch size and number of neurons). Plot accuracies on training and test data with neural network without dropout. Plot as well the losses on training and test data with neural network without dropout.

In [None]:
# Same defaults as the Question 1 run (50 epochs, 16 hidden neurons, batch size 1), but without dropout.
question5 = Question5(X_train, y_train, X_test, y_test)
question5.create_model()
question5.train_model()

In [None]:
# Training vs. test accuracy and loss per epoch for the network without dropout.
question5.plot_model('accuracy')
question5.plot_model('loss')

b) Explain the effect of removing dropouts.

c) What is another approach that you could take to address overfitting in the model?