In [None]:
%pip install numpy
%pip install matplotlib 
%pip install -U scikit-learn
%pip install -U tensorflow_datasets

In [None]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from time import time

In [None]:
def plot_loss(history, optimiser, name):
    """Draw the training and validation loss curves of a single training run.

    Args:
        history: Object exposing a ``history`` mapping with ``'loss'`` and
            ``'val_loss'`` entries (one value per epoch).
        optimiser: Optimiser name inserted into the figure title.
        name: Model name inserted into the figure title.
    """
    # (key in the history mapping, legend entry) for each curve, in draw order
    curves = (
        ('loss', 'Training Loss'),
        ('val_loss', 'Validation Loss'),
    )

    plt.figure(figsize=(8, 6))
    for metric_key, curve_label in curves:
        plt.plot(history.history[metric_key], label=curve_label)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title(f'{name}: Training and Validation Loss using Optimiser {optimiser}')
    plt.show()

def compare_loss(history1, history2, label1, label2, name):
    """Overlay the loss curves of two training runs on one figure.

    The first run is drawn with the default line style and the second run
    with dashed lines.

    Args:
        history1: First run; exposes a ``history`` mapping with ``'loss'``
            and ``'val_loss'`` entries.
        history2: Second run, same structure as ``history1``.
        label1: Legend tag for the first run.
        label2: Legend tag for the second run.
        name: Prefix of the figure title.
    """
    # (run, legend tag, extra line styling) in draw order
    runs = (
        (history1, label1, {}),
        (history2, label2, {'linestyle': '--'}),
    )

    plt.figure(figsize=(8, 6))
    for run, tag, line_style in runs:
        plt.plot(run.history['loss'], label=f'Training Loss ({tag})', **line_style)
        plt.plot(run.history['val_loss'], label=f'Validation Loss ({tag})', **line_style)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title(f'{name}: Training and Validation Loss Comparison')
    plt.show()

def compare_accuracy(history1, history2, label1, label2, name):
    """Overlay the accuracy curves of two training runs on one figure.

    The first run is drawn with the default line style and the second run
    with dashed lines.

    Args:
        history1: First run; exposes a ``history`` mapping with
            ``'accuracy'`` and ``'val_accuracy'`` entries.
        history2: Second run, same structure as ``history1``.
        label1: Legend tag for the first run.
        label2: Legend tag for the second run.
        name: Prefix of the figure title.
    """
    # (run, legend tag, extra line styling) in draw order
    runs = (
        (history1, label1, {}),
        (history2, label2, {'linestyle': '--'}),
    )

    plt.figure(figsize=(8, 6))
    for run, tag, line_style in runs:
        plt.plot(run.history['accuracy'], label=f'Training Accuracy ({tag})', **line_style)
        plt.plot(run.history['val_accuracy'], label=f'Validation Accuracy ({tag})', **line_style)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.title(f'{name}: Training and Validation Accuracy Comparison')
    plt.show()

def compare_epoch_time(timings, label1, label2, name):
    """Plot the per-epoch durations of two consecutive training runs.

    ``timings`` is one flat sequence holding the epoch durations of the
    first run followed by those of the second run; both runs must have the
    same number of epochs. The first half is drawn with a solid line and
    labelled ``label1``; the second half is dashed and labelled ``label2``.

    Args:
        timings: Epoch durations in seconds, first run then second run.
        label1: Legend tag for the first half of ``timings``.
        label2: Legend tag for the second half of ``timings``.
        name: Prefix of the figure title.

    Raises:
        ValueError: If ``timings`` has an odd number of entries and so
            cannot be split into two runs of equal length.
    """
    if len(timings) % 2 != 0:
        raise ValueError(
            f'timings must hold two runs of equal length, got {len(timings)} entries'
        )

    epochs = len(timings) // 2
    print(f'Epochs: {epochs}')
    print(timings)

    # Both runs share the same 1-based epoch axis.
    epoch_numbers = list(range(1, epochs + 1))
    first_timing = timings[:epochs]
    second_timing = timings[epochs:]

    plt.figure(figsize=(8, 10))
    plt.plot(epoch_numbers, first_timing, label=f'Epoch time: {label1}')
    plt.plot(epoch_numbers, second_timing, label=f'Epoch time: {label2}', linestyle='--')
    plt.xlabel('Epoch')
    plt.ylabel('Time (s)')
    # Without a legend the two labelled curves cannot be told apart.
    plt.legend()
    plt.title(f'{name}: Epoch Execution Time Comparison')
    plt.show()

In [None]:
# Fetch MNIST, then prepare the inputs and targets for training
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale the pixel values by 1/255
x_train = x_train / 255.0
x_test = x_test / 255.0

# One-hot encode the digit labels over 10 classes
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

In [None]:
class TimingCallback(tf.keras.callbacks.Callback):
    """Keras callback that records the wall-clock duration of every epoch.

    Durations accumulate in ``self.timings`` in the order the epochs ran.
    The list belongs to the instance: passing the same instance to several
    ``fit`` calls appends to one list, while separate instances never see
    each other's timings.
    """

    def __init__(self):
        super().__init__()
        # Per-instance storage. A class-level list would be shared by every
        # TimingCallback and mix the timings of unrelated experiments.
        self.timings = []
        self.start_time = None

    def on_epoch_begin(self, epoch, logs=None):
        """Remember when the current epoch started."""
        self.start_time = time()

    def on_epoch_end(self, epoch, logs=None):
        """Append the elapsed time (seconds) of the epoch that just ended."""
        epoch_time = time() - self.start_time
        self.timings.append(epoch_time)

    def get_timings(self):
        """Return a copy of the epoch durations recorded so far.

        Returns:
            list: Durations in seconds, oldest first. It is a snapshot, so
            later epochs do not change a list that was already returned.
        """
        return list(self.timings)

In [None]:
# Baseline Adam optimiser.
# The learning rate is set explicitly: AdamOptimizer defaults to 0.001, which
# would make this optimiser identical to the "lower learning rate" one below
# and turn the 0.01-vs-0.001 comparison plots into a comparison of two
# identical configurations.
legacy_adam = tf.compat.v1.train.AdamOptimizer(learning_rate=0.01)

# Adam optimiser (lower learning rate)
legacy_adam_lower_learning = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001)

# Adadelta optimiser (library default settings)
legacy_adadelta = tf.compat.v1.train.AdadeltaOptimizer()

# NOTE(review): each of these instances is passed to compile() for several
# models later in the notebook. Optimiser state may carry over between those
# models; consider creating a fresh optimiser per model - TODO confirm.

In [None]:
# Deep feed-forward network with more layers and neurones


def _build_dff_more_layers():
    """Return a new, uncompiled deep feed-forward classifier.

    The network flattens 28x28 inputs, applies four ReLU dense layers
    (512, 256, 128, 64 units) and ends in a 10-way softmax layer.
    """
    return keras.models.Sequential([
        keras.layers.Input(shape=(28, 28)),
        keras.layers.Flatten(),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ])


# One independent copy of the same architecture per experiment.
dff_more_layers_adam = _build_dff_more_layers()           # Adam optimiser
dff_more_layers_adadelta = _build_dff_more_layers()       # Adadelta optimiser
dff_more_layers_learning_rate = _build_dff_more_layers()  # changed learning rate

# Title prefix used by the plots of this architecture.
DEEP_FORWARD_MORE_NAME = "Deep Feed Forward (more layers/neurones)"

In [None]:

# Compile each deep feed-forward model with the optimiser it is meant to test
dff_more_layers_adam.compile(
    optimizer=legacy_adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
dff_more_layers_adadelta.compile(
    optimizer=legacy_adadelta,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
dff_more_layers_learning_rate.compile(
    optimizer=legacy_adam_lower_learning,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Records the epoch durations of the Adam runs of the feed-forward models
dff_callback = TimingCallback()

# Adam run (timed)
history_more_adam = dff_more_layers_adam.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    callbacks=[dff_callback],
)
more_layers_epoch_time = dff_callback.get_timings()

# Adadelta run
history_more_adadelta = dff_more_layers_adadelta.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

# Lower learning rate run
history_more_learning_rate = dff_more_layers_learning_rate.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)


In [None]:
# Adam vs Adadelta on the larger feed-forward network: loss, then accuracy
compare_loss(
    history1=history_more_adam,
    history2=history_more_adadelta,
    label1="Adam",
    label2="Adadelta",
    name=DEEP_FORWARD_MORE_NAME,
)
compare_accuracy(
    history1=history_more_adam,
    history2=history_more_adadelta,
    label1="Adam",
    label2="Adadelta",
    name=DEEP_FORWARD_MORE_NAME,
)

# Accuracy of the two Adam learning-rate runs
compare_accuracy(
    history1=history_more_adam,
    history2=history_more_learning_rate,
    label1="0.01",
    label2="0.001",
    name=DEEP_FORWARD_MORE_NAME,
)


In [None]:
# Deep feed-forward network with fewer layers and neurones

# Title prefix used by the plots of this architecture.
DEEP_FORWARD_LESS_NAME = "Deep Feed Forward (less layers/neurones)"


def _build_dff_less_layers():
    """Return a new, uncompiled small feed-forward classifier.

    The network flattens 28x28 inputs, applies two ReLU dense layers
    (128, 64 units) and ends in a 10-way softmax layer.
    """
    return keras.models.Sequential([
        keras.layers.Input(shape=(28, 28)),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ])


# One independent copy of the same architecture per experiment.
dff_less_layers_adam = _build_dff_less_layers()           # Adam optimiser
dff_less_layers_adadelta = _build_dff_less_layers()       # Adadelta optimiser
dff_less_layers_learning_rate = _build_dff_less_layers()  # changed learning rate

In [None]:

# Compile each small feed-forward model with the optimiser it is meant to test
dff_less_layers_adam.compile(
    optimizer=legacy_adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
dff_less_layers_adadelta.compile(
    optimizer=legacy_adadelta,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
dff_less_layers_learning_rate.compile(
    optimizer=legacy_adam_lower_learning,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Adam run; dff_callback is passed again, so it appends this run's epoch times
history_less_adam = dff_less_layers_adam.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    callbacks=[dff_callback],
)
dff_less_layers_epoch_times = dff_callback.get_timings()

# Adadelta run
history_less_adadelta = dff_less_layers_adadelta.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

# Lower learning rate run
history_lower_learning = dff_less_layers_learning_rate.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)


In [None]:

# Smaller vs larger feed-forward network, per optimiser
compare_loss(history1=history_less_adam, history2=history_more_adam, label1="Less Layers", label2="More Layers", name="Adam Optimiser")
compare_loss(history1=history_less_adadelta, history2=history_more_adadelta, label1="Less Layers", label2="More Layers", name="Adadelta Optimiser")

# The plots below show only the smaller network, so they carry its title
# (DEEP_FORWARD_LESS_NAME), not the title of the larger one.
compare_loss(history1=history_less_adam, history2=history_less_adadelta, label1="Adam", label2="Adadelta", name=DEEP_FORWARD_LESS_NAME)

compare_accuracy(history1=history_less_adam, history2=history_less_adadelta, label1="Adam", label2="Adadelta", name=DEEP_FORWARD_LESS_NAME)
compare_accuracy(history1=history_less_adam, history2=history_lower_learning, label1="0.01", label2="0.001", name=DEEP_FORWARD_LESS_NAME)

# dff_callback timed the larger network first and the smaller one second, so
# the first half of the timings belongs to "More Layers and Neurones".
compare_epoch_time(dff_less_layers_epoch_times, "More Layers and Neurones", "Less Layers and Neurones", "Layer and Neurone")


In [None]:
def _build_conv_more_layers():
    """Return a new, uncompiled convolutional classifier (larger variant).

    Two Conv2D + MaxPooling2D stages (32 then 64 filters, 3x3 kernels,
    2x2 pooling) on 28x28x1 inputs, followed by a 128-unit ReLU dense
    layer and a 10-way softmax layer.
    """
    return keras.models.Sequential([
        keras.layers.Input(shape=(28, 28, 1)),
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ])


# One independent copy of the same architecture per experiment.
conv_more_adam = _build_conv_more_layers()           # Adam optimiser
conv_more_adadelta = _build_conv_more_layers()       # Adadelta optimiser
conv_more_learning_rate = _build_conv_more_layers()  # changed learning rate

# Title prefixes used by the plots of the convolutional models.
CONVULATIONAL_LESS_NAME = "Convolutional (less layers/neurones)"
CONVULATIONAL_MORE_NAME = "Convolutional (more layers/neurones)"

In [None]:
# Add a trailing channel axis so the images match the (28, 28, 1) input
# shape declared by the convolutional models
x_train = x_train.reshape(-1, 28, 28, 1)
x_test = x_test.reshape(-1, 28, 28, 1)

# Compile each convolutional model with the optimiser it is meant to test
conv_more_adam.compile(optimizer=legacy_adam, loss='categorical_crossentropy', metrics=['accuracy'])
conv_more_adadelta.compile(optimizer=legacy_adadelta, loss='categorical_crossentropy', metrics=['accuracy'])
conv_more_learning_rate.compile(optimizer=legacy_adam_lower_learning, loss='categorical_crossentropy', metrics=['accuracy'])

# Records the epoch durations of the Adam runs of the convolutional models
conv_callback = TimingCallback()

# Train the models
history_more_adam = conv_more_adam.fit(x_train, y_train, batch_size=32, epochs=10, validation_split=0.2, callbacks=[conv_callback])
history_more_adadelta = conv_more_adadelta.fit(x_train, y_train, batch_size=32, epochs=10, validation_split=0.2)

# Train the lower-learning-rate model itself (not the Adadelta model a second
# time) and store the result under the name the comparison plot reads, so it
# no longer shows the stale feed-forward history.
history_more_learning_rate = conv_more_learning_rate.fit(x_train, y_train, batch_size=32, epochs=10, validation_split=0.2)

In [None]:
# Adam vs Adadelta on the larger convolutional network: loss, then accuracy
compare_loss(
    history1=history_more_adam,
    history2=history_more_adadelta,
    label1="Adam",
    label2="Adadelta",
    name=CONVULATIONAL_MORE_NAME,
)
compare_accuracy(
    history1=history_more_adam,
    history2=history_more_adadelta,
    label1="Adam",
    label2="Adadelta",
    name=CONVULATIONAL_MORE_NAME,
)

# Accuracy of the two Adam learning-rate runs
compare_accuracy(
    history1=history_more_adam,
    history2=history_more_learning_rate,
    label1="0.01",
    label2="0.001",
    name=CONVULATIONAL_MORE_NAME,
)

In [None]:
def _build_conv_less_layers():
    """Return a new, uncompiled convolutional classifier (smaller variant).

    Two Conv2D + MaxPooling2D stages (16 then 32 filters, 3x3 kernels,
    2x2 pooling) on 28x28x1 inputs, followed by a 64-unit ReLU dense
    layer and a 10-way softmax layer.
    """
    return keras.models.Sequential([
        keras.layers.Input(shape=(28, 28, 1)),
        keras.layers.Conv2D(16, kernel_size=(3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Flatten(),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ])


# One independent copy of the same architecture per experiment.
conv_less_adam = _build_conv_less_layers()           # Adam optimiser
conv_less_adadelta = _build_conv_less_layers()       # Adadelta optimiser
conv_less_learning_rate = _build_conv_less_layers()  # changed learning rate

In [None]:

# Compile each small convolutional model with the optimiser it is meant to test
conv_less_adam.compile(
    optimizer=legacy_adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
conv_less_adadelta.compile(
    optimizer=legacy_adadelta,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
conv_less_learning_rate.compile(
    optimizer=legacy_adam_lower_learning,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Adam run; conv_callback is passed again, so it appends this run's epoch times
history_less_adam = conv_less_adam.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
    callbacks=[conv_callback],
)
conv_less_layers_epoch_times = conv_callback.get_timings()

# Adadelta run
history_less_adadelta = conv_less_adadelta.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)

# Lower learning rate run
history_lower_learning = conv_less_learning_rate.fit(
    x_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)


In [None]:

# Smaller vs larger convolutional network, per optimiser
compare_loss(history1=history_less_adam, history2=history_more_adam, label1="Less Layers", label2="More Layers", name="Adam Optimiser")
compare_loss(history1=history_less_adadelta, history2=history_more_adadelta, label1="Less Layers", label2="More Layers", name="Adadelta Optimiser")

# Optimiser and learning-rate comparisons on the smaller network
compare_loss(history1=history_less_adam, history2=history_less_adadelta, label1="Adam", label2="Adadelta", name=CONVULATIONAL_LESS_NAME)

compare_accuracy(history1=history_less_adam, history2=history_less_adadelta, label1="Adam", label2="Adadelta", name=CONVULATIONAL_LESS_NAME)
compare_accuracy(history1=history_less_adam, history2=history_lower_learning, label1="0.01", label2="0.001", name=CONVULATIONAL_LESS_NAME)

# conv_callback timed the larger network first and the smaller one second, so
# the first half of the timings belongs to "More Layers and Neurones".
compare_epoch_time(conv_less_layers_epoch_times, "More Layers and Neurones", "Less Layers and Neurones", "Layer and Neurone")
