# Coursework 6

## Task 1

### Import required Python libraries



In [0]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Activation
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.optimizers import SGD

### Import and shape the dataset (Fashion Mnist)

In [0]:
print('* Importing and shaping the data *')
print()

# Fashion-MNIST loader shipped with tf.keras (the variable keeps the name `mnist`).
mnist = tf.keras.datasets.fashion_mnist
(input_train, output_train_class), (input_test, output_test_class) = mnist.load_data()

print('input_train (original): ', input_train.shape)
print('input_test (original): ', input_test.shape)
print()

# Append a trailing channel axis so every image is 28x28x1, as the Conv2D layers expect.
input_train = input_train.reshape(input_train.shape[0], 28, 28, 1)
input_test = input_test.reshape(input_test.shape[0], 28, 28, 1)

# Max-normalise the pixel values by dividing by 255.
input_train = input_train / 255.0
input_test = input_test / 255.0

# One-hot encode the integer class labels (10 classes).
output_train_class_onehot = tf.keras.utils.to_categorical(output_train_class, 10)
output_test_class_onehot = tf.keras.utils.to_categorical(output_test_class, 10)

# Class names are simply the label ids as strings.
output_class_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

print('input_train : ', input_train.shape)
print('output_train_class : ', output_train_class.shape)
print('output_train_class_onehot : ', output_train_class_onehot.shape)
print()
print('input_test : ', input_test.shape)
print('output_test_class : ', output_test_class.shape)
print('output_test_class_onehot : ', output_test_class_onehot.shape)
print()
print('output_class_names : ', output_class_names)
print()

# Show one training example together with its label encodings as a sanity check.
item_id = 5

print('item_id : ', item_id)
print('output_train_class [item_id] : ', output_train_class[item_id])
print('output_train_class_onehot [item_id] : ', output_train_class_onehot[item_id, :])

plt.imshow(input_train[item_id, :, :, 0], cmap=plt.cm.binary)
plt.title('input_train [' + str(item_id) + ']')
# grid(None) *toggles* the grid rather than disabling it; False switches it off explicitly.
plt.grid(False)
plt.xticks([])
plt.yticks([])
plt.show()

### Define the network hyperparameters


In [0]:
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that newer Keras optimizers reject.
optimizer_type = SGD(learning_rate=0.2)  # optimisation algorithm: stochastic gradient descent (SGD)
loss = 'categorical_crossentropy'  # loss (cost) function to be minimised by the optimiser
metrics = ['categorical_accuracy']  # network accuracy metric to be determined after each epoch
dropout_ratio = 0.0  # rate passed to the Dropout layer that follows the hidden dense layer
validtrain_split_ratio = 0.2  # fraction of the training data set aside for validation, the rest is used for training
max_epochs = 50  # maximum number of epochs to be iterated
batch_size = 500   # batch size for the training data set
batch_shuffle = True   # shuffle the training data prior to batching before each epoch
num_hidden_nodes = 256  # number of nodes in the hidden fully connected layer

### Define the network architecture


In [0]:
def _conv_relu(tensor, filters):
    """Apply a 3x3, stride-1, 'same'-padded convolution followed by a ReLU activation."""
    convolved = Conv2D(filters=filters, kernel_size=(3, 3), strides=(1, 1), padding='same')(tensor)
    return Activation('relu')(convolved)


input_shape = (28, 28, 1)
inputs = Input(shape=input_shape)

# First convolutional stage: two conv+ReLU pairs with 16 filters each.
down_01 = _conv_relu(inputs, 16)
down_01 = _conv_relu(down_01, 16)

# 2x2 max pooling halves the spatial size: 28x28x16 -> 14x14x16.
down_01_pool = MaxPooling2D((2, 2), strides=(2, 2))(down_01)

# Second convolutional stage: two conv+ReLU pairs with 32 filters each.
down_02 = _conv_relu(down_01_pool, 32)
down_02 = _conv_relu(down_02, 32)

# 2x2 max pooling again: 14x14x32 -> 7x7x32.
down_02_pool = MaxPooling2D((2, 2), strides=(2, 2))(down_02)

# Flatten the 7x7x32 feature maps into a vector of 1568 nodes.
flatten = Flatten()(down_02_pool)

# Hidden fully connected layer: sigmoid activation followed by dropout.
dense_01 = Dense(num_hidden_nodes)(flatten)
dense_01 = Activation('sigmoid')(dense_01)
dense_01 = Dropout(dropout_ratio)(dense_01)

# Output layer: one softmax unit per class.
dense_02 = Dense(10)(dense_01)
outputs = Activation('softmax')(dense_02)

# Assemble the graph into a model and compile it with the configured hyperparameters.
model = Model(inputs=inputs, outputs=outputs)
model.compile(optimizer=optimizer_type, loss=loss, metrics=metrics)


### Train the neural network with the training dataset

In [0]:
print('* Training the compiled network *')
print()

# Training options come from the hyperparameter cell; a fraction of the data is held out for validation.
fit_options = dict(
    batch_size=batch_size,
    epochs=max_epochs,
    validation_split=validtrain_split_ratio,
    shuffle=batch_shuffle,
)
history = model.fit(input_train, output_train_class_onehot, **fit_options)

print()
print('Training completed')
print()

### Plot the training history of the network


In [0]:
# Loss curves (training vs. validation) per epoch.
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(history.history['loss'], label='Training')
ax_loss.plot(history.history['val_loss'], label='Validation')
ax_loss.set_title('Model loss : ' + loss)
ax_loss.set_ylabel('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend(loc='best')
plt.show()
plt.close(fig_loss)

# Accuracy-metric curves (training vs. validation) per epoch.
metric_name = metrics[0]
fig_metric, ax_metric = plt.subplots()
ax_metric.plot(np.array(history.history[metric_name]), label='Training')
ax_metric.plot(np.array(history.history['val_' + metric_name]), label='Validation')
ax_metric.set_title('Model accuracy metric : ' + metric_name)
ax_metric.set_ylabel('Accuracy metric')
ax_metric.set_xlabel('Epoch')
ax_metric.legend(loc='best')
plt.show()
plt.close(fig_metric)

- As training proceeds, the validation loss/accuracy moves further away from the training loss/accuracy, which indicates that the model is overfitting. I will therefore use dropout in the next task.

## Task 2

In [0]:
# function to build, compile and train the classification model
def classification_network(
    input_train = input_train, 
    output_train_class_onehot = output_train_class_onehot,
    optimizer_type = None,
    loss = 'categorical_crossentropy',
    metrics = None,
    dropout_ratio = 0.0,
    validtrain_split_ratio = 0.2,
    max_epochs = 50,
    batch_size = 500,
    batch_shuffle = True,
    num_hidden_nodes = 256,
    input_shape = (28, 28, 1),
    inputs = None):
    """Build, compile and train the convolutional classifier.

    The network is two conv/conv/max-pool stages (16 and 32 filters), a sigmoid
    hidden dense layer and a 10-way softmax output. Dropout with rate
    `dropout_ratio` is applied after each pooling layer and after the hidden
    dense layer.

    Parameters
    ----------
    input_train, output_train_class_onehot :
        Training inputs and one-hot targets passed to `model.fit`. The defaults
        are the notebook-level arrays as they exist when this function is defined.
    optimizer_type :
        Optimizer passed to `model.compile`. When None, a fresh
        `SGD(learning_rate=0.2)` is created for this call.
    loss :
        Loss passed to `model.compile`.
    metrics :
        Metrics passed to `model.compile`. When None, `['categorical_accuracy']`.
    dropout_ratio :
        Rate given to every Dropout layer.
    validtrain_split_ratio :
        `validation_split` passed to `model.fit`.
    max_epochs, batch_size, batch_shuffle :
        `epochs`, `batch_size` and `shuffle` passed to `model.fit`.
    num_hidden_nodes :
        Number of units in the hidden dense layer.
    input_shape :
        Shape used to create the Input layer when `inputs` is None.
    inputs :
        Optional pre-built Keras input tensor. When None, one is created from
        `input_shape`.

    Returns
    -------
    (model, history)
        The trained model and the object returned by `model.fit`.
    """
    # Resolve the defaults per call. As default-argument expressions these objects were
    # created once at definition time, so every call shared one optimizer instance and
    # the Input tensor ignored the `input_shape` argument.
    if optimizer_type is None:
        optimizer_type = SGD(learning_rate=0.2)
    if metrics is None:
        metrics = ['categorical_accuracy']
    if inputs is None:
        inputs = Input(shape=input_shape)

    down_01 = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same')(inputs)
    down_01 = Activation('relu')(down_01)
    down_01 = Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding='same')(down_01)
    down_01 = Activation('relu')(down_01)

    # 2x2 max pooling halves the spatial size (14x14x16 for the default 28x28x1 input)
    down_01_pool = MaxPooling2D((2, 2), strides=(2, 2))(down_01)
    down_01_pool = Dropout(dropout_ratio)(down_01_pool)

    down_02 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same')(down_01_pool)
    down_02 = Activation('relu')(down_02)
    down_02 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding='same')(down_02)
    down_02 = Activation('relu')(down_02)

    # second 2x2 max pooling (7x7x32 for the default input)
    down_02_pool = MaxPooling2D((2, 2), strides=(2, 2))(down_02)
    down_02_pool = Dropout(dropout_ratio)(down_02_pool)

    flatten = Flatten()(down_02_pool)   # 1568 nodes for the default input

    dense_01 = Dense(num_hidden_nodes)(flatten)
    dense_01 = Activation('sigmoid')(dense_01)
    dense_01 = Dropout(dropout_ratio)(dense_01)

    dense_02 = Dense(10)(dense_01)
    outputs = Activation('softmax')(dense_02)

    # compile the network
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=optimizer_type, loss=loss, metrics=metrics)

    print('* Training the compiled network *')
    print()

    history = model.fit(input_train, output_train_class_onehot,
                        batch_size=batch_size,
                        epochs=max_epochs,
                        validation_split=validtrain_split_ratio,
                        shuffle=batch_shuffle)
    print()
    print('Training completed')
    print()

    return model, history

In [0]:
# NOTE: the leftover debugging expression `str(R[0])` was removed from this cell.
# `R` is only assigned in the dropout-sweep cell further down, so evaluating it here
# raised NameError when the notebook was run top to bottom on a fresh kernel.

In [0]:
# function to plot history
def plot_result(history, r, metric=None):
    """Plot training/validation loss and accuracy-metric curves side by side.

    Parameters
    ----------
    history :
        Object with a `.history` dict of per-epoch lists, as returned by `model.fit`.
    r :
        Dropout ratio of the run; only used in the panel titles.
    metric :
        Key of the accuracy metric in `history.history`. When None, the first
        key that is neither 'loss' nor prefixed with 'val_' is used, so the plot
        follows the metric the run was trained with instead of a notebook-level
        global.

    Raises
    ------
    ValueError
        If `metric` is None and no metric key can be found in the history.
    """
    records = history.history
    if metric is None:
        metric = next(
            (key for key in records if key != 'loss' and not key.startswith('val_')),
            None,
        )
        if metric is None:
            raise ValueError('history contains no metric besides the loss')

    fig, (axL, axR) = plt.subplots(ncols=2, figsize=(10,4))
    # model loss
    axL.plot(records['loss'])
    axL.plot(records['val_loss'])
    axL.set_title('Loss: Dropout ratio ='+str(r))
    axL.set_xlabel('Epoch')
    axL.set_ylabel('Loss')
    axL.legend(['Training', 'Validation'], loc='best')
    # model accuracy metric
    axR.plot(np.array(records[metric]))
    axR.plot(np.array(records['val_' + metric]))
    axR.set_title('Model accuracy: Dropout ratio ='+str(r))
    axR.set_ylabel('Accuracy metric')
    axR.set_xlabel('Epoch')
    axR.legend(['Training', 'Validation'], loc='best')
    # Figure.show() only warns on non-GUI (inline) backends; pyplot.show() renders there too.
    plt.show()

In [0]:
# Train one network per dropout ratio and keep each run's training history.
R = [0.1, 0.2, 0.3, 0.4]  # dropout ratios to compare

histories = []
for r in R:
    # `model` and `history` are left holding the results of the last ratio after the loop.
    model, history = classification_network(dropout_ratio=r)
    histories += [history]

In [0]:
# Draw the loss/accuracy curves of every run, labelled with its dropout ratio.
for run_index, ratio in enumerate(R):
    plot_result(histories[run_index], ratio)

- Dropout ratio 0.1: overfitting.
- Dropout ratios 0.3 and 0.4: underfitting.
- Conclusion: a dropout ratio of 0.2 is the best of the values tried; it avoids both overfitting and underfitting while still reaching high accuracy.