# Deep Learning Fundamentals - LU03-B Lab Exercise

## 3.6 Load the saved MNIST dataset

In [None]:
import pickle

# NOTE: pickle.load can execute arbitrary code while deserialising; only open
# a pickle file that you created yourself or obtained from a trusted source.
with open('MNIST_data.pickle', mode='rb') as in_file:
    # The file holds one 4-tuple: train images/labels, then test images/labels.
    (train_images, train_labels,
     test_images, test_labels) = pickle.load(in_file)

## 3.7 Print the shape of train and test data for verification 

In [None]:
# Report the shape of each split, in the order train images, train labels,
# test images, test labels.
for shape_template, split in (
    ("Train images shape: {}", train_images),
    ("Train labels shape: {}", train_labels),
    ("Test images shape: {}", test_images),
    ("Test labels shape: {}", test_labels),
):
    print(shape_template.format(split.shape))

## 3.8 Build and train the network

In [None]:
from keras import models
from keras import layers
from keras import losses

def classifier_model():
    """Build the baseline fully connected classifier.

    The stack is Dense(128, relu) on a 784-feature input, Dense(64, relu) and
    a Dense(10, softmax) output layer.

    :return: the uncompiled ``Sequential`` network
    """
    dense_stack = (
        layers.Dense(128, activation='relu', input_shape=(784,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    )
    network = models.Sequential()
    for dense_layer in dense_stack:
        network.add(dense_layer)
    return network

model = classifier_model()
# Model.summary() prints the layer table itself and returns None, so it is
# called directly rather than wrapped in print() (which would add a "None" line).
model.summary()

### 3.8.1 Optimizers

In this section, you will explore how different optimizers affect training performance. Try changing the learning rate as well.

Refer to https://www.tensorflow.org/api_docs/python/tf/keras/optimizers for the available optimizers in Tensorflow.

In [None]:
import numpy.random as nr

# Seed NumPy's global random generator.
# NOTE(review): whether this also fixes Keras weight initialisation depends on
# the installed Keras/TensorFlow version - confirm if reproducibility matters.
nr.seed(1305)

# Optimizers to compare, given by their Keras string identifiers
# (feel free to change to other optimizers).
optimizers = ['SGD', 'RMSprop', 'adam']
history_cache = dict()

for opt in optimizers:
    # A new network per optimizer, so no run starts from another run's weights.
    model = classifier_model()
    model.compile(
        optimizer=opt,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # The test split is passed as validation data for the per-epoch curves.
    history = model.fit(
        train_images,
        train_labels,
        epochs=20,
        batch_size=128,
        validation_data=(test_images, test_labels),
    )
    # Keep the fit result under the optimizer name for the plots further down.
    history_cache[opt] = history

    test_loss, test_acc = model.evaluate(test_images, test_labels)
    print('\nTest Acc ({}) - {:.4f}: \n'.format(opt, test_acc))

In [None]:
import matplotlib.pyplot as plt
def plot_loss(history_cache):
    """Plot training and validation loss per epoch for one or more runs.

    :param history_cache: Mapping of run label (e.g. optimizer name) to the
        object returned by ``model.fit``; every value must expose a ``history``
        dict with ``'loss'`` and ``'val_loss'`` lists. A single such object is
        accepted as well and is plotted without a label suffix.
    :return: None. The figure is displayed with ``plt.show()``.
    """
    # This notebook calls plot_loss both with a dict of runs and with a single
    # fit result; wrapping the single result lets one code path serve both.
    single_run = not isinstance(history_cache, dict)
    runs = {'': history_cache} if single_run else history_cache

    plt.figure(figsize=(10, 8))
    for run_label, run_history in runs.items():
        suffix = '' if single_run else ' - {}'.format(run_label)
        train_loss = run_history.history['loss']
        val_loss = run_history.history['val_loss']
        epochs = list(range(1, len(val_loss) + 1))
        plt.plot(epochs, val_loss, label='Val loss' + suffix)
        plt.plot(epochs, train_loss, label='Train loss' + suffix)

    # Axis labels and title do not depend on the run, so they are set once
    # after the loop instead of on every iteration.
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss over Epochs')
    if runs:
        # Skip the legend for an empty mapping: there is nothing to list.
        plt.legend()
    plt.show()
    
def plot_accuracy(history_cache):
    """Plot training and validation accuracy per epoch for one or more runs.

    :param history_cache: Mapping of run label (e.g. optimizer name) to the
        object returned by ``model.fit``; every value must expose a ``history``
        dict with ``'accuracy'`` and ``'val_accuracy'`` lists. A single such
        object is accepted as well and is plotted without a label suffix.
    :return: None. The figure is displayed with ``plt.show()``.
    """
    # This notebook calls plot_accuracy both with a dict of runs and with a
    # single fit result; wrapping the single result serves both call styles.
    single_run = not isinstance(history_cache, dict)
    runs = {'': history_cache} if single_run else history_cache

    plt.figure(figsize=(10, 8))
    for run_label, run_history in runs.items():
        suffix = '' if single_run else ' - {}'.format(run_label)
        train_acc = run_history.history['accuracy']
        val_acc = run_history.history['val_accuracy']
        epochs = list(range(1, len(val_acc) + 1))
        plt.plot(epochs, val_acc, label='Val accuracy' + suffix)
        plt.plot(epochs, train_acc, label='Train accuracy' + suffix)

    # Axis labels and title do not depend on the run, so they are set once
    # after the loop instead of on every iteration.
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Accuracy over Epochs')
    if runs:
        # Skip the legend for an empty mapping: there is nothing to list.
        plt.legend()
    plt.show()

In [None]:
# Draw the train/validation loss curves of every run stored in history_cache.
plot_loss(history_cache)  

In [None]:
# Draw the train/validation accuracy curves of every run stored in history_cache.
plot_accuracy(history_cache)

### 3.8.2 Dropout

In this section, you will add dropout to every layer of the model and explore how different dropout rates affect training performance.

Refer to https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dropout.

In [None]:
def classifier_dropout_model(dropout_rate):
    """Classifier model with dropout in every layer.

    Exercise placeholder: until the marked section is completed no layers are
    added, so an empty ``Sequential`` container is returned.

    :param dropout_rate: The rate for dropout; must lie between 0 and 1
        inclusive
    :return: network
    :raises AssertionError: if ``dropout_rate`` is outside ``[0, 1]``
    """
    # Check both bounds: a negative rate is as invalid as one above 1.
    assert 0 <= dropout_rate <= 1, 'Incorrect dropout rate specified.'
    network = models.Sequential()
    # Your codes here
    return network

model = classifier_dropout_model(dropout_rate=0.2)
# Model.summary() prints the layer table itself and returns None, so it is
# called directly rather than wrapped in print() (which would add a "None" line).
model.summary()

In [None]:
# Seed NumPy's global random generator before building the model.
nr.seed(1305)

# Build the dropout network; a dropout rate of 0 is passed for this run.
model = classifier_dropout_model(dropout_rate=0)

# Adam optimizer, categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The test split is passed as validation data for the per-epoch curves.
history = model.fit(
    train_images,
    train_labels,
    epochs=20,
    batch_size=128,
    validation_data=(test_images, test_labels),
)

# Score the trained model on the test split.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('\nTest Acc: {:.4f}'.format(test_acc))

In [None]:
def plot_loss(history):
    """Plot training and validation loss per epoch.

    :param history: The object returned by ``model.fit``; it must expose a
        ``history`` dict with ``'loss'`` and ``'val_loss'`` lists. A dict
        mapping run labels to such objects is accepted too, in which case
        every run is drawn and its label is appended to the legend entries.
    :return: None. The figure is displayed with ``plt.show()``.
    """
    # Section 3.8.1 calls plot_loss with a dict of runs; accepting that form
    # here keeps those cells working after this definition has been executed.
    single_run = not isinstance(history, dict)
    runs = {'': history} if single_run else history

    plt.figure(figsize=(10, 8))
    for run_label, run_history in runs.items():
        suffix = '' if single_run else ' - {}'.format(run_label)
        train_loss = run_history.history['loss']
        val_loss = run_history.history['val_loss']
        epochs = list(range(1, len(val_loss) + 1))
        plt.plot(epochs, val_loss, label='Val loss' + suffix)
        plt.plot(epochs, train_loss, label='Train loss' + suffix)

    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss over Epochs')
    if runs:
        # Skip the legend for an empty mapping: there is nothing to list.
        plt.legend()
    plt.show()
    
def plot_accuracy(history):
    """Plot training and validation accuracy per epoch.

    :param history: The object returned by ``model.fit``; it must expose a
        ``history`` dict with ``'accuracy'`` and ``'val_accuracy'`` lists. A
        dict mapping run labels to such objects is accepted too, in which case
        every run is drawn and its label is appended to the legend entries.
    :return: None. The figure is displayed with ``plt.show()``.
    """
    # Section 3.8.1 calls plot_accuracy with a dict of runs; accepting that
    # form here keeps those cells working after this definition has been run.
    single_run = not isinstance(history, dict)
    runs = {'': history} if single_run else history

    plt.figure(figsize=(10, 8))
    for run_label, run_history in runs.items():
        suffix = '' if single_run else ' - {}'.format(run_label)
        train_acc = run_history.history['accuracy']
        val_acc = run_history.history['val_accuracy']
        epochs = list(range(1, len(val_acc) + 1))
        plt.plot(epochs, val_acc, label='Val accuracy' + suffix)
        plt.plot(epochs, train_acc, label='Train accuracy' + suffix)

    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Accuracy over Epochs')
    if runs:
        # Skip the legend for an empty mapping: there is nothing to list.
        plt.legend()
    plt.show()

In [None]:
# Loss curves for the current `history` (the dropout run when cells are executed in order).
plot_loss(history)

In [None]:
# Accuracy curves for the current `history` (the dropout run when cells are executed in order).
plot_accuracy(history)

### 3.8.2 Batchnorm

In this section, you will add batch normalization to every layer of the model and explore its impact on training performance. Remember that batch normalization is typically applied before the activation function.

Refer to https://www.tensorflow.org/api_docs/python/tf/keras/layers/BatchNormalization.

In [None]:
def classifier_batchnorm_model():
    """Classifier model with batch norm in every layer.

    Exercise placeholder: until the marked section is completed no layers are
    added, so an empty ``Sequential`` container is returned.

    :return: network
    """
    network = models.Sequential()
    # your codes here
    return network

model = classifier_batchnorm_model()
# Model.summary() prints the layer table itself and returns None, so it is
# called directly rather than wrapped in print() (which would add a "None" line).
model.summary()

In [None]:
# Seed NumPy's global random generator before building the model.
nr.seed(1305)

# Build the batch-norm network.
model = classifier_batchnorm_model()

# Adam optimizer, categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The test split is passed as validation data for the per-epoch curves.
history = model.fit(
    train_images,
    train_labels,
    epochs=20,
    batch_size=128,
    validation_data=(test_images, test_labels),
)

# Score the trained model on the test split.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('\nTest Acc: {:.4f}'.format(test_acc))

In [None]:
# Loss curves for the current `history` (the batch-norm run when cells are executed in order).
plot_loss(history)

In [None]:
# Accuracy curves for the current `history` (the batch-norm run when cells are executed in order).
plot_accuracy(history)

### 3.8.3 Weights Initialization

In this section, you will change the weight initialization of every layer in the model and explore how it affects training performance.

Refer to https://www.tensorflow.org/api_docs/python/tf/keras/initializers.

In [None]:
def classifier_diff_wts_init_model():
    """Classifier model with different weights initialization in every layer.

    Exercise placeholder: until the marked section is completed no layers are
    added, so an empty ``Sequential`` container is returned.

    :return: network
    """
    network = models.Sequential()
    # your codes here
    return network

model = classifier_diff_wts_init_model()
# Model.summary() prints the layer table itself and returns None, so it is
# called directly rather than wrapped in print() (which would add a "None" line).
model.summary()

In [None]:
# Seed NumPy's global random generator before building the model.
nr.seed(1305)

# Build the network with the custom weight initialization.
model = classifier_diff_wts_init_model()

# Adam optimizer, categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The test split is passed as validation data for the per-epoch curves.
history = model.fit(
    train_images,
    train_labels,
    epochs=20,
    batch_size=128,
    validation_data=(test_images, test_labels),
)

# Score the trained model on the test split.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('\nTest Acc: {:.4f}'.format(test_acc))

### 3.8.4 Regularization

In this section, you will add regularization to every layer of the model and explore how it affects training performance.

Refer to https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/Regularizer.

In [None]:
from keras import regularizers

def classifier_reg_model(reg_type, lambd):
    """Classifier model with regularizations in every layer.

    Exercise placeholder: the regularizer object is prepared, but no layers
    are added until the marked section is completed.

    :param reg_type: Regularization type, either 'L1' or 'L2'
    :param lambd: Regularization constant handed to the regularizer
    :return: network
    """
    assert reg_type in ['L1', 'L2'], 'Incorrect norm type specified'
    # Only two types pass the check above, so anything that is not 'L1' is 'L2'.
    reg_class = regularizers.L1 if reg_type == 'L1' else regularizers.L2
    reg = reg_class(lambd)
    network = models.Sequential()
    # your codes here
    return network

model = classifier_reg_model(reg_type='L1', lambd=0.1)
# Model.summary() prints the layer table itself and returns None, so it is
# called directly rather than wrapped in print() (which would add a "None" line).
model.summary()

In [None]:
# Seed NumPy's global random generator before building the model.
nr.seed(1305)

# Build the regularized network: L1 penalty with a constant of 0.01.
model = classifier_reg_model(reg_type='L1', lambd=0.01)

# Adam optimizer, categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# The test split is passed as validation data for the per-epoch curves.
history = model.fit(
    train_images,
    train_labels,
    epochs=20,
    batch_size=128,
    validation_data=(test_images, test_labels),
)

# Score the trained model on the test split.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('\nTest Acc: {:.4f}'.format(test_acc))

In [None]:
# Loss curves for the current `history` (the regularization run when cells are executed in order).
plot_loss(history)

In [None]:
# Accuracy curves for the current `history` (the regularization run when cells are executed in order).
plot_accuracy(history)

### 3.8.5 Learning Rate Schedulers

In this section, you will pass a learning rate scheduler to the optimizer before compiling the model and explore how it affects training performance.

Refer to https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/schedules.

In [None]:
def classifier_model():
    """Classifier model without any of the add-ons from the earlier sections.

    Three dense layers: 128 and 64 ReLU units on a 784-feature input, then a
    10-unit softmax output.

    :return: network
    """
    return models.Sequential([
        layers.Dense(128, activation='relu', input_shape=(784,)),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])

model = classifier_model()
# Model.summary() prints the layer table itself and returns None, so it is
# called directly rather than wrapped in print() (which would add a "None" line).
model.summary()

In [None]:
import tensorflow as tf
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Seed NumPy's global random generator before building the model.
nr.seed(1305)

# Define the model
model = classifier_model()

# Define the learning rate scheduler
# NOTE(review): `lr` is not used anywhere else in this cell; presumably it is
# meant as the initial learning rate of the schedule - confirm.
lr = 0.01
# Exercise placeholder: the next line has no right-hand side yet, so this cell
# is not valid Python until a schedule (e.g. the imported ExponentialDecay) is
# filled in.
lr_schedule = # Your codes here
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Compile with the Adam optimizer that follows the learning-rate schedule
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# The test split is passed as validation data for the per-epoch curves
history = model.fit(train_images, train_labels, epochs=20, batch_size=128, validation_data=(test_images, test_labels))

# Score the trained model on the test split
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('\nTest Acc: {:.4f}'.format(test_acc))

In [None]:
# Loss curves for the current `history` (the learning-rate-schedule run when cells are executed in order).
plot_loss(history)

In [None]:
# Accuracy curves for the current `history` (the learning-rate-schedule run when cells are executed in order).
plot_accuracy(history)

## 3.9 Save the trained model

There are various ways of saving a trained model. The sections below show some common methods.

### 3.9.1 Save to local drive using object method 'save'
This method saves the model after training.

In [None]:
# Write the current `model` to a single file in the working directory.
# NOTE(review): the .h5 extension presumably selects Keras' HDF5 format - confirm for the installed version.
model.save("MNIST_classifier_model_1.h5")

### 3.9.2 Save to local drive using callback method
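This method checkpoints the model at the end of an epoch during training. Because the callback below sets `save_best_only=True`, a file is written only for epochs in which `val_accuracy` improves.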

In [None]:
# Define the model
# Start from a newly built network.
model = classifier_model()

# Adam optimizer, categorical cross-entropy loss, accuracy as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Checkpoint callback: the epoch number is formatted into the file name, and
# save_best_only=True restricts saving to the best val_accuracy seen so far.
callback = tf.keras.callbacks.ModelCheckpoint(
    "MNIST_classifier_model_2_{epoch:02d}.h5",
    monitor='val_accuracy',
    save_best_only=True,
)

# Train with the callback attached; the test split serves as validation data.
history = model.fit(
    train_images,
    train_labels,
    epochs=20,
    batch_size=128,
    validation_data=(test_images, test_labels),
    callbacks=[callback],
)

# Score the trained model on the test split.
test_loss, test_acc = model.evaluate(test_images, test_labels)
print('\nTest Acc: {:.4f}'.format(test_acc))

## 3.10 Your Exercise
We will randomly select an image from the MNIST dataset and run a prediction on it using your custom model.


### 3.10.1 Build and save your model

Build your own model and integrate what you have learnt so far into your model.

In [None]:
# Your codes here

Randomly select one of the MNIST images for prediction. Display the label of your selected image for validation. You can select from your train_labels, your test_labels, or the original MNIST dataset.

In [None]:
# Your codes here

Your prediction will show the probabilities of all 10 classes. Select the class with the highest probability as the predicted value.
<br>
**HINT: use numpy.argmax(prediction)**

In [None]:
# Your codes here