## Deep Learning

> Antonopoulos Ilias (p3352004) <br />
> Ndoja Silva (p3352017) <br />
> MSc Data Science AUEB

## Table of Contents

- [Data Loading](#Data-Loading)
- [Hyperparameter Tuning](#Hyperparameter-Tuning)
- [Model Selection](#Model-Selection)
- [Evaluation](#Evaluation)

In [1]:
import gc
import itertools

import numpy as np
import keras_tuner as kt
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import confusion_matrix

2022-03-05 21:41:56.718939: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-03-05 21:41:56.718956: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# TODO: [ilias] data augmentation

In [3]:
# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.8.0


In [4]:
# Report how many GPUs TensorFlow can see on this machine.
print("Num GPUs Available: ", len(tf.config.list_physical_devices("GPU")))

Num GPUs Available:  0


2022-03-05 21:41:58.448201: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-03-05 21:41:58.448606: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-03-05 21:41:58.448670: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-03-05 21:41:58.448725: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-03-05 21:41:58.450579: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

### Data Loading

In [5]:
# Fashion-MNIST ships with Keras; load_data() returns the
# (images, labels) pair for the training split and for the test split.
fashion_mnist = tf.keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

In [6]:
# Shape of the training images: (number of images, height, width).
train_images.shape

(60000, 28, 28)

In [7]:
# Peek at the training labels (one integer class id per image).
train_labels

array([9, 0, 0, ..., 3, 0, 5], dtype=uint8)

In [8]:
# Distinct class ids present in the training labels.
set(train_labels)

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

In [9]:
# Shape of the test images: (number of images, height, width).
test_images.shape

(10000, 28, 28)

This is a dataset of 60,000 28x28 grayscale training images covering 10 fashion categories,
  along with a test set of 10,000 images.
  
The classes are:
  
| Label | Description |
|:-----:|-------------|
|   0   | T-shirt/top |
|   1   | Trouser     |
|   2   | Pullover    |
|   3   | Dress       |
|   4   | Coat        |
|   5   | Sandal      |
|   6   | Shirt       |
|   7   | Sneaker     |
|   8   | Bag         |
|   9   | Ankle boot  |

In [10]:
# Human-readable class names, ordered so that class_names[label] is the
# description of that integer label (same order as the table above).
class_names = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]

### Hyperparameter Tuning

In [11]:
# Single seed shared by NumPy, TensorFlow and (further down) the tuner.
SEED = 123456

# Seed NumPy's legacy global generator and TensorFlow's global generator.
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [12]:
def clean_up(model_):
    """Release Keras/TensorFlow state held by a model that is no longer needed.

    Clears the global Keras session, drops this function's reference to the
    model and forces a garbage-collection pass.

    Args:
        model_: the model to discard.

    Note:
        ``del model_`` only removes the local name. The caller's own variable
        keeps referencing the model until it is rebound or deleted there.
    """
    # The module is spelled `backend`; `tf.keras.backen` raised AttributeError
    # before anything was cleaned up.
    tf.keras.backend.clear_session()
    del model_
    gc.collect()

In [13]:
def cnn_model_builder(hp):
    """Build and compile the CNN for one point of the hyperparameter space.

    The network is two (convolution -> max-pooling -> dropout) stages followed
    by a flatten layer, a ReLU dense layer and a 10-unit softmax output. It is
    compiled with Adam, sparse categorical cross-entropy and accuracy.

    Args:
        hp: hyperparameter container exposing ``Int``, ``Float`` and
            ``Choice``; it supplies filter counts, kernel sizes, dropout
            rates, the dense layer width and the learning rate.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers

    # --- first convolutional stage ---
    first_filters = hp.Int("1st-filter", min_value=32, max_value=128, step=16)
    first_kernel = hp.Choice("1st-kernel", values=[3, 5])
    first_conv = layers.Conv2D(
        filters=first_filters,
        kernel_size=first_kernel,
        strides=(1, 1),
        padding="same",
        dilation_rate=(1, 1),
        activation="relu",
        input_shape=(28, 28, 1),
        name="1st-convolution",
    )
    first_pool = layers.MaxPool2D(
        pool_size=(2, 2), strides=(2, 2), padding="same", name="1st-max-pooling"
    )
    first_rate = hp.Float("1st-dropout", min_value=0.0, max_value=0.6, step=0.1)
    first_drop = layers.Dropout(rate=first_rate, name="1st-dropout")

    # --- second convolutional stage ---
    second_filters = hp.Int("2nd-filter", min_value=32, max_value=64, step=16)
    second_kernel = hp.Choice("2nd-kernel", values=[3, 5])
    second_conv = layers.Conv2D(
        filters=second_filters,
        kernel_size=second_kernel,
        strides=(1, 1),
        padding="same",
        dilation_rate=(1, 1),
        activation="relu",
        name="2nd-convolution",
    )
    second_pool = layers.MaxPool2D(
        pool_size=(2, 2), strides=(2, 2), padding="same", name="2nd-max-pooling"
    )
    second_rate = hp.Float("2nd-dropout", min_value=0.0, max_value=0.6, step=0.1)
    second_drop = layers.Dropout(rate=second_rate, name="2nd-dropout")

    # --- classifier head ---
    flatten = layers.Flatten(name="flatten-layer")
    hidden_units = hp.Int("dense-layer-units", min_value=32, max_value=128, step=16)
    hidden = layers.Dense(units=hidden_units, activation="relu", name="dense-layer")
    output = layers.Dense(units=10, activation="softmax", name="output-layer")

    model = tf.keras.Sequential(
        [
            first_conv,
            first_pool,
            first_drop,
            second_conv,
            second_pool,
            second_drop,
            flatten,
            hidden,
            output,
        ]
    )

    # The learning rate is requested last, after all architecture hyperparameters.
    learning_rate = hp.Choice(
        "learning-rate", values=[1e-3, 1e-4, 2 * 1e-4, 4 * 1e-4]
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

    return model

In [14]:
# BayesianOptimization tuning with Gaussian process
# Every trial builds a model with cnn_model_builder and is ranked by its
# validation accuracy; the search itself is seeded with SEED.
tuner = kt.BayesianOptimization(
    cnn_model_builder,
    objective="val_accuracy",
    max_trials=70,  # the total number of trials (model configurations) to test at most
    allow_new_entries=True,
    tune_new_entries=True,
    seed=SEED,
    directory="hparam-tuning",
    project_name="cnn",
)

2022-03-05 21:41:58.832975: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [15]:
# Stop a trial once validation loss has not improved for 5 consecutive epochs.
# Note: this monitors val_loss, while the tuner ranks trials by val_accuracy.
stop_early = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=5)

In [17]:
# Run the search: each trial trains for at most 50 epochs, holds out the last
# 20% of the training data for validation and is cut short by stop_early.
tuner.search(
    train_images, train_labels, epochs=50, validation_split=0.2, callbacks=[stop_early]
)

# get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

# Report the winning value of every hyperparameter declared in cnn_model_builder.
print(
    f"""
The hyperparameter search is complete. \n

Results
=======
|
---- optimal number of output filters in the 1st convolution : {best_hps.get('1st-filter')}
|
---- optimal size of the first convolution window            : {best_hps.get('1st-kernel')}
|
---- optimal first dropout rate                              : {best_hps.get('1st-dropout')}
|
---- optimal number of output filters in the 2nd convolution : {best_hps.get('2nd-filter')}
|
---- optimal size of the second convolution window           : {best_hps.get('2nd-kernel')}
|
---- optimal second dropout rate                             : {best_hps.get('2nd-dropout')}
|
---- optimal number of units in the densely-connected layer  : {best_hps.get('dense-layer-units')}
|
---- optimal learning rate for the optimizer                 : {best_hps.get('learning-rate')}
"""
)


Search: Running Trial #2

Hyperparameter    |Value             |Best Value So Far 
1st-filter        |112               |?                 
1st-kernel        |5                 |?                 
1st-dropout       |0.1               |?                 
2nd-filter        |48                |?                 
2nd-kernel        |3                 |?                 
2nd-dropout       |0.5               |?                 
dense-layer-units |96                |?                 
learning-rate     |0.0004            |?                 

Epoch 1/50
  71/1500 [>.............................] - ETA: 51s - loss: 4.3133 - accuracy: 0.4401

KeyboardInterrupt: 

### Model Selection

In [None]:
# Fetch the single best model found by the search and list its layers.
model = tuner.get_best_models(num_models=1)[0]

model.summary()

In [None]:
# Render the architecture diagram (with layer names and tensor shapes) to disk.
# NOTE(review): presumably the static/ directory must already exist - confirm.
tf.keras.utils.plot_model(
    model, to_file="static/cnn_model.png", show_shapes=True, show_layer_names=True
)

In [None]:
# Reset Keras state via clean_up before the next model is built.
clean_up(model)

In [None]:
# build the model with the optimal hyperparameters and train it on the data for 30 epochs
model = tuner.hypermodel.build(best_hps)
history = model.fit(train_images, train_labels, epochs=30, validation_split=0.2)

# keep best epoch: the 1-based epoch with the highest validation accuracy
# (list.index returns the first occurrence, so ties resolve to the earliest epoch)
val_acc_per_epoch = history.history["val_accuracy"]
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print("Best epoch: %d" % (best_epoch,))

In [None]:
# Reset Keras state via clean_up before the final model is built.
clean_up(model)

In [None]:
# Build a fresh model from the best hyperparameters.
hypermodel = tuner.hypermodel.build(best_hps)

# retrain the model, this time stopping at best_epoch; 20% of the training
# data is still held out for validation.
history = hypermodel.fit(
    train_images, train_labels, epochs=best_epoch, validation_split=0.2
)

### Evaluation

In [None]:
# Score the retrained model on the held-out test set; given the compiled
# metrics, the result is [loss, accuracy].
# NOTE(review): 3 does not look like a documented verbosity mode for
# Model.evaluate - confirm the intended value.
eval_result = hypermodel.evaluate(test_images, test_labels, verbose=3)
print("[test loss, test accuracy]:", eval_result)

In [None]:
def plot_history(hs, epochs, metric):
    """Plot the training and validation curves of one metric for one or more runs.

    Args:
        hs: mapping from a display label to an object whose ``history``
            attribute maps metric names to per-epoch value lists (for example
            the object returned by ``model.fit``).
        epochs: number of epochs spanned by the x axis.
        metric: metric to plot, e.g. ``"loss"`` or ``"accuracy"``; the matching
            ``val_<metric>`` series is drawn alongside it.

    Side effects:
        Switches matplotlib to the ``dark_background`` style, overrides the
        ``figure.figsize`` and ``font.size`` rcParams, clears the current
        figure and shows the plot.
    """
    print()
    plt.style.use("dark_background")
    plt.rcParams["figure.figsize"] = [15, 8]
    plt.rcParams["font.size"] = 16
    plt.clf()
    for label in hs:
        train_values = hs[label].history[metric]
        val_values = hs[label].history["val_{0:s}".format(metric)]
        # Epochs are 1-based: plot against 1..n so the curves line up with the
        # tick labels instead of being shifted one epoch to the left.
        plt.plot(
            np.arange(1, len(train_values) + 1),
            train_values,
            label="{0:s} train {1:s}".format(label, metric),
            linewidth=2,
        )
        plt.plot(
            np.arange(1, len(val_values) + 1),
            val_values,
            label="{0:s} validation {1:s}".format(label, metric),
            linewidth=2,
        )
    # About ten integer ticks, always including epoch 1. An integer step avoids
    # fractional ticks when `epochs` is not a multiple of 10.
    step = max(1, int(np.ceil(epochs / 10)))
    x_ticks = np.union1d([1], np.arange(step, epochs + 1, step))
    plt.xticks(x_ticks)
    if metric == "loss":
        # Loss is not bounded by 1; only pin the lower bound so that large
        # values are not clipped off the figure.
        plt.ylim(bottom=0)
    else:
        plt.ylim((0, 1))
    plt.xlabel("Epochs")
    plt.ylabel("Loss" if metric == "loss" else "Accuracy")
    plt.legend()
    plt.show()

In [None]:
# Final-epoch train/validation metrics of the retrained model, next to the
# metrics measured on the test set.
print(f"Train Loss          : {history.history['loss'][-1]:.5f}")
print(f"Validation Loss     : {history.history['val_loss'][-1]:.5f}")
print(f"Test Loss           : {eval_result[0]:.5f}")
print("-------------------")
print(f"Train Accuracy      : {history.history['accuracy'][-1]:.5f}")
print(f"Validation Accuracy : {history.history['val_accuracy'][-1]:.5f}")
print(f"Test Accuracy       : {eval_result[1]:.5f}")

# Learning curves per epoch: loss first, then accuracy.
plot_history(hs={"CNN": history}, epochs=best_epoch, metric="loss")
plot_history(hs={"CNN": history}, epochs=best_epoch, metric="accuracy")

In [None]:
def plot_confusion_matrix(
    cm, classes, normalize=False, title="Confusion matrix", cmap=plt.cm.PuBuGn
):
    """Draw a confusion matrix as an annotated heat map.

    Args:
        cm: square confusion matrix; rows are labelled "True label" and
            columns "Predicted label".
        classes: tick labels, one per row/column of ``cm``.
        normalize: when True, every row is divided by its sum so cells show
            the fraction of that true class; rows that sum to zero stay zero.
        title: figure title.
        cmap: matplotlib colormap used for the heat map.

    Side effects:
        Switches matplotlib to the ``default`` style and overrides the
        ``figure.figsize`` rcParam.
    """
    cm = np.asarray(cm)

    # Normalise BEFORE drawing so the colours and the printed numbers describe
    # the same matrix.
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        cm = np.divide(
            cm.astype("float"),
            row_totals,
            out=np.zeros(cm.shape, dtype=float),
            where=row_totals != 0,
        )

    plt.style.use("default")
    plt.rcParams["figure.figsize"] = [11, 9]
    plt.imshow(cm, interpolation="nearest", cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    # Cells darker than half the maximum get white text so it stays readable.
    thresh = cm.max() / 2.0
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(
            j,
            i,
            # Ratios are shortened to two decimals; raw counts are shown as-is.
            format(cm[i, j], ".2f") if normalize else cm[i, j],
            horizontalalignment="center",
            color="white" if cm[i, j] > thresh else "black",
        )

    # Set the axis labels first so tight_layout can make room for them.
    plt.ylabel("True label")
    plt.xlabel("Predicted label")
    plt.tight_layout()

In [None]:
# Predict class probabilities for the held-out test set
Y_pred = hypermodel.predict(test_images)

# Collapse each probability vector to the index of its most likely class
Y_pred_classes = np.argmax(Y_pred, axis=1)

# compute the confusion matrix (rows: true labels, columns: predicted labels)
confusion_mtx = confusion_matrix(test_labels, Y_pred_classes)

# plot the confusion matrix, reusing class_names so the tick labels match the
# names used everywhere else in the notebook
plot_confusion_matrix(confusion_mtx, classes=class_names)

In [None]:
# Indices of the first four test images whose predicted class differs from the
# true label (fewer if the model makes fewer than four mistakes).
incorrect = np.flatnonzero(Y_pred_classes != test_labels)[:4].tolist()

In [None]:
# Show the misclassified examples collected above on a 2x2 grid, each titled
# with the predicted and the actual class name. The figure is created at its
# final 10x10 inch size directly.
fig, ax = plt.subplots(2, 2, figsize=(10, 10))

# ax.flat walks the grid row by row; zip stops early if fewer than four
# misclassified images were found.
for axis, idx in zip(ax.flat, incorrect):
    axis.imshow(test_images[idx].reshape(28, 28), cmap="gray")
    axis.set_title(
        "Predicted Label : "
        + class_names[Y_pred_classes[idx]]
        + "\n"
        + "Actual Label : "
        + class_names[test_labels[idx]]
    )