# Experiment with a convolutional neural network

In [None]:
import sys
import yaml
import matplotlib.pyplot as plt
import tensorflow as tf

# Add the sibling `modules` directory to the import path so that modules
# stored there can be imported from this notebook.
sys.path.append('../modules/')

# Load IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## A VGG model

Import a VGG layer and experiment with it.

In [None]:
from vgg import VGGLayer
from utilities import generate_test_batch

Load config file defining the architecture of the neural network.

In [None]:
# Location of the YAML file describing the VGG architecture.
config_path = './vgg_config.yml'

# Parse the YAML document with PyYAML's "full" loader.
with open(config_path) as config_file:
    vgg_config = yaml.full_load(config_file)

# Display the parsed configuration.
vgg_config

In [None]:
# Instantiate the VGG layer from the parsed configuration.
vgg_layer = VGGLayer(vgg_config)

Test the layer on randomly-generated data.

In [None]:
# Build a batch of 64 generated images of 224x224 pixels to smoke-test the layer.
# NOTE(review): the number of channels and the tensor layout are decided
# inside `generate_test_batch` — check the shape displayed below.
test_batch = generate_test_batch(batch_size=64, image_width=224, image_height=224)

test_batch.shape

In [None]:
# Run the batch through the layer and display only the output computed for
# the first sample.
vgg_layer(test_batch)[0, ...]

## Load data

Source:
- https://www.tensorflow.org/tutorials/load_data/images

In [None]:
# Directories holding the images, relative to the notebook location.
# NOTE(review): presumably one sub-directory per class, as expected by
# `image_dataset_from_directory` when labels are inferred — confirm.
training_data_dir = '../data/Dataset/'
test_data_dir = '../data/Dataset_test/'

In [None]:
# Images are resized to squares of this side length (in pixels) on loading.
square_image_side = 100

# Seed of the held-out data shuffle, fixed so that the test/validation
# split is identical on every run and on every pass over the data.
split_seed = 42

training_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=training_data_dir,
    image_size=(square_image_side, square_image_side),
    batch_size=64
)

# Held-out images are loaded one per batch so that `take`/`skip` below count
# individual samples. The loader's own shuffling is disabled: it reorders
# the elements at every iteration, which would let samples migrate between
# the test and validation subsets from one pass to the next.
test_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=test_data_dir,
    image_size=(square_image_side, square_image_side),
    batch_size=1,
    shuffle=False
)

# Shuffle the held-out samples exactly once. The buffer spans the whole
# dataset (so the permutation is uniform even though files are read in
# sorted order) and `reshuffle_each_iteration=False` keeps the same order
# on every iteration, which makes the split below stable and disjoint.
# Note: the full buffer keeps every held-out image in memory.
test_dataset = test_dataset.shuffle(
    buffer_size=max(int(test_dataset.cardinality()), 1),
    seed=split_seed,
    reshuffle_each_iteration=False
)

# Define the test and validation datasets by taking respectively
# the first 450 samples from the `test_dataset` dataset and all
# the other samples.
test_data = test_dataset.take(450)
validation_data = test_dataset.skip(450)

In [None]:
# Extract the first batch from the training dataset.
# Note: despite their names, `test_batch`/`test_labels` hold TRAINING samples.
test_batch, test_labels = next(iter(training_dataset))

# Plot the first 9 images in the batch on a 3x3 grid.
fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(10, 10))

# `axs.flat` walks the grid row by row, i.e. image i lands in row i // 3,
# column i % 3.
for i, ax in enumerate(axs.flat):
    # Pixel values are floats in [0, 255]: cast to uint8 for display.
    ax.imshow(test_batch[i, ...].numpy().astype('uint8'))

    # Use the integer class label as the title of the panel.
    ax.set_title(test_labels[i].numpy())

    # Hide ticks and frame on EVERY panel (a single `plt.axis('off')` call
    # would only affect the current axes, i.e. the last subplot).
    ax.axis('off')

## Custom VGG-like model

In [None]:
from tensorflow.keras import Input, Sequential, Model
from tensorflow.keras.layers import Flatten, Dropout, Dense
from tensorflow.keras.callbacks import LearningRateScheduler
import seaborn as sns
from lr_schedules import step_schedule

# Apply seaborn's default theme to the figures created from here on.
sns.set_theme()

In [None]:
# Location of the YAML file describing the custom VGG-like architecture.
custom_config_path = './custom_vgg_config.yml'

# Parse the YAML document with PyYAML's "full" loader.
with open(custom_config_path) as config_file:
    custom_vgg_config = yaml.full_load(config_file)

# Display the parsed configuration.
custom_vgg_config

In [None]:
# Instantiate a VGG layer from the custom configuration.
custom_vgg_layer = VGGLayer(custom_vgg_config)

In [None]:
# Smoke-test the custom layer on a single image: fetch the first batch of
# the training dataset and feed only its first sample (the `[:1, ...]`
# slice keeps the batch axis).
test_batch, test_labels = next(iter(training_dataset))

custom_vgg_layer(test_batch[:1, ...])

Complete the model with a fully-connected "head" at the end of it. The final object will be a Keras `Model`.

In [None]:
# The input shape follows the size the images are resized to when the
# datasets are loaded (`square_image_side`), with 3 colour channels.
inputs = Input((square_image_side, square_image_side, 3,))

# Convolutional backbone: a fresh VGG layer built from the custom config.
outputs = VGGLayer(custom_vgg_config)(inputs)

# Fully-connected head: flatten the feature maps, then two 64-unit ReLU
# layers, with dropout applied before each of the first two Dense layers.
outputs = Flatten()(outputs)
outputs = Dropout(0.5)(outputs)
outputs = Dense(units=64, activation='relu')(outputs)
outputs = Dropout(0.5)(outputs)
outputs = Dense(units=64, activation='relu')(outputs)

# Note: since there's no softmax activation function for
# the final Dense layer, the model outputs the logits
# (unnormalized probabilities), rather than the actual
# predicted probabilities.
# NOTE(review): `units=10` must match the number of classes in the
# dataset (`len(training_dataset.class_names)`) — confirm.
outputs = Dense(units=10)(outputs)

custom_vgg_model = Model(
    inputs=inputs,
    outputs=outputs
)

custom_vgg_model.summary()

In [None]:
# Test the model on a batch of input images and display the logits of the
# first sample only.
# Model output shape: (batch_size, n_classes); displayed slice: (1, n_classes).
custom_vgg_model(test_batch)[:1]

Compile the model and train.

In [None]:
# Build the training ingredients first, then hand them over to Keras.
adam_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

# Note: the true labels are integer class indices (not one-hot encoded)
#       and the model outputs logits, hence the SPARSE categorical
#       cross-entropy with `from_logits=True`.
sparse_ce_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

# Callback updating the learning rate at each epoch according to `step_schedule`.
lr_scheduler = LearningRateScheduler(step_schedule)

custom_vgg_model.compile(
    optimizer=adam_optimizer,
    loss=sparse_ce_loss,
    metrics=['accuracy']
)

# Train for 20 epochs, evaluating on the validation data after each epoch.
training_history = custom_vgg_model.fit(
    x=training_dataset,
    epochs=20,
    validation_data=validation_data,
    callbacks=[lr_scheduler]
)

Plot the training and validation loss and accuracy across the epochs.

In [None]:
# Two stacked panels: loss on top, accuracy at the bottom.
fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(14, 6))

history = training_history.history

# Epochs are numbered from 1 on the x axis.
epoch_numbers = range(1, len(history['loss']) + 1)

# For each panel: its axes, its y-axis label and the (history key, legend
# label) pairs of the curves drawn on it.
panels = [
    (axs[0], 'Loss', [
        ('loss', 'Training loss'),
        ('val_loss', 'Validation loss'),
    ]),
    (axs[1], 'Accuracy', [
        ('accuracy', 'Training accuracy'),
        ('val_accuracy', 'Validation accuracy'),
    ]),
]

for ax, y_label, curves in panels:
    for history_key, curve_label in curves:
        sns.lineplot(
            x=epoch_numbers,
            y=history[history_key],
            label=curve_label,
            ax=ax
        )

    # Label each panel on its own axes, so the accuracy panel is labelled
    # as such instead of the loss panel being labelled twice.
    ax.set_ylabel(y_label)

# The panels share the same epoch range: label the x axis on the bottom one.
axs[1].set_xlabel('Epoch')

Plot the learning rate schedule.

In [None]:
import numpy as np

# Values mirroring the training setup: initial learning rate passed to the
# optimizer and number of training epochs.
initial_lr = 1e-4
n_epochs = 20

# Replay the schedule the way the `LearningRateScheduler` callback applies
# it: at the beginning of every epoch (starting from epoch 0) the schedule
# receives the epoch index and the current learning rate, and its return
# value becomes the learning rate used for that epoch.
lrs = []
current_lr = initial_lr

for epoch in range(n_epochs):
    current_lr = step_schedule(epoch, current_lr)
    lrs.append(current_lr)

fig, ax = plt.subplots(figsize=(14, 6))

sns.lineplot(
    x=range(n_epochs),
    y=lrs,
    ax=ax
)

# Epochs are 0-based here, matching the index passed to the schedule.
ax.set_xlabel('Epoch')
ax.set_ylabel('Learning rate')