# Experiment with a convolutional neural network

An experiment in training a convolutional neural network with a VGG-like architecture.

Source: https://github.com/stefanogiagu/dl_mib_2023/blob/main/DLforPhysicists_MIB2023_HandsOn_SimpleCNN_italian.ipynb

In [None]:
import sys
import yaml
import matplotlib.pyplot as plt
import tensorflow as tf

sys.path.append('../modules/')

%load_ext autoreload
%autoreload 2

## Load data

Source:
- https://www.tensorflow.org/tutorials/load_data/images

In [None]:
# Root folders of the image datasets, relative to the notebook location.
# Presumably each one holds one sub-folder per class, as expected by
# `tf.keras.utils.image_dataset_from_directory` -- TODO confirm.
test_data_dir = '../data/Dataset_test/'
training_data_dir = '../data/Dataset/'

In [None]:
# Side (in pixels) of the square images fed to the network: every image
# is resized to (square_image_side, square_image_side) while loading.
square_image_side = 100

# Number of held-out samples reserved for testing; all the remaining
# held-out samples are used for validation.
n_test_samples = 450

# Fixed seed, so that the test/validation split is reproducible.
split_seed = 0

training_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=training_data_dir,
    image_size=(square_image_side, square_image_side),
    batch_size=64,
    color_mode='rgb'
)

# Held-out images, one image per batch, loaded in a fixed (file) order.
# Note: with the default `shuffle=True` the dataset is reshuffled every
#       time it is iterated, so `take` and `skip` would each see a
#       different order and the two splits could share samples.
test_dataset = tf.keras.utils.image_dataset_from_directory(
    directory=test_data_dir,
    image_size=(square_image_side, square_image_side),
    batch_size=1,
    color_mode='rgb',
    shuffle=False
)

# Shuffle the held-out samples once and freeze that order, so that the
# two splits below are disjoint and identical at every iteration.
# Note: the buffer spans the whole dataset (needed for a uniform
#       shuffle), hence all the held-out images are kept in memory.
shuffled_test_dataset = test_dataset.shuffle(
    buffer_size=test_dataset.cardinality(),
    seed=split_seed,
    reshuffle_each_iteration=False
)

# Define the test and validation datasets by taking respectively
# the first `n_test_samples` samples from the shuffled held-out
# dataset and all the other samples.
test_data = shuffled_test_dataset.take(n_test_samples)
validation_data = shuffled_test_dataset.skip(n_test_samples)

In [None]:
# Extract the first batch from the training dataset.
# Note: despite the `test_` prefix, these are training images/labels.
test_batch, test_labels = next(iter(training_dataset))

# Plot the first 9 images in the batch on a 3x3 grid, each one titled
# with its label.
fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(10, 10))

# `axs.flat` walks the grid row by row, so image `i` lands in the cell
# at row `i // 3`, column `i % 3`.
for i, ax in enumerate(axs.flat):
    # Cast to uint8 so that matplotlib reads the values as 0-255 pixels.
    ax.imshow(test_batch[i, ...].numpy().astype('uint8'))
    ax.set_title(test_labels[i].numpy())

    # Hide ticks and frame on every panel: `plt.axis('off')` would only
    # act on the current axes, i.e. on a single panel of the grid.
    ax.axis('off')

In [None]:
# Draw a batch from the training dataset and show one of its images at
# a larger size.
test_batch, test_labels = next(iter(training_dataset))

# Position (within the batch) of the image to display. It is set
# explicitly so that the cell does not depend on a loop variable left
# over from another cell, i.e. on the order in which cells are run.
sample_index = 0

fig = plt.figure(figsize=(14, 6))

# Cast to uint8 so that matplotlib reads the values as 0-255 pixels.
plt.imshow(
    test_batch[sample_index, ...].numpy().astype('uint8')
)

"Unfold" the Tensorflow dataset.

In [None]:
# Walk the training dataset once, collecting the images and the labels
# of every batch, then join the batches along the first (batch) axis
# so that `x_train` and `y_train` each hold the samples as one tensor.
image_batches, label_batches = [], []

for images, labels in training_dataset:
    image_batches.append(images)
    label_batches.append(labels)

x_train = tf.concat(image_batches, axis=0)
y_train = tf.concat(label_batches, axis=0)

In [None]:
# Show the first 9 samples of the unfolded training set on a 3x3 grid,
# each one titled with its label.
fig, axs = plt.subplots(ncols=3, nrows=3, figsize=(10, 10))

# Use the flat (row-major) position in the grid as the sample index:
# indexing with `row + column` would only reach samples 0..4 and draw
# the same image in several panels.
for index, ax in enumerate(axs.flat):
    # Cast to uint8 so that matplotlib reads the values as 0-255 pixels.
    ax.imshow(x_train[index, ...].numpy().astype('uint8'))
    ax.set_title(y_train[index].numpy())

## Custom VGG-like model

In [None]:
from tensorflow.keras import Input, Sequential, Model
from tensorflow.keras.layers import Flatten, Dropout, Dense
from tensorflow.keras.callbacks import LearningRateScheduler
import seaborn as sns
from vgg import VGGLayer

sns.set_theme()

In [None]:
# Location of the YAML file describing the custom VGG-like network.
custom_config_path = './custom_vgg_config.yml'

# Parse the configuration with PyYAML's "full" loader.
# NOTE(review): the full loader should only be used on trusted files;
#               if the configuration only contains plain YAML
#               (mappings, lists, numbers, strings), consider switching
#               to `yaml.safe_load`.
with open(custom_config_path) as config_file:
    custom_vgg_config = yaml.full_load(config_file)

# Display the parsed configuration as the cell output.
custom_vgg_config

Complete the model with a fully-connected "head" at the end of it. The final object will be a Keras `Model`.

In [None]:
# The input shape is tied to the size the images are resized to while
# loading (`square_image_side`), with 3 channels (RGB), so that the
# model and the data pipeline cannot get out of sync.
inputs = Input((square_image_side, square_image_side, 3))

# VGG-like block built from the YAML configuration (see `vgg.VGGLayer`).
outputs = VGGLayer(custom_vgg_config)(inputs)

# Fully-connected "head": two hidden layers with 64 units each, with
# dropout applied before each of the first two Dense layers.
outputs = Flatten()(outputs)
outputs = Dropout(0.5)(outputs)
outputs = Dense(units=64, activation='relu')(outputs)
outputs = Dropout(0.5)(outputs)
outputs = Dense(units=64, activation='relu')(outputs)

# Note: since there's no softmax activation function for
# the final Dense layer, the model outputs the logits
# (unnormalized scores), rather than the actual
# predicted probabilities.
outputs = Dense(units=10)(outputs)

custom_vgg_model = Model(
    inputs=inputs,
    outputs=outputs
)

# Note: correct number of parameters w.r.t. the source is 217082.
custom_vgg_model.summary()

In [None]:
# Sanity check: run the model on a batch of input images and display
# the output for the first image only.
# Full output shape: (batch_size, n_classes).
sample_logits = custom_vgg_model(test_batch)
sample_logits[:1]

Compile the model and train.

In [None]:
# Training hyperparameters.
batch_size = 64
n_epochs = 40

# Epochs at which the learning rate is divided by 10.
lr_epoch_boundaries = [10, 20, 30]

# Number of optimizer steps (mini-batches) per epoch, rounded up to
# account for the last, possibly smaller, batch.
steps_per_epoch = -(-x_train.shape[0] // batch_size)

custom_vgg_model.compile(
    optimizer=tf.keras.optimizers.SGD(
        # Note: the schedule is a function of the optimizer STEP (one
        #       step per mini-batch), not of the epoch, hence the epoch
        #       boundaries must be converted to steps. Passing the
        #       epochs directly would shrink the learning rate to its
        #       final value after just 30 mini-batches.
        learning_rate=tf.keras.optimizers.schedules.PiecewiseConstantDecay(
            boundaries=[
                epoch * steps_per_epoch for epoch in lr_epoch_boundaries
            ],
            values=[1e-2, 1e-3, 1e-4, 1e-5]
        )
    ),
    # Note: the true labels are integer class indices (not one-hot
    #       encoded), hence the SPARSE categorical cross-entropy;
    #       `from_logits=True` because the model outputs logits
    #       rather than probabilities.
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

# Train on the unfolded tensors. No validation data is passed, so the
# returned history only contains the training loss and accuracy.
training_history = custom_vgg_model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=n_epochs
)

Plot the training loss and accuracy (and the validation ones, when available) across the epochs.

In [None]:
# Plot the metrics recorded during training: loss on the top panel,
# accuracy on the bottom one. Validation curves are added only when
# the history contains them (i.e. validation data was passed to `fit`).
history = training_history.history
epochs = range(1, len(history['loss']) + 1)

fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(14, 6))

# (axes, key in the history, y-axis label) for each panel.
panels = [
    (axs[0], 'loss', 'Loss'),
    (axs[1], 'accuracy', 'Accuracy'),
]

for ax, metric, axis_label in panels:
    sns.lineplot(
        x=epochs,
        y=history[metric],
        label=f'Training {metric}',
        ax=ax
    )

    validation_metric = f'val_{metric}'

    if validation_metric in history:
        sns.lineplot(
            x=epochs,
            y=history[validation_metric],
            label=f'Validation {metric}',
            ax=ax
        )

    # Label each panel with its own metric (the accuracy panel must
    # not be left unlabeled, nor the loss panel labeled twice).
    ax.set_ylabel(axis_label)

# The epoch axis label goes on the bottom panel.
axs[1].set_xlabel('Epoch')