### Save Checkpoint (EMNIST Balanced)

This notebook retrains the best model configurations found during hyperparameter tuning. \
Each one is trained in full, and the top-ranked configuration is saved as a checkpoint for use in the demo notebook.

In [None]:
# Install dependencies
# Uses the %pip magic so packages are installed into the environment of the
# running kernel.
# NOTE(review): none of these packages is version-pinned, so a fresh run may
# resolve different releases than the ones the saved results were produced with.
%pip install --upgrade pip
%pip install torchvision
%pip install opencv-python-headless
%pip install seaborn
%pip install matplotlib
%pip install pandas
%pip install scikit-learn
%pip install tensorflow[and-cuda]
# NOTE(review): keras-tuner is not imported in the cells visible here — confirm
# it is still required by this notebook.
%pip install keras-tuner

In [None]:
# Import dependencies
import os
import matplotlib.pyplot as plt # type: ignore
import matplotlib.gridspec as gridspec # type: ignore
import numpy as np # type: ignore
import pandas as pd # type: ignore
import tensorflow as tf # type: ignore
import seaborn as sns # type: ignore
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint # type: ignore
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D # type: ignore
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.utils import to_categorical # type: ignore
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay # type: ignore

import cv2 # type: ignore
from torchvision import datasets # type: ignore

In [None]:
# Report the installed TensorFlow release
print(f"Tensorflow: v{tf.__version__}")

# Look up the visible GPU devices once, then report the count and the devices
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPUs Available: {len(gpu_devices)}")
print(gpu_devices)

In [None]:
# Set seed for reproducibility
SEED = 42

# Seed Python's `random`, NumPy and TensorFlow together. Seeding NumPy alone
# leaves TensorFlow's own generators (weight initialisation, dropout masks,
# batch shuffling) unseeded, so repeated runs would train different models.
tf.keras.utils.set_random_seed(SEED)

In [None]:
# Load results csv files
RESULTS_DIR = '../results/emnist-balanced'
COMBINED_CSV = 'all.csv'  # output of this cell; excluded from the inputs below

# Sorted so the concatenation order does not depend on the order in which the
# filesystem happens to list the directory.
csv_files = sorted(
    f'{RESULTS_DIR}/{file}'
    for file in os.listdir(RESULTS_DIR)
    if file.endswith('.csv') and file != COMBINED_CSV
)
if not csv_files:
    # Fail with a clear message instead of pandas' "No objects to concatenate"
    raise FileNotFoundError(f'No result CSV files found in {RESULTS_DIR}')

# Combine and clean the results
all_results = (
    pd.concat([pd.read_csv(file) for file in csv_files], ignore_index=True)
    # 'Unnamed: 0' is the index column pandas writes by default; tolerate files
    # that were saved without it instead of raising KeyError.
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .round({'dropout_rate': 1})
    # Stable sort: rows with equal val_accuracy keep their (sorted-file) order,
    # so the selected top models are the same on every run.
    .sort_values('val_accuracy', ascending=False, kind='stable')
)

# Save the combined results to a csv file
all_results.to_csv(f'{RESULTS_DIR}/{COMBINED_CSV}', index=False)

# Get the top 3 models from the combined results
top_models = all_results.head(3)
top_models

In [None]:
# Define data transformations
def transform(image):
    """Convert one raw image into a normalised, channel-last float32 array.

    The first two axes are swapped (a transpose), pixel values are scaled by
    1/255 and a trailing channel axis of length 1 is appended.

    A plain axis swap is used instead of a horizontal flip followed by a 90°
    ``cv2.warpAffine`` about ``(w // 2, h // 2)``: for an even-sized image that
    pivot lies half a pixel off the true centre, so the warped result is the
    transpose shifted down by one row, with a blank first row and one source
    column dropped.

    NOTE(review): any other notebook that prepares inputs for a model trained
    here should apply this same transform — confirm they agree.

    Args:
        image: Anything ``np.array`` accepts as an image (for example a PIL
            image or an ``(H, W)`` array).

    Returns:
        ``float32`` array of shape ``(W, H, 1)``, which equals ``(H, W, 1)``
        for square inputs.
    """
    pixels = np.array(image)
    # Swap rows and columns; ascontiguousarray keeps the result C-ordered
    # rather than a strided view of the input.
    pixels = np.ascontiguousarray(np.swapaxes(pixels, 0, 1))
    pixels = pixels.astype('float32') / 255.0
    return np.expand_dims(pixels, axis=-1)

# Load EMNIST Balanced subset
NUM_CLASSES = 47  # width of the one-hot label vectors used by this notebook

emnist_train = datasets.EMNIST(root='../data', split='balanced', train=True, transform=transform, download=True)
emnist_test = datasets.EMNIST(root='../data', split='balanced', train=False, transform=transform, download=True)


def _dataset_to_arrays(dataset):
    """Collect a dataset of (image, label) pairs into two NumPy arrays.

    The dataset is iterated exactly once, so the per-sample transform runs a
    single time per image rather than once for the images and again for the
    labels.

    Args:
        dataset: Iterable yielding ``(image, label)`` pairs.

    Returns:
        Tuple ``(images, labels)`` of stacked NumPy arrays.
    """
    images, labels = [], []
    for image, label in dataset:
        images.append(image)
        labels.append(label)
    return np.array(images), np.array(labels)


# Convert to numpy arrays for TensorFlow
X_train, y_train = _dataset_to_arrays(emnist_train)
X_test, y_test = _dataset_to_arrays(emnist_test)

# One-hot encode the labels
y_train = to_categorical(y_train, NUM_CLASSES)
y_test = to_categorical(y_test, NUM_CLASSES)

# Display some info and stats about the dataset
print(f'Training data shape: {X_train.shape}')
print(f'Test data shape: {X_test.shape}')
print(f'Number of classes: {y_train.shape[1]}')

In [None]:
# Define the model
def create_custom_model(conv1, conv2, dense_units, dropout_rate, learning_rate,
                        num_classes=47, input_shape=(28, 28, 1)):
    """Build and compile a two-block convolutional classifier.

    Architecture: two (3x3 Conv2D + 2x2 MaxPooling2D) blocks, a flatten, one
    hidden dense layer with dropout, and a softmax output layer.

    Args:
        conv1: Number of filters in the first convolution.
        conv2: Number of filters in the second convolution.
        dense_units: Units in the hidden dense layer.
        dropout_rate: Dropout rate applied after the hidden dense layer.
        learning_rate: Learning rate for the Adam optimizer.
        num_classes: Size of the softmax output layer. Defaults to 47, the
            value previously hard-coded here.
        input_shape: Shape of a single input sample. Defaults to
            ``(28, 28, 1)``, the value previously hard-coded here.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        (so labels must be one-hot encoded) and tracking accuracy.
    """
    # Build the model architecture
    model = Sequential([
        Input(shape=input_shape),
        Conv2D(conv1, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(conv2, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(dense_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(num_classes, activation='softmax')
    ])

    # Compile the model with the chosen learning rate
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Define the plotting function
def plot_training_history(model, history, hyperparameters, index):
    """Evaluate `model` on the test set, then save and show a summary figure.

    The figure has four panels: accuracy curves (top-left), loss curves
    (bottom-left), a heatmap of per-class precision / recall / F1 (middle
    column) and the confusion matrix (right column). It is written to
    ``../results/emnist-balanced/best-{index}.png`` and then displayed.

    Reads the notebook-level globals ``X_test`` (model inputs) and ``y_test``
    (one-hot encoded labels).

    Args:
        model: Trained model exposing ``predict``.
        history: Mapping holding the per-epoch sequences 'accuracy',
            'val_accuracy', 'loss' and 'val_loss'.
        hyperparameters: Mapping (e.g. a results row) providing 'val_accuracy',
            'conv1', 'conv2', 'dense_units', 'dropout_rate', 'learning_rate'
            and 'batch_size'; shown in the figure title.
        index: Identifier used in the output file name.
    """
    # Generate predictions for the test set
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true = np.argmax(y_test, axis=1)
    # Every class index implied by the one-hot width, so the confusion matrix
    # keeps a fixed size even if some class never occurs in the labels or the
    # predictions.
    class_labels = np.arange(y_test.shape[1])

    # Combined figure for accuracy, loss, classification report and confusion matrix
    fig = plt.figure(figsize=(20, 10))
    gs = gridspec.GridSpec(2, 3, width_ratios=[1, 1, 2])

    # Model's accuracy (top-left, spans one cell)
    ax0 = fig.add_subplot(gs[0, 0])
    ax0.plot(history['accuracy'], label='Train Accuracy')
    ax0.plot(history['val_accuracy'], label='Validation Accuracy')
    ax0.set_xlabel('Epoch')
    ax0.set_ylabel('Accuracy')
    ax0.legend(loc='lower right')
    ax0.set_title('Training and Validation Accuracy')
    ax0.grid(True)

    # Model's loss (bottom-left, spans one cell)
    ax1 = fig.add_subplot(gs[1, 0])
    ax1.plot(history['loss'], label='Train Loss')
    ax1.plot(history['val_loss'], label='Validation Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend(loc='upper right')
    ax1.set_title('Training and Validation Loss')
    ax1.grid(True)

    # Model's classification report (middle column, spans both rows)
    ax2 = fig.add_subplot(gs[:, 1])
    report = classification_report(y_true, y_pred_classes, output_dict=True)
    report_df = pd.DataFrame(report).transpose()
    # After the transpose, rows are classes plus the summary entries and the
    # columns are the metrics. Drop the summaries and the 'support' counts by
    # name so the heatmap shows precision, recall and F1 for each class (a
    # positional slice here previously left only the precision column).
    per_class_scores = report_df.drop(
        index=['accuracy', 'micro avg', 'macro avg', 'weighted avg'],
        columns=['support'],
        errors='ignore',
    )
    sns.heatmap(per_class_scores, annot=True, fmt=".2f", cmap='Blues', ax=ax2)
    ax2.set_title('Classification Report')
    ax2.grid(False)

    # Model's confusion matrix (right column, spans both rows)
    ax3 = fig.add_subplot(gs[:, 2])
    confusion_mtx = confusion_matrix(y_true, y_pred_classes, labels=class_labels)
    disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mtx, display_labels=class_labels)
    disp.plot(cmap=plt.cm.Blues, ax=ax3, colorbar=False)
    ax3.set_title('Confusion Matrix')
    ax3.grid(False)

    # Include the hyperparameters in the title
    title = f"Validation Accuracy: {float(hyperparameters['val_accuracy']):.5f}   Hyperparameters: Conv1: {int(hyperparameters['conv1'])}, Conv2: {int(hyperparameters['conv2'])}, Dense: {int(hyperparameters['dense_units'])}, Dropout: {float(hyperparameters['dropout_rate']):.1f}, Learning Rate: {float(hyperparameters['learning_rate']):.5f}, Batch Size: {int(hyperparameters['batch_size'])}"
    fig.suptitle(title)

    # Adjust layout
    plt.tight_layout()

    # Save the figure
    os.makedirs('../results/emnist-balanced', exist_ok=True)
    fig.savefig(f'../results/emnist-balanced/best-{index}.png')

    # Display the figure, then release it so repeated calls in a loop do not
    # accumulate open figures
    plt.show()
    plt.close(fig)

In [None]:
# Retrain each selected configuration; `i` is its 1-based rank in `top_models`
for i, (_, row) in enumerate(top_models.iterrows(), start=1):
    # Rebuild the network from this row's hyperparameters
    model = create_custom_model(
        conv1=int(row['conv1']),
        conv2=int(row['conv2']),
        dense_units=int(row['dense_units']),
        dropout_rate=float(row['dropout_rate']),
        learning_rate=float(row['learning_rate']),
    )

    # Every run stops once validation accuracy has not improved for 5 epochs
    # and rolls back to the best weights seen
    callbacks = [
        EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True, verbose=1),
    ]

    # Only the first-ranked configuration is checkpointed to disk
    if i == 1:
        callbacks.append(
            ModelCheckpoint(
                '../results/emnist-balanced/best-model.keras',
                monitor='val_accuracy',
                mode='max',
                save_best_only=True,
                verbose=1,
            )
        )

    # Fit on the training split, validating against the test split each epoch
    training_run = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=int(row['batch_size']),
        callbacks=callbacks,
    )
    history = training_run.history

    # Save and show the summary figure for this run
    plot_training_history(model, history, row, i)