CS 4375 - Final Project\
Model 1: MNIST Digits

**Author**: Alec Ibarra\
**Date**: 2024-10-24

In [None]:
# Install dependencies
%pip install --upgrade pip
%pip install seaborn
%pip install matplotlib
%pip install pandas
%pip install scikit-learn
%pip install tensorflow[and-cuda]
%pip install keras-tuner

In [None]:
# Import dependencies
import os
import matplotlib.pyplot as plt # type: ignore
import numpy as np # type: ignore
import pandas as pd # type: ignore
import tensorflow as tf # type: ignore
import seaborn as sns # type: ignore
from tensorflow.keras.callbacks import EarlyStopping # type: ignore
from tensorflow.keras.datasets import mnist # type: ignore
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D # type: ignore
from tensorflow.keras.models import Sequential # type: ignore
from tensorflow.keras.utils import to_categorical # type: ignore
from keras_tuner.tuners import Hyperband # type: ignore
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay # type: ignore


In [None]:
# Report the TensorFlow build in use
print(f"Tensorflow: v{tf.__version__}")

# Report the GPUs TensorFlow can see: the count first, then the device list
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"GPUs Available: {len(gpu_devices)}")
print(gpu_devices)

In [None]:
# Fetch MNIST as (train, test) pairs of images and labels
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Images: reshape to (N, 28, 28, 1) so there is an explicit channel axis,
# cast to float32 and divide by 255
X_train, X_test = (
    images.reshape(-1, 28, 28, 1).astype('float32') / 255.0
    for images in (X_train, X_test)
)

# Labels: one-hot encode over 10 classes
y_train, y_test = (
    to_categorical(labels, 10)
    for labels in (y_train, y_test)
)

# Summarize the prepared dataset
print(f'Training data shape: {X_train.shape}')
print(f'Test data shape: {X_test.shape}')
print(f'Number of classes: {y_train.shape[1]}')

In [None]:
# Show the first ten training images side by side, each titled with its label
fig, axes = plt.subplots(1, 10, figsize=(20, 3))
for ax, image, one_hot_label in zip(axes, X_train, y_train):
    # squeeze() drops the single channel axis so imshow receives a 2-D array
    ax.imshow(image.squeeze(), cmap='gray')
    ax.set_title(f'Label: {np.argmax(one_hot_label)}')
    ax.axis('off')

In [None]:
# Set the batch size.
# This single value is reused by the hyperparameter search, the final training
# run, the tuner project name, and the names of the saved result files.
batch_size = 256

# Define the CNN model for tuning
def create_model(hp):
    """Build and compile a CNN for 28x28x1 inputs from tuner-chosen hyperparameters.

    Args:
        hp: Hyperparameter container supplied by the tuner. It is queried for
            'conv1', 'conv2', 'dense_units', 'dropout_rate' and 'learning_rate'.

    Returns:
        The compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    # Search space, declared in a fixed order: conv filters, dense width,
    # dropout fraction, and a log-sampled learning rate
    first_conv_filters = hp.Int('conv1', min_value=32, max_value=256, step=32)
    second_conv_filters = hp.Int('conv2', min_value=32, max_value=256, step=32)
    hidden_width = hp.Int('dense_units', min_value=32, max_value=256, step=32)
    drop_fraction = hp.Float('dropout_rate', min_value=0.1, max_value=0.9, step=0.1)
    step_size = hp.Float('learning_rate', min_value=0.0001, max_value=0.01, sampling='log')

    # Two convolution + max-pooling stages ...
    feature_layers = [
        Input(shape=(28, 28, 1)),
        Conv2D(first_conv_filters, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(second_conv_filters, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
    ]
    # ... followed by a dense head with dropout and a 10-way softmax output
    head_layers = [
        Flatten(),
        Dense(hidden_width, activation='relu'),
        Dropout(drop_fraction),
        Dense(10, activation='softmax'),
    ]
    network = Sequential(feature_layers + head_layers)

    # Compile with Adam at the sampled learning rate
    adam = tf.keras.optimizers.Adam(learning_rate=step_size)
    network.compile(
        optimizer=adam,
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return network

# Early stopping callback: watches val_accuracy with a patience of 3 epochs
# and restores the best weights when it stops a run
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    restore_best_weights=True,
    patience=3,
)

# Hyperband tuner over create_model, ranked by validation accuracy.
# Tuner state is stored under the 'hyperband' directory, in a project
# named after the current batch size.
tuner = Hyperband(
    hypermodel=create_model,
    objective='val_accuracy',
    project_name=f'mnist/{batch_size}_cnn_tuning',
    directory='hyperband',
    hyperband_iterations=5,
    factor=2,
    max_epochs=30,
)

In [None]:
# Run the hyperparameter search on the training data, holding out 20% of it
# for validation and attaching the early-stopping callback to every trial
tuner.search(
    X_train, y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=30,
    callbacks=[early_stopping],
)

In [None]:
# Build a new model from the hyperparameters of the top-ranked trial
best_trial = tuner.oracle.get_best_trials(num_trials=1)[0]
best_model = tuner.hypermodel.build(best_trial.hyperparameters)

# Fit on the full training set, evaluating on the test set after each epoch;
# only the per-epoch metrics dict is kept in `history`
training_run = best_model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=30,
    validation_data=(X_test, y_test),
)
history = training_run.history

# Predicted vs. true class indices for the test set
y_pred = best_model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test.argmax(axis=1)

In [None]:
# Combined figure: accuracy curves, loss curves, classification report,
# and confusion matrix for the retrained best model
fig, axes = plt.subplots(1, 4, figsize=(15, 3))

# Best model's accuracy per epoch
axes[0].plot(history['accuracy'], label='Train Accuracy')
axes[0].plot(history['val_accuracy'], label='Validation Accuracy')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Accuracy')
axes[0].legend(loc='lower right')
axes[0].set_title('Training and Validation Accuracy')
axes[0].grid(True)

# Best model's loss per epoch
axes[1].plot(history['loss'], label='Train Loss')
axes[1].plot(history['val_loss'], label='Validation Loss')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Loss')
axes[1].legend(loc='upper right')
axes[1].set_title('Training and Validation Loss')
axes[1].grid(True)

# Best model's classification report, rendered as a heatmap
report = classification_report(y_true, y_pred_classes, output_dict=True)
df = pd.DataFrame(report).transpose()
# NOTE(review): this slice keeps every row except the last and drops the last
# three columns, so only the first column of the transposed report is drawn.
# Confirm this is intended rather than `df.iloc[:-3, :-1]`.
sns.heatmap(df.iloc[:-1, :-3], annot=True, fmt=".2f", cmap='Blues', ax=axes[2])
axes[2].set_title('Classification Report')
axes[2].grid(False)

# Confusion matrix for the best model predictions
confusion_mtx = confusion_matrix(y_true, y_pred_classes)
disp = ConfusionMatrixDisplay(confusion_matrix=confusion_mtx, display_labels=np.arange(10))
disp.plot(cmap=plt.cm.Blues, ax=axes[3], colorbar=False)
axes[3].set_title('Confusion Matrix')
axes[3].grid(False)

# Adjust layout
plt.tight_layout()

# Save the figure. The file's full parent directory ('results/mnist') must
# exist before saving; creating only 'results' makes the save fail on a
# fresh checkout.
figure_path = f'results/mnist/{batch_size}-best.png'
os.makedirs(os.path.dirname(figure_path), exist_ok=True)
fig.savefig(figure_path)

# Display the figure
plt.show()

In [None]:
# Extract the tuner's trials, best first. 9999 is simply a cap large enough
# to request every trial.
trials = tuner.oracle.get_best_trials(num_trials=9999)

# Create a DataFrame with trial IDs, scores, hyperparameters, and batch size.
# Column order is kept stable because the saved CSV is read back by name later.
hyperparameter_names = ['conv1', 'conv2', 'dense_units', 'dropout_rate', 'learning_rate']
result_columns = {
    'trial_id': [trial.trial_id for trial in trials],
    'mean_val_accuracy': [trial.score for trial in trials],
}
for hp_name in hyperparameter_names:
    result_columns[hp_name] = [trial.hyperparameters.values[hp_name] for trial in trials]
result_columns['batch_size'] = batch_size  # scalar, broadcast to every row
results_df = pd.DataFrame(result_columns)

# Save the results to a CSV file. The full parent directory ('results/mnist')
# must exist; creating only 'results' makes to_csv fail on a fresh checkout.
results_path = f'results/mnist/{batch_size}-all.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
results_df.to_csv(results_path, index=True)
results_df.head()

In [None]:
# Combine the per-batch-size result files into a single CSV file.
# The sweep covers batch sizes 16, 32, ..., 256. range() excludes its stop
# value, so the bound is 256 + 1; otherwise the 256 file is left out.
expected_paths = [f'results/mnist/{size}-all.csv' for size in range(16, 256 + 1, 16)]
available_paths = [path for path in expected_paths if os.path.exists(path)]

# Report runs that have not been produced yet instead of failing on the first one
missing_paths = [path for path in expected_paths if path not in available_paths]
if missing_paths:
    print(f'Skipping {len(missing_paths)} missing result file(s): {missing_paths}')
if not available_paths:
    raise FileNotFoundError('No per-batch-size result files found under results/mnist/')

# 'Unnamed: 0' is the index column written alongside each per-batch file
all_results = (
    pd.concat([pd.read_csv(path) for path in available_paths])
    .drop(columns=['Unnamed: 0', 'trial_id'])
    .sort_values('mean_val_accuracy', ascending=False)
)
all_results.to_csv('results/mnist/all.csv', index=False)
all_results.head()