# COMP4318/5318 Assignment 2: Image Classification

### Group number: 44
### Student 1 SID: 550217239
### Student 2 SID: 550232025
### Student 3 SID: 550332875
### Student 4 SID: 550300357

This template notebook includes code to load the dataset and a skeleton for the main sections that should be included in the notebook. Please stick to this structure for your submitted notebook.

Please focus on making your code clear, with appropriate variable names and whitespace. Include comments and markdown text to aid the readability of your code where relevant. See the specification and marking criteria in the associated specification to guide you when completing your implementation.

## Setup and dependencies
Please use this section to list and set up all your required libraries/dependencies and your plotting environment. 

In [None]:
import numpy as np
import matplotlib.pyplot as plt

## 1. Data loading, exploration, and preprocessing


Code to load the dataset is provided in the following cell. Please proceed with your data exploration and preprocessing in the remainder of this section.

In [None]:
# Read the provided training and test arrays (images and labels) from disk.
# The Assignment2Data folder is expected to sit in the same directory as
# this notebook.
X_train, y_train, X_test, y_test = (
    np.load(npy_path)
    for npy_path in (
        'Assignment2Data/X_train.npy',
        'Assignment2Data/y_train.npy',
        'Assignment2Data/X_test.npy',
        'Assignment2Data/y_test.npy',
    )
)

In [None]:
# Report the dimensions of every loaded array
shape_report = {
    "X_train shape:": X_train.shape,
    "y_train shape:": y_train.shape,
    "X_test shape:": X_test.shape,
    "y_test shape:": y_test.shape,
}
for report_label, array_shape in shape_report.items():
    print(report_label, array_shape)

In [None]:
# Sanity checks: storage types, pixel value range and the set of labels
pixel_min, pixel_max = X_train.min(), X_train.max()
label_values = np.unique(y_train)

print("X_train dtype:", X_train.dtype)
print("y_train dtype:", y_train.dtype)
print("X_train min/max:", pixel_min, pixel_max)
print("y_train unique labels:", label_values)

In [None]:
# Peek at the raw numbers behind the first image: its first 50 values once flattened
first_sample_values = X_train[0].flatten()[:50]
first_sample_label = y_train[0]

print("First training sample (flattened):", first_sample_values)
print("Label:", first_sample_label)

In [None]:
# Render the first training image with its label as the title
fig, ax = plt.subplots()
ax.imshow(X_train[0])
ax.set_title(f"Label: {y_train[0]}")
plt.show()

In [None]:
# Global pixel statistics over the whole training array
pixel_mean = X_train.mean()
pixel_std = X_train.std()

print("Mean pixel value:", pixel_mean)
print("Std pixel value:", pixel_std)

In [None]:
# Check class distribution
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 
               'dog', 'frog', 'horse', 'ship', 'truck']

# `unique` holds the label ids that actually occur in y_train and `counts`
# how often each of them appears.
unique, counts = np.unique(y_train, return_counts=True)

plt.figure(figsize=(10, 5))
# Place every bar at its own label id (not at its position in `counts`) so the
# bars stay under the right class name even if a class is missing from y_train.
plt.bar(unique, counts)
plt.xticks(range(len(class_names)), class_names, rotation=45)
plt.xlabel('Class')
plt.ylabel('Number of samples')
plt.title('Training Set Class Distribution')
plt.tight_layout()
plt.show()

print("Class distribution:")
# Look the name up by label id for the same reason as above.
for label, count in zip(unique, counts):
    print(f"{class_names[int(label)]}: {count}")

In [None]:
# Show one example image for each of the ten classes
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
axes = axes.ravel()

for class_id, (ax, class_label) in enumerate(zip(axes, class_names)):
    # Index of the first training image carrying this label
    first_match = np.where(y_train == class_id)[0][0]
    ax.imshow(X_train[first_match])
    ax.set_title(class_label)
    ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Show several images of one class to illustrate within-class variation
fig, axes = plt.subplots(2, 5, figsize=(15, 6))
axes = axes.ravel()

# Class 3 is "cat": take the first ten training images with that label
cat_indices = np.where(y_train == 3)[0][:10]

for panel_number, (ax, sample_idx) in enumerate(zip(axes, cat_indices), start=1):
    ax.imshow(X_train[sample_idx])
    ax.set_title(f'Cat #{panel_number}')
    ax.axis('off')

plt.suptitle('Variation within "cat" class')
plt.tight_layout()
plt.show()

In [None]:
# Histogram of pixel intensities for each colour channel (first 1000 images only)
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

channel_names = ['Red', 'Green', 'Blue']
for channel_idx, (ax, color) in enumerate(zip(axes, channel_names)):
    channel_values = X_train[:1000, :, :, channel_idx].flatten()
    ax.hist(channel_values, bins=50, alpha=0.7)
    ax.set_title(f'{color} Channel Distribution (first 1000 images)')
    ax.set_xlabel('Pixel Value')
    ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

In [None]:
# Average all training images of each class pixel-by-pixel; the result is
# cast to uint8 for display.
mean_images = [
    X_train[y_train == class_id].mean(axis=0).astype(np.uint8)
    for class_id in range(10)
]

fig, axes = plt.subplots(2, 5, figsize=(15, 6))
axes = axes.ravel()

for ax, class_mean, class_label in zip(axes, mean_images, class_names):
    ax.imshow(class_mean)
    ax.set_title(f'Mean {class_label}')
    ax.axis('off')

plt.tight_layout()
plt.show()

### Examples of preprocessed data
Please print/display some examples of your preprocessed data here.

In [None]:
# 1. Normalize pixel values to [0, 1]
def _scale_pixels(images):
    """Return `images` converted to float32 and divided by 255."""
    return images.astype('float32') / 255.0


X_train_normalized = _scale_pixels(X_train)
X_test_normalized = _scale_pixels(X_test)

print("After normalization:")
print("X_train min/max:", X_train_normalized.min(), X_train_normalized.max())
print("X_train mean/std:", X_train_normalized.mean(), X_train_normalized.std())

In [None]:
# 2. One-hot encode labels for neural networks
from tensorflow.keras.utils import to_categorical

# The networks are compiled with categorical cross-entropy, which expects
# one 10-element indicator vector per sample.
y_train_categorical, y_test_categorical = (
    to_categorical(label_array, num_classes=10)
    for label_array in (y_train, y_test)
)

print("Original label shape:", y_train.shape)
print("One-hot encoded shape:", y_train_categorical.shape)
print("Example - original label:", y_train[0])
print("Example - one-hot encoded:", y_train_categorical[0])

In [None]:
# 3. Create validation split for hyperparameter tuning
from sklearn.model_selection import train_test_split

# Split images, integer labels and one-hot labels in ONE call so that all three
# are guaranteed to share the same row permutation. (Splitting the one-hot
# labels in a second call only lines up as long as both calls keep identical
# random_state / stratify / test_size arguments.)
(X_train_final, X_val,
 y_train_final, y_val,
 y_train_final_cat, y_val_cat) = train_test_split(
    X_train_normalized, y_train, y_train_categorical,
    test_size=0.2,
    random_state=42,
    stratify=y_train  # maintain class distribution
)

print("Training set:", X_train_final.shape, y_train_final.shape)
print("Validation set:", X_val.shape, y_val.shape)
print("Test set:", X_test_normalized.shape, y_test.shape)

In [None]:
# Display preprocessed examples
fig, axes = plt.subplots(1, 3, figsize=(12, 4))

# Panels 1 and 2 show the SAME image before and after scaling; they are expected
# to look alike (assuming X_train stores 0-255 integer pixels - confirm with the
# dtype/min/max printed earlier).
# Panel 3 is row 0 of the shuffled, stratified training split, so it is in
# general a DIFFERENT image from X_train[0]; the title says so explicitly.
preprocessing_panels = [
    (X_train[0], 'Original'),
    (X_train_normalized[0], 'Normalized'),
    (X_train_final[0], 'Final Preprocessed (first sample of train split)'),
]

for ax, (image, panel_title) in zip(axes, preprocessing_panels):
    ax.imshow(image)
    ax.set_title(panel_title)
    ax.axis('off')

plt.tight_layout()
plt.show()

## 2. Algorithm design and setup

In [None]:
# Add these imports to your setup section
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
import time

### Algorithm of choice from first six weeks of course- Random Forest

In [None]:
# Random Forest (and the MLP) take one feature vector per sample, so collapse
# every image into a single row while keeping the sample axis.
def _flatten_samples(images):
    """Reshape `images` to 2-D: (number of samples, everything else)."""
    return images.reshape(images.shape[0], -1)


X_train_flat = _flatten_samples(X_train_final)
X_val_flat = _flatten_samples(X_val)
X_test_flat = _flatten_samples(X_test_normalized)

print("Flattened shape:", X_train_flat.shape)

In [None]:
# Baseline Random Forest with fixed settings, timed and scored on the validation split
rf_baseline_params = dict(n_estimators=100, random_state=42, n_jobs=-1)
rf_model = RandomForestClassifier(**rf_baseline_params)

print("Training Random Forest...")
start_time = time.time()
rf_model.fit(X_train_flat, y_train_final)
rf_time = time.time() - start_time

# Accuracy of the fitted forest on the held-out validation images
y_val_pred = rf_model.predict(X_val_flat)
val_accuracy = accuracy_score(y_val, y_val_pred)

print(f"Training time: {rf_time:.2f} seconds")
print(f"Validation accuracy: {val_accuracy:.4f}")

### Fully connected neural network- Multilayer Perceptron(MLP)

In [None]:
def create_mlp(input_shape, hidden_layers=(128, 64), dropout_rate=0.3, learning_rate=0.001):
    """
    Create and compile a fully connected MLP classifier with 10 softmax outputs.
    
    Args:
        input_shape: shape of one flattened input sample, e.g. (3072,)
        hidden_layers: sequence (list or tuple) with the number of units in
            each hidden layer; the default is an immutable tuple so it cannot
            be shared and mutated between calls
        dropout_rate: dropout rate applied after every hidden layer
        learning_rate: learning rate for the Adam optimizer
    
    Returns:
        A compiled Sequential model (Adam optimizer, categorical cross-entropy
        loss, accuracy metric).
    """
    model = models.Sequential()
    
    # Input layer
    model.add(layers.Input(shape=input_shape))
    
    # Hidden layers: Dense + ReLU, each followed by dropout for regularization
    for units in hidden_layers:
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.Dropout(dropout_rate))
    
    # Output layer: one probability per class
    model.add(layers.Dense(10, activation='softmax'))
    
    # Compile model; the loss expects one-hot encoded labels
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

# Instantiate a first MLP (two hidden layers) and print its layer/parameter overview
mlp_model = create_mlp(
    input_shape=(3072,),
    hidden_layers=[256, 128],
    dropout_rate=0.3,
)
mlp_model.summary()

In [None]:
# Train the MLP; stop once validation loss has not improved for 5 epochs and
# roll back to the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

mlp_fit_options = dict(
    validation_data=(X_val_flat, y_val_cat),
    epochs=50,
    batch_size=128,
    callbacks=[early_stop],
    verbose=1,
)

print("Training MLP...")
start_time = time.time()
history_mlp = mlp_model.fit(X_train_flat, y_train_final_cat, **mlp_fit_options)
mlp_time = time.time() - start_time

print(f"Training time: {mlp_time:.2f} seconds")

In [None]:
# Learning curves of the MLP: loss on the left, accuracy on the right
mlp_curves = history_mlp.history
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(mlp_curves['loss'], label='Train Loss')
loss_ax.plot(mlp_curves['val_loss'], label='Val Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.set_title('MLP Training and Validation Loss')

acc_ax.plot(mlp_curves['accuracy'], label='Train Accuracy')
acc_ax.plot(mlp_curves['val_accuracy'], label='Val Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.set_title('MLP Training and Validation Accuracy')

plt.tight_layout()
plt.show()

# Score of the trained MLP on the validation split
val_loss, val_acc = mlp_model.evaluate(X_val_flat, y_val_cat, verbose=0)
print(f"MLP Validation Accuracy: {val_acc:.4f}")

### Convolutional neural network

In [None]:
def create_cnn(input_shape=(32, 32, 3), filters=(32, 64), kernel_size=3, 
               dense_units=128, dropout_rate=0.3, learning_rate=0.001):
    """
    Create and compile a CNN classifier with 10 softmax outputs.
    
    The network stacks one convolutional block per entry of `filters`
    (Conv2D + ReLU with 'same' padding -> 2x2 max pooling -> dropout),
    followed by a dense layer and the softmax output layer.
    
    Args:
        input_shape: shape of one input image (height, width, channels)
        filters: sequence (list or tuple) with the number of filters of each
            conv block; every entry adds one block. Each block halves the
            spatial size, so the number of blocks is limited by the image size.
        kernel_size: side length of the square convolutional kernel
        dense_units: units in the dense layer before the output
        dropout_rate: dropout rate applied after every block and the dense layer
        learning_rate: learning rate for the Adam optimizer
    
    Returns:
        A compiled Sequential model (Adam optimizer, categorical cross-entropy
        loss, accuracy metric).
    
    Raises:
        ValueError: if `filters` is empty.
    """
    if len(filters) == 0:
        raise ValueError("filters must contain at least one entry")
    
    model = models.Sequential()
    
    # Explicit input layer, consistent with create_mlp (newer Keras versions
    # warn when input_shape is passed to the first layer instead)
    model.add(layers.Input(shape=input_shape))
    
    # Convolutional blocks, one per requested filter count
    for n_filters in filters:
        model.add(layers.Conv2D(n_filters, (kernel_size, kernel_size), 
                                activation='relu', padding='same'))
        model.add(layers.MaxPooling2D((2, 2)))
        model.add(layers.Dropout(dropout_rate))
    
    # Flatten and dense layers
    model.add(layers.Flatten())
    model.add(layers.Dense(dense_units, activation='relu'))
    model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(10, activation='softmax'))
    
    # Compile; the loss expects one-hot encoded labels
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

# Instantiate a first CNN (two conv blocks) and print its layer/parameter overview
cnn_model = create_cnn(
    filters=[32, 64],
    dense_units=128,
    dropout_rate=0.3,
)
cnn_model.summary()

In [None]:
# Train CNN
# Use a dedicated early-stopping callback so this cell does not depend on the
# MLP training cell having been run (same settings: stop after 5 epochs without
# val_loss improvement and restore the best weights).
cnn_early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

print("Training CNN...")
start_time = time.time()

# The CNN consumes the images in their original (non-flattened) layout
history_cnn = cnn_model.fit(
    X_train_final, y_train_final_cat,
    validation_data=(X_val, y_val_cat),
    epochs=50,
    batch_size=128,
    callbacks=[cnn_early_stop],
    verbose=1
)

cnn_time = time.time() - start_time
print(f"Training time: {cnn_time:.2f} seconds")

In [None]:
# Learning curves of the CNN: loss on the left, accuracy on the right
cnn_curves = history_cnn.history
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(cnn_curves['loss'], label='Train Loss')
loss_ax.plot(cnn_curves['val_loss'], label='Val Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.set_title('CNN Training and Validation Loss')

acc_ax.plot(cnn_curves['accuracy'], label='Train Accuracy')
acc_ax.plot(cnn_curves['val_accuracy'], label='Val Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.set_title('CNN Training and Validation Accuracy')

plt.tight_layout()
plt.show()

# Score of the trained CNN on the validation split
val_loss, val_acc = cnn_model.evaluate(X_val, y_val_cat, verbose=0)
print(f"CNN Validation Accuracy: {val_acc:.4f}")

In [None]:
##Performance Summary:

##Random Forest: 46.3% validation accuracy (quick baseline)
##MLP: 40.5% validation accuracy (struggling, signs of underfitting)
##CNN: 73.6% validation accuracy (much better!)

##Key Observations:

##MLP Issues: The MLP is underfitting rather than overfitting - training accuracy (~37%) is lower than validation accuracy (~40%), which is expected because dropout is only active during training, and the validation loss plateaus early. Both accuracies are low, which suggests the model isn't learning spatial features well from flattened images.
##CNN Success: The CNN performs significantly better because it preserves spatial structure. Notice how training and validation accuracy track closely, showing good generalization.

## 3. Hyperparameter tuning

### Algorithm of choice from first six weeks of course- Random Forest

In [None]:
#Random Forest Hyperparameter Tuning
from sklearn.model_selection import GridSearchCV

# Define hyperparameter grid for Random Forest
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Derive the grid size from the grid itself so the message stays correct
# when a hyperparameter is added to or removed from rf_param_grid.
n_rf_combinations = int(np.prod([len(values) for values in rf_param_grid.values()]))

print("Starting Random Forest hyperparameter search...")
print(f"Total combinations: {n_rf_combinations}")

rf_grid = GridSearchCV(
    # Parallelise over grid points / folds only: asking for n_jobs=-1 on both
    # the forest and the search starts many more workers than there are CPU
    # cores. With a fixed random_state the fitted forests are the same.
    RandomForestClassifier(random_state=42, n_jobs=1),
    rf_param_grid,
    cv=3,  # 3-fold cross-validation
    scoring='accuracy',
    verbose=2,
    n_jobs=-1
)

start_time = time.time()
rf_grid.fit(X_train_flat, y_train_final)
rf_search_time = time.time() - start_time

print(f"\nSearch completed in {rf_search_time:.2f} seconds")
print(f"Best parameters: {rf_grid.best_params_}")
print(f"Best cross-validation score: {rf_grid.best_score_:.4f}")

# Store results for analysis
rf_results = rf_grid.cv_results_

In [None]:
# Visualize Random Forest hyperparameter search results
import pandas as pd

rf_results_df = pd.DataFrame(rf_results)


def _rows_with_max_depth(results_df, depth):
    """Boolean mask of the grid-search rows whose max_depth equals `depth`.

    `depth=None` has to be matched with isna(): an element-wise `== None`
    comparison is False for every row in pandas, which would silently drop the
    unlimited-depth runs from the plots.
    """
    depth_column = results_df['param_max_depth']
    if depth is None:
        return depth_column.isna()
    return depth_column == depth


depth_vals = [10, 20, 30, None]

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# n_estimators vs accuracy, one line per max_depth setting
for max_depth in depth_vals:
    mask = _rows_with_max_depth(rf_results_df, max_depth)
    data = rf_results_df[mask].groupby('param_n_estimators')['mean_test_score'].mean()
    axes[0, 0].plot(data.index, data.values, marker='o', label=f'max_depth={max_depth}')
axes[0, 0].set_xlabel('n_estimators')
axes[0, 0].set_ylabel('Mean CV Accuracy')
axes[0, 0].set_title('Effect of n_estimators')
axes[0, 0].legend()
axes[0, 0].grid(True)

# max_depth vs accuracy (averaged over all other hyperparameters)
depth_scores = [rf_results_df[_rows_with_max_depth(rf_results_df, d)]['mean_test_score'].mean() 
                for d in depth_vals]
axes[0, 1].bar(range(len(depth_vals)), depth_scores)
axes[0, 1].set_xticks(range(len(depth_vals)))
axes[0, 1].set_xticklabels([str(d) for d in depth_vals])
axes[0, 1].set_xlabel('max_depth')
axes[0, 1].set_ylabel('Mean CV Accuracy')
axes[0, 1].set_title('Effect of max_depth')
axes[0, 1].grid(True, axis='y')

# min_samples_split vs accuracy, one line per n_estimators setting
for n_est in [50, 100, 200]:
    mask = rf_results_df['param_n_estimators'] == n_est
    data = rf_results_df[mask].groupby('param_min_samples_split')['mean_test_score'].mean()
    axes[1, 0].plot(data.index, data.values, marker='o', label=f'n_estimators={n_est}')
axes[1, 0].set_xlabel('min_samples_split')
axes[1, 0].set_ylabel('Mean CV Accuracy')
axes[1, 0].set_title('Effect of min_samples_split')
axes[1, 0].legend()
axes[1, 0].grid(True)

# Runtime analysis: every grid point as (fit time, CV accuracy)
axes[1, 1].scatter(rf_results_df['mean_fit_time'], rf_results_df['mean_test_score'], alpha=0.6)
axes[1, 1].set_xlabel('Mean Fit Time (seconds)')
axes[1, 1].set_ylabel('Mean CV Accuracy')
axes[1, 1].set_title('Accuracy vs Training Time Trade-off')
axes[1, 1].grid(True)

plt.tight_layout()
plt.show()

### Fully connected neural network- Multilayer Perceptron(MLP)

In [None]:
#MLP Hyperparameter Tuning

from keras_tuner import RandomSearch

def build_mlp_tuner(hp):
    """Assemble and compile an MLP whose depth, layer widths, dropout rate and
    learning rate are taken from the keras-tuner hyperparameter object `hp`."""
    
    # Search space (registered in the same order as before)
    depth = hp.Int('n_layers', min_value=1, max_value=3, step=1)
    first_width = hp.Choice('units_layer1', values=[128, 256, 512])
    drop_prob = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.1)
    step_size = hp.Choice('learning_rate', values=[0.001, 0.0001])
    
    net = models.Sequential()
    net.add(layers.Input(shape=(3072,)))
    
    # Hidden layer 1 always exists
    net.add(layers.Dense(first_width, activation='relu'))
    net.add(layers.Dropout(drop_prob))
    
    # Hidden layers 2..depth each get their own width hyperparameter
    for layer_number in range(2, depth + 1):
        width = hp.Choice(f'units_layer{layer_number}', values=[64, 128, 256])
        net.add(layers.Dense(width, activation='relu'))
        net.add(layers.Dropout(drop_prob))
    
    # One softmax probability per class
    net.add(layers.Dense(10, activation='softmax'))
    
    net.compile(optimizer=keras.optimizers.Adam(learning_rate=step_size),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    
    return net

# Set up tuner
# A fixed seed makes the randomly sampled configurations repeatable, in line
# with random_state=42 used elsewhere in this notebook.
# NOTE(review): keras-tuner appears to resume from an existing
# directory/project_name on re-run - confirm, and delete the 'mlp_tuning'
# folder (or pass overwrite=True) to force a fresh search.
mlp_tuner = RandomSearch(
    build_mlp_tuner,
    objective='val_accuracy',
    max_trials=20,  # Number of different configurations to try
    executions_per_trial=1,
    seed=42,
    directory='mlp_tuning',
    project_name='cifar10_mlp'
)

print("MLP Hyperparameter Search")
print("=" * 50)

# Search: every trial trains for at most 20 epochs with early stopping
start_time = time.time()
mlp_tuner.search(
    X_train_flat, y_train_final_cat,
    validation_data=(X_val_flat, y_val_cat),
    epochs=20,
    batch_size=128,
    callbacks=[EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)],
    verbose=0
)
mlp_search_time = time.time() - start_time

print(f"\nSearch completed in {mlp_search_time:.2f} seconds")
print(f"\nBest hyperparameters: {mlp_tuner.get_best_hyperparameters()[0].values}")

# Get best model and confirm its score on the validation split
best_mlp = mlp_tuner.get_best_models(num_models=1)[0]
val_loss, val_acc = best_mlp.evaluate(X_val_flat, y_val_cat, verbose=0)
print(f"Best model validation accuracy: {val_acc:.4f}")

In [None]:
# Collect every trial's score together with its hyperparameter values
mlp_trials = [
    {'score': trial.score, **trial.hyperparameters.values}
    for trial in mlp_tuner.oracle.trials.values()
]
mlp_trials_df = pd.DataFrame(mlp_trials)

fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Bar charts: mean score per value of a discrete hyperparameter
mlp_bar_panels = [
    (axes[0, 0], 'learning_rate', 'Effect of Learning Rate'),
    (axes[1, 0], 'n_layers', 'Effect of Number of Layers'),
    (axes[1, 1], 'units_layer1', 'Effect of Units in First Layer'),
]
for ax, hp_name, panel_title in mlp_bar_panels:
    mlp_trials_df.groupby(hp_name)['score'].mean().plot(kind='bar', ax=ax)
    ax.set_title(panel_title)
    ax.set_ylabel('Validation Accuracy')
    ax.grid(True, axis='y')

# Scatter plot: one point per trial for the dropout rate
dropout_ax = axes[0, 1]
dropout_ax.scatter(mlp_trials_df['dropout_rate'], mlp_trials_df['score'], alpha=0.6)
dropout_ax.set_xlabel('Dropout Rate')
dropout_ax.set_ylabel('Validation Accuracy')
dropout_ax.set_title('Effect of Dropout Rate')
dropout_ax.grid(True)

plt.tight_layout()
plt.show()

### Convolutional neural network

In [None]:
# CNN Hyperparameter Tuning

def build_cnn_tuner(hp):
    """Build and compile a two-block CNN whose filter counts, kernel size, dense
    width, dropout rate and learning rate come from the keras-tuner object `hp`.

    Args:
        hp: keras-tuner hyperparameter object used to sample the values.

    Returns:
        A compiled Sequential model (Adam optimizer, categorical cross-entropy
        loss, accuracy metric).
    """
    
    # Hyperparameters to tune
    filters_1 = hp.Choice('filters_1', values=[32, 64])
    filters_2 = hp.Choice('filters_2', values=[64, 128])
    kernel_size = hp.Choice('kernel_size', values=[3, 5])
    dense_units = hp.Choice('dense_units', values=[64, 128, 256])
    dropout_rate = hp.Float('dropout_rate', min_value=0.2, max_value=0.5, step=0.1)
    learning_rate = hp.Choice('learning_rate', values=[0.001, 0.0001])
    
    model = models.Sequential()
    
    # Explicit input layer, consistent with the MLP builders (newer Keras
    # versions warn when input_shape is passed to the first layer instead)
    model.add(layers.Input(shape=(32, 32, 3)))
    
    # First conv block
    model.add(layers.Conv2D(filters_1, (kernel_size, kernel_size), 
                            activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(dropout_rate))
    
    # Second conv block
    model.add(layers.Conv2D(filters_2, (kernel_size, kernel_size), 
                            activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(dropout_rate))
    
    # Dense layers
    model.add(layers.Flatten())
    model.add(layers.Dense(dense_units, activation='relu'))
    model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(10, activation='softmax'))
    
    # The loss expects one-hot encoded labels
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

# Set up tuner
# A fixed seed makes the randomly sampled configurations repeatable, in line
# with random_state=42 used elsewhere in this notebook.
# NOTE(review): keras-tuner appears to resume from an existing
# directory/project_name on re-run - confirm, and delete the 'cnn_tuning'
# folder (or pass overwrite=True) to force a fresh search.
cnn_tuner = RandomSearch(
    build_cnn_tuner,
    objective='val_accuracy',
    max_trials=20,
    executions_per_trial=1,
    seed=42,
    directory='cnn_tuning',
    project_name='cifar10_cnn'
)

print("CNN Hyperparameter Search")
print("=" * 50)

# Search: every trial trains for at most 20 epochs with early stopping
start_time = time.time()
cnn_tuner.search(
    X_train_final, y_train_final_cat,
    validation_data=(X_val, y_val_cat),
    epochs=20,
    batch_size=128,
    callbacks=[EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)],
    verbose=0
)
cnn_search_time = time.time() - start_time

print(f"\nSearch completed in {cnn_search_time:.2f} seconds")
print(f"\nBest hyperparameters: {cnn_tuner.get_best_hyperparameters()[0].values}")

# Get best model and confirm its score on the validation split
best_cnn = cnn_tuner.get_best_models(num_models=1)[0]
val_loss, val_acc = best_cnn.evaluate(X_val, y_val_cat, verbose=0)
print(f"Best model validation accuracy: {val_acc:.4f}")

In [None]:
# Collect every trial's score together with its hyperparameter values
cnn_trials = [
    {'score': trial.score, **trial.hyperparameters.values}
    for trial in cnn_tuner.oracle.trials.values()
]
cnn_trials_df = pd.DataFrame(cnn_trials)

fig, axes = plt.subplots(2, 3, figsize=(16, 10))

# Scatter plots: one point per trial
cnn_scatter_panels = [
    (axes[0, 0], 'filters_1', 'Filters in Layer 1', 'Effect of First Layer Filters'),
    (axes[0, 1], 'filters_2', 'Filters in Layer 2', 'Effect of Second Layer Filters'),
    (axes[1, 1], 'dropout_rate', 'Dropout Rate', 'Effect of Dropout Rate'),
]
for ax, hp_name, x_label, panel_title in cnn_scatter_panels:
    ax.scatter(cnn_trials_df[hp_name], cnn_trials_df['score'], alpha=0.6)
    ax.set_xlabel(x_label)
    ax.set_ylabel('Validation Accuracy')
    ax.set_title(panel_title)
    ax.grid(True)

# Bar charts: mean score per value of a discrete hyperparameter
cnn_bar_panels = [
    (axes[0, 2], 'kernel_size', 'Effect of Kernel Size'),
    (axes[1, 0], 'dense_units', 'Effect of Dense Units'),
    (axes[1, 2], 'learning_rate', 'Effect of Learning Rate'),
]
for ax, hp_name, panel_title in cnn_bar_panels:
    cnn_trials_df.groupby(hp_name)['score'].mean().plot(kind='bar', ax=ax)
    ax.set_title(panel_title)
    ax.set_ylabel('Validation Accuracy')
    ax.grid(True, axis='y')

plt.tight_layout()
plt.show()

## Hyperparameter Tuning Results:

Random Forest: 45.89% CV accuracy

Best: 200 trees, no max depth, min_samples_split=5
Search time: ~37 minutes


MLP: 50.11% validation accuracy

Best: 2 layers (512→128 units), dropout=0.2, lr=0.0001
Search time: ~5 minutes


CNN: 73.55% validation accuracy

Best: 64→128 filters, kernel=3x3, 256 dense units, dropout=0.3, lr=0.001
Search time: ~76 minutes



Key Insights from Visualizations:
Random Forest:

More trees = better performance (diminishing returns after 100)
Unlimited depth works best (overfitting isn't an issue)
Clear time-accuracy tradeoff visible

MLP:

Lower learning rate (0.0001) performs better
Dropout around 0.2-0.3 is optimal
2 layers with 512→128 units gives best results
Still struggles compared to CNN (~50% vs ~74%)

CNN:

More filters (64, 128) perform better
Kernel size 3 slightly better than 5
Dropout 0.3 optimal
Learning rate 0.001 works well
Consistently strong performance (70%+)

## 4. Final models
In this section, please make sure to include cells that train each model with its best hyperparameter combination independently of the hyperparameter tuning cells, i.e. don't rely on the hyperparameter tuning cells having been run.

In [None]:
# Section banner for the final training/evaluation output
banner_rule = "=" * 70
print(banner_rule, "FINAL MODEL TRAINING AND EVALUATION", banner_rule, sep="\n")

### Algorithm of choice from first six weeks of course- Random Forest

In [None]:
# 1. Random Forest - Final Model
print("\n1. Random Forest - Training final model...")
print("-" * 70)

# Settings reported as best by the grid search, written out explicitly here
rf_final_params = dict(
    n_estimators=200,
    max_depth=None,
    min_samples_split=5,
    min_samples_leaf=1,
    random_state=42,
    n_jobs=-1,
)
rf_final = RandomForestClassifier(**rf_final_params)

start_time = time.time()
rf_final.fit(X_train_flat, y_train_final)
rf_train_time = time.time() - start_time

# Predict the held-out test images and score them against the true labels
y_test_pred_rf = rf_final.predict(X_test_flat)
rf_test_accuracy = accuracy_score(y_test, y_test_pred_rf)

print(f"Training time: {rf_train_time:.2f} seconds")
print(f"Test accuracy: {rf_test_accuracy:.4f}")

### Fully connected neural network- Multilayer Perceptron(MLP)

In [None]:
# 2. MLP - Final Model
print("\n2. MLP - Training final model...")
print("-" * 70)

# Architecture and optimizer settings reported as best by the random search
mlp_final_config = dict(
    input_shape=(3072,),
    hidden_layers=[512, 128],
    dropout_rate=0.2,
    learning_rate=0.0001,
)
mlp_final = create_mlp(**mlp_final_config)

# Stop after 5 epochs without val_loss improvement and restore the best weights
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

mlp_final_fit_options = dict(
    validation_data=(X_val_flat, y_val_cat),
    epochs=50,
    batch_size=128,
    callbacks=[early_stop],
    verbose=0,
)

start_time = time.time()
history_mlp_final = mlp_final.fit(X_train_flat, y_train_final_cat, **mlp_final_fit_options)
mlp_train_time = time.time() - start_time

# Score on the test set; the labels are one-hot encoded to match the loss
y_test_one_hot = to_categorical(y_test, 10)
test_loss, mlp_test_accuracy = mlp_final.evaluate(X_test_flat, y_test_one_hot, verbose=0)

epochs_run = len(history_mlp_final.history['loss'])
print(f"Training time: {mlp_train_time:.2f} seconds")
print(f"Epochs trained: {epochs_run}")
print(f"Test accuracy: {mlp_test_accuracy:.4f}")

### Convolutional neural network

In [None]:
# 3. CNN - Final Model
print("\n3. CNN - Training final model...")
print("-" * 70)

# Architecture and optimizer settings reported as best by the random search
cnn_final = create_cnn(
    input_shape=(32, 32, 3),
    filters=[64, 128],
    kernel_size=3,
    dense_units=256,
    dropout_rate=0.3,
    learning_rate=0.001
)

# Dedicated early-stopping callback so this cell can be run on its own and
# does not depend on the callback created in the MLP cell (same settings:
# stop after 5 epochs without val_loss improvement, restore the best weights).
cnn_final_early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

start_time = time.time()
history_cnn_final = cnn_final.fit(
    X_train_final, y_train_final_cat,
    validation_data=(X_val, y_val_cat),
    epochs=50,
    batch_size=128,
    callbacks=[cnn_final_early_stop],
    verbose=0
)
cnn_train_time = time.time() - start_time

# Evaluate on test set; the labels are one-hot encoded to match the loss
test_loss, cnn_test_accuracy = cnn_final.evaluate(X_test_normalized, to_categorical(y_test, 10), verbose=0)

print(f"Training time: {cnn_train_time:.2f} seconds")
print(f"Epochs trained: {len(history_cnn_final.history['loss'])}")
print(f"Test accuracy: {cnn_test_accuracy:.4f}")

In [None]:
# Summary Table
# pandas is imported here because the only other import of `pd` sits in a
# hyperparameter-tuning cell, which this section must not depend on.
import pandas as pd

print("\n" + "=" * 70)
print("FINAL RESULTS SUMMARY")
print("=" * 70)

# One row per final model: chosen settings, measured training time, test
# accuracy and number of trainable/non-trainable network parameters.
results_summary = pd.DataFrame({
    'Algorithm': ['Random Forest', 'MLP', 'CNN'],
    'Best Hyperparameters': [
        'n_est=200, max_depth=None, min_split=5',
        'layers=[512,128], dropout=0.2, lr=0.0001',
        'filters=[64,128], k=3, dense=256, dropout=0.3, lr=0.001'
    ],
    'Training Time (s)': [rf_train_time, mlp_train_time, cnn_train_time],
    'Test Accuracy': [rf_test_accuracy, mlp_test_accuracy, cnn_test_accuracy],
    'Parameters': [
        'N/A (ensemble)',
        mlp_final.count_params(),
        cnn_final.count_params()
    ]
})

print(results_summary.to_string(index=False))

In [None]:
# Confusion Matrices
from sklearn.metrics import confusion_matrix
import seaborn as sns

fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Class predictions of the two networks: index of the largest softmax output
y_test_pred_mlp = np.argmax(mlp_final.predict(X_test_flat), axis=1)
y_test_pred_cnn = np.argmax(cnn_final.predict(X_test_normalized), axis=1)

cm_rf = confusion_matrix(y_test, y_test_pred_rf)
cm_mlp = confusion_matrix(y_test, y_test_pred_mlp)
cm_cnn = confusion_matrix(y_test, y_test_pred_cnn)

# One heatmap per model, in the order Random Forest, MLP, CNN
heatmap_panels = [
    (cm_rf, f'Random Forest\nAccuracy: {rf_test_accuracy:.3f}'),
    (cm_mlp, f'MLP\nAccuracy: {mlp_test_accuracy:.3f}'),
    (cm_cnn, f'CNN\nAccuracy: {cnn_test_accuracy:.3f}'),
]
for ax, (matrix, panel_title) in zip(axes, heatmap_panels):
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=class_names, yticklabels=class_names)
    ax.set_title(panel_title)
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')

plt.tight_layout()
plt.show()

In [None]:
# Per-class precision / recall / F1 for each final model
from sklearn.metrics import classification_report

report_rule = "=" * 70
print("\n" + report_rule)
print("PER-CLASS PERFORMANCE")
print(report_rule)

test_predictions = {
    'Random Forest': y_test_pred_rf,
    'MLP': y_test_pred_mlp,
    'CNN': y_test_pred_cnn,
}
for name, y_pred in test_predictions.items():
    print(f"\n{name}:")
    print(classification_report(y_test, y_pred, target_names=class_names))