# Task

Create a convolutional neural network that classifies the handwritten digits of the MNIST dataset.

Compare it with a neural network with fully connected layers.

In [None]:
import time

import keras
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from keras.datasets import mnist
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, Input
from keras.models import Sequential
from keras.utils import to_categorical
from sklearn.metrics import r2_score, accuracy_score, confusion_matrix, f1_score

# Load and display data

In [None]:
# Load MNIST; load_data() yields a (train, test) pair of (images, labels) tuples.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
# Print the image-array shapes as a quick sanity check of what was loaded.
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")

In [None]:
def show_images(images, labels, rows=2, cols=10):
    """Display the first ``rows * cols`` images in a grid, titled by label.

    Args:
        images: Indexable collection of 2-D image arrays accepted by
            ``Axes.imshow``.
        labels: Indexable collection of labels, aligned with ``images``.
        rows: Number of grid rows.
        cols: Number of grid columns.

    If fewer than ``rows * cols`` images are supplied, the remaining grid
    cells are left blank instead of raising ``IndexError``.
    """
    # squeeze=False keeps `axes` two-dimensional even when rows or cols is 1,
    # so a single-row or single-column grid no longer breaks the indexing.
    _, axes = plt.subplots(rows, cols, figsize=(cols, rows), squeeze=False)
    n_shown = min(rows * cols, len(images))
    # axes.flat walks the grid row by row, matching idx // cols, idx % cols.
    for idx, ax in enumerate(axes.flat):
        ax.axis("off")
        if idx >= n_shown:
            continue
        ax.imshow(images[idx], cmap="gray_r")
        ax.set_title(f"{labels[idx]}")
    plt.show()

# Preview the first 20 training digits (default 2 x 10 grid) with their labels.
show_images(X_train, Y_train)

# Data preparation

In [None]:
# Scale pixel values by 1/255 and store them as float32.
# The integer-dtype guard makes this cell safe to re-run: once the arrays have
# been converted they are left alone instead of being divided by 255 again.
# NOTE(review): assumes the freshly loaded arrays have an integer dtype
# (Keras documents MNIST images as uint8) - confirm if the data source changes.
if np.issubdtype(X_train.dtype, np.integer):
    X_train = X_train.astype("float32") / 255
if np.issubdtype(X_test.dtype, np.integer):
    X_test = X_test.astype("float32") / 255

# CNN input: add a trailing single-channel axis -> (samples, 28, 28, 1).
X_train_cnn = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test_cnn = X_test.reshape(X_test.shape[0], 28, 28, 1)

# Dense input: flatten every 28 x 28 image into a 784-element vector.
X_train_dense = X_train.reshape(X_train.shape[0], 784)
X_test_dense = X_test.reshape(X_test.shape[0], 784)

print(f"CNN input shape: {X_train_cnn.shape}")
print(f"Dense input shape: {X_train_dense.shape}")

In [None]:
# One-hot encode the digit labels into 10 columns for categorical_crossentropy.
# The ndim guard keeps the cell idempotent: re-running it on labels that are
# already one-hot would otherwise encode them again into an (N, 10, 10) array.
if Y_train.ndim == 1:
    Y_train = to_categorical(Y_train, num_classes=10)
if Y_test.ndim == 1:
    Y_test = to_categorical(Y_test, num_classes=10)

# CNN Model

In [None]:
# Convolutional classifier: two conv/pool/dropout stages followed by a small
# fully connected head that ends in a 10-way softmax.
model_cnn = Sequential([
    Input(shape=(28, 28, 1)),
    # Stage 1: 32 filters of 3x3, then 2x2 max pooling.
    Conv2D(32, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Stage 2: 64 filters of 3x3, then 2x2 max pooling.
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    # Classification head.
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

model_cnn.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_cnn.summary()

In [None]:
# Stop once the validation loss has failed to improve for 3 epochs in a row.
# restore_best_weights=True rolls the model back to the best epoch; without it
# the model keeps the weights of the last (worse) epoch that triggered the stop.
early_stop_cnn = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# perf_counter() is monotonic, so the measured interval cannot be distorted by
# system clock adjustments the way a time.time() difference can.
start_time_cnn = time.perf_counter()
history_cnn = model_cnn.fit(X_train_cnn, Y_train,
                            epochs=15,
                            batch_size=128,
                            validation_split=0.1,
                            callbacks=[early_stop_cnn])
training_time_cnn = time.perf_counter() - start_time_cnn
print(f"CNN Training time: {training_time_cnn:.2f} seconds")

# Dense Model

In [None]:
# Fully connected baseline: two hidden ReLU layers with dropout, operating on
# the flattened 784-pixel vectors, ending in a 10-way softmax.
model_dense = Sequential([
    Input(shape=(784,)),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(10, activation='softmax'),
])

model_dense.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_dense.summary()

In [None]:
# Stop once the validation loss has failed to improve for 3 epochs in a row.
# restore_best_weights=True rolls the model back to the best epoch; without it
# the model keeps the weights of the last (worse) epoch that triggered the stop.
early_stop_dense = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# perf_counter() is monotonic, so the measured interval cannot be distorted by
# system clock adjustments the way a time.time() difference can.
start_time_dense = time.perf_counter()
history_dense = model_dense.fit(X_train_dense, Y_train,
                                epochs=15,
                                batch_size=128,
                                validation_split=0.1,
                                callbacks=[early_stop_dense])
training_time_dense = time.perf_counter() - start_time_dense
print(f"Dense Training time: {training_time_dense:.2f} seconds")

# Learning curves

In [None]:
# 2 x 2 grid of learning curves: one row per model (CNN, Dense), with loss in
# the left column and accuracy in the right column.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# (axes cell, training history, train metric key, validation metric key, title)
curve_panels = [
    (axes[0, 0], history_cnn, 'loss', 'val_loss', 'CNN - Loss'),
    (axes[0, 1], history_cnn, 'accuracy', 'val_accuracy', 'CNN - Accuracy'),
    (axes[1, 0], history_dense, 'loss', 'val_loss', 'Dense - Loss'),
    (axes[1, 1], history_dense, 'accuracy', 'val_accuracy', 'Dense - Accuracy'),
]

for panel_ax, run, train_key, val_key, panel_title in curve_panels:
    panel_ax.plot(run.history[train_key], label='Train')
    panel_ax.plot(run.history[val_key], label='Validation')
    panel_ax.set_title(panel_title)
    panel_ax.legend()

plt.tight_layout()
plt.show()

# Model Evaluation

In [None]:
# True digit of every test sample: position of the 1 in its one-hot row.
Y_test_classes = Y_test.argmax(axis=-1)

# Each model outputs one score per class; the predicted digit is the index of
# the highest score.
Y_pred_cnn = model_cnn.predict(X_test_cnn)
Y_pred_cnn_classes = Y_pred_cnn.argmax(axis=-1)

Y_pred_dense = model_dense.predict(X_test_dense)
Y_pred_dense_classes = Y_pred_dense.argmax(axis=-1)

In [None]:
# Overall fraction of test digits classified correctly.
accuracy_cnn = accuracy_score(Y_test_classes, Y_pred_cnn_classes)
accuracy_dense = accuracy_score(Y_test_classes, Y_pred_dense_classes)

# Macro-averaged F1: the unweighted mean of the per-class F1 scores, so every
# digit counts equally regardless of how often it occurs in the test set.
f1_cnn = f1_score(Y_test_classes, Y_pred_cnn_classes, average="macro")
f1_dense = f1_score(Y_test_classes, Y_pred_dense_classes, average="macro")

# R2 is a regression metric: applied to class indices it treats digits as
# magnitudes (confusing 1 with 9 is penalised far more than 1 with 2), which
# says nothing about classification quality. It is therefore no longer
# reported; the variables are kept only so code that reads them keeps working.
r2_cnn = r2_score(Y_test_classes, Y_pred_cnn_classes)
r2_dense = r2_score(Y_test_classes, Y_pred_dense_classes)

print(f"CNN - Accuracy: {accuracy_cnn:.4f}, Macro F1: {f1_cnn:.4f}")
print(f"Dense - Accuracy: {accuracy_dense:.4f}, Macro F1: {f1_dense:.4f}")

In [None]:
# For each digit, the share of test samples whose true label is that digit and
# that the CNN classified correctly.
print("CNN - Accuracy per class:")
for digit in range(10):
    mask = np.equal(Y_test_classes, digit)
    acc = accuracy_score(
        y_true=Y_test_classes[mask],
        y_pred=Y_pred_cnn_classes[mask],
    )
    print(f"  Digit {digit}: {acc:.2%}")

In [None]:
# For each digit, the share of test samples whose true label is that digit and
# that the dense network classified correctly.
print("Dense - Accuracy per class:")
for digit in range(10):
    mask = np.equal(Y_test_classes, digit)
    acc = accuracy_score(
        y_true=Y_test_classes[mask],
        y_pred=Y_pred_dense_classes[mask],
    )
    print(f"  Digit {digit}: {acc:.2%}")

# Confusion matrices

In [None]:
# Side-by-side confusion matrices (rows: actual digit, columns: predicted).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

cf_cnn = confusion_matrix(Y_test_classes, Y_pred_cnn_classes)
cf_dense = confusion_matrix(Y_test_classes, Y_pred_dense_classes)

# (matrix, colour map, panel title, target axes)
matrix_panels = [
    (cf_cnn, 'Blues', 'CNN', axes[0]),
    (cf_dense, 'Greens', 'Dense', axes[1]),
]

for matrix, palette, panel_title, panel_ax in matrix_panels:
    sns.heatmap(matrix, annot=True, fmt='d', cmap=palette, ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Predicted')
    panel_ax.set_ylabel('Actual')

plt.tight_layout()
plt.show()

# Misclassified examples

In [None]:
def show_wrong(X, Y_true, Y_pred, title, rows=2, cols=10):
    """Plot the first misclassified samples in a ``rows`` x ``cols`` grid.

    Args:
        X: Indexable collection of images; each entry is reshaped to 28 x 28
            before plotting.
        Y_true: True class index per sample.
        Y_pred: Predicted class index per sample, aligned with ``Y_true``.
        title: Figure-level title.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Each shown image is titled ``"<true>!=<predicted>"``. When there are fewer
    errors than grid cells, the unused cells are rendered blank.
    """
    wrong_idx = np.where(Y_true != Y_pred)[0]
    # squeeze=False keeps `axes` two-dimensional even when rows or cols is 1.
    _, axes = plt.subplots(rows, cols, figsize=(cols, rows), squeeze=False)
    # Hide ticks and frames on every cell up front so that cells left without
    # an image do not show up as empty plots.
    for ax in axes.flat:
        ax.axis('off')
    # zip stops at the shorter sequence: at most rows * cols errors are drawn,
    # filling the grid row by row.
    for ax, idx in zip(axes.flat, wrong_idx):
        ax.imshow(X[idx].reshape(28, 28), cmap='gray_r')
        ax.set_title(f"{Y_true[idx]}!={Y_pred[idx]}")
    plt.suptitle(title)
    plt.tight_layout()
    plt.show()

# Show up to 20 test digits (default 2 x 10 grid) that each model got wrong;
# every image is titled "<true>!=<predicted>".
show_wrong(X_test, Y_test_classes, Y_pred_cnn_classes, "CNN - Misclassified")
show_wrong(X_test, Y_test_classes, Y_pred_dense_classes, "Dense - Misclassified")

# Model comparison

In [None]:
# Time one full pass over the test set for each model. perf_counter() is
# monotonic, so the interval is not affected by system clock adjustments.
# NOTE(review): a single run per model is a rough figure, not a benchmark.
start = time.perf_counter()
_ = model_cnn.predict(X_test_cnn, verbose=0)
inference_cnn = time.perf_counter() - start

start = time.perf_counter()
_ = model_dense.predict(X_test_dense, verbose=0)
inference_dense = time.perf_counter() - start

# Summary table; headings are plain strings because they have no placeholders.
print("=" * 50)
print("MODEL COMPARISON")
print("=" * 50)
print("\nAccuracy:")
print(f"  CNN:   {accuracy_cnn:.4f}")
print(f"  Dense: {accuracy_dense:.4f}")
print("\nTraining time:")
print(f"  CNN:   {training_time_cnn:.2f}s")
print(f"  Dense: {training_time_dense:.2f}s")
print("\nInference time:")
print(f"  CNN:   {inference_cnn:.4f}s")
print(f"  Dense: {inference_dense:.4f}s")
print("=" * 50)

In [None]:
# Persist both trained models to the working directory.
for trained_model, target_path in (
    (model_cnn, 'mnist_cnn.keras'),
    (model_dense, 'mnist_dense.keras'),
):
    trained_model.save(target_path)