# CNN vs MLP on MNIST - Minimal Assignment

## 1. Import Libraries

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import numpy as np

## 2. Load and Preprocess MNIST

In [2]:
# Seed the Python, NumPy and TensorFlow RNGs once, before anything stochastic
# runs (weight initialisation, batch shuffling, random augmentation), so a
# Restart & Run All reproduces the same numbers.
# NOTE: some GPU kernels can still be non-deterministic.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Load the MNIST train/test splits as (images, integer labels) pairs.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Normalize to [0,1] and add channel dimension -> (N, 28, 28, 1) float32
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# One-hot encode labels into 10 classes, as required by the
# 'categorical_crossentropy' loss both models are compiled with.
y_train_cat = tf.keras.utils.to_categorical(y_train, 10)
y_test_cat = tf.keras.utils.to_categorical(y_test, 10)

print(f"Train shape: {x_train.shape}, Test shape: {x_test.shape}")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Train shape: (60000, 28, 28, 1), Test shape: (10000, 28, 28, 1)


## 3. Data Augmentation (Basic)

In [3]:
# Light random geometric augmentation, applied on the fly to each training
# batch drawn from this generator.
datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=10,        # random rotation, up to 10 degrees
    width_shift_range=0.1,    # random horizontal shift, fraction of width
    height_shift_range=0.1,   # random vertical shift, fraction of height
    zoom_range=0.1            # random zoom factor in [0.9, 1.1]
)
# datagen.fit(x_train) is intentionally not called: fit() only computes the
# dataset statistics used by featurewise_center, featurewise_std_normalization
# and zca_whitening, and none of those options is enabled here.

## 4. Build CNN Model (2 Conv + FC)

In [4]:
# Small CNN: two Conv2D + MaxPooling2D stages, then a fully connected head
# ending in a 10-way softmax.
# The input shape is declared with an explicit Input layer rather than an
# `input_shape=` argument on the first Conv2D; Keras 3 emits a UserWarning for
# the latter when used inside a Sequential model.
cnn = models.Sequential([
    layers.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# categorical_crossentropy expects one-hot labels (y_train_cat / y_test_cat).
cnn.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

cnn.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## 5. Build MLP Model (for comparison)

In [5]:
# Flatten images for MLP: each 28x28x1 image becomes a 784-element vector.
x_train_flat = x_train.reshape(-1, 28*28)
x_test_flat = x_test.reshape(-1, 28*28)

# Baseline fully connected network: two hidden ReLU layers and a 10-way
# softmax output.
# The input shape is declared with an explicit Input layer rather than an
# `input_shape=` argument on the first Dense; Keras 3 emits a UserWarning for
# the latter when used inside a Sequential model.
mlp = models.Sequential([
    layers.Input(shape=(28*28,)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Same optimizer, loss and metric as the CNN so the two are compared on
# equal terms.
mlp.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

mlp.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## 6. Train CNN with Augmentation

In [None]:
# Batches of 128 augmented training images (with their one-hot labels)
# produced by `datagen`.
augmented_batches = datagen.flow(x_train, y_train_cat, batch_size=128)

# Train the CNN for 10 epochs on the augmented stream.
# NOTE(review): validation_data is the test split, so the per-epoch "val"
# metrics are computed on the same data later reported as test accuracy.
history_cnn = cnn.fit(
    augmented_batches,
    validation_data=(x_test, y_test_cat),
    epochs=10,
    verbose=1,
)

Epoch 1/10


  self._warn_if_super_not_called()


[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 128ms/step - accuracy: 0.7509 - loss: 0.7793 - val_accuracy: 0.9765 - val_loss: 0.0741
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 122ms/step - accuracy: 0.9513 - loss: 0.1572 - val_accuracy: 0.9869 - val_loss: 0.0457
Epoch 3/10
[1m439/469[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m3s[0m 117ms/step - accuracy: 0.9666 - loss: 0.1115

## 7. Train MLP (no augmentation)

In [None]:
# Fit options for the MLP baseline: same batch size and epoch count as the
# CNN run, but fed the flattened, un-augmented images.
# NOTE(review): validation_data is the test split, so the per-epoch "val"
# metrics are computed on the same data later reported as test accuracy.
mlp_fit_options = {
    'batch_size': 128,
    'epochs': 10,
    'validation_data': (x_test_flat, y_test_cat),
    'verbose': 1,
}

history_mlp = mlp.fit(x_train_flat, y_train_cat, **mlp_fit_options)

## 8. Evaluate Both Models

In [None]:
# Score each trained model on the test split; the MLP takes the flattened
# images, the CNN the 4-D image tensor.
cnn_scores = cnn.evaluate(x_test, y_test_cat, verbose=0)
mlp_scores = mlp.evaluate(x_test_flat, y_test_cat, verbose=0)

# Both models were compiled with a single 'accuracy' metric, so evaluate()
# yields a (loss, accuracy) pair.
test_loss_cnn, test_acc_cnn = cnn_scores
test_loss_mlp, test_acc_mlp = mlp_scores

print(f"CNN Test Accuracy: {test_acc_cnn:.4f}")
print(f"MLP Test Accuracy: {test_acc_mlp:.4f}")

## 9. Plot Accuracy Comparison

In [None]:
# Two-panel comparison: per-epoch validation accuracy (left) and final test
# accuracy (right).
fig, (ax_curve, ax_bar) = plt.subplots(1, 2, figsize=(10, 4))

# Left panel: validation accuracy per epoch, numbered from 1 to match the
# "Epoch k/10" lines in the training log.
cnn_val_acc = history_cnn.history['val_accuracy']
mlp_val_acc = history_mlp.history['val_accuracy']
ax_curve.plot(range(1, len(cnn_val_acc) + 1), cnn_val_acc, label='CNN Val Acc')
ax_curve.plot(range(1, len(mlp_val_acc) + 1), mlp_val_acc, label='MLP Val Acc')
ax_curve.set_title('Validation Accuracy')
ax_curve.set_xlabel('Epoch')
ax_curve.set_ylabel('Accuracy')
ax_curve.legend()

# Right panel: final test accuracy of each model.
test_accs = [test_acc_cnn, test_acc_mlp]
ax_bar.bar(['CNN', 'MLP'], test_accs)
ax_bar.set_title('Test Accuracy')
ax_bar.set_ylabel('Accuracy')
# Zoom in on the top of the range so small gaps are visible, but lower the
# floor whenever a model scores under 0.95 so its bar is never clipped away.
y_floor = max(0.0, min(0.95, min(test_accs) - 0.01))
ax_bar.set_ylim(y_floor, 1.0)

fig.tight_layout()
plt.show()