<a href="https://colab.research.google.com/github/YahyaHajji/Mnist_CNN_Project/blob/master/mnist_cnn_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Step 1: Import Required Libraries**

In [None]:
# Import libraries
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D
from tensorflow.keras.models import Sequential

# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation, shuffling and dropout are repeatable across
# "Restart Kernel -> Run All". (Some GPU kernels may still be non-deterministic.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

print(f"TensorFlow version: {tf.__version__}")

**Step 2: Load and Prepare the MNIST Dataset**

In [None]:
# Load the MNIST dataset as (images, labels) pairs for the train and test splits
(x_train, y_train), (x_test, y_test) = mnist.load_data()

print("Original shapes:")
print(f"Training data: {x_train.shape}")
print(f"Test data: {x_test.shape}")

# Normalize pixel values to [0, 1].
# Cast to float32 before dividing: dividing the integer image arrays by a
# Python float would otherwise yield float64, doubling memory use for
# precision the (float32 by default) Keras layers do not use.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Add a trailing channel dimension for the CNN:
# from (samples, 28, 28) to (samples, 28, 28, 1)
x_train_cnn = x_train[..., np.newaxis]
x_test_cnn = x_test[..., np.newaxis]

# Cheap sanity checks so a bad download/transform fails here, not mid-training
assert x_train_cnn.shape[1:] == (28, 28, 1) and x_test_cnn.shape[1:] == (28, 28, 1)
assert len(x_train_cnn) == len(y_train) and len(x_test_cnn) == len(y_test)
assert 0.0 <= x_train.min() and x_train.max() <= 1.0

print("\nReshaped for CNN:")
print(f"Training data: {x_train_cnn.shape}")
print(f"Test data: {x_test_cnn.shape}")

# Preview the first ten training digits on a 2x5 grid, each titled with its label
sample_fig, sample_axes = plt.subplots(2, 5, figsize=(10, 4))
for idx, digit_ax in enumerate(sample_axes.flat):
    digit_ax.imshow(x_train[idx], cmap='gray')
    digit_ax.set_title(f"Label: {y_train[idx]}")
    digit_ax.axis('off')
sample_fig.suptitle("Sample MNIST Digits", fontsize=14, y=1.02)
sample_fig.tight_layout()
plt.show()

**Step 3: Build the CNN Model**

In [None]:
# Create CNN model: two conv/pool feature-extraction blocks followed by a
# small dense classifier head with one softmax output per digit class.
cnn_model = Sequential([
    # 28x28 single-channel (grayscale) images
    Input(shape=(28, 28, 1)),

    # First convolutional block
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Second convolutional block
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Flatten and dense layers
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # Randomly drops half the activations during training to curb overfitting
    Dense(10, activation='softmax')
])

# Display model architecture
cnn_model.summary()

**Step 5: Compile the CNN Model**

In [None]:
# Compile the model.
# The sparse variant of categorical cross-entropy is used because the labels
# are passed to fit() as integer class ids rather than one-hot vectors.
cnn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("✅ CNN Model compiled successfully!")

**Step 6: Train the CNN Model**

In [None]:
# Train the CNN model and record how long training takes.
# NOTE(review): the test split is passed as validation data, so the per-epoch
# "val_*" metrics are test-set metrics. That is fine for this fixed-recipe
# comparison, but do not tune hyperparameters against them.
print("🚀 Training CNN Model...")
start_time = time.perf_counter()  # monotonic clock; unaffected by system clock changes

cnn_history = cnn_model.fit(
    x_train_cnn, y_train,
    epochs=10,
    batch_size=128,
    validation_data=(x_test_cnn, y_test),
    verbose=1
)

training_time = time.perf_counter() - start_time
print(f"\n⏱️ Training completed in {training_time:.2f} seconds")

**Step 7: Build and Train Simple ANN for Comparison**

In [None]:
# Build simple ANN baseline: flatten the 28x28 image into a vector and
# classify it with a single hidden dense layer.
ann_model = Sequential([
    # Declare the input with an explicit Input layer (same style as the CNN)
    # instead of passing `input_shape` to the first layer, which current
    # Keras versions discourage for Sequential models.
    Input(shape=(28, 28)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])

# Same optimizer/loss/metric as the CNN so the two runs are comparable
ann_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Display ANN architecture
print("ANN Model Architecture:")
ann_model.summary()

# Train ANN model with the same epochs/batch size as the CNN, and time it.
# NOTE(review): as with the CNN, the test split is used as validation data.
print("\n🚀 Training ANN Model for comparison...")
ann_start_time = time.perf_counter()  # monotonic clock; unaffected by system clock changes

ann_history = ann_model.fit(
    x_train, y_train,
    epochs=10,
    batch_size=128,
    validation_data=(x_test, y_test),
    verbose=1
)

ann_training_time = time.perf_counter() - ann_start_time
print(f"\n⏱️ ANN Training completed in {ann_training_time:.2f} seconds")

**Step 8: Evaluate Both Models**

In [None]:
# Evaluate both models on the test split; evaluate() yields (loss, accuracy)
# because 'accuracy' is the only metric each model was compiled with.
cnn_loss, cnn_accuracy = cnn_model.evaluate(x_test_cnn, y_test, verbose=0)
ann_loss, ann_accuracy = ann_model.evaluate(x_test, y_test, verbose=0)

# Display comparison
print("\n" + "="*60)
print("📊 MODEL COMPARISON RESULTS")
print("="*60)
print("\n🧠 CNN Model:")
print(f"   Test Accuracy: {cnn_accuracy:.4f} ({cnn_accuracy*100:.2f}%)")
print(f"   Test Loss: {cnn_loss:.4f}")
print(f"   Training Time: {training_time:.2f}s")

print("\n📝 ANN Model:")
print(f"   Test Accuracy: {ann_accuracy:.4f} ({ann_accuracy*100:.2f}%)")
print(f"   Test Loss: {ann_loss:.4f}")
print(f"   Training Time: {ann_training_time:.2f}s")

print("\n✨ Improvement:")
# Signed difference of two accuracies expressed in percent, i.e. percentage
# points (not a relative "% more accurate").
improvement = (cnn_accuracy - ann_accuracy) * 100
direction = "more" if improvement >= 0 else "less"
print(f"   CNN is {abs(improvement):.2f} percentage points {direction} accurate than ANN")
print("="*60)

**Step 9: Visualize Training History Comparison**

In [None]:
# Plot comparison: one row per model, one column per metric
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

panel_rows = (
    ('CNN', cnn_history.history),
    ('ANN', ann_history.history),
)
panel_cols = (
    ('accuracy', 'Accuracy'),
    ('loss', 'Loss'),
)

for row, (model_name, curves) in enumerate(panel_rows):
    for col, (metric_key, metric_label) in enumerate(panel_cols):
        panel = axes[row, col]
        # Training curve first, then its validation counterpart ("val_<metric>")
        panel.plot(curves[metric_key], label='Training', linewidth=2)
        panel.plot(curves[f'val_{metric_key}'], label='Validation', linewidth=2)
        panel.set_title(f'{model_name} - {metric_label}', fontsize=12, fontweight='bold')
        panel.set_xlabel('Epoch')
        panel.set_ylabel(metric_label)
        panel.legend()
        panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

**Step 10: Side-by-Side Accuracy Comparison**

In [None]:
# Create bar chart comparing the final test accuracy of both models
fig, ax = plt.subplots(figsize=(10, 6))

models = ['ANN', 'CNN']
accuracies = [ann_accuracy * 100, cnn_accuracy * 100]
colors = ['#FF6B6B', '#4ECDC4']

bars = ax.bar(models, accuracies, color=colors, width=0.5, edgecolor='black', linewidth=2)

# Add value labels on bars
for bar, acc in zip(bars, accuracies):
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2., height,
            f'{acc:.2f}%',
            ha='center', va='bottom', fontsize=14, fontweight='bold')

ax.set_ylabel('Test Accuracy (%)', fontsize=12, fontweight='bold')
ax.set_title('ANN vs CNN Performance Comparison on MNIST', fontsize=14, fontweight='bold')

# Zoom into the 95-100% band to make small differences visible, but widen the
# window if a model scored lower so its bar is never cut off below the axis.
y_lower = max(0.0, min(95.0, float(np.floor(min(accuracies) - 1.0))))
ax.set_ylim([y_lower, 100])
ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

**Step 11: Test Predictions on Sample Images**

In [None]:
# Make predictions with both models on the first grid-full of test images
n_rows, n_cols = 4, 5
n_samples = n_rows * n_cols
cnn_predictions = cnn_model.predict(x_test_cnn[:n_samples])
ann_predictions = ann_model.predict(x_test[:n_samples])

# Title colour and status glyph keyed by (CNN correct?, ANN correct?):
#   green  - both correct        orange - only CNN correct
#   blue   - only ANN correct    red    - both wrong
outcome_style = {
    (True, True): ('green', '✓✓'),
    (True, False): ('orange', 'CNN✓'),
    (False, True): ('blue', 'ANN✓'),
    (False, False): ('red', '✗✗'),
}

# Visualize predictions
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 10))

for i, ax in enumerate(axes.flat):
    ax.imshow(x_test[i], cmap='gray')

    # Predicted class = index of the highest softmax probability
    cnn_pred = np.argmax(cnn_predictions[i])
    ann_pred = np.argmax(ann_predictions[i])
    true_label = y_test[i]

    color, status = outcome_style[(bool(cnn_pred == true_label), bool(ann_pred == true_label))]

    ax.set_title(f'True: {true_label}\nCNN: {cnn_pred} | ANN: {ann_pred}\n{status}',
                 color=color, fontsize=9, fontweight='bold')
    ax.axis('off')

plt.suptitle('Prediction Comparison: CNN vs ANN', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

**Step 12: Analyze Overfitting**

In [None]:
# Check for overfitting
def check_overfitting(history, model_name, slight_gap=2.0, severe_gap=5.0):
    """Print a train-vs-validation accuracy gap report for one training run.

    Only the final epoch is inspected: the gap is the last training accuracy
    minus the last validation accuracy, expressed in percentage points.

    Args:
        history: Object with a ``history`` mapping that contains
            ``'accuracy'`` and ``'val_accuracy'`` sequences (one entry per
            epoch), such as the value returned by ``model.fit``.
        model_name: Label shown in the report header.
        slight_gap: Gaps below this value are reported as no significant
            overfitting. Defaults to 2.0.
        severe_gap: Gaps at or above this value are reported as overfitting;
            gaps in ``[slight_gap, severe_gap)`` as slight overfitting.
            Defaults to 5.0.

    Returns:
        float: The final-epoch gap in percentage points. Negative when
        validation accuracy exceeds training accuracy.

    Raises:
        KeyError: If either accuracy key is missing from ``history.history``.
        IndexError: If either accuracy sequence is empty.
    """
    final_train_acc = history.history['accuracy'][-1]
    final_val_acc = history.history['val_accuracy'][-1]
    gap = (final_train_acc - final_val_acc) * 100

    print(f"\n{model_name} Overfitting Analysis:")
    print(f"  Final Training Accuracy: {final_train_acc*100:.2f}%")
    print(f"  Final Validation Accuracy: {final_val_acc*100:.2f}%")
    print(f"  Gap: {gap:.2f}%")

    if gap < slight_gap:
        print("  Status: ✅ No significant overfitting")
    elif gap < severe_gap:
        print("  Status: ⚠️ Slight overfitting")
    else:
        print("  Status: ❌ Overfitting detected")

    return gap

# Report the final-epoch train/validation gap for each model between two rules
rule = "=" * 60
print(rule)
for run_history, run_label in ((cnn_history, "CNN"), (ann_history, "ANN")):
    check_overfitting(run_history, run_label)
print(rule)

**Step 13: Confusion Matrix (Optional, Advanced)**

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Get predictions for entire test set (class = index of the highest softmax output)
cnn_pred_classes = np.argmax(cnn_model.predict(x_test_cnn), axis=1)

# Pin the class list explicitly so the matrix is always 10x10 and its rows and
# columns line up with the tick labels / report names, even if some digit
# happened to be absent from both the labels and the predictions.
class_labels = list(range(10))

# Create confusion matrix (rows: true digit, columns: predicted digit)
cm = confusion_matrix(y_test, cnn_pred_classes, labels=class_labels)

# Plot confusion matrix
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels, yticklabels=class_labels, ax=ax)
ax.set_title('CNN Confusion Matrix', fontsize=14, fontweight='bold')
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
fig.tight_layout()
plt.show()

# Print classification report (per-digit precision / recall / F1)
print("\n📊 CNN Classification Report:")
print(classification_report(y_test, cnn_pred_classes, labels=class_labels,
                            target_names=[str(i) for i in class_labels]))