# MNIST Handwritten Digit Classification using Convolutional Neural Networks

This notebook demonstrates how to build a Convolutional Neural Network (CNN) to classify handwritten digits from the MNIST dataset. The MNIST dataset is a widely used benchmark in machine learning and consists of 28x28 grayscale images of handwritten digits (0-9).

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# For deep learning
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, BatchNormalization
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.utils import to_categorical

# For model evaluation
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Seed both the NumPy and the TensorFlow random generators with the same value
np.random.seed(seed=42)
tf.random.set_seed(seed=42)

## Data Loading and Exploration

We'll load the MNIST dataset with Keras' built-in loader, `keras.datasets.mnist.load_data()`.

In [None]:
# Fetch MNIST through the Keras dataset loader; the result unpacks into
# an (images, labels) pair for training and another for testing
mnist = keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [None]:
# Report the array shape of every split, one line per array
print(
    f"X_train shape: {X_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"X_test shape: {X_test.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

In [None]:
# Preview the first 25 training images on a 5x5 grid, each titled with its label
plt.figure(figsize=(10, 10))
for i, digit_image in enumerate(X_train[:25]):
    plt.subplot(5, 5, i + 1)
    plt.imshow(digit_image, cmap='gray')
    plt.title(f"Label: {y_train[i]}")
    plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Bar chart of how many training samples carry each label
plt.figure(figsize=(10, 6))
count_axes = sns.countplot(x=y_train)
count_axes.set_title('Distribution of Digits in Training Set')
count_axes.set_xlabel('Digit')
count_axes.set_ylabel('Count')
plt.show()

In [None]:
# Count NaN entries in each of the four arrays
print(
    f"Missing values in X_train: {np.isnan(X_train).sum()}",
    f"Missing values in y_train: {np.isnan(y_train).sum()}",
    f"Missing values in X_test: {np.isnan(X_test).sum()}",
    f"Missing values in y_test: {np.isnan(y_test).sum()}",
    sep="\n",
)

In [None]:
# Smallest and largest pixel value found in the training images
print(
    f"Min pixel value in X_train: {X_train.min()}",
    f"Max pixel value in X_train: {X_train.max()}",
    sep="\n",
)

## Data Preprocessing

In [None]:
# Convert both image arrays to float32 and divide by 255 so that
# pixel values end up between 0 and 1
X_train, X_test = (
    pixels.astype('float32') / 255.0 for pixels in (X_train, X_test)
)

In [None]:
# Append a trailing channel axis of size 1 (grayscale): (samples, 28, 28, 1)
X_train = X_train.reshape((len(X_train), 28, 28, 1))
X_test = X_test.reshape((len(X_test), 28, 28, 1))

In [None]:
# Turn the digit labels into one-hot vectors with 10 classes
y_train_encoded = to_categorical(y_train, num_classes=10)
y_test_encoded = to_categorical(y_test, num_classes=10)

print(
    f"Shape of y_train_encoded: {y_train_encoded.shape}",
    f"Shape of y_test_encoded: {y_test_encoded.shape}",
    sep="\n",
)

## Building the CNN Model

In [None]:
# Define the CNN as one ordered layer list: two convolutional blocks
# followed by a dense classifier head
model = Sequential([
    # Convolutional block 1: two 3x3 convolutions with 32 filters, each
    # followed by batch normalization, then 2x2 max pooling and dropout
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Convolutional block 2: same layout with 64 filters
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten, one hidden dense layer, then a softmax
    # over the 10 digit classes (0-9)
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [None]:
# Model summary: print the overview Keras generates for the model defined above
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (the targets passed to fit are one-hot encoded), accuracy as the metric
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Training callbacks; both monitor the validation loss.
# Early stopping uses a patience of 10 epochs and restores the best weights.
early_stopping = EarlyStopping(monitor='val_loss', restore_best_weights=True, patience=10)

# Learning-rate reduction uses a patience of 5 epochs, a factor of 0.2
# and a lower bound of 1e-4.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.2, min_lr=1e-4)

In [None]:
# Fit on the training images for up to 20 epochs in batches of 128,
# holding out 10% of the training data for validation
history = model.fit(
    x=X_train,
    y=y_train_encoded,
    batch_size=128,
    epochs=20,
    verbose=1,
    validation_split=0.1,
    callbacks=[early_stopping, reduce_lr],
)

## Model Evaluation

In [None]:
# Plot the training curves side by side: accuracy on the left, loss on the right
plt.figure(figsize=(12, 5))

# One row per panel: (train key, validation key, title, y-axis label, legend position)
history_panels = (
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
    ('loss', 'val_loss', 'Model Loss', 'Loss', 'upper right'),
)
for panel_no, (train_key, val_key, panel_title, y_label, legend_loc) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, panel_no)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc=legend_loc)

plt.tight_layout()
plt.show()

In [None]:
# Score the trained model on the test images and report loss and accuracy
test_loss, test_accuracy = model.evaluate(x=X_test, y=y_test_encoded, verbose=0)
print(
    f"Test Loss: {test_loss:.4f}",
    f"Test Accuracy: {test_accuracy:.4f}",
    sep="\n",
)

In [None]:
# Run the model on the test images, then take the highest-scoring
# class of each row as the predicted digit
y_pred_proba = model.predict(X_test)
y_pred = y_pred_proba.argmax(axis=1)

In [None]:
# Confusion matrix of the true test labels against the predicted ones
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Draw it as an annotated heatmap with integer cell labels and no colour bar
plt.figure(figsize=(10, 8))
heatmap_axes = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
heatmap_axes.set_title('Confusion Matrix')
heatmap_axes.set_ylabel('True Label')
heatmap_axes.set_xlabel('Predicted Label')
plt.show()

In [None]:
# Build the scikit-learn classification report for the test predictions, then print it
report = classification_report(y_test, y_pred)
print(report)

## Visualizing Predictions

In [None]:
# Function to plot images with their predictions
def plot_predictions(X, true_labels, predicted_labels, num_images=25):
    """Show the first images of ``X`` with their true and predicted labels.

    Each image is reshaped to 28x28 and drawn in grayscale. The title is
    green when the two labels match and red otherwise.

    Args:
        X: Indexable collection of images; each item must be reshapeable
            to (28, 28).
        true_labels: Ground-truth label for each image, indexed like ``X``.
        predicted_labels: Predicted label for each image, indexed like ``X``.
        num_images: Maximum number of images to draw. It is capped at the
            shortest of the three inputs, so asking for more than are
            available no longer raises IndexError.
    """
    n_cols = 5
    count = max(0, min(num_images, len(X), len(true_labels), len(predicted_labels)))

    # Never fewer than 5 rows, so the default 25-image call keeps its 5x5
    # layout; more rows are added when more than 25 images are requested.
    n_rows = max(5, -(-count // n_cols))  # ceiling division

    # Height grows with the row count; 5 rows gives the original 12x12 figure.
    plt.figure(figsize=(12, 12 * n_rows / 5))
    for i in range(count):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(X[i].reshape(28, 28), cmap='gray')

        # Green for correct predictions, red for incorrect
        color = 'green' if true_labels[i] == predicted_labels[i] else 'red'
        plt.title(f"True: {true_labels[i]}\nPred: {predicted_labels[i]}", color=color)
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Plot some predictions: test images with their true and predicted labels,
# using the function's default num_images=25
plot_predictions(X_test, y_test, y_pred)

## Visualizing Misclassifications

In [None]:
# Positions in the test set where the prediction differs from the true label
misclassified_indices = np.where(y_test != y_pred)[0]
print(f"Number of misclassified examples: {len(misclassified_indices)}")

# Show at most the first 25 of them on a 5x5 grid, titled in red
if misclassified_indices.size > 0:
    num_to_plot = min(25, len(misclassified_indices))
    misclassified_indices = misclassified_indices[:num_to_plot]

    plt.figure(figsize=(12, 12))
    for cell_no, idx in enumerate(misclassified_indices, start=1):
        plt.subplot(5, 5, cell_no)
        wrong_image = X_test[idx].reshape(28, 28)
        plt.imshow(wrong_image, cmap='gray')
        plt.title(f"True: {y_test[idx]}\nPred: {y_pred[idx]}", color='red')
        plt.axis('off')
    plt.tight_layout()
    plt.show()

## Feature Maps Visualization

In [None]:
# Build a sub-model that shares the trained model's inputs but stops at the
# output of its first layer (the first Conv2D)
first_conv_output = model.layers[0].output
feature_map_model = tf.keras.models.Model(inputs=model.inputs, outputs=first_conv_output)

# Run the first test image through it; slicing keeps the leading batch axis
sample_image = X_test[:1]
feature_maps = feature_map_model.predict(sample_image)

# Draw one panel per filter: the first conv layer has 32, placed on a 6x6 grid
plt.figure(figsize=(15, 15))
activations = feature_maps[0]
for i in range(32):
    plt.subplot(6, 6, i + 1)
    plt.imshow(activations[:, :, i], cmap='viridis')
    plt.axis('off')
plt.tight_layout()
plt.suptitle('Feature Maps of First Convolutional Layer', fontsize=16)
plt.subplots_adjust(top=0.95)
plt.show()

# Show the input image the feature maps were computed from
plt.figure(figsize=(5, 5))
reference_image = X_test[0].reshape(28, 28)
plt.imshow(reference_image, cmap='gray')
plt.title(f"Original Image (Digit: {y_test[0]})")
plt.axis('off')
plt.show()

## Making Predictions on New Data

In [None]:
# Function to preprocess and predict a single image
def predict_digit(image):
    """Classify one grayscale digit image with the trained ``model``.

    The image is resized to 28x28 when necessary, divided by 255 and
    reshaped to ``(1, 28, 28, 1)`` before being passed to ``model.predict``.

    Args:
        image: Array-like grayscale image of shape ``(H, W)`` or
            ``(H, W, 1)``. Pixel values are expected on the 0-255 scale,
            because this function divides by 255 itself.

    Returns:
        A ``(digit, confidence)`` tuple: the index of the largest entry in
        the model output, and the model output value at that index.

    Raises:
        ValueError: If the image is not 2-D after dropping an optional
            trailing single-channel axis.
    """
    # Work on a float32 NumPy array so array methods exist for any array-like input
    image = np.asarray(image, dtype='float32')

    # Accept an explicit single-channel axis, e.g. (H, W, 1)
    if image.ndim == 3 and image.shape[-1] == 1:
        image = image[..., 0]
    if image.ndim != 2:
        raise ValueError(
            f"Expected a grayscale image of shape (H, W) or (H, W, 1), got {image.shape}"
        )

    # Ensure the image is in the right format (28x28 grayscale)
    if image.shape != (28, 28):
        # tf.image.resize needs a channel axis and returns a Tensor, so add
        # the axis first and convert the result back to NumPy afterwards.
        resized = tf.image.resize(image[..., np.newaxis], [28, 28])
        image = resized.numpy()

    # Normalize and reshape
    image = image / 255.0
    image = image.reshape(1, 28, 28, 1)

    # Make prediction
    prediction = model.predict(image)
    digit = np.argmax(prediction)
    confidence = prediction[0][digit]

    return digit, confidence

# Let's test this function on a sample from the test set
sample_idx = 42  # Choose any index
sample_image = X_test[sample_idx].reshape(28, 28)

# Display the image
plt.figure(figsize=(5, 5))
plt.imshow(sample_image, cmap='gray')
plt.title(f"True Label: {y_test[sample_idx]}")
plt.axis('off')
plt.show()

# Make prediction.
# X_test was already scaled to the 0-1 range during preprocessing, while
# predict_digit divides its input by 255 itself. Put the sample back on the
# 0-255 scale first so it is not normalized twice.
digit, confidence = predict_digit(sample_image * 255.0)
print(f"Predicted Digit: {digit}")
print(f"Confidence: {confidence:.4f} ({confidence*100:.2f}%)")

## Conclusion

In this notebook, we built a Convolutional Neural Network (CNN) to classify handwritten digits from the MNIST dataset. The model achieved high accuracy on the test set, demonstrating the effectiveness of CNNs for image classification tasks.

Key points from this project:

1. CNNs are well-suited for image classification tasks due to their ability to learn spatial hierarchies of features.
2. Data preprocessing, including normalization and reshaping, is crucial for good model performance.
3. Batch normalization stabilizes and speeds up training, while dropout regularizes the network to reduce overfitting and improve generalization.
4. Visualizing predictions and misclassifications provides insights into the model's strengths and weaknesses.

This model could be further improved by:

1. Using data augmentation to increase the training set size and diversity
2. Implementing more sophisticated CNN architectures like ResNet or DenseNet
3. Fine-tuning hyperparameters using techniques like grid search or Bayesian optimization
4. Applying transfer learning from pre-trained models

## Saving the Model

In [None]:
# Save the trained model to 'mnist_cnn_model.h5' in the current working directory
# NOTE(review): the .h5 extension presumably selects the legacy HDF5 format;
# newer Keras releases recommend the native '.keras' format. Confirm against
# the installed Keras version before changing the file name.
model.save('mnist_cnn_model.h5')
print("Model saved successfully!")