# Task 2: Handwritten Digit Recognition (OCR) with Neural Networks

This notebook trains and compares several models for MNIST digit recognition: a logistic-regression baseline, three feedforward ANNs (one of them with dropout regularization), and a CNN.

## 1. Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical

# Report the runtime so results can be tied to a TensorFlow build and device.
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU available: {tf.config.list_physical_devices('GPU')}")

# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation and batch shuffling repeat across Restart & Run All.
# 42 matches the `random_state` used by the logistic-regression baseline.
# NOTE(review): some GPU kernels may still be non-deterministic; confirm
# whether bit-exact reproducibility is required.
SEED = 42
keras.utils.set_random_seed(SEED)

## 2. Load and Explore MNIST Dataset

In [None]:
# Fetch MNIST through Keras; load_data() returns a (train, test) pair of
# (images, labels) tuples, unpacked here in two steps.
train_split, test_split = keras.datasets.mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Quick sanity report: array shapes, raw pixel range and label cardinality.
pixel_min, pixel_max = X_train.min(), X_train.max()
num_classes_found = len(np.unique(y_train))

print(f"Training data shape: {X_train.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"Test data shape: {X_test.shape}")
print(f"Test labels shape: {y_test.shape}")
print(f"Pixel value range: {pixel_min} to {pixel_max}")
print(f"Number of classes: {num_classes_found}")

In [None]:
# Preview the first 20 training images on a 4x5 grid, each titled with its label.
plt.figure(figsize=(12, 8))
for slot, (digit_image, digit_label) in enumerate(zip(X_train[:20], y_train[:20]), start=1):
    plt.subplot(4, 5, slot)
    plt.imshow(digit_image, cmap='gray')
    plt.title(f'Label: {digit_label}')
    plt.axis('off')
plt.suptitle('Sample MNIST Digits')
plt.tight_layout()
plt.show()

# Bar chart of how many training samples each digit class has.
class_ids, class_counts = np.unique(y_train, return_counts=True)
plt.figure(figsize=(10, 6))
plt.bar(class_ids, class_counts)
plt.title('MNIST Training Set Class Distribution')
plt.xlabel('Digit Class')
plt.ylabel('Number of Samples')
plt.show()

## 3. Data Preprocessing

In [None]:
# Scale pixels into [0, 1] as float32 (raw values are divided by 255).
X_train_norm = X_train.astype('float32') / 255.0
X_test_norm = X_test.astype('float32') / 255.0

n_train = X_train_norm.shape[0]
n_test = X_test_norm.shape[0]

# Dense networks and the baseline consume one flat vector per image.
X_train_flat = X_train_norm.reshape(n_train, -1)
X_test_flat = X_test_norm.reshape(n_test, -1)

# The CNN consumes 28x28 images with an explicit single channel axis.
X_train_cnn = X_train_norm.reshape(n_train, 28, 28, 1)
X_test_cnn = X_test_norm.reshape(n_test, 28, 28, 1)

# One-hot encode the integer labels over 10 classes.
y_train_cat = to_categorical(y_train, num_classes=10)
y_test_cat = to_categorical(y_test, num_classes=10)

print(f"Flattened training data shape: {X_train_flat.shape}")
print(f"CNN training data shape: {X_train_cnn.shape}")
print(f"Categorical labels shape: {y_train_cat.shape}")

## 4. Baseline Model - Logistic Regression

In [None]:
# Reference point for the neural networks: multinomial logistic regression on
# the flattened, normalised pixels. fit() returns the estimator itself, so
# construction and training are chained.
baseline_lr = LogisticRegression(max_iter=1000, random_state=42).fit(X_train_flat, y_train)

# Score on the held-out test images using the integer (not one-hot) labels.
y_pred_baseline = baseline_lr.predict(X_test_flat)
baseline_accuracy = accuracy_score(y_test, y_pred_baseline)
baseline_report = classification_report(y_test, y_pred_baseline, digits=4)

print("BASELINE Logistic Regression")
print(f"Accuracy: {baseline_accuracy:.4f}")
print("\nClassification Report:")
print(baseline_report)

## 5. Simple Feedforward Neural Network

In [None]:
# Model A: two hidden ReLU layers (128 then 64 units) on the 784-pixel vector,
# finished by a 10-way softmax.
model_a = keras.Sequential([
    layers.Input(shape=(784,)),
    *[layers.Dense(width, activation='relu') for width in (128, 64)],
    layers.Dense(10, activation='softmax'),
])

# One-hot targets, hence categorical cross-entropy.
model_a.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print("Model A Architecture:")
model_a.summary()

# Train for a fixed number of epochs. The test split is passed as validation
# data, so the `val_*` entries in the history are test-set metrics.
history_a = model_a.fit(
    x=X_train_flat,
    y=y_train_cat,
    batch_size=128,
    epochs=15,
    verbose=1,
    validation_data=(X_test_flat, y_test_cat),
)

## 6. Deep Neural Network

In [None]:
# Model B: same recipe as a plain dense net but with three hidden ReLU layers
# (256, 128, 64 units) before the 10-way softmax.
model_b = keras.Sequential([
    layers.Input(shape=(784,)),
    *[layers.Dense(width, activation='relu') for width in (256, 128, 64)],
    layers.Dense(10, activation='softmax'),
])

# One-hot targets, hence categorical cross-entropy.
model_b.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print("Model B Architecture:")
model_b.summary()

# Train for a fixed number of epochs. The test split is passed as validation
# data, so the `val_*` entries in the history are test-set metrics.
history_b = model_b.fit(
    x=X_train_flat,
    y=y_train_cat,
    batch_size=128,
    epochs=15,
    verbose=1,
    validation_data=(X_test_flat, y_test_cat),
)

## 7. Neural Network with Dropout Regularization

In [None]:
# Model C: the 128-64 dense layout with a Dropout(0.3) after each hidden layer.
dropout_stack = [layers.Input(shape=(784,))]
for width in (128, 64):
    dropout_stack.extend([
        layers.Dense(width, activation='relu'),
        layers.Dropout(0.3),
    ])
dropout_stack.append(layers.Dense(10, activation='softmax'))
model_c = keras.Sequential(dropout_stack)

# One-hot targets, hence categorical cross-entropy.
model_c.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print("Model C Architecture:")
model_c.summary()

# Train for a fixed number of epochs. The test split is passed as validation
# data, so the `val_*` entries in the history are test-set metrics.
history_c = model_c.fit(
    x=X_train_flat,
    y=y_train_cat,
    batch_size=128,
    epochs=15,
    verbose=1,
    validation_data=(X_test_flat, y_test_cat),
)

## 8. Convolutional Neural Network

In [None]:
# Model D: two Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with 32 then 64
# filters, followed by a small dense head with dropout and a 10-way softmax.
cnn_stack = [layers.Input(shape=(28, 28, 1))]
for n_filters in (32, 64):
    cnn_stack.append(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    cnn_stack.append(layers.MaxPooling2D((2, 2)))
cnn_stack += [
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(10, activation='softmax'),
]
model_d = keras.Sequential(cnn_stack)

# One-hot targets, hence categorical cross-entropy.
model_d.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

print("Model D (CNN) Architecture:")
model_d.summary()

# Train on the 28x28x1 image tensors for 10 epochs. The test split is passed
# as validation data, so the `val_*` history entries are test-set metrics.
history_d = model_d.fit(
    x=X_train_cnn,
    y=y_train_cat,
    batch_size=128,
    epochs=10,
    verbose=1,
    validation_data=(X_test_cnn, y_test_cat),
)

## 9. Training Visualization

In [None]:
# Plot training histories on a 2x2 grid.
def _draw_panel(position, curves, title, y_label):
    """Draw one panel of the 2x2 training-history figure.

    position -- 1-based subplot index within the 2x2 grid.
    curves   -- iterable of (per-epoch values, legend label), drawn in order.
    title    -- panel title.
    y_label  -- y-axis label; the x-axis is always labelled 'Epochs'.
    """
    plt.subplot(2, 2, position)
    for values, legend_label in curves:
        plt.plot(values, label=legend_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)


hist_a, hist_b = history_a.history, history_b.history
hist_c, hist_d = history_c.history, history_d.history

plt.figure(figsize=(15, 10))

# Top row: the two plain feedforward nets, accuracy (left) and loss (right).
_draw_panel(
    1,
    [
        (hist_a['accuracy'], 'Model A - Train'),
        (hist_a['val_accuracy'], 'Model A - Val'),
        (hist_b['accuracy'], 'Model B - Train'),
        (hist_b['val_accuracy'], 'Model B - Val'),
    ],
    'Feedforward Models - Accuracy',
    'Accuracy',
)
_draw_panel(
    2,
    [
        (hist_a['loss'], 'Model A - Train'),
        (hist_a['val_loss'], 'Model A - Val'),
        (hist_b['loss'], 'Model B - Train'),
        (hist_b['val_loss'], 'Model B - Val'),
    ],
    'Feedforward Models - Loss',
    'Loss',
)

# Bottom row: accuracy of the dropout net (left) and of the CNN (right).
_draw_panel(
    3,
    [
        (hist_c['accuracy'], 'Model C - Train'),
        (hist_c['val_accuracy'], 'Model C - Val'),
    ],
    'Model with Dropout - Accuracy',
    'Accuracy',
)
_draw_panel(
    4,
    [
        (hist_d['accuracy'], 'CNN - Train'),
        (hist_d['val_accuracy'], 'CNN - Val'),
    ],
    'CNN Model - Accuracy',
    'Accuracy',
)

plt.tight_layout()
plt.show()

## 10. Model Evaluation and Comparison

In [None]:
# Evaluate all models
# Each entry pairs a trained network with the test tensor in the layout it was
# trained on: flattened vectors for the dense nets, 28x28x1 images for the CNN.
models = {
    'Model A (128-64)': (model_a, X_test_flat),
    'Model B (256-128-64)': (model_b, X_test_flat),
    'Model C (Dropout)': (model_c, X_test_flat),
    'Model D (CNN)': (model_d, X_test_cnn)
}

results = {}            # display name -> test accuracy
test_predictions = {}   # display name -> predicted class index per test image

print("=== MODEL EVALUATION RESULTS ===")
print(f"Baseline (Logistic Regression): {baseline_accuracy:.4f}")
print()

for name, (model, X_test_data) in models.items():
    # Softmax probabilities -> hard class labels
    y_pred_proba = model.predict(X_test_data, verbose=0)
    y_pred = np.argmax(y_pred_proba, axis=1)

    # Score against the integer labels and keep the predictions so the
    # winning model does not need a second inference pass below.
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy
    test_predictions[name] = y_pred

    print(f"{name}: {accuracy:.4f}")

# Best model detailed evaluation.
# The winner is picked by test accuracy, i.e. on the same data it is then
# reported on, so its score is an optimistic estimate.
best_model_name = max(results, key=results.get)
best_model, best_X_test = models[best_model_name]

print(f"\n=== BEST MODEL: {best_model_name} ===")
y_pred_best = test_predictions[best_model_name]
print("Classification Report:")
print(classification_report(y_test, y_pred_best, digits=4))

## 11. Final Comparison Summary

In [None]:
# Create comparison visualization
# Labels are listed in the same order the models were inserted into `results`
# (baseline first); dicts preserve insertion order, so the pairing is positional.
model_names = ['Baseline\n(LogReg)', 'Model A\n(128-64)', 'Model B\n(256-128-64)', 
               'Model C\n(Dropout)', 'Model D\n(CNN)']
accuracies = [baseline_accuracy] + list(results.values())
assert len(model_names) == len(accuracies), (
    "model_names must have one label per evaluated model (baseline included)"
)

plt.figure(figsize=(12, 8))
bars = plt.bar(model_names, accuracies, color=['lightcoral', 'lightblue', 'lightgreen', 'lightyellow', 'lightpink'])
plt.ylabel('Accuracy')
plt.title('MNIST Digit Recognition - Model Comparison')

# Zoom into [0.9, 1.0] to make small differences visible, but widen the axis
# when a model scores below 0.9 so its bar is not cut off entirely.
lowest_accuracy = min(accuracies)
y_floor = 0.9 if lowest_accuracy >= 0.9 else max(0.0, lowest_accuracy - 0.02)
plt.ylim(y_floor, 1.0)

# Add accuracy values on bars
for bar, acc in zip(bars, accuracies):
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height + 0.001,
             f'{acc:.4f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Final summary
best_accuracy = results[best_model_name]
# The gain is an absolute difference between two accuracies, so it is reported
# in percentage points rather than as a relative percentage.
improvement_points = (best_accuracy - baseline_accuracy) * 100

print("\n=== FINAL SUMMARY ===")
print(f"Best performing model: {best_model_name}")
print(f"Best accuracy: {best_accuracy:.4f}")
print(f"Improvement over baseline: {improvement_points:.2f} percentage points")