In [None]:
##importing dependencies
import torch
import torch.nn as nn
import torch.optim as optim

from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader, random_split

# MNIST Digit Classification using PyTorch (CNN)

## Project Overview

This project implements a complete end-to-end multiclass image classification pipeline using a Convolutional Neural Network (CNN) in PyTorch on the MNIST handwritten digits dataset.

The objective is to design, train, validate, and evaluate a CNN that learns spatial features from grayscale digit images (0–9) while following standard deep learning and PyTorch best practices.

## Key Features

- Multiclass image classification (10 classes)
- CNN-based feature extraction
- Clean Train / Validation / Test split
- GPU acceleration (CUDA support)
- Cross-Entropy loss with raw logits
- Accuracy and loss tracking
- Model saving (final weights stored via `state_dict`)
- Inference on unseen samples

## Dataset

- **Dataset:** MNIST Handwritten Digits  
- **Training samples:** 60,000  
- **Test samples:** 10,000  
- **Image size:** 28 × 28 (grayscale)

The training dataset is further split into:

- **Training set:** 85%
- **Validation set:** 15%

Input images are normalized to improve convergence and training stability.

## Model Architecture

The model is a Convolutional Neural Network (CNN) consisting of:

### Convolutional feature extractor:
- Conv2D → ReLU → MaxPooling  
- Conv2D → ReLU → MaxPooling  

### Fully connected classifier:
- Flatten  
- Linear → ReLU  
- Linear → Output (10 classes)

No softmax is applied inside the model.  
`CrossEntropyLoss` expects raw logits and applies log-softmax internally, so an explicit softmax layer would be redundant.

## Training Strategy

- **Optimizer:** Adam  
- **Learning rate:** 0.001  
- **Loss function:** CrossEntropyLoss  
- **Batch size:** 64  
- **Epochs:** 5  
- **Evaluation metrics:** Loss and Accuracy  

## Project Structure

- Device configuration  
- Dataset loading and preprocessing  
- Train / validation split  
- CNN model definition  
- Training loop  
- Validation loop  
- Final test evaluation  
- Inference  
- Model saving  

## Goal

This notebook is designed to:

- Demonstrate a real-world CNN workflow in PyTorch  
- Follow professional deep learning conventions  
- Be suitable for interviews, portfolios, and GitHub review  


In [None]:
## Device selection and reproducibility
# Seed PyTorch's global RNG before any weights are initialised or any data is
# shuffled, so that "Restart Kernel -> Run All" repeats the same run.
# NOTE(review): some CUDA kernels are non-deterministic, so GPU runs may still
# differ slightly from one another even with a fixed seed.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)



Using device: cpu


In [None]:
## Datasets: MNIST train/test splits sharing one preprocessing pipeline
# Convert each image to a tensor, then standardise it with the mean/std pair
# below (presumably the usual MNIST statistics; verify if the data changes).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# Arguments common to both splits; only the `train` flag differs.
mnist_kwargs = dict(root="data", download=True, transform=transform)

full_train_data = MNIST(train=True, **mnist_kwargs)
test_data = MNIST(train=False, **mnist_kwargs)


In [None]:
## Train / validation split and data loaders
# Keep 85% of the official training set for training; the remainder is
# held out for validation.
train_size = int(0.85 * len(full_train_data))
val_size = len(full_train_data) - train_size

# Draw the split from a dedicated, seeded generator. Without it the held-out
# subset changes on every run (and depends on how much of the global RNG stream
# earlier cells consumed), so validation metrics are not comparable across runs.
split_generator = torch.Generator().manual_seed(42)
train_data, val_data = random_split(
    full_train_data, [train_size, val_size], generator=split_generator
)

# Only the training loader shuffles; evaluation order does not affect metrics.
train_loader = DataLoader(
    train_data,
    batch_size=64,
    shuffle=True
)

val_loader = DataLoader(
    val_data,
    batch_size=64,
    shuffle=False
)

test_loader = DataLoader(
    test_data,
    batch_size=64,
    shuffle=False
)

In [None]:
## Model definition
class DigitModel(nn.Module):
    """Small CNN mapping a batch of 1-channel 28x28 images to 10 class logits.

    Two conv/ReLU/max-pool stages (`features`) are followed by a two-layer
    fully connected head (`classifier`). The output is raw logits; no softmax
    is applied inside the module.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor. Padding of 1 with a 3x3 kernel keeps
        # the spatial size, so only the pooling layers halve height and width.
        self.features = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),   # -> [B, 32, 28, 28]
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),                                           # -> [B, 32, 14, 14]
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),  # -> [B, 64, 14, 14]
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),                                           # -> [B, 64, 7, 7]
        )

        # Classifier head: flatten the 64x7x7 feature maps, then two linear layers.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=64 * 7 * 7, out_features=128),
            nn.ReLU(),
            nn.Linear(in_features=128, out_features=10),
        )

    def forward(self, x):
        """Return the class logits for the input batch `x`."""
        return self.classifier(self.features(x))
# Build the network, then move its parameters onto the selected device.
model = DigitModel()
model = model.to(device)


In [None]:
## Optimisation setup
# Adam over every model parameter with a fixed learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Cross-entropy computed directly on the raw logits the model returns.
loss_fn = nn.CrossEntropyLoss()

In [None]:
## Training loop and validation loop
# Each epoch makes one optimisation pass over `train_loader`, then one
# gradient-free pass over `val_loader`, and prints loss/accuracy for both.
#
# Losses are accumulated per sample (batch mean * batch size) and divided by
# the number of samples seen. Averaging the per-batch means instead would
# over-weight the final batch whenever it is smaller than the others.
# This relies on `loss_fn` returning the batch mean, which is the default
# reduction for nn.CrossEntropyLoss.
epochs = 5

for epoch in range(epochs):

    # ---------- TRAIN ----------
    model.train()
    train_loss = 0.0    # running sum of per-sample losses
    train_correct = 0
    train_total = 0

    for xb, yb in train_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        n_batch = yb.size(0)

        optimizer.zero_grad()
        logits = model(xb)
        loss = loss_fn(logits, yb)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * n_batch
        preds = torch.argmax(logits, dim=1)
        train_correct += (preds == yb).sum().item()
        train_total += n_batch

    avg_train_loss = train_loss / train_total
    train_acc = train_correct / train_total

    # ---------- VALIDATION ----------
    model.eval()
    val_loss = 0.0      # running sum of per-sample losses
    val_correct = 0
    val_total = 0

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            n_batch = yb.size(0)

            logits = model(xb)
            loss = loss_fn(logits, yb)

            val_loss += loss.item() * n_batch
            preds = torch.argmax(logits, dim=1)
            val_correct += (preds == yb).sum().item()
            val_total += n_batch

    avg_val_loss = val_loss / val_total
    val_acc = val_correct / val_total

    print(
        f"Epoch {epoch+1:03d} | "
        f"Train Loss: {avg_train_loss:.4f} | "
        f"Train Acc: {train_acc:.4f} | "
        f"Val Loss: {avg_val_loss:.4f} | "
        f"Val Acc: {val_acc:.4f}"
    )


Epoch 001 | Train Loss: 0.1412 | Train Acc: 0.9565 | Val Loss: 0.0521 | Val Acc: 0.9838
Epoch 002 | Train Loss: 0.0426 | Train Acc: 0.9868 | Val Loss: 0.0428 | Val Acc: 0.9873
Epoch 003 | Train Loss: 0.0297 | Train Acc: 0.9910 | Val Loss: 0.0311 | Val Acc: 0.9903
Epoch 004 | Train Loss: 0.0212 | Train Acc: 0.9932 | Val Loss: 0.0418 | Val Acc: 0.9877
Epoch 005 | Train Loss: 0.0163 | Train Acc: 0.9951 | Val Loss: 0.0371 | Val Acc: 0.9894


In [None]:
## Final Test Evaluation
# Single gradient-free pass over the held-out test loader.
#
# The loss is accumulated per sample (batch mean * batch size) and divided by
# the number of samples, so a smaller final batch is not over-weighted as it
# would be when averaging per-batch means. This relies on `loss_fn` returning
# the batch mean, which is the default reduction for nn.CrossEntropyLoss.
model.eval()
test_loss = 0.0     # running sum of per-sample losses
test_correct = 0
test_total = 0

with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        yb = yb.to(device)
        n_batch = yb.size(0)

        logits = model(xb)
        loss = loss_fn(logits, yb)

        test_loss += loss.item() * n_batch
        preds = torch.argmax(logits, dim=1)
        test_correct += (preds == yb).sum().item()
        test_total += n_batch

print(
    f"Final Test Loss: {test_loss / test_total:.4f} | "
    f"Test Acc: {test_correct / test_total:.4f}"
)


Final Test Loss: 0.0343 | Test Acc: 0.9893


In [None]:
## Inference on a single test image
model.eval()
with torch.no_grad():
    # First test example; its label is not used here.
    image, _ = test_data[0]
    # Add a leading batch dimension before moving to the device: [1, 1, 28, 28]
    sample = image.unsqueeze(0).to(device)

    logits = model(sample)
    # Softmax is applied here only, to turn the logits into probabilities.
    probs = logits.softmax(dim=1)
    prediction = probs.argmax(dim=1)

print("Probabilities:", probs)
print("Predicted digit:", prediction.item())


Probabilities: tensor([[1.5229e-11, 8.9571e-11, 9.2619e-10, 3.3163e-08, 1.1860e-11, 5.7843e-12,
         1.2398e-15, 1.0000e+00, 5.6408e-10, 5.4258e-07]])
Predicted digit: 7


In [None]:
## Persist the trained weights
# Only the state_dict is written, not the module object itself.
torch.save(obj=model.state_dict(), f="mnist_model.pth")