In [3]:
# Importing all necessary PyTorch and helper libraries
import torch                               # Main PyTorch library for tensors and models
import torch.nn as nn                      # Neural network module for creating layers
import torch.optim as optim                # Optimizers like Adam, SGD, etc.
from torchvision import datasets, transforms  # For loading MNIST dataset and preprocessing
from torch.utils.data import DataLoader    # Helps to batch and shuffle data efficiently


In [4]:
# Preprocessing applied to every MNIST image as it is read from the dataset
transform = transforms.Compose([
    # PIL image -> float tensor scaled to [0, 1]
    transforms.ToTensor(),
    # (x - 0.5) / 0.5 rescales [0, 1] to [-1, 1]
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST train / test splits (downloaded into ./data when not already present)
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batch iterators: training batches are reshuffled every epoch,
# test batches keep a fixed order
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)


100%|██████████| 9.91M/9.91M [00:00<00:00, 57.5MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.72MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 14.4MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 4.78MB/s]


In [6]:
# Define a simple neural network class
class DigitClassifier(nn.Module):
    """Fully connected classifier: flatten -> linear -> ReLU -> linear.

    Args:
        input_size: Number of features per sample once flattened.
            Defaults to 28*28 = 784 (one 28x28 single-channel image).
        hidden_size: Width of the single hidden layer. Defaults to 128.
        num_classes: Number of output scores per sample. Defaults to 10
            (digits 0-9).

    Calling ``DigitClassifier()`` with no arguments builds exactly the
    784 -> 128 -> 10 network.
    """

    def __init__(self, input_size=28 * 28, hidden_size=128, num_classes=10):
        super().__init__()
        # Attribute name and layer order are kept so parameter names remain
        # `fc.1.*` (hidden layer) and `fc.3.*` (output layer).
        self.fc = nn.Sequential(
            nn.Flatten(),                        # Collapse every dim after the batch dim
            nn.Linear(input_size, hidden_size),  # Hidden layer
            nn.ReLU(),                           # ReLU activation adds non-linearity
            nn.Linear(hidden_size, num_classes), # Output layer: one score per class
        )

    def forward(self, x):
        """Return the raw class scores (no softmax applied) for batch ``x``."""
        return self.fc(x)

# Fix the global RNG seed before the model is built so the initial weights are
# the same every time this cell runs (Restart & Run All becomes repeatable).
# NOTE: bit-exact results on GPU may additionally require PyTorch's
# deterministic-algorithm settings.
torch.manual_seed(42)

# Use GPU if available, else CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = DigitClassifier().to(device)      # Move model to the device


In [7]:
# Training objective and the optimizer that updates the model's parameters
# Cross-entropy on the raw class scores (multi-class classification)
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters, step size 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [8]:
# Five passes over the training data; one summary line is printed per pass
for epoch in range(5):
    model.train()                         # Put the model in training mode
    # Sum of the per-batch loss values seen in this epoch (not an average,
    # so the number scales with how many batches the loader yields)
    running_loss = 0.0

    for images, labels in train_loader:
        # Batch must live on the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()             # Reset gradients left over from the previous step
        loss = criterion(model(images), labels)  # Forward pass + loss in one expression
        loss.backward()                   # Back-propagate
        optimizer.step()                  # Apply the parameter update

        running_loss += loss.item()       # .item() extracts a plain Python float

    print(f"Epoch {epoch+1}: Loss = {running_loss:.4f}")


Epoch 1: Loss = 350.4526
Epoch 2: Loss = 179.2324
Epoch 3: Loss = 131.8035
Epoch 4: Loss = 107.8105
Epoch 5: Loss = 92.2459


In [9]:
# Measure classification accuracy over the whole test set
model.eval()                              # Evaluation mode
correct = 0                               # Number of samples predicted correctly
total = 0                                 # Number of samples seen

# No gradients are needed for scoring, so autograd tracking is switched off
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        predicted = outputs.argmax(dim=1)     # Index of the largest score per sample
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"\n🎯 Test Accuracy: {accuracy:.2f}%")



🎯 Test Accuracy: 97.14%
