<a href="https://colab.research.google.com/github/dhruvin6122/pytorch/blob/main/mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Reproducibility: seed PyTorch's global RNG before anything random happens
# (weight initialisation, DataLoader shuffling). Some GPU kernels may still be
# non-deterministic, so this narrows run-to-run variation rather than
# guaranteeing bit-identical results.
SEED = 42
torch.manual_seed(SEED)

# Device: use the GPU when one is visible, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Transform: convert each image to a tensor, then flatten it right away
transform = transforms.Compose([
    transforms.ToTensor(),                 # to tensor [0,1]
    transforms.Lambda(lambda x: x.view(-1))  # flatten 28x28 -> 784
])

# Train & Test datasets (downloaded into ./data on first use)
train_ds = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
test_ds  = datasets.MNIST(root="./data", train=False, download=True, transform=transform)

# DataLoaders: shuffled mini-batches for training, large fixed-order batches for testing
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader  = DataLoader(test_ds, batch_size=1000)


100%|██████████| 9.91M/9.91M [00:00<00:00, 38.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 1.09MB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 9.53MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 10.9MB/s]


In [2]:
class MLP(nn.Module):
    """Fully connected classifier: two hidden ReLU layers and a linear output layer.

    Dropout is applied after the first hidden activation only. With the default
    arguments the layers are Linear(784, 256), Linear(256, 128) and
    Linear(128, 10), stored at ``net.0``, ``net.3`` and ``net.5``.

    Args:
        in_features: Length of each flattened input vector (28x28 = 784 by default).
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output scores produced per sample.
        dropout: Drop probability of the dropout layer.
    """

    def __init__(
        self,
        in_features: int = 784,
        hidden1: int = 256,
        hidden2: int = 128,
        num_classes: int = 10,
        dropout: float = 0.2,
    ):
        super().__init__()
        # Layer order is kept fixed so the indices inside ``net`` (and hence
        # the state_dict keys) do not depend on the chosen sizes.
        self.net = nn.Sequential(
            nn.Linear(in_features, hidden1),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Linear(hidden2, num_classes),
        )

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the raw class scores."""
        return self.net(x)

# Build the network with its default sizes, then place its parameters on the
# device selected earlier in the notebook.
model = MLP()
model = model.to(device)


In [3]:
# Step size handed to the optimizer below.
LEARNING_RATE = 1e-3

# Multi-class objective applied to the model's output scores.
criterion = nn.CrossEntropyLoss()

# Adam over every parameter of the model.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)


In [5]:
def train(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return averaged metrics.

    Args:
        model: Network to optimise; it is switched to training mode.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable mapping ``(logits, targets)`` to a scalar loss.
            The running total weights each batch loss by its batch size, which
            assumes the loss is a per-batch mean -- TODO confirm if a
            non-default reduction is ever passed in.
        optimizer: Optimizer used to update the model after each batch.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over the samples that were
        actually processed.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.train()

    # Send batches to wherever the model's parameters live, so the function
    # does not depend on a notebook-level global. A parameter-free model
    # leaves the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    total_loss, total_correct, total_seen = 0.0, 0, 0
    for xb, yb in loader:
        if target_device is not None:
            xb, yb = xb.to(target_device), yb.to(target_device)

        # Forward
        logits = model(xb)
        loss = criterion(logits, yb)

        # Backward
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()

        # Track. Samples are counted as they are seen rather than taken from
        # len(loader.dataset): the two differ when the loader skips samples
        # (drop_last, subset samplers), and a plain iterable of batches has
        # no .dataset attribute.
        batch_size = xb.size(0)
        total_seen += batch_size
        total_loss += loss.item() * batch_size
        total_correct += (logits.argmax(1) == yb).sum().item()

    return total_loss / total_seen, total_correct / total_seen

def evaluate(model, loader, criterion):
    """Score ``model`` on ``loader`` without updating it.

    Args:
        model: Network to score; it is switched to evaluation mode and left
            in that mode on return.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        criterion: Callable mapping ``(logits, targets)`` to a scalar loss.
            The running total weights each batch loss by its batch size, which
            assumes the loss is a per-batch mean -- TODO confirm if a
            non-default reduction is ever passed in.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over the samples that were
        actually processed.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    model.eval()

    # Send batches to wherever the model's parameters live, so the function
    # does not depend on a notebook-level global. A parameter-free model
    # leaves the batches where they are.
    first_param = next(model.parameters(), None)
    target_device = None if first_param is None else first_param.device

    total_loss, total_correct, total_seen = 0.0, 0, 0
    with torch.no_grad():  # no gradients are needed for scoring
        for xb, yb in loader:
            if target_device is not None:
                xb, yb = xb.to(target_device), yb.to(target_device)
            logits = model(xb)
            loss = criterion(logits, yb)

            # Count samples as they are seen rather than trusting
            # len(loader.dataset), which is wrong when the loader skips
            # samples and absent on plain iterables of batches.
            batch_size = xb.size(0)
            total_seen += batch_size
            total_loss += loss.item() * batch_size
            total_correct += (logits.argmax(1) == yb).sum().item()

    return total_loss / total_seen, total_correct / total_seen


In [6]:
# Number of full passes over the training data.
epochs = 5

for epoch in range(1, epochs + 1):
    # Optimise on the training split first, then score the held-out split
    # with the freshly updated weights.
    train_loss, train_acc = train(model, train_loader, criterion, optimizer)
    test_loss, test_acc = evaluate(model, test_loader, criterion)

    # One summary line per epoch.
    print(
        f"Epoch {epoch}: "
        f"Train loss={train_loss:.4f}, acc={train_acc:.4f} | "
        f"Test loss={test_loss:.4f}, acc={test_acc:.4f}"
    )


Epoch 1: Train loss=0.3040, acc=0.9116 | Test loss=0.1327, acc=0.9595
Epoch 2: Train loss=0.1269, acc=0.9617 | Test loss=0.0974, acc=0.9688
Epoch 3: Train loss=0.0903, acc=0.9714 | Test loss=0.0780, acc=0.9751
Epoch 4: Train loss=0.0710, acc=0.9774 | Test loss=0.0716, acc=0.9783
Epoch 5: Train loss=0.0593, acc=0.9811 | Test loss=0.0704, acc=0.9796


In [11]:
# Output locations for the two checkpoint flavours written below.
WEIGHTS_PATH = "mnist_Model.pt"
FULL_MODEL_PATH = "mnist_handwritten_digits.pth"

# Weights only: to restore, build an MLP and call load_state_dict on it.
torch.save(model.state_dict(), WEIGHTS_PATH)

# Whole module object, serialised with pickle.
# NOTE(review): loading this file needs the MLP class definition to be
# available, and unpickling a file from an untrusted source can execute
# arbitrary code -- prefer the weights-only checkpoint when sharing.
torch.save(model, FULL_MODEL_PATH)
