# MNIST Download and Simple CNN Training

This notebook will:

1. Download the MNIST dataset to the `/data` directory.
2. Train a simple convolutional neural network (CNN) on the MNIST dataset using PyTorch.

In [None]:
# Download MNIST to /data using torchvision

import os
from torchvision import datasets

# Root folder that torchvision stores the MNIST files in
DATASET_DIR = '/data'

# Create the folder up front; exist_ok=True keeps a re-run of this cell from failing
os.makedirs(DATASET_DIR, exist_ok=True)

# Fetch both splits in order: train=True is the training set, train=False the test set
train_dataset, test_dataset = (
    datasets.MNIST(DATASET_DIR, train=is_train, download=True)
    for is_train in (True, False)
)

print(f"Downloaded MNIST to {DATASET_DIR}. Train size: {len(train_dataset)}, Test size: {len(test_dataset)}")

In [None]:
# Define a simple CNN for MNIST
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Small convolutional classifier for single-channel images.

    Architecture: two 3x3 convolutions (1 -> 32 -> 64 channels, stride 1, no
    padding), one 2x2 max-pool, then two fully connected layers.

    The first linear layer expects 9216 flattened features. That equals
    64 channels * 12 * 12, which is what the conv/pool stack produces for a
    1x28x28 input (28 -> 26 -> 24 after the two convolutions, 12 after pooling).
    Other input sizes will fail at ``fc1``.

    Args:
        num_classes: Number of output logits. Defaults to 10, so
            ``SimpleCNN()`` builds the same network as before.
    """

    def __init__(self, num_classes: int = 10):
        super().__init__()
        # Layers are created in a fixed order so seeded initialisation is stable.
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.fc1 = nn.Linear(9216, 128)  # 9216 = 64 * 12 * 12 (see class docstring)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw (un-normalised) class scores for a batch of images.

        Args:
            x: Batch of shape ``(N, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(N, num_classes)``. No softmax is applied here.
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        # Keep the batch dimension and flatten everything else for the linear layers.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

In [None]:
# Set up DataLoader and training utilities
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms

# Tunable settings gathered in one place.
SEED = 42
TRAIN_BATCH_SIZE = 64
TEST_BATCH_SIZE = 1000
LEARNING_RATE = 0.001

# Seed PyTorch's global RNG before the model and the shuffling loader are built,
# so weight initialisation and batch order repeat across "Restart & Run All".
# NOTE(review): some CUDA kernels may still be non-deterministic on GPU.
torch.manual_seed(SEED)

# Transform: convert to tensor and normalize.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel mean / std — confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Reload datasets with the transform attached. download=False means the files
# must already be present under DATASET_DIR (the download cell puts them there).
train_dataset = datasets.MNIST(DATASET_DIR, train=True, download=False, transform=transform)
test_dataset = datasets.MNIST(DATASET_DIR, train=False, download=False, transform=transform)

# Shuffle only the training data; the test loader keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=TEST_BATCH_SIZE, shuffle=False)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SimpleCNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss()

In [None]:
# Run one training epoch over the training loader, then report test-set accuracy
for epoch in range(1):
    model.train()  # switch the model to training mode
    total_loss = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Reported value is the mean of the per-batch losses
    print(f"Epoch {epoch+1}, Training loss: {total_loss/len(train_loader):.4f}")

# Evaluate on the test set: count predictions that match the labels
model.eval()  # switch the model to evaluation mode
correct = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        predicted = model(images).argmax(dim=1)  # index of the highest score per sample
        correct += (predicted == labels).sum().item()
accuracy = 100. * correct / len(test_loader.dataset)
print(f"Test set accuracy: {accuracy:.2f}%")