In [1]:
"""
Step 1: Train MNIST Model and Export for Web Use
Run this in Google Colab
"""

# Install required packages
!pip install torch torchvision onnx

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import numpy as np

# 1. Define a simple but effective CNN model
class MNISTNet(nn.Module):
    """Small two-block CNN that maps 1x28x28 images to 10 class logits.

    Each conv block is a 3x3 convolution with padding 1, a ReLU and a 2x2
    max-pool, so a 28x28 input shrinks 28 -> 14 -> 7 and the flattened
    feature vector has ``64 * 7 * 7`` entries, which is what ``fc1`` expects.
    The head is Linear -> ReLU -> Dropout(0.25) -> Linear and returns raw
    logits (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers (padding=1 keeps the spatial size for 3x3 kernels)
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

        self.dropout = nn.Dropout(0.25)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits.

        Args:
            x: image batch shaped ``(batch, 1, 28, 28)``.

        Returns:
            Tensor of shape ``(batch, 10)`` holding unnormalized class scores.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce
                ``64 * 7 * 7`` features per sample (raised by ``fc1``).
        """
        # Conv block 1
        x = self.pool(self.relu(self.conv1(x)))

        # Conv block 2
        x = self.pool(self.relu(self.conv2(x)))

        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 64 * 7 * 7)``, this can never move elements between
        # samples, so a wrongly sized input fails loudly in ``fc1`` instead of
        # silently producing a different number of rows.
        x = x.flatten(start_dim=1)

        # FC layers
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

# 2. Prepare data
# Preprocessing: convert each image to a tensor, then normalize its single
# channel (0.1307 / 0.3081 are presumably the MNIST mean and std - confirm).
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
]
transform = transforms.Compose(preprocessing_steps)

# Both splits live under the same root and share the same preprocessing;
# only the training split requests a download.
mnist_kwargs = dict(root='./data', transform=transform)
train_dataset = datasets.MNIST(train=True, download=True, **mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

# Training batches are small and reshuffled every epoch; evaluation batches
# are large and keep the dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)

# 3. Train the model
# Seed the global RNG before the model is built so that weight initialization,
# batch shuffling and dropout masks are repeatable from run to run.
# NOTE: some GPU kernels may still be nondeterministic, so results on CUDA can
# differ slightly between runs even with a fixed seed.
SEED = 42
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MNISTNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(f"Training on: {device}")

# Training loop
epochs = 5
for epoch in range(epochs):
    model.train()  # re-enable dropout after the previous epoch's evaluation
    running_loss = 0.0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Report the mean loss over each window of 100 batches, then reset.
        if batch_idx % 100 == 99:
            print(f'Epoch {epoch+1}, Batch {batch_idx+1}, Loss: {running_loss/100:.4f}')
            running_loss = 0.0

    # Validation: accuracy over the whole test_loader after every epoch
    model.eval()  # disable dropout for evaluation
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Predicted class = index of the largest logit. Gradients are
            # already off here, so the legacy `.data` access is not needed.
            predicted = output.argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    print(f'Epoch {epoch+1} Accuracy: {100 * correct / total:.2f}%')

print("Training complete!")

# 4. Export model for web use (ONNX format)
model.eval()  # export the inference-mode graph (dropout disabled)
# Example input handed to the exporter; it lives on the same device as the
# model. The batch dimension is declared dynamic below.
dummy_input = torch.randn(1, 1, 28, 28).to(device)

torch.onnx.export(
    model,
    dummy_input,
    "mnist_model.onnx",
    export_params=True,          # store the trained weights inside the file
    opset_version=11,
    do_constant_folding=True,
    input_names=['input'],
    output_names=['output'],
    # Axis 0 of both tensors is named so the exported model accepts any batch size.
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
)

print("Model exported as mnist_model.onnx")
print("Download this file from Colab (left panel > Files)")

# 5. Also save as PyTorch format (backup)
# Move every tensor to the CPU before saving. A state_dict saved with CUDA
# tensors cannot be loaded on a CPU-only machine unless the caller passes
# `map_location`, which defeats the purpose of a portable backup.
cpu_state_dict = {name: tensor.cpu() for name, tensor in model.state_dict().items()}
torch.save(cpu_state_dict, 'mnist_model.pth')
print("Model also saved as mnist_model.pth")

# Test the model with a sample
# Take the first evaluation batch and compare the model's predictions for its
# first five images with their labels.
first_batch = next(iter(test_loader))
test_data, test_target = first_batch
test_data = test_data[:5].to(device)

# Inference only, so no gradient tracking is needed.
with torch.no_grad():
    predictions = model(test_data)
    predicted_classes = predictions.argmax(dim=1)

# Predictions are moved back to the CPU for NumPy conversion; the labels were
# never moved off the CPU.
print(f"\nSample predictions: {predicted_classes.cpu().numpy()}")
print(f"Actual labels: {test_target[:5].numpy()}")

Collecting onnx
  Downloading onnx-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.0 kB)
Downloading onnx-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (18.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.2/18.2 MB[0m [31m64.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx
Successfully installed onnx-1.19.1


100%|██████████| 9.91M/9.91M [00:00<00:00, 14.8MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 485kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.50MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 7.08MB/s]


Training on: cuda
Epoch 1, Batch 100, Loss: 0.5739
Epoch 1, Batch 200, Loss: 0.1660
Epoch 1, Batch 300, Loss: 0.1477
Epoch 1, Batch 400, Loss: 0.1094
Epoch 1, Batch 500, Loss: 0.0894
Epoch 1, Batch 600, Loss: 0.0812
Epoch 1, Batch 700, Loss: 0.0831
Epoch 1, Batch 800, Loss: 0.0791
Epoch 1, Batch 900, Loss: 0.0681
Epoch 1 Accuracy: 98.50%
Epoch 2, Batch 100, Loss: 0.0473
Epoch 2, Batch 200, Loss: 0.0546
Epoch 2, Batch 300, Loss: 0.0517
Epoch 2, Batch 400, Loss: 0.0600
Epoch 2, Batch 500, Loss: 0.0512
Epoch 2, Batch 600, Loss: 0.0571
Epoch 2, Batch 700, Loss: 0.0418
Epoch 2, Batch 800, Loss: 0.0545
Epoch 2, Batch 900, Loss: 0.0478
Epoch 2 Accuracy: 98.97%
Epoch 3, Batch 100, Loss: 0.0424
Epoch 3, Batch 200, Loss: 0.0372
Epoch 3, Batch 300, Loss: 0.0441
Epoch 3, Batch 400, Loss: 0.0348
Epoch 3, Batch 500, Loss: 0.0393
Epoch 3, Batch 600, Loss: 0.0367
Epoch 3, Batch 700, Loss: 0.0399
Epoch 3, Batch 800, Loss: 0.0353
Epoch 3, Batch 900, Loss: 0.0361
Epoch 3 Accuracy: 98.94%
Epoch 4, Batch 1

  torch.onnx.export(



Sample predictions: [7 2 1 0 4]
Actual labels: [7 2 1 0 4]
