In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader

In [14]:
# Preprocessing pipeline: image -> tensor, then normalize with mean 0.5 / std 0.5
# so pixel values end up in [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Build both MNIST splits (training first, then test) from the same on-disk
# location, downloading the files on first use and applying the same transform.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(
        root='./data',
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)

In [15]:
# Define the neural network
class MNISTNet(nn.Module):
    """Fully connected classifier for 28x28 single-channel images.

    Layer sizes are 784 -> 256 -> 128 -> 64 -> 10. Each of the three hidden
    layers is followed by ReLU; dropout (p=0.2) is applied after the first two.
    The final layer returns raw, un-normalized class scores (logits), which is
    the input ``nn.CrossEntropyLoss`` expects.
    """

    def __init__(self):
        super().__init__()
        # Input: 28x28 = 784 pixels
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)  # 10 classes (digits 0-9)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor whose elements can be regrouped into rows of 784 values,
                e.g. ``(batch_size, 1, 28, 28)`` or ``(batch_size, 784)``.

        Returns:
            Tensor of shape ``(batch_size, 10)`` holding one logit per class.
        """
        # Flatten the image: (batch_size, 1, 28, 28) -> (batch_size, 784).
        # reshape (rather than view) also accepts non-contiguous inputs, and the
        # row width is read from fc1 so it cannot drift from the layer definition.
        x = x.reshape(-1, self.fc1.in_features)

        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        x = F.relu(self.fc3(x))
        # No activation on the output layer: CrossEntropyLoss applies
        # log-softmax internally.
        return self.fc4(x)

In [16]:
# Mini-batch iterators over both splits, 32 samples per batch.
# Only the training split is shuffled; the test split keeps its stored order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=32,
    shuffle=False,
)

In [23]:
# Initialize model, loss function, and optimizer.
# Seed PyTorch's global RNG first so that weight initialization, dropout masks
# and DataLoader shuffling repeat across "Restart Kernel -> Run All" runs.
# (Some GPU kernels can still be non-deterministic; see the PyTorch
# reproducibility notes.)
SEED = 42
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MNISTNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# Training function
def train(model, train_loader, criterion, optimizer, epochs=5):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Progress is printed every 200 batches, and a loss/accuracy summary is
    printed at the end of every epoch.

    Args:
        model: The ``nn.Module`` to optimize; its parameters are updated in place.
        train_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer constructed over ``model``'s parameters.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        list[dict]: One entry per epoch with keys ``'loss'`` (mean of the
        per-batch losses) and ``'accuracy'`` (training accuracy in percent).

    Raises:
        ValueError: If ``train_loader`` yields no samples.
    """
    # Send batches to wherever the model lives instead of relying on a
    # notebook-level ``device`` variable being defined by another cell.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    history = []
    for epoch in range(epochs):
        # Re-assert training mode each epoch in case the model was switched to
        # eval mode (e.g. for validation) since the previous one.
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)

            # Zero gradients
            optimizer.zero_grad()

            # Forward pass
            output = model(data)
            loss = criterion(output, target)

            # Backward pass
            loss.backward()
            optimizer.step()

            # Statistics (detach: the prediction must not extend the graph)
            running_loss += loss.item()
            predicted = output.detach().argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            num_batches += 1

            if batch_idx % 200 == 0:
                print(f'Epoch {epoch+1}/{epochs}, Batch {batch_idx}, '
                      f'Loss: {loss.item():.4f}')

        if total == 0:
            raise ValueError('train_loader yielded no samples; cannot train')

        epoch_loss = running_loss / num_batches
        accuracy = 100 * correct / total
        print(f'Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f}, '
              f'Accuracy: {accuracy:.2f}%')
        history.append({'loss': epoch_loss, 'accuracy': accuracy})

    return history


In [24]:
# Testing function
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

In [25]:
# Report the compute device, fit the model for 10 epochs,
# then score it on the test loader.
print(f"Using device: {device}")
train(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=10,
)
test(model=model, test_loader=test_loader)

Using device: cpu
Epoch 1/10, Batch 0, Loss: 2.2812
Epoch 1/10, Batch 200, Loss: 0.8440
Epoch 1/10, Batch 400, Loss: 0.5463
Epoch 1/10, Batch 600, Loss: 0.2500
Epoch 1/10, Batch 800, Loss: 0.1750
Epoch 1/10, Batch 1000, Loss: 0.2871
Epoch 1/10, Batch 1200, Loss: 0.1949
Epoch 1/10, Batch 1400, Loss: 0.1544
Epoch 1/10, Batch 1600, Loss: 0.2339
Epoch 1/10, Batch 1800, Loss: 0.3335
Epoch 1/10 - Loss: 0.4395, Accuracy: 86.17%
Epoch 2/10, Batch 0, Loss: 0.5356
Epoch 2/10, Batch 200, Loss: 0.1788
Epoch 2/10, Batch 400, Loss: 0.1263
Epoch 2/10, Batch 600, Loss: 0.0922
Epoch 2/10, Batch 800, Loss: 0.4656
Epoch 2/10, Batch 1000, Loss: 0.0531
Epoch 2/10, Batch 1200, Loss: 0.0930
Epoch 2/10, Batch 1400, Loss: 0.0144
Epoch 2/10, Batch 1600, Loss: 0.1054
Epoch 2/10, Batch 1800, Loss: 0.0190
Epoch 2/10 - Loss: 0.2030, Accuracy: 93.77%
Epoch 3/10, Batch 0, Loss: 0.1867
Epoch 3/10, Batch 200, Loss: 0.2929
Epoch 3/10, Batch 400, Loss: 0.1707
Epoch 3/10, Batch 600, Loss: 0.5484
Epoch 3/10, Batch 800, Los