In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader

In [2]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# with mean 0.5 and std 0.5 so pixel values end up in [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Both splits share the storage directory, the download-if-missing behaviour
# and the preprocessing pipeline; they differ only in the `train` flag.
_mnist_shared_args = dict(root='./data', download=True, transform=transform)

# Training split
train_dataset = torchvision.datasets.MNIST(train=True, **_mnist_shared_args)

# Test split
test_dataset = torchvision.datasets.MNIST(train=False, **_mnist_shared_args)

In [8]:
# Define the neural network
class MNISTNet(nn.Module):
    """Fully connected classifier for 28x28 single-channel images.

    Layers: 784 -> 256 -> 128 -> 64 -> 10. ReLU follows each of the three
    hidden layers and dropout (p=0.2) follows the first two. The final layer
    returns raw class scores (logits) for the 10 digit classes.
    """

    def __init__(self):
        super().__init__()
        # Input: 28x28 = 784 pixels
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)  # 10 classes (digits 0-9)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor whose trailing dimensions hold 784 values per sample,
                e.g. ``(batch_size, 1, 28, 28)`` or ``(batch_size, 784)``.
                A single un-batched sample with exactly 784 elements is also
                accepted and treated as a batch of one.

        Returns:
            Tensor of shape ``(batch_size, 10)`` containing unnormalised
            class scores.

        Raises:
            ValueError: If ``x`` cannot be interpreted as samples of 784
                values each. Reshaping with ``(-1, 784)`` would instead
                silently change the batch size for such inputs.
        """
        in_features = self.fc1.in_features

        # Flatten each sample: (batch_size, 1, 28, 28) -> (batch_size, 784).
        # `reshape` (unlike `view`) also works on non-contiguous tensors, and
        # keeping the batch dimension explicit handles empty batches.
        if x.dim() >= 2 and x.shape[1:].numel() == in_features:
            x = x.reshape(x.size(0), in_features)
        elif x.numel() == in_features:
            # One un-batched sample, e.g. (28, 28) or (784,)
            x = x.reshape(1, in_features)
        else:
            raise ValueError(
                f"expected {in_features} values per sample, "
                f"got input of shape {tuple(x.shape)}"
            )

        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        # No activation on the output layer: it returns raw logits, which is
        # what nn.CrossEntropyLoss expects.
        x = self.fc4(x)
        return x

In [9]:
# Mini-batch iterators over the two splits (64 samples per batch). Only the
# training loader shuffles; the test loader keeps the dataset order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
)

In [10]:
# Initialize model, loss function, and optimizer
# Seed PyTorch's global RNG before the model is built so that weight
# initialisation (and everything else that draws from the default generator
# afterwards) is repeatable when the notebook is re-run top to bottom.
# NOTE: this does not by itself guarantee bit-identical results across
# hardware or PyTorch versions.
SEED = 42
torch.manual_seed(SEED)

# Prefer the GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MNISTNet().to(device)
# CrossEntropyLoss consumes the raw logits returned by the model
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training function
def train(model, train_loader, criterion, optimizer, epochs=5):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Prints the loss of every 200th batch and, after each epoch, the mean
    per-batch loss and the training accuracy.

    Args:
        model: Module to optimise; it is switched to training mode.
        train_loader: Iterable yielding ``(data, target)`` batches. It does
            not need to support ``len()``.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.

    Returns:
        None. Progress is reported on stdout only.

    Raises:
        ValueError: If ``train_loader`` yields no samples during an epoch
            (previously surfaced as an opaque ``ZeroDivisionError``).
    """
    # Move batches to wherever the model's parameters live instead of relying
    # on a notebook-level `device` variable being defined and in sync.
    first_param = next(model.parameters(), None)
    model_device = (
        first_param.device if first_param is not None else torch.device('cpu')
    )

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(model_device), target.to(model_device)

            # Zero gradients
            optimizer.zero_grad()

            # Forward pass
            output = model(data)
            loss = criterion(output, target)

            # Backward pass
            loss.backward()
            optimizer.step()

            # Statistics (detached from the autograd graph)
            running_loss += loss.item()
            predicted = output.detach().argmax(dim=1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
            num_batches += 1

            if batch_idx % 200 == 0:
                print(f'Epoch {epoch+1}/{epochs}, Batch {batch_idx}, '
                      f'Loss: {loss.item():.4f}')

        if num_batches == 0 or total == 0:
            raise ValueError('train_loader yielded no samples; nothing to train on')

        accuracy = 100 * correct / total
        # Mean of the per-batch losses over the batches actually seen
        print(f'Epoch {epoch+1}/{epochs} - Loss: {running_loss/num_batches:.4f}, '
              f'Accuracy: {accuracy:.2f}%')


In [11]:
# Testing function
def test(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predicted = torch.max(output, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')

In [12]:
# Report the compute device, fit the network for five epochs, then score it
# on the test loader.
print(f"Using device: {device}")
train(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=5,
)
test(model=model, test_loader=test_loader)

Using device: cpu
Epoch 1/5, Batch 0, Loss: 2.2776
Epoch 1/5, Batch 200, Loss: 0.3646
Epoch 1/5, Batch 400, Loss: 0.4820
Epoch 1/5, Batch 600, Loss: 0.1804
Epoch 1/5, Batch 800, Loss: 0.2163
Epoch 1/5 - Loss: 0.4351, Accuracy: 86.20%
Epoch 2/5, Batch 0, Loss: 0.5291
Epoch 2/5, Batch 200, Loss: 0.2534
Epoch 2/5, Batch 400, Loss: 0.1936
Epoch 2/5, Batch 600, Loss: 0.2397
Epoch 2/5, Batch 800, Loss: 0.3056
Epoch 2/5 - Loss: 0.2145, Accuracy: 93.41%
Epoch 3/5, Batch 0, Loss: 0.1733
Epoch 3/5, Batch 200, Loss: 0.2001
Epoch 3/5, Batch 400, Loss: 0.1870
Epoch 3/5, Batch 600, Loss: 0.2056
Epoch 3/5, Batch 800, Loss: 0.0961
Epoch 3/5 - Loss: 0.1783, Accuracy: 94.52%
Epoch 4/5, Batch 0, Loss: 0.1019
Epoch 4/5, Batch 200, Loss: 0.0508
Epoch 4/5, Batch 400, Loss: 0.2110
Epoch 4/5, Batch 600, Loss: 0.2289
Epoch 4/5, Batch 800, Loss: 0.2059
Epoch 4/5 - Loss: 0.1540, Accuracy: 95.18%
Epoch 5/5, Batch 0, Loss: 0.0489
Epoch 5/5, Batch 200, Loss: 0.1265
Epoch 5/5, Batch 400, Loss: 0.0951
Epoch 5/5, Batc