In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision import datasets
from torch.utils.data import DataLoader, Subset
import torch.optim as optim

# Seed PyTorch's global RNG so the shuffled batch order produced by
# `train_loader` (and any weights initialised later from the same generator)
# repeats across Restart-and-Run-All. GPU kernels may still introduce small
# run-to-run differences.
SEED = 42
torch.manual_seed(SEED)

# Normalisation statistics applied to every image after ToTensor().
# NOTE(review): presumably the mean/std of the MNIST training pixels -- confirm.
mean = torch.tensor(0.13066045939922333)
std = torch.tensor(0.30810779333114624)

# Per-sample preprocessing: image -> float tensor -> standardise -> 1-D vector.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((mean.item(),), (std.item(),)),
    transforms.Lambda(lambda x: torch.flatten(x)),
])

# Only the official *training* split (train=True) is loaded; it is downloaded
# into ./data/ on first use.
full_train_dataset = datasets.MNIST(
    root='./data/',
    train=True,
    download=True,
    transform=transform
)

# Carve the 60,000 loaded samples into two contiguous, non-overlapping slices.
# The second slice is called "test" throughout this notebook, but it is a
# held-out part of the training split, not the train=False split.
train_indices = list(range(0, 50000))
test_indices = list(range(50000, 60000))

train_subset = Subset(full_train_dataset, train_indices)
test_subset = Subset(full_train_dataset, test_indices)

batch_size = 64

# Training batches are reshuffled every epoch; evaluation order is fixed.
train_loader = DataLoader(
    train_subset,
    batch_size=batch_size,
    shuffle=True
)

test_loader = DataLoader(
    test_subset,
    batch_size=batch_size,
    shuffle=False
)

In [2]:
class MLP(nn.Module):
    """Fully connected classifier: Flatten -> [Linear -> ReLU] * k -> Linear.

    Args:
        input_size: Number of features per sample after flattening.
        hidden_sizes: Widths of the hidden layers, in order. Any iterable of
            ints is accepted. An empty iterable yields a single linear layer
            from ``input_size`` straight to ``num_classes``.
        num_classes: Number of output logits.

    Attributes:
        flatten: ``nn.Flatten`` applied to the input before the first layer.
        hidden_layers: ``nn.ModuleList`` alternating ``nn.Linear`` and
            ``nn.ReLU`` (Linear modules sit at the even indices).
        output_layer: Final ``nn.Linear`` producing the logits.
    """

    def __init__(self, input_size=784, hidden_sizes=(256, 128), num_classes=10):
        super().__init__()
        # Materialise once so generators work and the caller's object is never
        # shared with the module (the default is an immutable tuple as well).
        hidden_sizes = list(hidden_sizes)

        self.flatten = nn.Flatten()
        self.hidden_layers = nn.ModuleList()

        # Chain the widths: each Linear consumes the previous layer's output.
        # Linear layers are created in the same order as before, so parameter
        # names and seeded initialisation are unchanged.
        in_features = input_size
        for out_features in hidden_sizes:
            self.hidden_layers.append(nn.Linear(in_features, out_features))
            self.hidden_layers.append(nn.ReLU())
            in_features = out_features

        # With no hidden layers `in_features` is still `input_size`.
        self.output_layer = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch ``x``.

        Every dimension after the first is flattened before the first linear
        layer, so both image-shaped and already-flat batches are accepted.
        """
        x = self.flatten(x)
        for layer in self.hidden_layers:
            x = layer(x)
        logits = self.output_layer(x)
        return logits


In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using device: {device}')

# Build the network with its default sizes, then move its parameters to `device`.
model = MLP()
model = model.to(device)

# Cross-entropy is computed directly on the raw logits returned by the model.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter with a fixed learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Using device: cuda


In [4]:
num_epochs = 10
# Batches between progress lines; also the window the reported loss is
# averaged over, so the two can no longer drift apart.
log_interval = 100


def _count_correct(model, loader, device):
    """Count correct top-1 predictions of ``model`` over ``loader``.

    Puts the model in eval mode (it is NOT switched back afterwards) and runs
    without gradient tracking.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(correct, total)`` of Python ints.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            predicted = model(inputs).argmax(dim=1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()
    return correct, total


for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0  # summed loss since the last progress line
    correct = 0         # cumulative over the whole epoch
    total = 0

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # argmax on the detached logits replaces the legacy `.data` access;
        # the predicted class index is the same.
        predicted = outputs.detach().argmax(dim=1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

        # Loss is a mean over the last `log_interval` batches, while accuracy
        # is the running figure since the start of the epoch.
        if (batch_idx + 1) % log_interval == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{batch_idx+1}/{len(train_loader)}], Loss: {running_loss/log_interval:.4f}, Accuracy: {100 * correct / total:.2f}%')
            running_loss = 0.0

    # Accuracy on the held-out loader after each epoch; model.train() at the
    # top of the loop re-enables training mode for the next epoch.
    test_correct, test_total = _count_correct(model, test_loader, device)
    test_accuracy = 100 * test_correct / test_total
    print(f'End of Epoch {epoch+1}, Test Accuracy: {test_accuracy:.2f}%\n')


Epoch [1/10], Step [100/782], Loss: 0.6177, Accuracy: 82.73%
Epoch [1/10], Step [200/782], Loss: 0.2960, Accuracy: 86.82%
Epoch [1/10], Step [300/782], Loss: 0.2296, Accuracy: 88.94%
Epoch [1/10], Step [400/782], Loss: 0.1984, Accuracy: 90.21%
Epoch [1/10], Step [500/782], Loss: 0.1852, Accuracy: 90.99%
Epoch [1/10], Step [600/782], Loss: 0.1555, Accuracy: 91.67%
Epoch [1/10], Step [700/782], Loss: 0.1588, Accuracy: 92.17%
End of Epoch 1, Test Accuracy: 95.71%

Epoch [2/10], Step [100/782], Loss: 0.1084, Accuracy: 96.80%
Epoch [2/10], Step [200/782], Loss: 0.1064, Accuracy: 96.82%
Epoch [2/10], Step [300/782], Loss: 0.1099, Accuracy: 96.74%
Epoch [2/10], Step [400/782], Loss: 0.0999, Accuracy: 96.77%
Epoch [2/10], Step [500/782], Loss: 0.1159, Accuracy: 96.75%
Epoch [2/10], Step [600/782], Loss: 0.1028, Accuracy: 96.72%
Epoch [2/10], Step [700/782], Loss: 0.1132, Accuracy: 96.70%
End of Epoch 2, Test Accuracy: 97.28%

Epoch [3/10], Step [100/782], Loss: 0.0715, Accuracy: 97.70%
Epoch [

In [5]:
# Final pass over the held-out loader: eval mode, no gradient tracking.
model.eval()
test_correct, test_total = 0, 0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        test_correct += int((predicted == targets).sum())
        test_total += len(targets)

# Percentage of correctly classified samples across all batches.
test_accuracy = 100 * test_correct / test_total
print(f'Final Test Accuracy: {test_accuracy:.2f}%')

Final Test Accuracy: 97.78%
