In [2]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.nn.functional import one_hot

# Define the MLP model (as above)
class MLP(nn.Module):
    """Fully connected classifier built from Linear/ReLU stages.

    Each sample is flattened, passed through one ``Linear`` + ``ReLU`` pair
    per entry of ``hidden_sizes``, and finally projected to ``num_classes``
    raw scores by a last ``Linear`` layer. No activation is applied to the
    output.

    With the default arguments the network is 784 -> 128 -> 64 -> 32 -> 10,
    and the sub-modules keep the same positions inside ``self.layers`` as a
    hand-written ``nn.Sequential`` would give them, so parameter names such
    as ``layers.1.weight`` are stable.

    Args:
        in_features: Number of values per sample once flattened.
        hidden_sizes: Width of each hidden layer, in order. An empty
            sequence yields a single linear projection.
        num_classes: Number of output scores per sample.
    """

    def __init__(self, in_features=28 * 28, hidden_sizes=(128, 64, 32),
                 num_classes=10):
        super().__init__()
        stages = [nn.Flatten()]
        width = in_features
        # Linear layers are created strictly in input-to-output order so the
        # default configuration initialises its weights in the usual order.
        for hidden in hidden_sizes:
            stages.append(nn.Linear(width, hidden))
            stages.append(nn.ReLU())
            width = hidden
        stages.append(nn.Linear(width, num_classes))
        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        """Return the raw class scores computed for the batch ``x``."""
        return self.layers(x)

# Instantiate the network
model = MLP()

# MNIST training split, converted to tensors and served in shuffled
# mini-batches of 64 samples
transform = transforms.ToTensor()
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

# Mean-squared-error objective, minimised with plain SGD
loss_fn = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Training loop: ten full passes over the training set
for epoch in range(10):
    for images, labels in train_loader:
        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass: one row of 10 scores per image
        outputs = model(images)

        # MSELoss needs float targets shaped like the outputs, so the integer
        # class labels are expanded to one-hot vectors
        labels_one_hot = one_hot(labels, num_classes=10).float()
        loss = loss_fn(outputs, labels_one_hot)

        # Backpropagate and apply the parameter update
        loss.backward()
        optimizer.step()

    # Reports the loss of the last mini-batch seen in this epoch
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

Epoch 1, Loss: 0.08842538297176361
Epoch 2, Loss: 0.08597208559513092
Epoch 3, Loss: 0.08326978981494904
Epoch 4, Loss: 0.08028261363506317
Epoch 5, Loss: 0.0728931650519371
Epoch 6, Loss: 0.06935524195432663
Epoch 7, Loss: 0.06703294813632965
Epoch 8, Loss: 0.0667542964220047
Epoch 9, Loss: 0.059144891798496246
Epoch 10, Loss: 0.05437822267413139
