#### **MNIST MLP with pytorch**

This is a template of an MNIST classifier built with an Artificial Neural Network (a multilayer perceptron, MLP).
It follows the basic building blocks of a Deep Learning classifier.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt


In [2]:
# Device configuration: run on the GPU when CUDA is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reproducibility: seed PyTorch's default RNG so that weight initialisation and
# DataLoader shuffling repeat across "Restart & Run All".
# NOTE: this alone may not make GPU runs bit-identical.
seed = 42
torch.manual_seed(seed)

# Hyperparameters

num_classes = 10  # Digits 0-9
num_epochs = 5  # number of full passes over the training set
batch_size = 100  # samples per mini-batch
learning_rate = 0.001  # optimizer step size

#### **Reading MNIST Dataset**
Most frameworks offer an easy way to download the MNIST dataset. In this case we use the `datasets` module from `torchvision`.

PyTorch models expect input data in the form of tensors because tensors are the fundamental data structure in this framework for computation.

As shown further below, the variable **images** is a tensor; it holds one batch of the data that is fed to the network.

In [3]:
# Read the data: build the MNIST datasets and wrap them in batch iterators

# Each image is converted to a tensor when it is read from the dataset
to_tensor = transforms.ToTensor()
data_root = 'data'

# MNIST dataset (the training split is requested with download=True)
train_dataset = datasets.MNIST(root=data_root, train=True, transform=to_tensor, download=True)
test_dataset = datasets.MNIST(root=data_root, train=False, transform=to_tensor)

# Data loaders: training batches are shuffled, test batches keep dataset order
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [4]:
# Display the Python type of the test dataset object
type(test_dataset)

torchvision.datasets.mnist.MNIST

#### **ANN architecture definition**
This is the key part of the notebook: in this cell we define the structure of the network
and its main design choices:

- Activation function 
- Layer size
- Number of layers


In [6]:
# Layer widths of the MLP
input_size = 28 * 28  # 28x28 images flattened into 784 features
hidden_size1, hidden_size2 = 256, 128  # widths of the first and second hidden layers

class NeuralNet(nn.Module):
    """Multilayer perceptron with two hidden ReLU layers.

    Architecture: Linear(input_size, hidden_size1) -> ReLU ->
    Linear(hidden_size1, hidden_size2) -> ReLU -> Linear(hidden_size2, num_classes).

    Args:
        input_size: number of features per sample after flattening.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        num_classes: number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super().__init__()
        # Remember the expected feature count so forward() does not depend on
        # a module-level variable that happens to share the same name.
        self.input_size = input_size
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_size2, num_classes)

    def forward(self, x):
        """Flatten `x` to (-1, input_size) and return the raw class scores.

        No softmax is applied here; the output has `num_classes` columns.
        """
        out = x.view(-1, self.input_size)  # Flatten the image
        out = self.fc1(out)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        return out
 
# Instantiate the network and move its parameters to the selected device
model = NeuralNet(
    input_size=input_size,
    hidden_size1=hidden_size1,
    hidden_size2=hidden_size2,
    num_classes=num_classes,
)
model = model.to(device)

# Cross-entropy loss on the network outputs, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)



#### **Network training**
In this section the network is trained on the MNIST data prepared previously.

In [7]:
# Training the model
# Put the model in training mode explicitly: a later cell calls model.eval(),
# so re-running this cell would otherwise train in evaluation mode.
model.train()
total_step = len(train_loader)

# Per-epoch history, used by the visualisation cell at the end of the notebook
train_losses = []      # sample-weighted mean training loss of each epoch
train_accuracies = []  # training accuracy (%) of each epoch

for epoch in range(num_epochs):
    # Accumulate on the device so no host synchronisation is needed per step
    loss_sum = torch.zeros((), device=device)
    correct_sum = torch.zeros((), dtype=torch.long, device=device)
    sample_count = 0

    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running statistics, computed from the outputs produced before this
        # step's weight update; detached so no autograd graph is kept alive.
        batch_count = labels.size(0)
        _, predicted = outputs.detach().max(1)
        loss_sum += loss.detach() * batch_count
        correct_sum += (predicted == labels).sum()
        sample_count += batch_count

        if (i+1) % 100 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}')

    # Skip the bookkeeping for an empty loader instead of dividing by zero
    if sample_count:
        train_losses.append(loss_sum.item() / sample_count)
        train_accuracies.append(100 * correct_sum.item() / sample_count)

# Testing the model
# Evaluation mode plus no_grad: inference only, no gradients are tracked
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest score in each row
        predicted = torch.max(outputs, dim=1).indices
        total += labels.shape[0]
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')



Epoch [1/5], Step [100/600], Loss: 0.2600
Epoch [1/5], Step [200/600], Loss: 0.4031
Epoch [1/5], Step [300/600], Loss: 0.2388
Epoch [1/5], Step [400/600], Loss: 0.2919
Epoch [1/5], Step [500/600], Loss: 0.1596
Epoch [1/5], Step [600/600], Loss: 0.1626
Epoch [2/5], Step [100/600], Loss: 0.1115
Epoch [2/5], Step [200/600], Loss: 0.2196
Epoch [2/5], Step [300/600], Loss: 0.0917
Epoch [2/5], Step [400/600], Loss: 0.0878
Epoch [2/5], Step [500/600], Loss: 0.1990
Epoch [2/5], Step [600/600], Loss: 0.1687
Epoch [3/5], Step [100/600], Loss: 0.0818
Epoch [3/5], Step [200/600], Loss: 0.0657
Epoch [3/5], Step [300/600], Loss: 0.0653
Epoch [3/5], Step [400/600], Loss: 0.0906
Epoch [3/5], Step [500/600], Loss: 0.0771
Epoch [3/5], Step [600/600], Loss: 0.0309
Epoch [4/5], Step [100/600], Loss: 0.0536
Epoch [4/5], Step [200/600], Loss: 0.0292
Epoch [4/5], Step [300/600], Loss: 0.0877
Epoch [4/5], Step [400/600], Loss: 0.0242
Epoch [4/5], Step [500/600], Loss: 0.1740
Epoch [4/5], Step [600/600], Loss:

In [8]:
# Report the type and shape of the most recent `images` batch
images_type = type(images)
images_shape = images.shape
print("the variable images is a ", images_type, " with a dimension of ", images_shape)

the variable images is a  <class 'torch.Tensor'>  with a dimension of  torch.Size([100, 1, 28, 28])


In [9]:
# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(f'Test Accuracy: {100 * correct / total:.2f}%')



Test Accuracy: 97.67%


In [None]:
# Visualize Loss and Accuracy
# NOTE(review): this cell reads `train_losses` and `train_accuracies`; both must
# already be defined (presumably one value per epoch) before it is run.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: training loss
loss_ax.plot(train_losses, label='Training Loss', color='orange')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training Loss')
loss_ax.legend()

# Right panel: training accuracy
acc_ax.plot(train_accuracies, label='Training Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy (%)')
acc_ax.set_title('Training Accuracy')
acc_ax.legend()

plt.show()