# Multilayer Perceptron

Dataset: MNIST  
Each sample is a 28×28 grayscale image of a handwritten digit, flattened into 784 features. The training set contains 60,000 images and the test set contains 10,000 images.

Number of input-layer neurons: 784 (one per pixel, 28×28)  
Number of output-layer neurons: 10 (one per digit class)

### Data processing

In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

In [None]:
# Load the training and test CSV files into DataFrames.
# header=None makes pandas treat the first line as data instead of column names.
train_path, test_path = "mnist_train.csv", "mnist_test.csv"
train_data, test_data = (
    pd.read_csv(csv_path, header=None) for csv_path in (train_path, test_path)
)

In [5]:
# Sanity check: show (rows, columns) of the training frame, then the test frame.
for frame in (train_data, test_data):
    print(frame.shape)

(60000, 785)
(10000, 785)


In [52]:
# Labels: the first column of each frame.
y_train = train_data.iloc[:, 0].to_numpy()
y_test = test_data.iloc[:, 0].to_numpy()

# Features: every remaining column, divided by 255.0
# (presumably 0-255 grayscale intensities, which this maps to [0, 1] -- TODO confirm).
x_train = train_data.iloc[:, 1:].to_numpy() / 255.0
x_test = test_data.iloc[:, 1:].to_numpy() / 255.0

In [54]:
# Convert the arrays to tensors: float32 for the features, int64 (long) for the labels.
x_train, x_test = (
    torch.tensor(features, dtype=torch.float32) for features in (x_train, x_test)
)
y_train, y_test = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

In [56]:
# Sanity check: print the shape of each feature/label tensor, train first, then test.
for tensor in (x_train, y_train, x_test, y_test):
    print(tensor.shape)

torch.Size([60000, 784])
torch.Size([60000])
torch.Size([10000, 784])
torch.Size([10000])


### Building Neural Networks

In [58]:
import torch.nn as nn
import torch.nn.functional as F
from tqdm.auto import tqdm

In [60]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [62]:
class MNIST(nn.Module):
    """Multilayer perceptron with two hidden ReLU layers and a linear output layer.

    The default sizes reproduce the original architecture:
    784 input features >>> 1000 neurons >>> 500 neurons >>> 10 output classes.

    Args:
        in_features: Number of input features per sample (default 784).
        hidden1: Width of the first hidden layer (default 1000).
        hidden2: Width of the second hidden layer (default 500).
        num_classes: Number of output scores per sample (default 10).
    """

    def __init__(self, in_features=784, hidden1=1000, hidden2=500, num_classes=10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Return the raw class scores for ``x``.

        Args:
            x: Tensor whose last dimension has ``in_features`` entries.

        Returns:
            Tensor with ``num_classes`` scores in the last dimension. No softmax
            is applied here, so the output is unnormalised.
        """
        x = F.relu(self.fc1(x))  # use "relu" activation function
        x = F.relu(self.fc2(x))
        return self.fc3(x)



In [82]:
# Seed PyTorch's random number generator before the layers are created so that
# the random weight initialisation (and therefore the training run) is repeatable.
torch.manual_seed(0)

mnist = MNIST().to(device)  # instantiate the model and move it to the selected device
print(mnist)

MNIST(
  (fc1): Linear(in_features=784, out_features=1000, bias=True)
  (fc2): Linear(in_features=1000, out_features=500, bias=True)
  (fc3): Linear(in_features=500, out_features=10, bias=True)
)


In [84]:
# Hyperparameters for full-batch gradient descent
lr = 0.001        # learning rate (step size of each parameter update)
num_epochs = 100  # number of passes over the whole training set

losses = []  # training loss recorded after every epoch

# Move the training data to the target device once, before the loop:
# the transfer does not depend on the epoch.
x_train = x_train.to(device)
y_train = y_train.to(device)

for epoch in range(num_epochs):
    # forward pass over the entire training set (a single batch per epoch)
    y_pred = mnist(x_train)

    # calculate the loss
    loss = F.cross_entropy(y_pred, y_train)

    # backpropagation
    loss.backward()

    # Update the parameters with a plain gradient-descent step.
    # no_grad() keeps the in-place update itself out of the autograd graph.
    with torch.no_grad():
        for p in mnist.parameters():
            p -= lr * p.grad
    mnist.zero_grad()  # avoid gradient accumulation across epochs

    epoch_loss = loss.item()
    print(f'Epoch {epoch}, Loss: {epoch_loss}')
    losses.append(epoch_loss)
        

Epoch 0, Loss: 13.138396263122559
Epoch 1, Loss: 15.532133102416992
Epoch 2, Loss: 16.90188980102539
Epoch 3, Loss: 16.443052291870117
Epoch 4, Loss: 9.296648025512695
Epoch 5, Loss: 6.8345136642456055
Epoch 6, Loss: 2.460996389389038
Epoch 7, Loss: 1.5471001863479614
Epoch 8, Loss: 1.785235047340393
Epoch 9, Loss: 1.0998437404632568
Epoch 10, Loss: 0.9675479531288147
Epoch 11, Loss: 0.9238576292991638
Epoch 12, Loss: 0.8884881138801575
Epoch 13, Loss: 0.857928454875946
Epoch 14, Loss: 0.8308229446411133
Epoch 15, Loss: 0.8066513538360596
Epoch 16, Loss: 0.7849944233894348
Epoch 17, Loss: 0.7654323577880859
Epoch 18, Loss: 0.7476842403411865
Epoch 19, Loss: 0.7314475178718567
Epoch 20, Loss: 0.7165075540542603
Epoch 21, Loss: 0.7026445269584656
Epoch 22, Loss: 0.6897234320640564
Epoch 23, Loss: 0.6776250600814819
Epoch 24, Loss: 0.666257381439209
Epoch 25, Loss: 0.6555370688438416
Epoch 26, Loss: 0.6454069018363953
Epoch 27, Loss: 0.6358129978179932
Epoch 28, Loss: 0.6267023682594299
E

In [None]:
# Evaluate on the test set; gradients are not needed for inference.
with torch.no_grad():
    x_test = x_test.to(device)
    y_test = y_test.to(device)

    y_test_pred = mnist(x_test)  # one row of class scores per test sample
    # The predicted class is the index of the highest score in each row.
    predicted = torch.argmax(y_test_pred, dim=1)
    correct = (predicted == y_test).sum().item()  # number of matching predictions
    accuracy = correct / len(y_test)
    print(f'Test Accuracy: {accuracy * 100:.2f}%')
    
    
    

Test Accuracy: 89.72%
