# MLP

## Steps to do:

1. get training data
2. prepare data loading
3. define model
4. define optimization procedure
5. train model (and evaluate)

### MNIST dataset

Standard benchmark dataset for image classification - hand written digits: http://yann.lecun.com/exdb/mnist/

We will use `torchvision.datasets` to load the data and explore its contents.

In [1]:
# Load the train and test MNIST data
# --- YOUR CODE HERE ---
import torch
from torchvision import datasets, transforms

mnist_train = datasets.MNIST('../Datasets', train=True, download=True, transform=transforms.ToTensor())
mnist_test = datasets.MNIST('../Datasets', train=False, download=True, transform=transforms.ToTensor())

### Data loading

Use `torch.uitls.data.DataLoader` to prepare the batch loading

In [2]:
# Construct train and test data loaders to provide batches of data
# --- YOUR CODE HERE ---
from torch.utils.data import DataLoader, TensorDataset

batch_size = 10
dl_train = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
dl_test = DataLoader(mnist_test, batch_size=batch_size, shuffle=False)

### Model definition

2-layer MLP with ReLU activation:

$$\mathbf{h} = \sigma(\mathbf{x}^T \mathbf{W}{(1)} + \mathbf{b}{(1)}) \\
\mathbf{o} = \mathbf{h}^T \mathbf{W}^{(2)} + \mathbf{b}^{(2)}$$

In [3]:
# Initialize parametes
# --- YOUR CODE HERE ---
import torch.nn as nn

num_inputs, num_outputs, num_hidden = 784, 10, 16

w1 = torch.nn.Parameter(torch.randn(num_inputs, num_hidden))
b1 = torch.nn.Parameter(torch.randn(num_hidden))
w2 = torch.nn.Parameter(torch.randn(num_hidden, num_outputs))
b2 = torch.nn.Parameter(torch.randn(num_outputs))


w1.requires_grad


True

In [4]:
# Define relu activation function ReLU(x) = max(x, 0)
# --- YOUR CODE HERE ---
def relu(x):
    return torch.maximum(x,torch.Tensor([0]))

mt =torch.randn(5,3)

relu(mt)

tensor([[0.0000, 0.6581, 0.7088],
        [0.3933, 0.0000, 0.2369],
        [0.6248, 0.3164, 0.0000],
        [1.0337, 0.5241, 0.0000],
        [2.6988, 0.0000, 0.5725]])

In [5]:
# Define the MLP model
# --- YOUR CODE HERE ---



def lin_reg(x, w, b):
    return x.matmul(w) + b

def mlp(x):
    # 1st linear layer
    h = lin_reg(x, w1, b1)
    # pass through relu
    o = relu(h)
    # 2nd linear layer
    y = lin_reg(o, w2, b2)
    
    return y

x = torch.randn(10, num_inputs)
y = mlp(x)
y.shape

torch.Size([10, 10])

### Model training

Follows the same logic as for linear regression problem

In [10]:
# write the training procedure
# --- YOUR CODE HERE ---
import torch.optim as optim

lr = 0.1
num_epochs = 1
batch_size = 256

loss = nn.CrossEntropyLoss()
optimizer = optim.SGD([w1, b1, w2, b2], lr)

losses = []

for epoch in range(num_epochs): 
    for count, (x_batch, y_batch) in enumerate(mnist_train):  #train_dl
        
        # predictions using our mlp
        y_hat = mlp(torch.flatten(x_batch, 1)) #y_hat = mlp(torch.flatten(x_batch, 1), w1, b1, w2, b2)

        # get loss using predictions and true targets
        ll = loss(y_hat, y_batch)

        # get gradients
        optimizer.zero_grad()
        ll.backward()
        if count //20 == 0:
            losses.append(ll.item())
        # update parameters
        optimizer.step()
losses

TypeError: cross_entropy_loss(): argument 'target' (position 2) must be Tensor, not int

In [None]:
# plot graphs
&matplotlib inline
import matplotlib.pyplot as plt

plt.plot(losses)

In [None]:
# Display 10 data examples together with their labels

fig, axs = plt.subplots(2, 5)
fig.set_size_inches(8, 4)
axs = axs.flatten()
for i, batch in enumerate(mnist_test):
    axs[i].imshow(batch[0].squeeze(), cmap='gray')
    preds = mlp(torch.flatten(batch[0], 1))
    preds = torch.argmax(preds, dim=1)
    axs[i].set_title(f"{batch[1]}/{preds.item()}")
    axs[i].xaxis.set_visible(False)
    axs[i].yaxis.set_visible(False)
    if i == 9:
        break
