In [1]:
import numpy as np
import utils

import torch
from torch import nn
from torch.nn import functional as F

hyperparameters

In [2]:
epochs = 10
batch_size = 64
lr = 0.1
resize = 28
hidden_size = 128

Load Fashion MNIST

In [3]:
train_iter, test_iter = utils.load_fashion_mnist(batch_size, resize)

### Implemention of MLP from Scratch

Initialize Parameters

In [4]:
num_in = resize * resize
num_out = 10

W_1 = torch.normal(0, 0.01, (num_in, hidden_size), requires_grad = True)
W_2 = torch.normal(0, 0.01, (hidden_size, num_out), requires_grad = True)
b_1 = torch.normal(0, 0.01, (hidden_size,), requires_grad = True)
b_2 = torch.normal(0, 0.01, (num_out,), requires_grad = True)
params = [W_1, W_2, b_1, b_2]

Activation Function

In [5]:
def ReLU(X):
    a = torch.zeros_like(X)
    return torch.max(X, a)

Model

In [6]:
def MLPReg(X, params, activation):
    '''Implemention of MLP model from Scratch'''
    W_1, W_2, b_1, b_2 = params
    X = X.reshape(X.shape[0], -1)
    H = activation(X @ W_1 + b_1)
    return F.softmax(H @ W_2 + b_2, dim = 1)

Train

In [7]:
train_metric = utils.Accumulator(3)
test_metric = utils.Accumulator(3)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params, lr)

for epoch in range(epochs):
    train_metric.reset()
    test_metric.reset()
    
    for X, y in train_iter:
        optimizer.zero_grad()
        y_hat = MLPReg(X, params, ReLU)
        loss = loss_fn(y_hat, y)
        loss.backward()
        optimizer.step()
        # utils.SGD(params, lr)
        train_metric.add(float(loss) * y.numel(), utils.accuracy(y_hat, y), y.numel())
    
    with torch.no_grad():
        for X, y in test_iter:
            y_hat = MLPReg(X, params, ReLU)
            loss = loss_fn(y_hat, y)
            acc = utils.accuracy(MLPReg(X, params, ReLU), y)
            test_metric.add(float(loss) * y.numel(), acc, y.numel())
    print('epoch: {}, train loss: {:.3f}, train acc: {:.3f}, test loss: {:.3f},test acc: {:.3f}'.format(
        epoch, 
        train_metric[0] / train_metric[2], train_metric[1] / train_metric[2], 
        test_metric[0] / test_metric[2], test_metric[1] / test_metric[2]
    ))

epoch: 0, train loss: 2.021, train acc: 0.466, test loss: 1.829,test acc: 0.652
epoch: 1, train loss: 1.785, train acc: 0.700, test loss: 1.761,test acc: 0.715
epoch: 2, train loss: 1.747, train acc: 0.726, test loss: 1.743,test acc: 0.728
epoch: 3, train loss: 1.735, train acc: 0.734, test loss: 1.736,test acc: 0.731
epoch: 4, train loss: 1.729, train acc: 0.738, test loss: 1.730,test acc: 0.737
epoch: 5, train loss: 1.725, train acc: 0.742, test loss: 1.730,test acc: 0.736
epoch: 6, train loss: 1.722, train acc: 0.745, test loss: 1.725,test acc: 0.741
epoch: 7, train loss: 1.719, train acc: 0.747, test loss: 1.723,test acc: 0.743
epoch: 8, train loss: 1.717, train acc: 0.748, test loss: 1.721,test acc: 0.744
epoch: 9, train loss: 1.715, train acc: 0.750, test loss: 1.720,test acc: 0.743


### Concise Implemention of MLP

In [8]:
num_in, num_out, hidden_size = resize * resize, 10, 64

model = nn.Sequential(
    nn.Linear(num_in, hidden_size), 
    nn.ReLU(),
    nn.Linear(hidden_size, num_out), 
    nn.Softmax(dim = 1)
)
model.apply(utils.init_weights)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr)

X = torch.randn((batch_size, 1, resize, resize))
model(X.reshape(-1, num_in))

train_metric = utils.Accumulator(3)
test_metric = utils.Accumulator(3)

for epoch in range(epochs):
    train_metric.reset()
    test_metric.reset()
    
    for X, y in train_iter:
        optimizer.zero_grad()
        y_hat = model(X.reshape(-1, num_in))
        loss = loss_fn(y_hat, y)
        loss.backward()
        optimizer.step()
        train_metric.add(float(loss) * y.numel(), utils.accuracy(y_hat, y), y.numel())
    
    with torch.no_grad():
        for X, y in test_iter:
            y_hat = model(X.reshape(-1, num_in))
            loss = loss_fn(y_hat, y)
            acc = utils.accuracy(y_hat, y)
            test_metric.add(float(loss) * y.numel(), acc, y.numel())
    print('epoch: {}, train loss: {:.3f}, train acc: {:.3f}, test loss: {:.3f},test acc: {:.3f}'.format(
        epoch, 
        train_metric[0] / train_metric[2], train_metric[1] / train_metric[2], 
        test_metric[0] / test_metric[2], test_metric[1] / test_metric[2]
    ))

epoch: 0, train loss: 2.042, train acc: 0.438, test loss: 1.799,test acc: 0.680
epoch: 1, train loss: 1.736, train acc: 0.752, test loss: 1.708,test acc: 0.770
epoch: 2, train loss: 1.689, train acc: 0.786, test loss: 1.686,test acc: 0.787
epoch: 3, train loss: 1.675, train acc: 0.797, test loss: 1.676,test acc: 0.794
epoch: 4, train loss: 1.667, train acc: 0.802, test loss: 1.670,test acc: 0.801
epoch: 5, train loss: 1.661, train acc: 0.806, test loss: 1.670,test acc: 0.798
epoch: 6, train loss: 1.657, train acc: 0.810, test loss: 1.665,test acc: 0.801
epoch: 7, train loss: 1.654, train acc: 0.813, test loss: 1.661,test acc: 0.806
epoch: 8, train loss: 1.652, train acc: 0.815, test loss: 1.660,test acc: 0.805
epoch: 9, train loss: 1.650, train acc: 0.817, test loss: 1.658,test acc: 0.808
