## Implementation a desne classifier for classifaction on MNIST dataset

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

from ml.layers.linear import Linear
from ml.layers.activations import Sigmoid
from ml.loss import MSELoss
from ml.optimizers import SGD

### Loading data and creating data utils

In [2]:
class Dataset:
    def __init__(self, path):
        df = pd.read_csv(path, header=None)
        self.X = df.iloc[:, 1:]
        self.y = df.iloc[:, 0]
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return np.array(self.X.iloc[idx]), self.y[idx]

In [3]:
train_data = Dataset('./datasets/mnist/mnist_train.csv')
test_data = Dataset('./datasets/mnist/mnist_test.csv')
len(train_data), len(test_data)

(60000, 10000)

In [4]:
def get_batch(dataset, batchsize):
    n = len(dataset)
    indices = list(range(n))
    np.random.shuffle(indices)
    start = 0
    end = batchsize
    while True:
        if start == n:
            start = 0
            end = batchsize
            np.random.shuffle(indices)
            yield None
            continue
        batch_X, batch_y = list(), list()
        for i in range(start, end):
            X, y = train_data[i]
            batch_X.append(X)
            batch_y.append(y)
        yield np.array(batch_X), np.array(batch_y)
        start = end
        end = min(n, end+batchsize)

In [5]:
BATCHSIZE = 32
train_loader = get_batch(train_data, BATCHSIZE)
test_loader = get_batch(test_data, BATCHSIZE)

## Training and testing utilities

In [6]:
def train_model(model, loss_fn, dataloader, optimizer, n_epochs):
    for epoch in range(n_epochs):
        batch_count = 0
        while True:
            batch = next(dataloader)
            if not batch:
                break
            batch_count += 1
            x, y = batch
            y = np.eye(10)[y]
            y_pred = model.forward(x)
            loss = loss_fn.forward(y, y_pred)
            din_loss = loss_fn.backward()
            model.backward(din_loss)
            model.update(optimizer)
            print("\rEpoch {} Batch {} Loss {}".format(epoch, batch_count, loss), end="") 
        print()

In [7]:
def test_model(model, dataloader):
    true = list()
    pred = list()
    while True:
        batch = next(dataloader)
        if not batch:
            break
        x, y = batch
        y_pred = np.argmax(clf.forward(x), axis=-1)
        true.extend(list(np.squeeze(y)))
        pred.extend(list(np.squeeze(y_pred)))
    print(classification_report(true, pred))

### Model Class

In [8]:
class Model:
    def __init__(self, layers=[]):
        self.layers = layers
        
    def add_layer(self, layer):
        self.layers.append(layer)
        
    def forward(self, x):
        for l in self.layers:
            x = l.forward(x)
        return x
    
    def backward(self, dx):
        for l in self.layers[::-1]:
            dx = l.backward(dx)
            
    def update(self, optim):
        for l in self.layers:
            if l.type == "learnable":
                for p in l.params:
                    l.params[p] = optim.update(l.params[p], l.grad[p])

## Implementation 1
- 3 dense layers followed by sigmoid
- MSE Loss
- SGD Optimizer with LR 0.1
- 5 epochs

In [9]:
layers = [
    Linear(784, 100),
    Sigmoid(),
    Linear(100, 20),
    Sigmoid(),
    Linear(20, 10),
    Sigmoid()
]
clf = Model(layers)

In [10]:
loss_fn = MSELoss()

In [11]:
sgd_optim = SGD(lr=0.1)

In [12]:
train_model(clf, loss_fn, train_loader, sgd_optim, 5)

  return 1 / (1 + np.exp(-x))


Epoch 0 Batch 1875 Loss 0.053395755404210565
Epoch 1 Batch 1875 Loss 0.051330457668268326
Epoch 2 Batch 1875 Loss 0.050307912642755484
Epoch 3 Batch 1875 Loss 0.049131947810160324
Epoch 4 Batch 1875 Loss 0.049512293328935446


In [14]:
test_model(clf, train_loader)

  return 1 / (1 + np.exp(-x))


              precision    recall  f1-score   support

           0       0.20      0.09      0.12      5923
           1       0.43      0.71      0.53      6742
           2       0.04      0.00      0.01      5958
           3       0.20      0.68      0.31      6131
           4       0.22      0.08      0.12      5842
           5       0.21      0.12      0.15      5421
           6       0.14      0.07      0.10      5918
           7       0.29      0.42      0.34      6265
           8       0.12      0.09      0.10      5851
           9       0.12      0.06      0.08      5949

    accuracy                           0.24     60000
   macro avg       0.20      0.23      0.19     60000
weighted avg       0.20      0.24      0.19     60000



In [13]:
test_model(clf, test_loader)

              precision    recall  f1-score   support

           0       0.21      0.09      0.13      1001
           1       0.42      0.74      0.54      1127
           2       0.05      0.00      0.01       991
           3       0.20      0.66      0.31      1032
           4       0.23      0.09      0.13       980
           5       0.20      0.11      0.14       863
           6       0.13      0.07      0.09      1014
           7       0.31      0.45      0.37      1070
           8       0.13      0.10      0.12       944
           9       0.11      0.05      0.07       978

    accuracy                           0.25     10000
   macro avg       0.20      0.24      0.19     10000
weighted avg       0.20      0.25      0.20     10000



## Implementation 2
- 3 dense layers followed by sigmoid
- MSE Loss
- SGD Optimizer with LR 0.1
- 10 epochs

In [16]:
clf = Model(layers)

In [17]:
train_model(clf, loss_fn, train_loader, sgd_optim, 10)

Epoch 0 Batch 1050 Loss 0.049476492722046916
Epoch 1 Batch 1875 Loss 0.049847328095996596
Epoch 2 Batch 1875 Loss 0.048697885268765675
Epoch 3 Batch 1875 Loss 0.048556133842159854
Epoch 4 Batch 1875 Loss 0.048102262978292724
Epoch 5 Batch 1875 Loss 0.047497936415689295
Epoch 6 Batch 1875 Loss 0.047168681899091115
Epoch 7 Batch 1875 Loss 0.046858681781957276
Epoch 8 Batch 1875 Loss 0.046803227011923996
Epoch 9 Batch 1875 Loss 0.047035857658350264


In [18]:
test_model(clf, train_loader)

              precision    recall  f1-score   support

           0       0.42      0.35      0.38      5923
           1       0.65      0.87      0.74      6742
           2       0.05      0.00      0.00      5958
           3       0.28      0.71      0.41      6131
           4       0.32      0.22      0.26      5842
           5       0.30      0.08      0.13      5421
           6       0.26      0.44      0.33      5918
           7       0.40      0.68      0.50      6265
           8       0.29      0.17      0.21      5851
           9       0.20      0.03      0.04      5949

    accuracy                           0.37     60000
   macro avg       0.32      0.35      0.30     60000
weighted avg       0.32      0.37      0.31     60000



In [19]:
test_model(clf, test_loader)

  return 1 / (1 + np.exp(-x))


              precision    recall  f1-score   support

           0       0.41      0.35      0.38      1001
           1       0.64      0.88      0.74      1127
           2       0.00      0.00      0.00       991
           3       0.29      0.68      0.40      1032
           4       0.34      0.24      0.28       980
           5       0.27      0.08      0.12       863
           6       0.27      0.45      0.34      1014
           7       0.41      0.68      0.52      1070
           8       0.30      0.20      0.24       944
           9       0.24      0.02      0.04       978

    accuracy                           0.38     10000
   macro avg       0.32      0.36      0.31     10000
weighted avg       0.32      0.38      0.32     10000

