In [1]:
import numpy as np
import pandas as pd
import scipy.io
import functools as fn
from functools import wraps
import time
from tqdm import tqdm
import matplotlib.pyplot as plt

In [2]:
mat = scipy.io.loadmat('data_image_classif.mat')

X = mat['Xts']
n = X.shape[1]

X[X != 0] = 1

df_y = pd.DataFrame(mat['yts'])
Y = pd.get_dummies(df_y[0]).to_numpy() # OneHotEncoding
print(f'X shape : {X.shape}')
print(f'Y shape : {Y.shape}')

X_to_predict = mat['Xvr']
X_to_predict[X_to_predict != 0] = 1

X shape : (60000, 784)
Y shape : (60000, 10)


In [3]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

test_size = int(0.3 * len(X)) 
X, Y = unison_shuffled_copies(X, Y)
X_train, Y_train = X[test_size:], Y[test_size:]
X_test, Y_test = X[:test_size], Y[:test_size]

In [4]:
def timeit(func):
    @wraps(func)
    def timeit_wrapper(*args, **kwargs):
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        end_time = time.perf_counter()
        total_time = end_time - start_time
        print(f'Function {func.__name__} elapsed {total_time:.2e}s')
        return result
    return timeit_wrapper

def plot(x, y, title):
    ax = plt.axes()
    ax.plot(x, y, c='r', lw=1)
    ax.set_xlabel('Step')
    ax.set_ylabel('Loss')
    ax.set_title(title)
    # ax.legend(loc='upper right')

In [5]:
class Perceptron:
    def __init__(self, n) -> None:
        self.n = n
        self.m = 10
        self.mn = self.n * self.m
        self.w = np.random.uniform(-0.05, 0.05, (self.m * (self.n + 1), 1))
        self.gradw = np.zeros_like(self.w)
        self.update_params()

    def reset(self):
        self.w = np.random.uniform(-0.05, 0.05, (self.m * (self.n + 1), 1))
        self.update_params()

    def update_params(self):
        self.W = self.w[:self.n * self.m].reshape((self.m, self.n))
        self.b = self.w[self.n * self.m:].reshape((self.m, 1))

    def softmax(self, x):
        z = self.W@x + self.b
        e_z = np.exp(z - np.max(z))
        act_func = e_z / e_z.sum(axis=0)
        return act_func

    def compute_single_gradient(self, x, y):
        gradw = np.zeros((self.m * (self.n + 1), 1))
        y_diff = self.softmax(x) - y

        dW = y_diff @ x.T
        gradw[:self.mn, :] = dW.reshape((self.mn, 1))
        gradw[self.mn:, :] = y_diff
        return gradw

    def compute_gradient(self, X, Y, single=False):
        gradw = np.zeros_like(self.w)

        if single:
            gradw = self.compute_single_gradient(X.reshape(self.n, 1), Y.reshape(self.m, 1))
        else:
            for i in range(X.shape[0]):
                gradw += self.compute_single_gradient(X[i, :].reshape(self.n, 1), Y[i, :].reshape(self.m, 1))
            gradw /= X.shape[0]
        return gradw

    def calculate_loss(self, X, Y):
        loss = 0
        for i in range(X.shape[0]):
            x = X[i, :].reshape((self.n, 1))
            y = Y[i, :].reshape((self.m, 1))

            z = self.softmax(x)
            loss -= np.log(z[np.argmax(y)])
        return loss

    def train(self, X, Y, trainer, steps, do_plot=False):
        self.reset()
        loss = [0] * steps
        for i in tqdm(range(steps)):
            trainer(X, Y)
            if do_plot:
                loss[i] = self.calculate_loss(X, Y)

        if do_plot:
            plot([i for i in range(1, steps+1)], loss, 'Loss during train')
        return loss

    def test(self, X, Y):
        result = 0

        for i in range(len(X)):
            x = X[i, :].reshape((self.n, 1))
            y = Y[i, :].reshape((self.m, 1))
            
            act_func = self.softmax(x)

            y_pred = np.argmax(act_func)
            y_real = np.argmax(y)

            if y_pred == y_real:
                result += 1
        
        accuracy = round(result / len(X) * 100, 1)
        return accuracy
    
    def full_gd(self, X: np.ndarray, Y: np.ndarray, params):
        lr = params[0]

        self.w = self.w - lr * self.compute_gradient(X, Y)
        self.update_params()

    def sgd(self, X: np.ndarray, Y: np.ndarray, params):
        selected = np.random.randint(0, self.n)
        lr = params[0]

        self.w = self.w - lr * self.compute_gradient(X[selected, :], Y[selected, :], single=True)
        self.update_params()

    def batch_sgd(self, X: np.ndarray, Y: np.ndarray, params):
        lr = params[0] 
        batch_size = params[1]

        selected = np.random.randint(0, self.n, size=batch_size)
        self.w = self.w - lr * self.compute_gradient(X[selected, :], Y[selected, :])
        self.update_params()

In [6]:
# Full GD
lr = 0.2
steps = 2000
do_plot = False
model_full = Perceptron(n)
loss_full = model_full.train(X_train, Y_train, fn.partial(model_full.full_gd, params=[lr]), steps, do_plot)

acc = model_full.test(X_test, Y_test)
print(f'Model accuracy: {acc}%')

100%|██████████| 2000/2000 [44:23<00:00,  1.33s/it]


Model accuracy: 91.1%


In [None]:
# SGD
lr = 0.2
steps = 1000
do_plot = True
model_sgd = Perceptron(n)
loss_sgd = model_sgd.train(X_train, Y_train, fn.partial(model_sgd.sgd, params=[lr]), steps, do_plot)

acc = model_sgd.test(X_test, Y_test)
print(f'Model accuracy: {acc}%')

In [None]:
# Batch SGD
lr = 0.2
batch_size = 1000
steps = 1000
do_plot = True
model_batch = Perceptron(n)
loss_batch = model_batch.train(X_train, Y_train, fn.partial(model_batch.batch_sgd, params=[lr, batch_size]), steps, do_plot)
# model_batch.train(X, Y, fn.partial(model_batch.batch_sgd, params=[lr, batch_size]), steps=1000)

acc = model_batch.test(X_test, Y_test)
print(f'Model accuracy: {acc}%')

In [7]:
def predict(model: Perceptron, X):
    size = X.shape[0]
    Y = [0] * size

    for i in range(size):
        x = X[i, :].reshape((model.n, 1))
        act_func = model.softmax(x)
        y_pred = np.argmax(act_func)
        Y[i] = (y_pred + 1) * 100 + 1

    result = pd.DataFrame({'id': [i for i in range(1, size + 1)], 'class': Y})
    result.to_csv("result.csv", sep=',', index=False)

predict(model_full, X_to_predict)