Imports

In [None]:
import numpy as np
import pandas as pd

DATA

In [None]:
# Read the training and test CSV files from the current working directory.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('./mnist_train.csv', './mnist_test.csv')
)

In [None]:
# Record (rows, columns) of each table, then convert both DataFrames to
# NumPy arrays for the matrix arithmetic that follows.
(m_train, n_train), (m_test, n_test) = train.shape, test.shape
train, test = np.array(train), np.array(test)

In [None]:
# Column 0 is taken as the label; the remaining columns are the features.
# Features are divided by 255 and transposed so that each column of
# x_train / x_test is one sample.
y_train = train[:, 0]
x_train = train[:, 1:n_train].T / 255
y_test = test[:, 0]
x_test = test[:, 1:n_test].T / 255

In [None]:
def init_weights(n_inputs=784, n_hidden=10, n_outputs=10):
    """Draw the initial weight matrices for the two-layer network.

    Every entry is sampled uniformly from [-0.5, 0.5) using NumPy's global
    random state.

    Args:
        n_inputs: Number of input features. Defaults to 784, the value that
            was previously hard-coded.
        n_hidden: Number of hidden units. Defaults to 10.
        n_outputs: Number of output units. Defaults to 10.

    Returns:
        Tuple ``(w1, w2)`` where ``w1`` has shape ``(n_hidden, n_inputs)``
        and ``w2`` has shape ``(n_outputs, n_hidden)``.
    """
    w1 = np.random.rand(n_hidden, n_inputs) - 0.5
    w2 = np.random.rand(n_outputs, n_hidden) - 0.5
    return w1, w2

def init_bias(n_hidden=10, n_outputs=10):
    """Draw the initial bias column vectors for the two-layer network.

    Every entry is sampled uniformly from [-0.5, 0.5) using NumPy's global
    random state.

    Args:
        n_hidden: Number of hidden units. Defaults to 10, the value that was
            previously hard-coded.
        n_outputs: Number of output units. Defaults to 10.

    Returns:
        Tuple ``(b1, b2)`` with shapes ``(n_hidden, 1)`` and
        ``(n_outputs, 1)``.
    """
    b1 = np.random.rand(n_hidden, 1) - 0.5
    b2 = np.random.rand(n_outputs, 1) - 0.5
    return b1, b2

def activation_fn(z):
    """Return the element-wise maximum of 0 and ``z`` (ReLU)."""
    rectified = np.maximum(0, z)
    return rectified

def smooth_fn(z):
    """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))``.

    The value is computed in an overflow-free form: ``exp`` is only ever
    evaluated at ``-|z|``, which lies in (0, 1], so large-magnitude inputs
    no longer trigger floating-point overflow warnings.

    Args:
        z: Scalar or array of pre-activation values.

    Returns:
        NumPy array with the same shape as ``z`` holding values in [0, 1].
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # For z >= 0 use 1 / (1 + e^-z); for z < 0 use the algebraically equal
    # e^z / (1 + e^z). Both are expressed through decay = e^-|z|.
    return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))


def deriv_activation_fn(z):
    """Return a boolean mask that is True where ``z`` is strictly positive.

    Used as the derivative of ``activation_fn``: when multiplied with a
    numeric array the mask acts as 1 where ``z > 0`` and 0 elsewhere.
    """
    is_positive = z > 0
    return is_positive

def y_label(y):
    """One-hot encode a vector of integer class labels.

    The number of columns is taken from ``y`` itself instead of the
    module-level ``m_train``, so label vectors of any length (for example
    the test labels) are encoded correctly.

    Args:
        y: 1-D sequence of integer labels in the range 0..9.

    Returns:
        Array of shape ``(10, len(y))`` in which column ``i`` is all zeros
        except for a 1 in row ``y[i]``.
    """
    labels = np.asarray(y)
    n_samples = labels.size

    Y = np.zeros((10, n_samples))
    # Set one entry per column: row = label, column = sample index.
    Y[labels, np.arange(n_samples)] = 1
    return Y

def get_pred(x):
    """Return, for each column of ``x``, the row index of its largest entry."""
    predicted = np.argmax(x, axis=0)
    return predicted

def get_acc(x, y):
    """Return the fraction of columns of ``x`` whose prediction equals ``y``.

    Predictions come from ``get_pred(x)``; ``y`` is the array of true labels.
    """
    matches = get_pred(x) == y
    n_correct = np.count_nonzero(matches)
    return n_correct / y.size

In [None]:
def for_prop(x, w1, w2, b1, b2):
    """Run one forward pass through both layers.

    The hidden layer applies ``activation_fn`` to ``w1 . x + b1``; the output
    layer applies ``smooth_fn`` to ``w2 . a1 + b2``.

    Returns:
        Tuple ``(a1, a2, z1, z2)``: hidden activations, output activations,
        hidden pre-activations and output pre-activations.
    """
    # Hidden layer.
    hidden_pre = np.dot(w1, x) + b1
    hidden_out = activation_fn(hidden_pre)

    # Output layer.
    output_pre = np.dot(w2, hidden_out) + b2
    output_out = smooth_fn(output_pre)

    return hidden_out, output_out, hidden_pre, output_pre


def back_prop(x, Y, a1, a2, w1, w2, z1, z2):
    """Compute batch-averaged gradients for both layers.

    Gradients are averaged over the number of columns of ``Y`` (one column
    per sample) instead of the module-level ``m_train``, so the result is
    correct for a batch of any size.

    Args:
        x: Input matrix with one sample per column.
        Y: One-hot target matrix with one sample per column.
        a1: Hidden-layer activations from the forward pass.
        a2: Output-layer activations from the forward pass.
        w1: First-layer weights (unused; kept for interface compatibility).
        w2: Second-layer weights.
        z1: Hidden-layer pre-activations from the forward pass.
        z2: Output-layer pre-activations (unused; kept for interface
            compatibility).

    Returns:
        Tuple ``(dw1, dw2, db1, db2)`` of gradients for the weights and
        biases; the bias gradients are column vectors.
    """
    n_samples = Y.shape[1]

    # Output layer: error term is the difference between output and target.
    dz2 = a2 - Y
    dw2 = dz2.dot(a1.T) / n_samples
    db2 = dz2.sum(axis=1, keepdims=True) / n_samples

    # Hidden layer: propagate the error back through w2 and mask it with the
    # activation derivative.
    dz1 = w2.T.dot(dz2) * deriv_activation_fn(z1)
    dw1 = dz1.dot(x.T) / n_samples
    db1 = dz1.sum(axis=1, keepdims=True) / n_samples

    return dw1, dw2, db1, db2


def update_value(w1, w2, b1, b2, dw1, dw2, db1, db2, lr=0.1):
    """Apply one gradient-descent step to every parameter.

    The learning rate is now an explicit argument; previously the function
    read a global ``lr`` that is not defined in this file, which raised a
    ``NameError`` on the first call.

    Args:
        w1, w2: Current weight matrices.
        b1, b2: Current bias vectors.
        dw1, dw2: Gradients for the weights.
        db1, db2: Gradients for the biases.
        lr: Step size multiplied with each gradient. Defaults to 0.1.

    Returns:
        Tuple ``(w1, w2, b1, b2)`` of updated parameters; the inputs are not
        modified in place.
    """
    w1 = w1 - lr * dw1
    w2 = w2 - lr * dw2
    b1 = b1 - lr * db1
    b2 = b2 - lr * db2

    return w1, w2, b1, b2


In [None]:
def grad_desc(x_train, y_train, iterations):
    """Train the two-layer network with full-batch gradient descent.

    Parameters are initialised with ``init_weights`` and ``init_bias``; then
    ``iterations + 1`` forward / backward / update steps are run over the
    whole of ``x_train``. On every 50th step the step index and the accuracy
    of that step's forward-pass output (computed before the step's update)
    are printed.

    Returns:
        Tuple ``(w1, w2, b1, b2)`` of trained parameters.
    """
    w1, w2 = init_weights()
    b1, b2 = init_bias()
    Y = y_label(y_train)

    for step in range(iterations + 1):
        a1, a2, z1, z2 = for_prop(x_train, w1, w2, b1, b2)
        grads = back_prop(x_train, Y, a1, a2, w1, w2, z1, z2)
        w1, w2, b1, b2 = update_value(w1, w2, b1, b2, *grads)

        # Only report progress on multiples of 50.
        if step % 50 != 0:
            continue
        accuracy = get_acc(a2, y_train)
        print(step)
        print(f'Accuracy : {accuracy*100}%')

    return w1, w2, b1, b2

In [None]:
# Train on the full training set and keep the learned parameters.
W1, W2, B1, B2 = grad_desc(x_train, y_train, iterations=2000)