In [310]:
import numpy as np 
import pandas as pd

In [311]:
df_train=pd.read_csv('mnist_train.csv')
df_test=pd.read_csv('mnist_test.csv')

In [312]:
X_train = df_train.iloc[:, 1:].values.T
Y_train = df_train.iloc[:, 0].values
X_test = df_test.iloc[:, 1:].values.T
Y_test = df_test.iloc[:, 0].values

In [313]:
def initialise():
    W1 = np.random.randn(10, 784) * 0.01
    b1 = np.random.randn(10, 1) * 0.01
    W2 = np.random.randn(10, 10) * 0.01
    b2 = np.random.randn(10, 1) * 0.01
    return W1,b1,W2,b2

def softmax(Z):
    expZ = np.exp(Z - np.max(Z, axis=0, keepdims = True))
    return expZ / np.sum(expZ, axis=0, keepdims = True)

def ReLU(Z):
    return np.maximum(0,Z)

def der_relu(Z):
    return np.where(Z>0,1,0) 

def forward(W1, b1, W2, b2, X):
    Z1=np.dot(W1,X) + b1
    A1=ReLU(Z1)
    Z2=np.dot(W2,A1) + b2
    A2=softmax(Z2)
    return Z1,A1,Z2,A2

def one_hot(Y):
    zero_matrix = np.zeros((10, len(Y)))
    for i in range(len(Y)):
        zero_matrix[Y[i],i] = 1
    return zero_matrix

def backward(Z1, A1, Z2, A2, W1, W2, X, Y):
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    m=X.shape[1] 
    dW2=(1/m)*np.dot(dZ2,np.transpose(A1))
    db2=(1/m)*np.sum(dZ2)
    dZ1=np.dot(np.transpose(W2),dZ2)*der_relu(Z1)
    dW1=(1/m)* np.dot(dZ1,np.transpose(X))
    db1=(1/m)*np.sum(dZ1)
    return dW1, db1, dW2, db2

def update(dW1, db1, dW2, db2, W1, W2, b1, b2, alpha):
    W1-=alpha*dW1
    b1-=alpha*db1
    W2-=alpha*dW2
    b2-=alpha*db2
    return W1, b1, W2, b2

In [314]:
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    return np.sum(predictions == Y) / Y.size

def getloss(A2, Y):
    m = Y.shape[0]  
    one_hot_Y = one_hot(Y)  
    log_likelihood = -np.sum(one_hot_Y * np.log(A2 + 1e-8), axis=0)
    loss = np.sum(log_likelihood) / m
    return loss
    
def fit(X,Y,alpha,iter):
    W1,b1,W2,b2=initialise()
    for i in range(iter): 
        Z1,A1,Z2,A2=forward(W1,b1,W2,b2,X)
        dW1,db1,dW2,db2=backward(Z1,A1,Z2,A2,W1,W2,X,Y)
        W1,b1,W2,b2=update(dW1,db1,dW2,db2,W1,W2,b1,b2,alpha)
        if(i!=0):
            print("▅▅▅▅▅▅▅▅▅▅▅▅", end='')
        if i % 10 == 0:
            print("\nIteration: ", i)
            predictions = get_predictions(A2)
            print("Accuracy = ",get_accuracy(predictions, Y)," Loss = ",getloss(predictions,Y))
    print("▅▅▅▅▅▅▅▅▅▅▅▅", end='')
    print("\nIteration: ", iter)
    predictions = get_predictions(A2)
    print("Accuracy = ",get_accuracy(predictions, Y)," Loss = ",getloss(predictions,Y))
    return W1, b1, W2, b2

In [315]:
W1, b1, W2, b2 = fit(X_train, Y_train, 0.001, 300)


Iteration:  0
Accuracy =  0.047  Loss =  -1.7751936879340466
▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Iteration:  10
Accuracy =  0.28541666666666665  Loss =  3.4009596542774165
▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Iteration:  20
Accuracy =  0.45793333333333336  Loss =  1.863195683418115
▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Iteration:  30
Accuracy =  0.6398166666666667  Loss =  1.01018488762224
▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Iteration:  40
Accuracy =  0.7155833333333333  Loss =  0.754414186064713
▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Iteration:  50
Accuracy =  0.7551833333

In [316]:
def evaluate(X,Y):
    _, _, _, A2=forward(W1,b1,W2,b2,X)
    predictions=get_predictions(A2)
    print("Model Accuracy on Untrained Set = ",get_accuracy(predictions, Y))

In [317]:
evaluate(X_test,Y_test)

Model Accuracy on Untrained Set =  0.8912
