### Implemeting neural network from scratch

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.data import loadlocal_mnist

In [2]:
xtrain,ytrain = loadlocal_mnist(images_path = "train-images-idx3-ubyte",labels_path = 'train-labels-idx1-ubyte')
xtest,ytest = loadlocal_mnist(images_path = 't10k-images-idx3-ubyte',labels_path = 't10k-labels-idx1-ubyte')

In [3]:
xtest.shape

(10000, 784)

In [4]:
#normalise
digits = 10
xtrain = xtrain/255
xtest =  xtest/255

In [5]:
#onehot coded
ytrain = np.eye(digits)[ytrain.astype('int32')]

In [6]:
ytrain.shape

(60000, 10)

In [7]:
ytest = np.eye(digits)[ytest.astype('int32')]
ytest = ytest.T.reshape(digits, len(ytest))

In [8]:
ytrain.shape
ytrain = ytrain.T.reshape(digits, len(ytrain))

In [9]:
# multi class cross entropy loss function

def compute_multiclass_loss(Y, predicted):
    L_sum = np.sum(np.multiply(Y, np.log(predicted)))
    m = Y.shape[1]
    L = -(1/m) * L_sum
    return L

# Activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [10]:
xtrain = xtrain.T.reshape(784, len(xtrain))

In [11]:
xtest = xtest.T.reshape(784, len(xtest))

In [12]:
xtrain.shape[0]

784

In [18]:
# hyperparameters
feature = xtrain.shape[0]
hidden_node = 256
m = len(xtrain)
alpha = 0.1


# weights and bias intitialisation

w1 = np.random.randn(hidden_node, feature)
b1 = np.zeros((hidden_node, 1))
w2 = np.random.randn(digits, hidden_node)
b2 = np.zeros((digits, 1))
#print(w1.shape,w2.shape,b1.shape,b2.shape)
x = xtrain
y = ytrain


for i in range(1000):
    
    # Forward_propagation
    z1 = np.matmul(w1,x) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(w2,a1) + b2
    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=0)
    #print(a2.shape)
    cost = compute_multiclass_loss(y, a2)
   
    #Backpropagation
    dz2 = a2-y
    dw2 = (1./m) * np.matmul(dz2, a1.T)
    db2 = (1./m) * np.sum(dz2, axis=1, keepdims=True)

    da1 = np.matmul(w2.T, dz2)
    dz1 = da1 * (a1*(1-a1))
    dw1 = (1./m) * np.matmul(dz1, x.T)
    db1 = (1./m) * np.sum(dz1, axis=1, keepdims=True)

    # Updating weights
    w2 = w2 - alpha * dw2
    b2 = b2 - alpha * db2
    w1 = w1 - alpha * dw1
    b1 = b1 - alpha * db1

    if (i % 100 == 0):
        print("Epoch", i, "cost: ", cost)

print("Final cost:", cost)

(256, 784) (10, 256) (256, 1) (10, 1)
Epoch 0 cost:  19.56719210743892
Epoch 100 cost:  0.3853538811451661
Epoch 200 cost:  0.27289845960126996
Epoch 300 cost:  0.21996498961167957
Epoch 400 cost:  0.18700799647245542
Epoch 500 cost:  0.16358313243111844
Epoch 600 cost:  0.14588696647215885
Epoch 700 cost:  0.1318171269344279
Epoch 800 cost:  0.12029096961944918
Epoch 900 cost:  0.11052368244037027
Final cost: 0.10210350566431793


In [16]:
print("xtrain :",xtrain.shape)
print("ytrain :",ytrain.shape)
print("w1 :",w1.shape)
print("w2 :",w2.shape)
print("b1 :",b1.shape)
print("b2 :",b2.shape)
print("a1 :",a1.shape)
print("z1 :",z1.shape)

xtrain : (784, 60000)
ytrain : (10, 60000)
w1 : (256, 784)
w2 : (10, 256)
b1 : (256, 1)
b2 : (10, 1)
a1 : (256, 60000)
z1 : (256, 60000)


In [17]:
dz2.shape

(10, 60000)

In [18]:
z1_t = np.matmul(w1, xtest) + b1
a1_t = sigmoid(z1_t)
z2_t = np.matmul(w2, a1_t) + b2
a2_t = np.exp(z2_t) / np.sum(np.exp(z2_t), axis=0)

predictions = np.argmax(a2_t, axis=0)
labels = np.argmax(ytest, axis=0)

In [19]:
#Accuracy

from sklearn.metrics import accuracy_score

accuracy = accuracy_score(labels,predictions)

In [20]:
accuracy*100

95.57

##### hidden layer = 64, accuracy =94.28, alpha = 0.1 
##### hidden layer = 64, accuracy =89.87, alpha = 0.01 
##### hidden layer = 256, accuracy =95.06, alpha = 0.1