### Implemeting neural network from scratch

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.data import loadlocal_mnist

In [3]:
xtrain,ytrain = loadlocal_mnist(images_path = "train-images-idx3-ubyte",labels_path = 'train-labels-idx1-ubyte')
xtest,ytest = loadlocal_mnist(images_path = 't10k-images-idx3-ubyte',labels_path = 't10k-labels-idx1-ubyte')

In [4]:
xtest.shape

(10000, 784)

In [5]:
#normalise
digits = 10
xtrain = xtrain/255
xtest =  xtest/255

In [6]:
#onehot coded
ytrain = np.eye(digits)[ytrain.astype('int32')]

In [7]:
ytrain.shape

(60000, 10)

In [8]:
ytest = np.eye(digits)[ytest.astype('int32')]
ytest = ytest.T.reshape(digits, len(ytest))

In [9]:
ytrain.shape
ytrain = ytrain.T.reshape(digits, len(ytrain))

In [10]:
# multi class cross entropy loss function

def compute_multiclass_loss(Y, predicted):
    L_sum = np.sum(np.multiply(Y, np.log(predicted)))
    m = Y.shape[1]
    L = -(1/m) * L_sum
    return L

# Activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [11]:
xtrain = xtrain.T.reshape(784, len(xtrain))

In [12]:
xtest = xtest.T.reshape(784, len(xtest))

In [13]:
xtrain.shape[0]

784

In [15]:
# hyperparameters
feature = xtrain.shape[0]
hidden_node = 256
m = len(xtrain)
alpha = 0.1


# weights and bias intitialisation

w1 = np.random.randn(hidden_node, feature)
b1 = np.zeros((hidden_node, 1))
w2 = np.random.randn(digits, hidden_node)
b2 = np.zeros((digits, 1))

x = xtrain
y = ytrain

for i in range(2000):
    
    # Forward_propagation
    z1 = np.matmul(w1,x) + b1
    a1 = sigmoid(z1)
    z2 = np.matmul(w2,a1) + b2
    a2 = np.exp(z2) / np.sum(np.exp(z2), axis=0)

    cost = compute_multiclass_loss(y, a2)
    
    #Backpropagation
    dz2 = a2-y
    dw2 = (1./m) * np.matmul(dz2, a1.T)
    db2 = (1./m) * np.sum(dz2, axis=1, keepdims=True)

    da1 = np.matmul(w2.T, dz2)
    dz1 = da1 * (a1*(1-a1))
    dw1 = (1./m) * np.matmul(dz1, x.T)
    db1 = (1./m) * np.sum(dz1, axis=1, keepdims=True)

    # Updating weights
    w2 = w2 - alpha * dw2
    b2 = b2 - alpha * db2
    w1 = w1 - alpha * dw1
    b1 = b1 - alpha * db1

    if (i % 100 == 0):
        print("Epoch", i, "cost: ", cost)

print("Final cost:", cost)

Epoch 0 cost:  22.054860684222003
Epoch 100 cost:  0.3763994132146482
Epoch 200 cost:  0.2691011733640084
Epoch 300 cost:  0.21894026517212029
Epoch 400 cost:  0.18799581717205308
Epoch 500 cost:  0.16608397753488988
Epoch 600 cost:  0.14918410186941106
Epoch 700 cost:  0.1354779346361646
Epoch 800 cost:  0.12409067210214647
Epoch 900 cost:  0.1144243457334666
Epoch 1000 cost:  0.10606863529219022
Epoch 1100 cost:  0.09876228232038319
Epoch 1200 cost:  0.09231694625721013
Epoch 1300 cost:  0.08655205047102293
Epoch 1400 cost:  0.08132961153774172
Epoch 1500 cost:  0.07656104127383453
Epoch 1600 cost:  0.0721907290871448
Epoch 1700 cost:  0.06817239433898314
Epoch 1800 cost:  0.0644762260595214
Epoch 1900 cost:  0.061080217727061216
Final cost: 0.057983470996977794


In [16]:
print("xtrain :",xtrain.shape)
print("ytrain :",ytrain.shape)
print("w1 :",w1.shape)
print("w2 :",w2.shape)
print("b1 :",b1.shape)
print("b2 :",b2.shape)
print("a1 :",a1.shape)
print("z1 :",z1.shape)

xtrain : (784, 60000)
ytrain : (10, 60000)
w1 : (256, 784)
w2 : (10, 256)
b1 : (256, 1)
b2 : (10, 1)
a1 : (256, 60000)
z1 : (256, 60000)


In [17]:
dz2.shape

(10, 60000)

In [18]:
z1_t = np.matmul(w1, xtest) + b1
a1_t = sigmoid(z1_t)
z2_t = np.matmul(w2, a1_t) + b2
a2_t = np.exp(z2_t) / np.sum(np.exp(z2_t), axis=0)

predictions = np.argmax(a2_t, axis=0)
labels = np.argmax(ytest, axis=0)

In [19]:
#Accuracy

from sklearn.metrics import accuracy_score

accuracy = accuracy_score(labels,predictions)

In [20]:
accuracy*100

95.57

##### hidden layer = 64, accuracy =94.28, alpha = 0.1 
##### hidden layer = 64, accuracy =89.87, alpha = 0.01 
##### hidden layer = 256, accuracy =95.06, alpha = 0.1