# Neural Networks From Scratch
   In the following Notebook , I will create a neural network from scratch where every process is splitted into a function.
   The processes are :<br>
   1- Initializing the weights and biases according to the dimensions of Input data and the required number of hidden layers.<br>
   2- Forward propagation , where we propagate our Input through the hidden layers to calculate the Output.<br>
   3- Backward propagation , where we propagate our Error or Loss backward through the Layers to be able to calculate the      dLoss/dWeights which means how to minimize the loss by changing these weights.<br>
   4- Updating these Weights.<br>
   5- Then Iterate to step 2.<br>

# Defining the needed Libraries

In [1]:
#First we import our needed libraries 
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score
%matplotlib inline

In [3]:
from urllib import request
import gzip
import pickle

filename = [
["training_images","train-images-idx3-ubyte.gz"],
["test_images","t10k-images-idx3-ubyte.gz"],
["training_labels","train-labels-idx1-ubyte.gz"],
["test_labels","t10k-labels-idx1-ubyte.gz"]
]

def download_mnist():
    base_url = "http://yann.lecun.com/exdb/mnist/"
    for name in filename:
        print("Downloading "+name[1]+"...")
        request.urlretrieve(base_url+name[1], name[1])
    print("Download complete.")

def save_mnist():
    mnist = {}
    for name in filename[:2]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=16).reshape(-1,28*28)
    for name in filename[-2:]:
        with gzip.open(name[1], 'rb') as f:
            mnist[name[0]] = np.frombuffer(f.read(), np.uint8, offset=8)
    with open("mnist.pkl", 'wb') as f:
        pickle.dump(mnist,f)
    print("Save complete.")

def init():
    #download_mnist()
    #save_mnist()
    load()

def load():
    with open("mnist.pkl",'rb') as f:
        mnist = pickle.load(f)
    return mnist["training_images"], mnist["training_labels"], mnist["test_images"], mnist["test_labels"]

if __name__ == '__main__':
    init()

# Loading Dataset

In [4]:
#Loaded the dataset into the following X's and Y's with numpy format
X_train,Y_train,X_test,Y_test=load()

# Normalizing the Inputs to be between 0-1

In [5]:
X_train = X_train/255
X_test = X_test/255
X_test.shape

(10000, 784)

In [6]:
values = Y_train
maxx = np.max(values)+1
Y_trainz= np.eye(maxx)[values]

In [7]:
v = Y_test
asd = np.max(v)+1
Y_testz=np.eye(asd)[v]

In [33]:
Y_test

array([7, 2, 1, ..., 4, 5, 6], dtype=uint8)

# Now define the activation functions

In [9]:
def tanh(X):
    return np.tanh(X)

In [10]:
def Softmax(X):
    exps = np.exp(X)
    return exps / np.sum(exps,axis=1,keepdims=True)

# Defining our loss function

In [11]:
def Cross_Entropy_Loss(Output,Predicted):
    m = Output.shape[0]
    loss = (-1/m )* np.sum(Output*np.log(Predicted.clip(min=0.00000001)))
    return loss

def Loss_derivative(Output,Predicted):
    return (Predicted - Output)

In [12]:
def tanh_derivative(X):
    return (1 - np.power(X,2))

# Initializing the parameters

In [13]:
def Initialize_Parameters(Input_dim,Hidden_dim,Output_dim):
    W1 = np.random.randn(Input_dim,Hidden_dim)
    b1 = np.zeros((1,Hidden_dim))
    
    W2 = np.random.randn(Hidden_dim,Hidden_dim)
    b2 = np.zeros((1,Hidden_dim))
    
    W3 = np.random.randn(Hidden_dim,Output_dim)
    b3 = np.zeros((1,Output_dim))
    
    Parameters = {'W1':W1,'b1':b1,'W2':W2,'b2':b2,'W3':W3,'b3':b3}
    return Parameters

# Forward Step

In [14]:
def forward_Prop(Parameters,x):
    W1,b1,W2,b2,W3,b3= Parameters['W1'],Parameters['b1'],Parameters['W2'],Parameters['b2'],Parameters['W3'],Parameters['b3']
    
    Z1 = np.dot(x,W1) + b1
    A1 = tanh(Z1)
    Z2 = np.dot(A1,W2) + b2
    A2 = tanh(Z2)
    Z3 = np.dot(A2,W3) + b3  
    A3 = Softmax(Z3)
    
    Layers = {'Z1':Z1,'A1':A1,'Z2':Z2,'A2':A2,'Z3':Z3,'A3':A3}
    return Layers

# Backward Step

In [15]:
def Backward_prop(Parameters,Layers,x,y):
    Z1,A1,Z2,A2,Z3,A3 = Layers['Z1'],Layers['A1'],Layers['Z2'],Layers['A2'],Layers['Z3'],Layers['A3']
    W2,W3 = Parameters['W2'],Parameters['W3']
    m = x.shape[0]
    dZ3 = Loss_derivative(y,A3)
    dW3 = (1/m) * np.dot((A2.T),dZ3)
    db3 = (1/m) * np.sum(dZ3,axis=0)
    dZ2 = np.multiply(dZ3.dot(W3.T),tanh_derivative(A2))
    dW2 = (1/m) * np.dot((A1.T),dZ2)
    db2 = (1/m) * np.sum(dZ2,axis=0)
    dZ1 = np.multiply(dZ2.dot(W2.T),tanh_derivative(A1))
    dW1 = (1/m) * np.dot((x.T),dZ1)
    db1 = (1/m) * np.sum(dZ1,axis=0)
    
    derivatives = {'db3':db3,'dW3':dW3,'db2':db2,'dW2':dW2,'db1':db1,'dW1':dW1}
    return derivatives

# Learning Process and Updates

In [16]:
def Learning_Process(derivatives,Parameters,learning_rate):
    dW3,db3,dW2,db2,dW1,db1 = derivatives['dW3'],derivatives['db3'],derivatives['dW2'],derivatives['db2'],derivatives['dW1'],derivatives['db1']
    W3,b3,W2,b2,W1,b1 = Parameters['W3'],Parameters['b3'],Parameters['W2'],Parameters['b2'],Parameters['W1'],Parameters['b1']
    
    W1 = W1 - learning_rate*dW1
    W2 = W2 - learning_rate*dW2
    W3 = W3 - learning_rate*dW3
    
    b1 = b1 - learning_rate*db1
    b2 = b2 - learning_rate*db2
    b3 = b3 - learning_rate*db3
    
    Parameters = {'W1':W1,'b1':b1,'W2':W2,'b2':b2,'W3':W3,'b3':b3}
    return Parameters

In [31]:
def Training_Loop(Parameters,learning_rate,X,Y,epochs):
    for epoch in range(1,epochs):
        Layers = forward_Prop(Parameters,X)
        derivatives = Backward_prop(Parameters,Layers,X,Y)
        Parameters = Learning_Process(derivatives,Parameters,learning_rate)
        Loss = Cross_Entropy_Loss(Y,Layers['A3'])
        if (epoch% 100==0):
            print("Epoch %d : Loss is equal %f"%(epoch,Loss))
            print(accuracy_score(y_pred = np.argmax(Layers['A3'],axis=1),y_true = np.argmax(Y,axis=1)))
    return Parameters

In [32]:
Parameters=Initialize_Parameters(784,128,10)
Parameters=Training_Loop(Parameters=Parameters,learning_rate=0.1,X=X_train,Y=Y_trainz,epochs=2000)

Epoch 100 : Loss is equal 3.466147
0.5511666666666667
Epoch 200 : Loss is equal 2.378573
0.6537333333333334
Epoch 300 : Loss is equal 1.907731
0.7021
Epoch 400 : Loss is equal 1.626646
0.7338166666666667
Epoch 500 : Loss is equal 1.432608
0.7540166666666667
Epoch 600 : Loss is equal 1.287136
0.7698333333333334
Epoch 700 : Loss is equal 1.173637
0.7832833333333333
Epoch 800 : Loss is equal 1.082623
0.7948166666666666
Epoch 900 : Loss is equal 1.007562
0.8051
Epoch 1000 : Loss is equal 0.943995
0.8135
Epoch 1100 : Loss is equal 0.889069
0.8205
Epoch 1200 : Loss is equal 0.840753
0.8263666666666667
Epoch 1300 : Loss is equal 0.797892
0.83155
Epoch 1400 : Loss is equal 0.759705
0.83635
Epoch 1500 : Loss is equal 0.725446
0.8413166666666667
Epoch 1600 : Loss is equal 0.694516
0.8456166666666667
Epoch 1700 : Loss is equal 0.666582
0.8499
Epoch 1800 : Loss is equal 0.641336
0.8537166666666667
Epoch 1900 : Loss is equal 0.618373
0.8572333333333333


In [35]:
Predict = forward_Prop(Parameters,X_test)
Predicted_labels = np.argmax(Predict['A3'],axis=1)
accuracy_score(y_pred=Predicted_labels,y_true=Y_test)*100


80.61

0.5