In [2]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [3]:
def init_params(n_units):
    """Create the initial weights and biases of a fully connected network.

    n_units lists the layer widths, input layer first. For every layer
    i >= 1 the returned dict contains

      * "W<i>": array of shape (n_units[i], n_units[i-1]) drawn from a
        standard normal distribution and scaled by 0.1,
      * "b<i>": zero column vector of shape (n_units[i], 1).
    """
    params = {}
    layer_shapes = zip(n_units[1:], n_units[:-1])
    for layer, (fan_out, fan_in) in enumerate(layer_shapes, start=1):
        params[f"W{layer}"] = 0.1 * np.random.randn(fan_out, fan_in)
        params[f"b{layer}"] = np.zeros((fan_out, 1))
    return params
    

In [4]:
def relu(z):
    """Rectified linear unit: the element-wise maximum of 0 and z."""
    return np.maximum(0, z)

In [5]:
def d_relu(da, z):
    """Push an upstream gradient back through a ReLU.

    Returns a copy of `da` in which every entry whose pre-activation `z`
    is less than or equal to zero has been set to 0. `da` itself is left
    untouched.
    """
    inactive = z <= 0
    grad = np.array(da, copy=True)
    grad[inactive] = 0
    return grad

In [6]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator


In [7]:
def d_sigmoid(da , z):
    """Push an upstream gradient back through a sigmoid.

    Multiplies `da` element-wise by the sigmoid derivative s * (1 - s),
    where s = sigmoid(z).
    """
    activation = sigmoid(z)
    return da * activation * (1 - activation)

In [8]:
def forward_prop(X,params):
    """Run one forward pass through the network.

    params holds "W<i>" / "b<i>" pairs for layers 1..L, with L taken as
    len(params) // 2. Layers 1..L-1 use ReLU, layer L uses the sigmoid.

    Returns a 3-tuple:
      * the activation of the last layer,
      * the list of activations [X, A1, ..., AL] (index i is layer i),
      * a dict of pre-activations keyed by the layer number as a string.
    """
    n_layers = len(params) // 2
    activations = [X]
    pre_activations = {}

    def affine(layer):
        # W<layer> . A[layer-1] + b<layer>
        key = str(layer)
        return np.dot(params["W" + key], activations[layer - 1]) + params["b" + key]

    # Hidden layers: affine transform followed by ReLU.
    for layer in range(1, n_layers):
        z_hidden = affine(layer)
        pre_activations[str(layer)] = z_hidden
        activations.append(relu(z_hidden))

    # Output layer: affine transform followed by the sigmoid.
    z_out = affine(n_layers)
    pre_activations[str(n_layers)] = z_out
    activations.append(sigmoid(z_out))

    return activations[n_layers], activations, pre_activations

In [47]:
def cost(ypred, y):
    """Binary cross-entropy, summed over output units and averaged over samples.

    Parameters
    ----------
    ypred : array-like
        Predicted probabilities, either 1-D of length m or 2-D with one
        column per sample, i.e. shape (n_outputs, m).
    y : array-like
        Targets in {0, 1}. Expected to have the same shape as `ypred`.

    Returns
    -------
    numpy.ndarray
        0-d array holding the scalar cost.
    """
    ypred = np.asarray(ypred, dtype=float)
    y = np.asarray(y, dtype=float)

    # Samples run along the last axis (columns), so that is the axis to
    # average over; for 1-D input this is the same as shape[0].
    m = y.shape[-1]

    # Keep probabilities strictly inside (0, 1): a saturated sigmoid would
    # otherwise produce log(0) = -inf and 0 * -inf = nan.
    eps = 1e-12
    clipped = np.clip(ypred, eps, 1 - eps)

    # Element-wise products summed to a scalar. A matrix product would mix
    # different output units as soon as there is more than one of them.
    log_likelihood = np.sum(y * np.log(clipped) + (1 - y) * np.log(1 - clipped))
    return np.squeeze(-log_likelihood / m)

In [10]:
def d_cost(ypred, y):
    """Derivative of the binary cross-entropy with respect to ypred.

    Evaluates -(y / ypred - (1 - y) / (1 - ypred)) element-wise.
    """
    positive_term = y / ypred
    negative_term = (1 - y) / (1 - ypred)
    return -(positive_term - negative_term)

In [48]:
def backward_prop(A, y, Z, params):
    """Back-propagate the cross-entropy loss through the network.

    Parameters
    ----------
    A : list
        Activations as produced by forward_prop: A[0] is the input and
        A[i] the output of layer i, each with one column per sample.
    y : array
        Targets with the same shape as the output activation A[L].
        Integer class labels have to be one-hot encoded by the caller.
    Z : dict
        Pre-activations keyed by the layer number as a string.
    params : dict
        "W<i>" / "b<i>" entries for every layer 1..L.

    Returns
    -------
    dict
        "dw<i>" and "db<i>" for every layer (the keys update_params reads),
        plus the intermediate "dz<i>" for every layer and "da<i>" for every
        hidden layer.
    """
    # Integer division: the layer count is used as a list index and as a
    # range bound, both of which reject floats.
    n_layers = len(params) // 2
    # Samples are stored as columns, so the batch size is the last axis.
    m = A[n_layers].shape[-1]
    d_params = {}

    # Sigmoid output combined with cross-entropy: dL/dz = prediction - target.
    dz = A[n_layers] - y

    for i in range(n_layers, 0, -1):
        d_params['dz' + str(i)] = dz
        # dL/dW_i = dz_i . A_{i-1}^T  -> same shape as W_i.
        d_params['dw' + str(i)] = np.dot(dz, A[i - 1].T) / m
        d_params['db' + str(i)] = np.sum(dz, axis=1, keepdims=True) / m

        if i > 1:
            # Carry the gradient to the previous (ReLU) layer:
            # dL/dA_{i-1} = W_i^T . dz_i, then mask by that layer's own
            # pre-activation. The gradient w.r.t. the input is not needed.
            da_prev = np.dot(params['W' + str(i)].T, dz)
            d_params['da' + str(i - 1)] = da_prev
            dz = d_relu(da_prev, Z[str(i - 1)])

    return d_params

        

        

In [12]:
def update_params(params, d_params,lr):
    """Apply one gradient-descent step to every layer.

    Parameters
    ----------
    params : dict
        "W<i>" / "b<i>" arrays for layers 1..L; the arrays are updated
        in place.
    d_params : dict
        Gradients under the keys "dw<i>" and "db<i>".
    lr : float
        Learning rate (step size).

    Returns
    -------
    dict
        The same `params` dict that was passed in.
    """
    # Integer division: range() does not accept a float bound.
    n_layers = len(params) // 2
    for i in range(1, n_layers + 1):
        params["W" + str(i)] -= lr * d_params["dw" + str(i)]
        params["b" + str(i)] -= lr * d_params["db" + str(i)]

    return params

In [14]:
from keras.datasets import mnist

In [16]:
# Load the MNIST train/test split through Keras. The recorded output of this
# cell shows the mnist.npz archive being downloaded when it ran.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [31]:
# NOTE(review): the recorded (60000, 784) output comes from running this cell
# (In [31]) after the x_train reshape cell (In [30]) that sits further down;
# executed top to bottom on a fresh kernel, this cell runs before that reshape.
x_train.shape

(60000, 784)

In [32]:
# NOTE(review): the recorded (10000, 784) output comes from running this cell
# (In [32]) after the x_test reshape cell (In [29]) that sits just below;
# executed top to bottom on a fresh kernel, this cell runs before that reshape.
x_test.shape

(10000, 784)

In [29]:
# Flatten every test image into one row. The sample count is read from the
# array and the row length is inferred (-1) instead of being hard-coded.
x_test = x_test.reshape(x_test.shape[0], -1)

In [22]:
# Divide both image arrays by 255 (presumably mapping 8-bit pixel values
# into [0, 1] -- confirm against the dataset's dtype).
x_train, x_test = x_train / 255, x_test / 255

In [30]:
# Flatten every training image into one row. The sample count is read from
# the array and the row length is inferred (-1) instead of being hard-coded.
x_train = x_train.reshape(x_train.shape[0], -1)

In [40]:
# Transpose the (samples, features) training matrix so that each column is one
# sample: forward_prop computes np.dot(W, A) with W of shape
# (units, previous units), which needs the features along axis 0.
X= x_train.T

In [41]:
# The network ends in 10 sigmoid units, so the targets have to be a
# (10, n_samples) one-hot matrix matching the shape of the prediction rather
# than a vector of class ids.
# Assumes y_train holds integer labels 0..9 -- confirm against the loader.
N_CLASSES = 10
y = np.eye(N_CLASSES)[y_train].T

In [None]:
# Step size for plain full-batch gradient descent. The value is a starting
# point chosen for this notebook, not a tuned setting.
LEARNING_RATE = 0.1

# Seed the global NumPy generator so the random weight initialisation (and
# therefore the whole run) is reproducible after Restart & Run All.
np.random.seed(0)

costs = []
params = init_params([x_train.shape[1],512,256, 256,128 ,64,10])
for i in range(1,1000):
    ypred, A , Z = forward_prop(X,params)
    # Record one cost value per iteration.
    costs.append(cost(ypred, y))
    # backward_prop needs the full list of layer activations, not only the
    # final prediction.
    d_params= backward_prop(A, y, Z, params)
    params= update_params(params, d_params, LEARNING_RATE)