# An implementation of a neural network



In [33]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [34]:
def sigmoid(z):
    """Apply the logistic sigmoid element-wise.

    Args:
        z: Pre-activation value(s); a scalar or NumPy array.

    Returns:
        A tuple ``(A, cache)`` where ``A = 1 / (1 + exp(-z))`` and ``cache``
        is ``z`` itself, handed back so the caller can keep it for the
        backward pass.
    """
    A = 1 / (1 + np.exp(-z))
    cache = z
    return A, cache
    

In [35]:
def relu(z):
    """Apply the rectified linear unit element-wise.

    Args:
        z: Pre-activation value(s).

    Returns:
        A tuple ``(activated, z)``: ``max(0, z)`` element-wise, together with
        the untouched input so the caller can cache it.
    """
    return np.maximum(0, z), z

In [36]:
def sigmoid_backward(dA, Z):
    """Backpropagate a gradient through the sigmoid activation.

    Args:
        dA: Gradient of the cost with respect to the sigmoid output.
        Z: Pre-activation values that were fed to the sigmoid.

    Returns:
        ``dZ = dA * s * (1 - s)`` where ``s = sigmoid(Z)``.
    """
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    return dZ

In [37]:
def relu_backward(dA, Z):
    """Backpropagate a gradient through the ReLU activation.

    Args:
        dA: Gradient of the cost with respect to the ReLU output.
        Z: Pre-activation values that were fed to the ReLU.

    Returns:
        A copy of ``dA`` with entries zeroed wherever ``Z <= 0``; ``dA``
        itself is left unmodified.
    """
    grad = np.copy(dA)
    inactive = Z <= 0
    grad[inactive] = 0
    return grad

In [38]:
def tanh(z):
    """Apply the hyperbolic tangent element-wise.

    Args:
        z: Pre-activation value(s).

    Returns:
        A tuple ``(A, z)``: ``tanh(z)`` together with the untouched input so
        the caller can cache it.
    """
    # np.tanh saturates cleanly at +/-1; the explicit
    # (e^z - e^-z) / (e^z + e^-z) ratio overflows to inf/inf = nan for large |z|.
    A = np.tanh(z)
    return A, z

In [39]:
def initialize_parameters(layers_dims, initialization_method="random"):
    """Create the weight matrices and bias vectors for every layer.

    Args:
        layers_dims: Sequence with the number of units in each layer,
            starting with the input layer.
        initialization_method: How to scale the standard-normal weights:
            ``"random"`` multiplies by 0.01, ``"xavier"`` by
            ``sqrt(1 / fan_in)`` and ``"he"`` by ``sqrt(2 / fan_in)``, where
            ``fan_in`` is the size of the previous layer.

    Returns:
        A dict with keys ``"W1".."W{L-1}"`` of shape
        ``(layers_dims[l], layers_dims[l-1])`` and ``"b1".."b{L-1}"`` of shape
        ``(layers_dims[l], 1)`` (biases are all zeros), where
        ``L = len(layers_dims)``.

    Raises:
        ValueError: If ``initialization_method`` is not one of the supported
            names.
    """
    if initialization_method not in ("random", "xavier", "he"):
        raise ValueError(
            f"unknown initialization_method: {initialization_method!r}"
        )

    parameters = {}
    for l in range(1, len(layers_dims)):
        fan_in = layers_dims[l - 1]
        if initialization_method == "random":
            scale = 0.01
        elif initialization_method == "xavier":
            scale = np.sqrt(1 / fan_in)
        else:
            # He initialization uses the larger 2 / fan_in variance.
            scale = np.sqrt(2 / fan_in)

        parameters["W" + str(l)] = np.random.randn(layers_dims[l], fan_in) * scale
        parameters["b" + str(l)] = np.zeros((layers_dims[l], 1))

    return parameters


In [40]:
def forward_propagation(X, parameters, activation="relu", regularization=None, keep_prob=0.5):
    """Run the forward pass: hidden layers with ``activation``, sigmoid output.

    Args:
        X: Input activations fed to the first layer.
        parameters: Dict holding ``"W1".."WL"`` and ``"b1".."bL"``.
        activation: Hidden-layer activation, ``"relu"`` or ``"tanh"``.
        regularization: When equal to ``"dropout"``, inverted dropout is
            applied to every hidden layer's activations; any other value
            disables it.
        keep_prob: Probability of keeping a unit when dropout is active.

    Returns:
        A tuple ``(AL, caches)``. ``AL`` is the sigmoid output of the last
        layer. ``caches`` is a dict keyed by layer index ``1..L`` whose values
        are ``((A_prev, W, b), Z)``. The dropout mask is not stored in the
        caches.

    Raises:
        ValueError: If ``activation`` is not ``"relu"`` or ``"tanh"``.
    """
    hidden_activations = {"relu": relu, "tanh": tanh}
    if activation not in hidden_activations:
        raise ValueError(f"unknown activation: {activation!r}")
    hidden_fn = hidden_activations[activation]

    L = len(parameters) // 2  # each layer contributes one W and one b
    caches = {}
    A = X

    for l in range(1, L):
        A_prev = A
        W = parameters["W" + str(l)]
        b = parameters["b" + str(l)]
        Z = np.dot(W, A_prev) + b
        A, activation_cache = hidden_fn(Z)

        if regularization == "dropout":
            D = (np.random.rand(A.shape[0], A.shape[1]) < keep_prob).astype(int)
            # Divide by keep_prob so the expected activation is unchanged.
            A = (A * D) / keep_prob

        caches[l] = ((A_prev, W, b), activation_cache)

    W = parameters["W" + str(L)]
    b = parameters["b" + str(L)]
    Z = np.dot(W, A) + b
    AL, activation_cache = sigmoid(Z)
    caches[L] = ((A, W, b), activation_cache)

    return AL, caches

In [41]:
def compute_cost(AL, Y, caches, regularization=None, lambd=0.7):
    """Compute the binary cross-entropy cost, optionally with an L2 penalty.

    Args:
        AL: Predicted probabilities from the output layer.
        Y: Ground-truth labels; ``Y.shape[1]`` is used as the example count.
        caches: Mapping of per-layer caches; each value is
            ``((A_prev, W, b), Z)``. Only the ``W`` entries are read, and only
            when ``regularization == "L2"``.
        regularization: ``"L2"`` adds ``lambd / (2 * m) * sum(W ** 2)`` over
            all layers; any other value leaves the cost unregularized.
        lambd: L2 regularization strength.

    Returns:
        The cost with singleton dimensions squeezed away.
    """
    m = Y.shape[1]
    cost = -(np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))) / m

    if regularization == "L2":
        # The weights are read from the caches, so no parameters dict is needed.
        for linear_cache, _ in caches.values():
            cost += (lambd * np.sum(np.square(linear_cache[1]))) / (2 * m)

    return np.squeeze(cost)
    

In [42]:
def activation_backward(dA, cache, activation, regularization, lambd):
    """Backpropagate through one layer (activation followed by linear step).

    Args:
        dA: Gradient of the cost with respect to this layer's activations.
        cache: ``((A_prev, W, b), Z)`` stored for this layer during the
            forward pass.
        activation: ``"relu"`` or ``"sigmoid"``.
        regularization: ``"L2"`` adds ``(lambd / m) * W`` to ``dW``; any other
            value leaves ``dW`` unregularized.
        lambd: L2 regularization strength.

    Returns:
        A tuple ``(dA_prev, dW, db)``.

    Raises:
        ValueError: If ``activation`` is neither ``"relu"`` nor ``"sigmoid"``.
    """
    linear_cache, activation_cache = cache
    A_prev, W, _ = linear_cache
    m = A_prev.shape[1]  # second axis of A_prev is taken as the example count

    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
    else:
        raise ValueError(f"unknown activation: {activation!r}")

    dW = np.dot(dZ, A_prev.T) / m
    if regularization == "L2":
        dW = dW + (lambd / m) * W
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

In [43]:
def backward_propagation(AL, Y, caches, regularization=None, lambd=0.7):
    """Compute gradients for every layer of the network.

    The last layer is treated as a sigmoid layer and every earlier layer as a
    ReLU layer.

    Args:
        AL: Output-layer probabilities from the forward pass.
        Y: Ground-truth labels; reshaped to ``AL.shape``.
        caches: Mapping from layer index ``1..L`` to the cache stored for that
            layer during the forward pass.
        regularization: Forwarded to ``activation_backward``.
        lambd: Forwarded to ``activation_backward``.

    Returns:
        A dict with ``"dA{l-1}"``, ``"dW{l}"`` and ``"db{l}"`` for each layer
        ``l`` in ``1..L``.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)

    grads["dA" + str(L - 1)], grads["dW" + str(L)], grads["db" + str(L)] = activation_backward(
        dAL, caches[L], "sigmoid", regularization=regularization, lambd=lambd
    )

    for l in reversed(range(1, L)):
        dA_prev_temp, dW_temp, db_temp = activation_backward(
            grads["dA" + str(l)], caches[l], "relu", regularization=regularization, lambd=lambd
        )
        grads["dA" + str(l - 1)] = dA_prev_temp
        grads["dW" + str(l)] = dW_temp
        grads["db" + str(l)] = db_temp

    return grads
        

In [44]:
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Args:
        parameters: Dict holding ``"W1".."WL"`` and ``"b1".."bL"``; updated
            in place.
        grads: Dict holding the matching ``"dW{l}"`` and ``"db{l}"`` entries.
        learning_rate: Step size multiplying each gradient.

    Returns:
        The same ``parameters`` dict that was passed in.
    """
    layer_count = len(parameters) // 2

    for idx in range(1, layer_count + 1):
        for prefix in ("W", "b"):
            key = prefix + str(idx)
            parameters[key] -= learning_rate * grads["d" + key]

    return parameters

In [45]:
def mini_batch_generator(X, Y, mini_batch_size=64):
    """Shuffle the examples and split them into mini-batches.

    Args:
        X: Input array with one example per column.
        Y: Label array with one example per column; reshaped to ``(1, m)``
            after shuffling.
        mini_batch_size: Number of examples per batch. The final batch is
            smaller when ``m`` is not a multiple of this size.

    Returns:
        A list of ``(mini_batch_X, mini_batch_Y)`` tuples. The same random
        permutation is applied to ``X`` and ``Y`` so columns stay paired.

    Raises:
        ValueError: If ``mini_batch_size`` is not positive.
    """
    if mini_batch_size <= 0:
        raise ValueError("mini_batch_size must be a positive integer")

    m = X.shape[1]
    permutation = np.random.permutation(m)
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((1, m))

    # Stepping by the batch size yields every full batch plus the remainder.
    return [
        (
            shuffled_X[:, start:start + mini_batch_size],
            shuffled_Y[:, start:start + mini_batch_size],
        )
        for start in range(0, m, mini_batch_size)
    ]

In [46]:
def _gradient_step(X, Y, parameters, learning_rate, regul):
    """Run one forward/backward pass on (X, Y) and update the parameters."""
    AL, caches = forward_propagation(X, parameters, regularization=regul)
    cost = compute_cost(AL, Y, caches, regularization=regul)
    grads = backward_propagation(AL, Y, caches, regularization=regul)
    parameters = update_parameters(parameters, grads, learning_rate)
    return parameters, cost


def model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False, regul=None, mini_batch=False, num_epochs=10000, mini_batch_size=64):
    """Train the network with batch or mini-batch gradient descent.

    Args:
        X: Training inputs with one example per column.
        Y: Training labels with one example per column.
        layers_dims: Number of units in each layer, input layer first.
        learning_rate: Gradient-descent step size.
        num_iterations: Number of full-batch updates (used when
            ``mini_batch`` is false).
        print_cost: Print the cost every 100 iterations/epochs.
        regul: Regularization name passed to the forward pass, the cost and
            the backward pass (e.g. ``"L2"`` or ``"dropout"``), or ``None``.
        mini_batch: Train on shuffled mini-batches instead of the full batch.
        num_epochs: Number of passes over the data (used when ``mini_batch``
            is true).
        mini_batch_size: Examples per mini-batch.

    Returns:
        The trained parameters dict. A plot of the recorded costs is shown
        before returning.
    """
    costs = []
    parameters = initialize_parameters(layers_dims, initialization_method="xavier")

    if mini_batch:
        for i in range(num_epochs):
            cost_total = 0
            minibatches = mini_batch_generator(X, Y, mini_batch_size=mini_batch_size)

            # Use separate names so the full X, Y are not overwritten and can
            # be reshuffled on the next epoch.
            for batch_X, batch_Y in minibatches:
                parameters, batch_cost = _gradient_step(
                    batch_X, batch_Y, parameters, learning_rate, regul
                )
                cost_total += batch_cost

            # compute_cost already averages over the examples of a batch, so
            # the epoch cost is the mean of the per-batch costs.
            cost = cost_total / max(len(minibatches), 1)
            costs.append(cost)
            if print_cost and i % 100 == 0:
                print(f"cost after {i} iterations is {cost}")

    else:
        for i in range(num_iterations):
            parameters, cost = _gradient_step(X, Y, parameters, learning_rate, regul)

            if i % 100 == 0:
                # Record the cost even when printing is off so the plot is
                # never empty.
                costs.append(cost)
                if print_cost:
                    print(f"cost after {i} iterations is {cost}")

    plt.plot(costs)
    plt.show()
    return parameters
    