# Neural network from scratch using sigmoid and softmax functions

Now let's define a neural network for multi-class classification.

# 1)Lets initialize the parameters

In [731]:
import numpy as np
import matplotlib.pyplot as plt

In [732]:
def initialization_of_parameters(list_nodes):
    """Create the initial weights and biases for every layer of the network.

    Args:
        list_nodes: Sequence of layer sizes, starting with the input layer.

    Returns:
        Dict with keys "W1", "b1", "W2", "b2", ... (one pair per layer after
        the input). "W<l>" has shape (list_nodes[l], list_nodes[l-1]) and is
        drawn from a standard normal scaled by 0.01; "b<l>" is a zero column
        of shape (list_nodes[l], 1).
    """
    # Re-seeds NumPy's global RNG on every call so the weights are reproducible.
    np.random.seed(3)

    parameters = {}

    # Pair each layer with the one before it; numbering starts at 1 so the
    # keys line up with W1, b1, W2, b2, ...
    consecutive_sizes = zip(list_nodes[:-1], list_nodes[1:])
    for layer, (n_inputs, n_units) in enumerate(consecutive_sizes, start=1):
        weights = np.random.randn(n_units, n_inputs) * 0.01
        biases = np.zeros((n_units, 1))

        assert weights.shape == (n_units, n_inputs)
        assert biases.shape == (n_units, 1)

        parameters[f"W{layer}"] = weights
        parameters[f"b{layer}"] = biases

    return parameters

## 2) Now let's move on to forward propagation.

In [733]:
def linear_forward(X,W,b):
    """Compute the linear part of a layer, Z = W.X + b.

    Args:
        X: Input (or previous-layer activation) array; columns are samples.
        W: Weight array of the layer.
        b: Bias array, broadcast over the columns of W.X.

    Returns:
        Tuple (Z, storage) where storage is (X, W, b), kept for the backward
        pass.
    """
    pre_activation = W.dot(X) + b

    expected_shape = (W.shape[0], X.shape[1])
    assert pre_activation.shape == expected_shape

    return pre_activation, (X, W, b)

In [734]:
def sigmoid(Z):
    """Apply the element-wise logistic function 1 / (1 + exp(-Z)).

    Args:
        Z: Pre-activation values.

    Returns:
        Tuple (A, Z): the activations and the untouched input, which is kept
        for the backward pass.
    """
    denominator = 1 + np.exp(-Z)
    activation = 1 / denominator

    return activation, Z

In [735]:
def softmax(Z):
    """Column-wise softmax: each column of the result sums to 1.

    Args:
        Z: Pre-activation values; axis 0 indexes the classes.

    Returns:
        Tuple (A, Z): the softmax probabilities and the original, unshifted
        input, which is kept for the backward pass.
    """
    # Subtracting the per-column maximum leaves the softmax mathematically
    # unchanged but keeps exp() from overflowing to inf (and the ratio from
    # becoming NaN) when the logits are large.
    shifted = Z - np.max(Z, axis=0, keepdims=True)
    expZ = np.exp(shifted)
    A = expZ / expZ.sum(axis=0, keepdims=True)

    storage_softmax = Z

    return A, storage_softmax

In [736]:
def linear_forward_with_activation(X,W,b,activation_type):
    """Run one full layer: the linear step followed by its activation.

    Args:
        X: Input (or previous-layer activation) array.
        W: Weight array of the layer.
        b: Bias array of the layer.
        activation_type: Either "sigmoid" or "softmax".

    Returns:
        Tuple (A, storage) where A is the layer output and storage is
        ((X, W, b), Z), i.e. everything the backward pass needs.

    Raises:
        ValueError: If activation_type is not "sigmoid" or "softmax".
    """
    # Reject unknown activations up front; otherwise neither branch would
    # assign A and the failure would surface as an obscure UnboundLocalError.
    if activation_type not in ("sigmoid", "softmax"):
        raise ValueError(
            f"Unsupported activation_type {activation_type!r}; "
            "expected 'sigmoid' or 'softmax'"
        )

    # The linear step is identical for both activations.
    Z, storage_linear_forward = linear_forward(X, W, b)

    if activation_type == "sigmoid":
        A, storage_Z = sigmoid(Z)
    else:
        A, storage_Z = softmax(Z)

    assert (A.shape == Z.shape)

    # Stores X, W, b and Z for back-propagation.
    storage_linear_forward_with_activation = (storage_linear_forward, storage_Z)

    return A, storage_linear_forward_with_activation

Now let's define L-layer forward propagation.

In [737]:
def forward_propagation_for_n_layers(X,parameters):
    """Forward pass through the whole network.

    Every layer except the last uses a sigmoid activation; the last layer
    uses softmax.

    Args:
        X: Input array; columns are samples.
        parameters: Dict holding "W1", "b1", ..., "W<L>", "b<L>".

    Returns:
        Tuple (A_last, storages): the softmax output of the final layer and a
        list with one ((A_prev, W, b), Z) entry per layer, in layer order.
    """
    # Each layer contributes one W and one b entry.
    depth = len(parameters) // 2

    storages = []
    activation = X

    # Hidden layers 1 .. depth-1: linear -> sigmoid.
    for layer in range(1, depth):
        activation, hidden_storage = linear_forward_with_activation(
            activation,
            parameters[f"W{layer}"],
            parameters[f"b{layer}"],
            activation_type="sigmoid",
        )
        storages.append(hidden_storage)

    # Output layer: linear -> softmax.
    output_weights = parameters[f"W{depth}"]
    A_last, output_storage = linear_forward_with_activation(
        activation,
        output_weights,
        parameters[f"b{depth}"],
        activation_type="softmax",
    )
    storages.append(output_storage)

    assert A_last.shape == (output_weights.shape[0], X.shape[1])

    return A_last, storages

## 3) Now let's calculate the cross-entropy cost

In [738]:
def cost_cal(A_last,Y):
    """Cross-entropy cost, summed over all classes and samples.

    The cost is a plain sum (it is not divided by the number of samples).

    Args:
        A_last: Predicted class probabilities.
        Y: One-hot true labels with the same shape as A_last.

    Returns:
        The cost as a zero-dimensional value.
    """
    # Floor the probabilities at the smallest positive normal float so that
    # log(0) = -inf can never appear; -inf * 0 would otherwise turn the whole
    # cost into NaN. Probabilities above the floor are left untouched.
    smallest = np.finfo(float).tiny
    safe_probabilities = np.maximum(np.asarray(A_last, dtype=float), smallest)

    cost = - np.sum(np.multiply(np.log(safe_probabilities), Y))

    cost = np.squeeze(cost)
    assert (cost.shape == ())

    return cost

# 4) Lets do the backpropagation

In [739]:
def linear_backward(dZ, storage_linear_forward):
    """Backward pass through the linear part of one layer.

    The gradients are summed over the samples (columns), not averaged. This
    matches a cost that is itself a sum over samples.

    Args:
        dZ: Gradient of the cost with respect to this layer's Z.
        storage_linear_forward: Tuple (A_previous, W, b) saved by the forward
            pass.

    Returns:
        Tuple (dA_previous, dW, db) with the same shapes as A_previous, W and
        b respectively.
    """
    A_previous, W, b = storage_linear_forward

    dW = dZ @ A_previous.T
    db = np.sum(dZ, axis=1, keepdims=True)
    dA_previous = W.T @ dZ

    assert (dA_previous.shape == A_previous.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)

    return dA_previous, dW, db

In [740]:
def sigmoid_backward(dA, storage_Z ):
    """Back-propagate a gradient through the sigmoid activation.

    Args:
        dA: Gradient of the cost with respect to the layer's activation.
        storage_Z: The pre-activation Z saved during the forward pass.

    Returns:
        dZ = dA * sigmoid(Z) * (1 - sigmoid(Z)), with the same shape as Z.
    """
    # Recompute the activation from Z, then apply the chain rule.
    activation = 1 / (1 + np.exp(-storage_Z))
    grad_Z = dA * activation * (1 - activation)

    assert grad_Z.shape == storage_Z.shape

    return grad_Z

In [741]:
def softmax_backward(dA,storage_Z ):
    """Back-propagate through the softmax output layer.

    The incoming dA is passed through unchanged (as a new array): for softmax
    combined with cross-entropy the gradient with respect to Z is expected to
    be supplied directly by the caller.

    Args:
        dA: Gradient arriving at the output layer.
        storage_Z: The pre-activation Z saved during the forward pass; used
            only to check the shape.

    Returns:
        dZ, equal in value to dA.
    """
    # Derivative factor is 1 (from cross-entropy combined with softmax).
    grad_Z = 1 * dA

    assert grad_Z.shape == storage_Z.shape
    return grad_Z

In [742]:
def linear_activation_backward(dA, storage_linear_forward_with_activation, activation_type):
    """Backward pass through one full layer (activation, then linear part).

    Args:
        dA: Gradient arriving at this layer's output.
        storage_linear_forward_with_activation: Tuple ((A_previous, W, b), Z)
            saved by the forward pass of this layer.
        activation_type: Either "softmax" or "sigmoid".

    Returns:
        Tuple (dA_previous, dW, db).

    Raises:
        ValueError: If activation_type is not "softmax" or "sigmoid".
    """
    # Reject unknown activations up front; otherwise no branch would assign
    # the gradients and the return would fail with UnboundLocalError.
    if activation_type not in ("softmax", "sigmoid"):
        raise ValueError(
            f"Unsupported activation_type {activation_type!r}; "
            "expected 'softmax' or 'sigmoid'"
        )

    storage_linear_forward, storage_Z = storage_linear_forward_with_activation

    if activation_type == "softmax":
        dZ = softmax_backward(dA, storage_Z)
    else:
        dZ = sigmoid_backward(dA, storage_Z)

    # The linear step is identical for both activations.
    dA_previous, dW, db = linear_backward(dZ, storage_linear_forward)

    return dA_previous, dW, db

In [743]:
def L_model_backward(A_last, Y, storage_forward_propagation_for_all_layers  ):
    """Back-propagate through the whole network.

    Args:
        A_last: Softmax output of the forward pass.
        Y: True labels; reshaped to the shape of A_last.
        storage_forward_propagation_for_all_layers: List with one
            ((A_previous, W, b), Z) entry per layer, as produced by the
            forward pass.

    Returns:
        Dict of gradients with keys "dA<l-1>", "dW<l>" and "db<l>" for every
        layer l = 1 .. number of layers.
    """
    gradients = {}
    Number_of_layer = len(storage_forward_propagation_for_all_layers)  # the number of layers
    Y = Y.reshape(A_last.shape)

    # Initializing the backpropagation: this difference is handed to the
    # softmax layer, whose backward step passes it through unchanged.
    dA_last = A_last - Y

    # Output (softmax) layer.
    current_storage = storage_forward_propagation_for_all_layers[Number_of_layer - 1]
    dA_previous_temp, dW_temp, db_temp = linear_activation_backward(dA_last, current_storage, "softmax")
    gradients["dA" + str(Number_of_layer - 1)] = dA_previous_temp
    gradients["dW" + str(Number_of_layer)] = dW_temp
    gradients["db" + str(Number_of_layer)] = db_temp

    # Hidden (sigmoid) layers, walked from the last hidden layer back to the first.
    for l in reversed(range(Number_of_layer - 1)):

        current_storage = storage_forward_propagation_for_all_layers[l]
        dA_previous_temp, dW_temp, db_temp = linear_activation_backward(gradients["dA" + str(l + 1)], current_storage, "sigmoid")
        gradients["dA" + str(l)] = dA_previous_temp
        gradients["dW" + str(l + 1)] = dW_temp
        gradients["db" + str(l + 1)] = db_temp

    return gradients

# 5) Now Lets update the parameters

In [744]:
def update_parameters(parameters, gradients, learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    The passed-in dict is updated in place (its entries are rebound to new
    values) and also returned.

    Args:
        parameters: Dict holding "W1", "b1", ..., "W<L>", "b<L>".
        gradients: Dict holding the matching "dW<l>" and "db<l>" entries.
        learning_rate: Step size.

    Returns:
        The same parameters dict, after the update.
    """
    # One W and one b per layer.
    layer_count = len(parameters) // 2

    for layer in range(1, layer_count + 1):
        for prefix in ("W", "b"):
            key = f"{prefix}{layer}"
            step = learning_rate * gradients["d" + key]
            parameters[key] = parameters[key] - step

    return parameters

# 6) Now let's merge all of this into a single function for an L-layer neural network

In [745]:
def L_layer_neural_network_model(X, Y, list_nodes, learning_rate = 0.0075, iterations = 3000, print_cost=False):
    """Train an L-layer network (sigmoid hidden layers, softmax output).

    Runs plain gradient descent for a fixed number of iterations, then plots
    the cost curve with matplotlib.

    Args:
        X: Input features; columns are samples.
        Y: One-hot true labels.
        list_nodes: Layer sizes, input layer first.
        learning_rate: Gradient-descent step size.
        iterations: Number of gradient-descent iterations.
        print_cost: If True, print the cost every 200 iterations.

    Returns:
        The trained parameters dict ("W1", "b1", ...).
    """
    np.random.seed(1)
    costs = []

    # Step 1: initialise the parameters.
    parameters = initialization_of_parameters(list_nodes)

    # Loop (gradient descent)
    for i in range(0, iterations):

        # Step 2: forward propagation: linear -> sigmoid ... linear -> softmax.
        A_last, storage_forward_propagation_for_all_layers = forward_propagation_for_n_layers(X, parameters)

        # Step 3: calculation of cost.
        cost = cost_cal(A_last, Y)

        # Step 4: backward propagation.
        gradients = L_model_backward(A_last, Y, storage_forward_propagation_for_all_layers)

        # Step 5: update parameters.
        parameters = update_parameters(parameters, gradients, learning_rate)

        # Sample the cost every 200 iterations. It is always recorded so the
        # plot below has data even when print_cost is False; printing stays
        # opt-in.
        if i % 200 == 0:
            if print_cost:
                print ("Cost after iteration %i: %f" %(i, cost))
            costs.append(cost)

    # Plot the sampled cost curve (one point per 200 iterations).
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per 200)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters

# 6) Predict with updated parameters

In [746]:
def predict(X,parameters):
    """Run a forward pass with the given parameters.

    Args:
        X: Input features; columns are samples.
        parameters: Trained parameters dict ("W1", "b1", ...).

    Returns:
        Tuple (probability, param_storage): the output of the last layer and
        the per-layer storage produced by the forward pass.
    """
    # Forward propagation only; no parameters are changed here.
    forward_result = forward_propagation_for_n_layers(X, parameters)
    probability, param_storage = forward_result
    return probability, param_storage

# 7)Lets check the accuracy of the model

In [752]:
def accuracy(probability,true_class):
    """Print the classification accuracy and return the label vectors.

    Classes are labelled 1 .. n_classes, where n_classes is the number of
    rows of `probability`.

    Args:
        probability: Predicted class probabilities, shape (n_classes, m).
        true_class: One-hot true labels, shape (n_classes, m).

    Returns:
        Tuple (final_labels_predicted, final_labels_true): for every sample
        the predicted and the true class label.
    """
    print(f"The size of output probability :{probability.size} ")
    unique_probability=np.unique(probability)
    print(f"The size of unique output probability :{unique_probability.size} ")
    print(f"The size of true class :{true_class.size} ")

    # Column of labels 1..n_classes, sized from the data rather than
    # hard-coded for three classes.
    n_classes = probability.shape[0]
    label = np.arange(1, n_classes + 1).reshape(n_classes, 1)

    # Mark the most probable class of every column (on ties, the highest
    # label wins via the max below), then turn the masks into label numbers.
    predicted_class = (probability == probability.max(axis=0)).astype(int)
    final_labels_predicted = np.max(label * predicted_class, axis=0)
    final_labels_true = np.max(label * true_class, axis=0)

    # Accuracy is the share of samples whose predicted label equals the true
    # label. Summing the label differences would report 0% for a perfect
    # model and let errors of opposite sign cancel out.
    m = true_class.shape[1]
    correct = np.sum(final_labels_predicted == final_labels_true)
    Accuracy = float(correct) / m * 100

    print(f"Accuracy of model is {Accuracy}%")
    return final_labels_predicted, final_labels_true

# Features(X) and true label (Y) (one hot encoding)