In [1]:
# NumPy for vectorized array math
import numpy as np

In [2]:
# Sigmoid activation function for output layer
def sigmoid(z):
    """Apply the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Parameters
    ----------
    z : scalar or numpy.ndarray
        Value(s) to squash.

    Returns
    -------
    scalar or numpy.ndarray
        Result of the logistic function, with the same shape as ``z``.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [3]:
# Reading the input and output layer sizes from the data
def layer_sizes(X,Y):
    """Return the leading dimensions of ``X`` and ``Y``.

    The first axis of each array is taken as the layer size, i.e. the
    result is ``(X.shape[0], Y.shape[0])``.

    Parameters
    ----------
    X, Y : objects exposing a ``shape`` attribute (e.g. numpy arrays)

    Returns
    -------
    tuple
        ``(n_x, n_y)``: size of the input layer and of the output layer.
    """
    return X.shape[0], Y.shape[0]

In [4]:
# Initializing weights and biases
def initialize_parameters(n_x,n_h,n_y):
    """Create the initial weights and biases of the two-layer network.

    The forward pass computes ``np.dot(w1, X) + b1`` and
    ``np.dot(w2, A1) + b2``, so each weight matrix is laid out as
    ``(units in this layer, units in previous layer)`` and each bias is a
    column vector with one entry per unit of its own layer.

    Parameters
    ----------
    n_x : int
        Size of the input layer.
    n_h : int
        Size of the hidden layer.
    n_y : int
        Size of the output layer.

    Returns
    -------
    dict
        ``'w1'`` of shape ``(n_h, n_x)``, ``'b1'`` of shape ``(n_h, 1)``,
        ``'w2'`` of shape ``(n_y, n_h)`` and ``'b2'`` of shape ``(n_y, 1)``.

    Notes
    -----
    Reseeds NumPy's global random generator with ``1`` on every call so the
    initial weights are reproducible.
    """
    np.random.seed(1)

    # Small random weights break the symmetry between hidden units;
    # biases can safely start at zero.
    w1 = np.random.randn(n_h,n_x)*0.01
    b1 = np.zeros((n_h,1))
    w2 = np.random.randn(n_y,n_h)*0.01
    b2 = np.zeros((n_y,1))

    # Storing weights and biases
    parameters = {'w1':w1,
                  'w2':w2,
                  'b1':b1,
                  'b2':b2 }

    # returning parameters
    return parameters

In [5]:
# Forward pass through the two-layer network
def forward_propagate(X,parameters):
    """Run the input through the hidden (tanh) and output (sigmoid) layers.

    Parameters
    ----------
    X : numpy.ndarray
        Input data; must be shape-compatible with ``np.dot(w1, X) + b1``.
    parameters : dict
        Holds the arrays ``'w1'``, ``'b1'``, ``'w2'`` and ``'b2'``.

    Returns
    -------
    A2 : numpy.ndarray
        Sigmoid activation of the output layer.
    cache : dict
        The intermediate values ``'z1'``, ``'A1'``, ``'z2'`` and ``'A2'``.
    """
    # Pull the four parameter arrays out of the dictionary in one go
    w1, b1, w2, b2 = (parameters[key] for key in ('w1', 'b1', 'w2', 'b2'))

    # Hidden layer: affine map followed by tanh
    hidden_pre = np.dot(w1, X) + b1
    hidden_act = np.tanh(hidden_pre)

    # Output layer: affine map followed by sigmoid
    output_pre = np.dot(w2, hidden_act) + b2
    output_act = sigmoid(output_pre)

    # Keep every intermediate value under the same keys as before
    cache = dict(z1=hidden_pre, A1=hidden_act, z2=output_pre, A2=output_act)
    return output_act, cache

In [6]:
# Computing the cross-entropy cost used to monitor training
def compute_cost(A2,Y):
    """Compute the mean binary cross-entropy between predictions and labels.

    ``cost = -(1/m) * sum(Y*log(A2) + (1-Y)*log(1-A2))`` where ``m`` is
    ``Y.shape[1]``.

    Parameters
    ----------
    A2 : numpy.ndarray
        Predicted probabilities, same shape as ``Y``.
    Y : numpy.ndarray
        Target labels (0 or 1); the second axis indexes the examples.

    Returns
    -------
    float
        The averaged cross-entropy cost.
    """
    m = Y.shape[1]

    # Keep probabilities strictly inside (0, 1) so that a saturated sigmoid
    # cannot produce log(0) = -inf (or 0 * -inf = nan) in the cost.
    eps = 1e-15
    probs = np.clip(np.asarray(A2, dtype=float), eps, 1 - eps)

    # Both halves of the cross-entropy: the positive-label term and the
    # negative-label term.
    logprobs = np.multiply(Y, np.log(probs)) + np.multiply(1 - Y, np.log(1 - probs))

    # Average over the m examples exactly once.
    cost = -np.sum(logprobs) / m

    # returning cost
    return cost

In [7]:
# Backward passing step
def backward_propagate(X,Y,parameters,cache):
    """Compute the gradients of the cost with respect to all parameters.

    Parameters
    ----------
    X : numpy.ndarray
        Input data; the second axis indexes the ``m`` examples.
    Y : numpy.ndarray
        Target labels, same shape as ``cache['A2']``.
    parameters : dict
        Must contain ``'w2'`` (the only parameter the gradients depend on).
    cache : dict
        Must contain the activations ``'A1'`` and ``'A2'`` produced by the
        forward pass.

    Returns
    -------
    dict
        ``'dw1'``, ``'db1'``, ``'dw2'`` and ``'db2'``, each with the same
        shape as the parameter it belongs to.
    """
    m = X.shape[1]

    # Only the output-layer weights and the two activations are needed
    w2 = parameters['w2']
    A1 = cache['A1']
    A2 = cache['A2']

    # Output layer: A2 - Y is the error term of a sigmoid output trained
    # with cross-entropy.
    dz2 = A2 - Y
    dw2 = (1/m)*(np.dot(dz2,A1.T))
    db2 = (1/m)*(np.sum(dz2,axis = 1,keepdims=True))

    # Hidden layer: push the error back through w2 and the tanh derivative
    # (1 - tanh^2).
    dz1 = (np.dot(w2.T,dz2)*(1-(A1)**2))
    dw1 = (1/m)*(np.dot(dz1,X.T))
    # The hidden bias gradient comes from the hidden-layer error dz1,
    # not from the output-layer error dz2.
    db1 = (1/m)*(np.sum(dz1,axis = 1,keepdims = True))

    # Storing Derivatives
    grads = {'dw1':dw1,
             'db1':db1,
             'dw2':dw2,
             'db2':db2 }

    # returning derivatives
    return grads

In [8]:
# Updating parameters
def update_parameters(parameters,grads,learning_rate=1.2):
    """Apply one gradient-descent step and return the updated parameters.

    Each parameter ``p`` becomes ``p - learning_rate * dp``. The arrays in
    ``parameters`` are left untouched; the returned dictionary holds newly
    allocated arrays.

    Parameters
    ----------
    parameters : dict
        Current ``'w1'``, ``'b1'``, ``'w2'`` and ``'b2'`` arrays.
    grads : dict
        Matching gradients ``'dw1'``, ``'db1'``, ``'dw2'`` and ``'db2'``.
    learning_rate : float, optional
        Step size of the update (default ``1.2``).

    Returns
    -------
    dict
        Updated ``'w1'``, ``'b1'``, ``'w2'`` and ``'b2'``.
    """
    # Plain subtraction (rather than ``-=``) builds new arrays, so the
    # caller's parameter arrays are not modified as a side effect.
    w1 = parameters['w1'] - learning_rate*grads['dw1']
    b1 = parameters['b1'] - learning_rate*grads['db1']
    w2 = parameters['w2'] - learning_rate*grads['dw2']
    b2 = parameters['b2'] - learning_rate*grads['db2']

    # storing updated parameters
    updated = {'w1':w1,
               'b1':b1,
               'w2':w2,
               'b2':b2 }

    # returning parameters
    return updated

In [9]:
# defining model
def nn_model(X,Y,n_h,epochs,print_cost=False,learning_rate=1.2):
    """Train the two-layer network with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray
        Training inputs; the first axis gives the input layer size.
    Y : numpy.ndarray
        Training labels; the first axis gives the output layer size.
    n_h : int
        Number of hidden units.
    epochs : int
        Number of gradient-descent iterations to run.
    print_cost : bool, optional
        When true, print the cost every 100 iterations.
    learning_rate : float, optional
        Step size passed to ``update_parameters`` (default ``1.2``, the
        value that was previously always used).

    Returns
    -------
    dict
        The learned parameters ``'w1'``, ``'b1'``, ``'w2'`` and ``'b2'``.
    """
    # Note: initialize_parameters reseeds NumPy's generator itself, so this
    # seed does not influence the initial weights.
    np.random.seed(3)

    # Initializing sizes of features and labels
    n_x,n_y = layer_sizes(X,Y)

    # initializing weights and biases
    parameters = initialize_parameters(n_x,n_h,n_y)

    # one full forward/backward pass and parameter update per epoch
    for i in range(epochs):

        # Forward Pass Step
        A2, cache = forward_propagate(X,parameters)

        # Computing cost (measured before this iteration's update)
        cost = compute_cost(A2,Y)

        # Backward pass step
        grads = backward_propagate(X,Y,parameters,cache)

        # updating parameters with the requested step size
        parameters = update_parameters(parameters,grads,learning_rate)

        # Conditioning to print cost at every 100 iteration
        if print_cost and i%100 == 0:
            print(f"Cost after {i} iterations: {cost}")

    # returning parameters
    return parameters