## b)

In [9]:
import numpy as np
import copy

class Model:
    def __init__(self,X,T):
        self.X = X
        self.T = T ## T labels
        
    def layer_sizes(self):
        n_x = self.X.shape[0] ## 10 rows, 1 column 
        n_h = 50    
        n_y = self.T.shape[0] ## 3
        return (n_x,n_h,n_y)

    def pre_activation(self,layer,w,b): ## Step 1
        return (np.dot(w,layer)+b)
        
    def activation_ReLU(self,p):       ## Step 2
        return np.maximum(0,p)
        
    def output_Softmax(self,z2):      ## Step 3
        numerator = np.exp(z2)
        denominator = sum(np.exp(z2))
        return numerator/denominator

def generate_params(model):
    (n_x, n_h, n_y) = model.layer_sizes()
    np.random.seed(50)
    ## Wts and biases for the first layer i.e. hidden layer,
    W1 = np.random.uniform(0,0.1,(n_h,n_x))
    b1 = np.full((n_h,1),np.random.uniform(0,0.1))
    ## Wts and biases for the output layer,
    W2 = np.random.uniform(0,0.1,(n_y,n_h))
    b2 = np.full((n_y,1),(np.random.uniform(0,0.1)))
    assert (W1.shape == (n_h, n_x))
    assert (b1.shape == (n_h, 1))
    assert (W2.shape == (n_y, n_h))
    assert (b2.shape == (n_y, 1))
    return {"W1":W1,"b1":b1,"W2":W2,"b2":b2}
    
def forward_pass(X_train, params):
    (n_x,n_h,n_y) = X_train.layer_sizes()
    print('The size of the input layer is: ',n_x)
    print('The size of the hidden layer is: ',n_h)
    print('The size of the output layer is: ',n_y)
    print('-'*100)
    ## Step3: Caculate the forward pass: A1,Z1
    # Pre-activation:
    Z1 = X_train.pre_activation(X_train.X,params["W1"],params["b1"])    ## 1st layer - [wT].[x] + [b1] 
    print('The size of the pre-activation calculation is :',Z1.shape)
    print('-'*100)

    A1 = X_train.activation_ReLU(Z1)                        ## Activation fn - ReLU
    print('The size of the activation matrix is :',A1.shape)
    print('-'*100)

    Z2 = X_train.pre_activation(A1,params["W2"],params["b2"])              ## 2nd layer - [wT].[x] + [b2]
    print('The size of the 2nd layer pre-activation is :',Z2.shape)
    print('-'*100)

    Y = X_train.output_Softmax(Z2) ## A2                   ## Output fn - Softmax
    print('The size of the output layer is :',Y.shape)
    print('-'*100)

    print('The output vector is :\n',Y)
    print('-'*100)
    return {"A1": A1, "Z1": Z1, "Y": Y, "Z2": Z2}

##Main function:
X = np.array([[0.5],[0.6],[0.1],[0.25],[0.33],[0.9],[0.88],[0.76],[0.69],[0.95]])
T = np.array([[1],[0],[0]])
## Step1: Setting up the model layers and their vector shapes:
model = Model(X, T)
## Step 2: Initialize parameters.
params = generate_params(model)
print('The size of the weights for the 1st layer i.e. hidden layer is:',params["W1"].shape)
print('The size of the bias for the 1st layer i.e. hidden layer is:',params["b1"].shape)
print('The size of the weights for the 2st layer i.e. output layer is:',params["W2"].shape)
print('The size of the bias for the 2st layer i.e. output layer is:',params["b2"].shape)
print('-'*100)
fwd_result = forward_pass(model, params)

The size of the weights for the 1st layer i.e. hidden layer is: (50, 10)
The size of the bias for the 1st layer i.e. hidden layer is: (50, 1)
The size of the weights for the 2st layer i.e. output layer is: (3, 50)
The size of the bias for the 2st layer i.e. output layer is: (3, 1)
----------------------------------------------------------------------------------------------------
The size of the input layer is:  10
The size of the hidden layer is:  50
The size of the output layer is:  3
----------------------------------------------------------------------------------------------------
The size of the pre-activation calculation is : (50, 1)
----------------------------------------------------------------------------------------------------
The size of the activation matrix is : (50, 1)
----------------------------------------------------------------------------------------------------
The size of the 2nd layer pre-activation is : (3, 1)
-------------------------------------------------

## c)

In [10]:
## Above T is referring to the Ground Truth and Y is referring to the forward pass output vector - 
# cross_entropy_mul = -sum(np.multiply(np.log10(Y),X_train.T))
#print(Y.shape)
#print(X_train.T.shape)
#print(Y)
#print(X_train.T)
def calculate_loss(Y, T):
    cross_entropy = -sum(np.dot(np.log(Y).T, model.T)) ## (3,1) . (3,1) so k = 3
    print('The Cross Entropy Loss is:\n ',cross_entropy)
    print('\nTo verify the forward pass we will take log(3): \n',np.log(3))
    print('-'*100)
    return cross_entropy
calculate_loss(fwd_result["Y"], model.T)

The Cross Entropy Loss is:
  [1.07353671]

To verify the forward pass we will take log(3): 
 1.0986122886681098
----------------------------------------------------------------------------------------------------


array([1.07353671])

## d)

In [11]:
def backward_pass(X_train, params, fwd_result):
    m = X_train.X.shape[1]
    
    # First, retrieve W1 and W2 from the dictionary "parameters".
    W1 = params["W1"]
    W2 = params["W2"]
        
    # Retrieve also A1 and Y from dictionary "cache".
    A1 = fwd_result["A1"]
    Y = fwd_result["Y"]
  
    # Backward propagation: calculate dW1, db1, dW2, db2. 
    dZ2 = Y - X_train.T
    dW2 = np.dot(dZ2,np.transpose(A1))/m
    db2 = np.sum(dZ2,axis = 1, keepdims = True)/m
    dZ1 = np.dot(np.transpose(W2),dZ2) * (1-np.power(A1,2))
    dW1 = np.dot(dZ1,np.transpose(X_train.X)) /m
    db1 = np.sum(dZ1, axis =1, keepdims = True)/m

    
    delta_dict = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    delta_dict_shape = {"dW1": delta_dict["dW1"].shape, 
                        "db1": delta_dict["db1"].shape, 
                        "dW2": delta_dict["dW2"].shape, 
                        "db2": delta_dict["db2"].shape}
    print('All delta matrices shapes are', delta_dict_shape)
    
    return delta_dict


In [12]:
grads =  backward_pass(model, params, fwd_result)

All delta matrices shapes are {'dW1': (50, 10), 'db1': (50, 1), 'dW2': (3, 50), 'db2': (3, 1)}


## e)

In [13]:
def update_params(params, grads, learning_rate = 1.2):
    W1 = params["W1"]
    b1 = params["b1"]
    W2 = params["W2"]
    b2 = params["b2"]
    # Retrieve each gradient from the dictionary "grads"
    dW1 = grads['dW1']
    db1 = grads['db1']
    dW2 = grads['dW2']
    db2 = grads['db2'] 
    # Update rule for each parameter for each data points of the entire dataset.
    W1 = W1 - (learning_rate * dW1)
    b1 = b1 - (learning_rate * db1) 
    W2 = W2 - (learning_rate * dW2)
    b2 = b2 - (learning_rate * db2)
    # Return updated params.
    return {"W1": W1, "b1": b1, "W2": W2, "b2": b2}

In [16]:
def neural_network(model, params, num_iterations = 10000):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    for k in range(0, num_iterations):
        loss = 0
        for i in range(0, model.X.shape[0]):
            xi = model.X[i:i+1, :]
            # yi = model.T[i:i+1, :]
            new_model = Model(xi, model.T)
            
            # Forward propagation.
            fwd_result = forward_pass(new_model, params)
            loss += calculate_loss(Y, new_model.T)
            grads = backward_pass(new_model, params, fwd_result)
            params = update_params(params, grads)
        # Print the cost every 1000 iterations
        if i % 1000 == 0:
            print ("Cost after iteration %i: %f" % (k, loss))

neural_network(model, copy.deepcopy(params))

The size of the input layer is:  1
The size of the hidden layer is:  50
The size of the output layer is:  3
----------------------------------------------------------------------------------------------------


ValueError: shapes (50,10) and (1,1) not aligned: 10 (dim 1) != 1 (dim 0)

In [None]:
a