In [1]:
#Importing numpy as the main library
import numpy as np
# I import main_functions which will have functions such as activation functions and others
from main_functions import *

In [2]:
# Initializing weights randomly by neurons per layer and the number of layers
def init_weights(number_layers, neurons_by_layer):
    """Create random weights and biases for every layer after the input layer.

    Parameters
    ----------
    number_layers : int
        Total number of layers, with the input layer counted as layer 0.
    neurons_by_layer : sequence of int
        Number of neurons in each layer; index 0 is the input size.

    Returns
    -------
    weights : dict
        Maps ``"W<l>"`` to an array of shape
        ``(neurons_by_layer[l], neurons_by_layer[l-1])``.
    biases : dict
        Maps ``"b<l>"`` to an array of shape ``(neurons_by_layer[l], 1)``.

    Notes
    -----
    Every value is drawn with ``np.random.rand``, i.e. uniformly from [0, 1).
    For each layer the weight matrix is sampled before the bias vector, so the
    result is reproducible under a fixed ``np.random.seed``.
    """
    weights, biases = {}, {}

    # Layer 0 is the input and owns no parameters, so start at layer 1.
    for layer in range(1, number_layers):
        fan_in = neurons_by_layer[layer - 1]
        fan_out = neurons_by_layer[layer]

        # One row per neuron of this layer, one column per neuron feeding it.
        weights[f"W{layer}"] = np.random.rand(fan_out, fan_in)
        # Column vector so it broadcasts across the sample axis.
        biases[f"b{layer}"] = np.random.rand(fan_out, 1)

    return weights, biases

In [3]:
# Doing a function that performs a whole forward iteration for the Neural Network
def forward_iteration(X, weights, biases, activation_names, number_layers, neurons_by_layer):
    """Run one complete forward pass through the network.

    Parameters
    ----------
    X : array
        Network input; stored in the activation cache as ``"A0"``.
    weights : dict
        Weight matrices keyed ``"W1"``, ``"W2"``, ...
    biases : dict
        Bias vectors keyed ``"b1"``, ``"b2"``, ...
    activation_names : list of str
        Activation name for each non-input layer; entry ``l-1`` belongs to
        layer ``l``.
    number_layers : int
        Total number of layers, with the input layer counted as layer 0.
    neurons_by_layer : sequence of int
        Accepted for interface compatibility; not read by this function.

    Returns
    -------
    Z_cache : dict
        ``"Z<l>"`` -> value returned by ``linear_activation`` for layer ``l``.
    A_cache : dict
        ``"A<l>"`` -> value returned by ``forward_activation`` for layer ``l``
        (plus ``"A0"`` for the input).
    Y_hat : array
        Activation of the last layer, ``A_cache["A<number_layers-1>"]``.

    Both caches are kept because the backpropagation step needs them.
    """
    activations = {"A0": X}
    pre_activations = {}

    for layer in range(1, number_layers):
        # activation_names has no slot for the input layer, hence the -1 shift.
        act_name = activation_names[layer - 1]

        layer_input = activations[f"A{layer - 1}"]
        W_l = weights[f"W{layer}"]
        b_l = biases[f"b{layer}"]

        # The linear part is stored separately from the activated output
        # because backpropagation needs the Z values as well.
        pre_activations[f"Z{layer}"] = linear_activation(W_l, layer_input, b_l)
        activations[f"A{layer}"] = forward_activation(W_l, layer_input, b_l, act_name)

    # The network output is the activation of the final layer.
    Y_hat = activations[f"A{number_layers - 1}"]

    return pre_activations, activations, Y_hat
    


Since I assume that the network ends with a sigmoid activation, I will use the following loss function (binary cross-entropy):

$  L(\hat{y}, y) = -(y\log(\hat{y}) + (1-y)\log(1-\hat{y}))$

$ Cost = J(w, b) = \frac{1}{m}\sum_{i=1}^{m} L(\hat{y}^{(i)}, y^{(i)})$

In [4]:
# Once the values have gone forward through the network, I calculate the loss function.
# I assume that the final activation function is the sigmoid, so the loss is the binary cross-entropy.
def loss_function(Y_hat, Y, eps=1e-15):
    """Element-wise binary cross-entropy loss between predictions and labels.

    Computes ``-(Y * log(Y_hat) + (1 - Y) * log(1 - Y_hat))`` per entry.

    Parameters
    ----------
    Y_hat : array-like, shape (n, m)
        Predicted probabilities (output of the final sigmoid layer).
    Y : array-like, shape (n, m)
        Ground-truth labels, same shape as ``Y_hat``.
    eps : float, optional
        Predictions are clipped to ``[eps, 1 - eps]`` before the logarithm is
        taken. It should be a small positive number well below 0.5.

    Returns
    -------
    L : ndarray, shape (n, m)
        The loss of every individual entry (not averaged).

    Raises
    ------
    AssertionError
        If ``Y`` and ``Y_hat`` do not have the same 2-D shape.
    """
    Y = np.asarray(Y)
    # Work in float64 so that 1 - eps is distinguishable from 1.
    Y_hat = np.asarray(Y_hat, dtype=float)

    # Make sure n (rows) and m (columns) agree between the results and the labels.
    n, m = np.shape(Y)
    n_hat, m_hat = np.shape(Y_hat)
    assert n == n_hat and m == m_hat, "Y_hat and Y must have the same shape"

    # A saturated sigmoid returns exactly 0.0 or 1.0; log(0) is -inf and
    # 0 * -inf is nan, which would poison the cost. Clipping keeps every
    # logarithm finite while leaving non-saturated predictions unchanged.
    Y_hat_safe = np.clip(Y_hat, eps, 1 - eps)

    L = -(Y * np.log(Y_hat_safe) + (1 - Y) * np.log(1 - Y_hat_safe))

    return L

# Now I develop the cost function
def Cost_function(Y_hat, Y):
    """Return the cost: the summed per-entry loss divided by the sample count.

    Parameters
    ----------
    Y_hat : array, shape (n, m)
        Network predictions.
    Y : array, shape (n, m)
        Ground-truth labels.

    Returns
    -------
    float
        ``(1 / m) * sum(loss_function(Y_hat, Y))``, where ``m`` is the number
        of columns of the loss matrix.
    """
    per_entry_loss = loss_function(Y_hat, Y)

    # Only the column count (m, the number of samples) is used for averaging.
    _, num_samples = np.shape(per_entry_loss)

    total_loss = np.sum(per_entry_loss)
    return 1 / num_samples * total_loss

$$ da^{[L]} = \frac{-y}{\hat{y}} + \frac{(1-y)}{(1-\hat{y})} $$
$$ \text{where } \hat{y} \equiv a^{[L]} \text{ is the activation of the last layer}$$

In [5]:
# Here I start the backpropagation algorithm.




In [6]:
# Draft cell where I try out ideas (scratch work, not a finished step).
# Fix the seed so the random numbers are the same on every run.
# The order of the random calls below matters for reproducibility.
np.random.seed(2)
# Random input of shape (5, 10), scaled by 10; the 5 rows match neurons_by_layer[0].
X = np.random.randn(5,10)*10
# Neurons per layer, input layer first.
neurons_by_layer = [5, 4, 4, 3, 1]
number_layers = len(neurons_by_layer)
# One activation name per non-input layer (layers 1 to 4).
activation_names = ['relu', 'relu', 'relu', 'sigmoid']

weights, biases = init_weights(number_layers, neurons_by_layer)

Z_cache, A_cache, Y_hat = forward_iteration(X, weights, biases, activation_names, number_layers, neurons_by_layer)


# Random integer test arrays with values in {0, 1, 2, 3} and 15 columns.
# NOTE(review): A_prev is not used again in this cell.
A_prev = np.random.randint(low=0, high=4, size =(4,15))
dZ = np.random.randint(low=0, high=4, size =(3,15))

# W3 has shape (3, 4) because neurons_by_layer[3] = 3 and neurons_by_layer[2] = 4.
W3 = weights['W3']
# NOTE(review): A2 is not used again in this cell.
A2 = A_cache["A2"]

# Check that derivatives_dA_prev agrees element-wise with W3.T @ dZ.
# derivatives_dA_prev is not defined in this notebook; presumably it comes from
# the star import of main_functions -- TODO confirm.
# NOTE(review): this is an exact == comparison on floating-point results.
np.matmul(W3.T, dZ) == derivatives_dA_prev(W3, dZ)


array([[ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True]])