# An implementation of gradient descent to optimize shallow neural network's parameters

In [1]:
import numpy as np

# Helper functions

### Activation functions

In [2]:
# ReLu
def ReLu(Z):
    """
    This function applies a ReLu transformation to the input Z.
    """
    
    return np.maximum(0, Z)


# test
Z_test = np.array([[-1, 2, 3, -4], [1, 5, -3, 2]])
print("ReLu(Z_test) =", ReLu(Z_test))

ReLu(Z_test) = [[0 2 3 0]
 [1 5 0 2]]


In [3]:
# sigmoid
def sigmoid(Z):
    """
    This function applies a Sigmoid transformation to the input Z.
    """
    
    return 1 / (1 + np.exp(Z))


# test
Z_test = np.array([[-1, 2, 3, -4], [1, 5, -3, 2]])
print("sigmoid(Z_test) =", sigmoid(Z_test))

sigmoid(Z_test) = [[0.73105858 0.11920292 0.04742587 0.98201379]
 [0.26894142 0.00669285 0.95257413 0.11920292]]


### Derivatives

In [4]:
def ReLu_prime(Z):
    """
    This function returns the derivative of ReLu given input Z.
    """
    
    return np.where(Z < 0, 0, 1)


# test
Z_test = np.array([[-1, 2, 3, -4], [1, 5, -3, 2]])
print("ReLu_prime(Z_test) =", ReLu_prime(Z_test))

ReLu_prime(Z_test) = [[0 1 1 0]
 [1 1 0 1]]


### Function for one step of forward propagation

In [5]:
# one layer of forward prop
def get_activations(A_input, W, b, activation):
    """
    This function computes and returns the activisions of one layer of neurons.
    """
    
    Z = np.dot(W, A_input) + b
    return activation(Z)


# test
A_input_test = np.array([[-1, 2, 3, -4], [1, 5, -3, 2]])
W_test = np.array([[1, 2], [0, -1], [-3,1]])
b_test = np.array([[0], [-1], [2]])
activision_test = ReLu
print("A_input_test =", A_input_test)
print("W_test =", W_test)
print("b_test =", b_test)
print("activisions =", get_activations(A_input_test, W_test, b_test, activision_test))

A_input_test = [[-1  2  3 -4]
 [ 1  5 -3  2]]
W_test = [[ 1  2]
 [ 0 -1]
 [-3  1]]
b_test = [[ 0]
 [-1]
 [ 2]]
activisions = [[ 1 12  0  0]
 [ 0  0  2  0]
 [ 6  1  0 16]]


# Simulate data

In [6]:
# simulate m examples of nx dimension column vector, X, with each element being a random value in [-0.5, 0.5] following
# a uniform distribution
nx = 2
m = 10
X = np.random.rand(nx, m) - 0.5
print("X.shape =", X.shape)

X.shape = (2, 10)


In [8]:
# set the parameters for simulated y, for a 2-layer shallow network, with n1=3 and n2=1
W1 = np.array([[-0.5, 0.5], [-1, 0.5], [0.5, 1]])
b1 = np.array([[0], [-1], [0.5]])
W2 = np.array([[1, -1, -0.5]])
b2 = np.array([[0]])

In [9]:
# forward pass
A1 = get_activations(A_input=X, W=W1, b=b1, activation=ReLu)
A2 = get_activations(A_input=A1, W=W2, b=b2, activation=sigmoid)
print("A2.shape =", A2.shape)

A2.shape = (1, 10)


In [12]:
# simulate m examples of 1-dimensional column vector, Y
Y = np.where(np.random.rand(1, m) < A2, 1, 0)
print("Y.shape =", Y.shape)

Y.shape = (1, 10)


# Fit a 2-layer neural network to the simulated data