In [49]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import numpy as np

In [50]:
# Funcion de activacion 
def sigmoid(z):
    return 1/(1 + np.exp(-z))

In [51]:
# Inicializacion de los parametros con 0's
def initialize_parameters_zero(n_x, n_h, n_y):
    W1 = np.zeros((n_h, n_x))
    b1 = np.zeros((n_h, 1))
    W2 = np.zeros((n_y, n_h))
    b2 = np.zeros((n_y, 1))
    
    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2
    }
    return parameters

In [52]:
# Inicializacion de los parametros 
def initialize_parameters(n_x, n_h, n_y):
    W1 = np.random.randn(n_h, n_x)
    b1 = np.random.randn(n_h, 1)
    W2 = np.random.randn(n_y, n_h)
    b2 = np.random.randn(n_y, 1)
    
    parameters = {
        "W1": W1,
        "b1" : b1,
        "W2": W2,
        "b2" : b2
    }
    return parameters

In [53]:
def forward_prop(X, parameters):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    cache = {
        "A1": A1,
        "A2": A2
    }
    return A2, cache

In [42]:
# Funcion de perdida
def loss_function(A2, Y):
    cost = -np.sum(np.multiply(Y, np.log(A2)) +  np.multiply(1-Y, np.log(1-A2)))/m
    cost = np.squeeze(cost)

    return cost

In [43]:
def backward_prop(X, Y, cache, parameters):
    A1 = cache["A1"]
    A2 = cache["A2"]

    W2 = parameters["W2"]

    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T)/m
    db2 = np.sum(dZ2, axis=1, keepdims=True)/m
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1-np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T)/m
    db1 = np.sum(dZ1, axis=1, keepdims=True)/m

    grads = {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2
    }

    return grads

In [54]:
###### 3


def mse_loss(A2, Y):
    cost = np.mean(np.square(A2 - Y))
    return cost

def backward_prop_mse(X, Y, cache, parameters):
    A1 = cache["A1"]
    A2 = cache["A2"]
    W2 = parameters["W2"]

    dZ2 = A2 - Y
    dW2 = np.dot(dZ2, A1.T)/m
    db2 = np.sum(dZ2, axis=1, keepdims=True)/m
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1-np.power(A1, 2))
    dW1 = np.dot(dZ1, X.T)/m
    db1 = np.sum(dZ1, axis=1, keepdims=True)/m

    grads = {
        "dW1": dW1,
        "db1": db1,
        "dW2": dW2,
        "db2": db2
    }

    return grads

def model_mse(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate):
    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(0, num_of_iters+1):
        a2, cache = forward_prop(X, parameters)

        cost = mse_loss(a2, Y)

        grads = backward_prop_mse(X, Y, cache, parameters)

        parameters = update_parameters(parameters, grads, learning_rate)

        if(i % 100 == 0):
            print('Cost after iteration# {:d}: {:f}'.format(i, cost))

    return parameters


In [55]:
def update_parameters(parameters, grads, learning_rate):
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]

    W1 = W1 - learning_rate*dW1
    b1 = b1 - learning_rate*db1
    W2 = W2 - learning_rate*dW2
    b2 = b2 - learning_rate*db2
    
    new_parameters = {
        "W1": W1,
        "W2": W2,
        "b1" : b1,
        "b2" : b2
    }

    return new_parameters

In [45]:
def model(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate, initialization="random"):
    if initialization == "random":
        parameters = initialize_parameters(n_x, n_h, n_y)
    elif initialization == "zero":
        parameters = initialize_parameters_zero(n_x, n_h, n_y)

    for i in range(0, num_of_iters+1):
        a2, cache = forward_prop(X, parameters)

        cost = loss_function(a2, Y)

        grads = backward_prop(X, Y, cache, parameters)

        parameters = update_parameters(parameters, grads, learning_rate)

        if(i % 100 == 0):
            print('Cost after iteration# {:d}: {:f}'.format(i, cost))

    return parameters


In [56]:
def predict(X, parameters):
    a2, cache = forward_prop(X, parameters)
    yhat = a2
    yhat = np.squeeze(yhat)
    if(yhat >= 0.5):
        y_predict = 1
    else:
        y_predict = 0

    return y_predict

In [36]:
####### 1

np.random.seed(2)

# The 4 training examples by columns
X = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])

# The outputs of the XOR for every example in X
Y = np.array([[0, 1, 1, 0]])

# No. of training examples
m = X.shape[1]

# Set the hyperparameters
n_x = 2     # No. of neurons in first layer
n_h = 2     # No. of neurons in hidden layer
n_y = 1     # No. of neurons in output layer
num_of_iters = 1000
learning_rate = 0.3

# Inicialización aleatoria
print("Inicialización aleatoria:")
trained_parameters_random = model(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate, initialization="random")

# Test 2X1 vector to calculate the XOR of its elements. 
X_test = np.array([[1], [1]])
y_predict = predict(X_test, trained_parameters_random)
print('Neural Network prediction for example ({:d}, {:d}) is {:d}'.format(X_test[0][0], X_test[1][0], y_predict))

# Inicialización en 0
print("\nInicialización en 0:")
trained_parameters_zero = model(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate, initialization="zero")

# Test 2X1 vector to calculate the XOR of its elements. 
y_predict = predict(X_test, trained_parameters_zero)
print('Neural Network prediction for example ({:d}, {:d}) is {:d}'.format(X_test[0][0], X_test[1][0], y_predict))

Inicialización aleatoria:
Cost after iteration# 0: 1.052558
Cost after iteration# 100: 0.695402
Cost after iteration# 200: 0.693668
Cost after iteration# 300: 0.693206
Cost after iteration# 400: 0.692966
Cost after iteration# 500: 0.692779
Cost after iteration# 600: 0.692587
Cost after iteration# 700: 0.692352
Cost after iteration# 800: 0.692030
Cost after iteration# 900: 0.691539
Cost after iteration# 1000: 0.690679
Neural Network prediction for example (1, 1) is 1

Inicialización en 0:
Cost after iteration# 0: 0.693147
Cost after iteration# 100: 0.693147
Cost after iteration# 200: 0.693147
Cost after iteration# 300: 0.693147
Cost after iteration# 400: 0.693147
Cost after iteration# 500: 0.693147
Cost after iteration# 600: 0.693147
Cost after iteration# 700: 0.693147
Cost after iteration# 800: 0.693147
Cost after iteration# 900: 0.693147
Cost after iteration# 1000: 0.693147
Neural Network prediction for example (1, 1) is 1


In [48]:
####### 2


np.random.seed(2)

# The 4 training examples by columns
X = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])

# The outputs of the XOR for every example in X
Y = np.array([[0, 1, 1, 0]])

# No. of training examples
m = X.shape[1]

# Set the hyperparameters
n_x = 2     # No. of neurons in first layer
n_h = 2     # No. of neurons in hidden layer
n_y = 1     # No. of neurons in output layer
num_of_iters = 1000

learning_rates = [0.01, 0.1, 0.5]

for lr in learning_rates:
    print(f"\nTesting with learning rate: {lr}")
    trained_parameters = model(X, Y, n_x, n_h, n_y, num_of_iters, lr)

    # Test 2X1 vector to calculate the XOR of its elements. 
    X_test = np.array([[1], [1]])
    y_predict = predict(X_test, trained_parameters)
    print('Neural Network prediction for example ({:d}, {:d}) with learning rate {:.2f} is {:d}'.format(X_test[0][0], X_test[1][0], lr, y_predict))


Testing with learning rate: 0.01
Cost after iteration# 0: 1.052558
Cost after iteration# 100: 0.948307
Cost after iteration# 200: 0.864690
Cost after iteration# 300: 0.803459
Cost after iteration# 400: 0.765007
Cost after iteration# 500: 0.742498
Cost after iteration# 600: 0.729047
Cost after iteration# 700: 0.720556
Cost after iteration# 800: 0.714882
Cost after iteration# 900: 0.710898
Cost after iteration# 1000: 0.707985
Neural Network prediction for example (1, 1) with learning rate 0.01 is 1

Testing with learning rate: 0.1
Cost after iteration# 0: 1.013592
Cost after iteration# 100: 0.674649
Cost after iteration# 200: 0.629729
Cost after iteration# 300: 0.590321
Cost after iteration# 400: 0.561134
Cost after iteration# 500: 0.540238
Cost after iteration# 600: 0.524827
Cost after iteration# 700: 0.512317
Cost after iteration# 800: 0.498693
Cost after iteration# 900: 0.452745
Cost after iteration# 1000: 0.312239
Neural Network prediction for example (1, 1) with learning rate 0.10 

In [57]:
##### 3

np.random.seed(2)

# The 4 training examples by columns
X = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])

# The outputs of the XOR for every example in X
Y = np.array([[0, 1, 1, 0]])

# No. of training examples
m = X.shape[1]

# Set the hyperparameters
n_x = 2     # No. of neurons in first layer
n_h = 2     # No. of neurons in hidden layer
n_y = 1     # No. of neurons in output layer
num_of_iters = 1000
learning_rate = 0.3

# Entrenamiento con MSE
print("\nEntrenamiento con MSE:")
trained_parameters_mse = model_mse(X, Y, n_x, n_h, n_y, num_of_iters, learning_rate)

# Test 2X1 vector to calculate the XOR of its elements. 
X_test = np.array([[1], [1]])
y_predict = predict(X_test, trained_parameters_mse)
print('Neural Network prediction for example ({:d}, {:d}) is {:d}'.format(X_test[0][0], X_test[1][0], y_predict))


Entrenamiento con MSE:
Cost after iteration# 0: 0.357529
Cost after iteration# 100: 0.251127
Cost after iteration# 200: 0.250260
Cost after iteration# 300: 0.250029
Cost after iteration# 400: 0.249910
Cost after iteration# 500: 0.249816
Cost after iteration# 600: 0.249720
Cost after iteration# 700: 0.249602
Cost after iteration# 800: 0.249441
Cost after iteration# 900: 0.249196
Cost after iteration# 1000: 0.248766
Neural Network prediction for example (1, 1) is 1
