In [1]:
# References
# Lecture slides
# https://blog.zhaytam.com/2018/08/15/implement-neural-network-backpropagation/
# https://towardsdatascience.com/estimating-optimal-learning-rate-for-a-deep-neural-network-ce32f2556ce0
# https://towardsdatascience.com/learning-rate-a6e7b84f1658
# https://towardsdatascience.com/implementing-the-xor-gate-using-backpropagation-in-neural-networks-c1f255b4f20d

In [2]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# from mpl_toolkits.mplot3d import axes3d, Axes3D

In [3]:
# Lets define our inputs and traget output according to XNOR Truth table
inputs = np.array([[1,1],[1,-1],[-1,1], [-1,-1]])
target_output = np.array([[1],[-1],[-1],[1]]) 

In [4]:
# It's a 2-2-1 neural network, implies
input_layer_neurons = 2
hidden_layer_neurons = 2
output_layer_neurons = 1

In [5]:
# Let's set the learning rate i.e., alpha and the number of iterations i.e., epochs:
epochs = 10000
alpha = 0.1

In [6]:
# Step 1: Initialize the weights and biases to have random values between −0.5 and 0.5:
# weights
hidden_layer_weights = np.random.uniform(-0.5, 0.5, size = (input_layer_neurons,hidden_layer_neurons))
output_layer_weights = np.random.uniform(-0.5,0.5, size = (hidden_layer_neurons,output_layer_neurons))

# bias:
hidden_layer_bias = np.random.uniform(-0.5, 0.5,size = (1,hidden_layer_neurons))
output_layer_bias = np.random.uniform(-0.5,0.5,size = (1,output_layer_neurons))

print('Initial Output Layer Weights:\n', output_layer_weights)
print('Initial Hidden Layer Weights:\n', hidden_layer_weights)
print('Initial Output Layer Bias:\n', output_layer_bias)
print('Initial Hidden Layer Bias:\n', hidden_layer_bias)

Initial Output Layer Weights:
 [[0.06421842]
 [0.44047024]]
Initial Hidden Layer Weights:
 [[-0.31345188  0.09234885]
 [ 0.38342476  0.36175132]]
Initial Output Layer Bias:
 [[-0.47409258]]
Initial Hidden Layer Bias:
 [[0.27082469 0.1054798 ]]


In [7]:
# Step 2: Training the inputs

# Step 2a:
# Setting the corresponding activation(a1) for input_layer
def sigmoid(x):
    return (1 - np.exp(-x))/(1 + np.exp(-x))

# We need the derivative of this for further calculations
def derivative_of_sigmoid(x):
#     derivative of sigmoid in terms of itslef is:
    return 0.5 * (1 + x) * (1 - x)

x1 = sigmoid(np.dot(inputs, hidden_layer_weights))
x2 = sigmoid(np.dot(x1, output_layer_weights))
print('Before Training', x2)

for i in range(epochs):
#     Step 2b:
#     Feedforward
    h_layer_a = np.dot(inputs, hidden_layer_weights)
    h_layer_a = h_layer_a + hidden_layer_bias
    output_hidden_layer = sigmoid(h_layer_a)
    
    o_layer_a = np.dot(output_hidden_layer, output_layer_weights)
    o_layer_a = o_layer_a + output_layer_bias
    pred_output = sigmoid(o_layer_a)

#     Step 2c:
#     Output error
    final_error = target_output - pred_output
    pred_output_delta = final_error * derivative_of_sigmoid(pred_output)
#     pred_output_delta = np.multiply(final_error,derivative_of_sigmoid(pred_output))

#     Step 2d:
#     Backpropogating the error
    h_layer_error = pred_output_delta.dot(output_layer_weights.T)
    hidden_layer_output_delta = h_layer_error * derivative_of_sigmoid(output_hidden_layer)

#     Step 2e:
#     Updating weights and bias
    output_layer_weights = output_layer_weights + output_hidden_layer.T.dot(pred_output_delta) * alpha
    output_layer_bias = output_layer_bias + np.sum(pred_output_delta) * alpha
    
    hidden_layer_weights = hidden_layer_weights + inputs.T.dot(hidden_layer_output_delta) * alpha
    hidden_layer_bias = hidden_layer_bias + np.sum(hidden_layer_output_delta) * alpha
    
#     alpha = alpha - (1/epochs)

#     plt.xlabel('Epochs')
#     plt.ylabel('Mean Square Error')
#     plt.plot(i, np.mean(final_error**2),'o')
    
#     plt.xlabel('Learning Rate')
#     plt.ylabel('Mean Square Error')
#     plt.plot(alpha, np.mean(final_error**2),'o')
    
    
       
# plt.savefig('Error vs Epochs.png')
       
print('Final Output Layer Weights:\n', output_layer_weights)
print('Final Hidden Layer Weights:\n', hidden_layer_weights)
print('Final Output Layer Bias:\n', output_layer_bias)
print('Final Hidden Layer Bias:\n', hidden_layer_bias)

print('Target Output:\n', target_output)
print('After Training:\n', pred_output)
# print('Predicted Output:\n', pred_output)
    

Before Training [[ 0.05024308]
 [-0.04022236]
 [ 0.04022236]
 [-0.05024308]]
Final Output Layer Weights:
 [[5.17646069]
 [5.17700971]]
Final Hidden Layer Weights:
 [[-3.3375171   3.24763671]
 [ 3.33742433 -3.24756198]]
Final Output Layer Bias:
 [[-4.73072222]]
Final Hidden Layer Bias:
 [[3.19698962 3.03164473]]
Target Output:
 [[ 1]
 [-1]
 [-1]
 [ 1]]
After Training:
 [[ 0.9826669 ]
 [-0.97623754]
 [-0.97615487]
 [ 0.98266698]]
