In [10]:
# https://towardsdatascience.com/how-to-build-a-simple-neural-network-from-scratch-with-python-9f011896d2f3

import numpy as np

def sigmoid(z):
    """Return the logistic function 1 / (1 + e^(-z)) of ``z``.

    ``z`` is passed through ``np.exp``, so scalars and NumPy arrays are both
    accepted; for an array the function is applied element-wise.
    """
    decay = np.exp(-z)
    return 1/(1 + decay)

def initialize_parameters(neurons_count_input_layer, neurons_count_hidden_layer, neurons_count_output_layer):
    """Create the initial weights and biases of a two-layer network.

    Weights are drawn from the standard normal distribution via
    ``np.random.randn`` (global NumPy RNG state); biases start at zero.

    Returns a dict with:
        "weight1": (hidden, input) array
        "bias1":   (hidden, 1) array of zeros
        "weight2": (output, hidden) array
        "bias2":   (output, 1) array of zeros
    """
    hidden_shape = (neurons_count_hidden_layer, neurons_count_input_layer)
    output_shape = (neurons_count_output_layer, neurons_count_hidden_layer)

    # Layer 1 is drawn before layer 2 so results stay reproducible for a
    # given np.random.seed value.
    hidden_weights = np.random.randn(*hidden_shape)
    output_weights = np.random.randn(*output_shape)

    return {
        "weight1": hidden_weights,
        "bias1": np.zeros((neurons_count_hidden_layer, 1)),
        "weight2": output_weights,
        "bias2": np.zeros((neurons_count_output_layer, 1)),
    }

def forward_prop(X, parameters):
    """Run one forward pass through the two-layer network.

    The hidden layer uses ``tanh`` and the output layer uses ``sigmoid``.

    Args:
        X: input matrix; it is left-multiplied by ``parameters["weight1"]``,
            so examples are presumably stored one per column.
        parameters: dict holding "weight1", "bias1", "weight2", "bias2".

    Returns:
        A tuple ``(A2, cache)`` where ``A2`` is the output-layer activation
        and ``cache`` is ``{"A1": hidden activation, "A2": A2}``.
    """
    hidden_pre = np.dot(parameters["weight1"], X) + parameters["bias1"]
    hidden_act = np.tanh(hidden_pre)

    output_pre = np.dot(parameters["weight2"], hidden_act) + parameters["bias2"]
    output_act = sigmoid(output_pre)

    return output_act, {"A1": hidden_act, "A2": output_act}

def calculate_cost(A2, Y):
    """Return the mean binary cross-entropy between predictions and labels.

    Args:
        A2: predicted probabilities, same shape as ``Y``.
        Y: array of 0/1 labels with one example per entry of the last axis.

    Returns:
        The cost, squeezed with ``np.squeeze`` (a 0-d value for the usual
        single-output case).
    """
    # Take the example count from the labels themselves rather than from a
    # module-level ``m`` so the function works for any batch and any caller.
    m = np.shape(Y)[-1]

    # Keep probabilities strictly inside (0, 1): a saturated sigmoid yields
    # exactly 0.0 or 1.0, and 0 * log(0) would turn the whole cost into NaN.
    eps = 1e-15
    A2 = np.clip(A2, eps, 1 - eps)

    cost = -np.sum(np.multiply(Y, np.log(A2)) + np.multiply(1-Y, np.log(1-A2)))/m
    return np.squeeze(cost)

def backward_prop(X, Y, cache, parameters):
    """Compute the gradients of the cost for the two-layer network.

    Args:
        X: input matrix with one training example per column.
        Y: labels, same shape as ``cache["A2"]``.
        cache: dict with the activations "A1" (hidden) and "A2" (output)
            from the forward pass.
        parameters: dict of network parameters; only "weight2" is read.

    Returns:
        A dict with "dweight1", "dbias1", "dweight2" and "dbias2", each
        averaged over the examples in ``X``.
    """
    A1 = cache["A1"]
    A2 = cache["A2"]

    weight2 = parameters["weight2"]

    # Use the batch actually passed in, not a module-level ``m``, so the
    # gradients are scaled correctly for any input.
    m = X.shape[1]

    # Output-layer error term.
    dZ2 = A2 - Y
    dweight2 = np.dot(dZ2, A1.T)/m
    dbias2 = np.sum(dZ2, axis=1, keepdims=True)/m

    # 1 - A1**2 is the derivative of tanh expressed via its output A1.
    dZ1 = np.multiply(np.dot(weight2.T, dZ2), 1-np.power(A1, 2))
    dweight1 = np.dot(dZ1, X.T)/m
    dbias1 = np.sum(dZ1, axis=1, keepdims=True)/m

    grads = {
        "dweight1": dweight1,
        "dbias1": dbias1,
        "dweight2": dweight2,
        "dbias2": dbias2
    }

    return grads

def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step and return the new parameters.

    Each parameter ``p`` becomes ``p - learning_rate * grads["d" + p]``.
    The input dicts are not modified; a fresh dict is returned.

    Args:
        parameters: dict with "weight1", "bias1", "weight2", "bias2".
        grads: dict with "dweight1", "dbias1", "dweight2", "dbias2".
        learning_rate: step size multiplying every gradient.
    """
    # Key order matches the dict layout this function has always produced.
    names = ("weight1", "weight2", "bias1", "bias2")
    return {
        name: parameters[name] - learning_rate*grads["d" + name]
        for name in names
    }


def model(X, Y, neurons_count_input_layer, neurons_count_hidden_layer, neurons_count_output_layer, num_of_iters, learning_rate):
    """Train the two-layer network with batch gradient descent.

    Parameters are created with ``initialize_parameters`` and then refined
    by repeating forward pass -> cost -> backward pass -> update. Every
    50th iteration the cost and the current gradients are printed.

    Args:
        X: training inputs.
        Y: training labels.
        neurons_count_input_layer: size of the input layer.
        neurons_count_hidden_layer: size of the hidden layer.
        neurons_count_output_layer: size of the output layer.
        num_of_iters: index of the last iteration; the loop runs
            ``num_of_iters + 1`` times (iterations 0..num_of_iters).
        learning_rate: gradient-descent step size.

    Returns:
        The trained parameter dict.
    """
    parameters = initialize_parameters(
        neurons_count_input_layer,
        neurons_count_hidden_layer,
        neurons_count_output_layer,
    )

    for iteration in range(num_of_iters + 1):
        predictions, cache = forward_prop(X, parameters)
        cost = calculate_cost(predictions, Y)
        grads = backward_prop(X, Y, cache, parameters)
        parameters = update_parameters(parameters, grads, learning_rate)

        # Only report progress on every 50th iteration.
        if iteration % 50 != 0:
            continue
        # The reported cost and grads were computed before this
        # iteration's update was applied.
        print('Cost after iteration# {:d}: {:f}'.format(iteration, cost))
        print('grads', grads)

    return parameters

def predict(X, parameters):
    """Classify the example(s) in ``X`` by thresholding the output at 0.5.

    Args:
        X: input matrix with one example per column.
        parameters: trained parameter dict, passed to ``forward_prop``.

    Returns:
        A plain ``int`` (0 or 1) when the squeezed network output is a single
        value, so the result can still be formatted with ``{:d}``. Otherwise
        an integer NumPy array of 0/1 labels, one per output value.
    """
    a2, _ = forward_prop(X, parameters)
    yhat = np.squeeze(a2)

    # Compare element-wise rather than with a bare ``if`` on the array:
    # the truth value of a multi-element array is ambiguous and raises.
    labels = yhat >= 0.5
    if np.ndim(labels) == 0:
        return int(labels)
    return labels.astype(int)
    


# Fix the RNG seed so the weight initialisation is reproducible.
np.random.seed(2)

# The 4 training examples, one per column.
X = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])

# XOR of the two inputs for every column of X.
Y = np.array([[0, 1, 1, 0]])

# Number of training examples (columns of X).
m = X.shape[1]

# Hyperparameters
neurons_count_input_layer = 2     # neurons in the input layer
neurons_count_hidden_layer = 2    # neurons in the hidden layer
neurons_count_output_layer = 1    # neurons in the output layer
num_of_iters = 10000
learning_rate = 0.3

trained_parameters = model(
    X,
    Y,
    neurons_count_input_layer,
    neurons_count_hidden_layer,
    neurons_count_output_layer,
    num_of_iters,
    learning_rate,
)

# Evaluate the trained network on each 2x1 input vector of the XOR truth
# table, in the order (1,1), (0,1), (0,0), (1,0).
xor_checks = [
    ([[1], [1]], 'Neural Network prediction for example (1,1) is {:d}'),
    ([[0], [1]], 'Neural Network prediction for example (0,1) is {:d}'),
    ([[0], [0]], 'Neural Network prediction for example (0,0) is {:d}'),
    ([[1], [0]], 'Neural Network prediction for example (1,0) is {:d}'),
]
for column_vector, message in xor_checks:
    X_test = np.array(column_vector)
    y_predict = predict(X_test, trained_parameters)
    print(message.format(y_predict))


Cost after iteration# 0: 0.856267
grads {'dweight1': array([[-0.20850296,  0.02049162],
       [-0.12492949, -0.10745412]]), 'dbias1': array([[-0.13603693],
       [-0.21063238]]), 'dweight2': array([[-0.05726626, -0.19815821]]), 'dbias2': array([[0.1054896]])}
Cost after iteration# 50: 0.503667
grads {'dweight1': array([[-0.02761564,  0.01814891],
       [ 0.02764883, -0.02626384]]), 'dbias1': array([[-0.00162985],
       [-0.02037731]]), 'dweight2': array([[0.02356708, 0.06216107]]), 'dbias2': array([[-0.03324471]])}
Cost after iteration# 100: 0.347426
grads {'dweight1': array([[-0.05169183,  0.03862595],
       [ 0.03076282, -0.01913592]]), 'dbias1': array([[-0.03096171],
       [-0.0077317 ]]), 'dweight2': array([[0.03170799, 0.04121168]]), 'dbias2': array([[-0.06792659]])}
Cost after iteration# 150: 0.171442
grads {'dweight1': array([[-0.03675009,  0.03477031],
       [ 0.01519262, -0.0112744 ]]), 'dbias1': array([[-0.02267367],
       [-0.00605038]]), 'dweight2': array([[0.029528

Cost after iteration# 1700: 0.006572
grads {'dweight1': array([[-0.0007628 ,  0.00075781],
       [ 0.00052461, -0.00052532]]), 'dbias1': array([[-0.00037634],
       [-0.00025162]]), 'dweight2': array([[0.00197138, 0.00200944]]), 'dbias2': array([[-0.00203435]])}
Cost after iteration# 1750: 0.006368
grads {'dweight1': array([[-0.00073542,  0.00073053],
       [ 0.00050727, -0.00050823]]), 'dbias1': array([[-0.00036268],
       [-0.00024336]]), 'dweight2': array([[0.00191262, 0.00194907]]), 'dbias2': array([[-0.001973]])}
Cost after iteration# 1800: 0.006177
grads {'dweight1': array([[-0.00070983,  0.00070502],
       [ 0.00049099, -0.00049217]]), 'dbias1': array([[-0.00034991],
       [-0.00023562]]), 'dweight2': array([[0.00185727, 0.00189221]]), 'dbias2': array([[-0.00191524]])}
Cost after iteration# 1850: 0.005997
grads {'dweight1': array([[-0.00068584,  0.00068112],
       [ 0.00047569, -0.00047705]]), 'dbias1': array([[-0.00033795],
       [-0.00022833]]), 'dweight2': array([[0.0

Cost after iteration# 3700: 0.002877
grads {'dweight1': array([[-0.00029395,  0.00029122],
       [ 0.00021627, -0.00021876]]), 'dbias1': array([[-0.00014388],
       [-0.00010439]]), 'dweight2': array([[0.00088482, 0.00089679]]), 'dbias2': array([[-0.00090643]])}
Cost after iteration# 3750: 0.002837
grads {'dweight1': array([[-0.00028927,  0.00028657],
       [ 0.00021304, -0.00021552]]), 'dbias1': array([[-0.00014157],
       [-0.00010284]]), 'dweight2': array([[0.00087281, 0.00088454]]), 'dbias2': array([[-0.00089403]])}
Cost after iteration# 3800: 0.002798
grads {'dweight1': array([[-0.00028473,  0.00028206],
       [ 0.0002099 , -0.00021236]]), 'dbias1': array([[-0.00013934],
       [-0.00010133]]), 'dweight2': array([[0.00086111, 0.00087262]]), 'dbias2': array([[-0.00088197]])}
Cost after iteration# 3850: 0.002760
grads {'dweight1': array([[-0.00028032,  0.00027768],
       [ 0.00020684, -0.0002093 ]]), 'dbias1': array([[-1.37169187e-04],
       [-9.98653510e-05]]), 'dweight2': a

Cost after iteration# 5650: 0.001854
grads {'dweight1': array([[-0.00017805,  0.0001762 ],
       [ 0.00013484, -0.00013682]]), 'dbias1': array([[-8.69611673e-05],
       [-6.52694378e-05]]), 'dweight2': array([[0.00057587, 0.00058232]]), 'dbias2': array([[-0.00058838]])}
Cost after iteration# 5700: 0.001837
grads {'dweight1': array([[-0.00017622,  0.00017439],
       [ 0.00013353, -0.00013549]]), 'dbias1': array([[-8.60639673e-05],
       [-6.46377433e-05]]), 'dweight2': array([[0.00057077, 0.00057713]]), 'dbias2': array([[-0.00058313]])}
Cost after iteration# 5750: 0.001821
grads {'dweight1': array([[-0.00017442,  0.0001726 ],
       [ 0.00013224, -0.00013419]]), 'dbias1': array([[-8.51840741e-05],
       [-6.40177100e-05]]), 'dweight2': array([[0.00056575, 0.00057203]]), 'dbias2': array([[-0.00057798]])}
Cost after iteration# 5800: 0.001805
grads {'dweight1': array([[-0.00017266,  0.00017086],
       [ 0.00013097, -0.00013292]]), 'dbias1': array([[-8.43210023e-05],
       [-6.340902

Cost after iteration# 7650: 0.001358
grads {'dweight1': array([[-1.25028050e-04,  1.23646708e-04],
       [ 9.64156286e-05, -9.79865768e-05]]), 'dbias1': array([[-6.10013171e-05],
       [-4.67559004e-05]]), 'dweight2': array([[0.00042418, 0.00042833]]), 'dbias2': array([[-0.00043271]])}
Cost after iteration# 7700: 0.001349
grads {'dweight1': array([[-1.24086660e-04,  1.22714106e-04],
       [ 9.57249902e-05, -9.72875902e-05]]), 'dbias1': array([[-6.05408799e-05],
       [-4.64227146e-05]]), 'dweight2': array([[0.0004214 , 0.00042552]]), 'dbias2': array([[-0.00042987]])}
Cost after iteration# 7750: 0.001340
grads {'dweight1': array([[-1.23158585e-04,  1.21794711e-04],
       [ 9.50437996e-05, -9.65981288e-05]]), 'dbias1': array([[-6.00869711e-05],
       [-4.60940712e-05]]), 'dweight2': array([[0.00041867, 0.00042275]]), 'dbias2': array([[-0.00042707]])}
Cost after iteration# 7800: 0.001331
grads {'dweight1': array([[-1.22243551e-04,  1.20888251e-04],
       [ 9.43718671e-05, -9.591800

Cost after iteration# 9550: 0.001082
grads {'dweight1': array([[-9.67445500e-05,  9.56350530e-05],
       [ 7.55141863e-05, -7.68155268e-05]]), 'dbias1': array([[-4.71749457e-05],
       [-3.66649398e-05]]), 'dweight2': array([[0.00033935, 0.00034237]]), 'dbias2': array([[-0.00034583]])}
Cost after iteration# 9600: 0.001076
grads {'dweight1': array([[-9.61643011e-05,  9.50605595e-05],
       [ 7.50818754e-05, -7.63772624e-05]]), 'dbias1': array([[-4.68914487e-05],
       [-3.64560528e-05]]), 'dweight2': array([[0.00033757, 0.00034057]]), 'dbias2': array([[-0.00034401]])}
Cost after iteration# 9650: 0.001071
grads {'dweight1': array([[-9.55906084e-05,  9.44925646e-05],
       [ 7.46542999e-05, -7.59437833e-05]]), 'dbias1': array([[-4.66111613e-05],
       [-3.62494466e-05]]), 'dweight2': array([[0.00033581, 0.00033879]]), 'dbias2': array([[-0.00034221]])}
Cost after iteration# 9700: 0.001065
grads {'dweight1': array([[-9.50233637e-05,  9.39309610e-05],
       [ 7.42313836e-05, -7.551501