In [1]:
import numpy as np

In [2]:
X = np.array([0.7, 1.2])
y = np.array([1, 0])
print(X, y)

[0.7 1.2] [1 0]


# Initialize the weights and biases

In [3]:
W0 = np.array([[1.5, 0.8], [0.2, -1.6]])
b0 = np.array([0.5, 0.8])
W1 = np.array([[-1.7, 1.6], [2.1, -0.2]])
b1 = np.array([0.9, 1.2])
parameters = {'W0': W0, 'b0': b0, 'W1': W1, 'b1': b1}

# Forward propogation

In [4]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [5]:
def forward_propogation(X, parameters):
    layers = len(parameters) // 2
    cache = {}
    A = X
    for l in range(layers):
        W = parameters['W' + str(l)]
        b = parameters['b' + str(l)]
        Z = np.dot(W, A) + b
        A = sigmoid(Z)
        cache['Z' + str(l)] = Z
        cache['A' + str(l)] = A
    y_hat = np.array([1 if o > 0.5 else 0 for o in A])
    return y_hat, cache

# Loss function

In [6]:
def loss(y, y_hat):
    return np.sum((y - y_hat) ** 2)

# Backpropogation

In [7]:
def loss_gradient(y, y_hat):
    return 2 * (y - y_hat)

In [8]:
def sigmoid_gradient(x):
    return sigmoid(x) * (1 - sigmoid(x))

In [9]:
def backpropogation(parameters, cache, y):
    layers = len(parameters) // 2
    grads = {}
    A = cache['A' + str(layers - 1)]
    dA = loss_gradient(y, A)
    for l in range(layers-1, -1, -1):
        Z = cache['Z' + str(l)]
        W = parameters['W' + str(l)]
        dZ = dA * sigmoid_gradient(Z)
        dW = np.dot(dZ, cache['A' + str(l)].T)
        db = dZ
        dA = np.dot(W.T, dZ)
        grads['dW' + str(l)] = dW
        grads['db' + str(l)] = db
    return grads

# Update parameters

In [10]:
def update_parameters(parameters, grads, learning_rate):
    layers = len(parameters) // 2
    for l in range(layers):
        parameters['W' + str(l)] += learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] += learning_rate * grads['db' + str(l)]
    return parameters

# Multi Layer Perceptron

In [11]:
epochs = 1000
learning_rate = 0.42
epoch = 0
while epoch < epochs:
    epoch += 1
    y_hat, cache = forward_propogation(X, parameters)
    l = loss(y, y_hat)
    if l == 0:
        break
    grads = backpropogation(parameters, cache, y)
    parameters = update_parameters(parameters, grads, learning_rate)
    print('Epoch:', epoch, 'Loss:', l)
    print('Parameters:', parameters)

Epoch: 1 Loss: 2
Parameters: {'W0': array([[ 1.4932176,  0.7932176],
       [ 0.1932176, -1.6067824]]), 'b0': array([0.48143559, 0.83806163]), 'W1': array([[-1.68098731,  1.61901269],
       [ 2.11901269, -0.18098731]]), 'b1': array([1.01569916, 1.16648445])}
Epoch: 2 Loss: 2
Parameters: {'W0': array([[ 1.48658442,  0.78658442],
       [ 0.18658442, -1.61341558]]), 'b0': array([0.46317969, 0.87479998]), 'W1': array([[-1.66116607,  1.63883393],
       [ 2.13883393, -0.16116607]]), 'b1': array([1.12497445, 1.13248987])}
Epoch: 3 Loss: 1
Parameters: {'W0': array([[ 1.48009693,  0.78009693],
       [ 0.18009693, -1.61990307]]), 'b0': array([0.4453901 , 0.90975909]), 'W1': array([[-1.64167039,  1.65832961],
       [ 2.15832961, -0.14167039]]), 'b1': array([1.22676222, 1.09802821])}
Epoch: 4 Loss: 1
Parameters: {'W0': array([[ 1.47371377,  0.77371377],
       [ 0.17371377, -1.62628623]]), 'b0': array([0.42814499, 0.94264237]), 'W1': array([[-1.62352671,  1.67647329],
       [ 2.17647329, -0.

In [12]:
print(f"Converged after {epoch} epochs")
print("Final parameters:")
print(parameters)

Converged after 32 epochs
Final parameters:
{'W0': array([[ 1.00223378,  0.30223378],
       [-0.29776622, -2.09776622]]), 'b0': array([-0.33659274,  1.49408015]), 'W1': array([[-2.29796587,  1.00203413],
       [ 1.50203413, -0.79796587]]), 'b1': array([ 2.59016572, -0.89763893])}
