# Multi Layer Perceptron

TODO: Explain backpropagation for the MLP here as well.

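Key point: how do you calculate the deltas for `layer_1`? First, do the obvious: multiply the output delta (`layer_2_delta`) by each weight attached to it (`weights_1_2`). This gives a weighting of how much each weight contributed to that error. Then multiply the result by `relu2deriv(layer_1)`, which zeroes the delta of every hidden node whose ReLU output was 0, because such a node contributed nothing to the output.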

In [6]:
import numpy as np
import pandas as pd

# XOR truth table: each row of `inputs` is an (x1, x2) pair and `ouputs` holds
# the matching target value.
inputs = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
ouputs = np.array([0, 1, 1, 0])

# Tabulate the samples; the bare `df` on the last line makes the cell render it.
df = pd.DataFrame(
    {
        "x1": inputs[:, 0],
        "x2": inputs[:, 1],
        "y": ouputs,
    }
)
df

Unnamed: 0,x1,x2,y
0,0,0,0
1,0,1,1
2,1,0,1
3,1,1,0


In [7]:
import numpy as np
# Seed NumPy's global random generator so the random weight draws later in this
# cell produce the same values each time the cell is run.
np.random.seed(40)

def relu(x):
    """Rectified linear unit: keep the positive entries of ``x`` and zero the rest.

    Implemented by multiplying ``x`` with the boolean mask ``x > 0``.
    """
    positive_mask = x > 0
    return positive_mask * x

def relu2deriv(output):
    """Return the ReLU gradient mask: true wherever ``output`` is positive.

    Multiplying a delta by this mask zeroes the entries whose ReLU output was
    not positive.
    """
    is_active = output > 0
    return is_active

# Training set: one (x1, x2) input per row, with the matching targets stored as
# a (4, 1) column vector.
streetlights = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
walk_vs_stop = np.array([[0, 1, 1, 0]]).T

# Hyperparameters: gradient-descent step size and hidden-layer width.
alpha = 0.001
hidden_size = 4

# Fixed all-ones alternative to the random initialisation, kept for reference:
# weights_0_1 = np.array([[1, 1], [1, 1]], dtype=np.float64)
# weights_1_2 = np.array([[1], [1]], dtype=np.float64)

# Random starting weights: input -> hidden first, then hidden -> output.
weights_0_1 = np.random.random((2, hidden_size))
weights_1_2 = np.random.random((hidden_size, 1))

# Show the label and both matrices, one per line.
print("Weightes", weights_0_1, weights_1_2, sep="\n")


Weightes
[[0.40768703 0.05536604 0.78853488 0.28730518]
 [0.45035059 0.30391231 0.52639952 0.62381221]]
[[0.77677546]
 [0.68624165]
 [0.98093886]
 [0.60081609]]


In [8]:
import plotly.graph_objects as go    

# Scatter plot of the four training inputs; each marker is labelled with its
# target value in red text to the right of the point.
data_fig = go.FigureWidget()
data_fig.add_scatter(
    mode="markers+text",
    x=streetlights[:, 0],
    y=streetlights[:, 1],
    text=walk_vs_stop[:, 0],
    textposition="middle right",
    textfont=dict(family="sans serif", size=20, color="red"),
)

FigureWidget({
    'data': [{'mode': 'markers+text',
              'text': array([0., 1., 1., 0.]),
              'textfont': {'color': 'red', 'family': 'sans serif', 'size': 20},
              'textposition': 'middle right',
              'type': 'scatter',
              'uid': '3b765a11-aa8d-4a4c-9cde-fe8e94388cca',
              'x': array([0, 0, 1, 1]),
              'y': array([0, 1, 0, 1])}],
    'layout': {'template': '...'}
})

In [9]:
import time

# Online gradient descent: the weights are updated after every single sample.
for iteration in range(10):
    layer_2_error = 0

    # Hidden activations of the four samples for this pass. Every row is
    # written in the inner loop, so the uninitialised contents are never read.
    array = np.empty((4, hidden_size))

    print(f"********** Iteration: {iteration} *************")
    for i in range(len(streetlights)):
        # Forward pass; slicing with i:i+1 keeps each sample two-dimensional.
        layer_0 = streetlights[i:i+1]
        print(f'Layer 0 = {layer_0}')
        layer_1 = relu(np.dot(layer_0, weights_0_1))
        print(f'Layer 1 = {layer_1}')
        array[i] = layer_1
        layer_2 = np.dot(layer_1, weights_1_2)
        print(f'Layer 2 = {layer_2}')

        # Backward pass: the output delta is prediction minus target, and its
        # square is accumulated into this iteration's error.
        layer_2_delta = layer_2 - walk_vs_stop[i:i+1]
        layer_2_error += np.sum(layer_2_delta ** 2)

        # Push the output delta back through the output weights, then mask out
        # hidden units whose ReLU output was not positive.
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

        # Both deltas above were computed before either weight matrix changes.
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)
        print(f'Output = {walk_vs_stop[i:i+1]}')
        print("==============================================")

    print("Error:" + str(layer_2_error))
    print(f'{array}')

    # Move the plotted points to the activations of the first two hidden units.
    data_fig.data[0].x = array[:, 0]
    data_fig.data[0].y = array[:, 1]

    # Pause between iterations (presumably so each figure update can be seen).
    time.sleep(1)

********** Iteration: 0 *************
Layer 0 = [[0 0]]
Layer 1 = [[0. 0. 0. 0.]]
Layer 2 = [[0.]]
Output = [[0]]
Layer 0 = [[0 1]]
Layer 1 = [[0.45035059 0.30391231 0.52639952 0.62381221]]
Layer 2 = [[1.44954073]]
Output = [[1]]
Layer 0 = [[1 0]]
Layer 1 = [[0.40768703 0.05536604 0.78853488 0.28730518]]
Layer 2 = [[1.30044058]]
Output = [[1]]
Layer 0 = [[1 1]]
Layer 1 = [[0.85745511 0.35876372 1.31419879 0.91066688]]
Layer 2 = [[2.74725075]]
Output = [[0]]
Error:7.839738097962158
[[0.         0.         0.         0.        ]
 [0.45035059 0.30391231 0.52639952 0.62381221]
 [0.40768703 0.05536604 0.78853488 0.28730518]
 [0.85745511 0.35876372 1.31419879 0.91066688]]


********** Iteration: 1 *************
Layer 0 = [[0 0]]
Layer 1 = [[0. 0. 0. 0.]]
Layer 2 = [[0.]]
Output = [[0]]
Layer 0 = [[0 1]]
Layer 1 = [[0.44786829 0.30171895 0.52326497 0.62189254]]
Layer 2 = [[1.43641408]]
Output = [[1]]
Layer 0 = [[1 0]]
Layer 1 = [[0.40532061 0.05327505 0.78554665 0.28547517]]
Layer 2 = [[1.28797664]]
Output = [[1]]
Layer 0 = [[1 1]]
Layer 1 = [[0.85262821 0.35449776 1.30810406 0.90693464]]
Layer 2 = [[2.72177605]]
Output = [[0]]
Error:7.6814526763526585
[[0.         0.         0.         0.        ]
 [0.44786829 0.30171895 0.52326497 0.62189254]
 [0.40532061 0.05327505 0.78554665 0.28547517]
 [0.85262821 0.35449776 1.30810406 0.90693464]]
********** Iteration: 2 *************
Layer 0 = [[0 0]]
Layer 1 = [[0. 0. 0. 0.]]
Layer 2 = [[0.]]
Output = [[0]]
Layer 0 = [[0 1]]
Layer 1 = [[0.4454244  0.29955567 0.52018111 0.62000507]]
Layer 2 = [[1.42355511]]
Output = [[1]]
Layer 0 = [[1 0]]
Layer 1 = [[0.40299168 0.0512135  0.78260786 0.28367654]]
Layer 2 = [[1.2757

In [10]:
import numpy as np

# Helper functions
def relu(x):
    """Element-wise ReLU: the larger of 0 and each entry of ``x``."""
    floor = 0
    return np.maximum(floor, x)

def relu_derivative(x):
    """Return 1 where ``x`` is strictly positive and 0 everywhere else."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated with NumPy."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def sigmoid_derivative(x):
    """Sigmoid slope written in terms of the sigmoid's output ``x``.

    Returns ``x * (1 - x)``, so the argument is the activation value itself
    rather than the pre-activation input.
    """
    complement = 1 - x
    return x * complement

def mse_loss(y_true, y_pred):
    """Mean of the squared differences between ``y_true`` and ``y_pred``."""
    residual = y_true - y_pred
    return np.mean(residual ** 2)

# XOR truth table: the rows of X are the inputs, the rows of y the targets.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# The seed call is left disabled, so the starting parameters depend on the
# state of NumPy's global random generator when this cell runs.
# np.random.seed(42)

# Layer widths.
input_neurons = 2
hidden_neurons = 2
output_neurons = 1

# Random starting parameters (draw order matters for reproducibility:
# input->hidden weights, hidden->output weights, hidden bias, output bias).
weights_input_hidden = np.random.rand(input_neurons, hidden_neurons)
weights_hidden_output = np.random.rand(hidden_neurons, output_neurons)
bias_hidden = np.random.rand(1, hidden_neurons)
bias_output = np.random.rand(1, output_neurons)

# Training schedule.
learning_rate = 0.01
epochs = 10000

# Full-batch training: every epoch uses all four samples at once.
for epoch in range(epochs):
    # Forward pass: ReLU hidden layer followed by a sigmoid output layer.
    hidden_layer_input = X.dot(weights_input_hidden) + bias_hidden
    hidden_layer_output = relu(hidden_layer_input)

    output_layer_input = hidden_layer_output.dot(weights_hidden_output) + bias_output
    output_layer_output = sigmoid(output_layer_input)

    # Loss is only reported; the updates below use the raw error.
    loss = mse_loss(y, output_layer_output)

    # Backpropagation. The error is target minus prediction, which is why the
    # parameter updates below are added rather than subtracted.
    error_output_layer = y - output_layer_output
    d_output = error_output_layer * sigmoid_derivative(output_layer_output)

    # The hidden error uses the output weights before they are updated.
    error_hidden_layer = d_output.dot(weights_hidden_output.T)
    d_hidden = error_hidden_layer * relu_derivative(hidden_layer_output)

    # Parameter updates; bias gradients are the deltas summed over the batch.
    weights_hidden_output += learning_rate * hidden_layer_output.T.dot(d_output)
    bias_output += learning_rate * d_output.sum(axis=0, keepdims=True)

    weights_input_hidden += learning_rate * X.T.dot(d_hidden)
    bias_hidden += learning_rate * d_hidden.sum(axis=0, keepdims=True)

    # Report progress every 1000 epochs.
    if epoch % 1000 == 0:
        print(f'Epoch {epoch}, Loss: {loss}')

# Final forward pass with the trained parameters.
hidden_layer_input = X.dot(weights_input_hidden) + bias_hidden
hidden_layer_output = relu(hidden_layer_input)

output_layer_input = hidden_layer_output.dot(weights_hidden_output) + bias_output
output_layer_output = sigmoid(output_layer_input)

# NOTE(review): in the recorded run the loss plateaus near 0.25 and the rounded
# predictions are [0, 0, 1, 1], which do not match the XOR targets [0, 1, 1, 0].
# The network did not learn XOR in that run; the initialisation, hidden width
# and learning rate are the first things to revisit.
print('Predictions:')
print(output_layer_output.round())


Epoch 0, Loss: 0.2913239088494768
Epoch 1000, Loss: 0.25013205911182146
Epoch 2000, Loss: 0.2500839755180796
Epoch 3000, Loss: 0.2500575150938403
Epoch 4000, Loss: 0.2500415143068478
Epoch 5000, Loss: 0.25003101962364
Epoch 6000, Loss: 0.2500236895266624
Epoch 7000, Loss: 0.25001833734402235
Epoch 8000, Loss: 0.25001431284494097
Epoch 9000, Loss: 0.2500112294597659
Predictions:
[[0.]
 [0.]
 [1.]
 [1.]]
