# Multi Layer Perceptron

Explain backprogration also here for MLP

Key point: How do you calculate the deltas for layer_1? First, do the obvious: multiply the output delta by each weight attached to it. This gives a weighting of how much each weight contributed to that error.

In [1]:
import numpy as np
import pandas as pd

inputs = np.array([[ 0, 0],
                    [ 0, 1],
                    [ 1, 0],
                    [ 1, 1] ] )
ouputs = np.array( [ 0, 1, 1, 0  ] )

df = pd.DataFrame({
    'x1': inputs[:, 0],
    'x2': inputs[:, 1],
    'y': ouputs
})
df

Unnamed: 0,x1,x2,y
0,0,0,0
1,0,1,1
2,1,0,1
3,1,1,0


In [2]:
import numpy as np
np.random.seed(40)

def relu(x):
    return (x > 0) * x

def relu2deriv(output):
    return output>0

streetlights = np.array([[ 0, 0 ],
                         [ 0, 1 ],
                         [ 1, 0 ],
                         [ 1, 1 ] ] )
walk_vs_stop = np.array([[ 0, 1, 1, 0]]).T

alpha = 0.001
hidden_size = 4

# weights_0_1 = np.array([ [1,1],[1,1] ] , dtype=np.float64) # np.random.random((2,hidden_size))
# weights_1_2 = np.array([ [ 1 ],[1] ] , dtype=np.float64) #np.random.random((hidden_size,1)) 

weights_0_1 = np.random.random((2,hidden_size))
weights_1_2 = np.random.random((hidden_size,1))

print("Weightes")
print(weights_0_1)
print(weights_1_2)


Weightes
[[0.40768703 0.05536604 0.78853488 0.28730518]
 [0.45035059 0.30391231 0.52639952 0.62381221]]
[[0.77677546]
 [0.68624165]
 [0.98093886]
 [0.60081609]]


In [3]:
import plotly.graph_objects as go    

data_fig = go.FigureWidget()
data_fig.add_scatter(mode="markers+text", x=streetlights[:,0], y=streetlights[:,1], text=walk_vs_stop[:,0],
                    textposition='middle right'
                    ,  textfont=dict(
                        family="sans serif",
                        size=20,
                        color="red"
                    ))

FigureWidget({
    'data': [{'mode': 'markers+text',
              'text': array([0., 1., 1., 0.]),
              'textfont': {'color': 'red', 'family': 'sans serif', 'size': 20},
              'textposition': 'middle right',
              'type': 'scatter',
              'uid': '4b2d8030-89ec-4ea4-a225-46c17a901a7e',
              'x': array([0, 0, 1, 1]),
              'y': array([0, 1, 0, 1])}],
    'layout': {'template': '...'}
})

In [7]:
for iteration in range(10):
    layer_2_error = 0

    array = np.empty((4,hidden_size))

    print(f"********** Iteration: {iteration} *************")
    for i in range(len(streetlights)):
        # forward pass
        layer_0 = streetlights[i:i+1]
        print(f'Layer 0 = {layer_0}')
        layer_1 = relu(np.dot(layer_0,weights_0_1))
        print(f'Layer 1 = {layer_1}')
        layer_2 = np.dot(layer_1,weights_1_2)
        print(f'Layer 2 = {layer_2}')

        array[i] = layer_1
        # backward pass
        layer_2_error += np.sum((layer_2 - walk_vs_stop[i:i+1]) ** 2)
        
        layer_2_delta = (layer_2 - walk_vs_stop[i:i+1])
        layer_1_delta = layer_2_delta.dot(weights_1_2.T) * relu2deriv(layer_1)

        # layer_1_delta = layer_1 - layer_2
        # weight updating
        weights_1_2 -= alpha * layer_1.T.dot(layer_2_delta)
        weights_0_1 -= alpha * layer_0.T.dot(layer_1_delta)
        print(f'Output = {walk_vs_stop[i:i+1]}')
        print("==============================================")
            
    print("Error:" + str(layer_2_error))
    print(f'{array}')
    
    data_fig.data[0].x = array[:,0]
    data_fig.data[0].y = array[:,1]

    import time
    time.sleep(1)

********** Iteration: 0 *************
Layer 0 = [[0 0]]
Layer 1 = [[0. 0. 0. 0.]]
Layer 2 = [[0.]]
Output = [[0]]
Layer 0 = [[0 1]]
Layer 1 = [[0.42714734 0.28325148 0.49718934 0.60597069]]
Layer 2 = [[1.32940877]]
Output = [[1]]
Layer 0 = [[1 0]]
Layer 1 = [[0.38560359 0.03570219 0.76073437 0.27032504]]
Layer 2 = [[1.18642825]]
Output = [[1]]
Layer 0 = [[1 1]]
Layer 1 = [[0.81236306 0.31860499 1.25743731 0.87599964]]
Layer 2 = [[2.5141093]]
Output = [[0]]
Error:6.464011182749799
[[0.         0.         0.         0.        ]
 [0.42714734 0.28325148 0.49718934 0.60597069]
 [0.38560359 0.03570219 0.76073437 0.27032504]
 [0.81236306 0.31860499 1.25743731 0.87599964]]
********** Iteration: 1 *************
Layer 0 = [[0 0]]
Layer 1 = [[0. 0. 0. 0.]]
Layer 2 = [[0.]]
Output = [[0]]
Layer 0 = [[0 1]]
Layer 1 = [[0.42500962 0.28132957 0.49450866 0.60433889]]
Layer 2 = [[1.31862906]]
Output = [[1]]
Layer 0 = [[1 0]]
Layer 1 = [[0.38357341 0.03387695 0.75818855 0.26877536]]
Layer 2 = [[1.176204

In [5]:
import numpy as np

# Helper functions
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def mse_loss(y_true, y_pred):
    return np.mean((y_true - y_pred) ** 2)

# XOR input and output
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Initialize parameters
# np.random.seed(42)

# Number of neurons in each layer
input_neurons = 2
hidden_neurons = 2
output_neurons = 1

# Weights and biases initialization
weights_input_hidden = np.random.rand(input_neurons, hidden_neurons)
weights_hidden_output = np.random.rand(hidden_neurons, output_neurons)
bias_hidden = np.random.rand(1, hidden_neurons)
bias_output = np.random.rand(1, output_neurons)

# Training parameters
learning_rate = 0.01
epochs = 10000

# Training the model
for epoch in range(epochs):
    # Forward pass
    hidden_layer_input = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_layer_output = relu(hidden_layer_input)
    
    output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
    output_layer_output = sigmoid(output_layer_input)
    
    # Compute the loss
    loss = mse_loss(y, output_layer_output)
    
    # Backpropagation
    error_output_layer = y - output_layer_output
    d_output = error_output_layer * sigmoid_derivative(output_layer_output)
    
    error_hidden_layer = d_output.dot(weights_hidden_output.T)
    d_hidden = error_hidden_layer * relu_derivative(hidden_layer_output)
    
    # Update weights and biases
    weights_hidden_output += hidden_layer_output.T.dot(d_output) * learning_rate
    bias_output += np.sum(d_output, axis=0, keepdims=True) * learning_rate
    
    weights_input_hidden += X.T.dot(d_hidden) * learning_rate
    bias_hidden += np.sum(d_hidden, axis=0, keepdims=True) * learning_rate

    # Optionally print the loss at certain intervals
    if epoch % 1000 == 0:
        print(f'Epoch {epoch}, Loss: {loss}')

# Making predictions
hidden_layer_input = np.dot(X, weights_input_hidden) + bias_hidden
hidden_layer_output = relu(hidden_layer_input)

output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
output_layer_output = sigmoid(output_layer_input)

print('Predictions:')
print(np.round(output_layer_output))


Epoch 0, Loss: 0.2913239088494768
Epoch 1000, Loss: 0.25013205911182146
Epoch 2000, Loss: 0.2500839755180796
Epoch 3000, Loss: 0.2500575150938403
Epoch 4000, Loss: 0.2500415143068478
Epoch 5000, Loss: 0.25003101962364
Epoch 6000, Loss: 0.2500236895266624
Epoch 7000, Loss: 0.25001833734402235
Epoch 8000, Loss: 0.25001431284494097
Epoch 9000, Loss: 0.2500112294597659
Predictions:
[[0.]
 [0.]
 [1.]
 [1.]]


### Implemented Keras Model

In [6]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

# XOR input and output
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Define the model
model = Sequential()

# Input layer -> Hidden layer with 2 neurons and ReLU activation
model.add(Dense(4, input_dim=2, activation='relu'))

# Hidden layer -> Output layer with 1 neuron and sigmoid activation
model.add(Dense(1, activation='sigmoid'))

# Compile the model
model.compile(loss='binary_crossentropy', optimizer=SGD(learning_rate=0.01), metrics=['accuracy'])

# Train the model
model.fit(X, y, epochs=1000, verbose=0)

# Evaluate the model
loss, accuracy = model.evaluate(X, y)
print(f'Loss: {loss}, Accuracy: {accuracy}')

# Make predictions
predictions = model.predict(X)
print('Predictions:')
print(np.round(predictions))


2024-07-12 15:11:48.125293: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-12 15:11:49.483227: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-12 15:11:54.367960: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - accuracy: 0.5000 - loss: 0.6931
Loss: 0.6931473016738892, Accuracy: 0.5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Predictions:
[[1.]
 [1.]
 [1.]
 [1.]]
