# Backpropagation for XOR

In [1]:
import numpy as np
import pandas as pd

# Sigmoid activation function and its derivative
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    Parameters
    ----------
    x : scalar or array-like
        Pre-activation value(s).

    Returns
    -------
    numpy scalar or ndarray
        Element-wise sigmoid of ``x``; a scalar input yields a numpy scalar,
        an array input yields an array of the same shape.
    """
    x = np.asarray(x)
    # Only ever exponentiate a non-positive number: exp(-|x|) lies in (0, 1],
    # so it cannot overflow the way exp(-x) does for large negative x.
    e = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + exp(-x)).  For x < 0: exp(x) / (1 + exp(x)),
    # which is the same function written in terms of exp(-|x|).
    out = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () turns a 0-d result back into a numpy scalar and is a
    # no-op (view) for arrays with one or more dimensions.
    return out[()]

def sigmoid_derivative(x):
    """Slope of the sigmoid, expressed in terms of the sigmoid's *output*.

    ``x`` must already be an activation value s = sigmoid(z); the function
    returns s * (1 - s). It does not apply the sigmoid itself, so passing a
    raw pre-activation z gives a different (unintended) quantity.
    """
    complement = 1 - x
    return complement * x

# XOR truth table: two binary inputs and the expected XOR result.
data = {'Input1': [0, 0, 1, 1],
        'Input2': [0, 1, 0, 1],
        'Output': [0, 1, 1, 0]}  # XOR problem

df = pd.DataFrame(data)

# Inputs and outputs
X = df[['Input1', 'Input2']].values  # Input features, one row per sample
y = df['Output'].values.reshape(-1, 1)  # Target output as a column vector

# Network architecture: inputs -> one sigmoid hidden layer -> one sigmoid output
input_layer_size = X.shape[1]  # 2 (2 inputs)
hidden_layer_size = 4         # 4 neurons in the hidden layer
output_layer_size = 1         # 1 output neuron

# Randomly initialize weights and biases (uniform in [0, 1)); the fixed seed
# and the order of the rand() calls make the run repeatable.
np.random.seed(42)
weights_input_hidden = np.random.rand(input_layer_size, hidden_layer_size)
weights_hidden_output = np.random.rand(hidden_layer_size, output_layer_size)
bias_hidden = np.random.rand(1, hidden_layer_size)
bias_output = np.random.rand(1, output_layer_size)

# Training parameters
epochs = 10000
learning_rate = 0.1

# Training the network (full batch: every epoch uses all rows of X)
for epoch in range(epochs):
    # Forward pass
    hidden_input = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_output = sigmoid(hidden_input)

    final_input = np.dot(hidden_output, weights_hidden_output) + bias_output
    final_output = sigmoid(final_input)

    # Error is target minus prediction, so the "+=" updates below move the
    # prediction toward the target.
    error = y - final_output

    # Backpropagation
    # Output-layer delta. sigmoid_derivative is given the activation (the
    # sigmoid output), not the pre-activation.
    output_delta = error * sigmoid_derivative(final_output)

    # Hidden-layer delta: push the output delta back through the
    # hidden->output weights. This must be computed BEFORE those weights are
    # updated below.
    hidden_delta = output_delta.dot(weights_hidden_output.T) * sigmoid_derivative(hidden_output)

    # Update weights and biases (contributions summed over all samples)
    weights_hidden_output += hidden_output.T.dot(output_delta) * learning_rate
    weights_input_hidden += X.T.dot(hidden_delta) * learning_rate
    bias_output += np.sum(output_delta, axis=0, keepdims=True) * learning_rate
    bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate

    # Print the mean absolute error every 1000 epochs (measured before this
    # epoch's update was applied)
    if epoch % 1000 == 0:
        print(f"Epoch {epoch}, Error: {np.mean(np.abs(error))}")

# After training, test the network on the same inputs.
# The values left over from the loop were computed before the last weight
# update, so run one more forward pass to report what the final weights
# actually predict.
hidden_input = np.dot(X, weights_input_hidden) + bias_hidden
hidden_output = sigmoid(hidden_input)
final_input = np.dot(hidden_output, weights_hidden_output) + bias_output
final_output = sigmoid(final_input)

print("\nFinal Outputs after Training:")
print(final_output)


Epoch 0, Error: 0.49721892454240146
Epoch 1000, Error: 0.4880611702611545
Epoch 2000, Error: 0.42140647208759563
Epoch 3000, Error: 0.34147614865219594
Epoch 4000, Error: 0.21182296553191785
Epoch 5000, Error: 0.12925272319332737
Epoch 6000, Error: 0.09344799285513183
Epoch 7000, Error: 0.07464046653242815
Epoch 8000, Error: 0.06306756377339848
Epoch 9000, Error: 0.05518490381142588

Final Outputs after Training:
[[0.05035392]
 [0.94687409]
 [0.95698317]
 [0.05126862]]


# Backpropagation on a real dataset (Iris, using scikit-learn's MLPClassifier)

In [2]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris data: feature matrix X and class labels y.
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Multi-layer perceptron with a single hidden layer of 10 neurons, trained
# with the Adam solver for at most 1000 iterations.
mlp = MLPClassifier(
    hidden_layer_sizes=(10,),
    max_iter=1000,
    solver='adam',
    random_state=42,
)

# Fit on the training split, then classify the held-out samples.
# fit() returns the estimator itself, which is why the calls can be chained.
y_pred = mlp.fit(X_train, y_train).predict(X_test)

# Share of test samples whose predicted class equals the true class.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Dump every learned weight matrix, one per layer transition.
print("\nFinal weights after training:")
for i in range(len(mlp.coefs_)):
    layer_weights = mlp.coefs_[i]
    print(f"Layer {i+1} weights shape: {layer_weights.shape}")
    print(layer_weights)


Accuracy: 97.78%

Final weights after training:
Layer 1 weights shape: (4, 10)
[[-8.79229678e-08  4.52073506e-01  7.94937316e-01  1.90185856e-01
  -1.05695789e-02 -4.02436653e-01 -4.00716072e-02  3.11406317e-01
   1.85761909e-01  1.63055756e-04]
 [-5.73336312e-02  6.98356831e-01  9.47852982e-01 -6.35176974e-01
  -6.41519979e-03 -6.00503861e-01 -8.16795392e-05 -1.28997515e-01
  -5.67842351e-01 -1.69130088e-04]
 [ 7.56662961e-09 -8.22388012e-01 -5.00006070e-01  3.25807217e-01
  -2.09587930e-20  8.55167184e-01 -4.30129880e-03  7.63669219e-02
   7.53309960e-01 -4.50819297e-02]
 [ 2.97926176e-09 -8.27285320e-01 -1.19618796e+00  1.22364522e+00
   5.06164328e-02  9.14032336e-01 -7.98771902e-05 -2.07607074e-01
   9.73938739e-01  1.47877888e-17]]
Layer 2 weights shape: (10, 3)
[[ 6.13712464e-02  2.96292875e-03  4.62176305e-02]
 [ 1.01815228e+00  6.96274001e-02  3.27627213e-01]
 [ 3.11095849e-02  3.01073148e-02 -1.17933833e+00]
 [-7.47538604e-01  1.00143358e-01  2.68033604e-01]
 [ 1.00453781e-02

