# **MLP with Backpropagation**

## Importing libraries

In [1]:
import numpy as np              #for performing numerical operations
from sklearn.datasets import make_moons    #importing the dataset make_moons shaped like two interleaving half-moons
from sklearn.preprocessing import OneHotEncoder   #converts categorical data into numerical format
from sklearn.model_selection import train_test_split   #Splits data into training and testing sets

## Defining Multilayer Perceptron Class

In [2]:
class MLP:
  def __init__(self,input_size,hidden_size,output_size,learning_rate=0.01):   #constructor method that runs when an object of the class is created
    self.input_size=input_size
    self.hidden_size=hidden_size
    self.output_size=output_size
    self.learning_rate=learning_rate                         #Defines how fast the neural network updates during learning (default is 0.01)

    #Initialize weights and biases
    #W1 - weights from input to hidden layer
    self.W1=np.random.randn(self.input_size,self.hidden_size) #np.random.randn(rows,columns) generates random numbers from a normal distribution(ie.mean 0 & std 1)
    #Bias for Hidden Layer
    self.b1=np.zeros((1,self.hidden_size))
    self.W2=np.random.randn(self.hidden_size,self.output_size)
    self.b2=np.zeros((1,self.output_size))

  def sigmoid(self,x):
    return 1/(1+np.exp(-x))

  def sigmoid_derivative(self,x):
    return x*(1-x)

  def forward(self,X):
    self.z1=np.dot(X,self.W1)+self.b1
    self.a1=self.sigmoid(self.z1)
    self.z2=np.dot(self.a1,self.W2)+self.b2
    self.a2=self.sigmoid(self.z2)
    return self.a2

  def backward(self,X,y):
    m=y.shape[0]

    #compute error
    error=self.a2-y
    d_output=error*self.sigmoid_derivative(self.a2)

    #compute gradient for hidden layer
    error_hidden=np.dot(d_output,self.W2.T)
    d_hidden=error_hidden*self.sigmoid_derivative(self.a1)

    #update weights and biases
    self.W2-=self.learning_rate*np.dot(self.a1.T,d_output)/m
    self.b2-=self.learning_rate*np.sum(d_output,axis=0,keepdims=True)/m
    self.W1-=self.learning_rate*np.dot(X.T,d_hidden)/m
    self.b1-=self.learning_rate*np.sum(d_hidden,axis=0,keepdims=True)/m

  def train(self,X,y,epochs=10000):
      for epoch in range(epochs):
        self.forward(X)
        self.backward(X,y)

        if epoch%1000==0:
          loss=np.mean((self.a2-y)**2)
          print(f"Epoch {epoch},Loss:{loss:.6f}")

  def predict(self,X):
      return self.forward(X)

## Generating & Splitting dataset for training

In [3]:
#Generate dataset
X,y=make_moons(n_samples=500,noise=0.2,random_state=42)
y=y.reshape(-1,1)  #reshape for compatibilty

#split dataset
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=42)

## Training MLP on the dataset

In [4]:
#Train MLP
mlp=MLP(input_size=2,hidden_size=4,output_size=1,learning_rate=0.1)
mlp.train(X_train,y_train,epochs=10000)

Epoch 0,Loss:0.292444
Epoch 1000,Loss:0.136272
Epoch 2000,Loss:0.104769
Epoch 3000,Loss:0.097734
Epoch 4000,Loss:0.094886
Epoch 5000,Loss:0.093544
Epoch 6000,Loss:0.092856
Epoch 7000,Loss:0.092474
Epoch 8000,Loss:0.092241
Epoch 9000,Loss:0.092086


## Making predictions with the trained model

In [5]:
#predictions
predictions=mlp.predict(X_test)
print("Predictions:",predictions)

Predictions: [[0.5212849 ]
 [0.22578535]
 [0.30887004]
 [0.95477961]
 [0.90046643]
 [0.80578014]
 [0.0370618 ]
 [0.07530319]
 [0.32053113]
 [0.08423896]
 [0.9489012 ]
 [0.04332643]
 [0.84212431]
 [0.84350749]
 [0.90512294]
 [0.05902108]
 [0.05502829]
 [0.90087697]
 [0.87372675]
 [0.15615662]
 [0.06278368]
 [0.50614901]
 [0.63661572]
 [0.25454836]
 [0.11915513]
 [0.17141618]
 [0.77462151]
 [0.04254255]
 [0.94049358]
 [0.04437501]
 [0.94640315]
 [0.96527257]
 [0.09651079]
 [0.03757103]
 [0.9468687 ]
 [0.62890452]
 [0.08246137]
 [0.95500495]
 [0.94000175]
 [0.90667445]
 [0.70712591]
 [0.83461281]
 [0.29360648]
 [0.06851406]
 [0.05404887]
 [0.14273632]
 [0.91579664]
 [0.13957984]
 [0.89346382]
 [0.29142201]
 [0.34204872]
 [0.95721958]
 [0.0562541 ]
 [0.05776491]
 [0.23686486]
 [0.07271768]
 [0.27220839]
 [0.94935337]
 [0.04084792]
 [0.93454827]
 [0.05857781]
 [0.91973263]
 [0.85554934]
 [0.06227643]
 [0.90175167]
 [0.09077296]
 [0.17469596]
 [0.75484127]
 [0.05272775]
 [0.65582296]
 [0.560

## Conclusion

The moons dataset is a frequent nonlinear classification issue that the developed Multi-Layer Perceptron (MLP) is intended to categorize.  Using the sigmoid activation function, the network is composed of an input layer with two neurons, a hidden layer with four neurons, and an output layer with one neuron.  Backpropagation and gradient descent are used for training, and in order to minimize error, weights and biases are changed iteratively across 10,000 epochs.  The model shows how a basic neural network can manage intricate decision boundaries by successfully learning to distinguish between the two classes.