<a href="https://colab.research.google.com/github/adnaen/machine-learning-notes/blob/main/DEEP_LEARNING/multilayer_perceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **MultiLayer Perceptron from Scratch (using NumPy)**

- **An MLP (multilayer perceptron) is a feed-forward neural network built from fully connected layers.**
- **Weight initialization: each weight matrix `W` has shape (next-layer features, current-layer features).**

In [None]:
import numpy as np

# Seed NumPy's global RNG so every later np.random.* draw in this notebook is repeatable.
np.random.seed(2342)

In [72]:
# Utility functions: activations, their derivatives, and the linear (affine) transform

def relu(z: np.ndarray | float | int) -> np.ndarray | float | int:
    return np.maximum(0, z)

def derivative_sigmoid(
        z: np.ndarray | float | int) -> np.ndarray | float | int:
        return z * (1-z)

def derivative_relu(
        z: np.ndarray | float | int) -> np.ndarray | float | int:
        return (z > 0).astype(float)

def sigmoid(z: np.ndarray) -> np.ndarray:
    """Compute the logistic sigmoid ``1 / (1 + exp(-z))`` without overflow.

    Args:
        z: Pre-activation value(s).

    Returns:
        Element-wise sigmoid of ``z``, same shape as the input.
    """
    z = np.asarray(z)
    # exp(-|z|) is always in (0, 1], so it can never overflow; the naive
    # exp(-z) overflows (with a RuntimeWarning) for large negative z.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Hand scalars back as NumPy scalars rather than 0-d arrays.
    return out[()] if out.ndim == 0 else out

def derivative_bce(z: np.ndarray, y: np.ndarray) -> np.ndarray:
    """Return the derivative of binary cross-entropy w.r.t. the prediction.

    Args:
        z: Predicted probabilities.
        y: Target labels, broadcastable against ``z``.

    Returns:
        ``(z - y) / (z * (1 - z) + 1e-8)``, element-wise; the small constant
        keeps the denominator away from zero when ``z`` saturates at 0 or 1.
    """
    residual = z - y
    guarded_variance = z * (1 - z) + 1e-8
    return residual / guarded_variance

def linear_trans(x: np.ndarray, w: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Apply the affine map ``w . x + b``.

    Args:
        x: Input with features along the first axis (the callers in this
            file pass a ``(features, samples)`` array).
        w: Weight matrix of shape ``(out_features, in_features)``.
        b: Bias, broadcast against the product (the callers in this file
            pass an ``(out_features, 1)`` column).

    Returns:
        The transformed array ``np.dot(w, x) + b``.
    """
    return np.dot(w, x) + b

In [107]:
class MLP:
    def __init__(
            self,
            in_feature: int,
            out_feature: int,
            h1_feature: int,
            lr: float = 0.01,
            epochs: int = 500
        ) -> None:
        self.in_feature = in_feature
        self.h1_feature = h1_feature
        self.out_feature = out_feature

        self.lr =  lr
        self.epochs = epochs

        self.h1_weight = None
        self.h1_bias = None
        self.out_weight = None
        self.out_bias = None


    def forward_pass(self) -> None:
        self.Z1 = linear_trans(x=self.x.T, w=self.h1_weight, b=self.h1_bias)
        self.A1 = relu(z=self.Z1)

        self.Z2 = linear_trans(x=self.A1, w=self.out_weight, b=self.out_bias)
        self.A2 = sigmoid(z=self.Z2)

    def backward_pass(self) -> None:
        self.dl_da2 = derivative_bce(self.A2, self.y)
        da2_dz2 = derivative_sigmoid(self.A2)
        dl_dz2 = self.dl_da2 * da2_dz2

        dl_da1 = np.dot(self.out_weight.T, dl_dz2)  # loss
        da1_dz1 = derivative_relu(self.Z1)  # derivative of activation
        dl_dz1 = dl_da1 * da1_dz1

        # params update

        # output layer
        # derivative of weight and bias
        dl_dw2 = np.dot(dl_dz2, self.A1.T)
        dl_db2 = np.sum(dl_dz2, axis=1, keepdims=True)
        # using gradient descent ton update w and b
        self.out_weight -= self.lr * dl_dw2
        self.out_bias -= self.lr * dl_db2

        # hidden layer
        # derivative of weight and bias
        dl_dw1 = np.dot(dl_dz1, self.x)
        dl_db1 = np.sum(dl_dz1, axis=1, keepdims=True)
        # using gradient descent ton update w and b
        self.h1_weight -= self.lr * dl_dw1
        self.h1_bias -= self.lr * dl_db1

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        # initialize random weight
        self.x = x
        self.y = y
        self.h1_weight = np.random.randn(self.h1_feature, self.in_feature) * 0.001
        self.h1_bias = np.zeros((self.h1_feature, 1))

        self.out_weight = np.random.randn(self.out_feature, self.h1_feature) * 0.001
        self.out_bias = np.zeros((self.out_feature, 1))

        for epoch in range(self.epochs):
            # training
            self.forward_pass()
            self.backward_pass()

            loss = -np.mean(self.y * np.log(self.A2 + 1e-8) + (1 - self.y) * np.log(1 - self.A2 + 1e-8))

            if epoch % 100 == 0:
                print(f"Epoch {epoch}: Loss = {loss:.6f}")

    def predict(self) -> None:...

In [118]:
# Synthetic dataset: 1000 samples of 10 standard-normal features, with labels
# drawn uniformly from {1, 0} independently of the features.
x = np.random.randn(1000, 10)
# One vectorised draw instead of 1000 Python-level calls (and no loop
# variable reusing the name `x`).
y = np.random.choice([1, 0], size=x.shape[0])

In [125]:
# Layer widths: 10 inputs -> 15 hidden units -> 1 output.
in_feat, h1_feat, out_feat = 10, 15, 1

# Build the network, then train it on the synthetic data defined above.
mlp = MLP(in_feat, out_feat, h1_feat, lr=0.001, epochs=1000)
mlp.fit(x, y)

Epoch 0: Loss = 0.693147
Epoch 100: Loss = 0.691176
Epoch 200: Loss = 0.666635
Epoch 300: Loss = 0.645757
Epoch 400: Loss = 0.635724
Epoch 500: Loss = 0.630148
Epoch 600: Loss = 0.620706
Epoch 700: Loss = 0.622932
Epoch 800: Loss = 0.617089
Epoch 900: Loss = 0.615498
