## Multi-layer Perceptron

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification


class MultiLayerPerceptron:
    def __init__(self, input_size: int, hidden_size: int = 10, output_size: int = 1):
        # Initialize weights and biases
        self.weights_input_hidden = np.random.rand(input_size, hidden_size)
        self.biases_hidden = np.zeros((1, hidden_size))
        self.weights_hidden_output = np.random.rand(hidden_size, output_size)
        self.biases_output = np.zeros((1, output_size))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x)) # we interprete it like 'probability'
    
    def sigmoid_derivative(self, x):
        # пошук похідної для будь-якої функції: https://www.derivative-calculator.net
        # https://hausetutorials.netlify.app/posts/2019-12-01-neural-networks-deriving-the-sigmoid-derivative/
        return x * (1 - x)
    
    def relu(self, x):
        return np.maximum(0, x)
    
    def relu_derivative(self, x):
        # пошук похідної для будь-якої функції: https://www.derivative-calculator.net
        # https://yashgarg1232.medium.com/derivative-of-neural-activation-function-64e9e825b67
        return np.where(x > 0, 1, 0)
    
    def binary_cross_entropy_loss(self, y_true: np.array, y_pred: np.array):
        epsilon = 1e-15
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    
    def binary_cross_entropy_loss_derivative(self, y_true: np.array, y_pred: np.array):
        # пошук похідної для будь-якої функції: https://www.derivative-calculator.net
        epsilon = 1e-15 # to prevent division by zero issues
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        return (y_pred - y_true) / (y_pred * (1 - y_pred))
    
    def predict(self, X: np.array):
        # Forward propagation for prediction
        hidden_layer_input = np.dot(X, self.weights_input_hidden) + self.biases_hidden
        hidden_layer_output = self.relu(hidden_layer_input) # activation function
        output_layer_input = np.dot(hidden_layer_output, self.weights_hidden_output) + self.biases_output
        predicted_output = self.sigmoid(output_layer_input) # activation function, probabilities
        return predicted_output
    
    def train(self, X_train: np.array, y_train: np.array, learning_rate: float = 0.01, epochs: int = 50):
        for epoch in range(epochs):
            # Forward propagation
            hidden_layer_input = np.dot(X_train, self.weights_input_hidden) + self.biases_hidden
            hidden_layer_output = self.relu(hidden_layer_input)
            output_layer_input = np.dot(hidden_layer_output, self.weights_hidden_output) + self.biases_output
            predicted_output = self.sigmoid(output_layer_input)

            # Calculate Loss
            loss = np.mean(self.binary_cross_entropy_loss(y_train.reshape(-1, 1), predicted_output))

            # Backward propagation - Метод зворотнього поширення помилки
            output_error = self.binary_cross_entropy_loss_derivative(y_train.reshape(-1, 1), predicted_output) # похідна помилки
            output_delta = output_error * self.sigmoid_derivative(predicted_output) # дельта на шарі виходу

            hidden_layer_error = np.dot(output_delta, self.weights_hidden_output.T)
            hidden_layer_delta = hidden_layer_error * self.relu_derivative(hidden_layer_output) # похідна помилки на прихованих шарах

            # Update weights and biases
            self.weights_hidden_output -= learning_rate * np.dot(hidden_layer_output.T, output_delta) # оновлення ваг
            self.biases_output -= learning_rate * np.sum(output_delta, axis=0, keepdims=True) # оновлення зсуву

            self.weights_input_hidden -=learning_rate * np.dot(X_train.T, hidden_layer_delta)
            self.biases_hidden -= learning_rate * np.sum(hidden_layer_delta, axis=0, keepdims=True)

            # Print loss every 10 epochs
            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Loss: {loss}")


# Generate syntethic data
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the MLP
mlp = MultiLayerPerceptron(input_size=X_train.shape[1], hidden_size=10, output_size=1)
mlp.train(X_train, y_train, learning_rate=0.01, epochs=50)

# Test the trained model
predicted_output = mlp.predict(X_test)

# Convert predicted probabilities to binary predictions
binary_predictions = (predicted_output > 0.5).astype(int)

# Evaluate accuracy
accuracy = np.mean(binary_predictions == y_test.reshape(-1, 1))
print(f"\nTest Accuracy: {accuracy:.4f}")





Epoch 0, Loss: 2.7360103811144016
Epoch 10, Loss: 0.40430259175819777
Epoch 20, Loss: 0.4894867658137808
Epoch 30, Loss: 0.38141028327853477
Epoch 40, Loss: 0.3527854540374471

Test Accuracy: 0.8100


### Backpropagation

https://www.analyticsvidhya.com/blog/2023/01/gradient-descent-vs-backpropagation-whats-the-difference/