# Zadanie 5


Celem ćwiczenia jest implementacja perceptronu wielowarstwowego oraz wybranego algorytmu optymalizacji gradientowej z algorytmem propagacji wstecznej.

Następnie należy wytrenować perceptron wielowarstwowy do klasyfikacji zbioru danych [MNIST](http://yann.lecun.com/exdb/mnist/). Zbiór MNIST dostępny jest w pakiecie `scikit-learn`.

Punktacja:
1. Implementacja propagacji do przodu (`forward`) [1 pkt]
2. Implementacja wstecznej propagacji (zademonstrowana na bramce XOR) (`backward`) [2 pkt]
3. Przeprowadzenie eksperymentów na zbiorze MNIST, w tym:
    1. Porównanie co najmniej dwóch architektur sieci [1 pkt]
    2. Przetestowanie każdej architektury na co najmniej 3 ziarnach [1 pkt]
    3. Wnioski [1.5 pkt]
4. Jakość kodu [0.5 pkt]

Polecane źródła - teoria + intuicja:
1. [Karpathy, CS231n Winter 2016: Lecture 4: Backpropagation, Neural Networks 1](https://www.youtube.com/watch?v=i94OvYb6noo&ab_channel=AndrejKarpathy)
2. [3Blue1Brown, Backpropagation calculus | Chapter 4, Deep learning](https://www.youtube.com/watch?v=tIeHLnjs5U8&t=4s&ab_channel=3Blue1Brown)


In [2]:
from abc import abstractmethod, ABC
from typing import List
import numpy as np


In [None]:
class Layer(ABC):
    """Abstract base class shared by all layers of the neural network.

    Subclasses must provide ``forward`` and ``backward``. Every layer also
    stores a learning rate (0.01 by default) behind a validated property.
    """

    def __init__(self) -> None:
        # Default step size; replace it through the ``learning_rate`` property.
        self._learning_rate = 0.01

    @abstractmethod
    def forward(self, x: np.ndarray) -> np.ndarray:
        """Propagate ``x`` forward through the layer."""

    @abstractmethod
    def backward(self, output_error_derivative) -> np.ndarray:
        """Propagate ``output_error_derivative`` backward through the layer."""

    @property
    def learning_rate(self):
        """Learning rate currently stored on the layer."""
        return self._learning_rate

    @learning_rate.setter
    def learning_rate(self, learning_rate):
        # The upper bound is checked first, then the lower bound; the value is
        # stored only when both assertions hold.
        assert learning_rate < 1, (
            f"Given learning_rate={learning_rate} is larger than 1"
        )
        assert learning_rate > 0, (
            f"Given learning_rate={learning_rate} is smaller than 0"
        )
        self._learning_rate = learning_rate

class FullyConnected(Layer):
    """Skeleton of a fully connected layer.

    Only the layer dimensions are stored; ``forward`` and ``backward`` are
    still empty placeholders.
    """

    def __init__(self, input_size: int, output_size: int) -> None:
        super().__init__()
        self.input_size, self.output_size = input_size, output_size

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Placeholder: not implemented yet, returns ``None``."""
        return None

    def backward(self, output_error_derivative) -> np.ndarray:
        """Placeholder: not implemented yet, returns ``None``."""
        return None

class Tanh(Layer):
    """Skeleton of the tanh activation layer; both passes are placeholders."""

    def __init__(self) -> None:
        # No state of its own; only the base-class initialisation runs.
        super().__init__()

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Placeholder: not implemented yet, returns ``None``."""
        return None

    def backward(self, output_error_derivative) -> np.ndarray:
        """Placeholder: not implemented yet, returns ``None``."""
        return None

class Loss:
    """Skeleton that pairs a loss function with its derivative.

    The two callables are stored on the instance; ``loss`` and
    ``loss_derivative`` are still empty placeholders.
    """

    def __init__(self, loss_function: callable, loss_function_derivative: callable) -> None:
        self.loss_function, self.loss_function_derivative = (
            loss_function,
            loss_function_derivative,
        )

    def loss(self, x: np.ndarray) -> np.ndarray:
        """Loss function for a particular x (placeholder, returns ``None``)."""
        return None

    def loss_derivative(self, x: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Loss function derivative for a particular x and y (placeholder, returns ``None``)."""
        return None

class Network:
    """Skeleton of a sequential network built from ``Layer`` objects.

    The constructor stores the layers and the learning rate; ``compile``,
    ``__call__`` and ``fit`` are still empty placeholders.
    """

    def __init__(self, layers: List[Layer], learning_rate: float) -> None:
        self.layers, self.learning_rate = layers, learning_rate

    def compile(self, loss: Loss) -> None:
        """Define the loss function and loss function derivative (placeholder)."""
        return None

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Forward propagation of x through all layers (placeholder, returns ``None``)."""
        return None

    def fit(
        self,
        x_train: np.ndarray,
        y_train: np.ndarray,
        epochs: int,
        learning_rate: float,
        verbose: int = 0,
    ) -> None:
        """Fit the network to the training data (placeholder)."""
        return None

# Eksperymenty

# Wnioski

In [2]:
from typing import List
import numpy as np
from abc import ABC, abstractmethod
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

class Layer(ABC):
    """Abstract base class of every network layer.

    Concrete layers implement ``forward`` and ``backward``. Each layer stores
    a learning rate (0.01 by default) behind a validated property.
    """

    def __init__(self):
        # Default step size; replace it through the ``learning_rate`` property.
        self._learning_rate = 0.01

    @abstractmethod
    def forward(self, x: np.ndarray) -> np.ndarray:
        """Propagate ``x`` forward through the layer."""

    @abstractmethod
    def backward(self, output_error_derivative: np.ndarray) -> np.ndarray:
        """Propagate ``output_error_derivative`` backward through the layer."""

    @property
    def learning_rate(self):
        """Learning rate currently stored on the layer."""
        return self._learning_rate

    @learning_rate.setter
    def learning_rate(self, learning_rate):
        # Only values strictly between 0 and 1 are accepted.
        assert 0 < learning_rate < 1, (
            f"Given learning_rate={learning_rate} is not in the range (0, 1)"
        )
        self._learning_rate = learning_rate

class FullyConnected(Layer):
    """Fully connected (affine) layer computing ``x @ weights + bias``.

    ``weights`` has shape ``(input_size, output_size)`` and is drawn from a
    standard normal distribution; ``bias`` has shape ``(1, output_size)`` and
    starts at zero. Both are updated in place by ``backward`` with plain
    gradient descent using the layer's ``learning_rate``.
    """

    def __init__(self, input_size: int, output_size: int):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(input_size, output_size)
        self.bias = np.zeros((1, output_size))
        # Input of the most recent forward pass; needed for the weight gradient.
        self.input = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Return ``x @ weights + bias`` and cache ``x`` for ``backward``.

        ``x`` must have ``input_size`` columns so it can be multiplied by
        ``weights``.
        """
        self.input = x
        return np.dot(x, self.weights) + self.bias

    def backward(self, output_error_derivative: np.ndarray) -> np.ndarray:
        """Update the parameters and return the gradient w.r.t. the input.

        Args:
            output_error_derivative: Gradient of the loss with respect to this
                layer's output.

        Returns:
            Gradient of the loss with respect to this layer's input.

        Raises:
            RuntimeError: If called before any ``forward`` pass.
        """
        if self.input is None:
            raise RuntimeError("backward() called before forward()")

        # The gradient handed to the previous layer must be computed with the
        # weights that produced the forward output, i.e. BEFORE the update.
        input_error_derivative = np.dot(output_error_derivative, self.weights.T)

        weights_derivative = np.dot(self.input.T, output_error_derivative)
        bias_derivative = np.sum(output_error_derivative, axis=0, keepdims=True)

        self.weights -= self.learning_rate * weights_derivative
        self.bias -= self.learning_rate * bias_derivative
        return input_error_derivative


class Tanh(Layer):
    """Element-wise hyperbolic tangent activation layer."""

    def __init__(self):
        super().__init__()
        # Activation of the most recent forward pass, reused by backward().
        self.output = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Apply ``tanh`` to ``x``, cache the result and return it."""
        activated = np.tanh(x)
        self.output = activated
        return activated

    def backward(self, output_error_derivative: np.ndarray) -> np.ndarray:
        """Scale the incoming gradient by ``1 - tanh(x)^2`` from the cached output."""
        local_slope = 1 - np.square(self.output)
        return output_error_derivative * local_slope

class Loss(ABC):
    """Abstract interface of a loss: its value and its derivative."""

    @abstractmethod
    def loss(self, predictions: np.ndarray, targets: np.ndarray) -> np.ndarray:
        """Return the loss value for ``predictions`` against ``targets``."""

    @abstractmethod
    def loss_derivative(self, predictions: np.ndarray, targets: np.ndarray) -> np.ndarray:
        """Return the derivative of the loss with respect to ``predictions``."""

class MeanSquaredError(Loss):
    """Half mean squared error: ``0.5 * mean((predictions - targets)^2)``."""

    def loss(self, predictions: np.ndarray, targets: np.ndarray) -> np.ndarray:
        """Return half of the mean squared difference over all elements."""
        return 0.5 * np.mean(np.square(predictions - targets))

    def loss_derivative(self, predictions: np.ndarray, targets: np.ndarray) -> np.ndarray:
        """Return the gradient of ``loss`` with respect to ``predictions``.

        ``loss`` averages over every element, so each partial derivative is
        ``(prediction - target) / number_of_elements``. Without that factor the
        gradient grows with the batch size and no longer matches ``loss``.
        """
        residual = predictions - targets
        return residual / np.size(residual)

# ///////////////////////////////////////////////////////////////////////////////////////////////
# def mean_squared_error_loss(predictions: np.ndarray, targets: np.ndarray) -> np.ndarray:
#     return 0.5 * np.mean(np.square(predictions - targets))

# def mean_squared_error_derivative(predictions: np.ndarray, targets: np.ndarray) -> np.ndarray:
#     return predictions - targets

# def loss(x: np.ndarray, y: np.ndarray, loss_function: callable) -> np.ndarray:
#     """Loss function for a particular x and y"""
#     return loss_function(x, y)

# def loss_derivative(x: np.ndarray, y: np.ndarray, loss_function_derivative: callable) -> np.ndarray:
#     """Loss function derivative for a particular x and y"""
#     return loss_function_derivative(x, y)

# ///////////////////////////////////////////////////////////////////////////////////////////////

class Network:
    """Sequential network: layers applied in order, trained by backpropagation."""

    def __init__(self, layers: List[Layer], learning_rate: float):
        """Store the layers and hand the learning rate to each of them.

        Args:
            layers: Layers applied in list order during the forward pass.
            learning_rate: Step size assigned to every layer. It is validated
                by the layer's ``learning_rate`` setter.
        """
        self.layers = layers
        self.learning_rate = learning_rate
        self.loss = None
        # Layers perform their own parameter updates, so the step size has to
        # live on them; otherwise they silently keep their 0.01 default.
        for layer in self.layers:
            layer.learning_rate = learning_rate

    def compile(self, loss: Loss):
        """Set the loss object used by ``fit``."""
        self.loss = loss

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Forward-propagate ``x`` through all layers and return the output."""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def fit(self, x_train: np.ndarray, y_train: np.ndarray, epochs: int, verbose: int = 0):
        """Train on the whole training set for ``epochs`` full-batch steps.

        Args:
            x_train: Training inputs.
            y_train: Training targets, compared against the network output.
            epochs: Number of forward/backward passes over the data.
            verbose: Print the loss every ``verbose`` epochs; ``0`` disables
                printing.

        Raises:
            RuntimeError: If ``compile`` has not been called.
        """
        if self.loss is None:
            raise RuntimeError("Network.compile() must be called before fit()")

        for epoch in range(epochs):
            predictions = self(x_train)

            # The loss value is only needed for reporting, so skip it otherwise.
            if verbose and epoch % verbose == 0:
                error = self.loss.loss(predictions, y_train)
                print(f"Epoch: {epoch}, Error: {error}")

            error_derivative = self.loss.loss_derivative(predictions, y_train)
            for layer in reversed(self.layers):
                error_derivative = layer.backward(error_derivative)

# # Example usage:
# # Define the layers
# input_size = 2
# hidden_size = 4
# output_size = 1

# fully_connected_layer1 = FullyConnected(input_size, hidden_size)
# tanh_layer = Tanh()
# fully_connected_layer2 = FullyConnected(hidden_size, output_size)

# #  mogę usunąć całą than layer jesli odrazu będę przemnażać przez funckję aktywacji !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# # Create the network
# network = Network(layers=[fully_connected_layer1, tanh_layer, fully_connected_layer2], learning_rate=0.01)

# # Compile the network with MeanSquaredError loss
# network.compile(loss=MeanSquaredError())

# # Define the training data
# x_train = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
# y_train = np.array([[0], [1], [1], [0]])

# # Train the network
# network.fit(x_train, y_train, epochs=1000, verbose=100)


# Fix the NumPy seed so the random weight initialisation is reproducible.
# The train/test split is seeded separately through ``random_state`` below.
np.random.seed(42)

# Load MNIST as plain NumPy arrays (as_frame=False) so the layers receive
# ndarrays, as their annotations state, instead of pandas objects.
mnist = fetch_openml('mnist_784', as_frame=False)
X = mnist.data.astype('float32') / 255.0  # scale pixel values to [0, 1]
y = LabelBinarizer().fit_transform(mnist.target.astype('int'))  # one-hot targets

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Layer sizes are derived from the data
input_size = X_train.shape[1]
output_size = y_train.shape[1]

# Architecture: one hidden tanh layer with 128 units
layers = [
    FullyConnected(input_size, 128),
    Tanh(),
    FullyConnected(128, output_size)
]

# Create the neural network
learning_rate = 0.01
network = Network(layers, learning_rate)

# Compile the network with a loss function
loss_function = MeanSquaredError()
network.compile(loss_function)

# Train the network (the loss is printed every `verbose` epochs)
epochs = 10
verbose = 1
network.fit(X_train, y_train, epochs, verbose)

# Evaluate the network on the test set
predictions = network(X_test)
test_error = loss_function.loss(predictions, y_test)
print(f"Test Error: {test_error}")

# MNIST is a classification task, so also report how often the strongest
# output matches the one-hot encoded label.
test_accuracy = np.mean(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1))
print(f"Test Accuracy: {test_accuracy:.4f}")


  warn(


KeyboardInterrupt: 

In [None]:
class Layer(ABC):
    """Abstract base class shared by all layers of the neural network.

    Subclasses must provide ``forward`` and ``backward``. Every layer also
    stores a learning rate (0.01 by default) behind a validated property.
    """

    def __init__(self) -> None:
        # Default step size; replace it through the ``learning_rate`` property.
        self._learning_rate = 0.01

    @abstractmethod
    def forward(self, x: np.ndarray) -> np.ndarray:
        """Propagate ``x`` forward through the layer."""

    @abstractmethod
    def backward(self, output_error_derivative) -> np.ndarray:
        """Propagate ``output_error_derivative`` backward through the layer."""

    @property
    def learning_rate(self):
        """Learning rate currently stored on the layer."""
        return self._learning_rate

    @learning_rate.setter
    def learning_rate(self, learning_rate):
        # The upper bound is checked first, then the lower bound; the value is
        # stored only when both assertions hold.
        assert learning_rate < 1, (
            f"Given learning_rate={learning_rate} is larger than 1"
        )
        assert learning_rate > 0, (
            f"Given learning_rate={learning_rate} is smaller than 0"
        )
        self._learning_rate = learning_rate

class FullyConnected(Layer):
    """Fully connected (affine) layer computing ``x @ weights + bias``.

    The layer is purely linear: it applies no activation itself. A
    non-linearity is obtained by placing a separate activation layer (such as
    ``Tanh``) after it in the network's layer list.
    """

    def __init__(self, input_size: int, output_size: int) -> None:
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.weights = np.random.randn(input_size, output_size)
        self.bias = np.zeros((1, output_size))
        # Input of the most recent forward pass; needed for the weight gradient.
        self.input = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Return ``x @ weights + bias`` and cache ``x`` for ``backward``.

        ``x`` must have ``input_size`` columns so it can be multiplied by
        ``weights``.
        """
        self.input = x
        return np.dot(x, self.weights) + self.bias

    def backward(self, output_error_derivative) -> np.ndarray:
        """Update the parameters and return the gradient w.r.t. the input.

        Args:
            output_error_derivative: Gradient of the loss with respect to this
                layer's output.

        Returns:
            Gradient of the loss with respect to this layer's input.

        Raises:
            RuntimeError: If called before any ``forward`` pass.
        """
        if self.input is None:
            raise RuntimeError("backward() called before forward()")

        # Computed with the pre-update weights: these are the weights that
        # produced the forward output being differentiated.
        input_error_derivative = np.dot(output_error_derivative, self.weights.T)

        weights_derivative = np.dot(self.input.T, output_error_derivative)
        bias_derivative = np.sum(output_error_derivative, axis=0, keepdims=True)

        self.weights -= self.learning_rate * weights_derivative
        self.bias -= self.learning_rate * bias_derivative
        return input_error_derivative

class Tanh(Layer):
    """Element-wise hyperbolic tangent activation layer."""

    def __init__(self) -> None:
        super().__init__()
        # Activation of the most recent forward pass, reused by backward().
        self.output = None

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Apply ``tanh`` to ``x``, cache the result and return it."""
        self.output = np.tanh(x)
        return self.output

    def backward(self, output_error_derivative) -> np.ndarray:
        """Return the incoming gradient scaled by ``1 - tanh(x)^2``.

        Raises:
            RuntimeError: If called before any ``forward`` pass.
        """
        if self.output is None:
            raise RuntimeError("backward() called before forward()")
        # d/dx tanh(x) = 1 - tanh(x)^2, evaluated from the cached activation.
        return output_error_derivative * (1 - np.square(self.output))

class Loss:
    """Pairs a loss function with its derivative and delegates to them."""

    def __init__(self, loss_function: callable, loss_function_derivative: callable) -> None:
        """Store the two callables.

        Args:
            loss_function: Callable computing the loss value.
            loss_function_derivative: Callable computing the derivative of the
                loss for a given ``x`` and ``y``.
        """
        self.loss_function = loss_function
        self.loss_function_derivative = loss_function_derivative

    def loss(self, x: np.ndarray, y: np.ndarray = None) -> np.ndarray:
        """Return the loss for ``x``, measured against ``y`` when it is given.

        ``y`` is optional so that existing single-argument calls keep working;
        in that case ``loss_function`` is called with ``x`` only.
        """
        if y is None:
            return self.loss_function(x)
        return self.loss_function(x, y)

    def loss_derivative(self, x: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Return ``loss_function_derivative(x, y)``."""
        return self.loss_function_derivative(x, y)

class Network:
    """Sequential network: layers applied in order, trained by backpropagation."""

    def __init__(self, layers: List[Layer], learning_rate: float) -> None:
        """Store the layers and hand the learning rate to each of them.

        Args:
            layers: Layers applied in list order during the forward pass.
            learning_rate: Step size assigned to every layer. It is validated
                by the layer's ``learning_rate`` setter.
        """
        self.layers = layers
        self.learning_rate = learning_rate
        self.loss = None
        self._apply_learning_rate(learning_rate)

    def _apply_learning_rate(self, learning_rate: float) -> None:
        """Assign ``learning_rate`` to every layer (layers update themselves)."""
        for layer in self.layers:
            layer.learning_rate = learning_rate

    def compile(self, loss: Loss) -> None:
        """Define the loss function and loss function derivative"""
        self.loss = loss

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Forward propagation of x through all layers"""
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def fit(self,
            x_train: np.ndarray,
            y_train: np.ndarray,
            epochs: int,
            learning_rate: float,
            verbose: int = 0) -> None:
        """Fit the network to the training data

        Args:
            x_train: Training inputs.
            y_train: Training targets, compared against the network output.
            epochs: Number of full-batch forward/backward passes.
            learning_rate: Step size used for this training run; it replaces
                the value given to the constructor.
            verbose: Print the loss every ``verbose`` epochs; ``0`` disables
                printing.

        Raises:
            RuntimeError: If ``compile`` has not been called.
        """
        if self.loss is None:
            raise RuntimeError("Network.compile() must be called before fit()")

        self.learning_rate = learning_rate
        self._apply_learning_rate(learning_rate)

        for epoch in range(epochs):
            predictions = self(x_train)

            # The loss value is only needed for reporting, so skip it otherwise.
            if verbose and epoch % verbose == 0:
                error = self.loss.loss(predictions, y_train)
                print(f"Epoch: {epoch}, Error: {error}")

            error_derivative = self.loss.loss_derivative(predictions, y_train)
            for layer in reversed(self.layers):
                error_derivative = layer.backward(error_derivative)