# Zadanie 5


Celem ćwiczenia jest implementacja perceptronu wielowarstwowego oraz wybranego algorytmu optymalizacji gradientowej z algorytmem propagacji wstecznej.

Następnie należy wytrenować perceptron wielowarstwowy do klasyfikacji zbioru danych [MNIST](http://yann.lecun.com/exdb/mnist/). Zbiór MNIST dostępny jest w pakiecie `scikit-learn`.

Punktacja:
1. Implementacja propagacji do przodu (`forward`) [1 pkt]
2. Implementacja wstecznej propagacji (zademonstrowana na bramce XOR) (`backward`) [2 pkt]
3. Przeprowadzenie eksperymentów na zbiorze MNIST, w tym:
    1. Porównanie co najmniej dwóch architektur sieci [1 pkt]
    2. Przetestowanie każdej architektury na conajmniej 3 ziarnach [1 pkt]
    3. Wnioski 1.[5 pkt]
4. Jakość kodu 0.[5 pkt]

Polecane źródła - teoria + intuicja:
1. [Karpathy, CS231n Winter 2016: Lecture 4: Backpropagation, Neural Networks 1](https://www.youtube.com/watch?v=i94OvYb6noo&ab_channel=AndrejKarpathy)
2. [3 Blude one Brown, Backpropagation calculus | Chapter 4, Deep learning
](https://www.youtube.com/watch?v=tIeHLnjs5U8&t=4s&ab_channel=3Blue1Brown)


In [18]:
from abc import abstractmethod, ABC
from typing import List
import numpy as np


In [154]:
class Layer(ABC):
    """Basic building block of the Neural Network"""

    def __init__(self) -> None:
        self._learning_rate = 0.01

    @abstractmethod
    def forward(self, x: np.ndarray) -> np.ndarray:
        """Forward propagation of x through layer"""
        pass

    @abstractmethod
    def backward(self, output_error_derivative) -> np.ndarray:
        """Backward propagation of output_error_derivative through layer"""
        pass

    @property
    def learning_rate(self):
        return self._learning_rate

    @learning_rate.setter
    def learning_rate(self, learning_rate):
        assert (
            learning_rate < 1
        ), f"Given learning_rate={learning_rate} is larger than 1"
        assert (
            learning_rate > 0
        ), f"Given learning_rate={learning_rate} is smaller than 0"
        self._learning_rate = learning_rate


class FullyConnected(Layer):
    def __init__(self, input_size: int, output_size: int) -> None:
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size

        # self.weights = np.random.randn(input_size, output_size)
        self.weights = np.empty((input_size + 1, output_size))
        self.weights.fill(1)

    def forward(self, x: np.ndarray) -> np.ndarray:
        self.x = x
        self.x = np.append(self.x, np.ones((self.x.shape[0], 1)), axis=1)
        self.y = self.x * self.weights.transpose()

        return self.y

    def backward(self, output_error_derivative) -> np.ndarray:
        dx = output_error_derivative * self.weights.transpose()
        return dx


class Sigmoid(Layer):
    def __init__(self) -> None:
        super().__init__()

    def forward(self, x: np.ndarray) -> np.ndarray:
        self.x = x
        self.y = 1 / (1 + np.exp(-(self.x)))
        return self.y

    def backward(self, output_error_derivative) -> np.ndarray:
        dx = output_error_derivative * (1 - self.x) * self.x
        return dx


class Loss:
    def __init__(
        self, loss_function: callable, loss_function_derivative: callable
    ) -> None:
        self.loss_function = loss_function
        self.loss_function_derivative = loss_function_derivative

    def loss(self, x: np.ndarray) -> np.ndarray:
        """Loss function for a particular x"""
        return self.loss_function(x)

    def loss_derivative(self, x: np.ndarray, y: np.ndarray) -> np.ndarray:
        """Loss function derivative for a particular x and y"""
        return self.loss_function_derivative(x, y).transpose()


class Network:
    def __init__(self, layers: List[Layer], learning_rate: float) -> None:
        self.layers = layers
        self.learning_rate = learning_rate

    def compile(self, loss: Loss) -> None:
        """Define the loss function and loss function derivative"""
        pass

    def __call__(self, x: np.ndarray) -> np.ndarray:
        """Forward propagation of x through all layers"""
        pass

    def fit(
        self,
        x_train: np.ndarray,
        y_train: np.ndarray,
        epochs: int,
        learning_rate: float,
        verbose: int = 0,
    ) -> None:
        """Fit the network to the training data"""
        pass

# Eksperymenty

# Wnioski

In [155]:
import numpy as np

ar1 = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]])
ar2 = np.array([[2, 2, 2, 2, 2]])
ar2 = ar2.transpose()
print(ar1 * ar2)


nx = 4
n = 6
hidden_layer = FullyConnected(nx, n)
sigmoid_layer = Sigmoid()
print(hidden_layer.weights.shape)
print(hidden_layer.weights.transpose().shape)


x = np.array([[1, 2, 3, 4]])
y = hidden_layer.forward(x)
y = sigmoid_layer.forward(y)
print(hidden_layer.x.shape)
print(y)


ones = np.ones((n, 1))
print(ones)
dx = sigmoid_layer.backward(ones)
print(dx)
dx = hidden_layer.backward(dx)
print(dx)

MSE = lambda y, y_mod : sum((y - y_mod)**2)
dMSE = lambda y, y_mod : 2 * (y - y_mod)
loss = Loss(lambda x: x ** 2, lambda x, y: 2 * x)

[[2 4 6 8]
 [2 4 6 8]
 [2 4 6 8]
 [2 4 6 8]
 [2 4 6 8]]
(5, 6)
(6, 5)
(1, 5)
[[0.73105858 0.88079708 0.95257413 0.98201379 0.73105858]
 [0.73105858 0.88079708 0.95257413 0.98201379 0.73105858]
 [0.73105858 0.88079708 0.95257413 0.98201379 0.73105858]
 [0.73105858 0.88079708 0.95257413 0.98201379 0.73105858]
 [0.73105858 0.88079708 0.95257413 0.98201379 0.73105858]
 [0.73105858 0.88079708 0.95257413 0.98201379 0.73105858]]
[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]
[[  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]]
[[  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]
 [  0.  -2.  -6. -12.   0.]]
