In [25]:
import numpy as np
import pandas as pd

In [26]:
class NeuralNetwork:
    """
    Small fully connected feed-forward network (MLP).

    Parameters are stored in ``self.params`` as a list with one dict per
    non-input layer, each holding a weight matrix ``'W'`` of shape
    ``(n_out, n_in)`` and a bias column ``'b'`` of shape ``(n_out, 1)``.
    Gradients are estimated numerically (central finite differences), so
    training cost grows with the number of parameters.
    """

    def __init__(self, layer_sizes, activation='sigmoid', output_activation='linear'):
        """
        Initialise the network with standard-normal random weights and biases.

        :param layer_sizes: Number of neurons in consecutive layers, e.g. [1, 5, 1]
        :param activation: Name of the method used as hidden-layer activation
                           (default 'sigmoid')
        :param output_activation: Name of the method used as output-layer
                                  activation (default 'linear')
        """
        self.layer_sizes = layer_sizes
        self.activation = activation
        self.output_activation = output_activation

        # One {'W', 'b'} pair for every layer except the input layer.
        # W is drawn before b for each layer, so seeded runs stay reproducible.
        self.params = []
        for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            W = np.random.randn(n_out, n_in)
            b = np.random.randn(n_out, 1)
            self.params.append({'W': W, 'b': b})

    def sigmoid(self, z):
        """
        Logistic sigmoid; the input is clipped to [-15, 15] for numerical stability.
        """
        z = np.clip(z, -15, 15)
        return 1.0 / (1.0 + np.exp(-z))

    def linear(self, z):
        """
        Identity (linear) activation.
        """
        return z

    def _ensure_float_params(self):
        """
        Coerce every 'W' and 'b' entry to a float ndarray.

        ``params`` may have been replaced from outside with nested lists or
        integer arrays; those cannot be indexed with a tuple, updated in place,
        or perturbed by a small ``eps``. Existing float64 arrays are kept as-is.
        """
        for layer in self.params:
            for key in ('W', 'b'):
                layer[key] = np.asarray(layer[key], dtype=float)

    def forward(self, X):
        """
        Run forward propagation.

        :param X: Input matrix [D x N] (D features, N samples)
        :return: Network output after the output-layer activation
        :raises AttributeError: if an activation name is not a method of this class
        """
        a = np.asarray(X)

        # Hidden layers use the activation chosen in the constructor
        # (it was previously ignored in favour of a hard-coded sigmoid).
        hidden_layers = self.params[:-1]
        if hidden_layers:
            hidden_func = getattr(self, self.activation)
            for layer in hidden_layers:
                a = hidden_func(layer['W'] @ a + layer['b'])

        # Output layer with its own activation.
        output_func = getattr(self, self.output_activation)
        last = self.params[-1]
        return output_func(last['W'] @ a + last['b'])

    def compute_loss(self, y_true, y_pred):
        """
        Mean squared error over all (flattened) elements.

        MSE = (1/N) * sum((y_pred - y_true)^2)
        """
        y_true = np.asarray(y_true).flatten()
        y_pred = np.asarray(y_pred).flatten()
        N = y_true.shape[0]
        return (1 / N) * np.sum((y_pred - y_true) ** 2)

    def calculate_gradient(self, x_batch, y_batch, eps=1e-6):
        """
        Estimate gradients of the loss w.r.t. all weights and biases using
        central finite differences.

        Each parameter is perturbed in place by +eps and -eps, which costs two
        forward passes per parameter; the original value is restored afterwards.

        :param x_batch: Input data for the batch
        :param y_batch: Targets for the batch
        :param eps: Perturbation size
        :return: List with one {'W': ..., 'b': ...} gradient dict per layer
        """
        # Lists / integer arrays cannot be perturbed element-wise by eps.
        self._ensure_float_params()

        gradients = []
        for layer in self.params:
            layer_grad = {}
            for key in ('W', 'b'):
                param = layer[key]
                grad_param = np.zeros_like(param)
                for index in np.ndindex(param.shape):
                    original_val = param[index]

                    # Loss after a positive shift
                    param[index] = original_val + eps
                    loss_plus = self.compute_loss(y_batch, self.forward(x_batch))

                    # Loss after a negative shift
                    param[index] = original_val - eps
                    loss_minus = self.compute_loss(y_batch, self.forward(x_batch))

                    grad_param[index] = (loss_plus - loss_minus) / (2 * eps)
                    param[index] = original_val  # restore the original value

                layer_grad[key] = grad_param
            gradients.append(layer_grad)
        return gradients

    def train(self, X_train, y_train, batch_size, epochs, learning_rate=0.01, log_every=250):
        """
        Train the network with mini-batch gradient descent.

        :param X_train: Training inputs [D x N]
        :param y_train: Training targets (vector of length N; a (1, N) row is
                        also accepted)
        :param batch_size: Mini-batch size (must be >= 1)
        :param epochs: Number of training epochs
        :param learning_rate: Step size of the gradient update
        :param log_every: Print the training loss every this many epochs;
                          0 or None disables the periodic log
        :raises ValueError: if batch_size is smaller than 1
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        num_samples = X_train.shape[1]

        # Samples are selected by indexing the first axis of y_train, so a
        # (1, N) row has to be unwrapped into a length-N vector first.
        y_train = np.asarray(y_train)
        if y_train.ndim == 2 and y_train.shape == (1, num_samples):
            y_train = y_train[0]

        for epoch in range(epochs):
            indices = np.random.permutation(num_samples)
            for start_idx in range(0, num_samples, batch_size):
                end_idx = min(start_idx + batch_size, num_samples)
                batch_indices = indices[start_idx:end_idx]

                X_batch = X_train[:, batch_indices]
                y_batch = y_train[batch_indices]

                gradients = self.calculate_gradient(X_batch, y_batch)
                for layer, grad in zip(self.params, gradients):
                    layer['W'] -= learning_rate * grad['W']
                    layer['b'] -= learning_rate * grad['b']

            if log_every and epoch % log_every == 0:
                y_pred = self.forward(X_train)
                loss = self.compute_loss(y_train, y_pred)
                print(f"Epoch {epoch+1}/{epochs}, Loss: {loss}")

        final_loss = self.compute_loss(y_train, self.forward(X_train))
        print(f"Final Loss: {final_loss}")


In [27]:
# Load the "steps-large" test split; the 'x' and 'y' columns are read from it further down.
df_test = pd.read_csv("data/steps-large-test.csv")

In [28]:
neural_net = NeuralNetwork(layer_sizes=[1, 5, 5, 1],
                           activation='sigmoid',
                           output_activation='linear')

# Hand-set parameters (no training). They are stored as float ndarrays, like
# the parameters the class itself creates, so that in-place updates and
# element-wise perturbation keep working if this network is ever trained.

# Layer 1: three steep sigmoid units switching around x = -0.5, x = 1.5 and
# x = 0.5; the last two units have zero weight and a large negative bias.
neural_net.params[0]['W'] = np.array([[-1000], [1000], [100], [0], [0]], dtype=float)
neural_net.params[0]['b'] = np.array([[-500], [-1500], [-50], [-10000], [-10000]], dtype=float)

# Layer 2: each of the first three units is passed through its own sigmoid
# (weight 30, bias -13); the remaining two units stay unused.
neural_net.params[1]['W'] = np.array([[30, 0, 0, 0, 0],
                                      [0, 30, 0, 0, 0],
                                      [0, 0, 30, 0, 0],
                                      [0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0]], dtype=float)
neural_net.params[1]['b'] = np.array([[-13], [-13], [-13], [-10000], [-10000]], dtype=float)

# Output: linear combination of the three step units, no bias.
neural_net.params[2]['W'] = np.array([[-80, 80, 80, 0, 0]], dtype=float)
neural_net.params[2]['b'] = np.array([[0]], dtype=float)

In [29]:
# Evaluate the hand-set network on the test data.
# Inputs are arranged as a (1, N) row, the layout forward() is documented to take.
x_test = df_test[['x']].to_numpy().T
y_test = df_test['y'].to_numpy()
y_pred = neural_net.forward(x_test)

# Mean squared error between the (1, N) prediction and the length-N targets.
mse = np.square(y_pred - y_test).mean()
print("Test MSE:", mse)


Test MSE: 1.2629448437107536


In [30]:
# Load the "square-simple" training and test splits.
# Note: df_test is rebound here and no longer refers to the steps-large data.
df_train = pd.read_csv("data/square-simple-training.csv")
df_test = pd.read_csv("data/square-simple-test.csv")

In [31]:
# 1-10-1 network whose randomly initialised parameters are replaced right away
# by the fixed values below.
neural_net = NeuralNetwork(layer_sizes=[1, 10, 1],
                           activation='sigmoid',
                           output_activation='linear')
neural_net.params = [
    # Hidden layer: 10 units, weights and biases as (10, 1) columns.
    dict(
        W=np.array([-0.34155931, 0.11006786, 1.87828407, -0.02531835, 1.49160667,
                    2.34437183, -0.56610897, 1.22455786, -1.27002157, 0.08568418]).reshape(-1, 1),
        b=np.array([-0.04516066, -1.41236716, -3.09940009, 0.62112467, -2.2970304,
                    4.07119498, -0.37213868, 1.99204088, 0.93481114, -0.61101664]).reshape(-1, 1),
    ),
    # Output layer: one unit, weights as a (1, 10) row, bias as (1, 1).
    dict(
        W=np.array([1.35538469, -0.37142165, 3.87239322, 2.16885127, 3.2172446,
                    -3.13956285, 1.30091389, -1.41370243, -1.99524728, 0.21102372]).reshape(1, -1),
        b=np.array([1.7740576]).reshape(1, 1),
    ),
]

In [32]:
# Inputs as (1, N) rows, targets as length-N vectors.
x_train = df_train[['x']].values.T
y_train = df_train['y'].values

x_test = df_test[['x']].values.T
y_test = df_test['y'].values

# Standardisation statistics come from the training split only and are
# reused for the test inputs.
x_mean = np.mean(x_train)
x_std  = np.std(x_train)
y_mean = np.mean(y_train)
y_std = np.std(y_train)

x_train_scaled = (x_train - x_mean) / x_std
y_train_scaled = (y_train - y_mean) / y_std

x_test_scaled = (x_test - x_mean) / x_std

# Training is off by default, so the network keeps the parameters it was
# given before this cell. Set RETRAIN = True to run the (slow,
# finite-difference) training instead.
RETRAIN = False
if RETRAIN:
    neural_net.train(x_train_scaled, y_train_scaled,
                     epochs=1000,
                     batch_size=8,
                     learning_rate=0.01)

# Predict in the scaled space, then map back to the original target scale.
y_test_pred_scaled = neural_net.forward(x_test_scaled)
y_test_pred = y_test_pred_scaled * y_std + y_mean
mse = np.mean((y_test_pred - y_test) ** 2)
print("Test MSE:", mse)


Test MSE: 1.5518718631490744
