In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [None]:
class RecurrentNeuralNetwork:
    """Single-layer tanh recurrent network with a sigmoid read-out, trained by BPTT.

    Forward recurrence for each step ``t`` of a sequence::

        h_t = tanh(Wx @ x_t + Wh @ h_{t-1} + bh)
        y_t = sigmoid(Wy @ h_t + by)

    ``backward`` applies one plain gradient-descent step on the squared error
    summed over every time step (full, untruncated backpropagation through time).

    Attributes:
        Wx: input-to-hidden weights, shape (hidden_size, input_size).
        Wh: hidden-to-hidden weights, shape (hidden_size, hidden_size).
        Wy: hidden-to-output weights, shape (output_size, hidden_size).
        bh: hidden bias, shape (hidden_size, 1).
        by: output bias, shape (output_size, 1).
        y_pred: per-step outputs cached by the most recent ``forward`` call.
        h_hist: hidden states cached by the most recent ``forward`` call;
            ``h_hist[0]`` is the zero initial state and ``h_hist[t + 1]`` is h_t.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.001):
        """Draw random initial weights and zero the biases.

        Args:
            input_size: number of features per time step.
            hidden_size: number of hidden units.
            output_size: number of outputs per time step.
            learning_rate: step size used by ``backward``.
        """
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        # The draw order (Wx, Wh, Wy) is kept so a seeded global RNG yields the
        # same initial weights as before.
        self.Wx = np.random.randn(hidden_size, input_size) * np.sqrt(2 / (input_size + hidden_size))
        self.Wh = np.random.randn(hidden_size, hidden_size) * np.sqrt(1 / hidden_size)
        self.Wy = np.random.randn(output_size, hidden_size) * np.sqrt(2 / (hidden_size + output_size))
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))
        self.learning_rate = learning_rate
        # Activation caches; populated by forward() and consumed by backward().
        self.y_pred, self.h_hist = [], []

    @staticmethod
    def _column(value):
        """Return ``value`` (scalar or 1-D vector) as a column of shape (k, 1)."""
        return np.asarray(value).reshape(-1, 1)

    def sigmoid(self, x):
        """Element-wise logistic function of ``x``."""
        return 1 / (1 + np.exp(-x))

    def forward(self, x):
        """Run the network over a sequence and cache the activations.

        Args:
            x: sequence indexable by time step; each ``x[t]`` is a scalar or a
                vector of ``input_size`` features.

        Returns:
            The list ``self.y_pred`` of per-step outputs, each of shape
            (output_size, 1). ``self.h_hist`` is overwritten as a side effect.
        """
        h_prev = np.zeros((self.hidden_size, 1))
        self.y_pred, self.h_hist = [], [h_prev]
        for t in range(len(x)):
            h_t = np.tanh(self.Wx @ self._column(x[t]) + self.Wh @ h_prev + self.bh)
            y_t = self.sigmoid(self.Wy @ h_t + self.by)
            self.y_pred.append(y_t)
            self.h_hist.append(h_t)
            h_prev = h_t
        return self.y_pred

    def sigmoid_derivative(self, x):
        """Derivative of the sigmoid expressed via its *output* ``x``."""
        return x * (1 - x)

    def tanh_derivative(self, x):
        """Derivative of tanh expressed via its *output* ``x``."""
        return 1 - x ** 2

    def mse(self, y_true, y_pred):
        """Mean of the element-wise squared differences."""
        return np.mean((y_true - y_pred)**2)

    def mse_derivative(self, y_true, y_pred):
        """Derivative of the per-element squared error w.r.t. ``y_pred``.

        There is no division by the number of elements, so the objective that
        ``backward`` descends is the squared error *summed* over time steps.
        """
        return -2*(y_true - y_pred)

    def print_mse(self, X, y):
        """Print the mean squared error of the network on ``X`` against ``y``.

        Calls ``forward(X)``, so the cached ``y_pred`` / ``h_hist`` are replaced.
        Targets are flattened in time-major order to line up with the
        concatenated per-step predictions.
        """
        predictions = np.concatenate(self.forward(X)).flatten()
        print(f"MSE: {self.mse(np.asarray(y).reshape(-1), predictions)}")

    def backward(self, x, y):
        """Apply one gradient-descent update using backpropagation through time.

        Must be called right after ``forward(x)`` on the same sequence, because
        it reads the cached ``y_pred`` and ``h_hist``.

        Args:
            x: the input sequence that was passed to ``forward``.
            y: targets; each ``y[t]`` is a scalar or a vector of ``output_size``
                values.

        Raises:
            ValueError: if the cached activations do not match ``len(x)``.
        """
        steps = len(x)
        if len(self.y_pred) != steps or len(self.h_hist) != steps + 1:
            raise ValueError("backward(x, y) requires a preceding forward(x) on the same sequence")

        grad_Wx = np.zeros_like(self.Wx)
        grad_Wh = np.zeros_like(self.Wh)
        grad_Wy = np.zeros_like(self.Wy)
        grad_bh = np.zeros_like(self.bh)
        grad_by = np.zeros_like(self.by)

        # Gradient reaching h_t from step t + 1; zero past the end of the sequence.
        h_future_delta = np.zeros((self.hidden_size, 1))

        for t in reversed(range(steps)):
            x_t = self._column(x[t])
            target = self._column(y[t])
            y_hat = self.y_pred[t]
            # h_hist is offset by one: index 0 holds the initial zero state.
            h_t = self.h_hist[t + 1]
            h_before = self.h_hist[t]

            y_delta = self.mse_derivative(target, y_hat) * self.sigmoid_derivative(y_hat)
            h_current_delta = (self.Wy.T @ y_delta) + h_future_delta
            h_raw_delta = h_current_delta * self.tanh_derivative(h_t)
            h_future_delta = self.Wh.T @ h_raw_delta

            grad_Wy += y_delta @ h_t.T
            grad_by += y_delta
            grad_Wh += h_raw_delta @ h_before.T
            grad_Wx += h_raw_delta @ x_t.T
            grad_bh += h_raw_delta

        self.Wy -= grad_Wy * self.learning_rate
        self.by -= grad_by * self.learning_rate
        self.Wh -= grad_Wh * self.learning_rate
        self.Wx -= grad_Wx * self.learning_rate
        self.bh -= grad_bh * self.learning_rate

In [None]:
def get_train_test(url, split_percent=0.8):
    """Load one CSV column, min-max scale it to [0, 1] and split it in order.

    Args:
        url: path or URL handed to ``pandas.read_csv``; only the column at
            position 1 is read.
        split_percent: ratio of training examples, taken from the start of the
            series; must lie in [0, 1].

    Returns:
        ``(train_data, test_data, data)``: the first ``int(n * split_percent)``
        scaled samples, the remaining samples, and the full scaled series, all
        as 1-D arrays. The two splits are slices of ``data``.

    Raises:
        ValueError: if ``split_percent`` is outside [0, 1].
    """
    if not 0 <= split_percent <= 1:
        raise ValueError(f"split_percent must be within [0, 1], got {split_percent!r}")

    df = pd.read_csv(url, usecols=[1], engine='python')
    # astype already returns a fresh array, so no extra np.array copy is needed.
    data = df.values.astype('float32')
    # NOTE(review): the scaler is fitted on the whole series, so the test
    # portion influences how the training inputs are scaled; fit on the
    # training slice only if a leakage-free evaluation is required.
    scaler = MinMaxScaler(feature_range=(0, 1))
    data = scaler.fit_transform(data).flatten()
    n = len(data)
    # Point for splitting data into train and test
    split = int(n * split_percent)
    train_data = data[:split]
    test_data = data[split:]
    return train_data, test_data, data
 
# Location of the monthly sunspots CSV in the jbrownlee/Datasets repository.
sunspots_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/monthly-sunspots.csv'
# Scaled training split, test split and full series (default 80/20 split).
(train_data, test_data, data) = get_train_test(url=sunspots_url)

In [None]:
# Network dimensions: one input feature, five hidden units, one output per step.
input_size, hidden_size, output_size = 1, 5, 1

# Seed NumPy's global RNG so the random weight initialisation (np.random.randn
# in the constructor) is identical on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# NOTE(review): the network is bound to the module-level name `self` because the
# later training cell refers to it by that name; consider renaming both together.
self = RecurrentNeuralNetwork(input_size, hidden_size, output_size)

In [None]:
# One-step-ahead pairs: the sample at step t is the input and the sample at
# step t + 1 is its target.
x, y = train_data[:-1], train_data[1:]

In [None]:
# Full-sequence training: one gradient step per epoch over the whole series.
N_EPOCHS = 1000   # number of passes over the training sequence
LOG_EVERY = 100   # print_mse re-runs a complete forward pass, so report sparingly

# A named loop variable is used instead of `_`, which IPython reserves for the
# last cell output.
for epoch in range(1, N_EPOCHS + 1):
    self.forward(x)       # caches the activations that backward() consumes
    self.backward(x, y)   # one gradient-descent update of all parameters
    if epoch % LOG_EVERY == 0 or epoch == N_EPOCHS:
        self.print_mse(x, y)