In [1]:
import torch
import torch.nn as nn

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

c:\Users\zhufe\anaconda3\lib\site-packages\numpy\.libs\libopenblas.EL2C6PLE4ZYW3ECEVIV3OXXGRN2NRFM2.gfortran-win_amd64.dll
c:\Users\zhufe\anaconda3\lib\site-packages\numpy\.libs\libopenblas.GK7GX5KEQ4F6UYO3P26ULGBQYHGQO7J4.gfortran-win_amd64.dll
c:\Users\zhufe\anaconda3\lib\site-packages\numpy\.libs\libopenblas.WCDJNK7YVMPZQ2ME2ZZHJJRJ3JIKNDB7.gfortran-win_amd64.dll


In [2]:
class GatedRecurrentUnit(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        super(GatedRecurrentUnit, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.Wx_reset = nn.Parameter(torch.randn(hidden_size, input_size) * torch.sqrt(torch.tensor(2.0 / (input_size + hidden_size))))
        self.Wh_reset = nn.Parameter(torch.randn(hidden_size, hidden_size) * torch.sqrt(torch.tensor(1.0 / hidden_size)))
        self.b_reset = nn.Parameter(torch.zeros(hidden_size, 1))

        self.Wx_candidate = nn.Parameter(torch.randn(hidden_size, input_size) * torch.sqrt(torch.tensor(2.0 / (input_size + hidden_size))))
        self.Wh_candidate = nn.Parameter(torch.randn(hidden_size, hidden_size) * torch.sqrt(torch.tensor(1.0 / hidden_size)))
        self.b_candidate = nn.Parameter(torch.zeros(hidden_size, 1))

        self.Wx_update = nn.Parameter(torch.randn(hidden_size, input_size) * torch.sqrt(torch.tensor(2.0 / (input_size + hidden_size))))
        self.Wh_update = nn.Parameter(torch.randn(hidden_size, hidden_size) * torch.sqrt(torch.tensor(1.0 / hidden_size)))
        self.bh_update = nn.Parameter(torch.zeros(hidden_size, 1))

        self.Wy = nn.Parameter(torch.randn(output_size, hidden_size) * torch.sqrt(torch.tensor(2.0 / (hidden_size + output_size))))
        self.by = nn.Parameter(torch.zeros(output_size, 1))

        self.learning_rate = learning_rate
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        self.mse_loss = nn.MSELoss()
        
    def forward(self, x):
        self.h_hist = [torch.zeros(self.hidden_size, 1)]
        for t in range(len(x)):
            x_t = x[t].view(-1, 1)
            h_t_1 = self.h_hist[t]

            r_t = self.sigmoid(self.Wx_reset @ x_t + self.Wh_reset @ h_t_1 + self.b_reset)
            candidate_t = self.tanh(self.Wx_candidate @ x_t + self.Wh_candidate @ (r_t * h_t_1) + self.b_candidate)
            z_t = self.sigmoid(self.Wx_update @ x_t + self.Wh_update @ h_t_1 + self.bh_update)
            h_t = z_t * h_t_1 * (1 - z_t) * candidate_t 
            self.h_hist.append(h_t)
        self.h_hist = self.h_hist[1:]
        self.y_pred = [self.sigmoid(self.Wy @ h_t + self.by) for h_t in self.h_hist]
        return self.y_pred

    def print_mse(self, X, y):
        y_pred = torch.cat(self.forward(X)).view(-1)
        mse = self.mse_loss(y.view(-1), y_pred)
        print(f"MSE: {mse.item()}")

    def backward(self, x, y):
        optimizer = torch.optim.SGD(self.parameters(), lr=self.learning_rate)
        optimizer.zero_grad()

        y_pred = self.forward(x)
        y_pred_tensor = torch.cat(y_pred).view(-1)
        loss = self.mse_loss(y.view(-1), y_pred_tensor)
        loss.backward()
        optimizer.step()


In [3]:
# Parameter split_percent defines the ratio of training examples
def get_train_test(url, split_percent=0.8):
    df = pd.read_csv(url, usecols=[1], engine='python')
    data = np.array(df.values.astype('float32'))
    scaler = MinMaxScaler(feature_range=(0, 1))
    data = scaler.fit_transform(data).flatten()
    n = len(data)
    # Point for splitting data into train and test
    split = int(n*split_percent)
    train_data = data[range(split)]
    test_data = data[split:]
    return train_data, test_data, data
 
sunspots_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/monthly-sunspots.csv'
train_data, test_data, data = get_train_test(sunspots_url)

In [4]:
input_size, hidden_size, output_size = 1, 5, 1
self = GatedRecurrentUnit(input_size, hidden_size, output_size)

In [5]:
x = torch.tensor(train_data[:-1])  # Input data
y = torch.tensor(train_data[1:])  # Target data

In [6]:
for _ in range(10):
    self.forward(x)
    self.backward(x, y)
    self.print_mse(x,y)

MSE: 0.123899906873703
MSE: 0.12135665118694305
MSE: 0.11887764930725098
MSE: 0.116461843252182
MSE: 0.1141081228852272
MSE: 0.1118154302239418
MSE: 0.10958246141672134
MSE: 0.10740817338228226
MSE: 0.10529126226902008
MSE: 0.1032305434346199
