In [11]:
import sys 
import os
import numpy as np

import plotly.express as px 
import plotly.graph_objs as go 
import plotly.io as pio

import torch 
import torch.nn as nn 
import torch.optim as optim

from sklearn.preprocessing import MinMaxScaler

from utils import load_data, get_input_data
from termcolor import colored

pio.templates.default = "plotly_white"

print(f"cwd : {os.getcwd()}")


cwd : /home/imantha/workspace/cryo-polygen/ts-forecasting/model2


In [2]:
df = load_data(path = os.path.join("..","data", "load.xlsx"))
df.head()

Unnamed: 0,y,date
0,447,2021-11-15 19:00:00
1,435,2021-11-15 20:00:00
2,451,2021-11-15 21:00:00
3,442,2021-11-15 22:00:00
4,444,2021-11-15 23:00:00


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4349 entries, 0 to 4348
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   y       4349 non-null   int64         
 1   date    4349 non-null   datetime64[ns]
dtypes: datetime64[ns](1), int64(1)
memory usage: 68.1 KB


In [4]:
p = go.Figure()
p.add_trace(go.Scatter(
    x = df.date,
    y = df.y,
    mode = "lines",
    line_color = "dodgerblue"
))

## Dataset

In [5]:
ts = df.y.values
normalizer = MinMaxScaler()
ts_norm = normalizer.fit_transform(ts.reshape(-1,1))

In [6]:
ts = df.y.values

X_train, y_train, X_val, y_val, X_test, y_test = get_input_data(seq = ts_norm.flatten().tolist(), ws = 24, split_at = 3500, train_size=0.8)
print("Shapes")
print(f"X_train : {X_train.shape}")
print(f"y_train : {y_train.shape}")
print(f"X_val : {X_val.shape}")
print(f"y_val : {y_val.shape}")
print(f"X_test : {X_test.shape}")
print(f"y_test : {y_test.shape}")

Shapes
X_train : torch.Size([2800, 24])
y_train : torch.Size([2800])
X_val : torch.Size([700, 24])
y_val : torch.Size([700])
X_test : torch.Size([825, 24])
y_test : torch.Size([825])


## Gated Reccurent Unit - Simple Model

In [24]:
class GRU(nn.Module):
    def __init__(self,input_size, hidden_size, output_size):
        super(GRU, self).__init__()
        self.hidden_size = hidden_size

        # input : (N,L,H_in), h0 : (num_layers, N, H_out) ---> output : (N, L, H_out), hn: (num_layers, N, H_out)
        self.gru = nn.GRU(input_size = input_size, hidden_size = hidden_size, batch_first = True)
        # (*, H_in) ---> (*, H_out)
        self.fc = nn.Linear(in_features = hidden_size, out_features= output_size)

    def forward(self, X):
        h0 = torch.zeros(1,X.size()[0],self.hidden_size)
        out, hn = self.gru(X.unsqueeze(2), h0) #X.unsequeeze(2) is adding dimension for input_size NOT batch_size, which is 2800 here
        out = self.fc(out[:,-1])

        return out, hn
        


In [16]:
X_train.shape

torch.Size([2800, 24])

In [23]:
# N = 2800, L = 24, H_in = 1
gru = nn.GRU(1, 25, batch_first = True)
h0 = torch.zeros(1, X_train.size()[0], 25)
fc = nn.Linear(25, 1)
print(f"X_train.shape : {X_train.shape}, X_train,unsqueeze(0) : {X_train.unsqueeze(2).shape}")
out, hn = gru(X_train.unsqueeze(2), h0)
print(f"gru-out : {out.shape}, gru-hn : {hn.shape}")
out = out[:,-1]
print(f"out rsp : {out.shape}")
out = fc(out)
print(f"fc : {out.shape}")


X_train.shape : torch.Size([2800, 24]), X_train,unsqueeze(0) : torch.Size([2800, 24, 1])
gru-out : torch.Size([2800, 24, 25]), gru-hn : torch.Size([1, 2800, 25])
out rsp : torch.Size([2800, 25])
fc : torch.Size([2800, 1])


In [15]:
h0.shape

torch.Size([1, 2800, 25])

In [114]:
# Parameters
learning_rate = 0.02
epochs = 500
model = GRU(input_size = 1, hidden_size = 50, output_size = 1)
optimizer = optim.Adam(params = model.parameters(), lr = 0.02)
criterion = nn.MSELoss()
training_loss = []
validation_loss = []

for e in range(epochs):

    # Forward Prope 
    yhat,hn = model.forward(X_train)
    # Compute Loss
    loss = criterion(yhat.flatten(), y_train)
    # Back Prop
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # Prediction on validation data
    yhat_val, hn_val = model.forward(X_val)
    val_loss = criterion(yhat_val.flatten(), y_val)
    # Keep track of losses
    training_loss.append(loss.item())
    validation_loss.append(val_loss.item())

    if e % 10 == 0:
        print("epochs {} : training loss : {:.5f}, validation loss : {:.5f}".format(e,loss.item(), val_loss.item()))



epochs 0 : training loss : 0.25768, validation loss : 0.03041
epochs 10 : training loss : 0.01628, validation loss : 0.01926
epochs 20 : training loss : 0.01487, validation loss : 0.01355
epochs 30 : training loss : 0.00825, validation loss : 0.01038
epochs 40 : training loss : 0.00475, validation loss : 0.00429
epochs 50 : training loss : 0.00316, validation loss : 0.00331
epochs 60 : training loss : 0.00288, validation loss : 0.00333
epochs 70 : training loss : 0.00263, validation loss : 0.00288
epochs 80 : training loss : 0.00248, validation loss : 0.00271
epochs 90 : training loss : 0.00242, validation loss : 0.00276
epochs 100 : training loss : 0.00239, validation loss : 0.00270
epochs 110 : training loss : 0.00236, validation loss : 0.00268
epochs 120 : training loss : 0.00234, validation loss : 0.00266
epochs 130 : training loss : 0.00231, validation loss : 0.00263
epochs 140 : training loss : 0.00228, validation loss : 0.00260
epochs 150 : training loss : 0.00225, validation lo

In [115]:
with torch.no_grad():
    p2 = go.Figure()
    p2.add_trace(go.Scatter(
        y = training_loss,
        name = "training_loss"
    ))
    p2.add_trace(go.Scatter(
        y = validation_loss,
        name = "validation_loss"
    ))
    p2.show()

In [116]:
y_train_pred = normalizer.inverse_transform(model.forward(X_train)[0].detach().numpy())
y_val_pred = normalizer.inverse_transform(model.forward(X_val)[0].detach().numpy())
y_test_pred = normalizer.inverse_transform(model.forward(X_test)[0].detach().numpy())


In [117]:
p3 = go.Figure()

p3.add_trace(go.Scatter(
    x = np.arange(0, df.shape[0]),
    y = df.y.values,
    name = "original Ts"
))
p3.add_trace(go.Scatter(
    x = np.arange(23, y_train_pred.shape[0]),
    y = y_train_pred.flatten(),
    name = "training prediction"
))
p3.add_trace(go.Scatter(
    x = np.arange(23 + y_train_pred.shape[0], 23 + y_train_pred.shape[0] + y_val_pred.shape[0]),
    y = y_val_pred.flatten(),
    name = "validation Prediction"
))
p3.add_trace(go.Scatter(
    x = np.arange(23 + y_train_pred.shape[0] + y_val_pred.shape[0], 23 + y_train_pred.shape[0] + y_val_pred.shape[0] + y_train_pred.shape[0]),
    y = y_test_pred.flatten(),
    name = "testing prediction"
))

In [118]:
from sklearn.metrics import mean_squared_error

print(f"MSE-train : {mean_squared_error(normalizer.inverse_transform(y_train.reshape(-1,1)).flatten(), y_train_pred.flatten())}")
print(f"MSE-val : {mean_squared_error(normalizer.inverse_transform(y_val.reshape(-1,1)).flatten(), y_val_pred.flatten())}")
print(f"MSE-test : {mean_squared_error(normalizer.inverse_transform(y_test.reshape(-1,1)).flatten(), y_test_pred.flatten())}")

MSE-train : 936.5034081602137
MSE-val : 1222.0470189846717
MSE-test : 2310.6676262251503


In [95]:
y_train

tensor([0.4157, 0.3320, 0.3413,  ..., 0.3997, 0.4303, 0.4250])

In [96]:
y_train_pred

array([[496.30222],
       [499.68054],
       [429.44382],
       ...,
       [477.42444],
       [481.29138],
       [503.88718]], dtype=float32)

In [87]:
y_val_pred.shape[0]

700