In [1]:
import pandas as pd
import numpy as np
# import torch
import torch
import matplotlib.pyplot as plt

## Load Data

In [2]:
df = pd.read_csv("csv_agile_H_Southern_England.csv", header=None)
df = df.rename(columns={
    0:"date",
    1:"time",
    4:"cost"
})
df = df[["date","time", "cost"]]


In [3]:
#Training dataset
df_train = df[df.date<='2022-01-01'] #before 2022
#Test dataset
df_test =  df[df.date>'2022-01-01'] # after 2022

print("test", len(df_test) / (len(df_train)+len(df_train)))

test 0.13596258262511804


In [4]:
df_test

Unnamed: 0,date,time,cost
67776,2022-01-01T00:00:00Z,00:00,16.53750
67777,2022-01-01T00:30:00Z,00:30,16.53750
67778,2022-01-01T01:00:00Z,01:00,20.10750
67779,2022-01-01T01:30:00Z,01:30,10.58400
67780,2022-01-01T02:00:00Z,02:00,15.43500
...,...,...,...
86201,2023-01-19T20:30:00Z,20:30,32.86500
86202,2023-01-19T21:00:00Z,21:00,31.97250
86203,2023-01-19T21:30:00Z,21:30,29.78850
86204,2023-01-19T22:00:00Z,22:00,34.22601


## Pre-process

In [6]:
length = 128 #length of sequence

#Sliding Window
def sliding_window(array):
    window_step = 1
    window_size = 128 #2.6 days
    return np.lib.stride_tricks.sliding_window_view(array, window_size)

train = sliding_window(df_train["cost"].to_numpy())
test = sliding_window(df_test["cost"].to_numpy())

print(train.shape)
print(test.shape)

(67649, 128)
(18303, 128)


## Define RNN

In [15]:
#Define RNN Model
#https://pytorch.org/docs/stable/generated/torch.nn.RNN.html

input_size = 1
output_size = 1
hidden_size = 10
num_layers = 5
activation = "relu" #relu or tanh
bidirectional = False

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.rnn = torch.nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=0.1,
            nonlinearity=activation,
            bidirectional=bidirectional,
            batch_first=False
        )
        # compress output to the same dim as y
        self.linear = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden_prev):
        out, hidden_prev = self.rnn(x, hidden_prev) # [1, seq, h] => [seq, h]  (batch=1)
        out = out.reshape(-1, hidden_size)  # stack batch and seq

        # linear layer so that output is not [seq,h] but [seq, 1]
        # so it is comparable with y, for loss calculation
        out = self.linear(out)  # [seq, h] => [seq, 1]
        out = out.unsqueeze(dim=0)  # => [1, seq, 1]
        return out, hidden_prev

rnn = Net()
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.001)

# rnn= torch.nn.RNN(input_size=input_size, hidden_size=hidden_size,num_layers=num_layers,
#                   dropout=0, nonlinearity="relu", bidirectional=bidirectional)



In [8]:



# input = torch.randn(length,batch_size,input_size)



# out, hidden = rnn(input, h0)
# print(out.shape, hidden.shape)


In [None]:
# input = torch.randn(length,batch_size,input_size)
# h0 = torch.randn(num_layers,batch_size,hidden_size)

## Train

In [17]:
batch_size = 1 #number of batches for training

def train_rnn(rnn, n_steps, print_every):


    # hidden = h0
    # hidden = torch.randn(num_layers,batch_size,hidden_size)

    # initialize the hidden state
    if bidirectional:
        hidden = torch.zeros(2*num_layers,batch_size,hidden_size)
    else:
        hidden = torch.zeros(num_layers,batch_size,hidden_size)

    for batch_i, step in enumerate(range(n_steps)):
        # defining the training data
        # time_steps = np.linspace(step * np.pi, (step+1)*np.pi, length + 1)
        # data = np.sin(time_steps)
        # data.resize((length + 1, 1)) # input_size=1
        #
        # x = data[:-1]
        # y = data[1:]
        #
        # x_tensor = torch.Tensor(x).unsqueeze(0) # unsqueeze gives a 1, batch_size dimension
        # y_tensor = torch.Tensor(y)
        #
        # print(x_tensor.shape)

        # input = torch.randn(length,batch_size,input_size)

        # x: 49 points 0-49; y: 49 points 1-50
        data=train[batch_i]
        x = torch.tensor(data[:-1]).float().reshape(length - 1, 1, 1)  # [seq_len, b, fea_len]
        y = torch.tensor(data[1:]).float().reshape(length - 1, 1, 1)  # [seq_len, b, fea_len]

        # outputs from the rnn
        # print(x.shape)
        # print(hidden.shape)
        prediction, hidden = rnn(x, hidden)

        # Representing Memory #
        # make a new variable for hidden and detach the hidden state from its history
        # this way, we don't backpropagate through the entire history
        hidden = hidden.data

        # calculate the loss
        loss = criterion(prediction, y)

        # zero gradients
        optimizer.zero_grad()
        # perform backprop and update weights
        loss.backward()
        optimizer.step()

        # display loss and predictions
        if batch_i%print_every == 0:
            print('Loss: ', loss.item())
            # plt.plot(input[1:], x, 'r.') # input
            # plt.plot(input[1:], prediction.data.numpy().flatten(), 'b.') # predictions
            # plt.show()
    return rnn

rnn = train_rnn(rnn, 1000, 10)

print(rnn)

Loss:  35.37954330444336
Loss:  38.498287200927734
Loss:  37.53645324707031
Loss:  37.70008850097656
Loss:  27.05722427368164
Loss:  29.23897933959961
Loss:  37.45521545410156
Loss:  37.253265380859375
Loss:  35.400325775146484
Loss:  27.051958084106445
Loss:  31.791528701782227
Loss:  37.247589111328125
Loss:  37.41933059692383
Loss:  34.86930847167969
Loss:  27.3126277923584
Loss:  39.187530517578125
Loss:  39.53559494018555
Loss:  39.66175842285156
Loss:  33.07939910888672
Loss:  29.947664260864258
Loss:  40.384429931640625
Loss:  40.80271530151367
Loss:  41.33481216430664
Loss:  29.96868324279785
Loss:  29.92669105529785
Loss:  40.622920989990234
Loss:  40.37743377685547
Loss:  39.62628173828125
Loss:  26.96959686279297
Loss:  29.174301147460938
Loss:  36.78670883178711
Loss:  35.637691497802734
Loss:  34.22279357910156
Loss:  25.68470001220703
Loss:  32.2880973815918
Loss:  36.936241149902344
Loss:  36.6348876953125
Loss:  35.145565032958984
Loss:  31.216218948364258
Loss:  41.779

## Test

In [18]:
if bidirectional:
    hidden = torch.zeros(2*num_layers,batch_size,hidden_size)
else:
    hidden = torch.zeros(num_layers,batch_size,hidden_size)

total_loss = 0

for batch_i in range(len(test)):
    data=test[batch_i]
    x = torch.tensor(data[:-1]).float().reshape(length - 1, 1, 1)  # [seq_len, b, fea_len]
    y = torch.tensor(data[1:]).float().reshape(length - 1, 1, 1)  # [seq_len, b, fea_len]

    prediction, hidden = rnn(x, hidden)
    hidden = hidden.data
    total_loss += criterion(prediction, y)

print(total_loss / len(test))




tensor(585.3307, grad_fn=<DivBackward0>)
