In [1]:
import torch
import torch.nn as nn

import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
%matplotlib inline

In [2]:
with open('./PITdf.pkl','rb') as f:
    PITdf = pickle.load(f)
with open('./GREdf.pkl','rb') as f:
    GREdf = pickle.load(f)
with open('./JONdf.pkl','rb') as f:
    JONdf = pickle.load(f)
with open('./MGTdf.pkl','rb') as f:
    MGTdf = pickle.load(f)
with open('./WASdf.pkl','rb') as f:
    WASdf = pickle.load(f)
with open('./WHLdf.pkl','rb') as f:
    WHLdf = pickle.load(f)
with open('./PKSdf.pkl','rb') as f:
    PKSdf = pickle.load(f)
with open('./CBGdf.pkl','rb') as f:
    CBGdf = pickle.load(f)
with open('./STUdf.pkl','rb') as f:
    STUdf = pickle.load(f)
with open('./NPHdf.pkl','rb') as f:
    NPHdf = pickle.load(f)
with open('./ELVdf.pkl','rb') as f:
    ELVdf = pickle.load(f)
with open('./YGTdf.pkl','rb') as f:
    YGTdf = pickle.load(f)
with open('./NCSdf.pkl','rb') as f:
    NCSdf = pickle.load(f)
with open('./BUTdf.pkl','rb') as f:
    BUTdf = pickle.load(f)
with open('./KITdf.pkl','rb') as f:
    KITdf = pickle.load(f)

In [8]:
print(len(PITdf))
print(PITdf.head())

367920
   Year  Month  Day  Hour  Minute  DNI  Wind Speed  Wind Direction    wind_x  \
0  1998      1    1     0       0  0.0         0.6           241.0 -0.524772   
1  1998      1    1     0      30  0.0         0.6           241.0 -0.524772   
2  1998      1    1     1       0  0.0         0.6           238.0 -0.508829   
3  1998      1    1     1      30  0.0         0.6           238.0 -0.508829   
4  1998      1    1     2       0  0.0         0.6           225.8 -0.430146   

     wind_y     day_x     day_y    time_x    time_y  max_possible_DNI  \
0 -0.290886  0.017213  0.999852  0.000000  1.000000               0.0   
1 -0.290886  0.017213  0.999852  0.130526  0.991445               0.0   
2 -0.317952  0.017213  0.999852  0.258819  0.965926               0.0   
3 -0.317952  0.017213  0.999852  0.382683  0.923880               0.0   
4 -0.418299  0.017213  0.999852  0.500000  0.866025               0.0   

   cloudiness_factor  
0           0.773816  
1           0.020232  
2   

First, let the 'DNI' be the only input and also be the target variable. This first trial, I will only use information from PIT, not any other cities. I will set aside the last year as the test set and use the first 5 years as training and the 6th year for testing.

In [13]:
all_data = PITdf['DNI'].values
all_data = all_data[:365*48*6]
test_data_size = 365*48
train_data = all_data[:-test_data_size]
test_data = all_data[-test_data_size:]
print(train_data[:48])


[  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0. 159.   0. 580. 167. 252. 116.  34. 271. 363. 334. 844. 832.
 825. 790. 746. 672. 570. 388.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.]


I will normalize the data between -1 and 1

In [22]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data .reshape(-1, 1))


Convert the data into tensors.

In [23]:
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)

Create the training window. I will start with this being 4 (2 hours).

In [24]:
train_window = 4

In [25]:
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw : i+tw+1]
        inout_seq.append((train_seq, train_label))
    return inout_seq

In [26]:
train_inout_seq = create_inout_sequences(train_data_normalized, train_window)

In [27]:
train_inout_seq[:5]

[(tensor([-1., -1., -1., -1.]), tensor([-1.])),
 (tensor([-1., -1., -1., -1.]), tensor([-1.])),
 (tensor([-1., -1., -1., -1.]), tensor([-1.])),
 (tensor([-1., -1., -1., -1.]), tensor([-1.])),
 (tensor([-1., -1., -1., -1.]), tensor([-1.]))]

In [28]:
class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        
        self.linear = nn.Linear(hidden_layer_size, output_size)
        
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                           torch.zeros(1,1,self.hidden_layer_size))
    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]

In [29]:
model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [30]:
print(model)

LSTM(
  (lstm): LSTM(1, 100)
  (linear): Linear(in_features=100, out_features=1, bias=True)
)


In [31]:
epochs = 2

for i in range(epochs):
    for seq, labels in train_inout_seq:
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                            torch.zeros(1, 1, model.hidden_layer_size))
        
        y_pred = model(seq)
        
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()
        
    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
        
print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

epoch:   1 loss: 0.00011185
epoch:   1 loss: 0.0001118457


In [32]:
fut_pred = 365*48

test_inputs = train_data[-train_window:].tolist()
print(test_inputs)

[0.0, 0.0, 0.0, 0.0]


In [19]:
model.eval()

for i in range(fut_pred):
    seq = torch.FloatTensor(test_inputs[-train_window:])
    with torch.no_grad():
        model.hidden = (torch.zeros(1, 1, model.hidden_layer_size),
                       torch.zeros(1, 1, model.hidden_layer_size))
        test_inputs.append(model(seq).item())

In [20]:
test_inputs[fut_pred:]

[0.5253342986106873,
 0.5253342986106873,
 0.5253342986106873,
 0.5253342986106873]

In [None]:
x = np.arrange()