Credit : https://machinelearningmastery.com/multivariate-time-series-forecasting-lstms-keras/

In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from datetime import datetime
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [2]:
# Define the location of source file
file = 'C:/Users/ENSIEAR/Documents/VS_Code/RNN/raw.csv'

In [3]:
# read the file
df = pd.read_csv(file)

In [4]:
df.head()

Unnamed: 0,No,year,month,day,hour,pm2.5,DEWP,TEMP,PRES,cbwd,Iws,Is,Ir
0,1,2010,1,1,0,,-21,-11.0,1021.0,NW,1.79,0,0
1,2,2010,1,1,1,,-21,-12.0,1020.0,NW,4.92,0,0
2,3,2010,1,1,2,,-21,-11.0,1019.0,NW,6.71,0,0
3,4,2010,1,1,3,,-21,-14.0,1019.0,NW,9.84,0,0
4,5,2010,1,1,4,,-20,-12.0,1018.0,NW,12.97,0,0


###### Data Pre-processing 

pm2.5 column  contains NA values for the first 24 hours(first 24 hours data will be removed) , also we need to have a single date column.

In [5]:
def parse(x):
    return datetime.strptime(x, '%Y %m %d %H')
dataset = pd.read_csv(file,  parse_dates = [['year', 'month', 'day', 'hour']], index_col=0, date_parser=parse)

In [6]:
dataset.head()

Unnamed: 0_level_0,No,pm2.5,DEWP,TEMP,PRES,cbwd,Iws,Is,Ir
year_month_day_hour,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-01 00:00:00,1,,-21,-11.0,1021.0,NW,1.79,0,0
2010-01-01 01:00:00,2,,-21,-12.0,1020.0,NW,4.92,0,0
2010-01-01 02:00:00,3,,-21,-11.0,1019.0,NW,6.71,0,0
2010-01-01 03:00:00,4,,-21,-14.0,1019.0,NW,9.84,0,0
2010-01-01 04:00:00,5,,-20,-12.0,1018.0,NW,12.97,0,0


1. No column is not required and will be dropped from the dataset.
2. Need to set meaning full column names.
3. Rename the index column as date

In [7]:
dataset.drop('No', axis=1, inplace=True)
dataset.columns = ['pollution', 'dew', 'temp', 'press', 'wnd_dir', 'wnd_spd', 'snow', 'rain']
dataset.index.name = 'date'
# mark all NA values with 0
dataset['pollution'].fillna(0, inplace=True)
# drop the first 24 hours
dataset = dataset[24:]

In [8]:
dataset.shape

(43800, 8)

In [9]:
dataset.tail()

Unnamed: 0_level_0,pollution,dew,temp,press,wnd_dir,wnd_spd,snow,rain
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014-12-31 19:00:00,8.0,-23,-2.0,1034.0,NW,231.97,0,0
2014-12-31 20:00:00,10.0,-22,-3.0,1034.0,NW,237.78,0,0
2014-12-31 21:00:00,10.0,-22,-3.0,1034.0,NW,242.7,0,0
2014-12-31 22:00:00,8.0,-22,-4.0,1034.0,NW,246.72,0,0
2014-12-31 23:00:00,12.0,-21,-3.0,1034.0,NW,249.85,0,0


dataset contains 2010 to 2014 data with 8 columns. We can also see that wnd_dir column needs to have Label Encoding.

In [10]:
dataset['wnd_dir'].unique()

array(['SE', 'cv', 'NW', 'NE'], dtype=object)

In [11]:
values = dataset.values

In [12]:
# integer encode direction
encoder = LabelEncoder()
values[:,4] = encoder.fit_transform(values[:,4])

In [13]:
# ensure all data is float
values = values.astype('float32')

In [14]:
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)

##### Data Prepartion for time series RNN Prediction

We can formulate the problem as predicting the pollution at the current hour (t) given the pollution measurement and weather conditions at the prior time step or past one hour.

Lag features for past one hour has been created using the below function. More details about the function can we found in the link given in function description.

Also the function used pandas shift function , one example of Pandas shift function is given below -

In [15]:
#Pandas Shift Tutorials - 

# importing pandas as pd 
import pandas as pd 
   
# Creating row index values for our data frame 
# We have taken time frequency to be of 12 hours interval 
# We are generating five index value using "period = 5" parameter 
   
ind = pd.date_range('01 / 01 / 2000', periods = 5, freq ='12H') 
   
# Creating a dataframe with 4 columns 
# using "ind" as the index for our dataframe 
df1 = pd.DataFrame({"A":[1, 2, 3, 4, 5],  
                   "B":[10, 20, 30, 40, 50], 
                   "C":[11, 22, 33, 44, 55], 
                   "D":[12, 24, 51, 36, 2]},  
                    index = ind) 
  
# Print the dataframe 
print('Raw Data:')
print(df1)

print('With Shift -2:')
print(df1.shift(-2))

print('With Shift +2:')
print(df1.shift(2))

Raw Data:
                     A   B   C   D
2000-01-01 00:00:00  1  10  11  12
2000-01-01 12:00:00  2  20  22  24
2000-01-02 00:00:00  3  30  33  51
2000-01-02 12:00:00  4  40  44  36
2000-01-03 00:00:00  5  50  55   2
With Shift -2:
                       A     B     C     D
2000-01-01 00:00:00  3.0  30.0  33.0  51.0
2000-01-01 12:00:00  4.0  40.0  44.0  36.0
2000-01-02 00:00:00  5.0  50.0  55.0   2.0
2000-01-02 12:00:00  NaN   NaN   NaN   NaN
2000-01-03 00:00:00  NaN   NaN   NaN   NaN
With Shift +2:
                       A     B     C     D
2000-01-01 00:00:00  NaN   NaN   NaN   NaN
2000-01-01 12:00:00  NaN   NaN   NaN   NaN
2000-01-02 00:00:00  1.0  10.0  11.0  12.0
2000-01-02 12:00:00  2.0  20.0  22.0  24.0
2000-01-03 00:00:00  3.0  30.0  33.0  51.0


In [16]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    
    """
    Credit - https://machinelearningmastery.com/convert-time-series-supervised-learning-problem-python/
    
    Frame a time series as a supervised learning dataset.
    
    Arguments:
    data: Sequence of observations as a list or NumPy array.
    n_in: Number of lag observations as input (X).
    n_out: Number of observations as output (y).
    dropnan: Boolean whether or not to drop rows with NaN values.
    
    Returns:
    Pandas DataFrame of series framed for supervised learning.
    
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [17]:
reframed = series_to_supervised(scaled, 1, 1)

In [18]:
reframed.head()

Unnamed: 0,var1(t-1),var2(t-1),var3(t-1),var4(t-1),var5(t-1),var6(t-1),var7(t-1),var8(t-1),var1(t),var2(t),var3(t),var4(t),var5(t),var6(t),var7(t),var8(t)
1,0.129779,0.352941,0.245902,0.527273,0.666667,0.00229,0.0,0.0,0.148893,0.367647,0.245902,0.527273,0.666667,0.003811,0.0,0.0
2,0.148893,0.367647,0.245902,0.527273,0.666667,0.003811,0.0,0.0,0.15996,0.426471,0.229508,0.545454,0.666667,0.005332,0.0,0.0
3,0.15996,0.426471,0.229508,0.545454,0.666667,0.005332,0.0,0.0,0.182093,0.485294,0.229508,0.563637,0.666667,0.008391,0.037037,0.0
4,0.182093,0.485294,0.229508,0.563637,0.666667,0.008391,0.037037,0.0,0.138833,0.485294,0.229508,0.563637,0.666667,0.009912,0.074074,0.0
5,0.138833,0.485294,0.229508,0.563637,0.666667,0.009912,0.074074,0.0,0.109658,0.485294,0.213115,0.563637,0.666667,0.011433,0.111111,0.0


In [19]:
# drop columns we don't want to predict
reframed.drop(reframed.columns[[9,10,11,12,13,14,15]], axis=1, inplace=True)
print(reframed.head())

   var1(t-1)  var2(t-1)  var3(t-1)  var4(t-1)  var5(t-1)  var6(t-1)  \
1   0.129779   0.352941   0.245902   0.527273   0.666667   0.002290   
2   0.148893   0.367647   0.245902   0.527273   0.666667   0.003811   
3   0.159960   0.426471   0.229508   0.545454   0.666667   0.005332   
4   0.182093   0.485294   0.229508   0.563637   0.666667   0.008391   
5   0.138833   0.485294   0.229508   0.563637   0.666667   0.009912   

   var7(t-1)  var8(t-1)   var1(t)  
1   0.000000        0.0  0.148893  
2   0.000000        0.0  0.159960  
3   0.000000        0.0  0.182093  
4   0.037037        0.0  0.138833  
5   0.074074        0.0  0.109658  


var1(t) is the target column , or the value we will try to predict.

For the simplicity we will train our model with first one year data and we will test our model with remaining dataset.Vice versa also can be tried out. 

In [20]:
# split into train and test sets
values = reframed.values
n_train_hours = 365 * 24
train = values[:n_train_hours, :]
test = values[n_train_hours:, :]
# split into input and outputs
train_X, train_y = train[:, :-1], train[:, -1]
test_X, test_y = test[:, :-1], test[:, -1]
# reshape input to be 3D [samples, sequence length, features]
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

(8760, 1, 8) (8760,) (35039, 1, 8) (35039,)


Sequence length is one becauase we are only passing past one hour data to predict the pollution of the next hour. 

Now we will convert the predictors and target values as pytorch Tensors and also we will create batches using Pytorch's data loader.

In [21]:
batch_size = 256

x_train = torch.tensor(train_X , dtype=torch.float)
y_train = torch.tensor(train_y, dtype=torch.float)
x_test = torch.tensor(test_X , dtype=torch.float)
y_test = torch.tensor(test_y , dtype=torch.float)

train = torch.utils.data.TensorDataset(x_train, y_train)
test = torch.utils.data.TensorDataset(x_test, y_test)

train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

In [22]:
# Let's check the shape of the input/target data
dataiter = iter(train_loader)
data, target = dataiter.next()
print(type(data))
print(data.shape)
print(target.shape)

<class 'torch.Tensor'>
torch.Size([256, 1, 8])
torch.Size([256])


Let's define the RNN class with one layer of RNN with 512 hidden dimension and with one linear transformation in the output layer.

Required signatures to be keep in mind -

1. RNN - nn.RNN(input_size, hidden_dim, n_layers, batch_first=True) , n_layers is no of stacked RNNs
2. Input data x shape - (batch_size, seq_length, input_size)
3. Output of RNN is r_out and hidden 
4. shape of r_out - (batch_size, time_step, hidden_dim)
5. shape of hidden - (n_layers, batch_size, hidden_dim)

Before feeding the output of RNNs to the linean layer , shape needs to be reshaped as 
- (batch_size, seq_length*hidden_dim)

In [23]:
class RNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers,seq_length):
        super(RNN, self).__init__()
        
        self.hidden_dim=hidden_dim
        self.n_time_stamps = seq_length

        # define an RNN with specified parameters
        # batch_first means that the first dim of the input and output will be the batch_size
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        
        # last, fully-connected layer
        self.fc = nn.Linear(seq_length*hidden_dim, output_size)

    def forward(self, x, hidden):
        batch_size = x.size(0)
        
        # get RNN outputs
        r_out, hidden = self.rnn(x, hidden)

        # shape output to the linear layer (batch_size, seq_length*hidden_dim)
        r_out = r_out.contiguous().view(batch_size,-1)  

        # get final output 
        output = self.fc(r_out)
        
        return output, hidden

In [24]:
# decide on parameters
input_size=8
output_size=1
hidden_dim=512
n_layers=1
seq_length = 1

In [25]:
# instantiate an RNN
rnn = RNN(input_size, output_size, hidden_dim, n_layers , seq_length)
print(rnn)

RNN(
  (rnn): RNN(8, 512, batch_first=True)
  (fc): Linear(in_features=512, out_features=1, bias=True)
)


In [26]:
# MSE loss and Adam optimizer with a learning rate of 0.0001
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.0001) 

####  Training Loop

In [27]:
hidden = None # initial hidden
for epoch in range(30): ## run the model for 30 epochs
    train_loss = []
    
    for data, target in train_loader:

        if data.shape[0] != batch_size:   # to verify if the batch no is 256 or not
            print('Batch Size Validation- Input shape Issue:',format(data.shape))
            continue
        else:
            optimizer.zero_grad()
            ## 1. forward propagation
            prediction, hidden = rnn(data, hidden)


            ## Representing Memory ##
            # make a new variable for hidden and detach the hidden state from its history
            # this way, we don't backpropagate through the entire history
            hidden = hidden.data
            batch_size = data.shape[0]

            ## 2. loss calculation
            loss = criterion(prediction.squeeze(), target)    # squeeze (256,1) -> (256) - to match target shape
            
            ## 3. backward propagation
            loss.backward()
            
            ## 4. weight optimization
            optimizer.step()
            
            train_loss.append(loss.item())
        
    print ("Epoch:", epoch, "Training Loss: ", np.mean(train_loss))

Batch Size Validation- Input shape Issue: torch.Size([56, 1, 8])
Epoch: 0 Training Loss:  0.009287049069398028
Batch Size Validation- Input shape Issue: torch.Size([56, 1, 8])
Epoch: 1 Training Loss:  0.007951364948359482
Batch Size Validation- Input shape Issue: torch.Size([56, 1, 8])
Epoch: 2 Training Loss:  0.007052509846877964
Batch Size Validation- Input shape Issue: torch.Size([56, 1, 8])
Epoch: 3 Training Loss:  0.006493238643195261
Batch Size Validation- Input shape Issue: torch.Size([56, 1, 8])
Epoch: 4 Training Loss:  0.006256303996505106
Batch Size Validation- Input shape Issue: torch.Size([56, 1, 8])
Epoch: 5 Training Loss:  0.006746564645950189
Batch Size Validation- Input shape Issue: torch.Size([56, 1, 8])
Epoch: 6 Training Loss:  0.009060627722647041
Batch Size Validation- Input shape Issue: torch.Size([56, 1, 8])
Epoch: 7 Training Loss:  0.012209891321082763
Batch Size Validation- Input shape Issue: torch.Size([56, 1, 8])
Epoch: 8 Training Loss:  0.010260309263899484
B

### Checking the Test set performence or Making Prediction

In [28]:
# Prediction using tensor of predictors i.e x_test
yhat , _ = rnn(x_test, None) # throwing away _ the hidden 

yhat.shape

torch.Size([35039, 1])

In [29]:
#need to convert yhat to numpy
yhat = yhat.detach().numpy()

In [30]:
type(yhat) , yhat.shape

(numpy.ndarray, (35039, 1))

In [31]:
#Checking the shape of target value
test_X.shape

(35039, 1, 8)

In [32]:
# To invert scale we need to reshape the 3D array to 2D array
test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
test_X.shape

(35039, 8)

In [33]:
# invert scaling for forecast
inv_yhat = np.concatenate((yhat.reshape(-1,1), test_X[:, 1:]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]

In [34]:
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = np.concatenate((test_y, test_X[:, 1:]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]

In [35]:
# calculate RMSE
from math import sqrt
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)

Test RMSE: 37.338


#### Train On Multiple Lag Timesteps Example
We will use 3 hours of data as input. As stated in the blog post -

We need to be more careful in specifying the column for input and output.

We have 3 * 8 + 8 columns in our framed dataset. We will take 3 * 8 or 24 columns as input for the obs of all features across the previous 3 hours. We will take just the pollution variable as output. 

In [36]:
df = pd.read_csv(file)
def parse(x):
    return datetime.strptime(x, '%Y %m %d %H')
dataset = pd.read_csv(file,  parse_dates = [['year', 'month', 'day', 'hour']], index_col=0, date_parser=parse)
dataset.drop('No', axis=1, inplace=True)
dataset.columns = ['pollution', 'dew', 'temp', 'press', 'wnd_dir', 'wnd_spd', 'snow', 'rain']
dataset.index.name = 'date'
# mark all NA values with 0
dataset['pollution'].fillna(0, inplace=True)
# drop the first 24 hours
dataset = dataset[24:]
values = dataset.values
# integer encode direction
encoder = LabelEncoder()
values[:,4] = encoder.fit_transform(values[:,4])
# ensure all data is float
values = values.astype('float32')
# normalize features
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)

In [37]:
# specify the number of lag hours
n_hours = 3
n_features = 8
# frame as supervised learning
reframed = series_to_supervised(scaled, n_hours, 1)

In [38]:
reframed.head()

Unnamed: 0,var1(t-3),var2(t-3),var3(t-3),var4(t-3),var5(t-3),var6(t-3),var7(t-3),var8(t-3),var1(t-2),var2(t-2),var3(t-2),var4(t-2),var5(t-2),var6(t-2),var7(t-2),var8(t-2),var1(t-1),var2(t-1),var3(t-1),var4(t-1),var5(t-1),var6(t-1),var7(t-1),var8(t-1),var1(t),var2(t),var3(t),var4(t),var5(t),var6(t),var7(t),var8(t)
3,0.129779,0.352941,0.245902,0.527273,0.666667,0.00229,0.0,0.0,0.148893,0.367647,0.245902,0.527273,0.666667,0.003811,0.0,0.0,0.15996,0.426471,0.229508,0.545454,0.666667,0.005332,0.0,0.0,0.182093,0.485294,0.229508,0.563637,0.666667,0.008391,0.037037,0.0
4,0.148893,0.367647,0.245902,0.527273,0.666667,0.003811,0.0,0.0,0.15996,0.426471,0.229508,0.545454,0.666667,0.005332,0.0,0.0,0.182093,0.485294,0.229508,0.563637,0.666667,0.008391,0.037037,0.0,0.138833,0.485294,0.229508,0.563637,0.666667,0.009912,0.074074,0.0
5,0.15996,0.426471,0.229508,0.545454,0.666667,0.005332,0.0,0.0,0.182093,0.485294,0.229508,0.563637,0.666667,0.008391,0.037037,0.0,0.138833,0.485294,0.229508,0.563637,0.666667,0.009912,0.074074,0.0,0.109658,0.485294,0.213115,0.563637,0.666667,0.011433,0.111111,0.0
6,0.182093,0.485294,0.229508,0.563637,0.666667,0.008391,0.037037,0.0,0.138833,0.485294,0.229508,0.563637,0.666667,0.009912,0.074074,0.0,0.109658,0.485294,0.213115,0.563637,0.666667,0.011433,0.111111,0.0,0.105634,0.485294,0.213115,0.581818,0.666667,0.014492,0.148148,0.0
7,0.138833,0.485294,0.229508,0.563637,0.666667,0.009912,0.074074,0.0,0.109658,0.485294,0.213115,0.563637,0.666667,0.011433,0.111111,0.0,0.105634,0.485294,0.213115,0.581818,0.666667,0.014492,0.148148,0.0,0.124748,0.485294,0.229508,0.6,0.666667,0.017551,0.0,0.0


In [39]:
# split into train and test sets
values = reframed.values
n_train_hours = 365 * 24
train = values[:n_train_hours, :]
test = values[n_train_hours:, :]

# split into input and outputs
n_obs = n_hours * n_features
train_X, train_y = train[:, :n_obs], train[:, -n_features]
test_X, test_y = test[:, :n_obs], test[:, -n_features]
print(train_X.shape, len(train_X), train_y.shape)

# reshape input to be 3D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], n_hours, n_features))
test_X = test_X.reshape((test_X.shape[0], n_hours, n_features))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

(8760, 24) 8760 (8760,)
(8760, 3, 8) (8760,) (35037, 3, 8) (35037,)


In [40]:
batch_size = 256

x_train = torch.tensor(train_X , dtype=torch.float)
y_train = torch.tensor(train_y, dtype=torch.float)
x_test = torch.tensor(test_X , dtype=torch.float)
y_test = torch.tensor(test_y , dtype=torch.float)

train = torch.utils.data.TensorDataset(x_train, y_train)
test = torch.utils.data.TensorDataset(x_test, y_test)

train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

In [41]:
# Let's check the shape of the input/target data
for data, target in train_loader:
    print(data.shape)
    print(target.shape)
    print(target.dtype)
    break

torch.Size([256, 3, 8])
torch.Size([256])
torch.float32


In [42]:
# decide on parameters
input_size=8
output_size=1
hidden_dim=512
n_layers=1
seq_length = 3

In [43]:
# instantiate an RNN
rnn = RNN(input_size, output_size, hidden_dim, n_layers , seq_length)
print(rnn)

RNN(
  (rnn): RNN(8, 512, batch_first=True)
  (fc): Linear(in_features=1536, out_features=1, bias=True)
)


In [44]:
# MSE loss and Adam optimizer with a learning rate of 0.0001
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.0001) 

In [45]:
hidden = None # initial hidden
for epoch in range(30): ## run the model for 30 epochs
    train_loss = []
    
    for data, target in train_loader:

        if data.shape[0] != batch_size:   # to verify if the batch no is 256 or not
            print('Batch Size Validation- Input shape Issue:',format(data.shape))
            continue
        else:
            optimizer.zero_grad()
            ## 1. forward propagation
            prediction, hidden = rnn(data, hidden)


            ## Representing Memory ##
            # make a new variable for hidden and detach the hidden state from its history
            # this way, we don't backpropagate through the entire history
            hidden = hidden.data
            batch_size = data.shape[0]

            ## 2. loss calculation
            loss = criterion(prediction.squeeze(), target)    # squeeze (256,1) -> (256) - to match target shape
            
            ## 3. backward propagation
            loss.backward()
            
            ## 4. weight optimization
            optimizer.step()
            
            train_loss.append(loss.item())
        
    print ("Epoch:", epoch, "Training Loss: ", np.mean(train_loss))

Batch Size Validation- Input shape Issue: torch.Size([56, 3, 8])
Epoch: 0 Training Loss:  0.0079601468408809
Batch Size Validation- Input shape Issue: torch.Size([56, 3, 8])
Epoch: 1 Training Loss:  0.00532056950375109
Batch Size Validation- Input shape Issue: torch.Size([56, 3, 8])
Epoch: 2 Training Loss:  0.004561143824556733
Batch Size Validation- Input shape Issue: torch.Size([56, 3, 8])
Epoch: 3 Training Loss:  0.006887500818051836
Batch Size Validation- Input shape Issue: torch.Size([56, 3, 8])
Epoch: 4 Training Loss:  0.011227845721056355
Batch Size Validation- Input shape Issue: torch.Size([56, 3, 8])
Epoch: 5 Training Loss:  0.006237753571065909
Batch Size Validation- Input shape Issue: torch.Size([56, 3, 8])
Epoch: 6 Training Loss:  0.004455090692157254
Batch Size Validation- Input shape Issue: torch.Size([56, 3, 8])
Epoch: 7 Training Loss:  0.003718995489180088
Batch Size Validation- Input shape Issue: torch.Size([56, 3, 8])
Epoch: 8 Training Loss:  0.0032488409265437547
Bat

In [46]:
# Prediction using tensor of predictors i.e x_test
yhat , _ = rnn(x_test, None) # throwing away _ the hidden 

yhat.shape

torch.Size([35037, 1])

In [47]:
#need to convert yhat to numpy and with correct shape
yhat = yhat.detach().numpy()

In [48]:
type(yhat) , yhat.shape

(numpy.ndarray, (35037, 1))

In [49]:
#Checking the shape of target value
test_X.shape

(35037, 3, 8)

In [50]:
# To invert scale we need to reshape the 3D array to 2D array
test_X = test_X.reshape(test_X.shape[0], n_hours*n_features)
test_X.shape

(35037, 24)

In [51]:
# invert scaling for forecast
inv_yhat = np.concatenate((yhat.reshape(-1,1), test_X[:, -7:]), axis=1) #Note: -7 to select correct inputs
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]

In [52]:
# invert scaling for actual
test_y = test_y.reshape((len(test_y), 1))
inv_y = np.concatenate((test_y, test_X[:, -7:]), axis=1) #Note: -7 to select correct inputs
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]

In [53]:
# calculate RMSE
from math import sqrt
rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)

Test RMSE: 46.668


As stated in the Jason Brownlee's blog post also , we can also see using multiple time sequence for this problem - the model performence is not getting increased.
However , we have learnt how to do RNNs in pytorch with single time sequence and with multiple time sequence. 