In [1]:
import torch
from torch import nn
import numpy as np
import pandas as pd
from torch.nn import functional as F
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader
warnings.filterwarnings('ignore')

In [2]:
# Read the pollution/weather table; the first CSV column (the timestamp)
# becomes the row index.
DATA_PATH = "LSTM-Multivariate_pollution.csv"
data = pd.read_csv(DATA_PATH, index_col=0)

In [3]:
# Check column dtypes before preprocessing; `wnd_dir` is the only
# non-numeric (object) column.
data.dtypes

pollution    float64
dew            int64
temp         float64
press        float64
wnd_dir       object
wnd_spd      float64
snow           int64
rain           int64
dtype: object

In [4]:
# Preview the first rows of the raw table.
data.head()

Unnamed: 0_level_0,pollution,dew,temp,press,wnd_dir,wnd_spd,snow,rain
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-02 00:00:00,129.0,-16,-4.0,1020.0,SE,1.79,0,0
2010-01-02 01:00:00,148.0,-15,-4.0,1020.0,SE,2.68,0,0
2010-01-02 02:00:00,159.0,-11,-5.0,1021.0,SE,3.57,0,0
2010-01-02 03:00:00,181.0,-7,-5.0,1022.0,SE,5.36,1,0
2010-01-02 04:00:00,138.0,-7,-5.0,1022.0,SE,6.25,2,0


In [7]:
# Replace each wind-direction label with its integer class code, stored as
# float. The fitted encoder is kept so codes can be mapped back to labels.
wind_dir_enc = LabelEncoder()
wnd_dir_codes = wind_dir_enc.fit_transform(data["wnd_dir"])
data["wnd_dir"] = wnd_dir_codes.astype(float)

In [8]:
# Preview again to confirm that `wnd_dir` now holds numeric codes.
data.head()

Unnamed: 0_level_0,pollution,dew,temp,press,wnd_dir,wnd_spd,snow,rain
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-02 00:00:00,129.0,-16,-4.0,1020.0,2.0,1.79,0,0
2010-01-02 01:00:00,148.0,-15,-4.0,1020.0,2.0,2.68,0,0
2010-01-02 02:00:00,159.0,-11,-5.0,1021.0,2.0,3.57,0,0
2010-01-02 03:00:00,181.0,-7,-5.0,1022.0,2.0,5.36,1,0
2010-01-02 04:00:00,138.0,-7,-5.0,1022.0,2.0,6.25,2,0


In [9]:
values = data.values

# Fit the scaler on the training rows only, so the min/max of the held-out
# period never leaks into preprocessing, then apply that same mapping to
# every row. Test-period values may therefore fall outside [0, 1]; that is
# expected.
# NOTE: this boundary must stay equal to the train/test split point
# (`n_train_hours`) used when `scaled` is sliced into train and test.
n_scaler_fit_rows = 365 * 24 * 4
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values[:n_scaler_fit_rows])
scaled = scaler.transform(values)

### Create sequence data

In [13]:
# Chronological split: the first 365*24*4 rows form the training set and
# everything after that is held out for testing.
n_train_hours = 365 * 24 * 4
train, test = scaled[:n_train_hours, :], scaled[n_train_hours:, :]

In [16]:
def create_inout_sequences(input_data, tw):
    """Slice a 2-D array into sliding (window, next-step target) pairs.

    Parameters
    ----------
    input_data : 2-D array supporting ``[rows, cols]`` slicing
        Each row is one step of the sequence; column 0 is the series used
        as the target.
    tw : int
        Window length, i.e. the number of consecutive rows per input window.

    Returns
    -------
    list of (window, target) tuples
        ``window`` is ``input_data[i:i+tw]``; ``target`` is the column-0
        value of the row immediately after the window, kept 2-D through the
        slice ``input_data[i+tw:i+tw+1, 0:1]``. The list is empty when
        ``len(input_data) <= tw``.
    """
    n_windows = len(input_data) - tw
    return [
        (input_data[start:start + tw],
         input_data[start + tw:start + tw + 1, 0:1])
        for start in range(n_windows)
    ]

In [17]:
tw = 4  # window length: number of consecutive rows in each input sequence

# Build the (window, target) pairs for the training split, then separate
# them into two parallel lists.
train_in_out = create_inout_sequences(train, tw)
train_X = [pair[0] for pair in train_in_out]
train_Y = [pair[1] for pair in train_in_out]

# Stack the windows along a new leading sample axis; the (1, 1) targets are
# stacked and then flattened into a 1-D vector.
train_X_ = np.stack(train_X)
train_Y_ = np.stack(train_Y).flatten()


In [21]:
# Same windowing as for the training split, applied to the held-out rows
# (reuses the window length `tw` set in the training-window cell).
test_in_out = create_inout_sequences(test, tw)
test_X = [pair[0] for pair in test_in_out]
test_Y = [pair[1] for pair in test_in_out]

# Stack the windows along a new leading sample axis; the (1, 1) targets are
# stacked and then flattened into a 1-D vector.
test_X_ = np.stack(test_X)
test_Y_ = np.stack(test_Y).flatten()

In [22]:
# Convert the stacked arrays to float32 tensors, rebinding the same names.
train_X_, train_Y_ = (torch.FloatTensor(arr) for arr in (train_X_, train_Y_))
test_X_, test_Y_ = (torch.FloatTensor(arr) for arr in (test_X_, test_Y_))

### Define model

In [28]:
class LSTM_model(nn.Module):
    """Stacked LSTM followed by dropout and a linear head on the last time step.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    batch_size : int
        Stored for backward compatibility only. ``forward`` no longer relies
        on it, so batches of any size (including a final partial batch) are
        accepted.
    output_size : int
        Number of values predicted per sample.
    hidden_size : int
        Width of the LSTM hidden state.
    n_layers : int
        Number of stacked LSTM layers.
    drop_prob : float
        Dropout probability applied to the LSTM output before the linear head.
    """

    def __init__(self,input_size,batch_size,output_size,hidden_size,n_layers,
                drop_prob):
        super(LSTM_model,self).__init__()
        self.drop_prob = drop_prob
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.input_size = input_size
        self.batch_size = batch_size
        self.output_size = output_size

        # batch_first=True: inputs/outputs are (batch, seq_len, features).
        self.lstm1 = nn.LSTM(input_size,hidden_size,n_layers,batch_first=True)

        self.dropout = nn.Dropout(drop_prob)
        self.fc = nn.Linear(hidden_size,output_size)

    def forward(self,input_x):
        """Predict from the last time step of each input sequence.

        Parameters
        ----------
        input_x : torch.Tensor
            Batch of sequences shaped (batch, seq_len, input_size).

        Returns
        -------
        torch.Tensor
            Predictions shaped (batch, output_size).
        """
        # No explicit (h0, c0): nn.LSTM then starts from zero states sized
        # (n_layers, batch, hidden_size) for whatever batch it receives, on
        # the input's device. Building them from self.batch_size made every
        # batch of a different size fail.
        lstm_output, _ = self.lstm1(input_x)

        # lstm_output is (batch, seq_len, hidden_size); only the final time
        # step feeds the linear head.
        out = self.dropout(lstm_output)
        return self.fc(out[:,-1,:])
    


### Training

In [73]:
num_epochs = 100
learning_rate = 0.001

input_size = 8
batch_size = 64
output_size = 1
# size of hidden layers
hidden_size = 32
drop_prob = 0.5
num_layers = 2

# Seed weight initialisation and dropout so a fresh run is repeatable.
torch.manual_seed(42)

# shuffle=False keeps the windows in chronological order; drop_last=True
# keeps every batch at exactly `batch_size` samples.
train_loader = DataLoader(TensorDataset(train_X_, train_Y_), batch_size=batch_size,
                          shuffle=False, drop_last=True)
test_loader = DataLoader(TensorDataset(test_X_, test_Y_), batch_size=batch_size,
                         shuffle=False, drop_last=True)

model = LSTM_model(input_size, batch_size, output_size, hidden_size, num_layers,drop_prob)

criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):

    model.train()
    running_loss = 0.0
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(x_batch)
        # The model returns (batch, 1) while y_batch is (batch,). Without the
        # squeeze, MSELoss broadcasts the pair to (batch, batch) and averages
        # the error of every prediction against every target, which is a
        # different (wrong) objective.
        loss = criterion(outputs.squeeze(-1), y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Report once per logged epoch (mean over batches), not once per batch.
    if epoch % 5 == 0:
        epoch_loss = running_loss / max(len(train_loader), 1)
        print("Epoch: %d, loss: %1.5f" % (epoch, epoch_loss))


In [56]:
# eval() makes dropout a no-op so predictions are deterministic.
model.eval()

Y_pred = []
Y_true = []

# Inference only: no_grad() skips autograd bookkeeping, so no graph is built
# or kept alive for the collected predictions.
with torch.no_grad():
    for x_batch, y_true in test_loader:
        Y_pred.append(model(x_batch).flatten())
        Y_true.append(y_true)

# Concatenate the per-batch tensors once instead of flattening them element
# by element through Python lists.
Y_pred_ = torch.cat(Y_pred).numpy()
Y_true_ = torch.cat(Y_true).numpy()
    

In [72]:
# inverse_transform needs an array as wide as the data the scaler was fit
# on, so take the width from `scaled` instead of hard-coding it.
n_features = scaled.shape[1]
predict_array = np.zeros(shape=(len(Y_pred_), n_features))
true_array = np.zeros(shape=(len(Y_true_), n_features))

# put the predicted values in the first column (the target column); the
# remaining columns are zero padding and are discarded below
predict_array[:, 0] = Y_pred_
true_array[:, 0] = Y_true_

# inverse transform and then select the first column
# (note: `Y_true` is rebound here from the per-batch list to the
# de-normalised array)
Y_prediction = scaler.inverse_transform(predict_array)[:, 0]
Y_true = scaler.inverse_transform(true_array)[:, 0]

plt.plot(Y_prediction, label="predicted")
plt.plot(Y_true, label="actual")
plt.xlabel("test sample index")
plt.ylabel("pollution")
plt.legend()
plt.suptitle('Time-Series Prediction')
plt.show()