# Time-series prediction with LSTM neural network in Pytorch

In [144]:
import pandas as pd
import plotly.express as px
import torch
from torch import nn
from torch.nn import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score
import numpy as np
from datetime import datetime
from torch.utils.data import Dataset,TensorDataset, DataLoader, random_split
from torch.autograd import Variable
import plotly.graph_objects as go


In [105]:
df = pd.read_csv("C:/Dropbox/pythonProject/Bitcoin Historical Data - Investing.csv",
                                    header = 0,
                                    index_col = 'Date',
                                    usecols = ['Date','Price','Open','High','Low'],
                                    parse_dates=True,
                                    infer_datetime_format=True,
                                    keep_date_col=True,
                                    low_memory = False,
                                    thousands = ',',
                                    decimal = '.')
df
                                

Unnamed: 0_level_0,Price,Open,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-07-18,0.1,0.0,0.1,0.1
2010-07-19,0.1,0.1,0.1,0.1
2010-07-20,0.1,0.1,0.1,0.1
2010-07-21,0.1,0.1,0.1,0.1
2010-07-22,0.1,0.1,0.1,0.1
...,...,...,...,...
2021-02-25,46928.5,49695.9,52013.8,46773.7
2021-02-26,46345.6,46928.5,48413.9,44248.2
2021-02-27,46136.7,46333.1,48335.1,45059.4
2021-02-28,45164.0,46136.0,46582.0,43100.6


In [106]:
features = 'Price'
label = 'Price'
scaler = MinMaxScaler()
# Slice useful data
ddf = df['2-Jul-17':]
# Plot data
fig1 = px.scatter(ddf,template = 'plotly_dark')
fig1.show()

In [107]:
# Value distribution before scaling
fig2 = px.histogram(ddf,template = 'plotly_dark')
fig2.update_layout(barmode = 'overlay')
fig2.show()

In [111]:
# Scale data with minmax scaler
ddf[['Price']] = scaler.fit_transform(ddf[['Price']])
train_df = ddf.sample(frac = 0.75, random_state = 0)
test_df = ddf.sample(frac = 0.25, random_state = 0)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [108]:
#Value distribution after scaling
fig3 = px.histogram(ddf['Price'],template = 'plotly_dark')
fig3.update_layout(barmode = 'overlay')
fig3.show()

In [112]:
#Dataset class to split sequential sequences into features and label
class SequenceDataset(Dataset):
    def __init__(self, df, sequence_length, label, features):
        self.df = df
        self.sequence_length = sequence_length
        self.label = label
        self.features = features
        self.y = np.asarray(df[label])
        self.X = np.asarray(df[features])
    def __len__(self):
        return self.X.shape[0]
    def __getitem__(self):
        x=[]
        y=[]
        for i in range(sequence_length+1 , len(df)-2-sequence_length):
            x.append(np.array(df.iloc[i-1-sequence_length:i]))
            y.append(np.array(df.iloc[i]))
        return x,y

In [113]:
train_dataset = SequenceDataset(
    train_df,
    sequence_length= 4,
    label = label,
    features = features
)
test_dataset = SequenceDataset(
    test_df,
    sequence_length= 1,
    label = label,
    features = features
)

In [114]:
#Reshape numpy arrays of X and y and create Tensors, and then train dataloaders
train_dataset.X = np.asarray(train_dataset.X).reshape(-1,4,1)
train_dataset.y = np.asarray(train_dataset.y).reshape(-1,1)
test_dataset.X = np.asarray(test_dataset.X).reshape(-1,1,1)
test_dataset.y = np.asarray(test_dataset.y).reshape(-1,1)
Xtrain_tensor = torch.Tensor(train_dataset.X)
ytrain_tensor = torch.Tensor(train_dataset.y)
Xtest_tensor = torch.Tensor(test_dataset.X)
ytest_tensor = torch.Tensor(test_dataset.y)
Xtrain_dataloader = DataLoader(Xtrain_tensor,batch_size = 3, shuffle = False, drop_last=True)
ytrain_dataloader = DataLoader(ytrain_tensor, batch_size = 3, shuffle = False, drop_last = True)


In [115]:
X_train = next(iter(Xtrain_dataloader))
y_train = next(iter(ytrain_dataloader))
X_test = next(iter(Xtest_dataloader))
y_test = next(iter(ytest_dataloader))
print(X_train.shape, y_train.shape,X_test.shape,y_test.shape)

torch.Size([3, 4, 1]) torch.Size([3, 1]) torch.Size([3, 1, 1]) torch.Size([3, 1])


In [116]:
# Create LSTM model
class LSTM(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
    def forward(self, x):
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        h_out = h_out.view(-1, self.hidden_size)
        out = self.fc(h_out)
        return out

In [117]:
num_epochs = 2000
learning_rate = 0.01
model = LSTM(num_classes=1, input_size=1, hidden_size=16, num_layers=1)
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


In [118]:
#train model
epochidx = []
lossidx = []
for epoch in range(num_epochs):
    outputs = model(X_train)
    optimizer.zero_grad()
    loss = loss_fn(outputs, y_train)
    loss.backward()
    optimizer.step()
    epochidx.append(epoch)
    lossidx.append(loss.item())
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

Epoch: 0, loss: 0.00265
Epoch: 100, loss: 0.00049
Epoch: 200, loss: 0.00024
Epoch: 300, loss: 0.00000
Epoch: 400, loss: 0.00002
Epoch: 500, loss: 0.00000
Epoch: 600, loss: 0.00005
Epoch: 700, loss: 0.00000
Epoch: 800, loss: 0.00000
Epoch: 900, loss: 0.00000
Epoch: 1000, loss: 0.00000
Epoch: 1100, loss: 0.00001
Epoch: 1200, loss: 0.00000
Epoch: 1300, loss: 0.00000
Epoch: 1400, loss: 0.00000
Epoch: 1500, loss: 0.00000
Epoch: 1600, loss: 0.00000
Epoch: 1700, loss: 0.00000
Epoch: 1800, loss: 0.00000
Epoch: 1900, loss: 0.00000


In [138]:
# Make prediction using test dataset
model.eval()
data_predict =model(Xtest_tensor).data.numpy()
ytest_plot = ytest_tensor.data.numpy()
ypred = scaler.inverse_transform(data_predict).flatten()
ytrue = scaler.inverse_transform(ytest_plot).flatten()
fig4 = go.Figure()
fig4.add_trace(go.Scatter(y=ypred, name = 'Predicted', mode = 'markers'))
fig4.add_trace(go.Scatter(y=ytrue, name = 'True', mode = 'markers'))
fig4.show()

In [146]:
r2_score(ytrue.reshape(-1,1), ypred.reshape(-1,1))

0.9325076633036644