In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from datetime import timedelta, date, datetime

#### Check device and assign device

In [None]:
# Select the compute device: the GPU when CUDA is usable, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


##### Build Custom Dataset

In [None]:
class PriceDataset(Dataset):
    """Dataset that serves one CSV file of price rows per calendar day.

    Every index corresponds to one date in the inclusive range
    ``[start_date_str, end_date_str]``. The matching file
    ``{directory}/{item}-{timespan}-{YYYY-MM-DD}.csv`` is only read when the
    item is requested, so constructing the dataset touches no files.

    Args:
        item: Symbol part of the file name (e.g. ``'BTCUSDT'``).
        timespan: Interval part of the file name (e.g. ``'1m'``).
        start_date_str: First day of the range, formatted ``YYYY-MM-DD``.
        end_date_str: Last day of the range (inclusive), formatted ``YYYY-MM-DD``.
        directory: Folder that holds the CSV files. Defaults to ``'csvfiles'``.
        columns: Zero-based column positions to load from each file.
            Defaults to ``[0, 1, 2, 3, 4, 7]``.

    Raises:
        ValueError: If a date string does not match ``%Y-%m-%d``.
    """

    def __init__(self, item, timespan, start_date_str, end_date_str,
                 directory='csvfiles', columns=None):
        self.directory = directory
        self.item = item
        self.timespan = timespan
        start_date = datetime.strptime(start_date_str, '%Y-%m-%d').date()
        end_date = datetime.strptime(end_date_str, '%Y-%m-%d').date()
        # One entry per day; an end date before the start date yields an empty dataset.
        self.dates = [day.strftime("%Y-%m-%d") for day in self.daterange(start_date, end_date)]
        # Copy the caller's list so later mutation outside cannot change what is loaded.
        self.columns = [0, 1, 2, 3, 4, 7] if columns is None else list(columns)

    def daterange(self, start_date, end_date):
        """Yield every date from ``start_date`` to ``end_date``, both inclusive."""
        for n in range(int((end_date - start_date).days) + 1):
            yield start_date + timedelta(n)

    def __len__(self):
        """Return the number of days (and therefore files) in the range."""
        return len(self.dates)

    def __getitem__(self, idx):
        """Load the CSV for day ``idx`` and return it as a float32 tensor.

        The result has one row per CSV line and one column per entry in
        ``self.columns``. The files are read without a header row.
        """
        # Named `day` rather than `date` to avoid shadowing the imported datetime.date.
        day = self.dates[idx]
        filename = f"{self.directory}/{self.item}-{self.timespan}-{day}.csv"
        df = pd.read_csv(filename, usecols=self.columns, header=None)
        return torch.tensor(df.values, dtype=torch.float32)

##### Set Dataset and DataLoader
* Build the Dataset and wrap it in a DataLoader; batches are sent to the GPU (when available) later, in the training loop, for faster calculation
* Make batches with the DataLoader

In [None]:
# One sample per calendar day of 1-minute BTCUSDT data.
dataset = PriceDataset(
    item='BTCUSDT',
    timespan='1m',
    start_date_str='2021-03-01',
    end_date_str='2023-04-30',
)

# Keep chronological order and discard a trailing batch that is not full.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=32,
    shuffle=False,
    drop_last=True,
)

# Walk through every batch once without using it; each batch is a tensor
# of shape [batch_size, num_rows, num_cols].
for batch in dataloader:
    continue

##### Build LSTM Model

In [None]:
import torch
import torch.nn as nn
import numpy as np
from pandas import DataFrame as df

In [None]:
class PricePredictionLSTM(nn.Module):
    """Stacked LSTM followed by a linear head applied at every time step.

    The last output channel is squashed with a sigmoid so it can be read as
    a probability; the other channels are the raw linear outputs.

    Args:
        input_size: Number of features per time step.
        hidden_layer_size: Width of each LSTM layer.
        output_size: Number of values predicted per time step.
        num_layers: Number of stacked LSTM layers.
        last_steps: How many trailing time steps ``forward`` returns.

    Raises:
        ValueError: If ``last_steps`` is smaller than 1.
    """

    def __init__(self, input_size=4, hidden_layer_size=150, output_size=3, num_layers=2, last_steps=10):
        super().__init__()
        if last_steps < 1:
            raise ValueError(f"last_steps must be >= 1, got {last_steps}")
        self.hidden_layer_size = hidden_layer_size
        self.last_steps = last_steps

        # LSTM dropout acts between stacked layers only; PyTorch warns when it
        # is non-zero for a single-layer network, so disable it in that case.
        dropout = 0.2 if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_size, hidden_layer_size, num_layers=num_layers, dropout=dropout, batch_first=True)

        self.linear = nn.Linear(hidden_layer_size, output_size)

        # sigmoid function for the probability
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_seq):
        """Run the network over a sequence and return its trailing predictions.

        Args:
            input_seq: Tensor of shape ``[batch, seq_len, input_size]``. A single
                sequence of shape ``[seq_len, input_size]`` (or ``[seq_len]``
                when ``input_size`` is 1) is also accepted.

        Returns:
            Predictions for the last ``last_steps`` time steps (fewer if the
            sequence is shorter): ``[batch, steps, output_size]`` for batched
            input, ``[steps, output_size]`` for a single sequence.

        Raises:
            ValueError: If ``input_seq`` has more than three dimensions.
        """
        if input_seq.dim() == 1:
            # A bare series of scalars: one feature per time step.
            input_seq = input_seq.unsqueeze(-1)
        unbatched = input_seq.dim() == 2
        if unbatched:
            input_seq = input_seq.unsqueeze(0)
        if input_seq.dim() != 3:
            raise ValueError(
                f"expected input of shape [batch, seq_len, input_size], got {tuple(input_seq.shape)}"
            )

        # The LSTM is batch_first, so the tensor is passed as-is. Reshaping to
        # [len, 1, -1] would fold the time axis into the feature axis.
        lstm_out, _ = self.lstm(input_seq)      # [batch, seq_len, hidden]
        raw = self.linear(lstm_out)             # [batch, seq_len, output_size]

        # Sigmoid on the last channel only, built out of place so autograd
        # never sees an in-place write into the linear output.
        predictions = torch.cat([raw[..., :-1], self.sigmoid(raw[..., -1:])], dim=-1)

        # Slice the time axis (dim 1), not the batch axis.
        predictions = predictions[:, -self.last_steps:, :]
        return predictions.squeeze(0) if unbatched else predictions


#### Train LSTM Model

In [None]:
# Model, loss, and optimizer
input_size = 5   # Number of input features (leading columns of every batch row)
hidden_size = 150   # Number of hidden neurons in the LSTM layers
output_size = 3  # Number of output features
num_layers = 2   # Number of stacked LSTM layers

model = PricePredictionLSTM(input_size=input_size, hidden_layer_size=hidden_size, output_size=output_size, num_layers=num_layers)
model = model.to(device)

criterion = nn.MSELoss()  # Use mean square error loss for regression problem

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Use Adam optimizer


# Set hyperparameters
epochs = 150

# Make training mode explicit so the LSTM dropout is active even if the model
# was switched to eval mode in an earlier cell run.
model.train()

# loop over epochs
for epoch in range(epochs):

    # loop over the data loader
    for i, batch in enumerate(dataloader):

        # The first `input_size` columns are features; the rest are targets.
        # Slicing with `input_size` keeps the split in sync with the model.
        features = batch[:, :, :input_size].to(device)
        targets = batch[:, :, input_size:].to(device)

        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(features)

        # The model only returns its trailing time steps, so compare them with
        # the targets of the same trailing rows.
        if output.dim() == targets.dim() and output.shape[1] != targets.shape[1]:
            targets = targets[:, -output.shape[1]:, :]

        # MSELoss broadcasts mismatched shapes (with only a warning), which
        # silently trains against the wrong values. Fail loudly instead.
        # NOTE(review): the dataset loads six columns, leaving one target
        # column after the five features, while output_size is 3 — confirm
        # which targets the three outputs are meant to fit.
        if output.shape != targets.shape:
            raise ValueError(
                f"Model output shape {tuple(output.shape)} does not match target shape "
                f"{tuple(targets.shape)}; adjust output_size or the target columns."
            )

        # calculate the loss
        loss = criterion(output, targets)

        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()

        # perform a single optimization step (parameter update)
        optimizer.step()

        # output training information
        if i % 100 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Step [{i+1}/{len(dataloader)}], Loss: {loss.item():.4f}')
