In [7]:
import torch
from torch.utils.data import IterableDataset, DataLoader, Subset
from datetime import datetime as dt, timedelta
import pandas as pd
import os
import random
import numpy as np
import torch.nn as nn
from pandas import DataFrame as df
import mplfinance as mpf

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

# Seed every random number generator this notebook relies on (Python's
# `random`, NumPy's legacy global generator and PyTorch) with the same value
# so that repeated runs start from the same state.
seed = 42  # any fixed integer works
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(seed)
if torch.cuda.is_available():
    # Also seed the generators of all CUDA devices.
    torch.cuda.manual_seed_all(seed)

cuda


In [8]:
class PriceDataset(torch.utils.data.Dataset):
    """Map-style dataset that serves one per-day price CSV per index.

    For every calendar day between ``start_date_str`` and ``end_date_str``
    (both inclusive) the file
    ``{data_dir}/{item}/{item}-{timespan}-{YYYY-MM-DD}.csv`` is looked up; days
    whose file does not exist are silently skipped.

    Args:
        item: Symbol name; used both as the sub-directory and as the filename
            prefix (e.g. ``'BTCUSDT'``).
        timespan: Interval tag that appears in the filename (e.g. ``'1m'``).
        start_date_str: First day to include, formatted ``YYYY-MM-DD``.
        end_date_str: Last day to include, formatted ``YYYY-MM-DD``.
        data_dir: Root directory holding one sub-directory per symbol. The
            default is the location that used to be hard-coded, so existing
            callers keep working; pass another path to run elsewhere.

    Raises:
        ValueError: If either date string does not match ``%Y-%m-%d``.
    """

    def __init__(self, item, timespan, start_date_str, end_date_str,
                 data_dir='C:/Github/PricePrediction/csvfiles'):
        self.directory = f'{data_dir}/{item}'
        self.item = item
        self.timespan = timespan
        start_date = dt.strptime(start_date_str, '%Y-%m-%d').date()
        end_date = dt.strptime(end_date_str, '%Y-%m-%d').date()
        self.dates = [single_date.strftime("%Y-%m-%d")
                      for single_date in self.daterange(start_date, end_date)]
        # Zero-based CSV column positions to load; per the original author's
        # note these hold the open and close prices.
        self.columns = [1, 4]
        self.filenames = self.get_filenames()

    def daterange(self, start_date, end_date):
        """Yield every date from ``start_date`` to ``end_date`` inclusive.

        Yields nothing when ``end_date`` is earlier than ``start_date``.
        """
        for n in range(int((end_date - start_date).days) + 1):
            yield start_date + timedelta(n)

    def get_filenames(self):
        """Return the paths of the per-day CSV files that exist on disk."""
        candidates = (f"{self.directory}/{self.item}-{self.timespan}-{date}.csv"
                      for date in self.dates)
        return [path for path in candidates if os.path.exists(path)]

    def __len__(self):
        """Number of CSV files found for the requested date range."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Load the ``idx``-th file and return the selected columns.

        Returns:
            A ``float32`` tensor with one row per CSV line and one column per
            entry of ``self.columns``.
        """
        filename = self.filenames[idx]
        # Named `frame` rather than `df` so the local does not shadow the
        # `DataFrame as df` alias imported at the top of the notebook.
        frame = pd.read_csv(filename, usecols=self.columns, header=None)
        return torch.tensor(frame.values, dtype=torch.float32)

def sliding_window_combined(batch, window_size=100):
    """Collate price series into overlapping windows of derived features.

    Each tensor in ``batch`` is read as rows of ``(open, close)`` prices in
    columns 0 and 1. For every row two features are computed:

    * the percentage change from open to close, ``(close - open) * 100 / open``
    * a 0/1 flag that is 1 when ``close > open``

    Every run of ``window_size`` consecutive rows (stride 1) then becomes one
    window. Windows from all series are concatenated in batch order.

    Args:
        batch: Iterable of 2-D tensors with at least two columns.
        window_size: Rows per window. Defaults to 100, the value that was
            previously hard-coded. To use another size with a ``DataLoader``,
            wrap this function with ``functools.partial``.

    Returns:
        Tensor of shape ``(total_windows, window_size, 2)``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
        RuntimeError: If no series has at least ``window_size`` rows, so there
            is nothing to stack.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    per_series = []
    for tensor in batch:
        if tensor.shape[0] < window_size:
            continue  # too short to produce even a single window
        open_price, close_price = tensor[:, 0], tensor[:, 1]
        # The features depend only on their own row, so compute them once for
        # the whole series instead of once per overlapping window.
        pct_change = (close_price - open_price) * 100 / open_price
        binary_change = (close_price > open_price).float()
        features = torch.stack([pct_change, binary_change], dim=-1)  # (rows, 2)
        # unfold -> (n_windows, 2, window_size); swap the last two axes to get
        # the (n_windows, window_size, 2) layout.
        per_series.append(features.unfold(0, window_size, 1).transpose(1, 2))

    if not per_series:
        raise RuntimeError(
            f"sliding_window_combined: no series in the batch has at least "
            f"{window_size} rows"
        )
    return torch.cat(per_series).contiguous()



In [9]:
# Settings shared by both loaders. Each dataset item is one CSV file, and
# batch_size=1 hands exactly one item per batch to the collate function, which
# expands it into sliding windows. Files are visited in order (no shuffling).
_loader_options = dict(
    batch_size=1,
    collate_fn=sliding_window_combined,
    shuffle=False,
    drop_last=True,
)

# NOTE(review): the "test" split uses a different symbol (ETHUSDT) than the
# training split (BTCUSDT) over the same dates - confirm this is intended.
train_dataset = PriceDataset('BTCUSDT', '1m', '2021-03-01', '2023-04-30')
test_dataset = PriceDataset('ETHUSDT', '1m', '2021-03-01', '2023-04-30')

train_loader = DataLoader(train_dataset, **_loader_options)
test_loader = DataLoader(test_dataset, **_loader_options)


In [10]:
# Sanity check 1: number of items in each Dataset (one item per CSV file found)
print(f'Train dataset size: {len(train_dataset)}')
print(f'Test dataset size: {len(test_dataset)}')

# Sanity check 2: number of batches each DataLoader will yield per pass
print(f'Train dataloader size: {len(train_loader)}')
print(f'Test dataloader size: {len(test_loader)}')

# Sanity check 3: shape of a single raw item as returned by the Dataset
sample_tensor = train_dataset[0]
print(f'Shape of the tensor output by train_dataset: {sample_tensor.shape}')

# Sanity check 4: shape of one collated batch as returned by the DataLoader
for batch in train_loader:
    print(f'Shape of the tensor output by train_loader: {batch.shape}')
    break  # only the first batch is needed to inspect the shape




Train dataset size: 791
Test dataset size: 791
Train dataloader size: 791
Test dataloader size: 791
Shape of the tensor output by train_dataset: torch.Size([1440, 2])
Shape of the tensor output by train_loader: torch.Size([1341, 100, 2])


In [11]:
class PriceChangePrediction(nn.Module):
    """Stacked LSTM with two heads that read the final time step.

    The LSTM consumes a batch-first sequence; the hidden output at the last
    time step feeds two independent linear heads:

    * ``fc_pct``: an unbounded scalar (the percentage-change prediction)
    * ``fc_binary``: a scalar passed through a sigmoid (the binary prediction)

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Hidden size of every LSTM layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_dim=2, hidden_dim=100, num_layers=4):
        super().__init__()
        # Inter-layer dropout only exists between stacked layers; PyTorch warns
        # when it is non-zero for a single-layer LSTM, so disable it there.
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True,
                            dropout=0.1 if num_layers > 1 else 0.0)
        self.fc_pct = nn.Linear(hidden_dim, 1)  # output layer for percentage prediction
        self.fc_binary = nn.Linear(hidden_dim, 1)  # output layer for binary prediction

        # Not used by forward(), which starts every call from a zero state.
        # Kept only so existing code that reads this attribute keeps working.
        self.hidden = (torch.zeros(num_layers, 1, hidden_dim),
                       torch.zeros(num_layers, 1, hidden_dim))

    def forward(self, x):
        """Run the network on a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tuple ``(out_pct, out_binary)``, each of shape ``(batch,)``. The
            batch dimension is kept even when ``batch == 1``.
        """
        # With no initial state given, nn.LSTM starts from zeros that already
        # match the device and dtype of x.
        out, _ = self.lstm(x)
        last_step = out[:, -1, :]
        out_pct = self.fc_pct(last_step)  # output for percentage prediction
        out_binary = torch.sigmoid(self.fc_binary(last_step))  # output for binary prediction
        # Squeeze only the trailing feature axis: a bare squeeze() would also
        # drop the batch axis for a batch of one and return a 0-d tensor.
        return out_pct.squeeze(-1), out_binary.squeeze(-1)

In [6]:

def train(model, train_loader, criterion, optimizer, device):
    """Run one optimisation pass over ``train_loader``.

    Every batch is moved to ``device`` and split along axis 1: the model sees
    all steps except the last, while the targets are all steps except the
    first (feature 0 = percentage change, feature 1 = binary change). A
    progress line is printed after every 200th batch.

    Args:
        model: Module returning ``(percentage_outputs, binary_outputs)``.
        train_loader: Sized iterable of batch tensors.
        criterion: Callable ``(pct_out, bin_out, pct_target, bin_target)``
            returning a scalar loss that supports ``backward()``.
        optimizer: Optimiser attached to the model's parameters.
        device: Device the batches are moved to.
    """
    model.train()
    total_batches = len(train_loader)  # used only for the progress message
    for i, windows in enumerate(train_loader):
        windows = windows.to(device)
        history = windows[:, :-1]   # every step but the last
        shifted = windows[:, 1:]    # every step but the first
        # NOTE(review): both targets are (batch, steps - 1) wide. Whether that
        # lines up with what `model` returns depends on `criterion`, which is
        # not visible here - confirm.
        pct_target = shifted[:, :, 0]
        bin_target = shifted[:, :, 1]

        optimizer.zero_grad()
        pct_pred, bin_pred = model(history)
        criterion(pct_pred, bin_pred, pct_target, bin_target).backward()
        optimizer.step()

        if (i + 1) % 200 != 0:
            continue
        print(f"Training progress: [{i + 1}/{total_batches} Batches]")

def evaluate(model, test_loader, criterion, device):
    """Return the mean per-batch loss of ``model`` over ``test_loader``.

    The model is put in eval mode and gradients are disabled. Batches are split
    the same way as in training: inputs are all steps but the last, targets are
    all steps but the first (feature 0 = percentage, feature 1 = binary).

    Args:
        model: Module returning ``(percentage_outputs, binary_outputs)``.
        test_loader: Sized iterable of batch tensors.
        criterion: Callable ``(pct_out, bin_out, pct_target, bin_target)``
            returning a scalar tensor.
        device: Device the batches are moved to.

    Returns:
        Sum of the batch losses divided by ``len(test_loader)``.

    Raises:
        ZeroDivisionError: If ``test_loader`` has length zero.
    """
    model.eval()
    running_loss = 0
    with torch.no_grad():
        for windows in test_loader:
            windows = windows.to(device)
            history = windows[:, :-1]   # every step but the last
            shifted = windows[:, 1:]    # every step but the first
            pct_pred, bin_pred = model(history)
            batch_loss = criterion(pct_pred, bin_pred,
                                   shifted[:, :, 0], shifted[:, :, 1])
            running_loss += batch_loss.item()
    n_batches = len(test_loader)
    return running_loss / n_batches

def train_and_evaluate(model, modelname, train_loader, test_loader, criterion, optimizer, epochs, device):
    """Train for ``epochs`` epochs, checkpointing the best validation loss.

    After each epoch the model is evaluated on ``test_loader``. Whenever the
    validation loss beats the best value seen so far, the model and optimiser
    state dicts are written to ``models/{modelname}.pth``, overwriting the
    previous checkpoint.

    Args:
        model: Module to train.
        modelname: Label used in the log line and as the checkpoint filename.
        train_loader: Batches passed to ``train``.
        test_loader: Batches passed to ``evaluate``.
        criterion: Loss callable forwarded to ``train`` and ``evaluate``.
        optimizer: Optimiser forwarded to ``train``; its state is checkpointed.
        epochs: Number of train/evaluate rounds.
        device: Device forwarded to ``train`` and ``evaluate``.
    """
    checkpoint_path = f'models/{str(modelname)}.pth'
    # Create the checkpoint directory before training starts. torch.save does
    # not create missing directories, so without this the first save would
    # fail only after a full epoch of work.
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

    best_val_loss = float('inf')

    for epoch in range(epochs):
        train(model, train_loader, criterion, optimizer, device)
        val_loss = evaluate(model, test_loader, criterion, device)
        print(f"Epoch {epoch+1}/{epochs} \t {modelname} \t Validation Loss: {val_loss:.10f}")

        # Save the model if the validation loss is the best we've seen so far.
        if val_loss < best_val_loss:
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, checkpoint_path)
            best_val_loss = val_loss

In [None]:
price_model = 