## 1. Libraries and settings

In [None]:
import numpy as np
import random
import pandas as pd 
from pylab import mpl, plt


import optuna

import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight

from operator import itemgetter
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler


import math, time
from math import sqrt
import itertools

import datetime
from datetime import date

import matplotlib.pyplot as plt
plt.style.use('seaborn-v0_8-darkgrid')
mpl.rcParams['font.family'] = 'serif'
%matplotlib inline

import warnings
warnings.simplefilter("ignore", UserWarning)

import vectorbtpro as vbt
vbt.settings.set_theme('dark')
vbt.settings['plotting']['layout']['width'] = 600
vbt.settings['plotting']['layout']['height'] = 300

# Hyperparameters
# Window length: number of consecutive rows per input sequence. It is passed
# to create_sequences() and also used to offset the label arrays.
timestep = 20
# # Update these dimensions based on your dataset

# hidden_dim = 32
# num_layers = 2

# num_epochs = 200
# learning_rate=0.01
# step_size=30
# gamma=0.9

# dropout_rate=0.2
# print_epochs = 2

In [None]:
# Read the CSV and index it by time; the 'timestamp' column holds epoch seconds.
df = pd.read_csv('2ySOLdata1h.csv')
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
df = df.set_index('timestamp')

# Opt in to pandas' future behaviour: replace() must not silently downcast.
pd.set_option('future.no_silent_downcasting', True)

# Encode the textual signal labels as integer class ids (0 / 1 / 2).
signal_codes = {'SignalShort': 0, 'SignalNone': 1, 'SignalLong': 2}
df['signal'] = df['signal'].replace(signal_codes)

# Fill missing values with the last known value of each column.
df = df.ffill()

In [None]:
# Wrap the DataFrame in a vectorbt Data object and compute indicator features.
# NOTE(review): presumably run("talib", ...) evaluates the TA-Lib indicators and
# `mavp` needs an explicit `periods` argument - confirm against the vectorbt docs.
data = vbt.Data.from_data(df)
features = data.run("talib", mavp=vbt.run_arg_dict(periods=14))
# Append the indicator columns to the frame stored under the 'symbol' key.
data.data['symbol'] = pd.concat([data.data['symbol'], features], axis=1)
# Remove the raw Open/High/Low columns from that frame (in place).
data.data['symbol'].drop(['Open', 'High', 'Low'], axis=1, inplace=True)
# This will drop columns from the DataFrame where all values are NaN
data.data['symbol'] = data.data['symbol'].dropna(axis=1, how='all')

# NOTE(review): 'Open', 'High' and 'Low' were dropped from data.data['symbol']
# just above, before these lookups - confirm data.get() still returns them.
open_price = data.get('Open')
high_price = data.get('High')
low_price = data.get('Low')
close_price = data.get('Close')

# Drop every row that still has a NaN in any remaining column.
data.data['symbol'] = data.data['symbol'].dropna()
# Alternative kept for reference: use every column except the label as a predictor.
# predictor_list = data.data['symbol'].drop('signal', axis=1).columns.tolist()
# Columns used as model inputs, addressed by tuple labels.
# NOTE(review): presumably (indicator, output) pairs from the talib run - confirm.
predictor_list = [('aroon', 'aroondown'), 
                  ('aroon', 'aroonup'), 
                  ('bop', 'real'), 
                  ('cci', 'real'), 
                  ('cdlbelthold', 'integer'), 
                  ('macd', 'macd'), 
                  ('midprice', 'real'), 
                  ('mom', 'real'), 
                  ('roc', 'real'), 
                  ('stochrsi', 'fastk'), 
                  ('willr', 'real')]


# Model inputs: the selected predictor columns.
X = data.data['symbol'][predictor_list]

# Labels: the integer-encoded 'signal' column.
y = data.data['symbol']['signal']

# Convert the column labels of X to strings.
X.columns = X.columns.astype(str)



In [None]:
# Chronological split (shuffle=False): the last 30% of the rows are held out
# as the test set, the first 70% form the training+validation set.
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)

# Split the training+validation set again: its last 20% becomes the validation
# set (14% of all rows), the remaining 80% the training set (56% of all rows).
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.2, shuffle=False)

# Fit the scaler on the training rows only and reuse those statistics for the
# validation and test rows. Refitting on the validation rows would scale them
# with a different mean/std than the data the model is trained on, and would
# leave `scaler` holding validation statistics for any later use.
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
X_val_scaled = scaler.transform(X_val)


In [None]:
def create_sequences(input_data, sequence_length):
    """Slice ``input_data`` into overlapping windows of ``sequence_length`` rows.

    Window ``i`` covers rows ``i`` .. ``i + sequence_length - 1``. The window
    that would end on the very last row is not produced, so the result holds
    ``len(input_data) - sequence_length`` windows (none when that is <= 0).

    Args:
        input_data: sliceable sequence of rows (e.g. a 2-D NumPy array).
        sequence_length: number of rows per window.

    Returns:
        A NumPy array built from the list of windows.
    """
    window_count = len(input_data) - sequence_length
    windows = [
        input_data[start:start + sequence_length]
        for start in range(window_count)
    ]
    return np.array(windows)

In [None]:
# Build the sliding-window inputs for each split, `timestep` rows per window.
X_train_list, X_test_list, X_val_list = (
    create_sequences(scaled_split, timestep)
    for scaled_split in (X_train_scaled, X_test_scaled, X_val_scaled)
)

In [None]:
# Skip the first `timestep` labels of each split so that label k lines up with
# window k, i.e. each window is paired with the label of the row right after it.
y_train_seq_ar, y_test_seq_ar, y_val_seq_ar = (
    split_labels[timestep:] for split_labels in (y_train, y_test, y_val)
)

In [None]:
# Windowed inputs as NumPy arrays.
x_train_ar, x_test_ar, x_val_ar = (
    np.array(windows) for windows in (X_train_list, X_test_list, X_val_list)
)

# Labels as plain integer NumPy arrays.
y_train_seq, y_test_seq, y_val_seq = (
    np.array(labels).astype(int)
    for labels in (y_train_seq_ar, y_test_seq_ar, y_val_seq_ar)
)

In [None]:
# Use the MPS backend when PyTorch reports it as available, otherwise the CPU.
device_name = "mps" if torch.backends.mps.is_available() else "cpu"
device = torch.device(device_name)
print(f"Using device: {device}")

In [None]:

# Convert to tensors: float32 for the windowed features ...
X_train_tensor, X_test_tensor, X_val_tensor = (
    torch.tensor(window_array, dtype=torch.float32)
    for window_array in (x_train_ar, x_test_ar, x_val_ar)
)
# ... and long (int64) for the class labels.
y_train_tensor, y_test_tensor, y_val_tensor = (
    torch.tensor(label_array, dtype=torch.long)
    for label_array in (y_train_seq, y_test_seq, y_val_seq)
)


In [None]:


# compute_class_weight is given NumPy labels; unwrap y_train_seq if it is a tensor.
y_train_seq_np = (
    y_train_seq.cpu().numpy() if isinstance(y_train_seq, torch.Tensor) else y_train_seq
)

# 'balanced' class weights for the classes present in the training labels.
present_classes = np.unique(y_train_seq_np)
class_weights = compute_class_weight(
    class_weight='balanced', classes=present_classes, y=y_train_seq_np
)

# Float tensor placed on `device` so it can be handed to the loss function.
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)


In [None]:
# X_train_tensor and y_train_tensor are the input and label tensors used for training.
# Make sure both are already torch tensors at this point.

# Select the compute device: MPS if PyTorch reports it as available, else CPU.
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")




class BiLSTMClassifier(nn.Module):
    """Bidirectional LSTM that maps a whole input sequence to class logits.

    The final hidden states of the last LSTM layer (forward and backward
    direction) are concatenated, passed through dropout and projected to
    ``output_dim`` logits by a linear layer.

    Args:
        input_dim: number of features per time step.
        hidden_dim: hidden size of each LSTM direction.
        num_layers: number of stacked LSTM layers.
        output_dim: number of classes (size of the logit vector).
        dropout_rate: dropout probability applied to the concatenated hidden
            state before the linear layer (active in training mode only).
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, dropout_rate):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # Bidirectional LSTM layer; inputs are (batch, seq_len, input_dim)
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, bidirectional=True)

        # Dropout layer
        self.dropout = nn.Dropout(dropout_rate)

        # Fully connected layer
        # The input dimension is twice the hidden_dim because it's bidirectional
        self.fc = nn.Linear(hidden_dim * 2, output_dim)

    def forward(self, x):
        """Return logits of shape (batch, output_dim) for ``x`` of shape (batch, seq_len, input_dim)."""
        batch_size = x.size(0)

        # Zero initial hidden/cell states, created on the input's own device
        # and dtype so the module does not depend on a global `device`.
        state_shape = (self.num_layers * 2, batch_size, self.hidden_dim)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)

        # Only the final hidden state is needed; per-step outputs are ignored.
        _, (hn, _) = self.lstm(x, (h0, c0))

        # hn[-2] / hn[-1]: final hidden state of the last layer's forward /
        # backward direction.
        summary = torch.cat((hn[-2, :, :], hn[-1, :, :]), dim=1)

        # Apply the configured dropout; previously the layer was constructed
        # but never used, so `dropout_rate` had no effect.
        summary = self.dropout(summary)

        return self.fc(summary)


In [None]:

# Search budget: training epochs per trial, and number of Optuna trials.
num_epochs, num_trials = 50, 50


def objective(trial):
    """Optuna objective: train one BiLSTM configuration and score it by back-test.

    Hyperparameters are sampled from ``trial``. The model is trained full-batch
    for ``num_epochs`` epochs on the training tensors. Its predictions on the
    *validation* split are then turned into long entries (class 2) and long
    exits (class 0) for a vectorbt portfolio simulation.

    The validation split is used for scoring so that the held-out test split
    is not consumed by hyperparameter selection.

    Reads the module-level names ``X_train_tensor``, ``y_train_tensor``,
    ``X_val_tensor``, ``y_train``, ``y_val``, ``timestep``, ``num_epochs``,
    ``class_weights_tensor``, ``device`` and ``data``.

    Args:
        trial: the Optuna trial that supplies hyperparameter suggestions.

    Returns:
        ``Total Return [%] - Max Drawdown [%]`` of the simulated portfolio
        (higher is better). The total return counts as 0 when fewer than
        5 orders were placed.

    Raises:
        ValueError: if the number of predictions does not match the number of
            validation rows selected for the back-test.
    """
    # Suggest hyperparameters
    hidden_dim = trial.suggest_categorical('hidden_dim', [16, 32, 64])
    num_layers = trial.suggest_int('num_layers', 1, 3)
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    step_size = trial.suggest_int('step_size', 10, 100)
    gamma = trial.suggest_float('gamma', 0.85, 0.99)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.4)

    # Initialize the model on the selected device
    num_classes = len(np.unique(y_train_tensor.cpu().numpy()))
    model = BiLSTMClassifier(
        input_dim=X_train_tensor.shape[2],
        hidden_dim=hidden_dim,
        num_layers=num_layers,
        output_dim=num_classes,
        dropout_rate=dropout_rate,
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)

    # Only the tensors that are actually used are copied to the device.
    X_train_dev = X_train_tensor.to(device)
    y_train_dev = y_train_tensor.to(device)
    X_val_dev = X_val_tensor.to(device)

    # Training loop: one full-batch gradient step per epoch
    model.train()
    for _ in range(num_epochs):
        optimizer.zero_grad()
        loss = criterion(model(X_train_dev), y_train_dev)
        loss.backward()
        optimizer.step()
        scheduler.step()

    # Predict classes for the validation windows. Softmax is monotonic, so the
    # arg-max of the raw logits is the same class as the arg-max probability.
    model.eval()
    with torch.no_grad():
        val_logits = model(X_val_dev)
        _, predicted_labels = torch.max(val_logits, 1)
        predicted_labels_numpy = predicted_labels.cpu().numpy()

    # Select the rows the predictions belong to. The splits are unshuffled, so
    # the validation rows directly follow the training rows, and prediction k
    # belongs to validation row `timestep + k`.
    val_start = len(y_train) + timestep
    val_stop = len(y_train) + len(y_val)
    df_split = data.data['symbol'][val_start:val_stop].copy()
    if len(df_split) != len(predicted_labels_numpy):
        raise ValueError(
            f"Got {len(predicted_labels_numpy)} predictions for {len(df_split)} validation rows"
        )

    # Use predicted labels to simulate a trading strategy
    df_split.loc[:, "signal"] = predicted_labels_numpy
    signal = df_split['signal']
    entries = signal == 2
    exits = signal == 0
    pf = vbt.Portfolio.from_signals(
        close=df_split.Close,
        long_entries=entries,
        long_exits=exits,
        size=100,
        size_type='value',
        init_cash='auto',
    )
    # A list (not a set) keeps the subplot order the same on every run.
    pf.plot(["orders", "cum_returns"], settings=dict(bm_returns=False)).show()
    stats = pf.stats()
    total_return = stats['Total Return [%]']
    max_drawdown = stats['Max Drawdown [%]']
    orders = stats['Total Orders']

    # Too few orders: do not reward the return of a near-inactive strategy.
    if orders < 5:
        print(f"Only {orders} trades were made")
        total_return = 0.0

    # Score to maximize: total return penalized by the maximum drawdown.
    return total_return - max_drawdown

# Keep the module-level tensors on the CPU; objective() copies the tensors it
# uses to `device` at the start of every trial.
X_train_tensor, y_train_tensor = X_train_tensor.cpu(), y_train_tensor.cpu()
X_val_tensor, y_val_tensor = X_val_tensor.cpu(), y_val_tensor.cpu()
X_test_tensor, y_test_tensor = X_test_tensor.cpu(), y_test_tensor.cpu()

# Run `num_trials` trials and keep the configuration with the highest score.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=num_trials)

print('Best trial:', study.best_trial.params)
