# Imports

In [1]:
import pandas as pd
import vectorbtpro as vbt
import numpy as np

# Initialization

In [2]:
# Global vectorbtpro defaults picked up by the portfolios built further down.
# NOTE(review): "1m" is presumably meant as one-minute bars — confirm against the data.
vbt.settings.wrapping["freq"] = "1m"
# Starting cash used by simulated portfolios unless overridden per call.
vbt.settings.portfolio["init_cash"] = 10000

# Local Imports

In [3]:
import os
import sys

sys.path.append(os.getcwd())

from lstm_analysis_utils import (read_pickle_files_into_df, add_forward_prices_to_df, generate_fwd_actual_column
                                 , generate_df_with_euclidean_distances, calculate_slopes, calculate_correlation_slopes
                                )
from settings_and_params import extract_prediction_window_size, generate_excel_output_file_path, generate_dataframe_csv_output_file_path
from output_utils import export_raw_dataframe_to_csv
from lstm_results_utils import (export_results, store_backtest_results)
from lstm_only_backtests import run_backtest_lstm_recommendations_reversal_exits, run_backtests_lstm_recommendations_prediction_size_exit
from lstm_analysis_constants import EntryType
from quantile_value import generate_quantile_bands, extract_boundary_values_from_quantile_bands
from prediction_window_slopes import PredictionWindowSlopes
from long_slope_short_slope_backtests import run_backtest_long_slope_short_slope_prediction_size_exit, run_backtest_long_slope_short_slope_fractional_exits
from long_minus_short_backtests import run_backtest_long_minus_short_entry_type_long_only, run_backtest_long_minus_short_entry_type_short_only, run_backtest_long_minus_short_entry_type_long_short

# Settings and Parameters

In [4]:
# Glob pattern of the pickled model outputs; the directory holding them names the model run.
pickle_files_path = "../data/RID0036_LSTM_pw18_lb250_bt1000_mem5000/*.pkl"

# Second-to-last path component, i.e. the run directory name.
model_name = pickle_files_path.rsplit('/', 2)[-2]
# presumably parsed out of the model name (e.g. the "pw18" token) — confirm in settings_and_params
prediction_window = extract_prediction_window_size(model_name)

# A threshold is only backtested when it yields more than this many entry signals.
min_num_entries = 100

# Destinations for the results workbook and the raw dataframe dump written at the end of the notebook.
excel_output_file_name = generate_excel_output_file_path(model_name)
dataframe_csv_output_file_name = generate_dataframe_csv_output_file_path(model_name)

# Processing

In [5]:
# Build the working dataframe from the pickle files matched by the glob pattern.
# NOTE(review): unpickling can execute arbitrary code — only point this at trusted files.
df = read_pickle_files_into_df(pickle_files_path)

In [6]:
# The return value is discarded, so the helper presumably adds its forward-price columns to df in place — confirm.
add_forward_prices_to_df(df, prediction_window)
df = df.copy()  # for large prediction_window size, the copy() call eliminates the fragmented dataframe warning

In [7]:
# Return value is discarded; presumably adds the forward "actual" column to df in place — confirm in lstm_analysis_utils.
generate_fwd_actual_column(df)

In [8]:
# Rebinds df to the frame returned by the helper (unlike the in-place style calls around it).
df = generate_df_with_euclidean_distances(df, prediction_window)

In [9]:
# Return value is discarded; presumably adds the slope columns (long_slope / short_slope) read by later cells — confirm.
calculate_slopes(df)

In [10]:
# Prints the correlation summary shown in this cell's output.
calculate_correlation_slopes(df)

Correlation between Euclidean distance between long array and short array and future actual results:  0.10156542440852774
Correlation between difference in long minus short predictions and future actual results for longs:  0.7231040768105375
Correlation between difference in long minus short predictions and future actual results for shorts:  0.7225434965352817
Correlation between long slopes and future results: 0.0027161081521049108
Correlation between short slopes and future results: -0.0021124028628880655


In [11]:
# Index the rows by bar close time: close_time is read as epoch seconds (unit="s") and made UTC-aware.
df.index = pd.to_datetime(df["close_time"], utc=True, unit="s")

# Running backtest

- ### Storing the results of the backtests

In [12]:
# Shared accumulator: every backtest below is handed this list to record its results; it is exported at the end.
results_as_list  = []

- ### Baseline backtest - just listen to LSTM

In [13]:
# Baseline backtest (reversal exits, per the helper name); results are recorded via results_as_list.
run_backtest_lstm_recommendations_reversal_exits(df, results_as_list)

In [14]:
# Baseline backtest variant that also receives prediction_window (presumably used as the holding period — confirm).
run_backtests_lstm_recommendations_prediction_size_exit(df, results_as_list, prediction_window)

- ### Using different slopes

In [None]:
# Observed ranges of the two slope columns; they bound the np.arange threshold grids built later.
long_slope_column, short_slope_column = df["long_slope"], df["short_slope"]

min_long_slope, max_long_slope = long_slope_column.min(), long_slope_column.max()
min_short_slope, max_short_slope = short_slope_column.min(), short_slope_column.max()

In [None]:
# Sweep resolution shared by the threshold backtests below.
num_quantiles = 5            # number of quantile buckets
threshold_increment = 0.001  # step between consecutive thresholds in the np.arange grids

# Bucket boundaries: num_quantiles + 1 evenly spaced points from 0 to 1 inclusive.
quantiles = np.linspace(0.0, 1.0, num_quantiles + 1)

- #### long_slope and short_slope

In [None]:
# Slope-based backtest sweep; receives the threshold step and the minimum-entries cutoff, and records into results_as_list.
run_backtest_long_slope_short_slope_prediction_size_exit(df, results_as_list, prediction_window, threshold_increment, min_num_entries)

In [None]:
# Slope-based sweep with fractional exits (per the helper name); note it is not given prediction_window.
run_backtest_long_slope_short_slope_fractional_exits(df, results_as_list, threshold_increment, min_num_entries)

- #### long_minus_short

In [None]:
# Observed range of the long_minus_short column.
lms_column = df['long_minus_short']
long_minus_short_min = lms_column.min()
long_minus_short_max = lms_column.max()

# Thresholds swept by the loops below: from the minimum up to (but excluding) the maximum,
# in steps of threshold_increment.
long_minus_short_thresholds = list(np.arange(long_minus_short_min, long_minus_short_max, threshold_increment))

In [None]:
# long_minus_short sweep, long-only entry type (per the helper name); records into results_as_list.
run_backtest_long_minus_short_entry_type_long_only(df, results_as_list, prediction_window, threshold_increment, min_num_entries)

In [None]:
# long_minus_short sweep, short-only entry type (per the helper name); records into results_as_list.
run_backtest_long_minus_short_entry_type_short_only(df, results_as_list, prediction_window, threshold_increment, min_num_entries)

In [None]:
# Long/short sweep over the long_minus_short threshold:
#   long  when long_minus_short < threshold and long_slope  > 0
#   short when long_minus_short < threshold and short_slope < 0
# One portfolio is simulated and stored per threshold that yields enough signals.
#
# The signal masks are kept as df-indexed boolean Series rather than being rebuilt with
# pd.Series(np.where(...)): that round trip replaced the DatetimeIndex with a fresh 0..n-1
# RangeIndex, so the signals no longer shared an index with the BTCUSDT_* price columns
# passed to from_signals.
long_slope_positive  = df['long_slope'] > 0   # does not depend on threshold, computed once
short_slope_negative = df['short_slope'] < 0  # does not depend on threshold, computed once

for threshold in long_minus_short_thresholds:
  below_threshold = df['long_minus_short'] < threshold
  entries         = below_threshold & long_slope_positive
  short_entries   = below_threshold & short_slope_negative

  num_entries = entries.sum() + short_entries.sum()

  # Skip thresholds that produce too few signals.
  if num_entries > min_num_entries:
    pf = vbt.Portfolio.from_signals(
        high              = df['BTCUSDT_High'],
        low               = df['BTCUSDT_Low'],
        open              = df['BTCUSDT_Open'],
        close             = df['BTCUSDT_Close'],
        entries           = entries,
        short_entries     = short_entries,
        td_stop           = prediction_window, # hold each position for prediction_window bars
        time_delta_format = 'Rows',            # td_stop is counted in rows, not wall-clock time
        accumulate        = False,
        )

    key = "Long minus short with slopes"
    # The two leading zeros record the slope cutoffs used in the masks above.
    slopes = PredictionWindowSlopes(0, 0, None, None, threshold, EntryType.LONG_SHORT)
    store_backtest_results(key, pf, results_as_list, slopes)

In [None]:
# "Type 3" long/short sweep over the long_minus_short threshold, with a 0.001 slope cutoff:
#   long  when long_minus_short < threshold and long_slope  > 0.001
#   short when long_minus_short < threshold and short_slope < 0.001
# NOTE(review): the short side uses `< 0.001`, not `< -0.001`, so slightly positive short
# slopes still trigger shorts — confirm this asymmetry is intended.
#
# The signal masks are kept as df-indexed boolean Series rather than being rebuilt with
# pd.Series(np.where(...)): that round trip replaced the DatetimeIndex with a fresh 0..n-1
# RangeIndex, so the signals no longer shared an index with the BTCUSDT_* price columns
# passed to from_signals.
long_slope_above_cutoff  = df['long_slope'] > 0.001   # does not depend on threshold, computed once
short_slope_below_cutoff = df['short_slope'] < 0.001  # does not depend on threshold, computed once

for threshold in long_minus_short_thresholds:
  below_threshold = df['long_minus_short'] < threshold
  entries         = below_threshold & long_slope_above_cutoff
  short_entries   = below_threshold & short_slope_below_cutoff

  num_entries = entries.sum() + short_entries.sum()

  # Skip thresholds that produce too few signals.
  if num_entries > min_num_entries:
    pf = vbt.Portfolio.from_signals(
        high              = df['BTCUSDT_High'],
        low               = df['BTCUSDT_Low'],
        open              = df['BTCUSDT_Open'],
        close             = df['BTCUSDT_Close'],
        entries           = entries,
        short_entries     = short_entries,
        td_stop           = prediction_window, # hold each position for prediction_window bars
        time_delta_format = 'Rows',            # td_stop is counted in rows, not wall-clock time
        accumulate        = False,
        )

    key = "Long minus short with slopes - type 3"
    # The two leading 0.001 values record the slope cutoffs used in the masks above.
    slopes = PredictionWindowSlopes(0.001, 0.001, None, None, threshold, EntryType.LONG_SHORT)
    store_backtest_results(key, pf, results_as_list, slopes)

In [None]:
# "Type 2" long/short sweep over the long_minus_short threshold; both slopes must agree:
#   long  when long_minus_short < threshold and long_slope > 0 and short_slope > 0
#   short when long_minus_short < threshold and long_slope < 0 and short_slope < 0
#
# The signal masks are kept as df-indexed boolean Series rather than being rebuilt with
# pd.Series(np.where(...)): that round trip replaced the DatetimeIndex with a fresh 0..n-1
# RangeIndex, so the signals no longer shared an index with the BTCUSDT_* price columns
# passed to from_signals.
both_slopes_positive = (df['long_slope'] > 0) & (df['short_slope'] > 0)  # does not depend on threshold
both_slopes_negative = (df['short_slope'] < 0) & (df['long_slope'] < 0)  # does not depend on threshold

for threshold in long_minus_short_thresholds:
  below_threshold = df['long_minus_short'] < threshold
  entries         = below_threshold & both_slopes_positive
  short_entries   = below_threshold & both_slopes_negative

  num_entries = entries.sum() + short_entries.sum()

  # Skip thresholds that produce too few signals.
  if num_entries > min_num_entries:
    pf = vbt.Portfolio.from_signals(
        high              = df['BTCUSDT_High'],
        low               = df['BTCUSDT_Low'],
        open              = df['BTCUSDT_Open'],
        close             = df['BTCUSDT_Close'],
        entries           = entries,
        short_entries     = short_entries,
        td_stop           = prediction_window, # hold each position for prediction_window bars
        time_delta_format = 'Rows',            # td_stop is counted in rows, not wall-clock time
        accumulate        = False,
        )

    key = "Long minus short with slopes - type 2"
    # The two leading zeros record the slope cutoffs used in the masks above.
    slopes = PredictionWindowSlopes(0, 0, None, None, threshold, EntryType.LONG_SHORT)
    store_backtest_results(key, pf, results_as_list, slopes)

In [None]:
# long_minus_short sweep, long/short entry type (per the helper name); this variant also receives the quantile boundaries.
run_backtest_long_minus_short_entry_type_long_short(df, results_as_list, prediction_window, threshold_increment, min_num_entries, quantiles)

In [None]:
# Quantile-based threshold grids: 101 evenly spaced quantile levels for long_minus_short,
# and the coarser `quantiles` levels for the two slope columns.
# NOTE: this rebinds long_minus_short_thresholds, replacing the np.arange-based list the
# sweep loops above iterate over; re-running those loops after this cell uses this grid instead.
long_minus_short_quantiles = np.linspace(0, 1, num=101)

long_minus_short_thresholds = list(df["long_minus_short"].quantile(long_minus_short_quantiles))
long_slope_thresholds = list(df["long_slope"].quantile(quantiles))
short_slope_thresholds = list(df["short_slope"].quantile(quantiles))

In [None]:
# Number of parameter combinations in the quantile-based grid (product of the three list lengths).
len(long_minus_short_thresholds) * len(long_slope_thresholds) * len(short_slope_thresholds)

In [None]:
# Fixed-step threshold grids over the observed range of each column.
lms_threshold = np.arange(long_minus_short_min, long_minus_short_max, threshold_increment)
long_slope_thresh = np.arange(min_long_slope, max_long_slope, threshold_increment)
short_slope_thresh = np.arange(min_short_slope, max_short_slope, threshold_increment)

# Size of the full cartesian product of the three grids.
grid_size = len(lms_threshold) * len(long_slope_thresh) * len(short_slope_thresh)
print(grid_size)

In [None]:
# Number of backtest results accumulated so far.
len(results_as_list)

- ### Using the hyperparameter optimization feature of VBT

In [None]:
# def lms_with_slopes_indicator_func(  long_minus_short, long_slope, short_slope            # input names
#                                    , lms_threshold, long_slope_thresh, short_slope_thresh # param names
#                                   ):
#     entries       = pd.Series(np.where((long_minus_short < lms_threshold) & (long_slope  > long_slope_thresh ), True, False))
#     short_entries = pd.Series(np.where((long_minus_short < lms_threshold) & (short_slope < short_slope_thresh), True, False))
    
#     return entries, short_entries

# lms_with_slopes_indicator = vbt.IndicatorFactory(
#     class_name  ='LongMinusShortwithSlopes', # name of the class
#     short_name  ='LMSWithSlopes', # name of the indicator
#     input_names =['long_minus_short', 'long_slope', 'short_slope'], # names of input arguments
#     param_names =['lms_threshold', 'long_slope_thresh', 'short_slope_thresh'], # names of parameters
#     output_names=['entries', 'short_entries'], # names of output values
# ).with_apply_func(
#     lms_with_slopes_indicator_func, # function to apply
#     takes_1d=True, # whether the function takes 1-dim. arrays as input
#     lms_threshold=0.5, # default value for parameter 'lms_threshold'
#     long_slope_thresh=0.0, # default value for parameter 'long_slope_thresh'
#     short_slope_thresh=0.0, # default value for parameter 'short_slope_thresh'
# )

# lms_strategy = lms_with_slopes_indicator.run(
#     long_minus_short    =df['long_minus_short'],
#     long_slope          =df['long_slope'],
#     short_slope         =df['short_slope'],
#     lms_threshold       =np.arange(long_minus_short_min, long_minus_short_max, threshold_increment),
#     long_slope_thresh   =np.arange(min_long_slope, max_long_slope, threshold_increment),
#     short_slope_thresh  =np.arange(min_short_slope, max_short_slope, threshold_increment),
#     param_product       =True, # True: all combinations of parameters, False: only one combination for each parameter
# )
# multiple_pf = vbt.Portfolio.from_signals(
#     close               =df['BTCUSDT_Close'],
#     high                =df['BTCUSDT_High'],
#     low                 =df['BTCUSDT_Low'],
#     open                =df['BTCUSDT_Open'],
#     entries             =lms_strategy.entries,
#     short_entries       =lms_strategy.short_entries,
#     td_stop             =prediction_window,
#     time_delta_format   ='Rows',
#     accumulate          =False,
    
# )

In [None]:
#multiple_pf.trades.count()

In [None]:
#print(multiple_pf)

# For Joseph

- ### Run the specific backtest

In [None]:
# threshold = 0.356388669788838
# long_threshold  = 0.001
# short_threshold = 0.001

# entries       = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['long_slope'] > long_threshold), True, False))
# short_entries = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['short_slope'] < short_threshold), True, False))

# num_entries = (entries == True).sum() + (short_entries == True).sum()

# if num_entries > min_num_entries:
#   pf = vbt.Portfolio.from_signals(
#       high              = df['BTCUSDT_High'],
#       low               = df['BTCUSDT_Low'],
#       open              = df['BTCUSDT_Open'],
#       close             = df['BTCUSDT_Close'],
#       entries           = entries, # commented out for a short only backtest    
#       short_entries     = short_entries,
#       td_stop           = prediction_window, # Hold on to the position for 8 bars
#       time_delta_format = 'Rows', # Use the row index to calculate the time delta    
#       accumulate        = False,    
#       )
  
#   key = f"Long minus short with slopes"
  

In [None]:
#pf.stats()

In [None]:
# threshold = 0.442388669788838

# entries       = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['long_slope'] > 0.001), True, False))
# short_entries = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['short_slope'] < 0.001), True, False))

# num_entries = (entries == True).sum() + (short_entries == True).sum()

# if num_entries > min_num_entries:
#   pf = vbt.Portfolio.from_signals(
#       high              = df['BTCUSDT_High'],
#       low               = df['BTCUSDT_Low'],
#       open              = df['BTCUSDT_Open'],
#       close             = df['BTCUSDT_Close'],
#       entries           = entries, # commented out for a short only backtest    
#       short_entries     = short_entries,
#       td_stop           = prediction_window, # Hold on to the position for 8 bars
#       time_delta_format = 'Rows', # Use the row index to calculate the time delta    
#       accumulate        = False,    
#       )

#   key = f"Long minus short with slopes - type 3"    
#   slopes = PredictionWindowSlopes(0.001, 0.001, None, None, threshold, EntryType.LONG_SHORT)
#   store_backtest_results(key, pf, results_as_list, slopes)

In [None]:
# threshold = 0.500388669788838
# entries = pd.Series(np.where((df['long_minus_short'] < threshold), True, False))

# num_entries = (entries == True).sum()

# if num_entries > min_num_entries:
#     pf = vbt.Portfolio.from_signals(
#         high              = df['BTCUSDT_High'],
#         low               = df['BTCUSDT_Low'],
#         open              = df['BTCUSDT_Open'],
#         close             = df['BTCUSDT_Close'],
#         entries           = entries, # commented out for a short only backtest    
#         td_stop           = prediction_window, # Hold on to the position for 8 bars
#         time_delta_format = 'Rows', # Use the row index to calculate the time delta    
#         accumulate        = False,    
#         )
    
#     key = f"Long minus short"    
#     slopes = PredictionWindowSlopes(None, None, None, None, threshold, EntryType.LONG_ONLY)
#     store_backtest_results(key, pf, results_as_list, slopes)

- ### Output the CSV file

In [None]:
#pf.trades.records_readable.to_csv('../results/RID0029_136_trades.csv', index=False)

In [None]:
#pf.trades.records_readable

In [None]:
#pf.trades.records_readable[pf.trades.records_readable["Direction"] == "Long"].count()

- ### Output Excel file

In [None]:
#output_df.to_csv('../results/RID0029_dataframe.csv')

# Exporting the results

In [15]:
# Turn the accumulated backtest results into results_df (presumably a DataFrame, given the
# .to_excel call), write it to the Excel output path, then dump the working dataframe to CSV.
results_df = export_results(results_as_list)
results_df.to_excel(excel_output_file_name)
export_raw_dataframe_to_csv(df, dataframe_csv_output_file_name)

In [None]:
# Use groupby on the index (date) to calculate statistics
# bars_per_day = df.groupby(df.index.date).size()
# min_bars_per_day = df.groupby(df.index.date).size().min()
# max_bars_per_day = df.groupby(df.index.date).size().max()
# average_bars_per_day = df.groupby(df.index.date).size().mean()

# print("Number of bars per day:")
# print(bars_per_day)

# print("\nMinimum bars per day:", min_bars_per_day)
# print("Maximum bars per day:", max_bars_per_day)
# print("Average bars per day:", average_bars_per_day)
# bars_per_day.to_csv("bars_per_day.csv")

# pf.trades.records_readable.to_csv("records_readable.csv")

In [None]:
# TODO: for Joe
# entries       = pd.Series(np.where((df['long_minus_short'] < 0.615659479451176) & (df['long_slope' ] > 0) & (df['short_slope'] > 0), True, False))
# short_entries = pd.Series(np.where((df['long_minus_short'] < 0.615659479451176) & (df['short_slope'] < 0) & (df['long_slope' ] < 0), True, False))
# clean_entries, clean_short_entries = entries.vbt.signals.clean(short_entries)
# clean_entries.sum() + clean_short_entries.sum()
# pf.trades.records_readable

- ### Other backtests - work in progress and code may not execute at all!!!

In [None]:
# Summary statistics of the long_slope column (shown as the cell output).
df['long_slope'].describe()

In [None]:
# short_slope values at the quantile levels defined in `quantiles`.
df['short_slope'].quantile(quantiles)

In [None]:
# Summary statistics of the actual_slope column (shown as the cell output).
df['actual_slope'].describe()

In [None]:
# Summary statistics of the long_minus_short column (shown as the cell output).
df['long_minus_short'].describe()

In [None]:
# print(f"Num entries = {(entries == True).sum()}")
# print(f"Num short entries = {(short_entries == True).sum()}")
# new_df["long_slope"].describe()