# Imports

In [1]:
import pandas as pd
import vectorbtpro as vbt
import numpy as np

# Initialization

In [2]:
# Global vectorbt settings applied before any backtest in this notebook runs.
# NOTE(review): "1m" is presumably the bar frequency (one minute) — confirm against the data.
vbt.settings.wrapping["freq"] = "1m"
# Value stored under the portfolio "init_cash" setting (presumably the starting cash per backtest).
vbt.settings.portfolio["init_cash"] = 10_000

# Settings and Parameters

In [3]:
# Glob pattern selecting the pickle files of one run.
pickle_files_path = "../data/RID0029_LSTM_pw38_lb250_bt2000_mem6000/*.pkl"
# Passed to the processing helpers and used as td_stop in the backtests below.
prediction_window = 38

# Entry-count threshold: the inline backtests below only run when the signal count exceeds it.
min_num_entries = 100
# Workbook path: "../results/" + the directory holding the pickles (second-to-last path component) + ".xlsx".
excel_output_file_name = f"../results/{pickle_files_path.rsplit('/', 2)[-2]}.xlsx"

# Local Imports

In [4]:
import os
import sys

# Append the current working directory to the module search path; the project-local
# modules imported in the next cell are presumably resolved through it.
sys.path.append(os.getcwd())

In [5]:
from lstm_analysis_utils import (read_pickle_files_into_df, add_forward_prices_to_df, generate_fwd_actual_column
                                 , generate_df_with_euclidean_distances, calculate_slopes, calculate_correlation_slopes
                                )
from lstm_results_utils import (export_results, store_backtest_results)
from lstm_only_backtests import run_backtest_lstm_recommendations_reversal_exits, run_backtests_lstm_recommendations_prediction_size_exit
from lstm_analysis_constants import EntryType
from quantile_value import generate_quantile_bands, extract_boundary_values_from_quantile_bands
from prediction_window_slopes import PredictionWindowSlopes
from long_slope_short_slope_backtests import run_backtest_long_slope_short_slope_prediction_size_exit, run_backtest_long_slope_short_slope_fractional_exits
from long_minus_short_backtests import run_backtest_long_minus_short_entry_type_long_only, run_backtest_long_minus_short_entry_type_short_only, run_backtest_long_minus_short_entry_type_long_short

# Processing

In [6]:
# Build the working DataFrame from the files matched by pickle_files_path.
# NOTE(review): the helper presumably unpickles each file — only point it at trusted data.
df = read_pickle_files_into_df(pickle_files_path)

In [7]:
# The return value is discarded, so the helper is presumably expected to add its
# columns to df in place (TODO confirm against lstm_analysis_utils).
add_forward_prices_to_df(df, prediction_window)
df = df.copy()  # for large prediction_window size, the copy() call eliminates the fragmented dataframe warning

In [8]:
# Return value unused — presumably mutates df in place (TODO confirm).
generate_fwd_actual_column(df)

In [9]:
# Unlike the neighbouring helper calls, this one rebinds df to the frame the helper returns.
df = generate_df_with_euclidean_distances(df, prediction_window)

In [10]:
# Return value unused — presumably adds the slope columns (long_slope, short_slope,
# actual_slope) that later cells read from df (TODO confirm).
calculate_slopes(df)

In [11]:
# Emits the correlation summary shown in this cell's output.
calculate_correlation_slopes(df)

Correlation between Euclidean distance between long array and short array and future actual results:  0.2323749569168827
Correlation between difference in long minus short predictions and future actual results for longs:  0.7729074091322778
Correlation between difference in long minus short predictions and future actual results for shorts:  0.7796842534588357
Correlation between long slopes and future results: -0.012208928800469096
Correlation between short slopes and future results: 0.020035842971377452


In [12]:
# Re-index df by close_time: the column is read as seconds since the Unix epoch (unit="s")
# and converted to timezone-aware UTC timestamps. The close_time column itself stays in df.
df.index = pd.to_datetime(df["close_time"], utc=True, unit="s")

# Running backtest

- ### Storing the results of the backtests

In [13]:
# Shared accumulator: handed to every backtest call below and finally to export_results.
results_as_list = list()

- ### Baseline backtest - just listen to LSTM

In [14]:
# Baseline backtest; results_as_list is passed in, presumably so the helper can append its results.
run_backtest_lstm_recommendations_reversal_exits(df, results_as_list)

In [15]:
# Second baseline variant; additionally receives prediction_window (presumably the exit horizon).
run_backtests_lstm_recommendations_prediction_size_exit(df, results_as_list, prediction_window)

- ### Using different slopes

In [16]:
# Extremes of the two predicted-slope columns.
long_slopes, short_slopes = df["long_slope"], df["short_slope"]
min_long_slope, max_long_slope = long_slopes.min(), long_slopes.max()
min_short_slope, max_short_slope = short_slopes.min(), short_slopes.max()

In [17]:
# Grid parameters for the threshold scans below.
num_quantiles = 5
threshold_increment = 1e-3
# num_quantiles + 1 evenly spaced probabilities from 0 to 1 inclusive.
quantiles = np.linspace(0.0, 1.0, num_quantiles + 1)

- #### long_slope and short_slope

In [18]:
# Slope-based backtest with the prediction-size exit; receives the step size and the
# minimum entry count configured above.
run_backtest_long_slope_short_slope_prediction_size_exit(df, results_as_list, prediction_window, threshold_increment, min_num_entries)

In [19]:
# Slope-based backtest with fractional exits; note that prediction_window is not passed here.
run_backtest_long_slope_short_slope_fractional_exits(df, results_as_list, threshold_increment, min_num_entries)

- #### long_minus_short

In [20]:
# Bounds of the long_minus_short column, used as the scan range.
long_minus_short_min, long_minus_short_max = (
    df['long_minus_short'].min(),
    df['long_minus_short'].max(),
)

# Candidate thresholds: start at the minimum and step by threshold_increment,
# stopping before the maximum (np.arange excludes the end point).
long_minus_short_thresholds = list(
    np.arange(long_minus_short_min, long_minus_short_max, threshold_increment)
)

In [21]:
# Long-only long_minus_short backtest, delegated to the helper module.
# The commented-out loop below is an earlier inline version kept for reference.
run_backtest_long_minus_short_entry_type_long_only(df, results_as_list, prediction_window, threshold_increment, min_num_entries)

# for threshold in long_minus_short_thresholds:
#     entries = pd.Series(np.where((df['long_minus_short'] < threshold), True, False))

#     num_entries = (entries == True).sum()

#     if num_entries > min_num_entries:
#         pf = vbt.Portfolio.from_signals(
#             high              = df['BTCUSDT_High'],
#             low               = df['BTCUSDT_Low'],
#             open              = df['BTCUSDT_Open'],
#             close             = df['BTCUSDT_Close'],
#             entries           = entries, # commented out for a short only backtest    
#             td_stop           = prediction_window, # Hold on to the position for 8 bars
#             time_delta_format = 'Rows', # Use the row index to calculate the time delta    
#             accumulate        = False,    
#             )
        
#         key = f"Long minus short"    
#         slopes = PredictionWindowSlopes(None, None, None, None, threshold, EntryType.LONG_ONLY)
#         store_backtest_results(key, pf, results_as_list, slopes)

In [22]:
# Short-only long_minus_short backtest, delegated to the helper module.
# The commented-out loop below is an earlier inline version kept for reference.
run_backtest_long_minus_short_entry_type_short_only(df, results_as_list, prediction_window, threshold_increment, min_num_entries)

# for threshold in long_minus_short_thresholds:
#     short_entries = pd.Series(np.where((df['long_minus_short'] < threshold), True, False))

#     num_entries = (short_entries == True).sum()

#     if num_entries > min_num_entries:
#         pf = vbt.Portfolio.from_signals(
#             high              = df['BTCUSDT_High'],
#             low               = df['BTCUSDT_Low'],
#             open              = df['BTCUSDT_Open'],
#             close             = df['BTCUSDT_Close'],
#             short_entries     = short_entries, # commented out for a short only backtest    
#             td_stop           = prediction_window, # Hold on to the position for 8 bars
#             time_delta_format = 'Rows', # Use the row index to calculate the time delta    
#             accumulate        = False,    
#             )
        
#         key = f"Long minus short"    
#         slopes = PredictionWindowSlopes(None, None, None, None, threshold, EntryType.SHORT_ONLY)
#         store_backtest_results(key, pf, results_as_list, slopes)

In [23]:
# Long/short scan over the long_minus_short thresholds, gated by the sign of one slope:
#   long  entry: long_minus_short < threshold and long_slope  > 0
#   short entry: long_minus_short < threshold and short_slope < 0
for threshold in long_minus_short_thresholds:
    below_threshold = df['long_minus_short'] < threshold

    # .to_numpy() drops df's index, so both signal Series carry a default RangeIndex.
    entries = pd.Series((below_threshold & (df['long_slope'] > 0)).to_numpy())
    short_entries = pd.Series((below_threshold & (df['short_slope'] < 0)).to_numpy())

    # Combined signal count of both directions; skip thresholds that do not exceed the minimum.
    num_entries = entries.sum() + short_entries.sum()
    if num_entries <= min_num_entries:
        continue

    pf = vbt.Portfolio.from_signals(
        high=df['BTCUSDT_High'],
        low=df['BTCUSDT_Low'],
        open=df['BTCUSDT_Open'],
        close=df['BTCUSDT_Close'],
        entries=entries,                # long signals
        short_entries=short_entries,    # short signals
        td_stop=prediction_window,      # time-based stop after prediction_window rows
        time_delta_format='Rows',       # use the row index to calculate the time delta
        accumulate=False,
    )

    key = "Long minus short with slopes"
    slopes = PredictionWindowSlopes(0, 0, None, None, threshold, EntryType.LONG_SHORT)
    store_backtest_results(key, pf, results_as_list, slopes)

In [24]:
# "Type 2" scan: both slopes must agree on direction.
#   long  entry: long_minus_short < threshold and long_slope  > 0 and short_slope > 0
#   short entry: long_minus_short < threshold and short_slope < 0 and long_slope  < 0
for threshold in long_minus_short_thresholds:
    below_threshold = df['long_minus_short'] < threshold

    # .to_numpy() drops df's index, so both signal Series carry a default RangeIndex.
    entries = pd.Series(
        (below_threshold & (df['long_slope'] > 0) & (df['short_slope'] > 0)).to_numpy()
    )
    short_entries = pd.Series(
        (below_threshold & (df['short_slope'] < 0) & (df['long_slope'] < 0)).to_numpy()
    )

    # Combined signal count of both directions; skip thresholds that do not exceed the minimum.
    num_entries = entries.sum() + short_entries.sum()
    if num_entries <= min_num_entries:
        continue

    pf = vbt.Portfolio.from_signals(
        high=df['BTCUSDT_High'],
        low=df['BTCUSDT_Low'],
        open=df['BTCUSDT_Open'],
        close=df['BTCUSDT_Close'],
        entries=entries,                # long signals
        short_entries=short_entries,    # short signals
        td_stop=prediction_window,      # time-based stop after prediction_window rows
        time_delta_format='Rows',       # use the row index to calculate the time delta
        accumulate=False,
    )

    key = "Long minus short with slopes - type 2"
    slopes = PredictionWindowSlopes(0, 0, None, None, threshold, EntryType.LONG_SHORT)
    store_backtest_results(key, pf, results_as_list, slopes)

In [25]:
# "Type 3" scan: slopes are compared against 0.001 instead of 0.
#   long  entry: long_minus_short < threshold and long_slope  > 0.001
#   short entry: long_minus_short < threshold and short_slope < 0.001
# NOTE(review): the short side uses +0.001, not -0.001, so it also admits slightly
# positive short slopes — confirm this asymmetry is intended.
for threshold in long_minus_short_thresholds:
    below_threshold = df['long_minus_short'] < threshold

    # .to_numpy() drops df's index, so both signal Series carry a default RangeIndex.
    entries = pd.Series((below_threshold & (df['long_slope'] > 0.001)).to_numpy())
    short_entries = pd.Series((below_threshold & (df['short_slope'] < 0.001)).to_numpy())

    # Combined signal count of both directions; skip thresholds that do not exceed the minimum.
    num_entries = entries.sum() + short_entries.sum()
    if num_entries <= min_num_entries:
        continue

    pf = vbt.Portfolio.from_signals(
        high=df['BTCUSDT_High'],
        low=df['BTCUSDT_Low'],
        open=df['BTCUSDT_Open'],
        close=df['BTCUSDT_Close'],
        entries=entries,                # long signals
        short_entries=short_entries,    # short signals
        td_stop=prediction_window,      # time-based stop after prediction_window rows
        time_delta_format='Rows',       # use the row index to calculate the time delta
        accumulate=False,
    )

    key = "Long minus short with slopes - type 3"
    slopes = PredictionWindowSlopes(0.001, 0.001, None, None, threshold, EntryType.LONG_SHORT)
    store_backtest_results(key, pf, results_as_list, slopes)

In [26]:
#run_backtest_long_minus_short_entry_type_long_short(df, results_as_list, prediction_window, threshold_increment, min_num_entries, quantiles)

# loop 1 is split into 100
# for threshold in long_minus_short_thresholds:  
#   for long_slope_threshold in long_slope_quantile_values:
#     for short_slope_threshold in short_slope_quantile_values:
#       entries       = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['long_slope'] > long_slope_threshold), True, False))
#       short_entries = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['short_slope'] < short_slope_threshold), True, False))

#       num_entries = (entries == True).sum() + (short_entries == True).sum()

#       if num_entries > min_num_entries:
#         pf = vbt.Portfolio.from_signals(
#             high              = df['BTCUSDT_High'],
#             low               = df['BTCUSDT_Low'],
#             open              = df['BTCUSDT_Open'],
#             close             = df['BTCUSDT_Close'],
#             entries           = entries, # commented out for a short only backtest    
#             short_entries     = short_entries,
#             td_stop           = prediction_window, # Hold on to the position for 8 bars
#             time_delta_format = 'Rows', # Use the row index to calculate the time delta    
#             accumulate        = False,    
#             )

#         key = f"Long minus short with slopes - type 3"    
#         slopes = PredictionWindowSlopes(long_slope_threshold, short_slope_threshold, threshold, EntryType.LONG_SHORT)
#         store_backtest_results(key, pf, results_as_list, slopes)

# Exporting the results

In [27]:
# Turn the accumulated backtest results into a single table and write it to the
# Excel workbook named in the settings cell.
results_df = export_results(results_as_list)

# to_excel does not create missing parent directories. Make sure the target folder
# exists so a completed (and slow) run of backtests is not lost to a missing
# "../results" directory.
output_dir = os.path.dirname(excel_output_file_name)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
results_df.to_excel(excel_output_file_name)

In [28]:
# Use groupby on the index (date) to calculate statistics
# bars_per_day = df.groupby(df.index.date).size()
# min_bars_per_day = df.groupby(df.index.date).size().min()
# max_bars_per_day = df.groupby(df.index.date).size().max()
# average_bars_per_day = df.groupby(df.index.date).size().mean()

# print("Number of bars per day:")
# print(bars_per_day)

# print("\nMinimum bars per day:", min_bars_per_day)
# print("Maximum bars per day:", max_bars_per_day)
# print("Average bars per day:", average_bars_per_day)
# bars_per_day.to_csv("bars_per_day.csv")

# pf.trades.records_readable.to_csv("records_readable.csv")

In [29]:
# TODO: for Joe
# entries       = pd.Series(np.where((df['long_minus_short'] < 0.615659479451176) & (df['long_slope' ] > 0) & (df['short_slope'] > 0), True, False))
# short_entries = pd.Series(np.where((df['long_minus_short'] < 0.615659479451176) & (df['short_slope'] < 0) & (df['long_slope' ] < 0), True, False))
# clean_entries, clean_short_entries = entries.vbt.signals.clean(short_entries)
# clean_entries.sum() + clean_short_entries.sum()
# pf.trades.records_readable

- ### Other backtests - work in progress and code may not execute at all!!!

In [30]:
# Summary statistics of the long_slope column (diagnostic only; nothing is stored).
df['long_slope'].describe()

count   61111.00000
mean       -0.00005
std         0.00251
min        -0.00405
25%        -0.00234
50%        -0.00008
75%         0.00222
max         0.00405
Name: long_slope, dtype: float64

In [31]:
# short_slope values at the evenly spaced probabilities in `quantiles` (defined in the
# "Using different slopes" section as num_quantiles + 1 points between 0 and 1).
df['short_slope'].quantile(quantiles)

0.00000   -0.00405
0.20000   -0.00315
0.40000   -0.00148
0.60000    0.00063
0.80000    0.00259
1.00000    0.00405
Name: short_slope, dtype: float64

In [32]:
# Summary statistics of the actual_slope column (diagnostic only; nothing is stored).
df['actual_slope'].describe()

count   61111.00000
mean       -0.00000
std         0.00001
min        -0.00007
25%        -0.00001
50%        -0.00000
75%         0.00001
max         0.00007
Name: actual_slope, dtype: float64

In [33]:
# Summary statistics of long_minus_short; its min and max bound the threshold scan above.
df['long_minus_short'].describe()

count   61111.00000
mean        0.89544
std         0.20902
min         0.10839
25%         0.74497
50%         0.89507
75%         1.05143
max         1.41295
Name: long_minus_short, dtype: float64

In [34]:
# print(f"Num entries = {(entries == True).sum()}")
# print(f"Num short entries = {(short_entries == True).sum()}")
# new_df["long_slope"].describe()