# Imports

In [1]:
import pandas as pd
import vectorbtpro as vbt
import numpy as np

# Initialization

In [2]:
# Global vectorbt defaults applied to every portfolio built in this notebook:
# the index frequency used when wrapping results ("1m" — presumably one-minute
# bars, TODO confirm) and the starting cash.
vbt.settings.wrapping["freq"] = "1m"
vbt.settings.portfolio["init_cash"] = 10000

# Settings and Parameters

In [3]:
# Glob pattern selecting the pickle files of the run to analyse.
pickle_files_path = "../data/RID0029_LSTM_pw38_lb250_bt2000_mem6000/*.pkl"
# Passed to the forward-price / distance helpers and used as the time stop
# (in rows) of the backtests.
prediction_window = 38

# A parameter combination is only backtested when it yields more entry
# signals than this.
min_num_entries = 100
# The workbook is named after the run directory, i.e. the path component that
# precedes the "*.pkl" glob.
excel_output_file_name = "../results/{}.xlsx".format(pickle_files_path.split("/")[-2])

# Local Imports

In [4]:
import os
import sys

# Add the current working directory to the module search path — presumably so
# the project-local modules imported in the next cell can be found when the
# notebook is started from its own folder.
sys.path.append(os.getcwd())

In [5]:
from lstm_analysis_utils import (read_pickle_files_into_df, add_forward_prices_to_df, generate_fwd_actual_column
                                 , generate_df_with_euclidean_distances, calculate_slopes, calculate_correlation_slopes
                                )
from lstm_results_utils import (export_results, store_backtest_results)
from lstm_analysis_constants import (ActionType, EntryType, LSTM_REVERSAL_EXITS_BACKTEST_RESULT_KEY
                                     , LSTM_PREDICTION_WINDOW_EXITS_BACKTEST_RESULT_KEY)
from quantile_value import generate_quantile_bands, extract_boundary_values_from_quantile_bands
from prediction_window_slopes import PredictionWindowSlopes

# Processing

In [6]:
# Build the working DataFrame from the files matching `pickle_files_path`
# (the loading logic lives in lstm_analysis_utils).
# NOTE(review): if the helper unpickles these files directly, only point it at
# trusted data — unpickling can execute arbitrary code.
df = read_pickle_files_into_df(pickle_files_path)

In [7]:
# The return value is not used, so the helper is expected to add its columns
# to `df` in place — TODO confirm against lstm_analysis_utils.
add_forward_prices_to_df(df, prediction_window)
df = df.copy()  # for large prediction_window size, the copy() call eliminates the fragmented dataframe warning

In [8]:
# Return value is discarded; presumably adds the forward "actual" column to
# `df` in place — TODO confirm.
generate_fwd_actual_column(df)

In [9]:
# Unlike the in-place helpers above, this one returns the frame to continue
# with, so `df` is rebound to its result.
df = generate_df_with_euclidean_distances(df, prediction_window)

In [10]:
# Return value is discarded; presumably this is where the slope columns read
# later in the notebook (long_slope, short_slope, actual_slope) are added to
# `df` — TODO confirm.
calculate_slopes(df)

In [11]:
# Prints the correlation figures shown in this cell's output; nothing is
# assigned from the call.
calculate_correlation_slopes(df)

Correlation between Euclidean distance between long array and short array and future actual results:  0.2323749569168827
Correlation between difference in long minus short predictions and future actual results for longs:  0.7729074091322778
Correlation between difference in long minus short predictions and future actual results for shorts:  0.7796842534588357
Correlation between long slopes and future results: -0.012208928800469096
Correlation between short slopes and future results: 0.020035842971377452


In [12]:
# Index the frame by bar close time: `close_time` is parsed as seconds since
# the epoch and made timezone-aware (UTC).
df.index = pd.to_datetime(df["close_time"], utc=True, unit="s")

# Running backtest

- ### Storing the results of the backtests

In [13]:
# Accumulator passed to every store_backtest_results call below and handed to
# export_results at the end of the notebook.
results_as_list = list()

- ### Baseline backtest - just listen to LSTM

In [14]:
# Baseline: act on the raw LSTM recommendations. The model emits no dedicated
# exit signal, so exits come from the CLOSE_LONG / CLOSE_SHORT recommendations
# (the reversals).
entries       = pd.Series((df['recommendations'] == ActionType.OPEN_LONG).to_numpy())
exits         = pd.Series((df['recommendations'] == ActionType.CLOSE_LONG).to_numpy())
short_entries = pd.Series((df['recommendations'] == ActionType.OPEN_SHORT).to_numpy())
short_exits   = pd.Series((df['recommendations'] == ActionType.CLOSE_SHORT).to_numpy())

pf = vbt.Portfolio.from_signals(
    open=df['BTCUSDT_Open'],
    high=df['BTCUSDT_High'],
    low=df['BTCUSDT_Low'],
    close=df['BTCUSDT_Close'],
    entries=entries,  # leave this argument out to run a short-only backtest
    exits=exits,
    short_entries=short_entries,
    short_exits=short_exits,
    time_delta_format='Rows',  # Use the row index to calculate the time delta
)

store_backtest_results(LSTM_REVERSAL_EXITS_BACKTEST_RESULT_KEY, pf, results_as_list, None)

In [15]:
# Baseline: enter on the LSTM's OPEN_* recommendations and let the time stop
# close every position after `prediction_window` rows.
short_entries = pd.Series((df['recommendations'] == ActionType.OPEN_SHORT).to_numpy())
entries       = pd.Series((df['recommendations'] == ActionType.OPEN_LONG).to_numpy())

pf = vbt.Portfolio.from_signals(
    open=df['BTCUSDT_Open'],
    high=df['BTCUSDT_High'],
    low=df['BTCUSDT_Low'],
    close=df['BTCUSDT_Close'],
    entries=entries,  # leave this argument out to run a short-only backtest
    short_entries=short_entries,
    td_stop=prediction_window,
    time_delta_format='Rows',  # Use the row index to calculate the time delta
)
store_backtest_results(LSTM_PREDICTION_WINDOW_EXITS_BACKTEST_RESULT_KEY, pf, results_as_list, None)

- ### Using different slope quantiles

In [16]:
# Splitting a distribution into `num_quantiles` bands needs one more edge than
# there are bands, evenly spaced over [0, 1].
num_quantiles = 5
# Step between consecutive candidate thresholds in the sweeps below.
threshold_increment = 0.001
quantiles = np.linspace(start=0, stop=1, num=1 + num_quantiles)

- #### long_slope and short_slope

In [17]:
# Evaluate each slope column at the `quantiles` edges and turn the result into
# band objects; the sweeps below read `lower_bound.value` / `upper_bound.value`
# from every band. How edges are paired into bands is decided inside
# generate_quantile_bands — presumably consecutive edges, TODO confirm.
long_slope_quantile_bands  = generate_quantile_bands(df["long_slope"].quantile(quantiles))
short_slope_quantile_bands = generate_quantile_bands(df["short_slope"].quantile(quantiles))

In [18]:
# Sweep long/short slope entry thresholds band by band. Longs are opened when
# long_slope is above t1, shorts when short_slope is below t2, and every
# position is closed by the time stop after `prediction_window` rows.
for long_band in long_slope_quantile_bands:
    # Candidate long thresholds inside this band (np.arange excludes the upper
    # bound). They do not depend on the short band, so build them once here.
    entry_slope_threshold = np.arange(
        long_band.lower_bound.value, long_band.upper_bound.value, threshold_increment
    )

    for short_band in short_slope_quantile_bands:
        short_entry_slope_threshold = np.arange(
            short_band.lower_bound.value, short_band.upper_bound.value, threshold_increment
        )

        for t1 in entry_slope_threshold:
            # The long signals depend on t1 only; computing them here instead
            # of inside the t2 loop avoids rebuilding the same Series for
            # every short threshold.
            entries = pd.Series((df['long_slope'] > t1).to_numpy())
            num_long_entries = entries.sum()

            for t2 in short_entry_slope_threshold:
                short_entries = pd.Series((df['short_slope'] < t2).to_numpy())

                num_entries = num_long_entries + short_entries.sum()

                # Skip combinations with too few signals to be meaningful.
                if num_entries > min_num_entries:
                    pf = vbt.Portfolio.from_signals(
                        high              = df['BTCUSDT_High'],
                        low               = df['BTCUSDT_Low'],
                        open              = df['BTCUSDT_Open'],
                        close             = df['BTCUSDT_Close'],
                        entries           = entries,  # leave out for a short-only backtest
                        short_entries     = short_entries,
                        td_stop           = prediction_window,  # hold each position for prediction_window rows
                        time_delta_format = 'Rows',  # Use the row index to calculate the time delta
                        accumulate        = False,
                        # sl_stop = 0.005,
                    )

                    key = "Prediction window exits"
                    slopes = PredictionWindowSlopes(t1, t2, None, EntryType.LONG_SHORT)
                    store_backtest_results(key, pf, results_as_list, slopes)

In [19]:
# Same threshold sweep as the prediction-window variant, but positions are
# closed by slope-based exit signals instead of a time stop: a long exits when
# long_slope drops below half of its entry threshold, a short exits when
# short_slope rises above half of its entry threshold.
#
# NOTE(review): for a negative t1 the exit level (0.5 * t1) lies above the
# entry level, so rows with t1 < long_slope < 0.5 * t1 raise an entry and an
# exit on the same bar; the mirror case exists for a positive t2. Confirm this
# is intended.
for long_band in long_slope_quantile_bands:
    # Candidate long thresholds inside this band (np.arange excludes the upper
    # bound). They do not depend on the short band, so build them once here.
    entry_slope_threshold = np.arange(
        long_band.lower_bound.value, long_band.upper_bound.value, threshold_increment
    )

    for short_band in short_slope_quantile_bands:
        short_entry_slope_threshold = np.arange(
            short_band.lower_bound.value, short_band.upper_bound.value, threshold_increment
        )

        for t1 in entry_slope_threshold:
            exit_t1_threshold = t1 * 0.5

            # Long-side signals depend on t1 only; build them once per t1
            # rather than once per (t1, t2) pair.
            entries = pd.Series((df['long_slope'] > t1).to_numpy())
            exits   = pd.Series((df['long_slope'] < exit_t1_threshold).to_numpy())
            num_long_entries = entries.sum()

            for t2 in short_entry_slope_threshold:
                exit_t2_threshold = t2 * 0.5

                short_entries = pd.Series((df['short_slope'] < t2).to_numpy())
                short_exits   = pd.Series((df['short_slope'] > exit_t2_threshold).to_numpy())

                num_entries = num_long_entries + short_entries.sum()

                # Skip combinations with too few signals to be meaningful.
                if num_entries > min_num_entries:
                    pf = vbt.Portfolio.from_signals(
                        high              = df['BTCUSDT_High'],
                        low               = df['BTCUSDT_Low'],
                        open              = df['BTCUSDT_Open'],
                        close             = df['BTCUSDT_Close'],
                        entries           = entries,  # leave out for a short-only backtest
                        exits             = exits,
                        short_entries     = short_entries,
                        short_exits       = short_exits,
                        time_delta_format = 'Rows',  # Use the row index to calculate the time delta
                        accumulate        = False,
                        # sl_stop = 0.005,
                    )

                    key = "Fractional slope exits"
                    slopes = PredictionWindowSlopes(t1, t2, None, EntryType.LONG_SHORT)
                    store_backtest_results(key, pf, results_as_list, slopes)

- #### long_minus_short

In [20]:
# Candidate thresholds covering the observed range of long_minus_short, spaced
# by threshold_increment. np.arange stops before the maximum, so the maximum
# itself is never used as a threshold.
long_minus_short_max = df['long_minus_short'].max()
long_minus_short_min = df['long_minus_short'].min()

long_minus_short_thresholds = list(
    np.arange(long_minus_short_min, long_minus_short_max, threshold_increment)
)

In [21]:
# Long-only sweep: go long whenever long_minus_short is below the threshold
# and let the time stop close the position after `prediction_window` rows.
for threshold in long_minus_short_thresholds:
    entries = pd.Series((df['long_minus_short'] < threshold).to_numpy())

    num_entries = entries.sum()

    # Thresholds that produce too few signals are not backtested.
    if num_entries <= min_num_entries:
        continue

    pf = vbt.Portfolio.from_signals(
        open=df['BTCUSDT_Open'],
        high=df['BTCUSDT_High'],
        low=df['BTCUSDT_Low'],
        close=df['BTCUSDT_Close'],
        entries=entries,
        td_stop=prediction_window,  # hold each position for prediction_window rows
        time_delta_format='Rows',   # Use the row index to calculate the time delta
        accumulate=False,
    )

    key = "Long minus short"
    slopes = PredictionWindowSlopes(None, None, threshold, EntryType.LONG_ONLY)
    store_backtest_results(key, pf, results_as_list, slopes)

In [22]:
# Short-only sweep: same signal as the long-only sweep (long_minus_short below
# the threshold) but used to open shorts; the time stop closes the position
# after `prediction_window` rows.
for threshold in long_minus_short_thresholds:
    short_entries = pd.Series((df['long_minus_short'] < threshold).to_numpy())

    num_entries = short_entries.sum()

    # Thresholds that produce too few signals are not backtested.
    if num_entries <= min_num_entries:
        continue

    pf = vbt.Portfolio.from_signals(
        open=df['BTCUSDT_Open'],
        high=df['BTCUSDT_High'],
        low=df['BTCUSDT_Low'],
        close=df['BTCUSDT_Close'],
        short_entries=short_entries,  # no long entries are passed: short-only backtest
        td_stop=prediction_window,    # hold each position for prediction_window rows
        time_delta_format='Rows',     # Use the row index to calculate the time delta
        accumulate=False,
    )

    key = "Long minus short"
    slopes = PredictionWindowSlopes(None, None, threshold, EntryType.SHORT_ONLY)
    store_backtest_results(key, pf, results_as_list, slopes)

In [23]:
# for threshold in long_minus_short_thresholds:
#   for long_band in long_slope_quantile_bands:
#     for short_band in short_slope_quantile_bands:    
#       entry_slope_threshold       = [x for x in np.arange(long_band.lower_bound.value , long_band.upper_bound.value , threshold_increment)]
#       short_entry_slope_threshold = [x for x in np.arange(short_band.lower_bound.value, short_band.upper_bound.value, threshold_increment)]

#       for t1 in entry_slope_threshold:    
#         for t2 in short_entry_slope_threshold: 
#           entries       = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['long_slope'] > t1), True, False))
#           short_entries = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['short_slope'] < t2), True, False))

#           num_entries = (entries == True).sum() + (short_entries == True).sum()

#           if num_entries > 0:
#             pf = vbt.Portfolio.from_signals(
#                 high              = df['BTCUSDT_High'],
#                 low               = df['BTCUSDT_Low'],
#                 open              = df['BTCUSDT_Open'],
#                 close             = df['BTCUSDT_Close'],
#                 entries           = entries, # commented out for a short only backtest    
#                 short_entries     = short_entries,
#                 td_stop           = prediction_window, # Hold on to the position for 8 bars
#                 time_delta_format = 'Rows', # Use the row index to calculate the time delta    
#                 accumulate        = False,    
#                 )
        
#             key = f"Long minus short with slopes"    
#             slopes = PredictionWindowSlopes(t1, t2, threshold, EntryType.LONG_SHORT)
#             store_backtest_results(key, pf, results_as_list, slopes)

In [24]:
# Long/short sweep: long_minus_short must be below the threshold, and the sign
# of the side's own slope picks the direction (long_slope > 0 opens longs,
# short_slope < 0 opens shorts). Positions close on the time stop.
for threshold in long_minus_short_thresholds:
    below_threshold = df['long_minus_short'] < threshold

    entries       = pd.Series((below_threshold & (df['long_slope'] > 0)).to_numpy())
    short_entries = pd.Series((below_threshold & (df['short_slope'] < 0)).to_numpy())

    num_entries = entries.sum() + short_entries.sum()

    # Thresholds that produce too few signals are not backtested.
    if num_entries <= min_num_entries:
        continue

    pf = vbt.Portfolio.from_signals(
        open=df['BTCUSDT_Open'],
        high=df['BTCUSDT_High'],
        low=df['BTCUSDT_Low'],
        close=df['BTCUSDT_Close'],
        entries=entries,  # leave this argument out to run a short-only backtest
        short_entries=short_entries,
        td_stop=prediction_window,  # hold each position for prediction_window rows
        time_delta_format='Rows',   # Use the row index to calculate the time delta
        accumulate=False,
    )

    key = "Long minus short with slopes"
    slopes = PredictionWindowSlopes(0, 0, threshold, EntryType.LONG_SHORT)
    store_backtest_results(key, pf, results_as_list, slopes)

In [25]:
# "Type 2" variant: both slopes must agree on the direction. Longs need
# long_slope and short_slope above zero, shorts need both below zero, and
# long_minus_short must be below the threshold in either case.
for threshold in long_minus_short_thresholds:
    below_threshold  = df['long_minus_short'] < threshold
    both_slopes_up   = (df['long_slope'] > 0) & (df['short_slope'] > 0)
    both_slopes_down = (df['short_slope'] < 0) & (df['long_slope'] < 0)

    entries       = pd.Series((below_threshold & both_slopes_up).to_numpy())
    short_entries = pd.Series((below_threshold & both_slopes_down).to_numpy())

    num_entries = entries.sum() + short_entries.sum()

    # Thresholds that produce too few signals are not backtested.
    if num_entries <= min_num_entries:
        continue

    pf = vbt.Portfolio.from_signals(
        open=df['BTCUSDT_Open'],
        high=df['BTCUSDT_High'],
        low=df['BTCUSDT_Low'],
        close=df['BTCUSDT_Close'],
        entries=entries,  # leave this argument out to run a short-only backtest
        short_entries=short_entries,
        td_stop=prediction_window,  # hold each position for prediction_window rows
        time_delta_format='Rows',   # Use the row index to calculate the time delta
        accumulate=False,
    )

    key = "Long minus short with slopes - type 2"
    slopes = PredictionWindowSlopes(0, 0, threshold, EntryType.LONG_SHORT)
    store_backtest_results(key, pf, results_as_list, slopes)

In [26]:
# "Type 3" variant: like the sign-based sweep, but the slope cut-off is 0.001
# instead of 0 on both sides.
#
# NOTE(review): the short condition is `short_slope < 0.001`, which is looser
# than the `short_slope < 0` used by the sign-based sweep and is not the mirror
# image of `long_slope > 0.001`. If a symmetric margin was intended this should
# be `short_slope < -0.001` — confirm before relying on these results.
for threshold in long_minus_short_thresholds:
    below_threshold = df['long_minus_short'] < threshold

    entries       = pd.Series((below_threshold & (df['long_slope'] > 0.001)).to_numpy())
    short_entries = pd.Series((below_threshold & (df['short_slope'] < 0.001)).to_numpy())

    num_entries = entries.sum() + short_entries.sum()

    # Thresholds that produce too few signals are not backtested.
    if num_entries <= min_num_entries:
        continue

    pf = vbt.Portfolio.from_signals(
        open=df['BTCUSDT_Open'],
        high=df['BTCUSDT_High'],
        low=df['BTCUSDT_Low'],
        close=df['BTCUSDT_Close'],
        entries=entries,  # leave this argument out to run a short-only backtest
        short_entries=short_entries,
        td_stop=prediction_window,  # hold each position for prediction_window rows
        time_delta_format='Rows',   # Use the row index to calculate the time delta
        accumulate=False,
    )

    key = "Long minus short with slopes - type 3"
    slopes = PredictionWindowSlopes(0.001, 0.001, threshold, EntryType.LONG_SHORT)
    store_backtest_results(key, pf, results_as_list, slopes)

In [27]:
# Boundary values of the slope bands. In the visible part of this notebook
# they are referenced only by the commented-out sweep in the next cell.
long_slope_quantile_values  = extract_boundary_values_from_quantile_bands(long_slope_quantile_bands)
short_slope_quantile_values = extract_boundary_values_from_quantile_bands(short_slope_quantile_bands)

In [28]:
# loop 1 is split into 100
# for threshold in long_minus_short_thresholds:  
#   for long_slope_threshold in long_slope_quantile_values:
#     for short_slope_threshold in short_slope_quantile_values:
#       entries       = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['long_slope'] > long_slope_threshold), True, False))
#       short_entries = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['short_slope'] < short_slope_threshold), True, False))

#       num_entries = (entries == True).sum() + (short_entries == True).sum()

#       if num_entries > min_num_entries:
#         pf = vbt.Portfolio.from_signals(
#             high              = df['BTCUSDT_High'],
#             low               = df['BTCUSDT_Low'],
#             open              = df['BTCUSDT_Open'],
#             close             = df['BTCUSDT_Close'],
#             entries           = entries, # commented out for a short only backtest    
#             short_entries     = short_entries,
#             td_stop           = prediction_window, # Hold on to the position for 8 bars
#             time_delta_format = 'Rows', # Use the row index to calculate the time delta    
#             accumulate        = False,    
#             )

#         key = f"Long minus short with slopes - type 3"    
#         slopes = PredictionWindowSlopes(long_slope_threshold, short_slope_threshold, threshold, EntryType.LONG_SHORT)
#         store_backtest_results(key, pf, results_as_list, slopes)

# Exporting the results

In [29]:
# Turn the collected backtest results into a table (export_results lives in
# lstm_results_utils) and save it as an Excel workbook.
results_df = export_results(results_as_list)

# to_excel does not create missing folders. Make sure the target directory
# exists so the sweep results are not lost to a missing output folder.
output_directory = os.path.dirname(excel_output_file_name)
if output_directory:  # empty when the file name has no directory part
    os.makedirs(output_directory, exist_ok=True)

results_df.to_excel(excel_output_file_name)

In [30]:
# Use groupby on the index (date) to calculate statistics
# bars_per_day = df.groupby(df.index.date).size()
# min_bars_per_day = df.groupby(df.index.date).size().min()
# max_bars_per_day = df.groupby(df.index.date).size().max()
# average_bars_per_day = df.groupby(df.index.date).size().mean()

# print("Number of bars per day:")
# print(bars_per_day)

# print("\nMinimum bars per day:", min_bars_per_day)
# print("Maximum bars per day:", max_bars_per_day)
# print("Average bars per day:", average_bars_per_day)
# bars_per_day.to_csv("bars_per_day.csv")

# pf.trades.records_readable.to_csv("records_readable.csv")

In [31]:
# TODO: for Joe
# entries       = pd.Series(np.where((df['long_minus_short'] < 0.615659479451176) & (df['long_slope' ] > 0) & (df['short_slope'] > 0), True, False))
# short_entries = pd.Series(np.where((df['long_minus_short'] < 0.615659479451176) & (df['short_slope'] < 0) & (df['long_slope' ] < 0), True, False))
# clean_entries, clean_short_entries = entries.vbt.signals.clean(short_entries)
# clean_entries.sum() + clean_short_entries.sum()
# pf.trades.records_readable

- ### Other backtests - work in progress and code may not execute at all!!!

In [32]:
# Summary statistics of long_slope, for comparison with the slope thresholds
# swept above.
df['long_slope'].describe()

count   61111.00000
mean       -0.00005
std         0.00251
min        -0.00405
25%        -0.00234
50%        -0.00008
75%         0.00222
max         0.00405
Name: long_slope, dtype: float64

In [33]:
# short_slope evaluated at the `quantiles` edges — the same values that were
# fed to generate_quantile_bands for the short side.
df['short_slope'].quantile(quantiles)

0.00000   -0.00405
0.20000   -0.00315
0.40000   -0.00148
0.60000    0.00063
0.80000    0.00259
1.00000    0.00405
Name: short_slope, dtype: float64

In [34]:
# Summary statistics of actual_slope, shown for comparison with the
# long_slope / short_slope distributions above.
df['actual_slope'].describe()

count   61111.00000
mean       -0.00000
std         0.00001
min        -0.00007
25%        -0.00001
50%        -0.00000
75%         0.00001
max         0.00007
Name: actual_slope, dtype: float64

In [35]:
# Summary statistics of long_minus_short; its min and max are the range the
# long_minus_short_thresholds sweep covers.
df['long_minus_short'].describe()

count   61111.00000
mean        0.89544
std         0.20902
min         0.10839
25%         0.74497
50%         0.89507
75%         1.05143
max         1.41295
Name: long_minus_short, dtype: float64

In [36]:
# print(f"Num entries = {(entries == True).sum()}")
# print(f"Num short entries = {(short_entries == True).sum()}")
# new_df["long_slope"].describe()