# Imports

In [1]:
import pandas as pd
import vectorbtpro as vbt
import numpy as np
import warnings
# NOTE(review): no category or module filter is given, so this silences every warning for the
# rest of the session, not just a specific known-noisy one.
warnings.filterwarnings('ignore')



# Initialization

In [2]:
# Global vectorbt defaults; the simulation further down does not override either of them.
vbt.settings.wrapping["freq"] = "1m"  # bar frequency string handed to vectorbt
vbt.settings.portfolio["init_cash"] = 10000  # starting cash of each simulated portfolio

# Settings and Parameters

In [3]:
# Path pattern of the pickled LSTM outputs; the parent folder name doubles as the run id.
pickle_files_path = "../data/RID0029_LSTM_pw38_lb250_bt2000_mem6000/*.pkl"
prediction_window = 38

min_num_entries = 100
# Results workbook is named after the run folder (second-to-last path component).
excel_output_file_name = "../results/" + pickle_files_path.split("/")[-2] + ".xlsx"

# Local Imports

In [88]:
import os
import sys

# Put the current working directory on the module search path so the local helper modules
# imported in the next cell can be found. Re-running this cell appends the same path again.
sys.path.append(os.getcwd())

In [89]:
from lstm_analysis_utils import (read_pickle_files_into_df, add_forward_prices_to_df, generate_fwd_actual_column
                                 , generate_df_with_euclidean_distances, calculate_slopes, calculate_correlation_slopes
                                )
from lstm_results_utils import (export_results, store_backtest_results)
from lstm_only_backtests import run_backtest_lstm_recommendations_reversal_exits, run_backtests_lstm_recommendations_prediction_size_exit
from lstm_analysis_constants import EntryType
from quantile_value import generate_quantile_bands, extract_boundary_values_from_quantile_bands
from prediction_window_slopes import PredictionWindowSlopes
from long_slope_short_slope_backtests import run_backtest_long_slope_short_slope_prediction_size_exit, run_backtest_long_slope_short_slope_fractional_exits
from long_minus_short_backtests import run_backtest_long_minus_short_entry_type_long_only, run_backtest_long_minus_short_entry_type_short_only, run_backtest_long_minus_short_entry_type_long_short

# Processing

In [90]:
# Build the working DataFrame from the files matched by pickle_files_path
# (loading behavior is inferred from the helper's name; confirm in lstm_analysis_utils).
df = read_pickle_files_into_df(pickle_files_path)

In [91]:
# Called for its effect on df: the return value is discarded. Presumably adds the forward-price
# columns (e.g. BTCUSDT_Open_38 in the frame preview) — confirm in lstm_analysis_utils.
add_forward_prices_to_df(df, prediction_window)
df = df.copy()  # for large prediction_window size, the copy() call eliminates the fragmented dataframe warning

In [92]:
# Return value is discarded, so this presumably adds the fwd_actual column to df in place — TODO confirm.
generate_fwd_actual_column(df)

In [93]:
# This helper's result is assigned back, so df is rebound to whatever it returns.
# Presumably adds the *_distance_to_actual and long_minus_short columns — TODO confirm.
df = generate_df_with_euclidean_distances(df, prediction_window)

In [94]:
# Return value is discarded; presumably adds long_slope, short_slope and actual_slope to df in place — TODO confirm.
calculate_slopes(df)

In [95]:
# Prints the correlation summary captured in this cell's output; nothing is assigned from the call.
calculate_correlation_slopes(df)

Correlation between Euclidean distance between long array and short array and future actual results:  0.23252517589935243
Correlation between difference in long minus short predictions and future actual results for longs:  0.7740239236150575
Correlation between difference in long minus short predictions and future actual results for shorts:  0.778859471995504
Correlation between long slopes and future results: -0.010443185855921886
Correlation between short slopes and future results: 0.018291259185979517


In [96]:
# Interpret close_time as epoch seconds and use the resulting UTC timestamps as the index.
close_timestamps = pd.to_datetime(df["close_time"], unit="s", utc=True)
df.index = close_timestamps

In [73]:
# Dump the processed frame to a CSV in the working directory. The file name is derived from the
# run folder in pickle_files_path so it cannot drift from the configured run id.
csv_output_file_name = pickle_files_path.split('/')[-2] + '.csv'
df.to_csv(csv_output_file_name)

Unnamed: 0_level_0,close_time,BTCUSDT_Open,BTCUSDT_High,BTCUSDT_Low,BTCUSDT_Close,recommendations,long,short,indx_hi,indx_low,...,BTCUSDT_Open_38,fwd_actual,long_distance_to_actual,short_distance_to_actual,long_minus_short,long_slope,short_slope,actual_slope,entries,clean_entries
close_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-09 00:03:00+00:00,1641686580.00000,41659.21000,41670.06000,41601.83000,41611.49000,ActionType.NOOP,"[0.00028395813, 0.00018264189, 0.00082501565, ...","[1.5434945e-05, 1.1722226e-05, 8.31921e-06, 3....",[6],[9],...,41865.61000,"[0.026259509309008697, 0.026146508072695626, 0...",0.48545,0.47090,0.71420,-0.00270,0.00045,-0.00000,,
2022-01-09 00:08:00+00:00,1641686880.00000,41479.94000,41500.76000,41419.93000,41460.91000,ActionType.NOOP,"[6.961259e-05, 5.0820454e-05, 0.00030092167, 0...","[4.5889006e-05, 3.128108e-05, 1.8344024e-05, 6...",[7],[9],...,41840.61000,"[0.02614310679206083, 0.026117934246361028, 0....",0.48740,0.52652,0.75328,-0.00264,0.00060,-0.00000,,
2022-01-09 00:13:00+00:00,1641687180.00000,41440.00000,41670.94000,41426.89000,41577.59000,ActionType.NOOP,"[0.00015814097, 0.0001234603, 0.00070723606, 0...","[5.8321515e-05, 2.863668e-05, 1.5200444e-05, 5...",[6],[9],...,41802.57000,"[0.02611199858370133, 0.026137820729018903, 0....",0.46220,0.48989,0.71159,-0.00267,0.00055,-0.00000,,
2022-01-09 00:25:00+00:00,1641687900.00000,41480.98000,41532.57000,41466.82000,41523.40000,ActionType.NOOP,"[0.00012365845, 0.00010381089, 0.000671683, 0....","[3.4932964e-05, 2.065421e-05, 1.0802528e-05, 3...",[6],[9],...,41754.46000,"[0.026131850618096277, 0.026197247998116336, 0...",0.47700,0.55266,0.76531,-0.00274,0.00067,-0.00000,,
2022-01-09 00:46:00+00:00,1641689160.00000,41584.79000,41765.00000,41574.99000,41720.04000,ActionType.NOOP,"[5.238538e-05, 4.9717157e-05, 7.11477e-05, 9.8...","[0.00023490381, 7.9151985e-05, 3.4724213e-05, ...",[10],[11],...,41688.78000,"[0.026192735390360395, 0.02628890922619687, 0....",0.51901,0.48254,0.74489,-0.00181,0.00137,-0.00000,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-06-30 16:51:00+00:00,1688143860.00000,30372.80000,30470.00000,30365.30000,30440.50000,ActionType.NOOP,"[0.0061495774, 0.00018050753, 0.00081764325, 0...","[0.0030833296, 0.07149258, 0.87284064, 0.04817...",[27],[24],...,30441.10000,"[0.02624642385475425, 0.026346923614220218, 0....",0.70048,0.86199,1.13339,0.00188,-0.00360,-0.00000,,
2023-06-30 16:53:00+00:00,1688143980.00000,30489.10000,30525.00000,30450.60000,30471.30000,ActionType.NOOP,"[0.0031322541, 0.0010677791, 0.0020046635, 3.9...","[0.01976914, 0.19325812, 0.19367228, 0.0127412...",[27],[8],...,30508.70000,"[0.026345368686192246, 0.026287215335893916, 0...",0.91231,0.56757,1.09725,0.00180,0.00004,-0.00000,,
2023-06-30 16:56:00+00:00,1688144160.00000,30421.80000,30460.00000,30418.00000,30425.30000,ActionType.NOOP,"[0.00017900167, 7.695741e-05, 0.0002836467, 1....","[0.0051571326, 0.12851144, 0.55542386, 0.29695...",[27],[13],...,30459.70000,"[0.026286770138129575, 0.02626525462700139, 0....",0.73379,0.62215,0.98874,0.00167,-0.00357,-0.00000,,
2023-06-30 17:00:00+00:00,1688144400.00000,30396.90000,30407.40000,30372.00000,30381.50000,ActionType.NOOP,"[0.0008620111, 4.6397417e-05, 0.0017905841, 1....","[0.00025132665, 0.0010259316, 0.94365233, 0.00...",[22],[21],...,30444.40000,"[0.026264394507243966, 0.026263530458788126, 0...",0.81872,0.93014,1.25877,0.00077,-0.00331,-0.00000,,


# Running backtest

- ### Storing the results of the backtests

In [13]:
# Accumulator that is handed to export_results at the end of the notebook.
# NOTE(review): the only store_backtest_results(...) call that would fill it is commented out further down.
results_as_list  = []

- ### Baseline backtest - just listen to LSTM

- ### Using different slopes

In [97]:
# Extremes of the two slope columns.
long_slope_column = df["long_slope"]
short_slope_column = df["short_slope"]

min_long_slope, max_long_slope = long_slope_column.min(), long_slope_column.max()
min_short_slope, max_short_slope = short_slope_column.min(), short_slope_column.max()

In [15]:
num_quantiles = 5
threshold_increment = 0.001
# num_quantiles bands need num_quantiles + 1 evenly spaced boundaries between 0 and 1.
quantiles = np.linspace(0.0, 1.0, num_quantiles + 1)

- #### long_slope and short_slope

- #### long_minus_short

In [98]:
lms_column = df['long_minus_short']
long_minus_short_min, long_minus_short_max = lms_column.min(), lms_column.max()

# Candidate thresholds starting at the observed minimum, stepping by threshold_increment
# and stopping before the observed maximum.
long_minus_short_thresholds = list(np.arange(long_minus_short_min, long_minus_short_max, threshold_increment))

# Parameter Combinations in VBT

In [99]:
def lms_slope_type_3(long_minus_short, long_slope, short_slope, lms_threshold, long_slope_thresh, short_slope_thresh):
    """Derive long and short entry signals from the long-minus-short value and the two slopes.

    A position is a long entry when ``long_minus_short`` is below ``lms_threshold`` and
    ``long_slope`` is above ``long_slope_thresh``. It is a short entry when
    ``long_minus_short`` is below ``lms_threshold`` and ``short_slope`` is below
    ``short_slope_thresh``.

    Returns:
        A tuple ``(entries, short_entries)`` of boolean ``pd.Series`` with a default
        positional index.
    """
    below_lms_threshold = long_minus_short < lms_threshold
    long_mask = below_lms_threshold & (long_slope > long_slope_thresh)
    short_mask = below_lms_threshold & (short_slope < short_slope_thresh)

    # Converting to plain arrays first drops any pandas index carried by the inputs.
    return pd.Series(np.asarray(long_mask, dtype=bool)), pd.Series(np.asarray(short_mask, dtype=bool))

# Wrap lms_slope_type_3 in an indicator class so it can be run over grids of parameters.
lms_slope_type_3_indicator = vbt.IndicatorFactory(
    class_name="LongMinusShortSlopeType3",
    short_name="lmsSlope3",  # shows up as the prefix of the parameter levels in result columns
    input_names=["long_minus_short", "long_slope", "short_slope"],
    param_names=["lms_threshold", "long_slope_thresh", "short_slope_thresh"],
    output_names=["entries", "short_entries"],
).with_apply_func(
    lms_slope_type_3,
    takes_1d=True,  # the apply function receives 1-dim. arrays
    # Defaults used when a parameter is not supplied to run()
    lms_threshold=0.5,
    long_slope_thresh=0.0,
    short_slope_thresh=0.0,
)
  

The cell above defines the strategy indicator but does not run it yet. The following cells run it over many parameter combinations: they build one large matrix of signals with one column per combination of `lms_threshold`, `long_slope_thresh` and `short_slope_thresh`. These signals are then used to simulate a portfolio for each combination.

In [100]:


# Observed range of each signal column; these ranges bound the parameter grid.
lms_min, lms_max = df.long_minus_short.min(), df.long_minus_short.max()
long_slope_min, long_slope_max = df.long_slope.min(), df.long_slope.max()
short_slope_min, short_slope_max = df.short_slope.min(), df.short_slope.max()

# Step size that divides each observed range into 10 equal parts.
lms_increment = abs((lms_max - lms_min) / 10)
long_slope_increment = abs((long_slope_max - long_slope_min) / 10)
short_slope_increment = abs((short_slope_max - short_slope_min) / 10)

In [19]:
# Build each parameter grid with np.linspace instead of np.arange. With a fractional step,
# np.arange may return either 10 or 11 values depending on floating-point rounding, which
# would silently change the number of simulated combinations (and it raises when the range
# is zero). endpoint=False keeps the grid at min, min + range/10, ..., min + 9 * range/10.
num_grid_points = 10  # same 10-way split of each range that the *_increment values use

lms_strategy = lms_slope_type_3_indicator.run(
    long_minus_short    =df['long_minus_short'],
    long_slope          =df['long_slope'],
    short_slope         =df['short_slope'],
    lms_threshold       =np.linspace(lms_min, lms_max, num=num_grid_points, endpoint=False),
    long_slope_thresh   =np.linspace(long_slope_min, long_slope_max, num=num_grid_points, endpoint=False),
    short_slope_thresh  =np.linspace(short_slope_min, short_slope_max, num=num_grid_points, endpoint=False),
    param_product=True, # True: all combinations of parameters, False: only one combination for each parameter
)

Now let's run a portfolio simulation on all of those parameter combinations. Note: the first run may take a while, but subsequent runs will be much faster.

In [101]:
# Simulate one portfolio per parameter combination (the signal frames carry one column each).
multiple_pf = vbt.Portfolio.from_signals(
    open=df['BTCUSDT_Open'],
    high=df['BTCUSDT_High'],
    low=df['BTCUSDT_Low'],
    close=df['BTCUSDT_Close'],
    entries=lms_strategy.entries,
    short_entries=lms_strategy.short_entries,
    td_stop=prediction_window,  # time-based stop after prediction_window ...
    time_delta_format='Rows',   # ... counted in rows (bars) rather than timestamps
    accumulate=False,
)

# stats() on the multi-column portfolio prints the average over all simulations.
print(multiple_pf.stats())

Start                         2022-01-09 00:03:00+00:00
End                           2023-06-30 17:04:00+00:00
Period                                 45 days 23:22:00
Start Value                                 10000.00000
Min Value                                    7258.74612
Max Value                                   13695.52379
End Value                                    9131.55045
Total Return [%]                               -8.68450
Benchmark Return [%]                          -26.83031
Total Time Exposure [%]                        51.06045
Max Gross Exposure [%]                        100.06693
Max Drawdown [%]                               45.12538
Max Drawdown Duration                  29 days 23:57:39
Total Orders                                 2079.46600
Total Fees Paid                                 0.00000
Total Trades                                 1388.65200
Win Rate [%]                                   51.03679
Best Trade [%]                                  

In [102]:
# Number of trades for each parameter combination (one value per portfolio column).
multiple_pf.trades.count()

lmsSlope3_lms_threshold  lmsSlope3_long_slope_thresh  lmsSlope3_short_slope_thresh
0.10839                  -0.00405                     -0.00405                           0
                                                      -0.00324                           0
                                                      -0.00243                           0
                                                      -0.00162                           0
                                                      -0.00081                           0
                                                                                      ... 
1.28250                  0.00324                      -0.00000                        2446
                                                      0.00081                         1995
                                                      0.00162                         1763
                                                      0.00243                         1679
       

In [103]:
# Keep only the parameter combinations that produced more than min_num_entries trades.
# The threshold comes from the settings cell instead of a second hard-coded 100.
num_trade_filter = multiple_pf.trades.count() > min_num_entries

# Use this boolean mask (one value per column) to select the matching portfolio columns.
filtered_pf = multiple_pf.loc[:, num_trade_filter]

In [23]:
# Trade counts of the combinations that survived the minimum-trade filter.
filtered_pf.trades.count()

lmsSlope3_lms_threshold  lmsSlope3_long_slope_thresh  lmsSlope3_short_slope_thresh
0.36930                  -0.00405                     -0.00405                         216
                                                      -0.00324                         214
                                                      -0.00243                         203
                                                      -0.00162                         176
                                                      -0.00081                         142
                                                                                      ... 
1.28250                  0.00324                      -0.00000                        2446
                                                      0.00081                         1995
                                                      0.00162                         1763
                                                      0.00243                         1679
       

In [24]:
# Total return of each surviving combination; the displayed values look like fractions
# (e.g. -0.09413), not percentages — confirm against the stats() output if in doubt.
filtered_pf.total_return


lmsSlope3_lms_threshold  lmsSlope3_long_slope_thresh  lmsSlope3_short_slope_thresh
0.36930                  -0.00405                     -0.00405                       -0.09413
                                                      -0.00324                       -0.12288
                                                      -0.00243                       -0.09622
                                                      -0.00162                       -0.02534
                                                      -0.00081                        0.00185
                                                                                       ...   
1.28250                  0.00324                      -0.00000                       -0.36066
                                                      0.00081                        -0.25707
                                                      0.00162                        -0.23125
                                                      0.00243          

In [26]:
#run_backtest_long_minus_short_entry_type_long_short(df, results_as_list, prediction_window, threshold_increment, min_num_entries, quantiles)

# loop 1 is split into 100
# for threshold in long_minus_short_thresholds:  
#   for long_slope_threshold in long_slope_quantile_values:
#     for short_slope_threshold in short_slope_quantile_values:
#       entries       = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['long_slope'] > long_slope_threshold), True, False))
#       short_entries = pd.Series(np.where((df['long_minus_short'] < threshold) & (df['short_slope'] < short_slope_threshold), True, False))

#       num_entries = (entries == True).sum() + (short_entries == True).sum()

#       if num_entries > min_num_entries:
#         pf = vbt.Portfolio.from_signals(
#             high              = df['BTCUSDT_High'],
#             low               = df['BTCUSDT_Low'],
#             open              = df['BTCUSDT_Open'],
#             close             = df['BTCUSDT_Close'],
#             entries           = entries, # commented out for a short only backtest    
#             short_entries     = short_entries,
#             td_stop           = prediction_window, # Hold on to the position for prediction_window bars
#             time_delta_format = 'Rows', # Use the row index to calculate the time delta    
#             accumulate        = False,    
#             )

#         key = f"Long minus short with slopes - type 3"    
#         slopes = PredictionWindowSlopes(long_slope_threshold, short_slope_threshold, threshold, EntryType.LONG_SHORT)
#         store_backtest_results(key, pf, results_as_list, slopes)

# Exporting the results

In [27]:
# Gather the stored backtest results and write them to the Excel file named in the settings cell.
# NOTE(review): no active cell appends to results_as_list (the store_backtest_results call is
# commented out), so this presumably exports an empty result set — confirm this is intended.
results_df = export_results(results_as_list)

# Create the output folder first so to_excel does not fail when it does not exist yet.
excel_output_dir = os.path.dirname(excel_output_file_name)
if excel_output_dir:
    os.makedirs(excel_output_dir, exist_ok=True)
results_df.to_excel(excel_output_file_name)

In [28]:
# Use groupby on the index (date) to calculate statistics
# bars_per_day = df.groupby(df.index.date).size()
# min_bars_per_day = df.groupby(df.index.date).size().min()
# max_bars_per_day = df.groupby(df.index.date).size().max()
# average_bars_per_day = df.groupby(df.index.date).size().mean()

# print("Number of bars per day:")
# print(bars_per_day)

# print("\nMinimum bars per day:", min_bars_per_day)
# print("Maximum bars per day:", max_bars_per_day)
# print("Average bars per day:", average_bars_per_day)
# bars_per_day.to_csv("bars_per_day.csv")

# pf.trades.records_readable.to_csv("records_readable.csv")

In [29]:
# TODO: for Joe
# entries       = pd.Series(np.where((df['long_minus_short'] < 0.615659479451176) & (df['long_slope' ] > 0) & (df['short_slope'] > 0), True, False))
# short_entries = pd.Series(np.where((df['long_minus_short'] < 0.615659479451176) & (df['short_slope'] < 0) & (df['long_slope' ] < 0), True, False))
# clean_entries, clean_short_entries = entries.vbt.signals.clean(short_entries)
# clean_entries.sum() + clean_short_entries.sum()
# pf.trades.records_readable

- ### Other backtests - work in progress and code may not execute at all!!!

In [30]:
# Distribution summary of the long_slope column.
df['long_slope'].describe()

count   61111.00000
mean       -0.00005
std         0.00251
min        -0.00405
25%        -0.00234
50%        -0.00008
75%         0.00222
max         0.00405
Name: long_slope, dtype: float64

In [31]:
# Values of short_slope at the probabilities in `quantiles` (0, 0.2, ..., 1 for num_quantiles = 5).
df['short_slope'].quantile(quantiles)

0.00000   -0.00405
0.20000   -0.00315
0.40000   -0.00148
0.60000    0.00063
0.80000    0.00259
1.00000    0.00405
Name: short_slope, dtype: float64

In [32]:
# Distribution summary of the actual_slope column.
# NOTE(review): in the captured outputs actual_slope (std ~1e-5) is about two orders of magnitude
# smaller than long_slope (std ~2.5e-3); confirm the slopes are computed on comparable scales.
df['actual_slope'].describe()

count   61111.00000
mean       -0.00000
std         0.00001
min        -0.00007
25%        -0.00001
50%        -0.00000
75%         0.00001
max         0.00007
Name: actual_slope, dtype: float64

In [33]:
# Distribution summary of long_minus_short; its min and max bound the lms_threshold grid above.
df['long_minus_short'].describe()

count   61111.00000
mean        0.89544
std         0.20902
min         0.10839
25%         0.74497
50%         0.89507
75%         1.05143
max         1.41295
Name: long_minus_short, dtype: float64

In [34]:
# print(f"Num entries = {(entries == True).sum()}")
# print(f"Num short entries = {(short_entries == True).sum()}")
# new_df["long_slope"].describe()