# Imports

In [None]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import vectorbtpro as vbt
import numpy as np


# Initialization

In [None]:
# Global vectorbtpro defaults used by every simulation and plot below.
vbt.settings.wrapping["freq"] = "1m"
vbt.settings.portfolio["init_cash"] = 10000
vbt.settings.set_theme("dark")

# Default figure size.
# NOTE(review): the sizes are assigned after the theme on purpose, in case
# set_theme() touches the layout settings — confirm before reordering.
vbt.settings.plotting["layout"]["width"] = 800
vbt.settings.plotting["layout"]["height"] = 300

# Settings and Parameters

In [None]:
# Location of the pickled LSTM outputs for the run being analysed.
# pickle_files_path = "../data/RID0029_LSTM_pw38_lb250_bt2000_mem6000/*.pkl"
pickle_files_path = "../data/RID0028_LSTM_pw75_lb250_bt2000_mem5000"
# NOTE(review): presumably matches the `pw75` in the run name — confirm when switching runs.
prediction_window = 75
# Last path component; used as the stem of every output file name below.
filename_prefix = pickle_files_path.split('/')[-1]
min_num_entries = 100

# Output files, all keyed by the run prefix.
excel_output_file_name = f"../results/{filename_prefix}.xlsx"
lstm_output_file_name = f"../results/{filename_prefix}_convert_to_excel.csv"
# An underscore separates the prefix from the suffix, as in the file name above.
lstm_results_file_name = f"../results/{filename_prefix}_lstm_model_results.csv"

# Local Imports

In [None]:
import os
import sys

# Make the modules that live in the notebook's working directory importable.
# Guarded so that re-running this cell does not keep appending the same entry.
_cwd = os.getcwd()
if _cwd not in sys.path:
    sys.path.append(_cwd)

In [None]:
from lstm_analysis_utils import (read_pickle_files_into_df, add_forward_prices_to_df, generate_fwd_actual_column
                                 , generate_df_with_euclidean_distances, calculate_slopes, calculate_correlation_slopes
                                )
from lstm_results_utils import (export_results, store_backtest_results)
from lstm_only_backtests import run_backtest_lstm_recommendations_reversal_exits, run_backtests_lstm_recommendations_prediction_size_exit
from lstm_analysis_constants import EntryType
from quantile_value import generate_quantile_bands, extract_boundary_values_from_quantile_bands
from prediction_window_slopes import PredictionWindowSlopes
from long_slope_short_slope_backtests import run_backtest_long_slope_short_slope_prediction_size_exit, run_backtest_long_slope_short_slope_fractional_exits
from long_minus_short_backtests import run_backtest_long_minus_short_entry_type_long_only, run_backtest_long_minus_short_entry_type_short_only, run_backtest_long_minus_short_entry_type_long_short

# Processing

In [None]:
def process_LSTM_model_results(pickle_files_path, prediction_window):
    """Load the pickled LSTM outputs and derive the columns used for backtesting.

    Runs the project helpers in a fixed order: read the pickles, add forward
    prices, generate the forward/actual column, build the Euclidean-distance
    frame, then calculate slopes and correlation slopes. Finally the frame is
    re-indexed by its ``close_time`` column.

    Args:
        pickle_files_path: Passed straight to ``read_pickle_files_into_df``.
        prediction_window: Passed to ``add_forward_prices_to_df`` and
            ``generate_df_with_euclidean_distances``.

    Returns:
        The processed DataFrame, indexed by ``close_time`` parsed as UTC
        timestamps from epoch seconds.
    """
    results = read_pickle_files_into_df(pickle_files_path)

    # Return value is not used here; presumably the helper adds columns in place.
    add_forward_prices_to_df(results, prediction_window)
    results = results.copy()

    generate_fwd_actual_column(results)
    results = generate_df_with_euclidean_distances(results, prediction_window)
    calculate_slopes(results)
    calculate_correlation_slopes(results)

    close_timestamps = pd.to_datetime(results['close_time'], utc=True, unit='s')
    results.index = close_timestamps
    return results


df = process_LSTM_model_results(pickle_files_path, prediction_window)
# df.to_csv(lstm_results_file_name)
# df = pd.read_csv(lstm_output_file_name, index_col=0, parse_dates=True, infer_datetime_format=True)


In [None]:
# Show the path that the processed LSTM frame is written to.
lstm_results_file_name

In [None]:
# Write the processed LSTM frame to CSV.
df.to_csv(lstm_results_file_name)

In [None]:
# # Define mapping from old names to new ones
# column_mapping = {
#     'BTCUSDT_Open': 'Open',
#     'BTCUSDT_High': 'High',
#     'BTCUSDT_Low': 'Low',
#     'BTCUSDT_Close': 'Close',
#     'long_minus_short': 'long_minus_short',
#     'long_slope': 'long_slope',
#     'short_slope': 'short_slope'
# }

# # Rename columns using the mapping
# df = df.rename(columns=column_mapping)



# Parameter Combinations in VBT

In [None]:
def lms_slope_type_3(long_minus_short, long_slope, short_slope, lms_threshold, long_slope_thresh, short_slope_thresh):
    """Build long and short entry signals from the LMS level and the two slopes.

    A bar is a long entry when ``long_minus_short`` is below ``lms_threshold``
    and ``long_slope`` is above ``long_slope_thresh``. It is a short entry when
    ``long_minus_short`` is below ``lms_threshold`` and ``short_slope`` is
    below ``short_slope_thresh``.

    Returns:
        ``(entries, short_entries)``: two boolean ``pd.Series`` with a default
        integer index.
    """
    # Both directions share the same level condition.
    lms_is_low = long_minus_short < lms_threshold
    long_signal = lms_is_low & (long_slope > long_slope_thresh)
    short_signal = lms_is_low & (short_slope < short_slope_thresh)

    entries = pd.Series(np.where(long_signal, True, False))
    short_entries = pd.Series(np.where(short_signal, True, False))
    return entries, short_entries



# Wrap `lms_slope_type_3` in a vectorbt indicator so it can be run over parameter grids.
lms_slope_type_3_indicator = vbt.IndicatorFactory(
    class_name="LongMinusShortSlopeType3",
    short_name="lmsSlope3",  # prefix of the parameter level names in the results
    input_names=["long_minus_short", "long_slope", "short_slope"],
    param_names=["lms_threshold", "long_slope_thresh", "short_slope_thresh"],
    output_names=["entries", "short_entries"],
).with_apply_func(
    lms_slope_type_3,
    takes_1d=True,  # the apply function takes 1-dim. arrays as input
    # Default parameter values.
    lms_threshold=0.5,
    long_slope_thresh=0.0,
    short_slope_thresh=0.0,
)



The strategy was defined above but has not been run yet. The next cell runs it over many parameter combinations, building one large matrix of signals with one entry per combination of `lms_threshold`, `long_slope_thresh` and `short_slope_thresh`. These signals are then used to simulate a portfolio.

In [None]:
# Number of grid points per parameter; with param_product=True the indicator
# is evaluated for every combination of the three grids.
num_increments = 20

lms_min = df.long_minus_short.min()  # To narrow the range try 0.6
lms_max = df.long_minus_short.max()  # and try 1.1
long_slope_min = df.long_slope.min()
long_slope_max = df.long_slope.max()
short_slope_min = df.short_slope.min()
short_slope_max = df.short_slope.max()

# Step sizes of the three grids (kept for inspection).
lms_increment = abs((lms_max - lms_min) / num_increments)
long_slope_increment = abs((long_slope_max - long_slope_min) / num_increments)
short_slope_increment = abs((short_slope_max - short_slope_min) / num_increments)

# np.linspace(..., endpoint=False) yields exactly `num_increments` values
# min, min + step, ..., max - step. np.arange with a floating-point step covers
# the same values but can return one element more because of rounding, which
# makes the grid size unpredictable.
lms_strategy = lms_slope_type_3_indicator.run(
    long_minus_short=df['long_minus_short'],
    long_slope=df['long_slope'],
    short_slope=df['short_slope'],
    lms_threshold=np.linspace(lms_min, lms_max, num_increments, endpoint=False),
    long_slope_thresh=np.linspace(long_slope_min, long_slope_max, num_increments, endpoint=False),
    short_slope_thresh=np.linspace(short_slope_min, short_slope_max, num_increments, endpoint=False),
    param_product=True,  # True: all combinations of parameters, False: only one combination for each parameter
    execute_kwargs=dict(
        engine="threadpool",
        chunk_len="auto",
        show_progress=True,
    )
)


Now let's run a portfolio simulation on all of those parameter combinations. Note: the first run may take a while, but subsequent runs will be much faster.

In [None]:
# Simulate every parameter combination in one call. The stop is expressed in
# rows (time_delta_format="Rows") and set to the prediction window.
multiple_pf = vbt.Portfolio.from_signals(
    open=df["BTCUSDT_Open"],
    high=df["BTCUSDT_High"],
    low=df["BTCUSDT_Low"],
    close=df["BTCUSDT_Close"],
    entries=lms_strategy.entries,
    short_entries=lms_strategy.short_entries,
    td_stop=prediction_window,
    time_delta_format="Rows",
    accumulate=False,
    # Price-based stops, currently disabled:
    # tp_stop=0.003,
    # sl_stop=0.01,
)

# Prints the average of all of the simulations
print(multiple_pf.stats())

#### Show the portfolio simulation with the best Sharpe ratio

In [None]:
# Column label (parameter combination) with the highest Sharpe ratio.
best_sharpe = multiple_pf.sharpe_ratio.idxmax()

# Select that single simulation once, then report and plot it.
best_sharpe_pf = multiple_pf[best_sharpe]
print(best_sharpe_pf.stats())
best_sharpe_pf.plot().show()

### Keep only the simulations with enough trades

In [None]:

# Keep only the simulations with more than `min_num_entries` trades. The
# cut-off comes from the settings cell instead of a hard-coded literal.
num_trade_filter = multiple_pf.trades.count() > min_num_entries

# Use the boolean mask to select the matching columns of multiple_pf
filtered_pf = multiple_pf.loc[:, num_trade_filter]
filtered_pf.trades.count()

### Create a large dataframe with all of the metrics we care about

In [None]:
# One (column name, per-combination metric) pair per line, so a name can never
# drift out of step with the metric it labels.
named_metrics = [
    ('total_return', filtered_pf.total_return),
    ('win_rate', filtered_pf.trades.win_rate),
    ('sharpe_ratio', filtered_pf.sharpe_ratio),
    ('sortino_ratio', filtered_pf.sortino_ratio),
    ('max_drawdown', filtered_pf.max_drawdown),
    ('profit_factor', filtered_pf.trades.profit_factor),
    ('long_count', filtered_pf.trades.direction_long.count()),
    ('short_count', filtered_pf.trades.direction_short.count()),
    ('long_pnl_sum', filtered_pf.trades.direction_long.pnl.sum()),
    ('short_pnl_sum', filtered_pf.trades.direction_short.pnl.sum()),
]
keys = [name for name, _ in named_metrics]
metrics = [metric for _, metric in named_metrics]

# One row per parameter combination, one column per metric.
combined_stats = pd.concat(metrics, axis=1, keys=keys)

# Written to its own file: `lstm_results_file_name` already holds the processed
# LSTM frame exported earlier and must not be overwritten with these stats.
combined_stats_file_name = f"../results/{filename_prefix}_lms_slope_type_3_combined_stats.csv"
combined_stats.to_csv(combined_stats_file_name)
combined_stats


Sort by a metric, e.g. win rate (any other column of `combined_stats` can be used instead)

In [None]:
# Top 20 parameter combinations by win rate. Swap the column name for any of
# 'total_return', 'sortino_ratio', 'profit_factor', 'max_drawdown' or
# 'sharpe_ratio' to rank by a different metric.
combined_stats.sort_values('win_rate', ascending=False).iloc[:20]



#### Create a dataframe for visualizations

In [None]:
# Move the metric names from the columns into the index (stack appends them as
# the last level), name the resulting Series "value", and bring the metric
# level to the front, ahead of the three parameter levels.
stacked_df = (
    combined_stats
    .stack()
    .rename("value")
    .reorder_levels(
        [-1, 'lmsSlope3_lms_threshold', 'lmsSlope3_long_slope_thresh', 'lmsSlope3_short_slope_thresh']
    )
)

# Volume plot over the three parameter levels, with a slider over level 0
# (the metric level after the reordering above).
stacked_df.vbt.volume(
    x_level='lmsSlope3_lms_threshold',
    y_level='lmsSlope3_long_slope_thresh',
    z_level='lmsSlope3_short_slope_thresh',
    slider_level=0,
).show()


In [None]:
# Report the best value of each headline metric among the filtered simulations,
# together with the parameter combination (column label) that produced it.

# Total return
total_return_by_combo = filtered_pf.total_return
best_total_return = total_return_by_combo.max()
print(f'The best total return of all the combinations is {best_total_return:.2%}')
best_total_return_combination = total_return_by_combo.idxmax()
print(f'The best combination for Total Return is {best_total_return_combination}')

# Max drawdown
# NOTE(review): max() is the mildest drawdown only if drawdowns are stored as
# negative numbers — confirm.
max_drawdown_by_combo = filtered_pf.max_drawdown
best_max_drawdown = max_drawdown_by_combo.max()
print(f'The best max drawdown of all the combinations is {best_max_drawdown:.2%}')
best_max_drawdown_combination = max_drawdown_by_combo.idxmax()
print(f'The best combination for max drawdown is {best_max_drawdown_combination}')

# Sharpe ratio
sharpe_by_combo = filtered_pf.sharpe_ratio
best_sharpe = sharpe_by_combo.max()
print(f'The best Sharpe ratio of all the combinations is {best_sharpe:.2f}')
best_sharpe_combination = sharpe_by_combo.idxmax()
print(f'The best combination for Sharpe Ratio is {best_sharpe_combination}')

# Win rate
win_rate_by_combo = filtered_pf.trades.win_rate
best_win_rate = win_rate_by_combo.max()
print(f'The best Win Rate of all the combinations is {best_win_rate:.2%}')
best_win_rate_combination = win_rate_by_combo.idxmax()
print(f'The best combination for Win Rate is {best_win_rate_combination}')

# Profit factor
profit_factor_by_combo = filtered_pf.trades.profit_factor
best_profit_factor = profit_factor_by_combo.max()
print(f'The best Profit Factor of all the combinations is {best_profit_factor:.2f}')
best_profit_factor_combination = profit_factor_by_combo.idxmax()
print(f'The best combination for Profit Factor is {best_profit_factor_combination}')

In [None]:
# Stats and plot for the combination with the best total return.
best_total_return_pf = multiple_pf[best_total_return_combination]
print(best_total_return_pf.stats())
best_total_return_pf.plot().show()

In [None]:
# Stats and plot for the combination with the best win rate.
best_win_rate_pf = multiple_pf[best_win_rate_combination]
print(best_win_rate_pf.stats())
best_win_rate_pf.plot().show()

In [None]:
# Stats, long/short trade counts and plot for the best-Sharpe combination.
best_sharpe_combination_pf = multiple_pf[best_sharpe_combination]
best_sharpe_trades = best_sharpe_combination_pf.trades
print(best_sharpe_combination_pf.stats())
print(best_sharpe_trades.direction_long.count())
print(best_sharpe_trades.direction_short.count())
best_sharpe_combination_pf.plot().show()

In [None]:
# Readable trade records of the best-Sharpe combination (CSV export left disabled).
multiple_pf[best_sharpe_combination].trades.records_readable #.to_csv('best_sharpe_combination.csv')

In [None]:
# Stats and a taller plot for the combination with the best profit factor.
best_profit_factor_pf = multiple_pf[best_profit_factor_combination]
print(best_profit_factor_pf.stats())
best_profit_factor_pf.plot(height=900).show()

## Now let's run the alternate strategy

In [None]:
# Clear up some memory

# del lms_strategy
# del multiple_pf

In [None]:
# Grid resolution and value ranges for the alternate (type 4) strategy.
num_increments = 15

lms_min = df["long_minus_short"].min()  # Try 0.6
lms_mid = df["long_minus_short"].median()
lms_max = df["long_minus_short"].max()  # Try 1.1

long_slope_min = df["long_slope"].min()
long_slope_max = df["long_slope"].max()

short_slope_min = df["short_slope"].min()
short_slope_max = df["short_slope"].max()


In [None]:
def lms_slope_type_4(long_minus_short, long_slope, short_slope, lms_threshold, long_slope_thresh, short_slope_thresh,
                     *, exit_multiplier=1.2):
    """Build entry and exit signals for both directions from the LMS level and slopes.

    Entries use the same rules as the type 3 strategy: a long entry needs
    ``long_minus_short < lms_threshold`` and ``long_slope > long_slope_thresh``;
    a short entry needs ``long_minus_short < lms_threshold`` and
    ``short_slope < short_slope_thresh``. Both directions exit when
    ``long_minus_short`` rises above ``lms_threshold * exit_multiplier``.

    Args:
        long_minus_short, long_slope, short_slope: Input series/arrays.
        lms_threshold: Level below which entries are allowed.
        long_slope_thresh: Minimum (exclusive) long slope for a long entry.
        short_slope_thresh: Maximum (exclusive) short slope for a short entry.
        exit_multiplier: Keyword-only factor applied to ``lms_threshold`` to
            get the exit level. Defaults to 1.2, the value that was previously
            hard-coded, so existing callers are unaffected.

    Returns:
        ``(entries, exits, short_entries, short_exits)``: four boolean
        ``pd.Series`` with a default integer index. ``exits`` and
        ``short_exits`` hold the same values but are separate objects.
    """
    lms_is_low = long_minus_short < lms_threshold
    # The exit condition is shared by both directions, so evaluate it once.
    exit_signal = np.where(long_minus_short > lms_threshold * exit_multiplier, True, False)

    entries = pd.Series(np.where(lms_is_low & (long_slope > long_slope_thresh), True, False))
    exits = pd.Series(exit_signal.copy())

    short_entries = pd.Series(np.where(lms_is_low & (short_slope < short_slope_thresh), True, False))
    short_exits = pd.Series(exit_signal.copy())

    return entries, exits, short_entries, short_exits

# Wrap `lms_slope_type_4` in a vectorbt indicator so it can be run over parameter grids.
lms_slope_type_4_indicator = vbt.IndicatorFactory(
    class_name="LongMinusShortSlopeType4",
    short_name="lmsSlope4",  # prefix of the parameter level names in the results
    input_names=["long_minus_short", "long_slope", "short_slope"],
    param_names=["lms_threshold", "long_slope_thresh", "short_slope_thresh"],
    output_names=["entries", "exits", "short_entries", "short_exits"],
).with_apply_func(
    lms_slope_type_4,
    takes_1d=True,  # the apply function takes 1-dim. arrays as input
    # Default parameter values.
    lms_threshold=0.7,
    # lms_lower_threshold=0.4,
    long_slope_thresh=0.0,
    short_slope_thresh=0.0,
)
# Evaluate the type 4 indicator on every combination of the three evenly
# spaced grids (num_increments points each, end points included).
lms_4_strategy = lms_slope_type_4_indicator.run(
    long_minus_short=df["long_minus_short"],
    long_slope=df["long_slope"],
    short_slope=df["short_slope"],
    lms_threshold=np.linspace(lms_min, lms_max, num_increments),
    long_slope_thresh=np.linspace(long_slope_min, long_slope_max, num_increments),
    short_slope_thresh=np.linspace(short_slope_min, short_slope_max, num_increments),
    # True: all combinations of parameters, False: only one combination for each parameter
    param_product=True,
    execute_kwargs=dict(engine="threadpool", chunk_len="auto", show_progress=True),
)

# Simulate every type 4 parameter combination. Positions are closed by the
# indicator's own exit signals; the row-based stop is left disabled here.
multiple_pf = vbt.Portfolio.from_signals(
    open=df["BTCUSDT_Open"],
    high=df["BTCUSDT_High"],
    low=df["BTCUSDT_Low"],
    close=df["BTCUSDT_Close"],
    entries=lms_4_strategy.entries,
    exits=lms_4_strategy.exits,
    short_entries=lms_4_strategy.short_entries,
    short_exits=lms_4_strategy.short_exits,
    accumulate=False,
    # td_stop=prediction_window,
    # time_delta_format='Rows',
)

# Prints the average of all of the simulations
print(multiple_pf.stats())


In [None]:
# Sortino ratio of every simulated parameter combination.
multiple_pf.sortino_ratio


In [None]:
# Parameter combination with the highest Sortino ratio.
multiple_pf.sortino_ratio.idxmax()

In [None]:
# Column label (parameter combination) with the highest Sharpe ratio.
best_sharpe = multiple_pf.sharpe_ratio.idxmax()

# Select that single simulation once, then report and plot it.
best_sharpe_pf = multiple_pf[best_sharpe]
print(best_sharpe_pf.stats())
best_sharpe_pf.plot().show()

In [None]:

# Keep only the simulations with more than `min_num_entries` trades. The
# cut-off comes from the settings cell instead of a hard-coded literal.
num_trade_filter = multiple_pf.trades.count() > min_num_entries
# Use the boolean mask to select the matching columns of multiple_pf
filtered_pf = multiple_pf.loc[:, num_trade_filter]

# One (column name, per-combination metric) pair per line; the two parallel
# lists used by pd.concat are derived from it.
named_metrics = [
    ('total_return', filtered_pf.total_return),
    ('win_rate', filtered_pf.trades.win_rate),
    ('sharpe_ratio', filtered_pf.sharpe_ratio),
    ('sortino_ratio', filtered_pf.sortino_ratio),
    ('max_drawdown', filtered_pf.max_drawdown),
    ('profit_factor', filtered_pf.trades.profit_factor),
    ('long_count', filtered_pf.trades.direction_long.count()),
    ('short_count', filtered_pf.trades.direction_short.count()),
    ('long_pnl_sum', filtered_pf.trades.direction_long.pnl.sum()),
    ('short_pnl_sum', filtered_pf.trades.direction_short.pnl.sum()),
]
metrics = [metric for _, metric in named_metrics]
keys = [name for name, _ in named_metrics]

# One row per parameter combination, one column per metric.
combined_stats = pd.concat(metrics, axis=1, keys=keys)
combined_stats.to_csv("combined_stats.csv")
combined_stats


In [None]:
# Write the combined stats table to combined_stats.csv in the working directory.
combined_stats.to_csv('combined_stats.csv')

Sort by a metric, e.g. Total Return

In [None]:
# Top 20 parameter combinations by total return. Swap the column name for any
# of 'win_rate', 'sortino_ratio', 'profit_factor' or 'max_drawdown' to rank by
# a different metric.
combined_stats.sort_values('total_return', ascending=False).iloc[:20]



#### Create a dataframe for visualizations

In [None]:
# Move the metric names from the columns into the index (stack appends them as
# the last level), name the resulting Series "value", and bring the metric
# level to the front, ahead of the three parameter levels.
stacked_df = (
    combined_stats
    .stack()
    .rename("value")
    .reorder_levels(
        [-1, 'lmsSlope4_lms_threshold', 'lmsSlope4_long_slope_thresh', 'lmsSlope4_short_slope_thresh']
    )
)

# Volume plot over the three parameter levels, with a slider over level 0
# (the metric level after the reordering above).
stacked_df.vbt.volume(
    x_level='lmsSlope4_lms_threshold',
    y_level='lmsSlope4_long_slope_thresh',
    z_level='lmsSlope4_short_slope_thresh',
    slider_level=0,
).show()


In [None]:
# Report the best value of each headline metric among the filtered simulations,
# together with the parameter combination (column label) that produced it.

# Total return
total_return_by_combo = filtered_pf.total_return
best_total_return = total_return_by_combo.max()
print(f'The best total return of all the combinations is {best_total_return:.2%}')
best_total_return_combination = total_return_by_combo.idxmax()
print(f'The best combination for Total Return is {best_total_return_combination}')

# Max drawdown
# NOTE(review): max() is the mildest drawdown only if drawdowns are stored as
# negative numbers — confirm.
max_drawdown_by_combo = filtered_pf.max_drawdown
best_max_drawdown = max_drawdown_by_combo.max()
print(f'The best max drawdown of all the combinations is {best_max_drawdown:.2%}')
best_max_drawdown_combination = max_drawdown_by_combo.idxmax()
print(f'The best combination for max drawdown is {best_max_drawdown_combination}')

# Sharpe ratio
sharpe_by_combo = filtered_pf.sharpe_ratio
best_sharpe = sharpe_by_combo.max()
print(f'The best Sharpe ratio of all the combinations is {best_sharpe:.2f}')
best_sharpe_combination = sharpe_by_combo.idxmax()
print(f'The best combination for Sharpe Ratio is {best_sharpe_combination}')

# Win rate
win_rate_by_combo = filtered_pf.trades.win_rate
best_win_rate = win_rate_by_combo.max()
print(f'The best Win Rate of all the combinations is {best_win_rate:.2%}')
best_win_rate_combination = win_rate_by_combo.idxmax()
print(f'The best combination for Win Rate is {best_win_rate_combination}')

# Profit factor
profit_factor_by_combo = filtered_pf.trades.profit_factor
best_profit_factor = profit_factor_by_combo.max()
print(f'The best Profit Factor of all the combinations is {best_profit_factor:.2f}')
best_profit_factor_combination = profit_factor_by_combo.idxmax()
print(f'The best combination for Profit Factor is {best_profit_factor_combination}')

In [None]:
# Stats and plot for the combination with the best total return.
best_total_return_pf = multiple_pf[best_total_return_combination]
print(best_total_return_pf.stats())
best_total_return_pf.plot().show()

In [None]:
# Stats and plot for the combination with the best win rate.
best_win_rate_pf = multiple_pf[best_win_rate_combination]
print(best_win_rate_pf.stats())
best_win_rate_pf.plot().show()

In [None]:
# Stats, long/short trade counts and plot for the best-Sharpe combination.
best_sharpe_combination_pf = multiple_pf[best_sharpe_combination]
best_sharpe_trades = best_sharpe_combination_pf.trades
print(best_sharpe_combination_pf.stats())
print(best_sharpe_trades.direction_long.count())
print(best_sharpe_trades.direction_short.count())
best_sharpe_combination_pf.plot().show()

In [None]:
# Readable trade records of the best-Sharpe combination (CSV export left disabled).
multiple_pf[best_sharpe_combination].trades.records_readable #.to_csv('best_sharpe_combination.csv')

In [None]:
# Stats and a taller plot for the combination with the best profit factor.
best_profit_factor_pf = multiple_pf[best_profit_factor_combination]
print(best_profit_factor_pf.stats())
best_profit_factor_pf.plot(height=900).show()