# Backtesting Examples and Guides

In [1]:
import pandas as pd
import vectorbtpro as vbt
import numpy as np

In [2]:
# Global vectorbt defaults shared by every backtest in this notebook.
vbt.settings.wrapping["freq"] = "1m"
vbt.settings.portfolio["init_cash"] = 10000

In [3]:
import os
import sys

sys.path.append(os.getcwd())

from dataframes_merger_factory import DataFrameMergerType, DataFrameMergerFactory
from dataframes_merger import DataFrameMergerUtils
from parameter_optimization import DataFrameFormat
from parameter_optimization_factory import VbtBackTestProcessorType, VbtBackTestProcessorFactory
from settings_and_params import extract_prediction_window_size, get_data_frame_file_path, get_results_file_path, generate_csv_for_excel_output_file_path, extract_run_id
from lstm_analysis_utils import process_pickle_files
from multiple_models_backtesting import MultiModelBacktest, MultiModelBacktestMethod

# Running backtest on just one RID

### Old format - input is a directory containing pickle files

In [None]:
# Directory that contains the pickle files of a single run.
pickle_files_path = "../data/RID0047_LSTM_pw75_lb250_bt1000_mem10000"

# The model name is the last path component; the prediction window is
# derived from that name by extract_prediction_window_size().
model_name = pickle_files_path.rsplit("/", 1)[-1]
prediction_window = extract_prediction_window_size(model_name)

- Must use DataFrameFormat.SINGLE
- VbtBackTestProcessorType can be any type that your system can handle
- Recommended value is VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS unless you have more than 64 GB of RAM

In [None]:
# Build the dataframe from the pickle directory, then backtest it as a
# single-model (DataFrameFormat.SINGLE) input.
df1 = process_pickle_files(pickle_files_path, prediction_window)
df1_result = VbtBackTestProcessorFactory.create(
  VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS,
  df1,
  prediction_window,
  DataFrameFormat.SINGLE,
).run_backtest()

### New format - input is the CSV file containing the dataframe dumped by calling process_pickle_files() first
Alternatively, the input can be CSV files downloaded from our Google shared drive.

In [None]:
# CSV dump of the dataframe for the run to backtest.
DATAFRAME_CSV_FILES = ["../results/RID0047_LSTM_pw75_lb250_bt1000_mem10000.csv"]

# Drop the directory part and everything from the first "." onwards to get
# the model name, then derive the prediction window from it.
model_name = DATAFRAME_CSV_FILES[0].rsplit("/", 1)[-1].split(".", 1)[0]
prediction_window = extract_prediction_window_size(model_name)

- Must use DataFrameFormat.MERGED
- VbtBackTestProcessorType can be any type that your system can handle
- Recommended value is VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS unless you have more than 64 GB of RAM

In [None]:
# Load the CSV file(s) through an INTERSECTION merger, then backtest the
# result as a merged (DataFrameFormat.MERGED) input.
df2 = DataFrameMergerFactory.create(DataFrameMergerType.INTERSECTION).process(
  DATAFRAME_CSV_FILES
)
df2_result = VbtBackTestProcessorFactory.create(
  VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS,
  df2,
  prediction_window,
  DataFrameFormat.MERGED,
).run_backtest()

# Running backtest on multiple RIDs

### Using average method

In [None]:
# Dataframe CSV files of the runs that are backtested together.
MISMATCHING_DATAFRAME_CSV_FILES = [
  "../results/RID0029_LSTM_pw38_lb250_bt2000_mem6000.csv",
  "../results/RID0045_LSTM_pw38_lb5000_bt2000_mem10000.csv",
  "../results/RID0046_LSTM_pw38_lb250_bt2000_mem6000.csv",
]

The merger type can be either INTERSECTION or UNION

In [None]:
# Merger handed to MultiModelBacktest to combine the per-model CSV dataframes.
merger = DataFrameMergerFactory.create(DataFrameMergerType.INTERSECTION)

Running the backtest is one line of code

In [None]:
# run() returns a pair: a dataframe (presumably the merged input - confirm)
# and the backtest result.
df, result = MultiModelBacktest(merger, MISMATCHING_DATAFRAME_CSV_FILES).run()

### Using individual method

In [4]:
# Models whose signals are combined with the individual (per-model) method.
MODEL_NAMES = [
  "RID0029_LSTM_pw38_lb250_bt2000_mem6000",
  "RID0045_LSTM_pw38_lb5000_bt2000_mem10000",
]

In [5]:
# Merge the per-model dataframes twice, once with each merger type.
intersection_df = DataFrameMergerFactory.create(DataFrameMergerType.INTERSECTION).process(
  [get_data_frame_file_path(model_name) for model_name in MODEL_NAMES]
)
union_df = DataFrameMergerFactory.create(DataFrameMergerType.UNION).process(
  [get_data_frame_file_path(model_name) for model_name in MODEL_NAMES]
)

In [6]:
from typing import Dict, List


def load_result_files(model_names: List[str]) -> Dict[str, pd.DataFrame]:
  """Load the results CSV of every model, keyed by run ID.

  Args:
    model_names: Model names; each one is resolved to a CSV path with
      get_results_file_path(). This must be a collection of names, not a
      single string (a bare string would be iterated character by character).

  Returns:
    Mapping from extract_run_id(model_name) to the dataframe read from that
    model's results CSV. If two names yield the same run ID, the later one
    replaces the earlier one.
  """
  return {
    extract_run_id(model_name): pd.read_csv(get_results_file_path(model_name))
    for model_name in model_names
  }

In [7]:
# Sort the model names in place (this mutates MODEL_NAMES), then load the
# results CSV of each model.
MODEL_NAMES.sort()
full_results = load_result_files(MODEL_NAMES)

In [8]:
def select_top_n_combinations(df: pd.DataFrame, n: int) -> pd.DataFrame:
  """Return the n best-scoring rows of df whose long/short counts are balanced.

  Three helper columns are computed on a copy, so the caller's dataframe is
  left untouched:

  * ``combined_weight``: product of total_return, win_rate, sharpe_ratio,
    sortino_ratio, (1 + max_drawdown) and profit_factor.
  * ``long_short_count``: long_count + short_count.
  * ``long_vs_short_diff_pct``: absolute difference between the long share
    and the short share of ``long_short_count``.

  Args:
    df: Results with the columns total_return, win_rate, sharpe_ratio,
      sortino_ratio, max_drawdown, profit_factor, long_count and short_count.
    n: Maximum number of rows to return.

  Returns:
    A new dataframe holding up to n rows with long_vs_short_diff_pct < 0.5,
    ordered by descending combined_weight and including the helper columns.
  """
  long_short_count = df["long_count"] + df["short_count"]

  # NOTE(review): the score is a plain product, so two negative metrics
  # (e.g. negative return and negative Sharpe) multiply to a positive
  # weight - confirm that this ranking is intended.
  scored = df.assign(
    combined_weight=(
      df["total_return"]
      * df["win_rate"]
      * df["sharpe_ratio"]
      * df["sortino_ratio"]
      * (1 + df["max_drawdown"])
      * df["profit_factor"]
    ),
    long_short_count=long_short_count,
    long_vs_short_diff_pct=(
      (df["long_count"] / long_short_count) - (df["short_count"] / long_short_count)
    ).abs(),
  )

  # Rows without any trades give NaN here; NaN < 0.5 is False, so they are dropped.
  balanced = scored[scored["long_vs_short_diff_pct"] < 0.5]

  return balanced.nlargest(n, "combined_weight")


In [9]:
def select_top_performing_combinations(full_results: Dict[str, pd.DataFrame], n: int = 10) -> Dict[str, pd.DataFrame]:
  """Apply select_top_n_combinations() to the results of every model.

  Args:
    full_results: Mapping from model key to that model's results dataframe.
    n: Number of rows to keep per model; defaults to 10.

  Returns:
    Mapping with the same keys, each holding the selected top rows.
  """
  return {key: select_top_n_combinations(value, n) for key, value in full_results.items()}

In [10]:
# Reduce each model's results to its top-ranked rows.
top_results = select_top_performing_combinations(full_results)

In [11]:
from dataclasses import dataclass
from typing import List, Tuple
import itertools

@dataclass
class Thresholds:
  """Threshold values attached to a single model id."""

  # Identifier of the model these thresholds belong to.
  model_id: str
  # Cut-off compared against the model's long-slope column.
  long_slope: float
  # Cut-off compared against the model's short-slope column.
  short_slope: float
  # Cut-off compared against the model's long-minus-short column.
  long_minus_short: float
    
def extract_thresholds(df: pd.DataFrame, model_id: str) -> List[Thresholds]:
  """Build one Thresholds object per row of df.

  Args:
    df: Dataframe with the columns LMSWithSlopes_long_slope_thresh,
      LMSWithSlopes_short_slope_thresh and LMSWithSlopes_lms_threshold.
    model_id: Identifier stored on every Thresholds object created.

  Returns:
    Thresholds objects in the row order of df.
  """
  collected = []

  for _, record in df.iterrows():
    collected.append(
      Thresholds(
        model_id=model_id,
        long_slope=record['LMSWithSlopes_long_slope_thresh'],
        short_slope=record['LMSWithSlopes_short_slope_thresh'],
        long_minus_short=record['LMSWithSlopes_lms_threshold'],
      )
    )

  return collected


def extract_thresholds_from_models(data: Dict[str, pd.DataFrame]) -> Dict[str, List[Thresholds]]:
  """Run extract_thresholds() on every model's dataframe.

  Args:
    data: Mapping from model id to that model's dataframe.

  Returns:
    Mapping with the same keys; each value is the list of Thresholds
    extracted from the corresponding dataframe, tagged with its key.
  """
  return {model_id: extract_thresholds(frame, model_id) for model_id, frame in data.items()}


def calculate_majority_count(num_cols: int) -> int:
  """Return the smallest vote count that exceeds half of num_cols."""
  half, _remainder = divmod(num_cols, 2)
  return half + 1




def calc_product(data: Dict[str, List[Thresholds]]) -> List[List[Thresholds]]:
  """Return every way of picking one entry from each model's list.

  Args:
    data: Mapping from model id to that model's candidate entries.

  Returns:
    The Cartesian product of the value lists, taken in the dictionary's
    iteration order; each combination is a list with one entry per model.
  """
  per_model_options = data.values()

  return [list(combo) for combo in itertools.product(*per_model_options)]
  

In [12]:
def calculate_entries_using_majority_rule(df: pd.DataFrame, thresholds: List[Thresholds]) -> Tuple[pd.Series, pd.Series]:
  """Derive long and short entry signals from a majority vote across models.

  A model votes for a long entry on the rows where its long-minus-short
  column is below ``long_minus_short`` and its long-slope column is above
  ``long_slope``. It votes for a short entry where its long-minus-short
  column is below ``long_minus_short`` and its short-slope column is below
  ``short_slope``. A row becomes an entry once at least
  ``calculate_majority_count(len(thresholds))`` models vote for it.

  Args:
    df: Dataframe holding, for every ``model_id`` in ``thresholds``, the
      columns named by the DataFrameMergerUtils column-name helpers.
    thresholds: One Thresholds entry per participating model.

  Returns:
    ``(entries, short_entries)``: two boolean Series that share ``df``'s
    index, so they stay aligned with the price columns. Both are all False
    when ``thresholds`` is empty.
  """
  # Vote counters start at zero on df's own index so the result keeps it.
  long_votes  = pd.Series(0, index=df.index)
  short_votes = pd.Series(0, index=df.index)

  for model_thresholds in thresholds:
    lms_col_name          = DataFrameMergerUtils.get_long_minus_short_col_name(model_thresholds.model_id)
    long_slope_col_name   = DataFrameMergerUtils.get_long_slope_col_name(model_thresholds.model_id)
    short_slope_col_name  = DataFrameMergerUtils.get_short_slope_col_name(model_thresholds.model_id)

    # NOTE(review): comparisons against NaN are False, so a model without
    # data on a row votes "no" while still counting towards the required
    # majority - confirm this is the intended handling of missing rows.
    lms_below_threshold = df[lms_col_name] < model_thresholds.long_minus_short

    long_votes  = long_votes  + (lms_below_threshold & (df[long_slope_col_name ] > model_thresholds.long_slope ))
    short_votes = short_votes + (lms_below_threshold & (df[short_slope_col_name] < model_thresholds.short_slope))

  majority_count = calculate_majority_count(len(thresholds))

  return long_votes >= majority_count, short_votes >= majority_count
    

In [13]:
# Convert each model's top result rows into Thresholds objects.
top_results_thresholds = extract_thresholds_from_models(top_results)

In [14]:
# Cartesian product: every combination holds one Thresholds entry per model.
flattened_combinations = calc_product(top_results_thresholds)

In [15]:
flattened_combinations

[[Thresholds(model_id='RID0029', long_slope=0.0034901446115511, short_slope=-0.0034901491865038, long_minus_short=0.8282123056465182),
  Thresholds(model_id='RID0045', long_slope=-0.0018148835764432, short_slope=0.0001395794120174, long_minus_short=0.8632932058696089)],
 [Thresholds(model_id='RID0029', long_slope=0.0034901446115511, short_slope=-0.0034901491865038, long_minus_short=0.8282123056465182),
  Thresholds(model_id='RID0045', long_slope=-0.002931733149661, short_slope=0.0009772104746914, long_minus_short=0.954925427149082)],
 [Thresholds(model_id='RID0029', long_slope=0.0034901446115511, short_slope=-0.0034901491865038, long_minus_short=0.8282123056465182),
  Thresholds(model_id='RID0045', long_slope=-0.0015356711831387, short_slope=0.0001395794120174, long_minus_short=0.8632932058696089)],
 [Thresholds(model_id='RID0029', long_slope=0.0034901446115511, short_slope=-0.0034901491865038, long_minus_short=0.8282123056465182),
  Thresholds(model_id='RID0045', long_slope=0.00153566

In [25]:
# TODO: evaluate every entry of flattened_combinations in a loop; for now
# only the first combination is turned into entry signals.
entries, short_entries = calculate_entries_using_majority_rule(union_df, flattened_combinations[0])

In [17]:
union_df.head(2)

Unnamed: 0_level_0,open,high,low,close,long_minus_short_RID0029,long_slope_RID0029,short_slope_RID0029,long_minus_short_RID0045,long_slope_RID0045,short_slope_RID0045,long_slope_avg,short_slope_avg,long_minus_short_avg
close_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-01-09 00:03:00+00:00,41659.21,41670.06,41601.83,41611.49,0.602898,-0.001501,0.002778,,,,-0.001501,0.002778,0.602898
2022-01-09 00:08:00+00:00,41479.94,41500.76,41419.93,41460.91,0.63646,-0.000976,0.002901,,,,-0.000976,0.002901,0.63646


In [18]:
union_df.tail(2)

Unnamed: 0_level_0,open,high,low,close,long_minus_short_RID0029,long_slope_RID0029,short_slope_RID0029,long_minus_short_RID0045,long_slope_RID0045,short_slope_RID0045,long_slope_avg,short_slope_avg,long_minus_short_avg
close_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2023-07-30 09:44:00+00:00,29264.4,29264.5,29261.2,29264.5,0.913718,0.001026,0.001053,1.101525,0.003711,-0.001327,0.002369,-0.000137,1.007621
2023-07-30 10:53:00+00:00,29254.0,29254.0,29250.1,29250.1,1.29121,-0.003611,0.000432,1.233473,0.003828,-0.00247,0.000108,-0.001019,1.262342
