# Backtesting Examples and Guides

In [1]:
import pandas as pd
import vectorbtpro as vbt
import numpy as np

In [2]:
# Global vectorbtpro settings used by the backtests in this notebook.
# "1m" is the index frequency string handed to vbt (presumably 1-minute bars — confirm).
vbt.settings.wrapping["freq"] = "1m"
# Initial cash configured for the simulated portfolios.
vbt.settings.portfolio["init_cash"] = 10_000

In [3]:
import os
import sys

# Put the current working directory on the import path so the modules imported
# below (presumably project-local files sitting next to this notebook) resolve.
sys.path.append(os.getcwd())

from dataframes_merger_factory import DataFrameMergerType, DataFrameMergerFactory
from parameter_optimization import DataFrameFormat
from parameter_optimization_factory import VbtBackTestProcessorType, VbtBackTestProcessorFactory
from settings_and_params import extract_prediction_window_size
from lstm_analysis_utils import process_pickle_files

# Running backtest on just one RID

### Old format - input is a directory containing pickle files

In [4]:
# Directory holding the pickle files of a single run (old input format).
pickle_files_path = "../data/RID0047_LSTM_pw75_lb250_bt1000_mem10000"

# The last path component is used as the model name; that name is what
# extract_prediction_window_size() is given to obtain the prediction window.
model_name = pickle_files_path.rsplit("/", 1)[-1]
prediction_window = extract_prediction_window_size(model_name)

- Must use DataFrameFormat.SINGLE
- VbtBackTestProcessorType can be any type that your system can handle
- Recommended value is VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS unless you have more than 64 GB of RAM

In [5]:
# Build the dataframe from the pickle directory, then backtest it.
# Frames produced this way are passed as DataFrameFormat.SINGLE.
df1 = process_pickle_files(pickle_files_path, prediction_window)
df1_result = VbtBackTestProcessorFactory.create(
    VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS,
    df1,
    prediction_window,
    DataFrameFormat.SINGLE,
).run_backtest()

Correlation between Euclidean distance between long array and short array and future actual results:  0.16387027184406824
Correlation between difference in long minus short predictions and future actual results for longs:  0.7528851278002029
Correlation between difference in long minus short predictions and future actual results for shorts:  0.7552587935258038
Correlation between long slopes and future results: -0.022706356842955373
Correlation between short slopes and future results: 0.043585041128925334


### New format - input is the CSV file containing the dataframe dumped by calling process_pickle_files() first
Alternatively, the input can be the CSV files downloaded from our shared Google Drive.

In [11]:
# CSV dump(s) of a previously processed dataframe (new input format).
DATAFRAME_CSV_FILES = ["../results/RID0047_LSTM_pw75_lb250_bt1000_mem10000.csv"]

# Model name = file name of the first CSV, cut at its first "."; it is passed to
# extract_prediction_window_size() to obtain the prediction window.
model_name = DATAFRAME_CSV_FILES[0].rsplit("/", 1)[-1].split(".", 1)[0]
prediction_window = extract_prediction_window_size(model_name)

- Must use DataFrameFormat.MERGED
- VbtBackTestProcessorType can be any type that your system can handle
- Recommended value is VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS unless you have more than 64 GB of RAM

In [12]:
# Load the CSV file(s) through the INTERSECTION merger, then backtest the result.
# Frames produced by a merger are passed as DataFrameFormat.MERGED.
df2 = DataFrameMergerFactory.create(
    DataFrameMergerType.INTERSECTION
).process(DATAFRAME_CSV_FILES)
df2_result = VbtBackTestProcessorFactory.create(
    VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS,
    df2,
    prediction_window,
    DataFrameFormat.MERGED,
).run_backtest()

# Running backtest on multiple RIDs

In [6]:
# CSV dumps used for the multi-RID examples, one path per entry.
MATCHING_DATAFRAME_CSV_FILES = [
    "../results/RID0044_LSTM_pw38_lb5000_bt2000_mem6000.csv",
    "../results/RID0046_LSTM_pw38_lb250_bt2000_mem6000.csv",
]
MISMATCHING_DATAFRAME_CSV_FILES = [
    "../results/RID0029_LSTM_pw38_lb250_bt2000_mem6000.csv",
    "../results/RID0045_LSTM_pw38_lb5000_bt2000_mem10000.csv",
    "../results/RID0046_LSTM_pw38_lb250_bt2000_mem6000.csv",
]

In [13]:
# Derive the prediction window from the files being merged in this cell.
# The `prediction_window` left over from the single-RID cells belongs to a
# different model (its name contains "pw75"), whereas these CSVs are "pw38"
# runs, so reusing it here would backtest with the wrong window.
intersection_prediction_windows = [
    extract_prediction_window_size(csv_path.split("/")[-1].split(".")[0])
    for csv_path in MISMATCHING_DATAFRAME_CSV_FILES
]
# Only one window is passed to the backtest, so every merged file must agree on it.
assert all(
    window == intersection_prediction_windows[0]
    for window in intersection_prediction_windows
), f"Merged files disagree on prediction window: {intersection_prediction_windows}"
intersection_prediction_window = intersection_prediction_windows[0]

# Merge the per-RID CSVs with the INTERSECTION merger, then backtest the merged frame.
intersection_df = DataFrameMergerFactory.create(
    DataFrameMergerType.INTERSECTION
).process(MISMATCHING_DATAFRAME_CSV_FILES)
intersection_df_result = VbtBackTestProcessorFactory.create(
    VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS,
    intersection_df,
    intersection_prediction_window,
    DataFrameFormat.MERGED,
).run_backtest()

In [None]:
# Derive the prediction window from the files being merged in this cell.
# The `prediction_window` left over from the single-RID cells belongs to a
# different model (its name contains "pw75"), whereas these CSVs are "pw38"
# runs, so reusing it here would backtest with the wrong window.
union_prediction_windows = [
    extract_prediction_window_size(csv_path.split("/")[-1].split(".")[0])
    for csv_path in MISMATCHING_DATAFRAME_CSV_FILES
]
# Only one window is passed to the backtest, so every merged file must agree on it.
assert all(
    window == union_prediction_windows[0]
    for window in union_prediction_windows
), f"Merged files disagree on prediction window: {union_prediction_windows}"
union_prediction_window = union_prediction_windows[0]

# Merge the per-RID CSVs with the UNION merger, then backtest the merged frame.
union_df = DataFrameMergerFactory.create(
    DataFrameMergerType.UNION
).process(MISMATCHING_DATAFRAME_CSV_FILES)
union_df_result = VbtBackTestProcessorFactory.create(
    VbtBackTestProcessorType.WITH_MEMORY_CONSTRAINT_TWO_LOOPS,
    union_df,
    union_prediction_window,
    DataFrameFormat.MERGED,
).run_backtest()