In [1]:
import pandas as pd
import vectorbtpro as vbt
import numpy as np

In [2]:
# Notebook-wide vectorbtpro defaults; set once, before any backtest cell runs.
vbt.settings.wrapping["freq"] = "1m"  # NOTE(review): presumably 1-minute bars — confirm how vbt parses "1m"
vbt.settings.portfolio["init_cash"] = 10_000  # value stored under the portfolio 'init_cash' setting

In [3]:
import os
import sys

# Put the current working directory on sys.path before the project-local
# imports that follow in this cell. The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry each time.
_notebook_dir = os.getcwd()
if _notebook_dir not in sys.path:
    sys.path.append(_notebook_dir)

from dataframes_merger import IntersectionDataFrameMerger, UnionDataFrameMerger
from parameter_optimization import run_vbt_backtest, extract_metrics_from_result, DataFrameFormat
from settings_and_params import extract_prediction_window_size, generate_excel_output_file_path, generate_dataframe_csv_output_file_path, generate_csv_for_excel_output_file_path
from lstm_analysis_utils import process_pickle_files

# Running backtest on just one RID

### Old format (pickle files)

In [None]:
# Old-format input for a single run (RID); presumably a directory of pickle
# files, since it is handed to process_pickle_files() in the next cell.
pickle_files_path               = "../data/RID0047_LSTM_pw75_lb250_bt1000_mem10000"

# The last path component is used as the model name. normpath() strips any
# trailing separator first, so "…/RID0047_…/" still yields the directory name;
# a plain split('/')[-1] would return an empty string for that input.
model_name                      = os.path.basename(os.path.normpath(pickle_files_path))

# Everything below is derived from the model name by project helpers.
# NOTE: the "New format" section assigns these same names, so re-run this cell
# before the old-format backtest cell if the new-format cells ran in between.
prediction_window               = extract_prediction_window_size(model_name)
excel_output_file_name          = generate_excel_output_file_path(model_name)
csv_for_excel_output_file_name  = generate_csv_for_excel_output_file_path(model_name)
dataframe_csv_output_file_name  = generate_dataframe_csv_output_file_path(model_name)

In [5]:
# Old-format pipeline: load the pickle data for the configured model, run the
# backtest with the SINGLE dataframe format, then extract metrics.
# Depends on pickle_files_path / prediction_window / csv_for_excel_output_file_name
# as set by the old-format configuration cell above.
df1 = process_pickle_files(pickle_files_path, prediction_window)
df1_result = run_vbt_backtest(df1, prediction_window, DataFrameFormat.SINGLE)
# Last expression of the cell, so any return value is rendered as cell output.
# NOTE(review): presumably also writes the metrics to the given CSV path — confirm.
extract_metrics_from_result(df1_result, csv_for_excel_output_file_name)

Correlation between Euclidean distance between long array and short array and future actual results:  0.16387027184406824
Correlation between difference in long minus short predictions and future actual results for longs:  0.7528851278002029
Correlation between difference in long minus short predictions and future actual results for shorts:  0.7552587935258038
Correlation between long slopes and future results: -0.022706356842955373
Correlation between short slopes and future results: 0.043585041128925334


### New format (DataFrame CSV files)

In [6]:
# New-format input: a list of result CSV files (a single file for this run).
DATAFRAME_CSV_FILES    = ["../results/RID0044_LSTM_pw38_lb5000_bt2000_mem6000.csv"]

# Model name = file name of the first CSV without its extension. splitext()
# removes only the final ".csv", so a name that itself contains a dot
# (e.g. "..._lr0.001.csv") is kept whole; split(".")[0] would cut it at the
# first dot.
model_name                      = os.path.splitext(os.path.basename(DATAFRAME_CSV_FILES[0]))[0]

# Everything below is derived from the model name by project helpers.
# NOTE: the "Old format" section assigns these same names, so re-run this cell
# before the new-format backtest cell if the old-format cells ran in between.
prediction_window               = extract_prediction_window_size(model_name)
excel_output_file_name          = generate_excel_output_file_path(model_name)
csv_for_excel_output_file_name  = generate_csv_for_excel_output_file_path(model_name)
dataframe_csv_output_file_name  = generate_dataframe_csv_output_file_path(model_name)

In [11]:
# New-format pipeline: build the dataframe from the configured CSV list via the
# intersection merger (a single file here), run the backtest with the MERGED
# dataframe format, then extract metrics.
# Depends on DATAFRAME_CSV_FILES / prediction_window / csv_for_excel_output_file_name
# as set by the new-format configuration cell above.
df2 = IntersectionDataFrameMerger().process(DATAFRAME_CSV_FILES)
df2_result = run_vbt_backtest(df2, prediction_window, DataFrameFormat.MERGED)
# Last expression of the cell, so any return value is rendered as cell output.
# NOTE(review): presumably also writes the metrics to the given CSV path — confirm.
extract_metrics_from_result(df2_result, csv_for_excel_output_file_name)

# Running backtest on multiple RIDs

In [None]:
# Result CSVs used for the multi-RID merge experiments, one path per line.
# NOTE(review): what "matching" vs "mismatching" refers to is not visible in
# this notebook — presumably whether the files cover the same rows; confirm.
MATCHING_DATAFRAME_CSV_FILES = [
    "../results/RID0044_LSTM_pw38_lb5000_bt2000_mem6000.csv",
    "../results/RID0046_LSTM_pw38_lb250_bt2000_mem6000.csv",
]
MISMATCHING_DATAFRAME_CSV_FILES = [
    "../results/RID0029_LSTM_pw38_lb250_bt2000_mem6000.csv",
    "../results/RID0045_LSTM_pw38_lb5000_bt2000_mem10000.csv",
    "../results/RID0046_LSTM_pw38_lb250_bt2000_mem6000.csv",
]

In [None]:
# Merge the three "mismatching" result CSVs with the intersection merger.
# NOTE(review): presumably keeps only rows present in every file — confirm in
# dataframes_merger.
intersection_df = IntersectionDataFrameMerger().process(MISMATCHING_DATAFRAME_CSV_FILES)

In [None]:
# Merge the same three "mismatching" result CSVs with the union merger.
# NOTE(review): presumably keeps rows present in any file — confirm in
# dataframes_merger.
union_df = UnionDataFrameMerger().process(MISMATCHING_DATAFRAME_CSV_FILES)