In [3]:
import boto3
import pandas as pd
import warnings
import ast

s3 = boto3.client('s3')

### Analysis of BFC TFS Process

In [4]:
base_path = '/Users/charlesmiller/Documents/temporal_simulation_results'
bfc_df = pd.read_csv(f'{base_path}/CDBFC/2024_4_23.csv')

In [5]:
df = bfc_df
df = df.sort_values('median_gross_accuracy', ascending=False).reset_index(drop=True)
df['gross_acc_median_rank'] = df.index + 1
df = df.sort_values('median_tp', ascending=False).reset_index(drop=True)
df['median_tp_rank'] = df.index + 1
df = df.sort_values('tp_avg', ascending=False).reset_index(drop=True)
df['tp_avg_rank'] = df.index + 1
df = df.sort_values('gross_accuracy', ascending=False).reset_index(drop=True)
df['gross_acc_rank'] = df.index + 1
df['weighted_rank'] = (df['gross_acc_median_rank'] + df['median_tp_rank'] + df['tp_avg_rank'] + df['gross_acc_rank'])/4
df.sort_values('weighted_rank', ascending=True,inplace=True)

In [6]:
df.head(10)

Unnamed: 0,model_name,features,tp_avg,gross_accuracy,num_features,median_tp,median_gross_accuracy,gross_acc_median_rank,median_tp_rank,tp_avg_rank,gross_acc_rank,weighted_rank
0,CDBFC_temporal_simulation_41,"['price_change_H', 'day_of_month', 'cmf_15MA_d...",80.251497,17.57485,41,69.0,11.0,23,2,1,1,6.75
1,CDBFC_temporal_simulation_10,"['High-Close', 'cmf_15MA', 'SPY_5d_diff', 'day...",79.904192,16.790419,47,65.0,13.0,11,13,2,2,7.0
9,CDBFC_temporal_simulation_64,"['roc3', 'roc5', 'macd_15MA_diff', 'DMplus', '...",79.215569,15.51497,46,65.0,13.0,10,16,5,10,10.25
7,CDBFC_temporal_simulation_76,"['price_range', 'SPY_5d', 'macd_15MA', 'day_of...",79.299401,15.922156,44,64.0,14.0,8,22,4,8,10.5
4,CDBFC_temporal_simulation_99,"['month', 'return_vol_5D', 'DX', 'cmf_15MA_dif...",78.796407,16.149701,36,63.0,14.0,5,26,7,5,10.75
6,CDBFC_temporal_simulation_90,"['price_change_H', 'DIminus', 'price_3D20D_dif...",79.742515,16.02994,48,65.0,11.0,25,10,3,7,11.25
2,CDBFC_temporal_simulation_53,"['volume_sum15_5DMA_diff', 'return_vol_10D', '...",76.502994,16.54491,36,65.0,13.0,12,9,22,3,11.5
11,CDBFC_temporal_simulation_34,"['cmf_15MA_diff', 'DIminus', 'High-Close', 'pr...",77.311377,14.904192,36,65.0,15.0,4,14,17,12,11.75
10,CDBFC_temporal_simulation_78,"['price_change_H', 'High-Close', 'return_vol_1...",77.712575,15.323353,46,67.0,12.0,18,5,14,11,12.0
8,CDBFC_temporal_simulation_32,"['bb_spread', 'H-PrevClose', 'rsi_15MA', 'macd...",78.934132,15.550898,47,63.0,14.0,7,28,6,9,12.5


In [7]:
## Statistics of note
print(f"Best Strategies avg features {bfc_df.head(10)['num_features'].mean()}")
print(f"Best Strategies avg gross accuracy {bfc_df.head(10)['gross_accuracy'].mean()}")
print(f"Best Strategies avg median gross accuracy {bfc_df.head(10)['median_gross_accuracy'].mean()}")
print(f"Best Strategies avg median tp {bfc_df.head(10)['median_tp'].mean()}")
print(f"Best Strategies avg tp avg {bfc_df.head(10)['tp_avg'].mean()}")

Best Strategies avg features 37.1
Best Strategies avg gross accuracy 7.727544910179641
Best Strategies avg median gross accuracy 5.4
Best Strategies avg median tp 55.8
Best Strategies avg tp avg 69.86946107784432


In [8]:
df['features'] = df['features'].apply(lambda x: ast.literal_eval(x))
feature_df = df.explode('features')

In [10]:
feature_rank_avg = feature_df.groupby('features')['weighted_rank'].mean()
top_25_features_bfc = pd.DataFrame(feature_rank_avg).sort_values('weighted_rank', ascending=True).index.tolist()
print(top_25_features_bfc)

['price_change_H', 'cd_vol', 'price_range', 'bb_trend', 'price_range_5DMA_diff', 'roc', 'bb_category', 'bb_spread', 'volume_15MA_diff', 'price_change_D', 'month', 'return_vol_10D', 'PrevClose-L', 'roc3', 'H-L', 'TR', 'volume_sum15', 'return_vol_5D_diff', 'price_3Ddiff', 'roc_15MA', 'macd_15MA_diff', 'rsi_15MA', 'SPY_5d_diff', 'return_vol_8H_diff', 'High-Close', 'DMminus', 'hour', 'return_vol_10D_diff', 'H-PrevClose', 'DMplus', 'DX', 'SPY_range_vol', 'Low-Close', 'price_5Ddiff', 'volume_sum15_10DMA_diff', 'rsi', 'cmf_15MA_diff', 'return_vol_5D', 'price_10Ddiff', 'day_of_week', 'High-Low', 'roc_15MA_diff', 'cmf', 'volume_sum15_5DMA_diff', 'SPY_20d', 'macd_15MA', 'day_of_month', 'adx', 'price_range_8MA_diff', 'cd_vol3', 'DIplus', 'return_vol_8H', 'price_20Ddiff', 'cmf_15MA', 'price_3D20D_diff', 'SPY_5d', 'SPY_20d_diff', 'rsi_15MA_diff', 'macd', 'roc5', 'DIminus']


In [11]:
x = ['price_change_H', 'cd_vol', 'price_range', 'bb_trend', 'price_range_5DMA_diff', 'roc', 'bb_category', 'bb_spread', 'volume_15MA_diff', 'price_change_D', 'month', 'return_vol_10D', 'PrevClose-L', 'roc3', 'H-L', 'TR', 'volume_sum15', 'return_vol_5D_diff', 'price_3Ddiff', 'roc_15MA', 'macd_15MA_diff', 'rsi_15MA', 'SPY_5d_diff', 'return_vol_8H_diff', 'High-Close', 'DMminus', 'hour', 'return_vol_10D_diff', 'H-PrevClose', 'DMplus', 'DX', 'SPY_range_vol', 'Low-Close', 'price_5Ddiff', 'volume_sum15_10DMA_diff', 'rsi', 'cmf_15MA_diff', 'return_vol_5D', 'price_10Ddiff', 'day_of_week', 'High-Low', 'roc_15MA_diff', 'cmf', 'volume_sum15_5DMA_diff', 'SPY_20d', 'macd_15MA', 'day_of_month', 'adx', 'price_range_8MA_diff', 'cd_vol3', 'DIplus', 'return_vol_8H', 'price_20Ddiff', 'cmf_15MA', 'price_3D20D_diff', 'SPY_5d', 'SPY_20d_diff', 'rsi_15MA_diff', 'macd', 'roc5', 'DIminus']
print(len(x))


61


### Analysis of BFP TFS Process

In [140]:
base_path = '/Users/charlesmiller/Documents/temporal_simulation_results'
bfp_df = pd.read_csv(f'{base_path}/CDBFP/2024_4_23.csv')

In [141]:
df = bfp_df
df = df.sort_values('median_gross_accuracy', ascending=False).reset_index(drop=True)
df['gross_acc_median_rank'] = df.index + 1
df = df.sort_values('median_tp', ascending=False).reset_index(drop=True)
df['median_tp_rank'] = df.index + 1
df = df.sort_values('tp_avg', ascending=False).reset_index(drop=True)
df['tp_avg_rank'] = df.index + 1
df = df.sort_values('gross_accuracy', ascending=False).reset_index(drop=True)
df['gross_acc_rank'] = df.index + 1
df['weighted_rank'] = (df['gross_acc_median_rank'] + df['median_tp_rank'] + df['tp_avg_rank'] + df['gross_acc_rank'])/4
df.sort_values('weighted_rank', ascending=True,inplace=True)

In [142]:
df.head(10)

Unnamed: 0,model_name,features,tp_avg,gross_accuracy,num_features,median_tp,median_gross_accuracy,gross_acc_median_rank,median_tp_rank,tp_avg_rank,gross_acc_rank,weighted_rank
0,CDBFP_temporal_simulation_67,"['volume_15MA_diff', 'day_of_month', 'price_ch...",77.964072,19.994012,40,72.0,11.0,7,2,2,1,3.0
13,CDBFP_temporal_simulation_83,"['return_vol_10D_diff', 'return_vol_8H', 'pric...",77.832335,18.191617,41,74.0,11.0,8,1,3,14,6.5
8,CDBFP_temporal_simulation_91,"['H-L', 'cd_vol', 'price_range_5DMA_diff', 'pr...",76.988024,18.670659,47,69.0,10.0,14,11,9,9,10.75
2,CDBFP_temporal_simulation_54,"['cmf_15MA', 'TR', 'roc', 'bb_category', 'DIpl...",75.017964,19.886228,42,70.0,9.0,22,6,15,3,11.5
4,CDBFP_temporal_simulation_77,"['return_vol_8H_diff', 'price_change_D', 'day_...",78.143713,19.42515,33,68.0,8.0,26,14,1,5,11.5
17,CDBFP_temporal_simulation_82,"['volume_sum15_10DMA_diff', 'SPY_5d_diff', 'ma...",77.08982,17.832335,30,68.0,10.0,9,12,7,18,11.5
1,CDBFP_temporal_simulation_60,"['price_change_H', 'price_3D20D_diff', 'return...",77.017964,19.976048,40,69.0,8.0,29,10,8,2,12.25
3,CDBFP_temporal_simulation_65,"['roc_15MA', 'macd_15MA', 'adx', 'price_20Ddif...",77.664671,19.742515,43,71.0,7.0,37,4,4,4,12.25
5,CDBFP_temporal_simulation_34,"['macd_15MA', 'return_vol_10D', 'bb_trend', 'p...",74.239521,19.317365,32,66.0,11.0,5,22,22,6,13.75
19,CDBFP_temporal_simulation_17,"['return_vol_10D', 'DX', 'PrevClose-L', 'price...",75.736527,17.437126,41,67.0,11.0,6,19,13,20,14.5


In [143]:
## Statistics of note
print(f"Best Strategies avg features {df.head(10)['num_features'].mean()}")
print(f"Best Strategies avg gross accuracy {df.head(10)['gross_accuracy'].mean()}")
print(f"Best Strategies avg median gross accuracy {df.head(10)['median_gross_accuracy'].mean()}")
print(f"Best Strategies avg median tp {df.head(10)['median_tp'].mean()}")
print(f"Best Strategies avg tp avg {df.head(10)['tp_avg'].mean()}")

Best Strategies avg features 38.9
Best Strategies avg gross accuracy 19.047305389221556
Best Strategies avg median gross accuracy 9.6
Best Strategies avg median tp 69.4
Best Strategies avg tp avg 76.76946107784431


In [144]:
df['features'] = df['features'].apply(lambda x: ast.literal_eval(x))
feature_df = df.explode('features')

In [2]:
feature_rank_avg = feature_df.groupby('features')['weighted_rank'].mean()
top_25_features_bfp = pd.DataFrame(feature_rank_avg).sort_values('weighted_rank', ascending=True).index.tolist()
top_25_features_bfp

NameError: name 'feature_df' is not defined

### Analysis of BFC_1D 

In [146]:
base_path = '/Users/charlesmiller/Documents/temporal_simulation_results'
bfc1d_df = pd.read_csv(f'{base_path}/CDBFC_1D/2024_4_24.csv')

In [147]:
df = bfc1d_df
df = df.sort_values('median_gross_accuracy', ascending=False).reset_index(drop=True)
df['gross_acc_median_rank'] = df.index + 1
df = df.sort_values('median_tp', ascending=False).reset_index(drop=True)
df['median_tp_rank'] = df.index + 1
df = df.sort_values('tp_avg', ascending=False).reset_index(drop=True)
df['tp_avg_rank'] = df.index + 1
df = df.sort_values('gross_accuracy', ascending=False).reset_index(drop=True)
df['gross_acc_rank'] = df.index + 1
df['weighted_rank'] = (df['gross_acc_median_rank'] + df['median_tp_rank'] + df['tp_avg_rank'] + df['gross_acc_rank'])/4
df.sort_values('weighted_rank', ascending=True,inplace=True)

In [148]:
df.head(10)

Unnamed: 0,model_name,features,tp_avg,gross_accuracy,num_features,median_tp,median_gross_accuracy,gross_acc_median_rank,median_tp_rank,tp_avg_rank,gross_acc_rank,weighted_rank
0,CDBFC_1D_temporal_simulation_29,"['price_range_8MA_diff', 'rsi_15MA', 'roc', 'r...",135.622754,23.467066,43,121.0,30.0,1,3,2,1,1.75
2,CDBFC_1D_temporal_simulation_5,"['price_5Ddiff', 'cmf_15MA_diff', 'price_3Ddif...",134.538922,20.131737,43,127.0,24.0,13,1,5,3,5.5
5,CDBFC_1D_temporal_simulation_22,"['price_20Ddiff', 'SPY_5d', 'return_vol_5D', '...",134.640719,18.88024,43,112.0,26.0,3,17,4,6,7.5
6,CDBFC_1D_temporal_simulation_0,"['rsi_15MA', 'price_3Ddiff', 'roc5', 'macd_15M...",133.233533,17.796407,47,117.0,24.0,12,5,8,7,8.0
10,CDBFC_1D_temporal_simulation_2,"['price_3Ddiff', 'H-L', 'bb_trend', 'macd', 'v...",131.628743,16.694611,32,114.0,25.0,7,11,15,11,11.0
9,CDBFC_1D_temporal_simulation_38,"['DMminus', 'macd', 'rsi_15MA_diff', 'Low-Clos...",133.676647,16.934132,43,112.0,24.0,9,18,7,10,11.0
17,CDBFC_1D_temporal_simulation_25,"['bb_category', 'SPY_5d', 'return_vol_5D_diff'...",133.94012,14.646707,39,115.0,24.0,11,9,6,18,11.0
16,CDBFC_1D_temporal_simulation_34,"['roc', 'High-Close', 'cmf_15MA', 'price_range...",131.05988,15.083832,45,119.0,25.0,8,4,18,17,11.75
3,CDBFC_1D_temporal_simulation_17,"['return_vol_10D', 'price_5Ddiff', 'price_10Dd...",130.329341,19.443114,48,112.0,26.0,4,19,21,4,12.0
11,CDBFC_1D_temporal_simulation_24,"['rsi', 'price_5Ddiff', 'DX', 'day_of_month', ...",130.227545,16.670659,40,115.0,26.0,5,10,22,12,12.25


In [149]:
## Statistics of note
print(f"Best Strategies avg features {df.head(10)['num_features'].mean()}")
print(f"Best Strategies avg gross accuracy {df.head(10)['gross_accuracy'].mean()}")
print(f"Best Strategies avg median gross accuracy {df.head(10)['median_gross_accuracy'].mean()}")
print(f"Best Strategies avg median tp {df.head(10)['median_tp'].mean()}")
print(f"Best Strategies avg tp avg {df.head(10)['tp_avg'].mean()}")

Best Strategies avg features 42.3
Best Strategies avg gross accuracy 17.974850299401197
Best Strategies avg median gross accuracy 25.4
Best Strategies avg median tp 116.4
Best Strategies avg tp avg 132.88982035928143


In [150]:
df['features'] = df['features'].apply(lambda x: ast.literal_eval(x))
feature_df = df.explode('features')

In [151]:
## top 20 only bc of interrupted test
feature_rank_avg = feature_df.groupby('features')['weighted_rank'].mean()
top_25_features_bfc1d = pd.DataFrame(feature_rank_avg).sort_values('weighted_rank', ascending=True)
top_25_features_bfc1d

['price_change_H',
 'PrevClose-L',
 'return_vol_10D',
 'price_5Ddiff',
 'volume_sum15_5DMA_diff',
 'return_vol_8H_diff',
 'roc5',
 'cd_vol3',
 'H-PrevClose',
 'TR',
 'day_of_week',
 'bb_category',
 'macd_15MA_diff',
 'price_change_D',
 'rsi',
 'price_range_5DMA_diff',
 'High-Low',
 'H-L',
 'roc',
 'return_vol_5D_diff']

### Analysis of BFP_1D

In [152]:
base_path = '/Users/charlesmiller/Documents/temporal_simulation_results'
bfp1d_df = pd.read_csv(f'{base_path}/CDBFP_1D/2024_4_23.csv')

In [153]:
df = bfp1d_df
df = df.sort_values('median_gross_accuracy', ascending=False).reset_index(drop=True)
df['gross_acc_median_rank'] = df.index + 1
df = df.sort_values('median_tp', ascending=False).reset_index(drop=True)
df['median_tp_rank'] = df.index + 1
df = df.sort_values('tp_avg', ascending=False).reset_index(drop=True)
df['tp_avg_rank'] = df.index + 1
df = df.sort_values('gross_accuracy', ascending=False).reset_index(drop=True)
df['gross_acc_rank'] = df.index + 1
df['weighted_rank'] = (df['gross_acc_median_rank'] + df['median_tp_rank'] + df['tp_avg_rank'] + df['gross_acc_rank'])/4
df.sort_values('weighted_rank', ascending=True,inplace=True)

In [154]:
df.head(10)

Unnamed: 0,model_name,features,tp_avg,gross_accuracy,num_features,median_tp,median_gross_accuracy,gross_acc_median_rank,median_tp_rank,tp_avg_rank,gross_acc_rank,weighted_rank
7,CDBFP_1D_temporal_simulation_68,"['price_5Ddiff', 'volume_sum15', 'SPY_20d_diff...",91.491018,7.892216,45,74.0,3.0,1,2,1,8,3.0
0,CDBFP_1D_temporal_simulation_93,"['price_5Ddiff', 'price_range_8MA_diff', 'roc_...",85.155689,12.790419,25,74.0,3.0,2,4,21,1,7.0
5,CDBFP_1D_temporal_simulation_83,"['price_change_H', 'volume_sum15_5DMA_diff', '...",86.658683,8.203593,48,70.0,3.0,3,13,8,6,7.5
1,CDBFP_1D_temporal_simulation_53,"['H-L', 'cd_vol3', 'SPY_20d_diff', 'cmf', 'rsi...",87.365269,9.598802,48,69.0,0.0,4,23,6,2,8.75
6,CDBFP_1D_temporal_simulation_31,"['return_vol_8H', 'price_change_H', 'macd', 'p...",87.916168,7.904192,28,69.0,-1.0,10,24,5,7,11.5
9,CDBFP_1D_temporal_simulation_64,"['rsi', 'cmf', 'SPY_5d_diff', 'H-L', 'price_20...",85.185629,7.233533,31,75.0,-4.0,21,1,20,10,13.0
10,CDBFP_1D_temporal_simulation_41,"['return_vol_8H', 'return_vol_5D', 'return_vol...",85.658683,7.131737,25,71.0,-4.0,23,6,16,11,14.0
24,CDBFP_1D_temporal_simulation_30,"['price_3D20D_diff', 'H-PrevClose', 'High-Clos...",88.443114,3.772455,45,70.0,-2.0,13,17,4,25,14.75
12,CDBFP_1D_temporal_simulation_70,"['roc3', 'price_10Ddiff', 'hour', 'volume_15MA...",89.155689,6.838323,35,72.0,-7.0,43,5,2,13,15.75
8,CDBFP_1D_temporal_simulation_61,"['volume_sum15_10DMA_diff', 'DMplus', 'return_...",86.461078,7.353293,41,70.0,-8.0,56,10,9,9,21.0


In [155]:
## Statistics of note
print(f"Best Strategies avg features {df.head(10)['num_features'].mean()}")
print(f"Best Strategies avg gross accuracy {df.head(10)['gross_accuracy'].mean()}")
print(f"Best Strategies avg median gross accuracy {df.head(10)['median_gross_accuracy'].mean()}")
print(f"Best Strategies avg median tp {df.head(10)['median_tp'].mean()}")
print(f"Best Strategies avg tp avg {df.head(10)['tp_avg'].mean()}")

Best Strategies avg features 37.1
Best Strategies avg gross accuracy 7.871856287425149
Best Strategies avg median gross accuracy -1.7
Best Strategies avg median tp 71.4
Best Strategies avg tp avg 87.3491017964072


In [156]:
df['features'] = df['features'].apply(lambda x: ast.literal_eval(x))
feature_df = df.explode('features')

In [157]:
feature_rank_avg = feature_df.groupby('features')['weighted_rank'].mean()
top_25_features_bfp1d = pd.DataFrame(feature_rank_avg).sort_values('weighted_rank', ascending=True).head(25).index.tolist()
top_25_features_bfp1d

['price_change_H',
 'DIminus',
 'return_vol_10D',
 'price_range',
 'cd_vol3',
 'DIplus',
 'hour',
 'return_vol_8H_diff',
 'cmf_15MA',
 'return_vol_8H',
 'DX',
 'return_vol_5D',
 'cmf',
 'rsi_15MA',
 'day_of_week',
 'High-Low',
 'SPY_20d_diff',
 'adx',
 'cd_vol',
 'bb_category',
 'volume_sum15',
 'volume_sum15_5DMA_diff',
 'return_vol_10D_diff',
 'DMplus',
 'price_10Ddiff']

### Selecting features for next round of tests

In [158]:
unique_values = list(set(top_25_features_bfp1d + top_25_features_bfc + top_25_features_bfc1d + top_25_features_bfp))
print(len(unique_values))
print(unique_values)

53
['macd_15MA_diff', 'roc_15MA', 'price_3Ddiff', 'price_range', 'cd_vol', 'price_change_D', 'SPY_5d_diff', 'DX', 'H-PrevClose', 'price_change_H', 'High-Low', 'cmf_15MA', 'cmf', 'roc5', 'price_10Ddiff', 'TR', 'macd', 'roc_15MA_diff', 'roc', 'hour', 'return_vol_5D', 'volume_sum15_10DMA_diff', 'volume_sum15', 'return_vol_5D_diff', 'roc3', 'volume_sum15_5DMA_diff', 'bb_trend', 'bb_spread', 'DIminus', 'rsi_15MA', 'volume_15MA_diff', 'High-Close', 'price_range_5DMA_diff', 'Low-Close', 'return_vol_8H', 'SPY_20d_diff', 'price_range_8MA_diff', 'day_of_week', 'return_vol_10D', 'rsi', 'rsi_15MA_diff', 'H-L', 'macd_15MA', 'PrevClose-L', 'month', 'price_5Ddiff', 'return_vol_8H_diff', 'adx', 'cd_vol3', 'DIplus', 'return_vol_10D_diff', 'DMplus', 'bb_category']


110
