### Analyze Results

In [1]:
import pandas as pd
from model_training_utils import load_data_columns_config, load_metrics
from model_analysis import summarize_metrics

# Column configuration; later cells index it by factor-set name to get column lists.
config_dict = load_data_columns_config()
# Factor-set names: every configuration key except the first and the last one.
factors_columns = list(config_dict)[1:-1]

# Row order of the 'type' index level inside each factor set.
custom_order = ['train', 'validation', 'test']
# (factor_type, type) index used to lay out every summary table in the same row order.
multi_idx = pd.MultiIndex.from_product(
    [factors_columns, custom_order],
    names=['factor_type', 'type'],
)

GPU devices are already configured, skipping setup.


#### [1]. Metrics
##### 1. Individual factor set test

In [None]:
# Metrics of the models trained on one factor set at a time (LSTM, then dense).
filename_roots = ("train_metrics_dict_lstm_single_cate_", "test_metrics_lstm_single_cate_")
lstm_metrics_summary = summarize_metrics(filename_roots, factors_columns)

filename_roots = ("train_metrics_dict_dense_single_cate_", "test_metrics_dense_single_cate_")
dense_metrics_summary = summarize_metrics(filename_roots, factors_columns)

# Mean of every metric per (factor_type, type) group; the cv_num column is
# dropped and the rows are arranged in the shared multi_idx order.
lstm_summary_df = (
    lstm_metrics_summary
    .groupby(["factor_type", "type"])
    .mean()
    .drop(columns=["cv_num"])
    .reindex(multi_idx)
)

dense_summary_df = (
    dense_metrics_summary
    .groupby(["factor_type", "type"])
    .mean()
    .drop(columns=["cv_num"])
    .reindex(multi_idx)
)

In [4]:
# Mean metrics side by side: dense columns first, LSTM columns second,
# matched on the shared (factor_type, type) index.
dense_summary_df.merge(
    lstm_summary_df,
    left_index=True,
    right_index=True,
    suffixes=("_dense", "_lstm"),
)

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE_dense,r2_dense,MSE_lstm,r2_lstm
factor_type,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tech_factors,train,0.208909,0.92535,0.188665,0.932387
tech_factors,validation,0.319666,0.880754,0.192089,0.927028
tech_factors,test,0.209865,0.917738,0.176823,0.930624
calendar_factors,train,2.939762,-0.019811,2.943795,-0.022895
calendar_factors,validation,2.825492,-0.033412,2.904103,-0.070547
calendar_factors,test,2.732922,-0.054428,2.928666,-0.131298
fundamental_factors,train,0.658607,0.768281,0.419936,0.852595
fundamental_factors,validation,0.757219,0.719695,0.703363,0.734776
fundamental_factors,test,0.941913,0.634949,1.052825,0.59213
industry_factors,train,1.797015,0.373034,1.538738,0.461532


In [7]:
# Standard deviation of every metric per (factor_type, type) group, with the
# cv_num column dropped and the rows arranged in the shared multi_idx order.
lstm_summary_std_df = (
    lstm_metrics_summary
    .groupby(["factor_type", "type"])
    .std()
    .drop(columns=["cv_num"])
    .reindex(multi_idx)
)

dense_summary_std_df = (
    dense_metrics_summary
    .groupby(["factor_type", "type"])
    .std()
    .drop(columns=["cv_num"])
    .reindex(multi_idx)
)

In [8]:
# Standard deviations side by side: dense columns first, LSTM columns second,
# matched on the shared (factor_type, type) index.
dense_summary_std_df.merge(
    lstm_summary_std_df,
    left_index=True,
    right_index=True,
    suffixes=("_dense", "_lstm"),
)

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE_dense,r2_dense,MSE_lstm,r2_lstm
factor_type,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tech_factors,train,0.02309,0.010036,0.009194,0.005976
tech_factors,validation,0.18306,0.067519,0.026039,0.009754
tech_factors,test,0.058585,0.022944,0.004001,0.001632
calendar_factors,train,0.172713,0.002954,0.16695,0.00577
calendar_factors,validation,0.222857,0.019482,0.270702,0.074727
calendar_factors,test,0.07183,0.02806,0.404487,0.158189
fundamental_factors,train,0.023279,0.016883,0.017269,0.009247
fundamental_factors,validation,0.026951,0.017851,0.062457,0.022262
fundamental_factors,test,0.040269,0.015571,0.041502,0.016275
industry_factors,train,0.064517,0.028658,0.02932,0.02892


##### 2. Cumulative factor sets test

In [3]:
# Metrics of the models trained on cumulatively growing factor sets (LSTM, then dense).
filename_roots = ("train_metrics_dict_lstm_", "test_metrics_lstm_")
lstm_cum_metrics_summary = summarize_metrics(filename_roots, factors_columns)

filename_roots = ("train_metrics_dict_dense_", "test_metrics_dense_")
dense_cum_metrics_summary = summarize_metrics(filename_roots, factors_columns)

# Mean of every metric per (factor_type, type) group; the cv_num column is
# dropped and the rows are arranged in the shared multi_idx order.
lstm_cum_summary_df = (
    lstm_cum_metrics_summary
    .groupby(["factor_type", "type"])
    .mean()
    .drop(columns=["cv_num"])
    .reindex(multi_idx)
)

dense_cum_summary_df = (
    dense_cum_metrics_summary
    .groupby(["factor_type", "type"])
    .mean()
    .drop(columns=["cv_num"])
    .reindex(multi_idx)
)

In [11]:
# Cumulative-set mean metrics side by side: dense columns first, LSTM second,
# matched on the shared (factor_type, type) index.
dense_cum_summary_df.merge(
    lstm_cum_summary_df,
    left_index=True,
    right_index=True,
    suffixes=("_dense", "_lstm"),
)

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE_dense,r2_dense,MSE_lstm,r2_lstm
factor_type,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tech_factors,train,0.208909,0.92535,0.188665,0.932387
tech_factors,validation,0.319666,0.880754,0.192089,0.927028
tech_factors,test,0.209865,0.917738,0.176823,0.930624
calendar_factors,train,0.216388,0.922758,0.208952,0.92502
calendar_factors,validation,0.334786,0.874247,0.278013,0.892753
calendar_factors,test,0.251651,0.901506,0.235762,0.906339
fundamental_factors,train,0.664831,0.773458,0.2013,0.927669
fundamental_factors,validation,0.614017,0.772371,0.252483,0.903858
fundamental_factors,test,0.595296,0.769473,0.230763,0.908537
industry_factors,train,0.190694,0.931772,0.206102,0.925774


In [12]:
# Standard deviation of every metric per (factor_type, type) group, with the
# cv_num column dropped and the rows arranged in the shared multi_idx order.
lstm_cum_summary_std_df = (
    lstm_cum_metrics_summary
    .groupby(["factor_type", "type"])
    .std()
    .drop(columns=["cv_num"])
    .reindex(multi_idx)
)

dense_cum_summary_std_df = (
    dense_cum_metrics_summary
    .groupby(["factor_type", "type"])
    .std()
    .drop(columns=["cv_num"])
    .reindex(multi_idx)
)

In [13]:
# Cumulative-set standard deviations side by side: dense columns first, LSTM
# second, matched on the shared (factor_type, type) index.
dense_cum_summary_std_df.merge(
    lstm_cum_summary_std_df,
    left_index=True,
    right_index=True,
    suffixes=("_dense", "_lstm"),
)

Unnamed: 0_level_0,Unnamed: 1_level_0,MSE_dense,r2_dense,MSE_lstm,r2_lstm
factor_type,type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
tech_factors,train,0.02309,0.010036,0.009194,0.005976
tech_factors,validation,0.18306,0.067519,0.026039,0.009754
tech_factors,test,0.058585,0.022944,0.004001,0.001632
calendar_factors,train,0.026337,0.011465,0.023422,0.009016
calendar_factors,validation,0.291711,0.109159,0.111366,0.048438
calendar_factors,test,0.146917,0.057301,0.070513,0.028935
fundamental_factors,train,1.165876,0.387974,0.015986,0.008396
fundamental_factors,validation,1.047038,0.387839,0.070898,0.027545
fundamental_factors,test,1.016893,0.391722,0.047171,0.019504
industry_factors,train,0.006907,0.006227,0.016746,0.009635


#### [2]. Major contribution factor
##### 1. Load data and prepare parameters

In [1]:
# Procedure used below to measure the contribution of the k-th feature:
#   1. get the validation dataset
#   2. shuffle the values of the k-th column
#   3. make the prediction again and re-evaluate the metrics (MSE)

In [3]:
from model_training_utils import NonRNNDataFeeder, ModelConfig, read_data, load_data_columns_config, load_metrics
from train_models import fit_models_with_cross_validation
from model_analysis import load_model, plot_importance, save_plots_to_html
from pmdarima.model_selection import RollingForecastCV, SlidingWindowForecastCV
import tensorflow as tf
from keras import backend as K
import numpy as np
import pandas as pd
import pickle
from tqdm import tqdm
import matplotlib.pyplot as plt

In [3]:
config_dict = load_data_columns_config()
# Factor sets to analyse, listed explicitly (not derived from config_dict).
# The order matters: the loops below number the sets by position and treat
# the 5th one specially.
factors_columns = [
    'tech_factors',
    'calendar_factors',
    'fundamental_factors',
    'industry_factors',
    'release_schedule_factors',
]

In [3]:
# Step 1: Load data
# The dataset folder defaults to the original location but can be overridden
# with the PREDICTORS_FOLDER environment variable, so the notebook is not tied
# to one machine's drive layout.
import os

folder_path = os.environ.get("PREDICTORS_FOLDER", "F:/predictors")
# Rows are ordered by date, then by isin, and the index is renumbered from 0.
final_dataset = read_data(filename="final_dataset", folder_path=folder_path).sort_values(
    by=["date", "isin"], ignore_index=True
)

In [4]:
# Step 2: Split train, validation (create cross validation spliter) and test datasets
# Sorted array of the distinct dates in the dataset.
dates_list = np.sort(final_dataset["date"].unique())
num_of_days = len(dates_list)

step = 60  # step in window movement
h = 60  # time horizon for validation dataset
# 60% of the dates are used for training and validation
trainval_test_threshold = int(num_of_days * 0.6)
# the window size of the 1st train dataset
initial_threshold = int(trainval_test_threshold / 3)
# Update the split threshold of train_validation and test: trim it so that the
# dates left after the first window and its horizon form a whole number of
# steps (x // step * step == x - x % step).
trainval_test_threshold -= (trainval_test_threshold - initial_threshold - h) % step
# train_dates are the dates used for training and validation in models.
train_dates = dates_list[:trainval_test_threshold]
# Create cross validation spliter with sliding window (non-cumulative datasets)
cv_spliter = SlidingWindowForecastCV(
    h=h,
    step=step,
    window_size=initial_threshold,
)

In [5]:
def create_dense_model(win_size, predictors_size):
    """Build the feed-forward Keras model used for the dense experiments.

    The model is only constructed here; it is not compiled.

    Args:
        win_size: first dimension of the per-sample input shape.
        predictors_size: second dimension of the per-sample input shape.

    Returns:
        A ``tf.keras.Model`` taking inputs of shape ``(win_size, predictors_size)``
        and passing them through ReLU Dense layers of 32, 16 and 8 units,
        followed by a single-unit Dense layer without activation.
    """
    inputs = tf.keras.layers.Input(shape=(win_size, predictors_size))
    hidden = inputs
    # Hidden stack, created in the same order as the layer widths listed here.
    for units in (32, 16, 8):
        hidden = tf.keras.layers.Dense(units, activation='relu')(hidden)
    return tf.keras.Model(inputs=inputs, outputs=tf.keras.layers.Dense(1)(hidden))

# Only use the last validation split: the loop runs the splitter to exhaustion
# and keeps the validation indices of its final split in `test_idx`.
# `test_idx` is reset first so that a value left over from an earlier run of
# this cell can never be picked up silently when the splitter yields nothing.
test_idx = None
for _, test_idx in tqdm(cv_spliter.split(train_dates)):
    pass
if test_idx is None:
    raise ValueError("cv_spliter produced no splits; cannot select the last validation split")

6it [00:00, 5997.57it/s]


##### 1. Analyze models by cumulatively adding predictor sets

In [17]:
tf.random.set_seed(4321)

input_columns = []

# runtime: ~ 17.5 hours
# enumerate(..., start=1) supplies the 1-based model number used in the
# checkpoint and metrics file names, instead of a hand-maintained counter.
for num, factor in enumerate(tqdm(factors_columns), start=1):
    # Cumulative predictor list: this factor set's columns are appended to the
    # columns of all earlier sets. The 5th set contributes every configured
    # column except its last one.
    new_columns = config_dict[factor][:-1] if num == 5 else config_dict[factor]
    input_columns = input_columns + new_columns
    # data_columns includes input_columns and response variable column name
    data_columns = input_columns + ["log_adj_volume"]

    data_feeder = NonRNNDataFeeder(
        data_df=final_dataset[data_columns],
        window_size=1,
        batch_size=1024,
        predictors_size=len(input_columns),
        predictors_dates=final_dataset['date'],
    )

    checkpoint_path = f"./checkpoints/dense_{num}_tp_CV6"
    model = load_model(data_feeder.window_size, data_feeder.predictors_size,
                       checkpoint_path, create_dense_model)

    # Rows dated within the last validation split (both bounds inclusive).
    val_filter = (data_feeder.predictors_dates >= train_dates[test_idx[0]]) & (
        data_feeder.predictors_dates <= train_dates[test_idx[-1]]
    )

    # Compute "Permutation Feature Importance"
    # https://www.kaggle.com/code/cdeotte/lstm-feature-importance
    # https://christophm.github.io/interpretable-ml-book/feature-importance.html#feature-importance
    # runtime: 1 min/feature
    fea_import_metrics = {}
    # Reference evaluation on the validation rows, stored under "baseline".
    valid_ds = data_feeder.gen_tf_dataset(val_filter)
    fea_import_metrics["baseline"] = model.evaluate(valid_ds, verbose=1)
    del valid_ds

    # One evaluation per predictor, keyed by its column name.
    # NOTE(review): column_idx presumably selects the column that
    # gen_tf_dataset shuffles; confirm against NonRNNDataFeeder.
    for i in range(data_feeder.predictors_size):
        valid_ds_i = data_feeder.gen_tf_dataset(subset_filter=val_filter, column_idx=i)
        fea_import_metrics[input_columns[i]] = model.evaluate(valid_ds_i)
        del valid_ds_i
    # Save the metric of this model
    with open(f"./metrics/fea_import_dense_{num}.pkl", "wb") as pickle_file:
        pickle.dump(fea_import_metrics, pickle_file)
    # Drop the per-model objects before the next iteration builds new ones.
    del data_feeder, model, fea_import_metrics

  0%|                                                                                            | 0/2 [00:00<?, ?it/s]



 50%|█████████████████████████████████████████                                         | 1/2 [37:47<37:47, 2267.73s/it]



100%|████████████████████████████████████████████████████████████████████████████████| 2/2 [1:19:12<00:00, 2376.04s/it]


##### 2. Analyze models by each predictor set (factors_columns)

In [6]:
tf.random.set_seed(4321)

# runtime: 1.8 hours
# enumerate(..., start=1) supplies the 1-based model number used in the
# checkpoint and metrics file names, instead of a hand-maintained counter.
for num, factor in enumerate(tqdm(factors_columns), start=1):
    # Predictors of this model are the columns of the current factor set only.
    # The 5th set uses every configured column except its last one.
    input_columns = config_dict[factor][:-1] if num == 5 else config_dict[factor]
    # data_columns includes input_columns and response variable column name
    data_columns = input_columns + ["log_adj_volume"]

    data_feeder = NonRNNDataFeeder(
        data_df=final_dataset[data_columns],
        window_size=1,
        batch_size=1024,
        predictors_size=len(input_columns),
        predictors_dates=final_dataset['date'],
    )

    checkpoint_path = f"./checkpoints/dense_{num}_tp_sc_CV6"
    model = load_model(data_feeder.window_size, data_feeder.predictors_size,
                       checkpoint_path, create_dense_model)

    # Rows dated within the last validation split (both bounds inclusive).
    val_filter = (data_feeder.predictors_dates >= train_dates[test_idx[0]]) & (
        data_feeder.predictors_dates <= train_dates[test_idx[-1]]
    )

    # Compute "Permutation Feature Importance"
    # https://www.kaggle.com/code/cdeotte/lstm-feature-importance
    # https://christophm.github.io/interpretable-ml-book/feature-importance.html#feature-importance
    # runtime: 1 min/feature
    fea_import_metrics = {}
    # Reference evaluation on the validation rows, stored under "baseline".
    valid_ds = data_feeder.gen_tf_dataset(val_filter)
    fea_import_metrics["baseline"] = model.evaluate(valid_ds, verbose=1)
    del valid_ds

    # One evaluation per predictor, keyed by its column name.
    # NOTE(review): column_idx presumably selects the column that
    # gen_tf_dataset shuffles; confirm against NonRNNDataFeeder.
    for i in range(data_feeder.predictors_size):
        valid_ds_i = data_feeder.gen_tf_dataset(subset_filter=val_filter, column_idx=i)
        fea_import_metrics[input_columns[i]] = model.evaluate(valid_ds_i)
        del valid_ds_i
    # Save the metric of this model
    with open(f"./metrics/fea_import_dense_single_cate_{num}.pkl", "wb") as pickle_file:
        pickle.dump(fea_import_metrics, pickle_file)
    # Drop the per-model objects before the next iteration builds new ones.
    del data_feeder, model, fea_import_metrics

  0%|                                                                                            | 0/2 [00:00<?, ?it/s]



 50%|█████████████████████████████████████████                                         | 1/2 [22:48<22:48, 1368.27s/it]



100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [26:39<00:00, 799.65s/it]


##### 3. Display feature importance (dense models)

In [5]:
# 1. plot version:
# One importance figure per factor set, read from the single-set metrics files;
# the 1-based position of the factor set selects the file.
plots = []
for num, factor in enumerate(tqdm(factors_columns), start=1):
    fig = plot_importance(metrics_path=f"./metrics/fea_import_dense_single_cate_{num}.pkl",
                          factor=factor, num=num)
    plots.append(fig)
# All figures are written into a single HTML file.
save_plots_to_html(figures=plots, filename="./dense_feature_importance_plots1.html")

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:01<00:00,  4.69it/s]


In [7]:
# 2. numerical version:
# For every factor set, print the baseline metrics and the features whose
# evaluation moved the RMSE by more than 0.1% relative to the baseline.
for num, factor in enumerate(tqdm(factors_columns), start=1):
    test_dict = load_metrics(f"./metrics/fea_import_dense_single_cate_{num}.pkl")
    # Rows: "baseline" plus one row per feature; columns: MSE and R2.
    test_df = pd.DataFrame(test_dict, index=["MSE", "R2"]).T
    print(f"\n{num}: ", factor)
    print("baseline: ", test_df.loc["baseline"].tolist())
    # Relative change of sqrt(MSE) against the baseline row.
    baseline_rmse = np.sqrt(test_df.loc["baseline", 'MSE'])
    test_df['sqrt_MSE_per_change'] = np.sqrt(test_df['MSE']) / baseline_rmse - 1

    test_df["abs_sqrt_MSE_per_change"] = test_df['sqrt_MSE_per_change'].abs()
    # Keep only changes above the 0.001 threshold, largest first.
    notable = test_df[test_df["abs_sqrt_MSE_per_change"] > 0.001]
    print(notable.sort_values(by=["abs_sqrt_MSE_per_change"], ascending=False).to_string())

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 72.10it/s]


1:  tech_factors
baseline:  [0.3320235013961792, 0.8757203221321106]
                       MSE        R2  sqrt_MSE_per_change  abs_sqrt_MSE_per_change
lag_logvol_ma22   0.932313  0.651897             0.675701                 0.675701
lag_logvol_ma5    0.900405  0.663878             0.646776                 0.646776
lag_logvol_ma252  0.560388  0.790644             0.299152                 0.299152
lag_logvol_ma1    0.499417  0.813485             0.226443                 0.226443

2:  calendar_factors
baseline:  [2.7643942832946777, -0.03204427659511566]
Empty DataFrame
Columns: [MSE, R2, sqrt_MSE_per_change, abs_sqrt_MSE_per_change]
Index: []

3:  fundamental_factors
baseline:  [0.7237229347229004, 0.728627622127533]
                             MSE        R2  sqrt_MSE_per_change  abs_sqrt_MSE_per_change
lag_log_mkt_cap         2.572815  0.038401             0.885463                 0.885463
lag_EFMUSATRD_LIQUIDTY  1.517291  0.432250             0.447932                 0.447932
lag_E




In [None]:
# 1. plot version:
# One importance figure per cumulative model, read from the cumulative metrics
# files; the 1-based position of the factor set selects the file.
plots = []
factor_prefixes = []
for num, factor in enumerate(tqdm(factors_columns), start=1):
    # Label such as "tech&calendar": the first word of every factor set
    # included so far, joined with "&".
    factor_prefixes.append(factor.split('_')[0])
    factors = "&".join(factor_prefixes)
    fig = plot_importance(metrics_path=f"./metrics/fea_import_dense_{num}.pkl",
                          factor=factors, num=num)
    plots.append(fig)
# All figures are written into a single HTML file.
save_plots_to_html(figures=plots, filename="./dense_feature_importance_plots2.html")

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 12.25it/s]


In [9]:
# 2. numerical version:
# For every cumulative model, print the baseline metrics and the features whose
# evaluation moved the RMSE by more than 0.1% relative to the baseline.
factor_prefixes = []
for num, factor in enumerate(tqdm(factors_columns), start=1):
    # Label such as "tech&calendar": the first word of every factor set
    # included so far, joined with "&".
    factor_prefixes.append(factor.split('_')[0])
    factors = "&".join(factor_prefixes)
    test_dict = load_metrics(f"./metrics/fea_import_dense_{num}.pkl")
    # Rows: "baseline" plus one row per feature; columns: MSE and R2.
    test_df = pd.DataFrame(test_dict, index=["MSE", "R2"]).T
    print(f"\n{num}: ", factors)
    print("baseline: ", test_df.loc["baseline"].tolist())
    # Relative change of sqrt(MSE) against the baseline row.
    baseline_rmse = np.sqrt(test_df.loc["baseline", 'MSE'])
    test_df['sqrt_MSE_per_change'] = np.sqrt(test_df['MSE']) / baseline_rmse - 1

    test_df["abs_sqrt_MSE_per_change"] = test_df['sqrt_MSE_per_change'].abs()
    # Keep only changes above the 0.001 threshold, largest first.
    notable = test_df[test_df["abs_sqrt_MSE_per_change"] > 0.001]
    print(notable.sort_values(by=["abs_sqrt_MSE_per_change"], ascending=False).to_string())

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 63.74it/s]


1:  tech
baseline:  [0.3320235013961792, 0.8757203221321106]
                       MSE        R2  sqrt_MSE_per_change  abs_sqrt_MSE_per_change
lag_logvol_ma22   0.932313  0.651897             0.675701                 0.675701
lag_logvol_ma5    0.900405  0.663878             0.646776                 0.646776
lag_logvol_ma252  0.560388  0.790644             0.299152                 0.299152
lag_logvol_ma1    0.499417  0.813485             0.226443                 0.226443

2:  tech&calendar
baseline:  [0.5831783413887024, 0.7815729379653931]
                       MSE        R2  sqrt_MSE_per_change  abs_sqrt_MSE_per_change
lag_logvol_ma5    1.630105  0.391103             0.671887                 0.671887
lag_logvol_ma252  1.311699  0.509649             0.499742                 0.499742
lag_logvol_ma1    1.289620  0.518304             0.487066                 0.487066
lag_logvol_ma22   0.645234  0.758154             0.051860                 0.051860

3:  tech&calendar&fundamental
baseli


