In [2]:
# Colab bootstrap: mount Google Drive, make the project importable, and run
# from the project root so relative paths such as "saved/..." resolve.
import os
import sys
import warnings

from google.colab import drive

# Single source of truth for the project location on the mounted drive
# (previously repeated verbatim for sys.path and os.chdir).
PROJECT_DIR = "/content/drive/MyDrive/Colab Notebooks/instquality/"

drive.mount('/content/drive')

# Guard the append so re-running this cell does not pile up duplicate entries.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)
os.chdir(PROJECT_DIR)

# NOTE(review): this silences *all* warnings for the whole kernel session,
# including convergence/deprecation warnings from the modelling code.
warnings.filterwarnings("ignore")

Mounted at /content/drive


In [7]:
import pandas as pd
import numpy as np

from functions.spec import run_unified_regularized_regression, filter_cols
from functions.utils import check_split_year
from functions.summary import get_benchmark_stats

### Incremental signal regressions

In [8]:
# load data
# The same path is handed to run_unified_regularized_regression below, so keep
# it in one place to guarantee the column lists and the model see the same file.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
DATA_PATH = "saved/df.dat"
df = pd.read_pickle(DATA_PATH)

# define columns and inputs
id_cols = ["country","year","iso_code_1","iso_code_2","region"]
exclude = id_cols + [c for c in df.columns if c.startswith("tgt_")]
# macro block: "wb_" columns except the "wb_iq_" family
macro_cols = filter_cols([c for c in df.columns if c.startswith("wb_") and not c.startswith("wb_iq_") and c not in exclude], [])
# institutional-quality block: "wb_iq_" columns plus every non-"wb_" column that is not an id/target
iq_cols = filter_cols([c for c in df.columns if (c.startswith("wb_iq_") or (not c.startswith("wb_") and c not in exclude))], [])
# the current spread is added back as a regressor for the "tgt_spread_t1" target
macro_cols += ["tgt_spread"]
y_name = "tgt_spread_t1"

# define models to run
models = ['lasso', 'ridge', 'elastic']

# iterate over split shares
split_shares = [0.75, 0.8, 0.85]
for split_share in split_shares:

    # get split year and benchmarks
    split_year = check_split_year(df, y_name, split_share)
    # the 75-25 split keeps the unsuffixed name; other splits get a percentage suffix
    name_prefix = "signal_t1" if split_share == 0.75 else f"signal_t1_{split_share*100:.0f}"
    split_label = f"{split_share*100:.0f}-{(1-split_share)*100:.0f}"
    print("\n" + "=" * 100)
    print(f"Running {split_label} split...")
    print(f"Split year for {split_label} split: {split_year}")
    benchmark_df = get_benchmark_stats(df, split_share=split_share)
    horizon_rows = benchmark_df[benchmark_df["horizon"] == 1]
    if horizon_rows.empty:
        # fail with a readable message instead of an IndexError from .iloc[0]
        raise ValueError(f"No horizon-1 benchmark row for the {split_label} split")
    h_benchmark = horizon_rows.iloc[0]
    r2_benchmark = h_benchmark["R2_test"]
    rmse_benchmark = h_benchmark["RMSE_test"]

    # run spec for models
    run_unified_regularized_regression(
        name_prefix=name_prefix,
        data_path=DATA_PATH,
        macro_cols=macro_cols,
        iq_cols=iq_cols,
        y_name=y_name,
        spread_col="tgt_spread",
        year_col="year",
        split_year=split_year,
        model_types=models,
        agnostic=False,
        param_factors=np.logspace(-3, 10, 25),
        l1_ratios=[0.25, 0.5, 0.75],
        use_clustering=True,
        corr_threshold=0.9,
        use_expanding_cv=True,
        min_train_years=8,
        cv_stride=2,
        r2_benchmark=r2_benchmark,
        rmse_benchmark=rmse_benchmark,
        early_stop_threshold=0.10,
        stability_iterations=100,
        save_results=True,
        output_dir="specs",
        n_jobs=-1,
        parallel_param_factors=True,
        compute_shap=True,
        max_shap_samples=500,
        force_interpretability=False,
        r2_convergence_threshold=1e-4,
        r2_convergence_count=3,
        multicollinearity_check=True,
        perform_interpretability=True
    )


Runnning 75-25 split...
Split year for 75-25 split: 2014

UNIFIED REGULARIZED REGRESSION: SIGNAL_T1
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.25, 0.5, 0.75]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2014 (1564 obs)
Test:  2015-2023 (534 obs)

β=1 specification active
After removing NaN: 1564 train, 534 test obs

Benchmark metrics:
  R² (test): 0.9074
  RMSE (test): 1.2728

MULTICOLLINEARITY DIAGNOSTICS (Core Variables Only)

Found 56 core variable pairs with correlation above 0.9:



Unnamed: 0,core_variable_1,core_variable_2,correlation
0,p5d_xrreg,p5d_xconst,0.999259
1,wb_fp_cpi_totl_zg,wb_ny_gdp_defl_kd_zg,0.999178
2,p5d_democ,p5d_xconst,0.99904
3,p5d_democ,p5d_xrreg,0.997578
4,p5d_autoc,p5d_xrreg,0.997499
5,p5d_autoc,p5d_xconst,0.99488
6,p5d_democ,p5d_autoc,0.99107
7,fiw_b,fiw_pr,0.987332
8,fiw_f,fiw_cl,0.975276
9,fiw_a,fiw_pr,0.971855



Note: These correlations are reported for transparency.
Regularization handles multicollinearity. Interpretation focuses on stable patterns.

Clustering variants of same variables...
  Original features: 972
  After clustering: 640
  Number of clusters: 640

Features used in models: 640


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.483293

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.952514,0.489696,2.988004,565,-11.909514,0.0
1,0.003481,0.001682,0.945931,0.531183,2.863972,473,-12.331897,0.0
2,0.012115,0.005855,0.932586,0.614423,2.597301,351,-11.398731,0.0
3,0.04217,0.02038,0.910313,0.823581,1.756868,204,-5.101179,3.37545e-07
4,0.14678,0.070938,0.864961,0.879063,1.454611,90,-2.98071,0.002875811
5,0.510897,0.246913,0.8169,0.90275,1.304405,13,-1.227936,0.2194709
6,1.778279,0.85943,0.758771,0.908984,1.261905,1,0.997312,0.3186132



Best LASSO:
  Parameter: 0.859430
  R² (test): 0.9090
  ΔR² vs benchmark: +0.16 p.p.
  RMSE (test): 1.2619
  ΔRMSE vs benchmark: -0.0109
  Active vars: 1
  Active variables: wb_fp_cpi_totl_zg
  DM test: stat=1.00, p=0.319

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.319)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 24297.810658

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,24.29781,0.938644,0.579978,2.710833,640,-11.976861,0.0
1,0.003481,84.5734,0.927629,0.686802,2.340864,640,-9.936409,0.0
2,0.012115,294.3747,0.909781,0.800096,1.870154,640,-7.046383,1.836309e-12
3,0.04217,1024.63,0.882594,0.861946,1.554145,640,-5.132073,2.865685e-07
4,0.14678,3566.431,0.837401,0.888385,1.397424,640,-4.222864,2.412171e-05
5,0.510897,12413.68,0.748635,0.90084,1.317152,640,-2.813241,0.004904484
6,1.778279,43208.3,0.636128,0.906704,1.277613,640,-0.536661,0.5915021
7,6.189658,150395.1,0.564418,0.908355,1.266254,640,0.887477,0.374822
8,21.544347,523480.5,0.535773,0.908522,1.265099,640,1.029974,0.3030224
9,74.989421,1822079.0,0.526517,0.908478,1.265403,640,0.996816,0.3188536



Best RIDGE:
  Parameter: 523480.461736
  R² (test): 0.9085
  ΔR² vs benchmark: +0.11 p.p.
  RMSE (test): 1.2651
  ΔRMSE vs benchmark: -0.0077
  Active vars: 640
  DM test: stat=1.03, p=0.303

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.303)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.25)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 1.623777

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.001624,0.949196,0.485728,2.999598,591,-12.576574,0.0
1,0.003481,0.005652,0.94153,0.548073,2.811906,503,-12.489136,0.0
2,0.012115,0.019673,0.928935,0.671167,2.398583,411,-10.191852,0.0
3,0.04217,0.068474,0.905367,0.829243,1.728448,287,-5.274826,1.328824e-07
4,0.14678,0.238338,0.860782,0.884638,1.420685,127,-3.170016,0.001524303
5,0.510897,0.829583,0.807724,0.905338,1.286929,24,-0.878978,0.3794133
6,1.778279,2.887529,0.721383,0.908555,1.264875,9,1.078749,0.2806996



Best ELASTIC (L1=0.25):
  Parameter: 2.887529
  R² (test): 0.9086
  ΔR² vs benchmark: +0.12 p.p.
  RMSE (test): 1.2649
  ΔRMSE vs benchmark: -0.0079
  Active vars: 9
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_delta, wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_zg_ma5, wb_ny_gdp_defl_kd_zg_ma10
  DM test: stat=1.08, p=0.281

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.281)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.885867

Stopping: model has 0 active v

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000886,0.951161,0.485466,3.000365,579,-12.23648,0.0
1,0.003481,0.003083,0.94371,0.531456,2.863136,489,-12.546425,0.0
2,0.012115,0.010733,0.931289,0.638727,2.514112,374,-10.940384,0.0
3,0.04217,0.037357,0.90851,0.823736,1.756096,245,-5.255483,1.476369e-07
4,0.14678,0.130027,0.863938,0.880869,1.443706,102,-3.119657,0.001810617
5,0.510897,0.452587,0.812838,0.903906,1.296626,21,-1.216952,0.2236224
6,1.778279,1.575319,0.74066,0.908616,1.264454,7,1.077759,0.2811413



Best ELASTIC (L1=0.5):
  Parameter: 1.575319
  R² (test): 0.9086
  ΔR² vs benchmark: +0.12 p.p.
  RMSE (test): 1.2645
  ΔRMSE vs benchmark: -0.0084
  Active vars: 7
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_delta, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=1.08, p=0.281

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.281)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.75)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.483293

Stopping: model has 0 active variables at factor 21.54434690031882



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.952747,0.496524,2.967947,590,-11.870937,0.0
1,0.003481,0.001682,0.946904,0.514169,2.915477,511,-12.416642,0.0
2,0.012115,0.005855,0.935135,0.579228,2.713253,380,-12.170283,0.0
3,0.04217,0.02038,0.915588,0.793577,1.900403,246,-6.308766,2.812697e-10
4,0.14678,0.070938,0.876334,0.870216,1.506876,113,-3.478794,0.000503675
5,0.510897,0.246913,0.824091,0.898878,1.330118,19,-2.105491,0.0352486
6,1.778279,0.85943,0.766679,0.908651,1.264209,4,1.001864,0.3164091
7,6.189658,2.991419,0.550912,0.908496,1.265282,2,1.015131,0.3100433



Best ELASTIC (L1=0.75):
  Parameter: 0.859430
  R² (test): 0.9087
  ΔR² vs benchmark: +0.12 p.p.
  RMSE (test): 1.2642
  ΔRMSE vs benchmark: -0.0086
  Active vars: 4
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg
  DM test: stat=1.00, p=0.316

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.316)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 260.0s


Runnning 80-20 split...
Split year for 80-20 split: 2016

UNIFIED REGULARIZED REGRESSION: SIGNAL_T1_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.25, 0.5, 0.75]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2016 (

Unnamed: 0,core_variable_1,core_variable_2,correlation
0,p5d_xrreg,p5d_xconst,0.999254
1,wb_fp_cpi_totl_zg,wb_ny_gdp_defl_kd_zg,0.999121
2,p5d_democ,p5d_xconst,0.999009
3,p5d_democ,p5d_xrreg,0.997529
4,p5d_autoc,p5d_xrreg,0.997493
5,p5d_autoc,p5d_xconst,0.994855
6,p5d_democ,p5d_autoc,0.990991
7,fiw_b,fiw_pr,0.988011
8,fiw_cl,fiw_f,0.976832
9,fiw_a,fiw_pr,0.972572



Note: These correlations are reported for transparency.
Regularization handles multicollinearity. Interpretation focuses on stable patterns.

Clustering variants of same variables...
  Original features: 972
  After clustering: 629
  Number of clusters: 629

Features used in models: 629


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.483293

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.947741,0.748153,2.007745,568,-8.018459,1.110223e-15
1,0.003481,0.001682,0.940953,0.786811,1.84724,481,-7.217299,5.302425e-13
2,0.012115,0.005855,0.9288,0.811778,1.735703,345,-6.824142,8.845147e-12
3,0.04217,0.02038,0.905131,0.845504,1.57253,209,-4.768412,1.85684e-06
4,0.14678,0.070938,0.859625,0.888296,1.337131,74,-2.398551,0.01646008
5,0.510897,0.246913,0.820014,0.903825,1.240715,9,-0.829791,0.406657
6,1.778279,0.85943,0.767267,0.907868,1.214357,1,0.159307,0.8734266



Best LASSO:
  Parameter: 0.859430
  R² (test): 0.9079
  ΔR² vs benchmark: +0.04 p.p.
  RMSE (test): 1.2144
  ΔRMSE vs benchmark: -0.0024
  Active vars: 1
  Active variables: wb_fp_cpi_totl_zg
  DM test: stat=0.16, p=0.873

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.873)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 24297.810658

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,24.29781,0.934636,0.774523,1.899727,629,-7.793439,6.439294e-15
1,0.003481,84.5734,0.924151,0.798255,1.796975,629,-7.196535,6.177281e-13
2,0.012115,294.3747,0.906965,0.82558,1.670853,629,-6.412092,1.435358e-10
3,0.04217,1024.63,0.882217,0.853348,1.532092,629,-5.597408,2.175799e-08
4,0.14678,3566.431,0.842775,0.879278,1.390059,629,-4.438423,9.062043e-06
5,0.510897,12413.68,0.764449,0.896806,1.285192,629,-2.611215,0.00902212
6,1.778279,43208.3,0.660745,0.904503,1.236332,629,-1.137573,0.2552987
7,6.189658,150395.1,0.592272,0.906107,1.225905,629,-0.716388,0.4737517
8,21.544347,523480.5,0.564456,0.90629,1.22471,629,-0.737896,0.4605777
9,74.989421,1822079.0,0.555384,0.906312,1.224564,629,-0.79127,0.4287867



Best RIDGE:
  Parameter: 6342110.581443
  R² (test): 0.9063
  ΔR² vs benchmark: -0.12 p.p.
  RMSE (test): 1.2246
  ΔRMSE vs benchmark: +0.0078
  Active vars: 629
  DM test: stat=-0.82, p=0.414

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.414)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.25)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 1.623777

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.001624,0.944174,0.747966,2.008492,563,-8.219186,2.220446e-16
1,0.003481,0.005652,0.936737,0.779701,1.877791,513,-7.608855,2.775558e-14
2,0.012115,0.019673,0.925083,0.813687,1.726881,400,-6.674494,2.480882e-11
3,0.04217,0.068474,0.900155,0.847918,1.560195,280,-4.95234,7.332635e-07
4,0.14678,0.238338,0.85699,0.886982,1.344977,122,-2.887335,0.003885204
5,0.510897,0.829583,0.811722,0.905613,1.229124,24,-0.650785,0.5151851
6,1.778279,2.887529,0.732482,0.90714,1.219144,9,-0.235144,0.8140973



Best ELASTIC (L1=0.25):
  Parameter: 2.887529
  R² (test): 0.9071
  ΔR² vs benchmark: -0.04 p.p.
  RMSE (test): 1.2191
  ΔRMSE vs benchmark: +0.0024
  Active vars: 9
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_delta, wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_zg_ma5, wb_ny_gdp_defl_kd_zg_ma10
  DM test: stat=-0.24, p=0.814

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.814)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.885867

Stopping: model has 0 active 

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000886,0.946219,0.743102,2.027781,569,-8.260401,2.220446e-16
1,0.003481,0.003083,0.93882,0.780699,1.873532,501,-7.483085,7.260859e-14
2,0.012115,0.010733,0.927417,0.813331,1.728529,364,-6.685974,2.293943e-11
3,0.04217,0.037357,0.903355,0.847745,1.561083,249,-4.767583,1.864489e-06
4,0.14678,0.130027,0.858986,0.886821,1.345935,95,-2.688872,0.007169382
5,0.510897,0.452587,0.816186,0.904725,1.234895,18,-0.812958,0.4162424
6,1.778279,1.575319,0.750118,0.907317,1.217979,7,-0.115262,0.9082376



Best ELASTIC (L1=0.5):
  Parameter: 1.575319
  R² (test): 0.9073
  ΔR² vs benchmark: -0.02 p.p.
  RMSE (test): 1.2180
  ΔRMSE vs benchmark: +0.0012
  Active vars: 7
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_delta, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=-0.12, p=0.908

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.908)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.75)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.483293

Stopping: model has 0 active variables at factor 21.54434690031882



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.947902,0.739335,2.042593,578,-8.205642,2.220446e-16
1,0.003481,0.001682,0.941937,0.775298,1.896463,509,-7.543513,4.574119e-14
2,0.012115,0.005855,0.931235,0.804798,1.767596,377,-7.014338,2.310374e-12
3,0.04217,0.02038,0.910677,0.842105,1.589735,264,-4.987578,6.114109e-07
4,0.14678,0.070938,0.870772,0.877349,1.401123,111,-3.258205,0.001121194
5,0.510897,0.246913,0.825991,0.901232,1.257327,18,-1.441332,0.1494908
6,1.778279,0.85943,0.775444,0.907549,1.216458,4,0.022954,0.9816871
7,6.189658,2.991419,0.564642,0.906375,1.224156,2,-0.779349,0.4357741



Best ELASTIC (L1=0.75):
  Parameter: 0.859430
  R² (test): 0.9075
  ΔR² vs benchmark: +0.00 p.p.
  RMSE (test): 1.2165
  ΔRMSE vs benchmark: -0.0003
  Active vars: 4
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg
  DM test: stat=0.02, p=0.982

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.982)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 254.9s


Runnning 85-15 split...
Split year for 85-15 split: 2018

UNIFIED REGULARIZED REGRESSION: SIGNAL_T1_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.25, 0.5, 0.75]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2018 (

Unnamed: 0,core_variable_1,core_variable_2,correlation
0,p5d_xrreg,p5d_xconst,0.999234
1,wb_fp_cpi_totl_zg,wb_ny_gdp_defl_kd_zg,0.999107
2,p5d_democ,p5d_xconst,0.998966
3,p5d_democ,p5d_xrreg,0.997443
4,p5d_autoc,p5d_xrreg,0.997367
5,p5d_autoc,p5d_xconst,0.994671
6,p5d_democ,p5d_autoc,0.990609
7,fiw_b,fiw_pr,0.988603
8,fiw_cl,fiw_f,0.977251
9,fiw_a,fiw_pr,0.972884



Note: These correlations are reported for transparency.
Regularization handles multicollinearity. Interpretation focuses on stable patterns.

Clustering variants of same variables...
  Original features: 972
  After clustering: 619
  Number of clusters: 619

Features used in models: 619


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.483293

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.946514,0.798639,1.817985,558,-4.771346,2e-06
1,0.003481,0.001682,0.940047,0.826316,1.688428,460,-4.200001,2.7e-05
2,0.012115,0.005855,0.927041,0.838529,1.627982,341,-3.898519,9.7e-05
3,0.04217,0.02038,0.904682,0.861164,1.509575,198,-2.527922,0.011474
4,0.14678,0.070938,0.860803,0.877928,1.415506,66,-1.998974,0.045611
5,0.510897,0.246913,0.824558,0.889333,1.347757,9,-0.867815,0.385496
6,1.778279,0.85943,0.773758,0.893566,1.32173,1,-0.117756,0.906261



Best LASSO:
  Parameter: 0.859430
  R² (test): 0.8936
  ΔR² vs benchmark: -0.03 p.p.
  RMSE (test): 1.3217
  ΔRMSE vs benchmark: +0.0022
  Active vars: 1
  Active variables: wb_fp_cpi_totl_zg
  DM test: stat=-0.12, p=0.906

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.906)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 24297.810658

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,24.29781,0.933973,0.838427,1.628498,619,-3.90154,9.6e-05
1,0.003481,84.5734,0.923787,0.846146,1.589122,619,-3.544941,0.000393
2,0.012115,294.3747,0.907785,0.854388,1.545973,619,-3.19943,0.001377
3,0.04217,1024.63,0.884884,0.868516,1.46906,619,-2.731933,0.006296
4,0.14678,3566.431,0.848788,0.881257,1.396069,619,-2.249744,0.024465
5,0.510897,12413.68,0.777101,0.887252,1.36037,619,-1.84329,0.065287
6,1.778279,43208.3,0.678563,0.890618,1.33991,619,-1.280271,0.20045
7,6.189658,150395.1,0.611245,0.891795,1.332684,619,-1.001205,0.316728
8,21.544347,523480.5,0.583447,0.891949,1.331735,619,-1.021518,0.307009
9,74.989421,1822079.0,0.574344,0.891932,1.331838,619,-1.08519,0.277838



Best RIDGE:
  Parameter: 523480.461736
  R² (test): 0.8919
  ΔR² vs benchmark: -0.20 p.p.
  RMSE (test): 1.3317
  ΔRMSE vs benchmark: +0.0122
  Active vars: 619
  DM test: stat=-1.02, p=0.307

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.307)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.25)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 1.623777

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.001624,0.943038,0.813447,1.749861,568,-4.523816,6e-06
1,0.003481,0.005652,0.93571,0.833642,1.652436,492,-4.03161,5.5e-05
2,0.012115,0.019673,0.923888,0.841782,1.611502,384,-3.71531,0.000203
3,0.04217,0.068474,0.899942,0.863592,1.496312,259,-2.548058,0.010832
4,0.14678,0.238338,0.858414,0.879852,1.404304,110,-2.076598,0.037839
5,0.510897,0.829583,0.816844,0.890235,1.342255,23,-1.091221,0.275176
6,1.778279,2.887529,0.740449,0.89282,1.326353,9,-0.543595,0.58672



Best ELASTIC (L1=0.25):
  Parameter: 2.887529
  R² (test): 0.8928
  ΔR² vs benchmark: -0.11 p.p.
  RMSE (test): 1.3264
  ΔRMSE vs benchmark: +0.0068
  Active vars: 9
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_delta, wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_zg_ma5, wb_ny_gdp_defl_kd_zg_ma10
  DM test: stat=-0.54, p=0.587

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.587)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.885867

Stopping: model has 0 active 

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000886,0.945074,0.806593,1.781718,561,-4.592342,4e-06
1,0.003481,0.003083,0.937858,0.829736,1.671722,477,-4.104059,4.1e-05
2,0.012115,0.010733,0.92594,0.839077,1.625221,365,-3.843567,0.000121
3,0.04217,0.037357,0.902887,0.861829,1.505955,228,-2.563701,0.010356
4,0.14678,0.130027,0.8604,0.877566,1.417603,88,-2.164183,0.03045
5,0.510897,0.452587,0.821048,0.889496,1.346764,16,-1.110554,0.26676
6,1.778279,1.575319,0.757064,0.893026,1.325078,7,-0.417115,0.676594



Best ELASTIC (L1=0.5):
  Parameter: 1.575319
  R² (test): 0.8930
  ΔR² vs benchmark: -0.09 p.p.
  RMSE (test): 1.3251
  ΔRMSE vs benchmark: +0.0055
  Active vars: 7
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_delta, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=-0.42, p=0.677

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.677)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.75)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.483293

Stopping: model has 0 active variables at factor 21.54434690031882



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.946669,0.796456,1.827813,578,-4.830032,1e-06
1,0.003481,0.001682,0.940853,0.822328,1.707702,497,-4.338886,1.4e-05
2,0.012115,0.005855,0.929421,0.838173,1.629779,378,-3.871751,0.000108
3,0.04217,0.02038,0.910026,0.85638,1.535362,246,-2.849457,0.004379
4,0.14678,0.070938,0.87131,0.874852,1.433229,103,-2.131835,0.03302
5,0.510897,0.246913,0.829961,0.886429,1.365328,18,-1.462753,0.143535
6,1.778279,0.85943,0.782267,0.893282,1.323494,4,-0.26494,0.791055
7,6.189658,2.991419,0.571927,0.891916,1.331937,2,-1.124226,0.260917



Best ELASTIC (L1=0.75):
  Parameter: 0.859430
  R² (test): 0.8933
  ΔR² vs benchmark: -0.06 p.p.
  RMSE (test): 1.3235
  ΔRMSE vs benchmark: +0.0039
  Active vars: 4
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg
  DM test: stat=-0.26, p=0.791

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.791)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 270.3s



### Priced-in level regressions

In [None]:
# load data
# The same path is handed to run_unified_regularized_regression below, so keep
# it in one place to guarantee the column lists and the model see the same file.
# NOTE(review): read_pickle can execute arbitrary code; only load trusted files.
DATA_PATH = "saved/df.dat"
df = pd.read_pickle(DATA_PATH)

# define columns
id_cols = ["country", "year", "iso_code_1", "iso_code_2", "region"]
exclude = id_cols + [c for c in df.columns if c.startswith("tgt_")]
# macro block: "wb_" columns except the "wb_iq_" family
macro_cols = filter_cols([c for c in df.columns if c.startswith("wb_") and not c.startswith("wb_iq_") and c not in exclude], [])
# institutional-quality block: "wb_iq_" columns plus every non-"wb_" column that is not an id/target
iq_cols = filter_cols([c for c in df.columns if (c.startswith("wb_iq_") or (not c.startswith("wb_") and c not in exclude))], [])
y_name = "tgt_spread"

# define models to run
models = ['lasso', 'ridge', 'elastic']

# iterate over split shares
split_shares = [0.75, 0.8, 0.85]
for split_share in split_shares:

    # get split year
    split_year = check_split_year(df, y_name, split_share)
    # the 75-25 split keeps the unsuffixed name; other splits get a percentage suffix
    name_prefix = "levels_t0" if split_share == 0.75 else f"levels_t0_{split_share*100:.0f}"
    split_label = f"{split_share*100:.0f}-{(1-split_share)*100:.0f}"
    print("\n" + "=" * 100)
    print(f"Running {split_label} split...")
    print(f"Split year for {split_label} split: {split_year}")

    # run spec for models
    # NOTE: `results` is rebound on every iteration, so after the loop it only
    # holds the return value for the last split share.
    results = run_unified_regularized_regression(
        name_prefix=name_prefix,
        data_path=DATA_PATH,
        macro_cols=macro_cols,
        iq_cols=iq_cols,
        use_clustering=True,
        corr_threshold=0.9,
        y_name=y_name,
        spread_col="tgt_spread",
        year_col="year",
        split_year=split_year,
        model_types=models,
        agnostic=True,
        param_factors=np.logspace(-2, 5, 15),
        l1_ratios=[0.25, 0.5, 0.75],
        use_expanding_cv=True,
        min_train_years=8,
        cv_stride=2,
        r2_benchmark=None,
        rmse_benchmark=None,
        early_stop_threshold=0.10,
        save_results=True,
        output_dir="specs",
        n_jobs=-1
    )

Runnning 75-25 split...
Split year for 75-25 split: 2015

UNIFIED REGULARIZED REGRESSION: LEVELS_T0
Specification: Agnostic (β unrestricted)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.25, 0.5, 0.75]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2015 (1652 obs)
Test:  2016-2024 (541 obs)

MULTICOLLINEARITY DIAGNOSTICS (Core Variables Only)

Found 60 core variable pairs with correlation above 0.9:



Unnamed: 0,core_variable_1,core_variable_2,correlation
0,p5d_democ,p5d_xconst,0.999063
1,wb_fp_cpi_totl_zg,wb_ny_gdp_defl_kd_zg,0.999056
2,p5d_xrreg,p5d_xconst,0.998977
3,p5d_democ,p5d_xrreg,0.997217
4,p5d_autoc,p5d_xrreg,0.997004
5,p5d_autoc,p5d_xconst,0.993404
6,p5d_democ,p5d_autoc,0.989513
7,fiw_b,fiw_pr,0.988235
8,fiw_cl,fiw_f,0.976274
9,fiw_a,fiw_pr,0.974487



Note: These correlations are reported for transparency.
Regularization handles multicollinearity. Interpretation focuses on stable patterns.

Clustering variants of same variables...
  Original features: 972
  After clustering: 634
  Number of clusters: 634

Features used in models: 634


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 3162.2776601683795



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.96368,-0.100415,4.396,632,,
1,0.031623,0.00012,0.963665,-0.08415,4.36339,629,,
2,0.1,0.000379,0.96335,-0.049959,4.294035,605,,
3,0.316228,0.001199,0.961327,-0.009447,4.210379,542,,
4,1.0,0.003793,0.954384,0.178697,3.79779,446,,
5,3.162278,0.011994,0.93981,0.432268,3.157555,329,,
6,10.0,0.037927,0.915739,0.472107,3.044755,203,,
7,31.622777,0.119935,0.862338,0.495854,2.975484,86,,
8,100.0,0.379269,0.790401,0.514568,2.919736,24,,
9,316.227766,1.199354,0.680979,0.303349,3.497735,7,,



Best LASSO:
  Parameter: 0.379269
  R² (test): 0.5146
  RMSE (test): 2.9197
  Active vars: 24


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 19 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.564296,21.183727,5
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.561254,21.069518,7
12,efw_2d_military_interference,"[efw_2d_military_interference_ma10, efw_2d_mil...",0.302574,11.358659,5
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.253023,9.498501,7
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.245889,9.230686,7
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_ma3, efw_2b_impartial...",0.166839,6.263139,5
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.152605,5.728794,6
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_t-1, e...",0.105727,3.968998,8
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_ma3, efw_5aii_...",0.055045,2.066408,8
99,wb_ny_gdp_petr_rt_zs,"[wb_ny_gdp_petr_rt_zs_delta, wb_ny_gdp_petr_rt...",0.050091,1.880408,5



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 8 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,efw_2d_military_interference,[efw_2d_military_interference_ma10],1.0,-0.34085,0.083244,1
1,efw_2e_legal_integrity,[efw_2e_legal_integrity_ma10],1.0,-0.637692,0.130343,1
5,wb_bn_cab_xoka_gd_zs,[wb_bn_cab_xoka_gd_zs_ma5],1.0,-0.328675,0.052443,1
4,ief_monetary_freedom,[ief_monetary_freedom_t-3],0.97,-0.793808,0.354935,1
2,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5],0.93,-0.114641,0.05924,1
3,efw_4c_black_market_exchange_rates,"[efw_4c_black_market_exchange_rates_ma10, efw_...",0.895,-0.154545,0.078826,2
6,wb_ny_gdp_petr_rt_zs,[wb_ny_gdp_petr_rt_zs_delta],0.8,0.272866,0.069735,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 174.867862



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,1.748679,0.961864,-0.000488,4.191653,634,,
1,0.031623,5.529807,0.95819,0.061217,4.060336,634,,
2,0.1,17.48679,0.951627,0.194124,3.761953,634,,
3,0.316228,55.29807,0.940659,0.336398,3.413761,634,,
4,1.0,174.8679,0.923693,0.430671,3.161994,634,,
5,3.162278,552.9807,0.897645,0.488617,2.996765,634,,
6,10.0,1748.679,0.851113,0.535431,2.856306,634,,
7,31.622777,5529.807,0.744029,0.554868,2.795915,634,,
8,100.0,17486.79,0.537628,0.524745,2.888967,634,,
9,316.227766,55298.07,0.316116,0.460726,3.077403,634,,



Best RIDGE:
  Parameter: 5529.807340
  R² (test): 0.5549
  RMSE (test): 2.7959
  Active vars: 634


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.494245,4.847237,9
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.335004,3.285497,7
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.27467,2.693782,7
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.252809,2.479386,7
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.251828,2.469765,7
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_t-1, efw_5aii_...",0.247966,2.431892,8
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_ma5, efw_1b_tr...",0.21933,2.151048,6
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",0.218597,2.143857,5
33,efw_4diii_freedom_of_foreigners_to_visit,"[efw_4diii_freedom_of_foreigners_to_visit_t-3,...",0.198621,1.947943,6
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.192163,1.884608,5



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.25)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 10000.0



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.963678,-0.102981,4.401122,634,,
1,0.031623,0.00012,0.963653,-0.085342,4.365789,633,,
2,0.1,0.000379,0.96339,-0.050065,4.294252,631,,
3,0.316228,0.001199,0.961672,0.011039,4.167436,608,,
4,1.0,0.003793,0.956859,0.098377,3.979165,569,,
5,3.162278,0.011994,0.947802,0.293544,3.522264,505,,
6,10.0,0.037927,0.931405,0.432754,3.156206,407,,
7,31.622777,0.119935,0.904009,0.479872,3.02228,297,,
8,100.0,0.379269,0.849222,0.508562,2.937741,181,,
9,316.227766,1.199354,0.761361,0.49999,2.963254,88,,



Best ELASTIC (L1=0.25):
  Parameter: 0.379269
  R² (test): 0.5086
  RMSE (test): 2.9377
  Active vars: 181


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.25))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.620143,7.928968,7
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_t-1, e...",0.385783,4.932502,8
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_ma5, efw_1b_tr...",0.368986,4.717745,6
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.364376,4.658797,7
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.350585,4.482479,9
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.303633,3.882155,7
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_ma3, efw_5aii_...",0.231827,2.964075,8
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.228749,2.924718,5
31,efw_4di_financial_openness,"[efw_4di_financial_openness_t-5, efw_4di_finan...",0.224732,2.873356,7
12,efw_2d_military_interference,"[efw_2d_military_interference_ma10, efw_2d_mil...",0.200197,2.559661,5



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 104 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
4,efw_2_legal_system_property_rights_no_gender_a...,[efw_2_legal_system_property_rights_no_gender_...,1.0,-0.155814,0.030632,1
6,efw_2b_impartial_courts,[efw_2b_impartial_courts_ma3],1.0,-0.121949,0.03002,1
5,efw_2a_judicial_independence,[efw_2a_judicial_independence_ma10],1.0,-0.149647,0.042592,1
40,p5d_durable,[p5d_durable],1.0,-0.137484,0.03046,1
45,wb_ny_gdp_minr_rt_zs,"[wb_ny_gdp_minr_rt_zs_t-5, wb_ny_gdp_minr_rt_z...",0.995,0.062058,0.022937,2
47,wb_ny_gdp_pcap_kd,"[wb_ny_gdp_pcap_kd, wb_ny_gdp_pcap_kd_delta]",0.99,-0.082787,0.023595,2
42,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_t-3, wb_bn_cab_xoka_gd_z...",0.99,-0.068413,0.030326,4
51,wb_tx_val_fuel_zs_un,"[wb_tx_val_fuel_zs_un_ma10, wb_tx_val_fuel_zs_...",0.985,0.075988,0.029394,2
32,efw_ie_state_ownership,[efw_ie_state_ownership_ma10],0.98,-0.222067,0.068203,1
36,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.98,-0.166595,0.084016,4



--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 3162.2776601683795



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.963678,-0.099957,4.395084,634,,
1,0.031623,0.00012,0.963658,-0.085576,4.366258,634,,
2,0.1,0.000379,0.963393,-0.048819,4.291702,624,,
3,0.316228,0.001199,0.961584,0.016022,4.156925,577,,
4,1.0,0.003793,0.95593,0.131464,3.90547,518,,
5,3.162278,0.011994,0.944683,0.352732,3.371488,428,,
6,10.0,0.037927,0.924965,0.463164,3.070438,303,,
7,31.622777,0.119935,0.888626,0.497679,2.970094,196,,
8,100.0,0.379269,0.820254,0.509748,2.934197,80,,
9,316.227766,1.199354,0.72607,0.43511,3.149644,40,,



Best ELASTIC (L1=0.5):
  Parameter: 0.379269
  R² (test): 0.5097
  RMSE (test): 2.9342
  Active vars: 80


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.546426,12.318291,7
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.353336,7.965386,7
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.26106,5.88517,7
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.254002,5.726058,5
12,efw_2d_military_interference,"[efw_2d_military_interference_ma10, efw_2d_mil...",0.247397,5.577171,5
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_t-1, e...",0.241654,5.447697,8
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.205463,4.631834,9
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.191706,4.321711,6
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_ma3, efw_5aii_...",0.144621,3.260244,8
7,efw_2_legal_system_property_rights_no_gender_a...,[efw_2_legal_system_property_rights_no_gender_...,0.138701,3.126791,5



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 44 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
1,efw_2_legal_system_property_rights_no_gender_a...,[efw_2_legal_system_property_rights_no_gender_...,1.0,-0.153153,0.050417,1
2,efw_2b_impartial_courts,[efw_2b_impartial_courts_ma3],1.0,-0.130797,0.04362,1
3,efw_2d_military_interference,[efw_2d_military_interference_ma10],1.0,-0.270869,0.057867,1
4,efw_2e_legal_integrity,[efw_2e_legal_integrity_ma10],1.0,-0.2949,0.037666,1
12,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma10],1.0,-0.112567,0.043716,1
8,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5],0.99,-0.121914,0.046067,1
0,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.99,0.09396,0.037128,2
16,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.973333,-0.305182,0.118028,3
11,efw_5aii_private_sector_credit,[efw_5aii_private_sector_credit_ma3],0.96,-0.103735,0.041902,1
7,efw_4aiii_standard_deviation_of_tariff_rates,[efw_4aiii_standard_deviation_of_tariff_rates],0.96,0.055161,0.023858,1



--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.75)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 3162.2776601683795



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.963679,-0.10007,4.395311,632,,
1,0.031623,0.00012,0.963662,-0.084526,4.364148,632,,
2,0.1,0.000379,0.963379,-0.04726,4.288512,616,,
3,0.316228,0.001199,0.961462,0.013128,4.163033,560,,
4,1.0,0.003793,0.955141,0.163787,3.832109,477,,
5,3.162278,0.011994,0.941889,0.411688,3.214276,367,,
6,10.0,0.037927,0.919791,0.470709,3.048785,244,,
7,31.622777,0.119935,0.873971,0.506286,2.944538,134,,
8,100.0,0.379269,0.80017,0.517463,2.911017,47,,
9,316.227766,1.199354,0.702578,0.341336,3.401037,24,,



Best ELASTIC (L1=0.75):
  Parameter: 0.379269
  R² (test): 0.5175
  RMSE (test): 2.9110
  Active vars: 47


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.75))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.495285,15.500015,7
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.354614,11.097688,5
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.306696,9.598103,7
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.302624,9.470652,7
12,efw_2d_military_interference,"[efw_2d_military_interference_ma10, efw_2d_mil...",0.292964,9.168359,5
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_ma3, efw_2b_impartial...",0.166089,5.19777,5
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_t-1, e...",0.160066,5.009282,8
7,efw_2_legal_system_property_rights_no_gender_a...,[efw_2_legal_system_property_rights_no_gender_...,0.124562,3.898199,5
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_ma3, efw_5aii_...",0.099538,3.115044,8
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_t-...",0.095857,2.999866,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 17 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
2,efw_2d_military_interference,[efw_2d_military_interference_ma10],1.0,-0.317335,0.075235,1
3,efw_2e_legal_integrity,[efw_2e_legal_integrity_ma10],1.0,-0.41419,0.064632,1
10,wb_bn_cab_xoka_gd_zs,[wb_bn_cab_xoka_gd_zs_ma5],1.0,-0.342323,0.05101,1
5,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5],0.99,-0.129201,0.054952,1
1,efw_2b_impartial_courts,[efw_2b_impartial_courts_ma3],0.98,-0.159025,0.065406,1
0,efw_2_legal_system_property_rights_no_gender_a...,[efw_2_legal_system_property_rights_no_gender_...,0.98,-0.139892,0.076022,1
13,wb_ny_gdp_pcap_kd,[wb_ny_gdp_pcap_kd_delta],0.93,-0.062646,0.03103,1
6,efw_4c_black_market_exchange_rates,"[efw_4c_black_market_exchange_rates_ma10, efw_...",0.93,-0.181481,0.072913,2
9,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.925,-0.476386,0.209519,2
7,efw_5aii_private_sector_credit,[efw_5aii_private_sector_credit_ma3],0.89,-0.09761,0.048855,1



COMPLETED IN 499.8s

Runnning 80-20 split...
Split year for 80-20 split: 2017

UNIFIED REGULARIZED REGRESSION: LEVELS_T0_80
Specification: Agnostic (β unrestricted)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.25, 0.5, 0.75]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2017 (1780 obs)
Test:  2018-2024 (413 obs)

MULTICOLLINEARITY DIAGNOSTICS (Core Variables Only)

Found 61 core variable pairs with correlation above 0.9:



Unnamed: 0,core_variable_1,core_variable_2,correlation
0,wb_fp_cpi_totl_zg,wb_ny_gdp_defl_kd_zg,0.999012
1,p5d_democ,p5d_xconst,0.999012
2,p5d_xrreg,p5d_xconst,0.998965
3,p5d_democ,p5d_xrreg,0.997143
4,p5d_autoc,p5d_xrreg,0.996899
5,p5d_autoc,p5d_xconst,0.99325
6,p5d_democ,p5d_autoc,0.989191
7,fiw_b,fiw_pr,0.988497
8,fiw_cl,fiw_f,0.976849
9,fiw_a,fiw_pr,0.974106



Note: These correlations are reported for transparency.
Regularization handles multicollinearity. Interpretation focuses on stable patterns.

Clustering variants of same variables...
  Original features: 972
  After clustering: 627
  Number of clusters: 627

Features used in models: 627


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.006952

Stopping: model has 0 active variables at factor 1000.0



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,7e-05,0.957721,0.107244,3.796439,625,,
1,0.031623,0.00022,0.957637,0.151205,3.701787,608,,
2,0.1,0.000695,0.956725,0.246056,3.488828,573,,
3,0.316228,0.002198,0.951894,0.406048,3.096604,485,,
4,1.0,0.006952,0.941385,0.50875,2.816187,378,,
5,3.162278,0.021984,0.921092,0.569346,2.636782,273,,
6,10.0,0.069519,0.882847,0.612661,2.500667,146,,
7,31.622777,0.219839,0.810831,0.567293,2.643061,44,,
8,100.0,0.695193,0.738799,0.4493,2.981725,16,,
9,316.227766,2.198393,0.510383,0.053039,3.909994,4,,



Best LASSO:
  Parameter: 0.069519
  R² (test): 0.6127
  RMSE (test): 2.5007
  Active vars: 146


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation, efw_3...",1.060929,9.29914,8
74,ief_monetary_freedom,"[ief_monetary_freedom_ma5, ief_monetary_freedo...",0.951012,8.33571,7
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.503271,4.411214,6
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.47644,4.176044,6
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.401159,3.516193,7
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.382694,3.354346,5
51,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma1...,0.378082,3.313921,5
31,efw_4di_financial_openness,"[efw_4di_financial_openness_t-5, efw_4di_finan...",0.321848,2.821024,7
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg, wb_ny_gdp_mktp_kd_zg_ma...",0.313519,2.748026,9
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_ma10, efw_2a_jud...",0.305951,2.681685,5



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 67 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
13,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5],1.0,-0.339571,0.066775,1
16,efw_4dii_capital_controls,[efw_4dii_capital_controls_t-5],1.0,0.157206,0.058557,1
18,efw_5aii_private_sector_credit,[efw_5aii_private_sector_credit_t-3],1.0,-0.170184,0.060969,1
6,efw_2f_contracts,[efw_2f_contracts_delta3],1.0,-0.177079,0.042437,1
48,wb_pa_nus_fcrf,[wb_pa_nus_fcrf_delta3],1.0,0.0803,0.026062,1
44,wb_ny_gdp_minr_rt_zs,[wb_ny_gdp_minr_rt_zs_ma10],1.0,0.288152,0.048507,1
28,efw_gender_disparity_index,[efw_gender_disparity_index_ma10],1.0,0.497335,0.10516,1
10,efw_4ai_trade_tax_revenue,[efw_4ai_trade_tax_revenue_t-3],0.99,-0.231385,0.08635,1
25,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma10],0.99,-0.426081,0.141964,1
50,wgi_governmenteffectiveness_estimate,[wgi_governmenteffectiveness_estimate_delta3],0.99,-0.071283,0.038279,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 110.408951



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,1.10409,0.957074,0.225993,3.534943,627,,
1,0.031623,3.491438,0.954128,0.301439,3.358244,627,,
2,0.1,11.0409,0.948894,0.38022,3.163216,627,,
3,0.316228,34.91438,0.939625,0.462746,2.945098,627,,
4,1.0,110.409,0.925027,0.541878,2.719571,627,,
5,3.162278,349.1438,0.903188,0.59995,2.541367,627,,
6,10.0,1104.09,0.868455,0.623125,2.466656,627,,
7,31.622777,3491.438,0.797686,0.611648,2.503934,627,,
8,100.0,11040.9,0.639959,0.565775,2.64769,627,,
9,316.227766,34914.38,0.414275,0.491836,2.864257,627,,



Best RIDGE:
  Parameter: 1104.089514
  R² (test): 0.6231
  RMSE (test): 2.4667
  Active vars: 627


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.599841,3.122865,7
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.560311,2.917066,9
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_ma5, efw_1b_tr...",0.457467,2.381643,6
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_t-3, efw_5aii_...",0.437311,2.276706,8
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.436119,2.270503,7
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.42551,2.215268,6
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_t-1, e...",0.366776,1.909493,8
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_ma10, efw_2a_jud...",0.36438,1.897019,5
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.336302,1.750837,6
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.326125,1.697855,5



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.25)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 10000.0



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.957727,0.096642,3.818915,627,,
1,0.031623,0.00012,0.957708,0.123154,3.762458,627,,
2,0.1,0.000379,0.957447,0.185739,3.6257,621,,
3,0.316228,0.001199,0.955641,0.283892,3.400158,606,,
4,1.0,0.003793,0.95068,0.395969,3.122766,565,,
5,3.162278,0.011994,0.941223,0.488149,2.874628,502,,
6,10.0,0.037927,0.924389,0.552466,2.687962,393,,
7,31.622777,0.119935,0.895606,0.602073,2.534614,303,,
8,100.0,0.379269,0.838731,0.590587,2.570933,180,,
9,316.227766,1.199354,0.75118,0.524777,2.769867,84,,



Best ELASTIC (L1=0.25):
  Parameter: 0.119935
  R² (test): 0.6021
  RMSE (test): 2.5346
  Active vars: 303


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.25))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",1.05931,6.080713,7
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation, efw_3...",0.827906,4.752391,8
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.588975,3.380867,6
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.49871,2.862723,6
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_ma10, efw_2a_jud...",0.459507,2.637685,5
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg, wb_ny_gdp_mktp_kd_zg_ma...",0.416376,2.390107,9
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_t-3, efw_5aii_...",0.398965,2.290158,8
50,efw_5cii_bureacracy_costs,"[efw_5cii_bureacracy_costs_ma10, efw_5cii_bure...",0.37765,2.167806,8
55,efw_5dii_business_permits,"[efw_5dii_business_permits_t-5, efw_5dii_busin...",0.362561,2.081194,6
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.357072,2.049684,5



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 187 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
4,efw_1di_top_marginal_income_tax_rate,[efw_1di_top_marginal_income_tax_rate_t-5],1.0,-0.098188,0.031629,1
11,efw_2f_contracts,[efw_2f_contracts_delta3],1.0,-0.205277,0.036347,1
41,efw_5ci_regulatory_burden,"[efw_5ci_regulatory_burden_t-1, efw_5ci_regula...",1.0,0.123937,0.036847,2
42,efw_5cii_bureacracy_costs,"[efw_5cii_bureacracy_costs_t-3, efw_5cii_burea...",1.0,0.008686,0.044604,2
26,efw_4di_financial_openness,"[efw_4di_financial_openness_t-5, efw_4di_finan...",1.0,-0.213516,0.042959,2
65,p5d_autoc,[p5d_autoc_ma10],1.0,-0.114182,0.039495,1
67,p5d_fragment,[p5d_fragment_t-3],1.0,0.127616,0.043288,1
62,ief_property_rights,"[ief_property_rights_t-3, ief_property_rights_...",1.0,0.129325,0.040601,2
83,wgi_governmenteffectiveness_estimate,[wgi_governmenteffectiveness_estimate_delta3],1.0,-0.103933,0.038427,1
38,efw_5bv_cost_of_worker_dismissal,[efw_5bv_cost_of_worker_dismissal_ma10],1.0,-0.13078,0.057754,1



--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 3162.2776601683795



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.957727,0.096792,3.818597,627,,
1,0.031623,0.00012,0.957708,0.123375,3.761983,627,,
2,0.1,0.000379,0.957445,0.187135,3.622591,616,,
3,0.316228,0.001199,0.955513,0.296538,3.370003,587,,
4,1.0,0.003793,0.949728,0.426831,3.041946,511,,
5,3.162278,0.011994,0.938221,0.514887,2.798538,424,,
6,10.0,0.037927,0.917699,0.577274,2.612399,318,,
7,31.622777,0.119935,0.87932,0.611565,2.5042,201,,
8,100.0,0.379269,0.808411,0.559497,2.666763,79,,
9,316.227766,1.199354,0.718571,0.458081,2.957858,42,,



Best ELASTIC (L1=0.5):
  Parameter: 0.119935
  R² (test): 0.6116
  RMSE (test): 2.5042
  Active vars: 201


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.937056,7.736126,7
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation, efw_3...",0.749098,6.184383,8
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.5162,4.261635,6
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.476813,3.936465,6
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.381792,3.151991,7
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.355347,2.933668,9
51,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma1...,0.312198,2.577438,5
31,efw_4di_financial_openness,"[efw_4di_financial_openness_t-5, efw_4di_finan...",0.29588,2.442717,7
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.294016,2.427327,5
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_ma10, efw_2a_jud...",0.287881,2.376683,5



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 109 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
2,efw_1di_top_marginal_income_tax_rate,[efw_1di_top_marginal_income_tax_rate_t-5],1.0,-0.101123,0.04077,1
4,efw_2_legal_system_property_rights_no_gender_a...,[efw_2_legal_system_property_rights_no_gender_...,1.0,-0.174428,0.093866,1
9,efw_2f_contracts,[efw_2f_contracts_delta3],1.0,-0.169563,0.038908,1
23,efw_4dii_capital_controls,[efw_4dii_capital_controls_t-5],1.0,0.139896,0.042057,1
20,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5],1.0,-0.305624,0.051154,1
46,p5d_autoc,[p5d_autoc_ma10],1.0,-0.107166,0.040447,1
34,efw_5dii_business_permits,"[efw_5dii_business_permits_t-5, efw_5dii_busin...",1.0,0.019336,0.040591,2
32,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma10],1.0,-0.313178,0.07602,1
58,wb_ny_gdp_pcap_kd,[wb_ny_gdp_pcap_kd],1.0,-0.1402,0.039594,1
62,wgi_governmenteffectiveness_estimate,[wgi_governmenteffectiveness_estimate_delta3],1.0,-0.08271,0.03639,1



--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.75)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 3162.2776601683795



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.957726,0.096897,3.818375,627,,
1,0.031623,0.00012,0.957706,0.123285,3.762178,625,,
2,0.1,0.000379,0.957428,0.188144,3.620342,607,,
3,0.316228,0.001199,0.955345,0.306428,3.346229,564,,
4,1.0,0.003793,0.948872,0.445345,2.992412,474,,
5,3.162278,0.011994,0.935658,0.534434,2.741579,363,,
6,10.0,0.037927,0.912318,0.584112,2.591183,250,,
7,31.622777,0.119935,0.863825,0.603038,2.531538,137,,
8,100.0,0.379269,0.787586,0.553943,2.683521,45,,
9,316.227766,1.199354,0.692241,0.367796,3.194764,26,,



Best ELASTIC (L1=0.75):
  Parameter: 0.119935
  R² (test): 0.6030
  RMSE (test): 2.5315
  Active vars: 137


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.75))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.861403,9.734797,7
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_t-1, e...",0.5981,6.759185,8
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.451238,5.099481,6
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.415599,4.696726,6
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.405331,4.580682,7
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.337162,3.810306,5
51,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma1...,0.297226,3.358984,5
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.272981,3.084986,9
31,efw_4di_financial_openness,"[efw_4di_financial_openness_t-5, efw_4di_finan...",0.256692,2.900902,7
37,efw_5ai_ownership_of_banks,"[efw_5ai_ownership_of_banks, efw_5ai_ownership...",0.217203,2.454631,7



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 67 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
5,efw_2e_legal_integrity,[efw_2e_legal_integrity_ma10],1.0,-0.380718,0.104655,1
13,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5],1.0,-0.292272,0.057288,1
10,efw_4ai_trade_tax_revenue,[efw_4ai_trade_tax_revenue_t-3],1.0,-0.177921,0.064791,1
20,efw_5ci_regulatory_burden,[efw_5ci_regulatory_burden_t-3],1.0,0.144873,0.049565,1
21,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma10],1.0,-0.318572,0.10355,1
16,efw_4dii_capital_controls,[efw_4dii_capital_controls_t-5],1.0,0.113392,0.043981,1
31,p5d_autoc,[p5d_autoc_ma10],1.0,-0.090781,0.040755,1
32,p5d_durable,[p5d_durable],1.0,-0.206648,0.054521,1
39,wb_ny_gdp_minr_rt_zs,[wb_ny_gdp_minr_rt_zs_ma10],1.0,0.240618,0.038861,1
25,fiw_a,[fiw_a_t-1],0.99,0.126439,0.056319,1



COMPLETED IN 567.2s

Runnning 85-15 split...
Split year for 85-15 split: 2019

UNIFIED REGULARIZED REGRESSION: LEVELS_T0_85
Specification: Agnostic (β unrestricted)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.25, 0.5, 0.75]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2019 (1898 obs)
Test:  2020-2024 (295 obs)

MULTICOLLINEARITY DIAGNOSTICS (Core Variables Only)

Found 61 core variable pairs with correlation above 0.9:



Unnamed: 0,core_variable_1,core_variable_2,correlation
0,wb_fp_cpi_totl_zg,wb_ny_gdp_defl_kd_zg,0.999
1,p5d_democ,p5d_xconst,0.998978
2,p5d_xrreg,p5d_xconst,0.998954
3,p5d_democ,p5d_xrreg,0.997063
4,p5d_autoc,p5d_xrreg,0.996793
5,p5d_autoc,p5d_xconst,0.993085
6,fiw_b,fiw_pr,0.988896
7,p5d_democ,p5d_autoc,0.988853
8,fiw_cl,fiw_f,0.976834
9,fiw_a,fiw_pr,0.974276



Note: These correlations are reported for transparency.
Regularization handles multicollinearity. Interpretation focuses on stable patterns.

Clustering variants of same variables...
  Original features: 972
  After clustering: 614
  Number of clusters: 614

Features used in models: 614


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.006952

Stopping: model has 0 active variables at factor 1000.0



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,7e-05,0.953078,0.40252,3.150945,612,,
1,0.031623,0.00022,0.953025,0.434883,3.06442,596,,
2,0.1,0.000695,0.952251,0.509353,2.855378,563,,
3,0.316228,0.002198,0.947719,0.540453,2.763403,482,,
4,1.0,0.006952,0.937096,0.503506,2.87234,377,,
5,3.162278,0.021984,0.917566,0.580466,2.640357,262,,
6,10.0,0.069519,0.879357,0.630728,2.477149,143,,
7,31.622777,0.219839,0.807536,0.576631,2.652397,41,,
8,100.0,0.695193,0.736282,0.470407,2.966538,15,,
9,316.227766,2.198393,0.493936,0.062862,3.946216,4,,



Best LASSO:
  Parameter: 0.069519
  R² (test): 0.6307
  RMSE (test): 2.4771
  Active vars: 143


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation, efw_3...",0.969743,9.051031,8
74,ief_monetary_freedom,"[ief_monetary_freedom_ma3, ief_monetary_freedo...",0.927237,8.654302,7
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.48748,4.549856,6
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.478209,4.463329,6
51,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma1...,0.408504,3.812741,4
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.393647,3.674077,7
59,fiw_a,"[fiw_a_ma10, fiw_a_t-3, fiw_a_t-5, fiw_a_delta...",0.362868,3.386805,5
31,efw_4di_financial_openness,"[efw_4di_financial_openness_t-5, efw_4di_finan...",0.315877,2.94822,7
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.300582,2.805459,5
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_ma10, efw_2a_jud...",0.282178,2.633689,5



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 72 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",1.0,0.23968,0.077001,2
5,efw_2f_contracts,[efw_2f_contracts_delta3],1.0,-0.158592,0.038644,1
10,efw_4ai_trade_tax_revenue,[efw_4ai_trade_tax_revenue_t-3],1.0,-0.218844,0.076952,1
16,efw_4dii_capital_controls,[efw_4dii_capital_controls_t-5],1.0,0.191075,0.054208,1
18,efw_5aii_private_sector_credit,[efw_5aii_private_sector_credit_t-3],1.0,-0.17427,0.062951,1
13,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5],1.0,-0.34297,0.055297,1
25,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma10],1.0,-0.457048,0.1136,1
51,wb_pa_nus_fcrf,[wb_pa_nus_fcrf_delta3],1.0,0.09264,0.024709,1
38,p5d_autoc,[p5d_autoc_ma10],1.0,-0.141063,0.053575,1
30,efw_gender_disparity_index,[efw_gender_disparity_index_ma10],1.0,0.458284,0.085036,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 2.864150



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,0.028641,0.957182,0.49381,2.900252,614,,
1,0.031623,0.090572,0.956531,0.481346,2.935741,614,,
2,0.1,0.286415,0.955076,0.471489,2.963508,614,,
3,0.316228,0.905724,0.953019,0.476355,2.949833,614,,
4,1.0,2.86415,0.950374,0.487201,2.919124,614,,
5,3.162278,9.057237,0.946114,0.482469,2.932563,614,,
6,10.0,28.641497,0.938375,0.48079,2.937315,614,,
7,31.622777,90.572366,0.925468,0.523319,2.814446,614,,
8,100.0,286.414971,0.905663,0.595268,2.593361,614,,
9,316.227766,905.723664,0.874754,0.640018,2.44579,614,,



Best RIDGE:
  Parameter: 905.723664
  R² (test): 0.6400
  RMSE (test): 2.4458
  Active vars: 614


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.656459,3.246606,7
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.561481,2.776879,9
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_t-3, efw_5aii_...",0.541683,2.678967,8
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.494215,2.444208,6
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,0.474843,2.348399,6
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.45188,2.234834,6
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_delta, wb_bn_cab_xoka_gd...",0.432695,2.139952,7
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation, efw_3...",0.407218,2.013952,8
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_ma10, efw_2a_jud...",0.38988,1.928205,5
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.335751,1.660499,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.25)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 10000.0



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.953079,0.394839,3.171132,613,,
1,0.031623,0.00012,0.953073,0.411864,3.126208,611,,
2,0.1,0.000379,0.952847,0.452557,3.01612,612,,
3,0.316228,0.001199,0.951147,0.501017,2.879533,591,,
4,1.0,0.003793,0.946308,0.511487,2.849163,555,,
5,3.162278,0.011994,0.937335,0.492578,2.903779,501,,
6,10.0,0.037927,0.920864,0.5529,2.725721,393,,
7,31.622777,0.119935,0.891821,0.632689,2.470564,289,,
8,100.0,0.379269,0.834883,0.619187,2.515561,186,,
9,316.227766,1.199354,0.747752,0.536771,2.774449,87,,



Best ELASTIC (L1=0.25):
  Parameter: 0.119935
  R² (test): 0.6327
  RMSE (test): 2.4706
  Active vars: 289


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.25))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_ma3, ief_monetary_freedo...",0.975461,5.934788,7
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation, efw_3...",0.783318,4.765772,8
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.614867,3.740902,6
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.508023,3.090856,6
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_t-3, efw_5aii_...",0.453329,2.758093,8
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_ma10, efw_2a_jud...",0.423061,2.573937,5
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg, wb_ny_gdp_mktp_kd_zg_ma...",0.388821,2.365621,9
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.349643,2.127254,7
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.340273,2.070247,5
57,efw_gender_disparity_index,"[efw_gender_disparity_index_ma10, efw_gender_d...",0.307929,1.873464,7



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 197 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
4,efw_1di_top_marginal_income_tax_rate,[efw_1di_top_marginal_income_tax_rate_t-5],1.0,-0.097598,0.033854,1
6,efw_2_legal_system_property_rights_no_gender_a...,[efw_2_legal_system_property_rights_no_gender_...,1.0,-0.187121,0.050118,1
11,efw_2f_contracts,[efw_2f_contracts_delta3],1.0,-0.182597,0.036742,1
21,efw_4aiii_standard_deviation_of_tariff_rates,[efw_4aiii_standard_deviation_of_tariff_rates],1.0,0.113448,0.035765,1
50,efw_ie_state_ownership,[efw_ie_state_ownership_ma10],1.0,-0.319619,0.086127,1
44,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma10],1.0,-0.301499,0.050096,1
28,efw_4dii_capital_controls,"[efw_4dii_capital_controls_ma10, efw_4dii_capi...",1.0,0.13646,0.038848,2
64,ief_tax_burden,[ief_tax_burden_t-5],1.0,0.09872,0.032617,1
80,wb_ny_gdp_pcap_kd,[wb_ny_gdp_pcap_kd],1.0,-0.135431,0.033559,1
38,efw_5biii_flexible_wage_determination,"[efw_5biii_flexible_wage_determination, efw_5b...",0.995,-0.109332,0.046367,2



--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 3162.2776601683795



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.953078,0.394896,3.170983,613,,
1,0.031623,0.00012,0.953075,0.412561,3.124356,609,,
2,0.1,0.000379,0.952852,0.456242,3.005951,604,,
3,0.316228,0.001199,0.951075,0.519059,2.826995,572,,
4,1.0,0.003793,0.945363,0.521598,2.819522,509,,
5,3.162278,0.011994,0.934298,0.515378,2.837792,422,,
6,10.0,0.037927,0.914315,0.591514,2.60536,304,,
7,31.622777,0.119935,0.875454,0.638928,2.449491,202,,
8,100.0,0.379269,0.80368,0.569491,2.67467,81,,
9,316.227766,1.199354,0.715345,0.476476,2.949492,42,,



Best ELASTIC (L1=0.5):
  Parameter: 0.119935
  R² (test): 0.6389
  RMSE (test): 2.4495
  Active vars: 202


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_ma3, ief_monetary_freedo...",0.85763,7.515391,7
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation, efw_3...",0.694454,6.085483,8
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.511067,4.478472,6
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.479173,4.198985,6
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.386214,3.384387,7
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_t-3, efw_5aii_...",0.340539,2.984134,8
51,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma1...,0.306051,2.681918,4
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg, wb_ny_gdp_mktp_kd_zg_ma...",0.305612,2.678067,9
31,efw_4di_financial_openness,"[efw_4di_financial_openness_t-5, efw_4di_finan...",0.289246,2.53466,7
59,fiw_a,"[fiw_a_ma10, fiw_a_t-3, fiw_a_delta, fiw_a_t-5...",0.283394,2.483376,5



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 116 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",1.0,0.232881,0.057588,2
2,efw_1di_top_marginal_income_tax_rate,[efw_1di_top_marginal_income_tax_rate_t-5],1.0,-0.095256,0.044779,1
8,efw_2f_contracts,[efw_2f_contracts_delta3],1.0,-0.150771,0.037002,1
7,efw_2e_legal_integrity,[efw_2e_legal_integrity_ma10],1.0,-0.317189,0.066902,1
60,wb_ny_gdp_minr_rt_zs,[wb_ny_gdp_minr_rt_zs_ma10],1.0,0.275646,0.038703,1
49,p5d_autoc,[p5d_autoc_ma10],1.0,-0.138491,0.044943,1
18,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5],1.0,-0.309254,0.04387,1
21,efw_4dii_capital_controls,[efw_4dii_capital_controls_t-5],1.0,0.16553,0.040187,1
33,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma10],1.0,-0.340496,0.057832,1
39,efw_ie_state_ownership,[efw_ie_state_ownership_ma10],1.0,-0.263366,0.089338,1



--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.75)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.003793

Stopping: model has 0 active variables at factor 3162.2776601683795



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,3.8e-05,0.953078,0.394975,3.170778,611,,
1,0.031623,0.00012,0.953075,0.413123,3.122859,609,,
2,0.1,0.000379,0.952855,0.46065,2.993741,593,,
3,0.316228,0.001199,0.950945,0.526852,2.803996,554,,
4,1.0,0.003793,0.944609,0.524425,2.81118,468,,
5,3.162278,0.011994,0.931795,0.517607,2.831259,362,,
6,10.0,0.037927,0.908908,0.608669,2.550063,241,,
7,31.622777,0.119935,0.859991,0.628941,2.483136,134,,
8,100.0,0.379269,0.784227,0.563277,2.693903,42,,
9,316.227766,1.199354,0.687078,0.395746,3.168756,24,,



Best ELASTIC (L1=0.75):
  Parameter: 0.119935
  R² (test): 0.6289
  RMSE (test): 2.4831
  Active vars: 134


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.75))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_ma3, ief_monetary_freedo...",0.810362,9.740875,7
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_t-1, e...",0.511914,6.153407,8
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,0.42992,5.16781,6
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_t-3, efw_1b_tr...",0.410927,4.939512,6
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.406334,4.884294,7
51,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma1...,0.309737,3.723162,4
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.27965,3.361505,5
59,fiw_a,"[fiw_a_ma10, fiw_a_t-5, fiw_a_t-3, fiw_a_delta...",0.245268,2.948216,5
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_t-3, efw_5aii_...",0.245131,2.946569,8
31,efw_4di_financial_openness,"[efw_4di_financial_openness_t-5, efw_4di_finan...",0.243015,2.921132,7



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 67 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
6,efw_2f_contracts,[efw_2f_contracts_delta3],1.0,-0.114977,0.038117,1
5,efw_2e_legal_integrity,[efw_2e_legal_integrity_ma10],1.0,-0.360907,0.09891,1
12,efw_4ai_trade_tax_revenue,[efw_4ai_trade_tax_revenue_t-3],1.0,-0.172761,0.056832,1
15,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5],1.0,-0.30106,0.049379,1
24,efw_5ciii_impartial_public_administration,[efw_5ciii_impartial_public_administration_ma10],1.0,-0.348119,0.084065,1
26,efw_gender_disparity_index,[efw_gender_disparity_index_ma10],1.0,0.287805,0.073636,1
22,efw_5ci_regulatory_burden,[efw_5ci_regulatory_burden_t-3],1.0,0.133594,0.045129,1
41,wb_ny_gdp_minr_rt_zs,[wb_ny_gdp_minr_rt_zs_ma10],1.0,0.253204,0.040108,1
32,p5d_autoc,[p5d_autoc_ma10],1.0,-0.126239,0.042886,1
33,p5d_durable,[p5d_durable],1.0,-0.194405,0.041324,1



COMPLETED IN 587.3s

