In [1]:
import os
import sys
import warnings

from google.colab import drive

# Mount Google Drive so the project folder below becomes reachable.
drive.mount('/content/drive')

# Single definition of the project folder; it is used both for the import
# path and for the working directory, so the two can no longer drift apart.
PROJECT_DIR = "/content/drive/MyDrive/Colab Notebooks/instquality/"

# Put the project folder on the import path (the `functions.*` imports in the
# next cell presumably live there). Guarded so that re-running this cell does
# not append the same entry again.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

# Relative paths used later in the notebook (e.g. "saved/df.dat") are resolved
# against the project folder.
os.chdir(PROJECT_DIR)

# NOTE(review): this silences every warning category for the whole session,
# including convergence and deprecation warnings from the model fits.
# Consider narrowing it to specific categories/modules.
warnings.filterwarnings("ignore")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np

from functions.spec import run_unified_regularized_regression, filter_cols
from functions.utils import check_split_year
from functions.summary import get_benchmark_stats

### Lagged incremental signal regressions

In [3]:
# Load the panel used by every regression below.
# NOTE(review): pickle files execute code on load; only read files produced
# by this project.
df = pd.read_pickle("saved/df.dat")

# Identifier columns and every "tgt_*" column are never candidate predictors.
id_cols = ["country", "year", "iso_code_1", "iso_code_2", "region"]
exclude = id_cols + [name for name in df.columns if name.startswith("tgt_")]

# Partition the columns in a single pass, preserving their original order:
#   * "wb_iq_*" columns always go to the IQ group;
#   * remaining "wb_*" columns go to the macro group unless excluded;
#   * every other column goes to the IQ group unless excluded.
macro_candidates = []
iq_candidates = []
for name in df.columns:
    if name.startswith("wb_iq_"):
        iq_candidates.append(name)
    elif name.startswith("wb_"):
        if name not in exclude:
            macro_candidates.append(name)
    elif name not in exclude:
        iq_candidates.append(name)

# The second argument of filter_cols is passed empty here; its meaning is
# defined in functions.spec (not visible in this notebook).
macro_cols = filter_cols(macro_candidates, [])
iq_cols = filter_cols(iq_candidates, [])

# The current spread is added to the macro group even though it carries the
# "tgt_" prefix that is otherwise excluded.
macro_cols += ["tgt_spread"]

# Regularised model families fitted for every (split, horizon) combination.
models = ['lasso', 'ridge', 'elastic']

# Train shares to evaluate; each one yields its own train/test split year.
split_shares = [0.75, 0.8, 0.85]
for split_share in split_shares:
    # Human-readable label such as "75-25", reused by the progress messages.
    split_label = f"{split_share*100:.0f}-{(1-split_share)*100:.0f}"
    print("\n" + "=" * 100)
    print(f"Runnning {split_label} split...")

    # The benchmark table is built from (df, split_share) only, so it is
    # computed once per split instead of once per horizon.
    benchmark_df = get_benchmark_stats(df, split_share=split_share)

    # Forecast horizons t+1 ... t+10.
    for h in range(1, 11):

        # Target column and the split year for this horizon.
        y_name = f"tgt_spread_t{h}"
        split_year = check_split_year(df, y_name, split_share)

        # The 0.75 split keeps the bare prefix; other splits get a share suffix
        # so that saved results do not overwrite each other.
        if split_share != 0.75:
            name_prefix = f"signal_t{h}_{split_share*100:.0f}"
        else:
            name_prefix = f"signal_t{h}"

        # Benchmark metrics for this horizon; fail with a clear message rather
        # than an opaque IndexError if the horizon is missing from the table.
        h_rows = benchmark_df[benchmark_df["horizon"] == h]
        if h_rows.empty:
            raise ValueError(
                f"get_benchmark_stats returned no row for horizon {h} "
                f"(split_share={split_share})"
            )
        h_benchmark = h_rows.iloc[0]
        r2_benchmark = h_benchmark["R2_test"]
        rmse_benchmark = h_benchmark["RMSE_test"]

        # run spec for models
        print("\n" + "=" * 100)
        print(f"Running forecast horizon: t+{h}")
        print(f"Split year for {split_label} split: {split_year}")

        # NOTE(review): in the recorded t+5 LASSO output the interpretability
        # step ran although the DM statistic was negative (model worse than the
        # benchmark, p=0.045). The significance gate inside
        # run_unified_regularized_regression appears to be two-sided; confirm
        # that this is intended.
        run_unified_regularized_regression(
            name_prefix=name_prefix,
            data_path="saved/df.dat",
            macro_cols=macro_cols,
            iq_cols=iq_cols,
            use_clustering=True,
            corr_threshold=0.9,
            y_name=y_name,
            spread_col="tgt_spread",
            year_col="year",
            split_year=split_year,
            model_types=models,
            agnostic=False,
            param_factors=np.logspace(-3, 10, 25),
            l1_ratios=[0.5],
            use_expanding_cv=True,
            min_train_years=8,
            cv_stride=2,
            r2_benchmark=r2_benchmark,
            rmse_benchmark=rmse_benchmark,
            early_stop_threshold=0.10,
            save_results=True,
            output_dir="specs",
            n_jobs=-1,
            multicollinearity_check=False,
            perform_interpretability=True
        )


Runnning 75-25 split...

Running forecast horizon: t+1
Split year for 75-25 split: 2014

UNIFIED REGULARIZED REGRESSION: SIGNAL_T1
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2014 (1564 obs)
Test:  2015-2023 (534 obs)

β=1 specification active
After removing NaN: 1564 train, 534 test obs

Benchmark metrics:
  R² (test): 0.9074
  RMSE (test): 1.2728

Clustering variants of same variables...
  Original features: 972
  After clustering: 640
  Number of clusters: 640

Features used in models: 640


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal paramete

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.952514,0.489696,2.988004,565,-11.909514,0.0
1,0.003481,0.001682,0.945931,0.531183,2.863972,473,-12.331897,0.0
2,0.012115,0.005855,0.932586,0.614423,2.597301,351,-11.398731,0.0
3,0.04217,0.02038,0.910313,0.823581,1.756868,204,-5.101179,3.37545e-07
4,0.14678,0.070938,0.864961,0.879063,1.454611,90,-2.98071,0.002875811
5,0.510897,0.246913,0.8169,0.90275,1.304405,13,-1.227936,0.2194709
6,1.778279,0.85943,0.758771,0.908984,1.261905,1,0.997312,0.3186132



Best LASSO:
  Parameter: 0.859430
  R² (test): 0.9090
  ΔR² vs benchmark: +0.16 p.p.
  RMSE (test): 1.2619
  ΔRMSE vs benchmark: -0.0109
  Active vars: 1
  Active variables: wb_fp_cpi_totl_zg
  DM test: stat=1.00, p=0.319

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.319)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 24297.810658

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,24.29781,0.938644,0.579978,2.710833,640,-11.976861,0.0
1,0.003481,84.5734,0.927629,0.686802,2.340864,640,-9.936409,0.0
2,0.012115,294.3747,0.909781,0.800096,1.870154,640,-7.046383,1.836309e-12
3,0.04217,1024.63,0.882594,0.861946,1.554145,640,-5.132073,2.865685e-07
4,0.14678,3566.431,0.837401,0.888385,1.397424,640,-4.222864,2.412171e-05
5,0.510897,12413.68,0.748635,0.90084,1.317152,640,-2.813241,0.004904484
6,1.778279,43208.3,0.636128,0.906704,1.277613,640,-0.536661,0.5915021
7,6.189658,150395.1,0.564418,0.908355,1.266254,640,0.887477,0.374822
8,21.544347,523480.5,0.535773,0.908522,1.265099,640,1.029974,0.3030224
9,74.989421,1822079.0,0.526517,0.908478,1.265403,640,0.996816,0.3188536



Best RIDGE:
  Parameter: 523480.461736
  R² (test): 0.9085
  ΔR² vs benchmark: +0.11 p.p.
  RMSE (test): 1.2651
  ΔRMSE vs benchmark: -0.0077
  Active vars: 640
  DM test: stat=1.03, p=0.303

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.303)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.885867

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000886,0.951161,0.485466,3.000365,579,-12.23648,0.0
1,0.003481,0.003083,0.94371,0.531456,2.863136,489,-12.546425,0.0
2,0.012115,0.010733,0.931289,0.638727,2.514112,374,-10.940384,0.0
3,0.04217,0.037357,0.90851,0.823736,1.756096,245,-5.255483,1.476369e-07
4,0.14678,0.130027,0.863938,0.880869,1.443706,102,-3.119657,0.001810617
5,0.510897,0.452587,0.812838,0.903906,1.296626,21,-1.216952,0.2236224
6,1.778279,1.575319,0.74066,0.908616,1.264454,7,1.077759,0.2811413



Best ELASTIC (L1=0.5):
  Parameter: 1.575319
  R² (test): 0.9086
  ΔR² vs benchmark: +0.12 p.p.
  RMSE (test): 1.2645
  ΔRMSE vs benchmark: -0.0084
  Active vars: 7
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_delta, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=1.08, p=0.281

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.281)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 159.9s


Running forecast horizon: t+2
Split year for 75-25 split: 2013

UNIFIED REGULARIZED REGRESSION: SIGNAL_T2
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² c

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000886,0.873789,-189.06445,57.315788,553,-10.888208,0.0
1,0.003481,0.003083,0.850863,-395.426807,82.77617,458,-10.950764,0.0
2,0.012115,0.010733,0.804895,-390.316885,82.240949,326,-10.773653,0.0
3,0.04217,0.037357,0.728347,-150.682202,51.202496,177,-9.94631,0.0
4,0.14678,0.130027,0.578496,-21.866831,19.880477,55,-8.483787,0.0
5,0.510897,0.452587,0.407289,0.801173,1.853793,7,0.909113,0.36329



Best LASSO:
  Parameter: 0.452587
  R² (test): 0.8012
  ΔR² vs benchmark: +0.69 p.p.
  RMSE (test): 1.8538
  ΔRMSE vs benchmark: -0.0318
  Active vars: 7
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, wb_ny_gdp_petr_rt_zs_delta, efw_4c_black_market_exchange_rates_delta3, ief_monetary_freedom_ma5, ief_monetary_freedom_t-3
  DM test: stat=0.91, p=0.363

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.363)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 15341.274046

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,15.34127,0.848809,-321.347176,74.642427,642,-10.640174,0.0
1,0.003481,53.39838,0.818627,-410.582422,84.343618,642,-10.991113,0.0
2,0.012115,185.8638,0.771592,-332.036188,75.869904,642,-10.728859,0.0
3,0.04217,646.9362,0.704189,-175.488294,55.230856,642,-10.166705,0.0
4,0.14678,2251.791,0.612686,-57.333457,31.752847,642,-9.902343,0.0
5,0.510897,7837.811,0.506042,-9.383793,13.396824,642,-9.853401,0.0
6,1.778279,27281.07,0.4149,-0.292118,4.725796,642,-9.346244,0.0
7,6.189658,94957.24,0.359022,0.702758,2.266619,642,-5.421537,5.908878e-08
8,21.544347,330517.7,0.333747,0.790149,1.904493,642,-0.856096,0.3919448
9,74.989421,1150433.0,0.32467,0.796642,1.874799,642,0.973886,0.330113



Best RIDGE:
  Parameter: 4004313.714983
  R² (test): 0.7969
  ΔR² vs benchmark: +0.26 p.p.
  RMSE (test): 1.8737
  ΔRMSE vs benchmark: -0.0119
  Active vars: 642
  DM test: stat=1.32, p=0.186

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.186)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 1.623777

Stopping: model has 0 active variables at factor 1.7782794100389228



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.001624,0.868783,-210.384458,60.444995,573,-10.611133,0.0
1,0.003481,0.005652,0.845102,-405.237548,83.794179,479,-10.815995,0.0
2,0.012115,0.019673,0.799271,-389.298162,82.13383,354,-10.788632,0.0
3,0.04217,0.068474,0.719922,-138.771171,49.15103,205,-9.919279,0.0
4,0.14678,0.238338,0.573426,-15.834132,17.057649,74,-8.434779,0.0
5,0.510897,0.829583,0.4037,0.80025,1.858093,13,0.943701,0.345322



Best ELASTIC (L1=0.5):
  Parameter: 0.829583
  R² (test): 0.8002
  ΔR² vs benchmark: +0.60 p.p.
  RMSE (test): 1.8581
  ΔRMSE vs benchmark: -0.0275
  Active vars: 13
  DM test: stat=0.94, p=0.345

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.345)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 150.1s


Running forecast horizon: t+3
Split year for 75-25 split: 2013

UNIFIED REGULARIZED REGRESSION: SIGNAL_T3
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2013 (1487 obs)
Test:  2014-2021 (466 obs)

β=1 specification active
After removing NaN: 1487 train, 466 test obs

Benchma

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000264,0.866501,-443.120896,83.96923,623,-9.18821,0.0
1,0.003481,0.000918,0.860715,-398.040191,79.593552,559,-9.091808,0.0
2,0.012115,0.003194,0.831931,-438.46127,83.527574,469,-8.819711,0.0
3,0.04217,0.011119,0.770757,-388.888204,78.675518,329,-8.634,0.0
4,0.14678,0.038701,0.676198,-322.095161,71.620078,178,-8.630929,0.0
5,0.510897,0.134706,0.521976,-54.175299,29.596606,62,-8.639373,0.0
6,1.778279,0.46887,0.309145,0.767737,1.920257,5,0.820714,0.411809



Best LASSO:
  Parameter: 0.468870
  R² (test): 0.7677
  ΔR² vs benchmark: +0.58 p.p.
  RMSE (test): 1.9203
  ΔRMSE vs benchmark: -0.0238
  Active vars: 5
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, ief_monetary_freedom_delta3, ief_monetary_freedom_t-3
  DM test: stat=0.82, p=0.412

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.412)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 4691.172785

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,4.691173,0.844649,-531.26261,91.924769,642,-8.917351,0.0
1,0.003481,16.32857,0.820713,-509.691811,90.042804,642,-8.744324,0.0
2,0.012115,56.83486,0.783302,-405.156864,80.30017,642,-8.643645,0.0
3,0.04217,197.8251,0.727813,-291.057279,68.093175,642,-8.58848,0.0
4,0.14678,688.57,0.646382,-176.353019,53.062641,642,-8.566865,0.0
5,0.510897,2396.706,0.533136,-68.062986,33.112521,642,-8.573821,0.0
6,1.778279,8342.216,0.404322,-15.528036,16.198701,642,-8.606034,0.0
7,6.189658,29036.76,0.295708,-1.781356,6.645044,642,-8.526534,0.0
8,21.544347,101068.3,0.228141,0.478623,2.877038,642,-7.122375,1.060929e-12
9,74.989421,351788.3,0.197646,0.739361,2.034178,642,-2.712553,0.006676713



Best RIDGE:
  Parameter: 625577945.135945
  R² (test): 0.7670
  ΔR² vs benchmark: +0.51 p.p.
  RMSE (test): 1.9231
  ΔRMSE vs benchmark: -0.0209
  Active vars: 642
  DM test: stat=1.94, p=0.052

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.052)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.483293

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.864798,-445.504769,84.194286,618,-9.171083,0.0
1,0.003481,0.001682,0.851691,-432.005713,82.911807,574,-8.973472,0.0
2,0.012115,0.005855,0.821008,-488.705946,88.173332,495,-8.721681,0.0
3,0.04217,0.02038,0.76331,-387.890409,78.574781,362,-8.633022,0.0
4,0.14678,0.070938,0.669679,-282.527962,67.091502,208,-8.62931,0.0
5,0.510897,0.246913,0.513066,-55.762533,30.019292,81,-8.635698,0.0
6,1.778279,0.85943,0.296732,0.770985,1.906784,9,1.491923,0.135719



Best ELASTIC (L1=0.5):
  Parameter: 0.859430
  R² (test): 0.7710
  ΔR² vs benchmark: +0.90 p.p.
  RMSE (test): 1.9068
  ΔRMSE vs benchmark: -0.0372
  Active vars: 9
  Active variables: wb_ny_gdp_mktp_kd_zg_ma3, wb_ny_gdp_mktp_kd_zg_ma5, wb_ny_gdp_mktp_kd_zg_ma10, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, efw_3b_standard_deviation_of_inflation_ma3, ief_monetary_freedom_delta3, ief_monetary_freedom_ma5, ief_monetary_freedom_t-3
  DM test: stat=1.49, p=0.136

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.136)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 161.6s


Running forecast horizon: t+4
Split year for 75-25 split: 2012

UNIFIED REGULARIZED REGRESSION: SIGNAL_T4
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Stat

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000144,0.841604,-10.086684,13.328446,634,-7.396501,1.398881e-13
1,0.003481,0.000501,0.838556,-3.135373,8.140218,613,-8.451547,0.0
2,0.012115,0.001743,0.820889,-1.477668,6.300866,542,-9.219548,0.0
3,0.04217,0.006066,0.77234,-0.246455,4.469068,399,-7.899468,2.88658e-15
4,0.14678,0.021114,0.685411,-1.878551,6.791499,262,-7.013747,2.320144e-12
5,0.510897,0.07349,0.541562,0.567236,2.633325,123,-5.530554,3.192215e-08
6,1.778279,0.255797,0.283633,0.722301,2.109432,31,-0.096109,0.9234342
7,6.189658,0.890351,-0.008387,0.731754,2.07322,2,1.72596,0.08435469



Best LASSO:
  Parameter: 0.890351
  R² (test): 0.7318
  ΔR² vs benchmark: +0.84 p.p.
  RMSE (test): 2.0732
  ΔRMSE vs benchmark: -0.0324
  Active vars: 2
  Active variables: wb_fp_cpi_totl_zg, ief_monetary_freedom_t-3
  DM test: stat=1.73, p=0.084

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.084)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 905.723664

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.9057237,0.83675,-3.187399,8.191262,646,-8.386453,0.0
1,0.003481,3.152553,0.823108,-0.903717,5.523061,646,-7.705028,1.310063e-14
2,0.012115,10.97309,0.801078,-9.728957,13.111653,646,-8.877922,0.0
3,0.04217,38.19405,0.763105,-19.187339,17.985322,646,-8.972779,0.0
4,0.14678,132.9421,0.70182,-7.139154,11.420058,646,-6.631572,3.321299e-11
5,0.510897,462.7315,0.608836,-5.477766,10.188056,646,-8.18156,2.220446e-16
6,1.778279,1610.63,0.472161,-11.527865,14.168283,646,-6.909112,4.876988e-12
7,6.189658,5606.12,0.30229,-6.921339,11.266214,646,-7.20661,5.735412e-13
8,21.544347,19513.22,0.141581,-1.3404,6.123838,646,-7.288619,3.130829e-13
9,74.989421,67919.69,0.028958,0.43853,2.999451,646,-6.334083,2.38757e-10



Best RIDGE:
  Parameter: 2864149.709788
  R² (test): 0.7328
  ΔR² vs benchmark: +0.94 p.p.
  RMSE (test): 2.0693
  ΔRMSE vs benchmark: -0.0363
  Active vars: 646
  DM test: stat=2.68, p=0.007


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,0.03683,45.827242,5
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1_delta3, wb_iq_spi_pil1_ma3, wb...",0.002854,3.550791,7
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.001773,2.206111,8
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.001569,1.952818,9
50,efw_5cii_bureacracy_costs,"[efw_5cii_bureacracy_costs, efw_5cii_bureacrac...",0.001123,1.396885,8
66,fiw_g,"[fiw_g_t-5, fiw_g_ma3, fiw_g_t-3, fiw_g_delta,...",0.000888,1.104906,5
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_ma10, efw_5aii...",0.000845,1.051975,8
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_ma3, e...",0.000807,1.004524,8
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_delta, wb_bn_cab_xoka_gd...",0.000767,0.953788,7
62,fiw_cl,"[fiw_cl_ma10, fiw_cl_t-5, fiw_cl_t-3, fiw_cl_d...",0.000763,0.949773,5



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.143845

Stopping: model has 0 active variables at factor 21.54434690031882



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000144,0.841576,-9.630717,13.051486,641,-7.437085,1.030287e-13
1,0.003481,0.000501,0.838611,-3.5441,8.533017,628,-8.216096,2.220446e-16
2,0.012115,0.001743,0.824054,-1.354993,6.142901,580,-9.885225,0.0
3,0.04217,0.006066,0.789659,-1.001395,5.66298,503,-7.862276,3.774758e-15
4,0.14678,0.021114,0.725541,-3.245701,8.24809,372,-7.04103,1.908251e-12
5,0.510897,0.07349,0.61119,0.50362,2.820238,218,-6.294205,3.089797e-10
6,1.778279,0.255797,0.408679,0.6658,2.314099,85,-3.396272,0.0006831047
7,6.189658,0.890351,0.12577,0.744339,2.024003,15,3.107545,0.001886486



Best ELASTIC (L1=0.5):
  Parameter: 0.890351
  R² (test): 0.7443
  ΔR² vs benchmark: +2.10 p.p.
  RMSE (test): 2.0240
  ΔRMSE vs benchmark: -0.0816
  Active vars: 15
  DM test: stat=3.11, p=0.002


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 9 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.262713,29.093983,8
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma3, wb_ny_gdp_mktp_kd_z...",0.233081,25.812343,9
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_t-5, wb_...",0.129821,14.37692,7
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_delta3, wb_bn_cab_xoka_g...",0.117671,13.031352,7
11,efw_2c_property_rights,"[efw_2c_property_rights_t-5, efw_2c_property_r...",0.090917,10.06852,5
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_ma3, e...",0.032963,3.650514,8
99,wb_ny_gdp_petr_rt_zs,"[wb_ny_gdp_petr_rt_zs_delta, wb_ny_gdp_petr_rt...",0.027293,3.022508,5
50,efw_5cii_bureacracy_costs,"[efw_5cii_bureacracy_costs, efw_5cii_bureacrac...",0.007958,0.881332,8
29,efw_4c_black_market_exchange_rates,"[efw_4c_black_market_exchange_rates_delta, efw...",0.000565,0.062527,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 10 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
5,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma3, wb_ny_gdp_mktp_kd_z...",0.935,0.127472,0.032351,2
3,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_delta3, wb_bn_cab_xoka_g...",0.93,-0.100874,0.040852,2
2,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.92,0.081441,0.060435,3
0,efw_2c_property_rights,[efw_2c_property_rights_t-5],0.88,-0.067075,0.030805,1
1,efw_4c_black_market_exchange_rates,[efw_4c_black_market_exchange_rates_delta],0.83,0.099751,0.054972,1
4,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.82,-0.472661,0.03477,1



COMPLETED IN 171.8s


Running forecast horizon: t+5
Split year for 75-25 split: 2011

UNIFIED REGULARIZED REGRESSION: SIGNAL_T5
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2011 (1367 obs)
Test:  2012-2019 (457 obs)

β=1 specification active
After removing NaN: 1367 train, 457 test obs

Benchmark metrics:
  R² (test): 0.6204
  RMSE (test): 2.4632

Clustering variants of same variables...
  Original features: 972
  After clustering: 652
  Number of clusters: 652

Features used in models: 652


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.828654,-107.933678,41.729842,612,-5.272195,1.34802e-07
1,0.003481,0.001682,0.811191,-127.568776,45.334978,520,-5.149689,2.60919e-07
2,0.012115,0.005855,0.769314,-166.840585,51.798138,426,-5.72925,1.008758e-08
3,0.04217,0.02038,0.673399,-110.196731,42.161074,301,-5.549164,2.870388e-08
4,0.14678,0.070938,0.507271,-5.327972,10.057683,150,-5.444286,5.201357e-08
5,0.510897,0.246913,0.21164,0.551443,2.677776,40,-2.199266,0.027859
6,1.778279,0.85943,-0.175934,0.597872,2.535409,5,-2.007956,0.04464796



Best LASSO:
  Parameter: 0.859430
  R² (test): 0.5979
  ΔR² vs benchmark: -2.26 p.p.
  RMSE (test): 2.5354
  ΔRMSE vs benchmark: +0.0722
  Active vars: 5
  Active variables: wb_ny_gdp_mktp_kd_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_zg_ma10, ief_monetary_freedom_t-3
  DM test: stat=-2.01, p=0.045


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 4 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.321902,67.122406,6
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.097636,20.358846,9
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.046516,9.699433,8
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_z...",0.013521,2.819315,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 2 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.86,-1.200881,0.495601,1
1,wb_ny_gdp_defl_kd_zg,[wb_ny_gdp_defl_kd_zg_t-1],0.84,-0.665035,0.40984,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 571.860368

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.5718604,0.830335,-107.590284,41.664017,652,-5.253104,1.495568e-07
1,0.003480701,1.990475,0.819466,-120.934986,44.14991,652,-5.115768,3.124666e-07
2,0.01211528,6.928247,0.801203,-151.398419,49.357811,652,-5.270077,1.363662e-07
3,0.04216965,24.11515,0.768581,-188.631321,55.058033,652,-5.64992,1.605226e-08
4,0.1467799,83.93762,0.712115,-170.198978,52.313797,652,-5.836558,5.329036e-09
5,0.510897,292.1617,0.619517,-83.8305,36.824908,652,-5.686203,1.298947e-08
6,1.778279,1016.928,0.467737,-19.40136,18.059063,652,-5.400731,6.636977e-08
7,6.189658,3539.62,0.222002,-3.043259,8.039544,652,-6.677528,2.430078e-11
8,21.54435,12320.36,-0.106725,-0.252028,4.473759,652,-4.0913,4.289618e-05
9,74.98942,42883.48,-0.399369,0.481067,2.88019,652,-2.899516,0.003737389



Best RIDGE:
  Parameter: 923896523.061936
  R² (test): 0.6416
  ΔR² vs benchmark: +2.12 p.p.
  RMSE (test): 2.3935
  ΔRMSE vs benchmark: -0.0697
  Active vars: 652
  DM test: stat=4.45, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,5.5e-05,25.802862,5
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",7e-06,3.118845,9
74,ief_monetary_freedom,"[ief_monetary_freedom_ma5, ief_monetary_freedo...",6e-06,2.699488,8
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1_ma3, wb_iq_spi_pil1, wb_iq_spi...",5e-06,2.436837,7
66,fiw_g,"[fiw_g_t-5, fiw_g_ma3, fiw_g_t-3, fiw_g_delta3...",3e-06,1.54043,5
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",3e-06,1.49662,7
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_ma3, e...",3e-06,1.380007,8
62,fiw_cl,"[fiw_cl_ma10, fiw_cl_t-5, fiw_cl_t-3, fiw_cl_d...",3e-06,1.293654,5
28,efw_4bii_costs_of_importing_and_exporting,"[efw_4bii_costs_of_importing_and_exporting, ef...",3e-06,1.283953,5
54,efw_5di_market_openness,"[efw_5di_market_openness_ma5, efw_5di_market_o...",3e-06,1.280699,7



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.078476

Stopping: model has 0 active variables at factor 74.98942093324558



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,7.8e-05,0.832126,-103.531667,40.877996,650,-5.330146,9.813399e-08
1,0.003481,0.000273,0.831029,-106.568513,41.467537,644,-5.2716,1.352399e-07
2,0.012115,0.000951,0.823459,-110.2994,42.180534,613,-5.196123,2.034875e-07
3,0.04217,0.003309,0.800926,-140.58346,47.574247,541,-5.256331,1.46958e-07
4,0.14678,0.011519,0.754793,-193.727785,55.792988,444,-5.805393,6.421516e-09
5,0.510897,0.040093,0.655272,-98.280293,39.837969,316,-5.475605,4.360194e-08
6,1.778279,0.139552,0.48291,-5.566931,10.245823,165,-5.705005,1.163399e-08
7,6.189658,0.48574,0.155387,0.57926,2.593418,57,-1.591262,0.1115506
8,21.544347,1.690714,-0.248613,0.622,2.458167,10,0.206936,0.83606



Best ELASTIC (L1=0.5):
  Parameter: 1.690714
  R² (test): 0.6220
  ΔR² vs benchmark: +0.16 p.p.
  RMSE (test): 2.4582
  ΔRMSE vs benchmark: -0.0050
  Active vars: 10
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_ny_gdp_mktp_kd_zg_ma10, wb_fp_cpi_totl_zg_t-1, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma5, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma10, ief_monetary_freedom_t-3
  DM test: stat=0.21, p=0.836

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.836)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 163.4s


Running forecast horizon: t+6
Split year for 75-25 split: 2010

UNIFIED REGULARIZED REGRESSION: SIGNAL_T6
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical 

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.847462,-20.434767,18.720481,659,-5.798901,6.675105e-09
1,0.003481,7e-06,0.847457,-20.561226,18.775623,659,-5.785232,7.241248e-09
2,0.012115,2.5e-05,0.847433,-20.990592,18.961648,658,-5.743112,9.295204e-09
3,0.04217,8.7e-05,0.847279,-22.437893,19.575682,652,-5.626517,1.838851e-08
4,0.14678,0.000304,0.845872,-26.628139,21.253646,635,-5.360567,8.296136e-08
5,0.510897,0.001057,0.835437,-27.765142,21.686571,562,-4.998276,5.784514e-07
6,1.778279,0.00368,0.800673,-46.003706,27.721944,465,-5.054371,4.318124e-07
7,6.189658,0.012807,0.72981,-109.337344,42.473582,340,-5.726743,1.023768e-08
8,21.544347,0.044578,0.582268,-78.314917,36.01099,190,-5.561438,2.675612e-08
9,74.989421,0.155163,0.297156,-0.877122,5.539925,83,-4.654221,3.252074e-06



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.5798
  ΔR² vs benchmark: +2.04 p.p.
  RMSE (test): 2.6211
  ΔRMSE vs benchmark: -0.0629
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=3.65, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.075514,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...
No variables meet stability criterion (80% selection frequency)


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 571.860368

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.5718604,0.845137,-23.612925,20.060384,659,-5.363288,8.172041e-08
1,0.003480701,1.990475,0.832389,-34.238856,24.003166,659,-4.926469,8.372895e-07
2,0.01211528,6.928247,0.810314,-62.956742,32.337079,659,-5.416739,6.069574e-08
3,0.04216965,24.11515,0.771939,-108.652973,42.341655,659,-5.852245,4.849819e-09
4,0.1467799,83.93762,0.705841,-138.249221,47.714899,659,-5.919881,3.221745e-09
5,0.510897,292.1617,0.592153,-87.789304,38.101138,659,-5.819981,5.885437e-09
6,1.778279,1016.928,0.391421,-22.509103,19.605398,659,-4.996358,5.842327e-07
7,6.189658,3539.62,-0.035245,-3.584184,8.657423,659,-5.424766,5.803063e-08
8,21.54435,12320.36,-0.915471,-0.257848,4.534942,659,-4.516612,6.283701e-06
9,74.98942,42883.48,-1.908501,0.482744,2.908107,659,-1.809622,0.07035439



Best RIDGE:
  Parameter: 519545.195117
  R² (test): 0.6033
  ΔR² vs benchmark: +4.39 p.p.
  RMSE (test): 2.5468
  ΔRMSE vs benchmark: -0.1372
  Active vars: 659
  DM test: stat=5.94, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,0.126852,28.795476,5
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.020508,4.655365,9
74,ief_monetary_freedom,"[ief_monetary_freedom_t-1, ief_monetary_freedo...",0.009284,2.107548,8
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.00872,1.979424,7
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_del...,0.007695,1.746713,6
66,fiw_g,"[fiw_g, fiw_g_t-3, fiw_g_t-1, fiw_g_delta3, fi...",0.006567,1.490783,5
65,fiw_f,"[fiw_f_ma5, fiw_f_t-3, fiw_f_t-1, fiw_f_delta,...",0.005911,1.34183,5
64,fiw_e,"[fiw_e_ma5, fiw_e_t-3, fiw_e_t-1, fiw_e_delta3...",0.005802,1.316996,5
11,efw_2c_property_rights,"[efw_2c_property_rights_t-5, efw_2c_property_r...",0.005628,1.277601,5
62,fiw_cl,"[fiw_cl_ma5, fiw_cl_t-3, fiw_cl_delta3, fiw_cl...",0.005052,1.146769,4



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.001129

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,1e-06,0.847462,-20.40612,18.707967,659,-5.801704,6.564436e-09
1,0.003481,4e-06,0.847459,-20.463694,18.733109,659,-5.794471,6.853685e-09
2,0.012115,1.4e-05,0.847445,-20.659525,18.818374,659,-5.770466,7.905275e-09
3,0.04217,4.8e-05,0.847374,-21.296063,19.092892,658,-5.699256,1.203314e-08
4,0.14678,0.000166,0.846855,-23.591827,20.051784,654,-5.493859,3.932455e-08
5,0.510897,0.000577,0.84301,-29.255448,22.241262,638,-5.128183,2.925521e-07
6,1.778279,0.002007,0.824831,-39.377433,25.693719,580,-4.968828,6.735896e-07
7,6.189658,0.006987,0.787816,-76.383002,35.569717,491,-5.562658,2.656956e-08
8,21.544347,0.02432,0.710673,-117.173615,43.955972,383,-5.79807,6.708236e-09
9,74.989421,0.084651,0.555896,-69.242617,33.888939,242,-5.583113,2.362508e-08



Best ELASTIC (L1=0.5):
  Parameter: 1.025569
  R² (test): 0.5840
  ΔR² vs benchmark: +2.46 p.p.
  RMSE (test): 2.6081
  ΔRMSE vs benchmark: -0.0759
  Active vars: 17
  DM test: stat=1.29, p=0.196

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.196)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 191.6s


Running forecast horizon: t+7
Split year for 75-25 split: 2009

UNIFIED REGULARIZED REGRESSION: SIGNAL_T7
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2009 (1257 obs)
Test:  2010-2017 (449 obs)

β=1 specification active
After removing NaN: 1257 train, 449 test obs

Benchma

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,4e-06,0.816655,-80.039204,36.313897,660,-5.042914,4.584952e-07
1,0.003481,1.3e-05,0.816637,-79.924254,36.288133,660,-5.042535,4.594059e-07
2,0.012115,4.6e-05,0.816546,-79.413764,36.173495,655,-5.039196,4.674918e-07
3,0.04217,0.00016,0.815955,-76.772903,35.574551,647,-5.025614,5.018245e-07
4,0.14678,0.000557,0.811539,-60.641971,31.671128,619,-4.969249,6.721284e-07
5,0.510897,0.001938,0.787281,-35.083646,24.231511,543,-4.888112,1.018078e-06
6,1.778279,0.006744,0.729678,-20.050814,18.50801,407,-5.621955,1.888088e-08
7,6.189658,0.023475,0.625171,-23.257805,19.867857,269,-5.579329,2.414476e-08
8,21.544347,0.081711,0.415047,0.192329,3.625287,134,-7.111322,1.149303e-12
9,74.989421,0.284412,-0.021732,0.493752,2.870167,33,-1.415408,0.1569488



Best LASSO:
  Parameter: 3.445726
  R² (test): 0.5786
  ΔR² vs benchmark: +4.79 p.p.
  RMSE (test): 2.6188
  ΔRMSE vs benchmark: -0.1449
  Active vars: 2
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg
  DM test: stat=7.35, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.045716,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...
No variables meet stability criterion (80% selection frequency)


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 397.972312

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.3979723,0.817266,-65.903894,32.995216,660,-5.102776,3.347071e-07
1,0.003480701,1.385222,0.80309,-57.946605,30.970961,660,-5.008849,5.475646e-07
2,0.01211528,4.821545,0.780389,-55.545049,30.333503,660,-4.878129,1.070971e-06
3,0.04216965,16.78235,0.741972,-54.092413,29.941336,660,-4.845506,1.262896e-06
4,0.1467799,58.41435,0.6787,-54.002553,29.916908,660,-5.024586,5.045189e-07
5,0.510897,203.3229,0.573728,-35.378554,24.33033,660,-5.344924,9.045507e-08
6,1.778279,707.706,0.389533,-7.026696,11.428617,660,-5.390124,7.040914e-08
7,6.189658,2463.313,0.00353,-0.19358,4.407081,660,-5.299027,1.164214e-07
8,21.54435,8574.054,-0.861277,0.225995,3.548928,660,-2.992707,0.002765149
9,74.98942,29843.71,-2.038896,0.491826,2.87562,660,-1.165673,0.2437469



Best RIDGE:
  Parameter: 642963310.007776
  R² (test): 0.5940
  ΔR² vs benchmark: +6.34 p.p.
  RMSE (test): 2.5702
  ΔRMSE vs benchmark: -0.1935
  Active vars: 660
  DM test: stat=8.04, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,4.9e-05,14.355121,4
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",1.8e-05,5.374579,9
74,ief_monetary_freedom,"[ief_monetary_freedom_delta, ief_monetary_free...",8e-06,2.430104,8
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-1...,8e-06,2.307603,7
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",8e-06,2.284162,7
63,fiw_d,"[fiw_d_ma5, fiw_d_t-1, fiw_d_t-3, fiw_d_delta3...",6e-06,1.798051,5
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1_delta3, wb_iq_spi_pil1_t-1, wb...",6e-06,1.777968,6
62,fiw_cl,"[fiw_cl_ma5, fiw_cl_t-1, fiw_cl_t-3, fiw_cl_de...",6e-06,1.760911,5
66,fiw_g,"[fiw_g_t-1, fiw_g, fiw_g_t-3, fiw_g_delta3, fi...",6e-06,1.706667,5
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",5e-06,1.590345,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.816657,-80.097744,36.327011,660,-5.042918,4.584857e-07
1,0.003481,7e-06,0.816647,-80.118955,36.331761,660,-5.042321,4.599177e-07
2,0.012115,2.5e-05,0.816603,-80.167975,36.342737,660,-5.040053,4.654033e-07
3,0.04217,8.7e-05,0.816347,-79.923261,36.287911,655,-5.031597,4.864111e-07
4,0.14678,0.000304,0.814604,-76.363311,35.480751,643,-4.996496,5.838148e-07
5,0.510897,0.001057,0.804266,-55.685825,30.371239,615,-4.886143,1.028307e-06
6,1.778279,0.00368,0.770628,-40.723338,26.056423,550,-4.958328,7.110251e-07
7,6.189658,0.012807,0.713225,-24.574948,20.400117,433,-5.513361,3.520443e-08
8,21.544347,0.044578,0.600439,-19.716783,18.360582,321,-5.543783,2.960062e-08
9,74.989421,0.155163,0.37034,0.193571,3.622499,173,-6.799788,1.04774e-11



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.5833
  ΔR² vs benchmark: +5.27 p.p.
  RMSE (test): 2.6040
  ΔRMSE vs benchmark: -0.1597
  Active vars: 5
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=7.73, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.018163,57.595023,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.013373,42.404977,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 5 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb...",0.83,-0.374501,0.0658,3
1,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_de...",0.83,-0.236111,0.03897,2



COMPLETED IN 182.7s


Running forecast horizon: t+8
Split year for 75-25 split: 2008

UNIFIED REGULARIZED REGRESSION: SIGNAL_T8
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2008 (1200 obs)
Test:  2009-2016 (445 obs)

β=1 specification active
After removing NaN: 1200 train, 445 test obs

Benchmark metrics:
  R² (test): 0.4001
  RMSE (test): 3.1244

Clustering variants of same variables...
  Original features: 940
  After clustering: 644
  Number of clusters: 644

Features used in models: 644


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.798119,-6.551367e+29,3265029000000000.0,644,-7.401169,1.350031e-13
1,0.003481,7e-06,0.798111,-6.540862e+29,3262410000000000.0,644,-7.401169,1.350031e-13
2,0.012115,2.5e-05,0.798078,-6.502696e+29,3252878000000000.0,644,-7.401169,1.350031e-13
3,0.04217,8.7e-05,0.797885,-6.371413e+29,3219875000000000.0,642,-7.401169,1.350031e-13
4,0.14678,0.000304,0.796165,-5.918384999999999e+29,3103293000000000.0,629,-7.401169,1.350031e-13
5,0.510897,0.001057,0.784762,-3.986908e+29,2547060000000000.0,550,-7.401169,1.350031e-13
6,1.778279,0.00368,0.750102,-2.2486149999999997e+29,1912840000000000.0,441,-7.401169,1.350031e-13
7,6.189658,0.012807,0.667318,-5.226286e+28,922184300000000.0,336,-7.401169,1.350031e-13
8,21.544347,0.044578,0.498143,-4.584491,9.532634,210,-5.744701,9.208335e-09
9,74.989421,0.155163,0.168571,0.1213188,3.781262,88,-6.938599,3.960166e-12



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.4400
  ΔR² vs benchmark: +3.99 p.p.
  RMSE (test): 3.0187
  ΔRMSE vs benchmark: -0.1057
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=6.46, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.094401,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 1 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.83,-1.793974,1.082505,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 251.273762



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.2512738,0.803518,-6.854203e+29,3339640000000000.0,644,-7.401169,1.350031e-13
1,0.003480701,0.8746087,0.790124,-5.87907e+29,3092968000000000.0,644,-7.401169,1.350031e-13
2,0.01211528,3.044251,0.771358,-4.399184e+29,2675514000000000.0,644,-7.401169,1.350031e-13
3,0.04216965,10.59613,0.739655,-2.9481809999999998e+29,2190273000000000.0,644,-7.401169,1.350031e-13
4,0.1467799,36.88194,0.683327,-1.92571e+29,1770176000000000.0,644,-7.401169,1.350031e-13
5,0.510897,128.375,0.583714,-1.10314e+29,1339789000000000.0,644,-7.401169,1.350031e-13
6,1.778279,446.835,0.41517,-4.396944e+28,845856300000000.0,644,-7.401169,1.350031e-13
7,6.189658,1555.299,0.092959,-1.091176e+28,421374700000000.0,644,-7.401169,1.350031e-13
8,21.54435,5413.529,-0.671641,-1.954846e+27,178351700000000.0,644,-7.401169,1.350031e-13
9,74.98942,18842.87,-1.992403,-2.410651e+26,62630850000000.0,644,-7.401169,1.350031e-13



Best RIDGE:
  Parameter: 2512737621716.441895
  R² (test): -6058080961.9878
  ΔR² vs benchmark: -605808096238.79 p.p.
  RMSE (test): 313970.3612
  ΔRMSE vs benchmark: +313967.2368
  Active vars: 644
  DM test: stat=-7.40, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
41,efw_5bi_labor_regulations_and_minimum_wage,[efw_5bi_labor_regulations_and_minimum_wage_de...,158979.319528,100.0,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.79812,-6.552690999999999e+29,3265359000000000.0,644,-7.401169,1.350031e-13
1,0.003481,7e-06,0.798115,-6.545278999999999e+29,3263512000000000.0,644,-7.401169,1.350031e-13
2,0.012115,2.5e-05,0.79809,-6.519895e+29,3257177000000000.0,644,-7.401169,1.350031e-13
3,0.04217,8.7e-05,0.797895,-6.425427e+29,3233495000000000.0,643,-7.401169,1.350031e-13
4,0.14678,0.000304,0.796275,-6.047207e+29,3136885000000000.0,638,-7.401169,1.350031e-13
5,0.510897,0.001057,0.786032,-4.8160319999999996e+29,2799406000000000.0,598,-7.401169,1.350031e-13
6,1.778279,0.00368,0.756086,-2.9269169999999998e+29,2182360000000000.0,511,-7.401169,1.350031e-13
7,6.189658,0.012807,0.697791,-1.4744599999999999e+29,1548951000000000.0,435,-7.401169,1.350031e-13
8,21.544347,0.044578,0.569205,-1.836938e+28,546724300000000.0,313,-7.401169,1.350031e-13
9,74.989421,0.155163,0.317364,-0.08659025,4.204884,183,-7.828275,4.884981e-15



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.4671
  ΔR² vs benchmark: +6.70 p.p.
  RMSE (test): 2.9448
  ΔRMSE vs benchmark: -0.1796
  Active vars: 5
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=8.63, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.021204,56.148792,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.01656,43.851208,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 5 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb...",0.83,-0.418037,0.056769,3
1,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_k...",0.83,-0.261148,0.03568,2



COMPLETED IN 177.0s


Running forecast horizon: t+9
Split year for 75-25 split: 2007

UNIFIED REGULARIZED REGRESSION: SIGNAL_T9
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2007 (1145 obs)
Test:  2008-2015 (437 obs)

β=1 specification active
After removing NaN: 1145 train, 437 test obs

Benchmark metrics:
  R² (test): 0.3407
  RMSE (test): 3.2646

Clustering variants of same variables...
  Original features: 934
  After clustering: 644
  Number of clusters: 644

Features used in models: 644


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.811811,-4.147793e+29,2589388000000000.0,644,-7.033093,2.01994e-12
1,0.003481,7e-06,0.811806,-4.1530429999999996e+29,2591026000000000.0,644,-7.033093,2.01994e-12
2,0.012115,2.5e-05,0.811782,-4.169642e+29,2596199000000000.0,644,-7.033093,2.01994e-12
3,0.04217,8.7e-05,0.811598,-4.226728999999999e+29,2613911000000000.0,639,-7.033093,2.01994e-12
4,0.14678,0.000304,0.809724,-4.245895e+29,2619830000000000.0,625,-7.033093,2.01994e-12
5,0.510897,0.001057,0.796968,-3.7936449999999996e+29,2476378000000000.0,552,-7.033093,2.01994e-12
6,1.778279,0.00368,0.753412,-4.364288e+29,2656105000000000.0,450,-7.033093,2.01994e-12
7,6.189658,0.012807,0.665769,-8.61406e+28,1180028000000000.0,345,-7.033093,2.01994e-12
8,21.544347,0.044578,0.483118,-0.2058264,4.415002,207,-6.113939,9.720167e-10
9,74.989421,0.155163,0.13606,0.08276349,3.850607,88,-5.694088,1.24033e-08



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.3927
  ΔR² vs benchmark: +5.20 p.p.
  RMSE (test): 3.1333
  ΔRMSE vs benchmark: -0.1313
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=7.19, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.103678,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 1 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.82,-1.797206,1.149792,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 251.273762



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.2512738,0.816921,-5.2781369999999994e+29,2920982000000000.0,644,-7.033093,2.01994e-12
1,0.003480701,0.8746087,0.803036,-5.5155819999999994e+29,2985961000000000.0,644,-7.033093,2.01994e-12
2,0.01211528,3.044251,0.780954,-5.258517999999999e+29,2915548000000000.0,644,-7.033093,2.01994e-12
3,0.04216965,10.59613,0.745064,-3.576726e+29,2404536000000000.0,644,-7.033093,2.01994e-12
4,0.1467799,36.88194,0.6823,-1.3118639999999999e+29,1456240000000000.0,644,-7.033093,2.01994e-12
5,0.510897,128.375,0.56963,-1.33737e+28,464958600000000.0,644,-7.033093,2.01994e-12
6,1.778279,446.835,0.382233,-3.678734e+25,24385840000000.0,644,-7.033093,2.01994e-12
7,6.189658,1555.299,0.020963,-1.316552e+24,4613255000000.0,644,-7.033093,2.01994e-12
8,21.54435,5413.529,-0.847243,-1.369251e+26,47046800000000.0,644,-7.033093,2.01994e-12
9,74.98942,18842.87,-2.331054,-5.434599e+25,29639600000000.0,644,-7.033093,2.01994e-12



Best RIDGE:
  Parameter: 2512737621716.441895
  R² (test): -3053608442.5850
  ΔR² vs benchmark: -305360844292.57 p.p.
  RMSE (test): 222175.0074
  ΔRMSE vs benchmark: +222171.7428
  Active vars: 644
  DM test: stat=-7.03, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
41,efw_5bi_labor_regulations_and_minimum_wage,[efw_5bi_labor_regulations_and_minimum_wage_de...,114565.358642,100.0,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.811813,-4.144211e+29,2588269000000000.0,644,-7.033093,2.01994e-12
1,0.003481,7e-06,0.811815,-4.14067e+29,2587163000000000.0,644,-7.033093,2.01994e-12
2,0.012115,2.5e-05,0.811805,-4.128761e+29,2583440000000000.0,644,-7.033093,2.01994e-12
3,0.04217,8.7e-05,0.811642,-4.10989e+29,2577530000000000.0,644,-7.033093,2.01994e-12
4,0.14678,0.000304,0.809909,-4.0936159999999996e+29,2572421000000000.0,637,-7.033093,2.01994e-12
5,0.510897,0.001057,0.798537,-4.57186e+29,2718535000000000.0,595,-7.033093,2.01994e-12
6,1.778279,0.00368,0.763483,-4.7364819999999995e+29,2767047000000000.0,536,-7.033093,2.01994e-12
7,6.189658,0.012807,0.697878,-3.1481849999999996e+29,2255894000000000.0,436,-7.033093,2.01994e-12
8,21.544347,0.044578,0.559126,-0.4371662,4.819944,313,-7.410231,1.261213e-13
9,74.989421,0.155163,0.286922,-0.07109707,4.16105,182,-5.562764,2.65535e-08



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.4193
  ΔR² vs benchmark: +7.86 p.p.
  RMSE (test): 3.0639
  ΔRMSE vs benchmark: -0.2007
  Active vars: 7
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_zg_ma10
  DM test: stat=8.60, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.024304,53.370207,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.021234,46.629793,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 7 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb...",0.84,-0.442818,0.060786,3
1,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_k...",0.825,-0.242158,0.05906,4



COMPLETED IN 163.0s


Running forecast horizon: t+10
Split year for 75-25 split: 2006

UNIFIED REGULARIZED REGRESSION: SIGNAL_T10
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2006 (1091 obs)
Test:  2007-2014 (430 obs)

β=1 specification active
After removing NaN: 1091 train, 430 test obs

Benchmark metrics:
  R² (test): 0.3729
  RMSE (test): 3.1837

Clustering variants of same variables...
  Original features: 912
  After clustering: 624
  Number of clusters: 627

Features used in models: 624


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.834423,-127.397882,45.557154,624,-3.241988,0.001186991
1,0.003481,7e-06,0.834426,-126.368607,45.374187,624,-3.243659,0.001180051
2,0.012115,2.5e-05,0.834427,-122.908067,44.753546,624,-3.24933,0.001156773
3,0.04217,8.7e-05,0.834312,-111.411541,42.626837,622,-3.270356,0.001074121
4,0.14678,0.000304,0.832933,-74.25818,34.878236,603,-3.383385,0.0007159825
5,0.510897,0.001057,0.821164,-24.020456,20.110611,547,-3.903745,9.47157e-05
6,1.778279,0.00368,0.773062,-2.261067,7.260349,449,-10.67527,0.0
7,6.189658,0.012807,0.671907,-0.943848,5.605428,319,-8.845969,0.0
8,21.544347,0.044578,0.48098,-0.226829,4.453173,195,-6.902092,5.124123e-12
9,74.989421,0.155163,0.124562,0.098272,3.817821,83,-5.969763,2.375981e-09



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.4281
  ΔR² vs benchmark: +5.52 p.p.
  RMSE (test): 3.0404
  ΔRMSE vs benchmark: -0.1433
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=6.22, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.102893,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 1 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.81,-2.013046,1.164057,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 397.972312

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.3979723,0.83649,-98.775867,40.159695,624,-3.198973,0.001379183
1,0.003480701,1.385222,0.81712,-28.391737,21.796677,624,-3.690492,0.0002238205
2,0.01211528,4.821545,0.785395,-4.090803,9.071327,624,-9.324437,0.0
3,0.04216965,16.78235,0.732785,-3.782049,8.791939,624,-4.976532,6.473367e-07
4,0.1467799,58.41435,0.643672,-6.639531,11.11248,624,-3.46315,0.0005338898
5,0.510897,203.3229,0.497329,-1.802813,6.730924,624,-4.365192,1.270109e-05
6,1.778279,707.706,0.246884,-0.230617,4.460044,624,-6.167411,6.941729e-10
7,6.189658,2463.313,-0.318957,-0.172314,4.353109,624,-3.833531,0.000126317
8,21.54435,8574.054,-1.632921,0.273501,3.42685,624,-2.171546,0.02988992
9,74.98942,29843.71,-3.32941,0.436741,3.017391,624,3.962585,7.414246e-05



Best RIDGE:
  Parameter: 2237962771.481950
  R² (test): 0.4689
  ΔR² vs benchmark: +9.60 p.p.
  RMSE (test): 2.9300
  ΔRMSE vs benchmark: -0.2537
  Active vars: 624
  DM test: stat=7.75, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma5, wb_ny_gdp_mktp_kd_z...",6e-06,6.371071,9
58,efw_ie_state_ownership,"[efw_ie_state_ownership_t-3, efw_ie_state_owne...",2e-06,2.71294,7
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",2e-06,2.505525,6
74,ief_monetary_freedom,"[ief_monetary_freedom_delta, ief_monetary_free...",2e-06,2.238751,8
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-5...,2e-06,2.075141,7
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,2e-06,1.722971,3
98,wb_ny_gdp_pcap_pp_kd,"[wb_ny_gdp_pcap_pp_kd_delta3, wb_ny_gdp_pcap_p...",1e-06,1.616384,3
7,efw_2_legal_system_property_rights_no_gender_a...,[efw_2_legal_system_property_rights_no_gender_...,1e-06,1.592343,6
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_t-3, efw_2a_judi...",1e-06,1.571122,6
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_t-3, efw_2e_legal_inte...",1e-06,1.518684,7



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.834426,-127.371055,45.552394,624,-3.241944,0.001187172
1,0.003481,7e-06,0.834434,-126.278666,45.358164,624,-3.243508,0.001180674
2,0.012115,2.5e-05,0.83445,-122.589505,44.695979,624,-3.248914,0.001158465
3,0.04217,8.7e-05,0.834359,-110.707187,42.49308,623,-3.268381,0.001081645
4,0.14678,0.000304,0.832791,-78.092609,35.755727,619,-3.346816,0.0008174546
5,0.510897,0.001057,0.821199,-26.441418,21.061094,578,-3.767844,0.0001646637
6,1.778279,0.00368,0.782298,-2.673931,7.706252,536,-10.966214,0.0
7,6.189658,0.012807,0.70573,-1.181941,5.938806,429,-9.374253,0.0
8,21.544347,0.044578,0.55955,-0.543243,4.994532,311,-7.986963,1.332268e-15
9,74.989421,0.155163,0.276277,-0.0753,4.169101,183,-6.452109,1.103044e-10



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.4499
  ΔR² vs benchmark: +7.70 p.p.
  RMSE (test): 2.9818
  ΔRMSE vs benchmark: -0.2019
  Active vars: 8
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_zg_ma10
  DM test: stat=7.07, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.024955,51.555627,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_z...",0.023449,48.444373,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 7 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.84,-0.487402,0.061974,3
1,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.84,-0.278242,0.064413,4



COMPLETED IN 155.3s


Runnning 80-20 split...

Running forecast horizon: t+1
Split year for 80-20 split: 2016

UNIFIED REGULARIZED REGRESSION: SIGNAL_T1_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2016 (1692 obs)
Test:  2017-2023 (406 obs)

β=1 specification active
After removing NaN: 1692 train, 406 test obs

Benchmark metrics:
  R² (test): 0.9075
  RMSE (test): 1.2167

Clustering variants of same variables...
  Original features: 972
  After clustering: 629
  Number of clusters: 629

Features used in models: 629


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding wind

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.947741,0.748153,2.007745,568,-8.018459,1.110223e-15
1,0.003481,0.001682,0.940953,0.786811,1.84724,481,-7.217299,5.302425e-13
2,0.012115,0.005855,0.9288,0.811778,1.735703,345,-6.824142,8.845147e-12
3,0.04217,0.02038,0.905131,0.845504,1.57253,209,-4.768412,1.85684e-06
4,0.14678,0.070938,0.859625,0.888296,1.337131,74,-2.398551,0.01646008
5,0.510897,0.246913,0.820014,0.903825,1.240715,9,-0.829791,0.406657
6,1.778279,0.85943,0.767267,0.907868,1.214357,1,0.159307,0.8734266



Best LASSO:
  Parameter: 0.859430
  R² (test): 0.9079
  ΔR² vs benchmark: +0.04 p.p.
  RMSE (test): 1.2144
  ΔRMSE vs benchmark: -0.0024
  Active vars: 1
  Active variables: wb_fp_cpi_totl_zg
  DM test: stat=0.16, p=0.873

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.873)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 24297.810658

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,24.29781,0.934636,0.774523,1.899727,629,-7.793439,6.439294e-15
1,0.003481,84.5734,0.924151,0.798255,1.796975,629,-7.196535,6.177281e-13
2,0.012115,294.3747,0.906965,0.82558,1.670853,629,-6.412092,1.435358e-10
3,0.04217,1024.63,0.882217,0.853348,1.532092,629,-5.597408,2.175799e-08
4,0.14678,3566.431,0.842775,0.879278,1.390059,629,-4.438423,9.062043e-06
5,0.510897,12413.68,0.764449,0.896806,1.285192,629,-2.611215,0.00902212
6,1.778279,43208.3,0.660745,0.904503,1.236332,629,-1.137573,0.2552987
7,6.189658,150395.1,0.592272,0.906107,1.225905,629,-0.716388,0.4737517
8,21.544347,523480.5,0.564456,0.90629,1.22471,629,-0.737896,0.4605777
9,74.989421,1822079.0,0.555384,0.906312,1.224564,629,-0.79127,0.4287867



Best RIDGE:
  Parameter: 6342110.581443
  R² (test): 0.9063
  ΔR² vs benchmark: -0.12 p.p.
  RMSE (test): 1.2246
  ΔRMSE vs benchmark: +0.0078
  Active vars: 629
  DM test: stat=-0.82, p=0.414

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.414)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.885867

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000886,0.946219,0.743102,2.027781,569,-8.260401,2.220446e-16
1,0.003481,0.003083,0.93882,0.780699,1.873532,501,-7.483085,7.260859e-14
2,0.012115,0.010733,0.927417,0.813331,1.728529,364,-6.685974,2.293943e-11
3,0.04217,0.037357,0.903355,0.847745,1.561083,249,-4.767583,1.864489e-06
4,0.14678,0.130027,0.858986,0.886821,1.345935,95,-2.688872,0.007169382
5,0.510897,0.452587,0.816186,0.904725,1.234895,18,-0.812958,0.4162424
6,1.778279,1.575319,0.750118,0.907317,1.217979,7,-0.115262,0.9082376



Best ELASTIC (L1=0.5):
  Parameter: 1.575319
  R² (test): 0.9073
  ΔR² vs benchmark: -0.02 p.p.
  RMSE (test): 1.2180
  ΔRMSE vs benchmark: +0.0012
  Active vars: 7
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_delta, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=-0.12, p=0.908

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.908)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 147.5s


Running forecast horizon: t+2
Split year for 80-20 split: 2015

UNIFIED REGULARIZED REGRESSION: SIGNAL_T2_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)


Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000886,0.869934,0.316047,3.273272,550,-7.714312,1.221245e-14
1,0.003481,0.003083,0.849182,0.49112,2.823428,451,-6.151632,7.668945e-10
2,0.012115,0.010733,0.806451,0.60882,2.475469,322,-5.238593,1.618052e-07
3,0.04217,0.037357,0.736544,0.690667,2.201313,167,-3.749011,0.0001775335
4,0.14678,0.130027,0.594981,0.741691,2.011586,54,-2.227906,0.02588682
5,0.510897,0.452587,0.442595,0.799319,1.773053,5,0.971372,0.3313632



Best LASSO:
  Parameter: 0.452587
  R² (test): 0.7993
  ΔR² vs benchmark: +0.94 p.p.
  RMSE (test): 1.7731
  ΔRMSE vs benchmark: -0.0411
  Active vars: 5
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, efw_4c_black_market_exchange_rates_delta3, ief_monetary_freedom_t-3
  DM test: stat=0.97, p=0.331

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.331)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 15341.274046

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,15.34127,0.848069,0.515453,2.755098,637,-5.722743,1.04818e-08
1,0.003481,53.39838,0.821661,0.57908,2.567843,637,-4.930446,8.204208e-07
2,0.012115,185.8638,0.779846,0.624921,2.423989,637,-4.4803,7.453835e-06
3,0.04217,646.9362,0.719008,0.67059,2.27163,637,-4.123347,3.734062e-05
4,0.14678,2251.791,0.637025,0.719693,2.095493,637,-3.568213,0.0003594249
5,0.510897,7837.811,0.542197,0.760285,1.937835,637,-2.597297,0.009396066
6,1.778279,27281.07,0.461838,0.783857,1.840093,637,-0.999876,0.3173707
7,6.189658,94957.24,0.412931,0.790928,1.809744,637,0.280066,0.7794268
8,21.544347,330517.7,0.390892,0.79186,1.805706,637,0.685334,0.4931331
9,74.989421,1150433.0,0.382986,0.7918,1.805965,637,0.747672,0.4546583



Best RIDGE:
  Parameter: 330517.729947
  R² (test): 0.7919
  ΔR² vs benchmark: +0.20 p.p.
  RMSE (test): 1.8057
  ΔRMSE vs benchmark: -0.0085
  Active vars: 637
  DM test: stat=0.69, p=0.493

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.493)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 1.623777

Stopping: model has 0 active variables at factor 1.7782794100389228



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.001624,0.865743,0.389908,3.091482,568,-7.070375,1.545208e-12
1,0.003481,0.005652,0.843996,0.524326,2.729756,469,-5.754127,8.709041e-09
2,0.012115,0.019673,0.801472,0.618322,2.445217,357,-4.903114,9.432935e-07
3,0.04217,0.068474,0.728179,0.697338,2.177448,194,-3.516493,0.0004372889
4,0.14678,0.238338,0.591604,0.752256,1.970021,70,-1.941401,0.05220972
5,0.510897,0.829583,0.441187,0.797925,1.7792,12,0.998966,0.3178112



Best ELASTIC (L1=0.5):
  Parameter: 0.829583
  R² (test): 0.7979
  ΔR² vs benchmark: +0.80 p.p.
  RMSE (test): 1.7792
  ΔRMSE vs benchmark: -0.0350
  Active vars: 12
  DM test: stat=1.00, p=0.318

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.318)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 161.3s


Running forecast horizon: t+3
Split year for 80-20 split: 2014

UNIFIED REGULARIZED REGRESSION: SIGNAL_T3_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2014 (1550 obs)
Test:  2015-2021 (403 obs)

β=1 specification active
After removing NaN: 1550 train, 403 test obs

Benc

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000264,0.864385,-0.002474,3.965388,626,-8.507858,0.0
1,0.003481,0.000918,0.857903,0.184054,3.577502,542,-8.011079,1.110223e-15
2,0.012115,0.003194,0.830293,0.433579,2.980703,459,-6.323475,2.557452e-10
3,0.04217,0.011119,0.771872,0.505292,2.785631,334,-6.526823,6.717937e-11
4,0.14678,0.038701,0.680325,0.617917,2.448093,182,-4.194254,2.73771e-05
5,0.510897,0.134706,0.530933,0.681084,2.236596,56,-3.763332,0.0001676644
6,1.778279,0.46887,0.332527,0.747769,1.989063,5,0.654393,0.5128585



Best LASSO:
  Parameter: 0.468870
  R² (test): 0.7478
  ΔR² vs benchmark: +0.50 p.p.
  RMSE (test): 1.9891
  ΔRMSE vs benchmark: -0.0197
  Active vars: 5
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, ief_monetary_freedom_delta3, ief_monetary_freedom_t-3
  DM test: stat=0.65, p=0.513

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.513)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 4691.172785

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,4.691173,0.844069,0.310017,3.289792,643,-7.762897,8.21565e-15
1,0.003481,16.32857,0.821153,0.393504,3.084346,643,-7.656001,1.909584e-14
2,0.012115,56.83486,0.785445,0.475222,2.869042,643,-7.106685,1.188605e-12
3,0.04217,197.8251,0.732816,0.558913,2.630337,643,-5.781391,7.408555e-09
4,0.14678,688.57,0.656053,0.60672,2.483706,643,-4.868462,1.124701e-06
5,0.510897,2396.706,0.549598,0.640506,2.374623,643,-4.721567,2.340345e-06
6,1.778279,8342.216,0.428065,0.688958,2.208811,643,-3.893975,9.8615e-05
7,6.189658,29036.76,0.325057,0.729931,2.058195,643,-1.693353,0.0903883
8,21.544347,101068.3,0.261038,0.743908,2.004228,643,0.260693,0.7943289
9,74.989421,351788.3,0.232235,0.746072,1.995744,643,0.927366,0.3537364



Best RIDGE:
  Parameter: 1224469.849592
  R² (test): 0.7462
  ΔR² vs benchmark: +0.35 p.p.
  RMSE (test): 1.9953
  ΔRMSE vs benchmark: -0.0135
  Active vars: 643
  DM test: stat=1.04, p=0.298

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.298)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.483293

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.862653,0.079612,3.799571,621,-8.309726,0.0
1,0.003481,0.001682,0.849848,0.289388,3.338606,562,-7.592327,3.153033e-14
2,0.012115,0.005855,0.819586,0.439685,2.964595,489,-6.741433,1.568323e-11
3,0.04217,0.02038,0.764683,0.522627,2.736391,360,-6.186622,6.14673e-10
4,0.14678,0.070938,0.673694,0.619052,2.444453,210,-4.190585,2.78236e-05
5,0.510897,0.246913,0.522531,0.679894,2.240764,69,-3.82774,0.0001293255
6,1.778279,0.85943,0.321303,0.750581,1.977945,9,1.163038,0.2448139



Best ELASTIC (L1=0.5):
  Parameter: 0.859430
  R² (test): 0.7506
  ΔR² vs benchmark: +0.78 p.p.
  RMSE (test): 1.9779
  ΔRMSE vs benchmark: -0.0309
  Active vars: 9
  Active variables: wb_ny_gdp_mktp_kd_zg_ma3, wb_ny_gdp_mktp_kd_zg_ma5, wb_ny_gdp_mktp_kd_zg_ma10, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, efw_3b_standard_deviation_of_inflation_ma3, ief_monetary_freedom_delta3, ief_monetary_freedom_ma5, ief_monetary_freedom_t-3
  DM test: stat=1.16, p=0.245

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.245)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 172.6s


Running forecast horizon: t+4
Split year for 80-20 split: 2013

UNIFIED REGULARIZED REGRESSION: SIGNAL_T4_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
S

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000144,0.842035,-76.902,35.090799,630,-7.301422,2.846612e-13
1,0.003481,0.000501,0.839383,-49.949934,28.378587,606,-7.289189,3.117506e-13
2,0.012115,0.001743,0.823632,-2.192639,7.103852,532,-8.395694,0.0
3,0.04217,0.006066,0.773591,0.350393,3.204382,403,-7.01104,2.365441e-12
4,0.14678,0.021114,0.690224,0.48629,2.849561,268,-6.392544,1.631477e-10
5,0.510897,0.07349,0.548716,0.628671,2.422694,124,-4.105395,4.036254e-05
6,1.778279,0.255797,0.306545,0.70899,2.144731,27,-1.008687,0.313125
7,6.189658,0.890351,0.024365,0.727054,2.0771,2,1.170821,0.2416709



Best LASSO:
  Parameter: 0.890351
  R² (test): 0.7271
  ΔR² vs benchmark: +0.63 p.p.
  RMSE (test): 2.0771
  ΔRMSE vs benchmark: -0.0240
  Active vars: 2
  Active variables: wb_fp_cpi_totl_zg, ief_monetary_freedom_t-3
  DM test: stat=1.17, p=0.242

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.242)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 905.723664

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.9057237,0.838104,-44.070405,26.69099,642,-7.452946,9.126033e-14
1,0.003481,3.152553,0.825569,-20.294933,18.346663,642,-7.382254,1.556533e-13
2,0.012115,10.97309,0.803096,-1.330389,6.069224,642,-8.112554,4.440892e-16
3,0.04217,38.19405,0.765085,-2.803648,7.753881,642,-8.626684,0.0
4,0.14678,132.9421,0.706643,-1.576882,6.382138,642,-6.034633,1.593244e-09
5,0.510897,462.7315,0.619092,-7.647139,11.691095,642,-7.148958,8.744117e-13
6,1.778279,1610.63,0.490318,-15.639141,16.217518,642,-7.089134,1.349587e-12
7,6.189658,5606.12,0.330183,-9.091961,12.630099,642,-7.313602,2.600142e-13
8,21.544347,19513.22,0.17805,-1.817623,6.673603,642,-7.336898,2.184919e-13
9,74.989421,67919.69,0.070355,0.376166,3.140172,642,-6.518007,7.12479e-11



Best RIDGE:
  Parameter: 2864149.709788
  R² (test): 0.7262
  ΔR² vs benchmark: +0.55 p.p.
  RMSE (test): 2.0802
  ΔRMSE vs benchmark: -0.0209
  Active vars: 642
  DM test: stat=1.26, p=0.206

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.206)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.143845

Stopping: model has 0 active variables at factor 21.54434690031882



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000144,0.842027,-76.866678,35.082843,638,-7.335348,2.211564e-13
1,0.003481,0.000501,0.839497,-50.584295,28.554707,624,-7.394196,1.423306e-13
2,0.012115,0.001743,0.826265,-9.325653,12.775495,579,-7.60046,2.953193e-14
3,0.04217,0.006066,0.790687,0.294639,3.339064,496,-7.192223,6.374901e-13
4,0.14678,0.021114,0.72754,-0.630959,5.07739,372,-7.375889,1.632028e-13
5,0.510897,0.07349,0.616543,0.573582,2.596192,217,-4.883201,1.043772e-06
6,1.778279,0.255797,0.419144,0.665768,2.298492,79,-3.366872,0.0007602604
7,6.189658,0.890351,0.156583,0.734682,2.047871,15,1.762569,0.07797317



Best ELASTIC (L1=0.5):
  Parameter: 0.890351
  R² (test): 0.7347
  ΔR² vs benchmark: +1.40 p.p.
  RMSE (test): 2.0479
  ΔRMSE vs benchmark: -0.0532
  Active vars: 15
  DM test: stat=1.76, p=0.078

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.078)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 170.5s


Running forecast horizon: t+5
Split year for 80-20 split: 2012

UNIFIED REGULARIZED REGRESSION: SIGNAL_T5_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2012 (1426 obs)
Test:  2013-2019 (398 obs)

β=1 specification active
After removing NaN: 1426 train, 398 test obs

Benc

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.826929,-85.093879,37.121899,609,-5.274125,1.333906e-07
1,0.003481,0.001682,0.810533,-75.304625,34.947771,531,-5.244172,1.569857e-07
2,0.012115,0.005855,0.76171,-84.654892,37.027137,421,-5.557009,2.74436e-08
3,0.04217,0.02038,0.664308,-78.332343,35.634379,289,-5.639674,1.703722e-08
4,0.14678,0.070938,0.510272,-11.279567,14.019599,154,-5.773162,7.779774e-09
5,0.510897,0.246913,0.201014,0.64987,2.36733,40,-1.273621,0.2027978
6,1.778279,0.85943,-0.15346,0.688594,2.232581,5,-0.612759,0.540036



Best LASSO:
  Parameter: 0.859430
  R² (test): 0.6886
  ΔR² vs benchmark: -0.83 p.p.
  RMSE (test): 2.2326
  ΔRMSE vs benchmark: +0.0298
  Active vars: 5
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_zg_ma10, ief_monetary_freedom_t-3
  DM test: stat=-0.61, p=0.540

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.540)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 905.723664

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.9057237,0.825235,-87.145956,37.561701,644,-5.227112,1.721786e-07
1,0.003481,3.152553,0.81109,-90.738891,38.319585,644,-5.231511,1.681297e-07
2,0.012115,10.97309,0.785883,-106.536023,41.48786,644,-5.375954,7.617826e-08
3,0.04217,38.19405,0.743698,-130.236919,45.832343,644,-5.641284,1.68787e-08
4,0.14678,132.9421,0.67658,-119.117092,43.847663,644,-5.748185,9.020638e-09
5,0.510897,462.7315,0.570408,-61.4661,31.620319,644,-5.611944,2.000664e-08
6,1.778279,1610.63,0.396409,-15.336942,16.170723,644,-5.617847,1.933521e-08
7,6.189658,5606.12,0.126922,-2.102644,7.047094,644,-7.036381,1.972866e-12
8,21.544347,19513.22,-0.190789,0.254952,3.453314,644,-5.284723,1.25895e-07
9,74.989421,67919.69,-0.434514,0.645704,2.381371,644,-2.347542,0.01889777



Best RIDGE:
  Parameter: 420399684.638619
  R² (test): 0.7058
  ΔR² vs benchmark: +0.89 p.p.
  RMSE (test): 2.1701
  ΔRMSE vs benchmark: -0.0327
  Active vars: 644
  DM test: stat=1.51, p=0.132

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.132)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.078476

Stopping: model has 0 active variables at factor 74.98942093324558



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,7.8e-05,0.830562,-88.911195,37.935948,643,-5.272969,1.342344e-07
1,0.003481,0.000273,0.829254,-88.813153,37.915258,638,-5.263838,1.410789e-07
2,0.012115,0.000951,0.82164,-80.902639,36.207038,611,-5.2376,1.626779e-07
3,0.04217,0.003309,0.797204,-83.480658,36.772461,542,-5.284104,1.263215e-07
4,0.14678,0.011519,0.745401,-110.274083,42.20278,446,-5.705466,1.16025e-08
5,0.510897,0.040093,0.647092,-74.905268,34.856198,313,-5.556543,2.751701e-08
6,1.778279,0.139552,0.48455,-11.594017,14.197968,167,-5.587878,2.298614e-08
7,6.189658,0.48574,0.149022,0.657099,2.342763,54,-1.239155,0.2152879
8,21.544347,1.690714,-0.228445,0.699594,2.192795,11,0.300921,0.763475



Best ELASTIC (L1=0.5):
  Parameter: 1.690714
  R² (test): 0.6996
  ΔR² vs benchmark: +0.27 p.p.
  RMSE (test): 2.1928
  ΔRMSE vs benchmark: -0.0100
  Active vars: 11
  DM test: stat=0.30, p=0.763

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.763)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 173.4s


Running forecast horizon: t+6
Split year for 80-20 split: 2011

UNIFIED REGULARIZED REGRESSION: SIGNAL_T6_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2011 (1369 obs)
Test:  2012-2018 (395 obs)

β=1 specification active
After removing NaN: 1369 train, 395 test obs

Benc

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.836833,-45.670083,27.66043,657,-4.898299,9.666972e-07
1,0.003481,7e-06,0.836827,-45.699665,27.669195,657,-4.894225,9.869375e-07
2,0.012115,2.5e-05,0.836798,-45.765939,27.688821,657,-4.882171,1.049243e-06
3,0.04217,8.7e-05,0.836617,-46.114298,27.791757,652,-4.847096,1.252818e-06
4,0.14678,0.000304,0.835163,-46.390901,27.873219,631,-4.805991,1.539866e-06
5,0.510897,0.001057,0.824444,-54.057397,30.043289,582,-4.765182,1.886834e-06
6,1.778279,0.00368,0.793039,-90.681865,38.768711,469,-5.306578,1.117021e-07
7,6.189658,0.012807,0.7208,-162.111028,51.710806,346,-5.951639,2.654707e-09
8,21.544347,0.044578,0.57633,-46.025923,27.76568,190,-5.054748,4.309584e-07
9,74.989421,0.155163,0.297282,-4.052037,9.10066,74,-3.409426,0.0006509967



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.6151
  ΔR² vs benchmark: +1.98 p.p.
  RMSE (test): 2.5118
  ΔRMSE vs benchmark: -0.0639
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=2.86, p=0.004


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.075462,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 1 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.82,-1.814841,0.912965,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 571.860368

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.5718604,0.835116,-50.16184,28.960944,657,-4.793583,1.638287e-06
1,0.003480701,1.990475,0.823658,-62.670061,32.307792,657,-4.804397,1.552185e-06
2,0.01211528,6.928247,0.802645,-99.354846,40.561015,657,-5.3936,6.905975e-08
3,0.04216965,24.11515,0.764521,-155.598261,50.667924,657,-5.852127,4.853249e-09
4,0.1467799,83.93762,0.699217,-168.700754,52.745028,657,-5.909931,3.422505e-09
5,0.510897,292.1617,0.590557,-95.359396,39.745382,657,-5.641428,1.68646e-08
6,1.778279,1016.928,0.402937,-26.178592,21.108293,657,-4.887471,1.021396e-06
7,6.189658,3539.62,0.008528,-5.034324,9.946126,657,-5.702302,1.182001e-08
8,21.54435,12320.36,-0.812736,-0.493297,4.947806,657,-4.590139,4.429519e-06
9,74.98942,42883.48,-1.762975,0.493463,2.881676,657,-1.919469,0.05492495



Best RIDGE:
  Parameter: 519545.195117
  R² (test): 0.6333
  ΔR² vs benchmark: +3.80 p.p.
  RMSE (test): 2.4518
  ΔRMSE vs benchmark: -0.1239
  Active vars: 657
  DM test: stat=4.02, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,0.128338,30.332922,5
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.020947,4.950746,9
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1_t-1, wb_iq_spi_pil1_ma3, wb_iq...",0.009724,2.298217,7
74,ief_monetary_freedom,"[ief_monetary_freedom_ma5, ief_monetary_freedo...",0.0094,2.221681,8
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.008391,1.983262,7
11,efw_2c_property_rights,"[efw_2c_property_rights_t-5, efw_2c_property_r...",0.005246,1.239885,5
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",0.005222,1.23422,5
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_ma10, efw_2a_jud...",0.005143,1.215567,6
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_del...,0.004998,1.181294,5
26,efw_4b_regulatory_trade_barriers,"[efw_4b_regulatory_trade_barriers_delta3, efw_...",0.004808,1.136367,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.836832,-45.69323,27.667289,657,-4.898024,9.680534e-07
1,0.003481,7e-06,0.836824,-45.776057,27.691817,657,-4.893368,9.912489e-07
2,0.012115,2.5e-05,0.836788,-46.057795,27.775087,657,-4.87856,1.068629e-06
3,0.04217,8.7e-05,0.836588,-46.917086,28.027531,655,-4.841528,1.288445e-06
4,0.14678,0.000304,0.835203,-49.625371,28.808706,647,-4.778564,1.765514e-06
5,0.510897,0.001057,0.826914,-55.653372,30.475618,615,-4.758442,1.950925e-06
6,1.778279,0.00368,0.800304,-95.653832,39.806059,535,-5.341877,9.198901e-08
7,6.189658,0.012807,0.749837,-169.712918,52.90209,446,-5.861969,4.574107e-09
8,21.544347,0.044578,0.638215,-95.148293,39.701821,304,-5.649296,1.61106e-08
9,74.989421,0.155163,0.426851,-15.94187,16.665576,168,-4.756849,1.966377e-06



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.6260
  ΔR² vs benchmark: +3.07 p.p.
  RMSE (test): 2.4760
  ΔRMSE vs benchmark: -0.0997
  Active vars: 5
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=4.04, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.01563,59.666119,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.010566,40.333881,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 4 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.82,-0.339269,0.061034,3
1,wb_ny_gdp_defl_kd_zg,[wb_ny_gdp_defl_kd_zg],0.82,-0.375694,0.02982,1



COMPLETED IN 194.7s


Running forecast horizon: t+7
Split year for 80-20 split: 2010

UNIFIED REGULARIZED REGRESSION: SIGNAL_T7_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2010 (1314 obs)
Test:  2011-2017 (392 obs)

β=1 specification active
After removing NaN: 1314 train, 392 test obs

Benchmark metrics:
  R² (test): 0.5178
  RMSE (test): 2.8039

Clustering variants of same variables...
  Original features: 963
  After clustering: 656
  Number of clusters: 656

Features used in models: 656


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal paramete

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,4e-06,0.812412,-118.765116,44.187656,656,-5.216876,1.819657e-07
1,0.003481,1.3e-05,0.812406,-118.336385,44.108495,656,-5.219279,1.796209e-07
2,0.012115,4.6e-05,0.812363,-116.280896,43.726975,653,-5.226132,1.730926e-07
3,0.04217,0.00016,0.811963,-108.373255,42.227111,648,-5.237389,1.628643e-07
4,0.14678,0.000557,0.807902,-90.33955,38.589152,614,-5.214395,1.84418e-07
5,0.510897,0.001938,0.785166,-62.723272,32.23182,520,-5.20285,1.962548e-07
6,1.778279,0.006744,0.733,-44.704038,27.296884,416,-5.508384,3.621425e-08
7,6.189658,0.023475,0.623733,-13.754214,15.509358,282,-5.650527,1.599565e-08
8,21.544347,0.081711,0.408997,0.367158,3.212058,135,-4.087876,4.353404e-05
9,74.989421,0.284412,-0.003322,0.514686,2.812854,35,-0.091876,0.9267969



Best LASSO:
  Parameter: 3.445726
  R² (test): 0.5688
  ΔR² vs benchmark: +5.10 p.p.
  RMSE (test): 2.6515
  ΔRMSE vs benchmark: -0.1524
  Active vars: 2
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg
  DM test: stat=6.63, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.041273,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...
No variables meet stability criterion (80% selection frequency)


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 397.972312

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.3979723,0.813993,-103.985399,41.371403,656,-5.143579,2.695532e-07
1,0.003480701,1.385222,0.800922,-90.964608,38.720964,656,-5.161741,2.446639e-07
2,0.01211528,4.821545,0.780064,-80.341045,36.415868,656,-5.30311,1.13846e-07
3,0.04216965,16.78235,0.744415,-63.15961,32.341984,656,-5.420085,5.957057e-08
4,0.1467799,58.41435,0.683915,-46.104869,27.712053,656,-5.519046,3.408443e-08
5,0.510897,203.3229,0.581323,-25.580848,20.817099,656,-5.622108,1.886415e-08
6,1.778279,707.706,0.404817,-5.388867,10.205816,656,-4.998849,5.767363e-07
7,6.189658,2463.313,0.047761,-0.420659,4.81261,656,-4.875594,1.084818e-06
8,21.54435,8574.054,-0.749298,0.156143,3.709114,656,-2.576921,0.009968487
9,74.98942,29843.71,-1.85836,0.496918,2.863884,656,-0.491239,0.6232572



Best RIDGE:
  Parameter: 361564.839799
  R² (test): 0.5831
  ΔR² vs benchmark: +6.53 p.p.
  RMSE (test): 2.6071
  ΔRMSE vs benchmark: -0.1968
  Active vars: 656
  DM test: stat=6.64, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,0.085402,16.449317,5
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.030234,5.823336,9
74,ief_monetary_freedom,"[ief_monetary_freedom_t-1, ief_monetary_freedo...",0.012883,2.481451,8
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.012336,2.375993,7
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1, wb_iq_spi_pil1_ma3, wb_iq_spi...",0.0111,2.137995,7
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_del...,0.010063,1.938272,6
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",0.008769,1.68902,6
11,efw_2c_property_rights,"[efw_2c_property_rights_t-5, efw_2c_property_r...",0.008267,1.592357,5
66,fiw_g,"[fiw_g, fiw_g_t-1, fiw_g_t-3, fiw_g_delta3, fi...",0.008225,1.584211,5
65,fiw_f,"[fiw_f_ma5, fiw_f_t-1, fiw_f_t-3, fiw_f_delta3...",0.007205,1.387776,5



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.812414,-118.901467,44.212802,656,-5.21638,1.824537e-07
1,0.003481,7e-06,0.812416,-118.806401,44.195272,656,-5.21745,1.814028e-07
2,0.012115,2.5e-05,0.812414,-118.458766,44.131106,656,-5.221119,1.778454e-07
3,0.04217,8.7e-05,0.812313,-116.954145,43.852302,656,-5.232344,1.673738e-07
4,0.14678,0.000304,0.811011,-110.920365,42.715978,646,-5.24695,1.546375e-07
5,0.510897,0.001057,0.80177,-92.233477,38.987172,618,-5.213201,1.856095e-07
6,1.778279,0.00368,0.770348,-68.308479,33.61468,543,-5.375175,7.650839e-08
7,6.189658,0.012807,0.716582,-39.099502,25.568502,440,-5.52568,3.282115e-08
8,21.544347,0.044578,0.600587,-14.110752,15.695633,320,-5.632137,1.779905e-08
9,74.989421,0.155163,0.36873,0.357977,3.235275,179,-3.830827,0.0001277132



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.5732
  ΔR² vs benchmark: +5.55 p.p.
  RMSE (test): 2.6378
  ΔRMSE vs benchmark: -0.1661
  Active vars: 5
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=6.90, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.016846,59.386359,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.011521,40.613641,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...
No variables meet stability criterion (80% selection frequency)


COMPLETED IN 187.5s


Running forecast horizon: t+8
Split year for 80-20 split: 2009

UNIFIED REGULARIZED REGRESSION: SIGNAL_T8_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2009 (1257 obs)
Test:  2010-2016 (388 obs)

β=1 specification active
After removing NaN: 1257 train, 388 test obs

Benchmark metrics:
  R² (test): 0.4250
  RMSE (test): 3.0601

Clustering variants of same variables...
  Original features: 960
  After clustering: 655
  Number of clusters: 655

Features used in models: 655


--------------------------------------------------------------------------------
RUNNING LASSO
--------

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.798245,-173.646219,53.332869,655,-4.552423,5.303151e-06
1,0.003481,7e-06,0.798234,-173.117508,53.25208,654,-4.551712,5.321111e-06
2,0.012115,2.5e-05,0.798188,-171.574593,53.015613,651,-4.549715,5.371864e-06
3,0.04217,8.7e-05,0.797938,-166.868198,52.287704,645,-4.545651,5.476582e-06
4,0.14678,0.000304,0.796147,-148.145891,49.285708,633,-4.538261,5.671992e-06
5,0.510897,0.001057,0.784785,-88.707342,38.223403,567,-4.529935,5.900184e-06
6,1.778279,0.00368,0.748155,-80.468905,36.425984,452,-4.365344,1.269229e-05
7,6.189658,0.012807,0.665099,-36.058748,24.567487,349,-4.148013,3.353728e-05
8,21.544347,0.044578,0.505152,-10.57946,13.732813,204,-5.341103,9.238265e-08
9,74.989421,0.155163,0.169787,0.235122,3.529486,81,-4.780515,1.74847e-06



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.4722
  ΔR² vs benchmark: +4.71 p.p.
  RMSE (test): 2.9319
  ΔRMSE vs benchmark: -0.1281
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=6.23, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.083378,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 1 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.82,-1.943753,1.152451,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 251.273762

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.2512738,0.802513,-151.179639,49.784441,655,-4.611329,4.001022e-06
1,0.003480701,0.8746087,0.790383,-128.725821,45.965138,655,-4.575206,4.757526e-06
2,0.01211528,3.044251,0.77232,-99.814977,40.520788,655,-4.45594,8.352657e-06
3,0.04216965,10.59613,0.741328,-84.303903,37.273468,655,-4.195932,2.717514e-05
4,0.1467799,36.88194,0.685793,-85.938045,37.628793,655,-4.05484,5.01686e-05
5,0.510897,128.375,0.58966,-67.782788,33.469964,655,-4.163585,3.132895e-05
6,1.778279,446.835,0.428298,-22.626234,19.616097,655,-4.486953,7.224884e-06
7,6.189658,1555.299,0.124755,-1.597344,6.503994,655,-5.082683,3.721414e-07
8,21.54435,5413.529,-0.580757,0.416646,3.082346,655,-0.158558,0.8740174
9,74.98942,18842.87,-1.812931,0.500811,2.851332,655,2.185899,0.028823



Best RIDGE:
  Parameter: 65586.402344
  R² (test): 0.5149
  ΔR² vs benchmark: +8.99 p.p.
  RMSE (test): 2.8108
  ΔRMSE vs benchmark: -0.2493
  Active vars: 655
  DM test: stat=5.71, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,0.21842,9.058055,4
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.155284,6.439738,9
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_del...,0.060396,2.504666,7
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.057516,2.385236,7
74,ief_monetary_freedom,"[ief_monetary_freedom_t-1, ief_monetary_freedo...",0.056603,2.347387,8
73,ief_labor_freedom,"[ief_labor_freedom_delta3, ief_labor_freedom_d...",0.051127,2.120272,5
11,efw_2c_property_rights,"[efw_2c_property_rights_t-5, efw_2c_property_r...",0.044478,1.844548,5
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",0.042091,1.745544,6
45,efw_5bv_cost_of_worker_dismissal,"[efw_5bv_cost_of_worker_dismissal_delta3, efw_...",0.038727,1.60604,5
55,efw_5dii_business_permits,"[efw_5dii_business_permits_delta3, efw_5dii_bu...",0.037551,1.557287,5



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.798246,-173.710118,53.342625,655,-4.552533,5.300394e-06
1,0.003481,7e-06,0.798238,-173.336694,53.285588,655,-4.552093,5.311483e-06
2,0.012115,2.5e-05,0.798203,-172.062237,53.090463,652,-4.550874,5.342356e-06
3,0.04217,8.7e-05,0.797982,-168.25644,52.503464,651,-4.549029,5.389402e-06
4,0.14678,0.000304,0.796288,-153.003231,50.08184,637,-4.543953,5.520896e-06
5,0.510897,0.001057,0.78617,-104.024576,41.358123,588,-4.503358,6.688811e-06
6,1.778279,0.00368,0.755645,-76.331478,35.48898,539,-4.313222,1.608922e-05
7,6.189658,0.012807,0.696657,-69.960006,33.995558,449,-4.107333,4.002538e-05
8,21.544347,0.044578,0.573611,-33.096721,23.565229,318,-4.668124,3.039619e-06
9,74.989421,0.155163,0.321759,0.095582,3.837955,186,-5.645475,1.647262e-08



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.4956
  ΔR² vs benchmark: +7.06 p.p.
  RMSE (test): 2.8660
  ΔRMSE vs benchmark: -0.1940
  Active vars: 5
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=7.54, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.018526,58.387234,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.013204,41.612766,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 5 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.83,-0.392995,0.066186,3
1,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_k...",0.83,-0.253208,0.039459,2



COMPLETED IN 180.9s


Running forecast horizon: t+9
Split year for 80-20 split: 2008

UNIFIED REGULARIZED REGRESSION: SIGNAL_T9_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2008 (1200 obs)
Test:  2009-2015 (382 obs)

β=1 specification active
After removing NaN: 1200 train, 382 test obs

Benchmark metrics:
  R² (test): 0.3348
  RMSE (test): 3.3075

Clustering variants of same variables...
  Original features: 940
  After clustering: 645
  Number of clusters: 645

Features used in models: 645


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal paramete

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.804587,-6.418915e+29,3249149000000000.0,645,-7.090077,1.340483e-12
1,0.003481,7e-06,0.804587,-6.39343e+29,3242692000000000.0,645,-7.090077,1.340483e-12
2,0.012115,2.5e-05,0.804579,-6.306884e+29,3220670000000000.0,645,-7.090077,1.340483e-12
3,0.04217,8.7e-05,0.804461,-6.001087999999999e+29,3141620000000000.0,638,-7.090077,1.340483e-12
4,0.14678,0.000304,0.803124,-5.1423969999999994e+29,2908182000000000.0,622,-7.090077,1.340483e-12
5,0.510897,0.001057,0.791856,-3.041388e+29,2236530000000000.0,566,-7.090077,1.340483e-12
6,1.778279,0.00368,0.750499,-1.351464e+29,1490874000000000.0,467,-7.090077,1.340483e-12
7,6.189658,0.012807,0.659303,-1.219748e+28,447892600000000.0,335,-7.090077,1.340483e-12
8,21.544347,0.044578,0.481791,-49.76298,28.89431,207,-4.086847,4.372758e-05
9,74.989421,0.155163,0.136924,0.200806,3.625474,86,-3.017808,0.002546102



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.3922
  ΔR² vs benchmark: +5.74 p.p.
  RMSE (test): 3.1617
  ΔRMSE vs benchmark: -0.1458
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=6.93, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.095011,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...
No variables meet stability criterion (80% selection frequency)


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 251.273762



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.2512738,0.810793,-5.9492629999999994e+29,3128026000000000.0,645,-7.090077,1.340483e-12
1,0.003480701,0.8746087,0.797869,-4.5076279999999995e+29,2722781000000000.0,645,-7.090077,1.340483e-12
2,0.01211528,3.044251,0.777956,-2.9762309999999997e+29,2212443000000000.0,645,-7.090077,1.340483e-12
3,0.04216965,10.59613,0.743374,-1.958577e+29,1794771000000000.0,645,-7.090077,1.340483e-12
4,0.1467799,36.88194,0.681635,-1.260982e+29,1440102000000000.0,645,-7.090077,1.340483e-12
5,0.510897,128.375,0.573356,-6.872397e+28,1063146000000000.0,645,-7.090077,1.340483e-12
6,1.778279,446.835,0.396473,-3.043436e+28,707490900000000.0,645,-7.090077,1.340483e-12
7,6.189658,1555.299,0.059177,-9.570983e+27,396750100000000.0,645,-7.090077,1.340483e-12
8,21.54435,5413.529,-0.743965,-2.139716e+27,187593100000000.0,645,-7.090077,1.340483e-12
9,74.98942,18842.87,-2.133559,-3.032945e+26,70627040000000.0,645,-7.090077,1.340483e-12



Best RIDGE:
  Parameter: 2512737621716.441895
  R² (test): -11069322574.7954
  ΔR² vs benchmark: -1106932257513.02 p.p.
  RMSE (test): 426677.0289
  ΔRMSE vs benchmark: +426673.7214
  Active vars: 645
  DM test: stat=-7.09, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
41,efw_5bi_labor_regulations_and_minimum_wage,[efw_5bi_labor_regulations_and_minimum_wage_de...,212569.709017,100.0,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.80459,-6.41845e+29,3249031000000000.0,645,-7.090077,1.340483e-12
1,0.003481,7e-06,0.804596,-6.391831e+29,3242286000000000.0,645,-7.090077,1.340483e-12
2,0.012115,2.5e-05,0.804609,-6.301344e+29,3219255000000000.0,645,-7.090077,1.340483e-12
3,0.04217,8.7e-05,0.80454,-5.99872e+29,3141001000000000.0,644,-7.090077,1.340483e-12
4,0.14678,0.000304,0.803273,-5.11854e+29,2901428000000000.0,633,-7.090077,1.340483e-12
5,0.510897,0.001057,0.793533,-3.731649e+29,2477360000000000.0,604,-7.090077,1.340483e-12
6,1.778279,0.00368,0.760024,-2.330162e+29,1957635000000000.0,541,-7.090077,1.340483e-12
7,6.189658,0.012807,0.69338,-6.638666e+28,1044911000000000.0,452,-7.090077,1.340483e-12
8,21.544347,0.044578,0.561131,-4.958497e+25,28557080000000.0,318,-7.090077,1.340483e-12
9,74.989421,0.155163,0.289863,-20.00643,18.58724,188,-4.144729,3.402154e-05



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.4180
  ΔR² vs benchmark: +8.32 p.p.
  RMSE (test): 3.0938
  ΔRMSE vs benchmark: -0.2138
  Active vars: 6
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_zg_t-1
  DM test: stat=7.93, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.021602,56.559664,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.016591,43.440336,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 6 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.83,-0.425804,0.060393,3
1,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_k...",0.82,-0.200992,0.036826,3



COMPLETED IN 177.7s


Running forecast horizon: t+10
Split year for 80-20 split: 2007

UNIFIED REGULARIZED REGRESSION: SIGNAL_T10_80
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2007 (1147 obs)
Test:  2008-2014 (374 obs)

β=1 specification active
After removing NaN: 1147 train, 374 test obs

Benchmark metrics:
  R² (test): 0.3513
  RMSE (test): 3.2712

Clustering variants of same variables...
  Original features: 934
  After clustering: 645
  Number of clusters: 645

Features used in models: 645


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parame

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.826364,-2.4252529999999996e+29,2000089000000000.0,645,-6.822777,8.929746e-12
1,0.003481,7e-06,0.82635,-2.423945e+29,1999549000000000.0,645,-6.822777,8.929746e-12
2,0.012115,2.5e-05,0.826288,-2.4196769999999997e+29,1997788000000000.0,645,-6.822777,8.929746e-12
3,0.04217,8.7e-05,0.825985,-2.3921269999999998e+29,1986383000000000.0,641,-6.822777,8.929746e-12
4,0.14678,0.000304,0.823983,-2.2615929999999996e+29,1931426000000000.0,625,-6.822777,8.929746e-12
5,0.510897,0.001057,0.81176,-2.2033879999999998e+29,1906410000000000.0,563,-6.822777,8.929746e-12
6,1.778279,0.00368,0.761771,-1.3913319999999999e+29,1514907000000000.0,466,-6.822777,8.929746e-12
7,6.189658,0.012807,0.662618,-3.555921e+27,242184800000000.0,338,-6.822777,8.929746e-12
8,21.544347,0.044578,0.47296,0.0423626,3.974397,210,-4.771731,1.826494e-06
9,74.989421,0.155163,0.125104,0.2213544,3.583774,83,-3.306908,0.0009433189



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.4132
  ΔR² vs benchmark: +6.19 p.p.
  RMSE (test): 3.1112
  ΔRMSE vs benchmark: -0.1600
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=6.92, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.098328,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...
No variables meet stability criterion (80% selection frequency)


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 251.273762



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.2512738,0.831885,-3.1513659999999998e+29,2279923000000000.0,645,-6.822777,8.929746e-12
1,0.003480701,0.8746087,0.817309,-2.817824e+29,2155895000000000.0,645,-6.822777,8.929746e-12
2,0.01211528,3.044251,0.791788,-2.3816519999999998e+29,1982028000000000.0,645,-6.822777,8.929746e-12
3,0.04216965,10.59613,0.749496,-1.8385999999999997e+29,1741463000000000.0,645,-6.822777,8.929746e-12
4,0.1467799,36.88194,0.678881,-8.516023e+28,1185193000000000.0,645,-6.822777,8.929746e-12
5,0.510897,128.375,0.560978,-1.163207e+28,438025300000000.0,645,-6.822777,8.929746e-12
6,1.778279,446.835,0.373098,-1.563996e+26,50791220000000.0,645,-6.822777,8.929746e-12
7,6.189658,1555.299,0.00177,-2.939642e+26,69633460000000.0,645,-6.822777,8.929746e-12
8,21.54435,5413.529,-0.915355,-3.381303e+26,74681460000000.0,645,-6.822777,8.929746e-12
9,74.98942,18842.87,-2.485827,-9.818907e+25,40244110000000.0,645,-6.822777,8.929746e-12



Best RIDGE:
  Parameter: 2512737621716.441895
  R² (test): -6471858940.2199
  ΔR² vs benchmark: -647185894057.11 p.p.
  RMSE (test): 326727.1805
  ΔRMSE vs benchmark: +326723.9093
  Active vars: 645
  DM test: stat=-6.82, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
41,efw_5bi_labor_regulations_and_minimum_wage,[efw_5bi_labor_regulations_and_minimum_wage_de...,203323.320102,100.0,7



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.826365,-2.423356e+29,1999306000000000.0,645,-6.822777,8.929746e-12
1,0.003481,7e-06,0.826354,-2.417411e+29,1996852000000000.0,645,-6.822777,8.929746e-12
2,0.012115,2.5e-05,0.826303,-2.398111e+29,1988865000000000.0,645,-6.822777,8.929746e-12
3,0.04217,8.7e-05,0.826009,-2.34731e+29,1967687000000000.0,645,-6.822777,8.929746e-12
4,0.14678,0.000304,0.823954,-2.202933e+29,1906213000000000.0,638,-6.822777,8.929746e-12
5,0.510897,0.001057,0.812641,-2.6490819999999997e+29,2090347000000000.0,600,-6.822777,8.929746e-12
6,1.778279,0.00368,0.773075,-2.0555019999999997e+29,1841322000000000.0,550,-6.822777,8.929746e-12
7,6.189658,0.012807,0.697303,-8.781783e+28,1203544000000000.0,441,-6.822777,8.929746e-12
8,21.544347,0.044578,0.552646,-0.7120468,5.314086,319,-3.73392,0.0001885227
9,74.989421,0.155163,0.275927,0.1192459,3.811519,186,-4.151947,3.296585e-05



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.4394
  ΔR² vs benchmark: +8.81 p.p.
  RMSE (test): 3.0409
  ΔRMSE vs benchmark: -0.2303
  Active vars: 7
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_zg_ma10
  DM test: stat=8.16, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.023495,54.189664,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_z...",0.019862,45.810336,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 1 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_ny_gdp_defl_kd_zg,[wb_ny_gdp_defl_kd_zg_t-1],0.85,-0.108269,0.043435,1



COMPLETED IN 171.1s


Runnning 85-15 split...

Running forecast horizon: t+1
Split year for 85-15 split: 2018

UNIFIED REGULARIZED REGRESSION: SIGNAL_T1_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2018 (1808 obs)
Test:  2019-2023 (290 obs)

β=1 specification active
After removing NaN: 1808 train, 290 test obs

Benchmark metrics:
  R² (test): 0.8939
  RMSE (test): 1.3196

Clustering variants of same variables...
  Original features: 972
  After clustering: 619
  Number of clusters: 619

Features used in models: 619


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding wind

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.946514,0.798639,1.817985,558,-4.771346,2e-06
1,0.003481,0.001682,0.940047,0.826316,1.688428,460,-4.200001,2.7e-05
2,0.012115,0.005855,0.927041,0.838529,1.627982,341,-3.898519,9.7e-05
3,0.04217,0.02038,0.904682,0.861164,1.509575,198,-2.527922,0.011474
4,0.14678,0.070938,0.860803,0.877928,1.415506,66,-1.998974,0.045611
5,0.510897,0.246913,0.824558,0.889333,1.347757,9,-0.867815,0.385496
6,1.778279,0.85943,0.773758,0.893566,1.32173,1,-0.117756,0.906261



Best LASSO:
  Parameter: 0.859430
  R² (test): 0.8936
  ΔR² vs benchmark: -0.03 p.p.
  RMSE (test): 1.3217
  ΔRMSE vs benchmark: +0.0022
  Active vars: 1
  Active variables: wb_fp_cpi_totl_zg
  DM test: stat=-0.12, p=0.906

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.906)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 24297.810658

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,24.29781,0.933973,0.838427,1.628498,619,-3.90154,9.6e-05
1,0.003481,84.5734,0.923787,0.846146,1.589122,619,-3.544941,0.000393
2,0.012115,294.3747,0.907785,0.854388,1.545973,619,-3.19943,0.001377
3,0.04217,1024.63,0.884884,0.868516,1.46906,619,-2.731933,0.006296
4,0.14678,3566.431,0.848788,0.881257,1.396069,619,-2.249744,0.024465
5,0.510897,12413.68,0.777101,0.887252,1.36037,619,-1.84329,0.065287
6,1.778279,43208.3,0.678563,0.890618,1.33991,619,-1.280271,0.20045
7,6.189658,150395.1,0.611245,0.891795,1.332684,619,-1.001205,0.316728
8,21.544347,523480.5,0.583447,0.891949,1.331735,619,-1.021518,0.307009
9,74.989421,1822079.0,0.574344,0.891932,1.331838,619,-1.08519,0.277838



Best RIDGE:
  Parameter: 523480.461736
  R² (test): 0.8919
  ΔR² vs benchmark: -0.20 p.p.
  RMSE (test): 1.3317
  ΔRMSE vs benchmark: +0.0122
  Active vars: 619
  DM test: stat=-1.02, p=0.307

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.307)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.885867

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000886,0.945074,0.806593,1.781718,561,-4.592342,4e-06
1,0.003481,0.003083,0.937858,0.829736,1.671722,477,-4.104059,4.1e-05
2,0.012115,0.010733,0.92594,0.839077,1.625221,365,-3.843567,0.000121
3,0.04217,0.037357,0.902887,0.861829,1.505955,228,-2.563701,0.010356
4,0.14678,0.130027,0.8604,0.877566,1.417603,88,-2.164183,0.03045
5,0.510897,0.452587,0.821048,0.889496,1.346764,16,-1.110554,0.26676
6,1.778279,1.575319,0.757064,0.893026,1.325078,7,-0.417115,0.676594



Best ELASTIC (L1=0.5):
  Parameter: 1.575319
  R² (test): 0.8930
  ΔR² vs benchmark: -0.09 p.p.
  RMSE (test): 1.3251
  ΔRMSE vs benchmark: +0.0055
  Active vars: 7
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_delta, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=-0.42, p=0.677

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.677)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 157.6s


Running forecast horizon: t+2
Split year for 85-15 split: 2017

UNIFIED REGULARIZED REGRESSION: SIGNAL_T2_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)


Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000886,0.861079,0.429989,3.017647,546,-6.062967,1.33633e-09
1,0.003481,0.003083,0.841804,0.554951,2.666435,457,-4.977696,6.434573e-07
2,0.012115,0.010733,0.801048,0.643272,2.387238,316,-4.641784,3.454136e-06
3,0.04217,0.037357,0.73567,0.710468,2.150677,174,-3.514313,0.0004408926
4,0.14678,0.130027,0.597082,0.771164,1.912005,57,-1.88277,0.05973148
5,0.510897,0.452587,0.459381,0.819024,1.700347,5,0.099913,0.9204135



Best LASSO:
  Parameter: 0.452587
  R² (test): 0.8190
  ΔR² vs benchmark: +0.11 p.p.
  RMSE (test): 1.7003
  ΔRMSE vs benchmark: -0.0054
  Active vars: 5
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, efw_4c_black_market_exchange_rates_delta3, ief_monetary_freedom_t-3
  DM test: stat=0.10, p=0.920

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.920)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 15341.274046

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,15.34127,0.841808,0.520469,2.767804,626,-5.53686,3.07943e-08
1,0.003481,53.39838,0.81878,0.580512,2.58873,626,-4.985497,6.180269e-07
2,0.012115,185.8638,0.78111,0.631603,2.425967,626,-4.539791,5.631011e-06
3,0.04217,646.9362,0.724758,0.680874,2.257917,626,-4.176239,2.963685e-05
4,0.14678,2251.791,0.648505,0.731242,2.072086,626,-3.888811,0.0001007365
5,0.510897,7837.811,0.560332,0.768653,1.922469,626,-3.774109,0.0001605803
6,1.778279,27281.07,0.485188,0.793438,1.816571,626,-3.262127,0.001105795
7,6.189658,94957.24,0.438852,0.805748,1.761611,626,-2.487809,0.01285328
8,21.544347,330517.7,0.417803,0.809351,1.745198,626,-2.22524,0.02606514
9,74.989421,1150433.0,0.410201,0.810197,1.741319,626,-2.268182,0.02331814



Best RIDGE:
  Parameter: 168860509.223935
  R² (test): 0.8105
  ΔR² vs benchmark: -0.74 p.p.
  RMSE (test): 1.7401
  ΔRMSE vs benchmark: +0.0344
  Active vars: 626
  DM test: stat=-2.36, p=0.018


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",2.5e-05,5.205874,8
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg, wb_ny_gdp_mktp_kd_zg_ma...",2.2e-05,4.548541,9
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,1.7e-05,3.575862,6
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1_t-5, wb_iq_spi_pil1_delta3, wb...",1.6e-05,3.404171,7
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_delta, wb_bn_cab_xoka_gd...",1.3e-05,2.706361,7
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_ma10, efw_5aii...",1.2e-05,2.463347,8
68,ief_business_freedom,"[ief_business_freedom_delta3, ief_business_fre...",8e-06,1.737996,6
36,efw_5a_credit_market_regulation,"[efw_5a_credit_market_regulation_ma10, efw_5a_...",8e-06,1.720911,8
27,efw_4bi_non_tariff_trade_barriers,"[efw_4bi_non_tariff_trade_barriers_ma3, efw_4b...",8e-06,1.655012,6
50,efw_5cii_bureacracy_costs,"[efw_5cii_bureacracy_costs_t-1, efw_5cii_burea...",8e-06,1.634948,8



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 1.623777

Stopping: model has 0 active variables at factor 1.7782794100389228



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.001624,0.856909,0.469952,2.909943,560,-5.809416,6.269111e-09
1,0.003481,0.005652,0.83648,0.569637,2.622071,486,-4.900776,9.545898e-07
2,0.012115,0.019673,0.797824,0.64008,2.397894,343,-4.65987,3.164086e-06
3,0.04217,0.068474,0.728542,0.715478,2.131989,199,-3.335965,0.0008500387
4,0.14678,0.238338,0.59511,0.777807,1.884048,68,-1.789167,0.07358793
5,0.510897,0.829583,0.458442,0.816594,1.711726,9,-0.132845,0.8943161



Best ELASTIC (L1=0.5):
  Parameter: 0.829583
  R² (test): 0.8166
  ΔR² vs benchmark: -0.13 p.p.
  RMSE (test): 1.7117
  ΔRMSE vs benchmark: +0.0060
  Active vars: 9
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_ny_gdp_mktp_kd_zg_ma10, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, wb_ny_gdp_petr_rt_zs_delta, efw_4c_black_market_exchange_rates_delta3, ief_monetary_freedom_delta3, ief_monetary_freedom_ma5, ief_monetary_freedom_t-3
  DM test: stat=-0.13, p=0.894

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.894)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 157.2s


Running forecast horizon: t+3
Split year for 85-15 split: 2016

UNIFIED REGULARIZED REGRESSION: SIGNAL_T3_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000264,0.850198,-0.12582,4.244578,615,-9.643354,0.0
1,0.003481,0.000918,0.84412,0.077563,3.842097,560,-8.902724,0.0
2,0.012115,0.003194,0.817938,0.399063,3.101092,457,-7.163721,7.851497e-13
3,0.04217,0.011119,0.76115,0.57216,2.616622,334,-7.593021,3.130829e-14
4,0.14678,0.038701,0.679682,0.65296,2.356621,175,-6.535122,6.35576e-11
5,0.510897,0.134706,0.531803,0.713571,2.140958,57,-7.428674,1.0969e-13
6,1.778279,0.46887,0.347046,0.812496,1.732229,4,-2.853,0.00433086



Best LASSO:
  Parameter: 0.468870
  R² (test): 0.8125
  ΔR² vs benchmark: -1.83 p.p.
  RMSE (test): 1.7322
  ΔRMSE vs benchmark: +0.0866
  Active vars: 4
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, ief_monetary_freedom_t-3
  DM test: stat=-2.85, p=0.004


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 4 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.174985,32.684282,8
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma5, wb_ny_gdp_mktp_kd_z...",0.17035,31.818466,9
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_t-5, wb_...",0.110534,20.645986,7
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_delta3, wb_bn_cab_xoka_g...",0.079511,14.851265,7



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 4 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
1,wb_bn_cab_xoka_gd_zs,[wb_bn_cab_xoka_gd_zs_delta3],1.0,-0.151167,0.051284,1
3,wb_ny_gdp_mktp_kd_zg,[wb_ny_gdp_mktp_kd_zg_ma5],0.92,0.272557,0.082847,1
0,ief_monetary_freedom,[ief_monetary_freedom_t-3],0.86,0.324931,0.090652,1
2,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.86,-0.525479,0.053289,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 4691.172785

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,4.691173,0.833681,0.213322,3.548121,633,-8.822459,0.0
1,0.003481,16.32857,0.811762,0.39024,3.123774,633,-8.37321,0.0
2,0.012115,56.83486,0.779572,0.505222,2.813876,633,-8.002755,1.110223e-15
3,0.04217,197.8251,0.73225,0.575433,2.606592,633,-7.406498,1.29674e-13
4,0.14678,688.57,0.660907,0.625708,2.447401,633,-7.026385,2.119416e-12
5,0.510897,2396.706,0.560523,0.673367,2.286281,633,-7.256896,3.961276e-13
6,1.778279,8342.216,0.445881,0.725545,2.095733,633,-7.501788,6.306067e-14
7,6.189658,29036.76,0.348279,0.778478,1.88282,633,-6.090629,1.12468e-09
8,21.544347,101068.3,0.287018,0.805253,1.765368,633,-4.435867,9.170259e-06
9,74.989421,351788.3,0.259443,0.813073,1.72956,633,-3.827051,0.0001296878



Best RIDGE:
  Parameter: 625577945.135945
  R² (test): 0.8160
  ΔR² vs benchmark: -1.48 p.p.
  RMSE (test): 1.7159
  ΔRMSE vs benchmark: +0.0703
  Active vars: 633
  DM test: stat=-3.77, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1_delta3, wb_iq_spi_pil1_t-5, wb...",8e-06,5.355472,7
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",8e-06,4.943297,8
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg, wb_ny_gdp_mktp_kd_zg_ma...",7e-06,4.493599,9
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,5e-06,3.243498,4
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_ma10, efw_5aii...",4e-06,2.528404,8
50,efw_5cii_bureacracy_costs,"[efw_5cii_bureacracy_costs, efw_5cii_bureacrac...",4e-06,2.28957,8
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_delta3, wb_bn_cab_xoka_g...",3e-06,2.152374,7
68,ief_business_freedom,"[ief_business_freedom_delta3, ief_business_fre...",3e-06,1.752574,6
36,efw_5a_credit_market_regulation,"[efw_5a_credit_market_regulation_t-3, efw_5a_c...",3e-06,1.639996,8
27,efw_4bi_non_tariff_trade_barriers,"[efw_4bi_non_tariff_trade_barriers_ma3, efw_4b...",3e-06,1.604415,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.483293

Stopping: model has 0 active variables at factor 6.189658188912603



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.849061,-0.056569,4.111961,616,-9.466124,0.0
1,0.003481,0.001682,0.837787,0.180625,3.621106,557,-8.583857,0.0
2,0.012115,0.005855,0.80841,0.422801,3.039224,476,-7.64045,2.153833e-14
3,0.04217,0.02038,0.755841,0.575471,2.606476,354,-7.53886,4.751755e-14
4,0.14678,0.070938,0.67355,0.646834,2.37733,208,-6.685217,2.305822e-11
5,0.510897,0.246913,0.5243,0.711951,2.147006,75,-7.49067,6.861178e-14
6,1.778279,0.85943,0.337041,0.815541,1.718106,8,-2.575424,0.01001172



Best ELASTIC (L1=0.5):
  Parameter: 0.859430
  R² (test): 0.8155
  ΔR² vs benchmark: -1.52 p.p.
  RMSE (test): 1.7181
  ΔRMSE vs benchmark: +0.0725
  Active vars: 8
  Active variables: wb_ny_gdp_mktp_kd_zg_ma3, wb_ny_gdp_mktp_kd_zg_ma5, wb_ny_gdp_mktp_kd_zg_ma10, wb_fp_cpi_totl_zg, wb_bn_cab_xoka_gd_zs_delta3, ief_monetary_freedom_delta3, ief_monetary_freedom_ma5, ief_monetary_freedom_t-3
  DM test: stat=-2.58, p=0.010


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 4 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.189752,38.170917,8
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma5, wb_ny_gdp_mktp_kd_z...",0.156267,31.434897,9
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.082265,16.548495,7
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_delta3, wb_bn_cab_xoka_g...",0.068829,13.84569,7



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 7 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
1,wb_bn_cab_xoka_gd_zs,[wb_bn_cab_xoka_gd_zs_delta3],1.0,-0.130883,0.03516,1
3,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma3, wb_ny_gdp_mktp_kd_z...",0.985,0.107875,0.032518,2
0,ief_monetary_freedom,"[ief_monetary_freedom_ma5, ief_monetary_freedo...",0.91,0.055012,0.046213,3
2,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.86,-0.382551,0.038288,1



COMPLETED IN 190.8s


Running forecast horizon: t+4
Split year for 85-15 split: 2015

UNIFIED REGULARIZED REGRESSION: SIGNAL_T4_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2015 (1602 obs)
Test:  2016-2020 (283 obs)

β=1 specification active
After removing NaN: 1602 train, 283 test obs

Benchmark metrics:
  R² (test): 0.7558
  RMSE (test): 1.9851

Clustering variants of same variables...
  Original features: 972
  After clustering: 637
  Number of clusters: 637

Features used in models: 637


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal paramete

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000144,0.836679,-0.589525,5.064785,624,-10.481536,0.0
1,0.003481,0.000501,0.833691,-0.407047,4.765206,600,-10.046508,0.0
2,0.012115,0.001743,0.818301,-0.040094,4.096978,517,-8.677138,0.0
3,0.04217,0.006066,0.773041,0.326862,3.295941,396,-6.826778,8.684164e-12
4,0.14678,0.021114,0.68782,0.485175,2.882419,259,-5.807205,6.352429e-09
5,0.510897,0.07349,0.559648,0.573209,2.62443,115,-5.848784,4.951807e-09
6,1.778279,0.255797,0.330815,0.669446,2.309663,26,-4.399378,1.085616e-05
7,6.189658,0.890351,0.069888,0.742951,2.036738,2,-1.982506,0.04742261



Best LASSO:
  Parameter: 0.890351
  R² (test): 0.7430
  ΔR² vs benchmark: -1.29 p.p.
  RMSE (test): 2.0367
  ΔRMSE vs benchmark: +0.0516
  Active vars: 2
  Active variables: wb_fp_cpi_totl_zg, ief_monetary_freedom_t-3
  DM test: stat=-1.98, p=0.047


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
74,ief_monetary_freedom,"[ief_monetary_freedom_t-3, ief_monetary_freedo...",0.065651,86.666685,8
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_t-3, wb_...",0.0101,13.333315,7



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...
No variables meet stability criterion (80% selection frequency)


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 905.723664

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.9057237,0.833612,-0.364286,4.692237,637,-9.913966,0.0
1,0.003481,3.152553,0.821662,-0.089814,4.19376,637,-9.124064,0.0
2,0.012115,10.97309,0.799374,0.168521,3.663134,637,-7.984922,1.332268e-15
3,0.04217,38.19405,0.763403,0.308791,3.339888,637,-7.06737,1.578959e-12
4,0.14678,132.9421,0.709995,0.385661,3.1487,637,-6.603235,4.022827e-11
5,0.510897,462.7315,0.630117,0.45357,2.969577,637,-6.482514,9.020673e-11
6,1.778279,1610.63,0.51302,0.524117,2.771262,637,-6.46997,9.802248e-11
7,6.189658,5606.12,0.36692,0.601147,2.537078,637,-5.978866,2.246966e-09
8,21.544347,19513.22,0.226369,0.681893,2.26576,637,-4.384145,1.164421e-05
9,74.989421,67919.69,0.125304,0.726791,2.099785,637,-2.792596,0.00522869



Best RIDGE:
  Parameter: 420399684.638619
  R² (test): 0.7427
  ΔR² vs benchmark: -1.32 p.p.
  RMSE (test): 2.0379
  ΔRMSE vs benchmark: +0.0528
  Active vars: 637
  DM test: stat=-2.35, p=0.019


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1_t-1, wb_iq_spi_pil1_t-3, wb_iq...",1.5e-05,5.642473,7
74,ief_monetary_freedom,"[ief_monetary_freedom_ma5, ief_monetary_freedo...",1.3e-05,4.798217,8
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg, wb_ny_gdp_mktp_kd_zg_ma...",1.1e-05,4.16927,9
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,7e-06,2.603449,5
50,efw_5cii_bureacracy_costs,"[efw_5cii_bureacracy_costs, efw_5cii_bureacrac...",7e-06,2.512826,8
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_ma10, efw_5aii...",6e-06,2.108208,8
87,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_delta, wb_bn_cab_xoka_gd...",5e-06,1.944247,7
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_t-1...,5e-06,1.934164,7
68,ief_business_freedom,"[ief_business_freedom_delta3, ief_business_fre...",4e-06,1.653099,6
26,efw_4b_regulatory_trade_barriers,"[efw_4b_regulatory_trade_barriers_ma10, efw_4b...",4e-06,1.478152,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.143845

Stopping: model has 0 active variables at factor 21.54434690031882



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000144,0.836725,-0.602222,5.084973,631,-10.507648,0.0
1,0.003481,0.000501,0.83422,-0.41952,4.786279,627,-10.110295,0.0
2,0.012115,0.001743,0.82094,-0.064613,4.144987,569,-8.898603,0.0
3,0.04217,0.006066,0.788207,0.26247,3.449984,487,-7.245925,4.294343e-13
4,0.14678,0.021114,0.723721,0.426876,3.041246,366,-6.211881,5.235399e-10
5,0.510897,0.07349,0.621309,0.532755,2.745993,208,-5.795462,6.813349e-09
6,1.778279,0.255797,0.434094,0.603552,2.529417,81,-6.542965,6.031109e-11
7,6.189658,0.890351,0.190515,0.745597,2.026228,14,-1.033112,0.3015514



Best ELASTIC (L1=0.5):
  Parameter: 0.890351
  R² (test): 0.7456
  ΔR² vs benchmark: -1.02 p.p.
  RMSE (test): 2.0262
  ΔRMSE vs benchmark: +0.0411
  Active vars: 14
  DM test: stat=-1.03, p=0.302

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.302)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 187.6s


Running forecast horizon: t+5
Split year for 85-15 split: 2014

UNIFIED REGULARIZED REGRESSION: SIGNAL_T5_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2014 (1539 obs)
Test:  2015-2019 (285 obs)

β=1 specification active
After removing NaN: 1539 train, 285 test obs

Ben

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000483,0.817257,-0.743446,5.342644,596,-7.09746,1.270761e-12
1,0.003481,0.001682,0.802385,-0.432744,4.843243,524,-6.615884,3.693379e-11
2,0.012115,0.005855,0.753249,-0.102348,4.248263,417,-6.571499,4.981127e-11
3,0.04217,0.02038,0.662849,0.3772,3.193201,283,-4.410299,1.03228e-05
4,0.14678,0.070938,0.517485,0.473941,2.934736,148,-3.214119,0.001308455
5,0.510897,0.246913,0.240263,0.536173,2.755687,33,-2.806122,0.005014176
6,1.778279,0.85943,-0.08457,0.680687,2.286441,6,-0.961566,0.3362679



Best LASSO:
  Parameter: 0.859430
  R² (test): 0.6807
  ΔR² vs benchmark: -1.65 p.p.
  RMSE (test): 2.2864
  ΔRMSE vs benchmark: +0.0599
  Active vars: 6
  Active variables: wb_ny_gdp_mktp_kd_zg_ma5, wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_zg_ma10, ief_monetary_freedom_t-3
  DM test: stat=-0.96, p=0.336

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.336)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 905.723664

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.9057237,0.816704,-0.636447,5.176103,639,-6.921817,4.458878e-12
1,0.003481,3.152553,0.804434,-0.241137,4.507771,639,-6.04201,1.522061e-09
2,0.012115,10.97309,0.780025,0.073078,3.895592,639,-5.27037,1.36149e-07
3,0.04217,38.19405,0.739914,0.209603,3.597283,639,-4.997035,5.82184e-07
4,0.14678,132.9421,0.679977,0.244585,3.516777,639,-5.074144,3.89244e-07
5,0.510897,462.7315,0.586606,0.244554,3.516848,639,-5.239126,1.613386e-07
6,1.778279,1610.63,0.433647,0.303677,3.376427,639,-5.245848,1.55565e-07
7,6.189658,5606.12,0.195057,0.44524,3.013732,639,-4.966897,6.803253e-07
8,21.544347,19513.22,-0.092393,0.586552,2.60173,639,-3.822049,0.0001323473
9,74.989421,67919.69,-0.320046,0.656092,2.372865,639,-2.494296,0.01262073



Best RIDGE:
  Parameter: 420399684.638619
  R² (test): 0.6803
  ΔR² vs benchmark: -1.68 p.p.
  RMSE (test): 2.2877
  ΔRMSE vs benchmark: +0.0611
  Active vars: 639
  DM test: stat=-2.06, p=0.039


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1_t-1, wb_iq_spi_pil1_ma3, wb_iq...",1.9e-05,6.191289,8
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",1.4e-05,4.601719,9
74,ief_monetary_freedom,"[ief_monetary_freedom_ma5, ief_monetary_freedo...",1.3e-05,4.382613,8
50,efw_5cii_bureacracy_costs,"[efw_5cii_bureacracy_costs, efw_5cii_bureacrac...",7e-06,2.456987,8
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",6e-06,2.047931,5
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",5e-06,1.78444,7
68,ief_business_freedom,"[ief_business_freedom_t-5, ief_business_freedo...",5e-06,1.76539,6
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_del...,5e-06,1.681806,6
38,efw_5aii_private_sector_credit,"[efw_5aii_private_sector_credit_ma10, efw_5aii...",5e-06,1.631643,8
55,efw_5dii_business_permits,"[efw_5dii_business_permits, efw_5dii_business_...",5e-06,1.572843,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.143845

Stopping: model has 0 active variables at factor 74.98942093324558



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.000144,0.820199,-0.897094,5.573094,635,-7.526791,5.195844e-14
1,0.003481,0.000501,0.81752,-0.717047,5.30204,622,-7.12133,1.068923e-12
2,0.012115,0.001743,0.804334,-0.349099,4.699741,580,-6.376196,1.815406e-10
3,0.04217,0.006066,0.768298,0.036939,3.970808,504,-5.69895,1.205477e-08
4,0.14678,0.021114,0.699135,0.258003,3.485403,386,-4.991829,5.981026e-07
5,0.510897,0.07349,0.584737,0.42571,3.06632,240,-3.866111,0.0001105846
6,1.778279,0.255797,0.366842,0.485315,2.902839,107,-3.316282,0.0009122369
7,6.189658,0.890351,0.01784,0.648956,2.397356,21,-1.736306,0.08250978
8,21.544347,3.099046,-0.363188,0.683565,2.276114,3,-1.550797,0.1209503



Best ELASTIC (L1=0.5):
  Parameter: 3.099046
  R² (test): 0.6836
  ΔR² vs benchmark: -1.36 p.p.
  RMSE (test): 2.2761
  ΔRMSE vs benchmark: +0.0495
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_t-1, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg_t-1
  DM test: stat=-1.55, p=0.121

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.121)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 184.1s


Running forecast horizon: t+6
Split year for 85-15 split: 2013

UNIFIED REGULARIZED REGRESSION: SIGNAL_T6_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2013 (1479 obs)
Test:  2014-2018

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.819935,-35.253306,24.764769,643,-6.835287,8.18412e-12
1,0.003481,7e-06,0.819929,-35.283727,24.775157,643,-6.835447,8.175016e-12
2,0.012115,2.5e-05,0.819906,-35.407461,24.817365,643,-6.834686,8.218537e-12
3,0.04217,8.7e-05,0.81975,-35.918967,24.991092,641,-6.830583,8.457013e-12
4,0.14678,0.000304,0.818398,-37.218624,25.427168,625,-6.806007,1.003442e-11
5,0.510897,0.001057,0.80908,-39.215237,26.082894,574,-6.606721,3.929235e-11
6,1.778279,0.00368,0.774565,-79.176963,36.828605,462,-6.246388,4.200533e-10
7,6.189658,0.012807,0.698299,-140.079126,48.853068,354,-6.094955,1.094682e-09
8,21.544347,0.044578,0.564954,-72.488023,35.258899,212,-5.101177,3.375473e-07
9,74.989421,0.155163,0.289165,-1.644997,6.689183,76,-6.292913,3.115628e-10



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.7392
  ΔR² vs benchmark: -0.93 p.p.
  RMSE (test): 2.1003
  ΔRMSE vs benchmark: +0.0378
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=-0.93, p=0.352

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.352)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 571.860368

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.5718604,0.81869,-32.3802,23.763201,643,-6.772011,1.270051e-11
1,0.003481,1.990475,0.807142,-42.722027,27.196349,643,-6.593781,4.287637e-11
2,0.012115,6.928247,0.785167,-64.982031,33.409756,643,-6.37634,1.813698e-10
3,0.04217,24.11515,0.746553,-102.767296,41.897751,643,-6.242739,4.299754e-10
4,0.14678,83.93762,0.685242,-130.034886,47.081895,643,-6.131407,8.710546e-10
5,0.510897,292.1617,0.587342,-100.230498,41.382446,643,-5.77283,7.795121e-09
6,1.778279,1016.928,0.421162,-39.24773,26.09343,643,-5.08955,3.589144e-07
7,6.189658,3539.62,0.077677,-8.928847,12.960145,643,-5.461181,4.729775e-08
8,21.544347,12320.36,-0.64936,-1.05131,5.890825,643,-6.643075,3.072054e-11
9,74.989421,42883.48,-1.530118,0.506852,2.888344,643,-4.891641,9.999877e-07



Best RIDGE:
  Parameter: 1808381.266360
  R² (test): 0.7352
  ΔR² vs benchmark: -1.33 p.p.
  RMSE (test): 2.1165
  ΔRMSE vs benchmark: +0.0540
  Active vars: 643
  DM test: stat=-1.26, p=0.208

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.208)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.819934,-35.257013,24.766035,643,-6.835104,8.194556e-12
1,0.003481,7e-06,0.819929,-35.277175,24.77292,643,-6.834914,8.205436e-12
2,0.012115,2.5e-05,0.819902,-35.360886,24.801485,643,-6.833537,8.284706e-12
3,0.04217,8.7e-05,0.819738,-35.745397,24.932277,643,-6.826401,8.707035e-12
4,0.14678,0.000304,0.818472,-36.500777,25.187241,637,-6.802367,1.029132e-11
5,0.510897,0.001057,0.810122,-42.72835,27.198316,599,-6.641976,3.095058e-11
6,1.778279,0.00368,0.781949,-68.811355,34.365566,533,-6.328571,2.474423e-10
7,6.189658,0.012807,0.726652,-132.904343,47.594613,431,-6.159736,7.286654e-10
8,21.544347,0.044578,0.622975,-100.049764,41.345488,309,-5.625074,1.854284e-08
9,74.989421,0.155163,0.42129,-37.894923,25.651155,184,-5.004251,5.607972e-07



Best ELASTIC (L1=0.5):
  Parameter: 1.879848
  R² (test): 0.7377
  ΔR² vs benchmark: -1.08 p.p.
  RMSE (test): 2.1065
  ΔRMSE vs benchmark: +0.0440
  Active vars: 11
  DM test: stat=-0.95, p=0.340

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.340)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 204.1s


Running forecast horizon: t+7
Split year for 85-15 split: 2012

UNIFIED REGULARIZED REGRESSION: SIGNAL_T7_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2012 (1423 obs)
Test:  2013-2017 (283 obs)

β=1 specification active
After removing NaN: 1423 train, 283 test obs

Ben

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,4e-06,0.799886,-173.379343,54.122365,646,-5.184555,2.165315e-07
1,0.003481,1.3e-05,0.79987,-172.166239,53.93378,646,-5.188981,2.114482e-07
2,0.012115,4.6e-05,0.799786,-168.787224,53.404979,644,-5.203473,1.955979e-07
3,0.04217,0.00016,0.799219,-154.017043,51.029225,633,-5.264048,1.409177e-07
4,0.14678,0.000557,0.794992,-116.652213,44.455889,610,-5.50363,3.720506e-08
5,0.510897,0.001938,0.774872,-60.386511,32.111879,521,-5.699024,1.204955e-08
6,1.778279,0.006744,0.718104,-33.160156,23.954613,416,-6.207992,5.366585e-10
7,6.189658,0.023475,0.612248,-13.910304,15.826048,287,-5.96066,2.512217e-09
8,21.544347,0.081711,0.407976,0.519393,2.841346,134,-4.336291,1.449074e-05
9,74.989421,0.284412,0.001052,0.628884,2.4968,32,-1.870796,0.06137338



Best LASSO:
  Parameter: 3.445726
  R² (test): 0.7504
  ΔR² vs benchmark: +1.72 p.p.
  RMSE (test): 2.0475
  ΔRMSE vs benchmark: -0.0695
  Active vars: 2
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg
  DM test: stat=1.72, p=0.086

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.086)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 397.972312

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.3979723,0.801153,-142.151855,49.037434,646,-5.315404,1.064209e-07
1,0.003480701,1.385222,0.78976,-116.318345,44.392766,646,-5.445797,5.157398e-08
2,0.01211528,4.821545,0.769677,-80.679328,37.041224,646,-5.565088,2.620194e-08
3,0.04216965,16.78235,0.73375,-48.381671,28.801303,646,-5.908068,3.461428e-09
4,0.1467799,58.41435,0.674057,-30.380664,22.959388,646,-6.139196,8.294012e-10
5,0.510897,203.3229,0.57738,-20.087196,18.820825,646,-5.481734,4.211765e-08
6,1.778279,707.706,0.417564,-8.557722,12.67087,646,-5.02484,5.03853e-07
7,6.189658,2463.313,0.105932,-1.611555,6.623367,646,-5.992889,2.061457e-09
8,21.54435,8574.054,-0.589737,0.181369,3.708287,646,-4.783155,1.725647e-06
9,74.98942,29843.71,-1.602315,0.658161,2.396295,646,-1.917766,0.05514067



Best RIDGE:
  Parameter: 361564.839799
  R² (test): 0.7523
  ΔR² vs benchmark: +1.91 p.p.
  RMSE (test): 2.0397
  ΔRMSE vs benchmark: -0.0773
  Active vars: 646
  DM test: stat=1.52, p=0.128

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.128)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.79989,-173.618835,54.159518,646,-5.183781,2.174323e-07
1,0.003481,7e-06,0.799885,-172.993611,54.062472,646,-5.186208,2.146195e-07
2,0.012115,2.5e-05,0.799861,-170.898717,53.736028,646,-5.194745,2.05001e-07
3,0.04217,8.7e-05,0.799682,-164.457405,52.71963,644,-5.2229,1.761423e-07
4,0.14678,0.000304,0.798261,-144.489974,49.43628,637,-5.335149,9.546612e-08
5,0.510897,0.001057,0.789423,-96.643282,40.499572,607,-5.550118,2.854763e-08
6,1.778279,0.00368,0.758882,-57.139466,31.251061,541,-5.84476,5.073002e-09
7,6.189658,0.012807,0.700768,-34.615501,24.459568,436,-6.177727,6.503122e-10
8,21.544347,0.044578,0.590397,-12.98837,15.328964,319,-5.946712,2.735818e-09
9,74.989421,0.155163,0.365833,0.486142,2.937992,184,-4.270101,1.953842e-05



Best ELASTIC (L1=0.5):
  Parameter: 1.879848
  R² (test): 0.7514
  ΔR² vs benchmark: +1.82 p.p.
  RMSE (test): 2.0433
  ΔRMSE vs benchmark: -0.0736
  Active vars: 11
  DM test: stat=1.70, p=0.089

Skipping interpretability analysis: no significant incremental value over mean reversion benchmark (DM p=0.089)

Interpretation: Macro/IQ variables do not provide statistically significant
incremental predictive power beyond the mean reversion benchmark.


COMPLETED IN 195.9s


Running forecast horizon: t+8
Split year for 85-15 split: 2011

UNIFIED REGULARIZED REGRESSION: SIGNAL_T8_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2011 (1365 obs)
Test:  2012-2016 (280 obs)

β=1 specification active
After removing NaN: 1365 train, 280 test obs

Benc

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.786319,-36.056568,24.951963,654,-5.417346,6.049021e-08
1,0.003481,7e-06,0.786308,-36.111239,24.970363,654,-5.41837,6.014497e-08
2,0.012115,2.5e-05,0.786264,-36.272614,25.024594,654,-5.421538,5.908828e-08
3,0.04217,8.7e-05,0.786031,-36.494498,25.09897,649,-5.431858,5.577041e-08
4,0.14678,0.000304,0.784363,-36.653091,25.151995,631,-5.447956,5.095204e-08
5,0.510897,0.001057,0.774317,-38.745236,25.841318,566,-5.42907,5.66485e-08
6,1.778279,0.00368,0.739135,-58.939308,31.734232,466,-5.067758,4.025279e-07
7,6.189658,0.012807,0.665859,-23.436861,20.262584,345,-5.500269,3.792131e-08
8,21.544347,0.044578,0.50325,-1.331981,6.259427,217,-5.317613,1.051373e-07
9,74.989421,0.155163,0.171999,0.397449,3.181772,79,-1.614245,0.1064742



Best LASSO:
  Parameter: 0.540078
  R² (test): 0.5367
  ΔR² vs benchmark: +5.58 p.p.
  RMSE (test): 2.7900
  ΔRMSE vs benchmark: -0.1633
  Active vars: 10
  Active variables: wb_ny_gdp_mktp_kd_zg_ma3, wb_ny_gdp_mktp_kd_zg_ma10, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_petr_rt_zs_delta, efw_5a_credit_market_regulation_delta3, ief_monetary_freedom_t-1, ief_monetary_freedom_t-3
  DM test: stat=2.22, p=0.026


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 6 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma3, wb_ny_gdp_mktp_kd_z...",0.212314,38.885274,9
74,ief_monetary_freedom,"[ief_monetary_freedom_t-1, ief_monetary_freedo...",0.137495,25.182106,8
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.095601,17.509371,6
99,wb_ny_gdp_petr_rt_zs,"[wb_ny_gdp_petr_rt_zs_delta, wb_ny_gdp_petr_rt...",0.060631,11.104495,4
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_z...",0.032405,5.934875,8
36,efw_5a_credit_market_regulation,"[efw_5a_credit_market_regulation_delta3, efw_5...",0.007556,1.383879,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 3 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
1,wb_ny_gdp_mktp_kd_zg,[wb_ny_gdp_mktp_kd_zg_ma3],0.86,0.21886,0.098211,1
2,wb_ny_gdp_petr_rt_zs,[wb_ny_gdp_petr_rt_zs_delta],0.86,-0.240686,0.083965,1
0,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.85,-3.408347,1.03841,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 397.972312

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.3979723,0.786725,-36.294111,25.03181,654,-5.2873,1.241347e-07
1,0.003480701,1.385222,0.775427,-44.680388,27.703652,654,-5.355915,8.512431e-08
2,0.01211528,4.821545,0.757111,-57.891018,31.455504,654,-5.251291,1.510368e-07
3,0.04216965,16.78235,0.7246,-63.800653,32.996039,654,-5.01333,5.349603e-07
4,0.1467799,58.41435,0.66205,-51.516171,29.704254,654,-4.99876,5.770011e-07
5,0.510897,203.3229,0.551736,-25.374072,21.050418,654,-5.333167,9.65145e-08
6,1.778279,707.706,0.370466,-5.429083,10.393136,654,-5.742447,9.331782e-09
7,6.189658,2463.313,0.016846,-0.060817,4.221748,654,-3.870672,0.0001085355
8,21.54435,8574.054,-0.782933,0.395841,3.186016,654,-0.821779,0.4112029
9,74.98942,29843.71,-1.925907,0.546171,2.761331,654,1.435622,0.1511099



Best RIDGE:
  Parameter: 103877.030102
  R² (test): 0.5635
  ΔR² vs benchmark: +8.27 p.p.
  RMSE (test): 2.7080
  ΔRMSE vs benchmark: -0.2453
  Active vars: 654
  DM test: stat=3.82, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,0.189048,12.326917,5
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.104351,6.804268,9
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1, wb_iq_spi_pil1_delta3, wb_iq_...",0.046889,3.057436,7
74,ief_monetary_freedom,"[ief_monetary_freedom_t-1, ief_monetary_freedo...",0.036835,2.401841,8
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.035549,2.317973,7
100,wb_pa_nus_fcrf,"[wb_pa_nus_fcrf, wb_pa_nus_fcrf_delta, wb_pa_n...",0.029011,1.891639,4
28,efw_4bii_costs_of_importing_and_exporting,[efw_4bii_costs_of_importing_and_exporting_del...,0.028775,1.876313,5
11,efw_2c_property_rights,"[efw_2c_property_rights_t-5, efw_2c_property_r...",0.027379,1.785272,5
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",0.027035,1.762811,5
91,wb_iq_spi_pil3,"[wb_iq_spi_pil3_t-5, wb_iq_spi_pil3_t-3, wb_iq...",0.02403,1.566899,5



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.786321,-36.058698,24.95268,654,-5.417062,6.058648e-08
1,0.003481,7e-06,0.786317,-36.120614,24.973516,654,-5.417381,6.047838e-08
2,0.012115,2.5e-05,0.786295,-36.319308,25.040264,654,-5.418288,6.017242e-08
3,0.04217,8.7e-05,0.786122,-36.818601,25.207214,652,-5.419893,5.963479e-08
4,0.14678,0.000304,0.784634,-39.16008,25.975828,650,-5.412123,6.228194e-08
5,0.510897,0.001057,0.77552,-40.989004,26.560724,603,-5.384752,7.254479e-08
6,1.778279,0.00368,0.747809,-52.825014,30.072131,542,-5.171216,2.325762e-07
7,6.189658,0.012807,0.694464,-42.290938,26.969359,453,-5.262484,1.421221e-07
8,21.544347,0.044578,0.5727,-9.282552,13.14385,320,-5.584793,2.339781e-08
9,74.989421,0.155163,0.32474,0.315506,3.391229,195,-2.428656,0.01515491



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.5382
  ΔR² vs benchmark: +5.73 p.p.
  RMSE (test): 2.7855
  ΔRMSE vs benchmark: -0.1678
  Active vars: 5
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=4.29, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.016281,60.226024,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.010752,39.773976,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 5 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.85,-0.361625,0.067627,3
1,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_de...",0.85,-0.228615,0.039326,2



COMPLETED IN 197.4s


Running forecast horizon: t+9
Split year for 85-15 split: 2010

UNIFIED REGULARIZED REGRESSION: SIGNAL_T9_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2010 (1307 obs)
Test:  2011-2015 (275 obs)

β=1 specification active
After removing NaN: 1307 train, 275 test obs

Benchmark metrics:
  R² (test): 0.3683
  RMSE (test): 3.2758

Clustering variants of same variables...
  Original features: 963
  After clustering: 656
  Number of clusters: 656

Features used in models: 656


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal paramete

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.79921,-52.267738,30.081821,656,-3.189684,0.001424
1,0.003481,7e-06,0.799214,-52.432329,30.12826,656,-3.188878,0.001428
2,0.012115,2.5e-05,0.79922,-52.954901,30.27523,654,-3.185887,0.001443
3,0.04217,8.7e-05,0.799145,-54.356543,30.665954,649,-3.167833,0.001536
4,0.14678,0.000304,0.797711,-59.959711,32.180549,631,-3.127194,0.001765
5,0.510897,0.001057,0.786218,-55.074432,30.864159,577,-2.915208,0.003555
6,1.778279,0.00368,0.744353,-55.709831,31.038532,480,-3.129832,0.001749
7,6.189658,0.012807,0.661163,-69.54541,34.618351,344,-3.802496,0.000143
8,21.544347,0.044578,0.486928,-104.373658,42.309513,207,-4.139094,3.5e-05
9,74.989421,0.155163,0.145641,-8.313754,12.57866,88,-4.110001,4e-05



Best LASSO:
  Parameter: 0.540078
  R² (test): 0.4403
  ΔR² vs benchmark: +7.20 p.p.
  RMSE (test): 3.0836
  ΔRMSE vs benchmark: -0.1922
  Active vars: 11
  DM test: stat=4.13, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 7 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma5, wb_ny_gdp_mktp_kd_z...",0.097424,22.221366,9
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.091994,20.982732,6
99,wb_ny_gdp_petr_rt_zs,"[wb_ny_gdp_petr_rt_zs_delta, wb_ny_gdp_petr_rt...",0.081794,18.656163,4
74,ief_monetary_freedom,"[ief_monetary_freedom_t-1, ief_monetary_freedo...",0.074268,16.939782,8
11,efw_2c_property_rights,"[efw_2c_property_rights_t-5, efw_2c_property_r...",0.047637,10.865509,5
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_t-1, wb_ny_gdp_defl_kd_z...",0.037672,8.592568,8
36,efw_5a_credit_market_regulation,"[efw_5a_credit_market_regulation_delta3, efw_5...",0.007637,1.74188,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...
No variables meet stability criterion (80% selection frequency)


--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 397.972312

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.3979723,0.800897,-49.164084,29.192309,656,-2.917201,0.003531877
1,0.003480701,1.385222,0.787241,-54.301876,30.650808,656,-2.956328,0.003113261
2,0.01211528,4.821545,0.764497,-61.169978,32.498428,656,-3.058069,0.002227683
3,0.04216965,16.78235,0.725414,-69.327942,34.564952,656,-3.294526,0.0009858773
4,0.1467799,58.41435,0.655216,-81.123709,37.351348,656,-3.604238,0.0003130698
5,0.510897,203.3229,0.53496,-68.135864,34.270757,656,-3.677892,0.0002351693
6,1.778279,707.706,0.336441,-29.562655,22.785971,656,-3.441316,0.0005788926
7,6.189658,2463.313,-0.063105,-6.042895,10.938244,656,-3.042366,0.002347265
8,21.54435,8574.054,-0.970859,-0.37626,4.835287,656,-2.53877,0.01112429
9,74.98942,29843.71,-2.24123,0.430474,3.110488,656,1.266374,0.2053794



Best RIDGE:
  Parameter: 103877.030102
  R² (test): 0.4789
  ΔR² vs benchmark: +11.06 p.p.
  RMSE (test): 2.9752
  ΔRMSE vs benchmark: -0.3006
  Active vars: 656
  DM test: stat=4.32, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,0.621947,31.57233,5
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",0.102784,5.217707,9
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.03658,1.856945,7
74,ief_monetary_freedom,"[ief_monetary_freedom_t-1, ief_monetary_freedo...",0.032994,1.674905,8
100,wb_pa_nus_fcrf,"[wb_pa_nus_fcrf, wb_pa_nus_fcrf_delta, wb_pa_n...",0.030507,1.54863,4
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",0.029389,1.491904,5
11,efw_2c_property_rights,"[efw_2c_property_rights_t-5, efw_2c_property_r...",0.028098,1.426363,5
73,ief_labor_freedom,"[ief_labor_freedom_delta, ief_labor_freedom_de...",0.026602,1.35042,6
90,wb_iq_spi_pil1,"[wb_iq_spi_pil1_ma3, wb_iq_spi_pil1, wb_iq_spi...",0.023148,1.175072,7
91,wb_iq_spi_pil3,"[wb_iq_spi_pil3_t-5, wb_iq_spi_pil3_t-3, wb_iq...",0.02199,1.116304,6



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.799213,-52.267245,30.081682,656,-3.189918,0.001423133
1,0.003481,7e-06,0.799224,-52.432126,30.128203,656,-3.189723,0.001424091
2,0.012115,2.5e-05,0.799251,-52.984476,30.283526,656,-3.188939,0.00142796
3,0.04217,8.7e-05,0.799232,-54.547835,30.718893,653,-3.182393,0.001460634
4,0.14678,0.000304,0.798026,-58.957044,31.914799,646,-3.154078,0.00161006
5,0.510897,0.001057,0.78805,-53.230129,30.35235,600,-2.928804,0.003402694
6,1.778279,0.00368,0.754686,-64.068726,33.247435,542,-3.11244,0.001855477
7,6.189658,0.012807,0.693424,-68.31108,34.314157,444,-3.566902,0.0003612262
8,21.544347,0.044578,0.56356,-84.148784,38.033056,313,-4.010405,6.061476e-05
9,74.989421,0.155163,0.303923,-60.163726,32.234354,189,-4.145863,3.38536e-05



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.4457
  ΔR² vs benchmark: +7.74 p.p.
  RMSE (test): 3.0685
  ΔRMSE vs benchmark: -0.2073
  Active vars: 5
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3
  DM test: stat=5.12, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma3, wb_...",0.016796,56.689784,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma...",0.012832,43.310216,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 5 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.83,-0.380797,0.073356,3
1,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_de...",0.82,-0.241095,0.043855,2



COMPLETED IN 193.8s


Running forecast horizon: t+10
Split year for 85-15 split: 2009

UNIFIED REGULARIZED REGRESSION: SIGNAL_T10_85
Specification: β=1 (fixed at mean reversion)
Models: LASSO, RIDGE, ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2009 (1251 obs)
Test:  2010-2014 (270 obs)

β=1 specification active
After removing NaN: 1251 train, 270 test obs

Benchmark metrics:
  R² (test): 0.4158
  RMSE (test): 3.1737

Clustering variants of same variables...
  Original features: 960
  After clustering: 659
  Number of clusters: 659

Features used in models: 659


--------------------------------------------------------------------------------
RUNNING LASSO
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parame

Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.811814,-19.555819,18.825405,659,-3.226591,0.001252744
1,0.003481,7e-06,0.811798,-19.618757,18.854203,659,-3.224123,0.001263593
2,0.012115,2.5e-05,0.811733,-19.848161,18.958798,659,-3.215576,0.001301832
3,0.04217,8.7e-05,0.81141,-20.518382,19.261129,655,-3.19249,0.001410518
4,0.14678,0.000304,0.809332,-22.288886,20.037858,641,-3.140982,0.001683821
5,0.510897,0.001057,0.797133,-26.187942,21.650359,572,-3.048735,0.002298072
6,1.778279,0.00368,0.747968,-23.171322,20.413954,480,-3.022396,0.002507822
7,6.189658,0.012807,0.659625,-51.653387,30.129372,338,-2.934913,0.003336413
8,21.544347,0.044578,0.476402,-62.917524,33.196087,218,-2.888475,0.003871146
9,74.989421,0.155163,0.139331,-31.126647,23.534752,89,-2.852024,0.004344178



Best LASSO:
  Parameter: 1.879848
  R² (test): 0.4772
  ΔR² vs benchmark: +6.14 p.p.
  RMSE (test): 3.0023
  ΔRMSE vs benchmark: -0.1713
  Active vars: 3
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg
  DM test: stat=4.93, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(LASSO)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 1 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.082139,100.0,6



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 1 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,[wb_fp_cpi_totl_zg],0.82,-1.751716,1.152711,1



--------------------------------------------------------------------------------
RUNNING RIDGE
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 251.273762

Early stopping: R² converged after 3 iterations.



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,0.2512738,0.817581,-9.930553,13.727707,659,-3.559933,0.0003709493
1,0.003480701,0.8746087,0.803969,-9.556996,13.491092,659,-3.643637,0.0002688123
2,0.01211528,3.044251,0.78207,-11.124104,14.457785,659,-3.400443,0.0006727676
3,0.04216965,10.59613,0.746195,-16.962918,17.598083,659,-3.090886,0.001995604
4,0.1467799,36.88194,0.683351,-31.954209,23.835945,659,-2.970926,0.002969031
5,0.510897,128.375,0.576197,-39.23984,26.33935,659,-2.944921,0.003230369
6,1.778279,446.835,0.404526,-18.596482,18.380867,659,-2.947714,0.003201332
7,6.189658,1555.299,0.077343,-2.956284,8.25887,659,-2.92073,0.003492126
8,21.54435,5413.529,-0.711292,0.266501,3.556123,659,-1.425854,0.1539106
9,74.98942,18842.87,-2.100344,0.551429,2.780945,659,4.23904,2.244776e-05



Best RIDGE:
  Parameter: 18842.873921
  R² (test): 0.5514
  ΔR² vs benchmark: +13.56 p.p.
  RMSE (test): 2.7809
  ΔRMSE vs benchmark: -0.3927
  Active vars: 659
  DM test: stat=4.24, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(RIDGE)

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
33,efw_4diii_freedom_of_foreigners_to_visit,[efw_4diii_freedom_of_foreigners_to_visit_delt...,2.910196,34.550636,4
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_t-3, wb_ny_gdp_mktp_kd_z...",0.399706,4.745414,9
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.137592,1.633525,7
10,efw_2b_impartial_courts,"[efw_2b_impartial_courts_delta3, efw_2b_impart...",0.13476,1.599905,6
73,ief_labor_freedom,"[ief_labor_freedom_delta, ief_labor_freedom_de...",0.126541,1.502326,5
55,efw_5dii_business_permits,"[efw_5dii_business_permits_delta3, efw_5dii_bu...",0.124768,1.481282,5
11,efw_2c_property_rights,"[efw_2c_property_rights_t-5, efw_2c_property_r...",0.111202,1.320216,5
74,ief_monetary_freedom,"[ief_monetary_freedom_t-1, ief_monetary_freedo...",0.101554,1.205682,8
16,efw_2h_police_and_crime,"[efw_2h_police_and_crime_t-5, efw_2h_police_an...",0.099312,1.179064,5
3,efw_1c_government_investment,"[efw_1c_government_investment_t-3, efw_1c_gove...",0.094629,1.123456,7



METHOD 2: STABILITY SELECTION

Skipping stability selection for Ridge regression.
Ridge does not perform variable selection - all features have non-zero coefficients.
For feature importance with Ridge, refer to SHAP values above.


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 0.002069

Stopping: model has 0 active variables at factor 11006.94171252208



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.001,2e-06,0.811816,-19.550742,18.82308,659,-3.226699,0.001252271
1,0.003481,7e-06,0.811805,-19.60051,18.845858,659,-3.224515,0.001261864
2,0.012115,2.5e-05,0.811755,-19.768815,18.922686,659,-3.217267,0.001294179
3,0.04217,8.7e-05,0.811476,-20.299959,19.163124,658,-3.195462,0.001396071
4,0.14678,0.000304,0.809563,-21.810097,19.830812,649,-3.145044,0.001660618
5,0.510897,0.001057,0.798665,-17.465962,17.842795,620,-3.184001,0.001452545
6,1.778279,0.00368,0.761329,-17.682455,17.947084,548,-3.089646,0.002003951
7,6.189658,0.012807,0.693979,-31.785346,23.774797,438,-2.97666,0.002914066
8,21.544347,0.044578,0.558392,-48.061863,29.083652,314,-2.917613,0.003527217
9,74.989421,0.155163,0.293764,-52.980229,30.506634,193,-2.863475,0.004190214



Best ELASTIC (L1=0.5):
  Parameter: 6.543189
  R² (test): 0.4958
  ΔR² vs benchmark: +8.00 p.p.
  RMSE (test): 2.9483
  ΔRMSE vs benchmark: -0.2254
  Active vars: 6
  Active variables: wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg_ma10, wb_fp_cpi_totl_zg, wb_ny_gdp_defl_kd_zg, wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_zg_t-1
  DM test: stat=5.33, p=0.000


INTERPRETABILITY ANALYSIS: WHICH VARIABLES DRIVE INCREMENTAL PREDICTIVE POWER?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 2 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
88,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg_ma3, wb_fp_cpi_totl_zg, wb_...",0.019057,55.37736,6
93,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_ma3, wb_ny_gdp_defl_kd_z...",0.015356,44.62264,8



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 6 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
0,wb_fp_cpi_totl_zg,"[wb_fp_cpi_totl_zg, wb_fp_cpi_totl_zg_ma10, wb...",0.85,-0.436395,0.060777,3
1,wb_ny_gdp_defl_kd_zg,"[wb_ny_gdp_defl_kd_zg_delta3, wb_ny_gdp_defl_k...",0.843333,-0.283276,0.067648,3



COMPLETED IN 187.0s



### Lagged priced-in level regressions

In [None]:
# load the panel; in this cell it is used for column discovery and split-year lookup,
# while the regression routine receives the same file via data_path
df = pd.read_pickle("saved/df.dat")

# identifier and target ("tgt_") columns are kept out of both feature lists
id_cols = ["country", "year", "iso_code_1", "iso_code_2", "region"]
exclude = id_cols + [col for col in df.columns if col.startswith("tgt_")]

# macro block: "wb_" columns other than the "wb_iq_" ones
# (tgt_spread is not appended to macro_cols in this cell)
macro_candidates = [
    col
    for col in df.columns
    if col.startswith("wb_") and not col.startswith("wb_iq_") and col not in exclude
]
macro_cols = filter_cols(macro_candidates, [])

# iq block: "wb_iq_" columns plus every column that is neither "wb_"-prefixed nor excluded
# (presumably the institutional-quality indicators -- confirm against the data dictionary)
iq_candidates = [
    col
    for col in df.columns
    if col.startswith("wb_iq_") or not (col.startswith("wb_") or col in exclude)
]
iq_cols = filter_cols(iq_candidates, [])

# define models to run
models = ["lasso", "ridge", "elastic"]

# keyword arguments that are identical for every split share and horizon
fixed_settings = dict(
    data_path="saved/df.dat",
    macro_cols=macro_cols,
    iq_cols=iq_cols,
    use_clustering=True,
    corr_threshold=0.9,
    spread_col="tgt_spread",
    year_col="year",
    model_types=models,
    agnostic=True,
    use_expanding_cv=True,
    min_train_years=8,
    cv_stride=2,
    r2_benchmark=None,   # no benchmark metrics are supplied for the levels runs
    rmse_benchmark=None,
    early_stop_threshold=0.10,
    save_results=True,
    output_dir="specs",
    n_jobs=-1,
    multicollinearity_check=False,
    perform_interpretability=True,
)

# iterate over split shares
split_shares = [0.75, 0.8, 0.85]
for split_share in split_shares:
    print("\n" + "=" * 100)
    print(f"Runnning {split_share*100:.0f}-{(1-split_share)*100:.0f} split...")

    # run models for each horizon t+1 ... t+10
    for h in range(1, 11):

        # target column and the split year matching this share
        y_name = f"tgt_spread_t{h}"
        split_year = check_split_year(df, y_name, split_share)

        # the 0.75 share is the default and gets no suffix; other shares are tagged (e.g. "_80")
        name_prefix = (
            f"levels_t{h}"
            if split_share == 0.75
            else f"levels_t{h}_{split_share*100:.0f}"
        )

        # run spec for models
        print("\n" + "=" * 100)
        print(f"Running forecast horizon: t+{h}")
        print(f"Split year for {split_share*100:.0f}-{(1-split_share)*100:.0f} split: {split_year}")
        run_unified_regularized_regression(
            name_prefix=name_prefix,
            y_name=y_name,
            split_year=split_year,
            # grid and L1 ratio are rebuilt on every call so no mutable object is shared across runs
            param_factors=np.logspace(-2, 5, 15),
            l1_ratios=[0.5],
            **fixed_settings,
        )


Runnning 75-25 split...

Running forecast horizon: t+1
Split year for 75-25 split: 2014

UNIFIED REGULARIZED REGRESSION: LEVELS_T1
Specification: Agnostic (β unrestricted)
Models: ELASTIC
Elastic Net L1 ratios: [0.5]
Feature clustering: ON (threshold=0.9)
Statistical inference: SHAP values, Stability Selection (Lasso/Elastic Net only)
R² convergence: Stop after 3 consecutive changes < 0.0001

Train: 1960-2014 (1642 obs)
Test:  2015-2023 (541 obs)

Clustering variants of same variables...
  Original features: 972
  After clustering: 645
  Number of clusters: 645

Features used in models: 645


--------------------------------------------------------------------------------
RUNNING ELASTIC (L1=0.5)
--------------------------------------------------------------------------------

Selecting optimal parameter via expanding window CV...
Optimal parameter: 2.976351

Stopping: model has 0 active variables at factor 3.1622776601683795



Unnamed: 0,factor,param,R²_train,R²_test,RMSE_test,Active_vars,DM_stat,DM_p
0,0.01,0.029764,0.912076,0.453278,3.098579,350,,
1,0.031623,0.09412,0.844922,0.498013,2.969107,237,,
2,0.1,0.297635,0.675938,0.416557,3.200947,112,,
3,0.316228,0.941205,0.483757,0.366866,3.334473,44,,
4,1.0,2.976351,0.269172,0.207952,3.729538,23,,



Best ELASTIC (L1=0.5):
  Parameter: 0.094120
  R² (test): 0.4980
  RMSE (test): 2.9691
  Active vars: 237


INTERPRETABILITY ANALYSIS: WHICH VARIABLES ARE PRICED IN?
(ELASTIC (L1=0.5))

METHOD 1: SHAP VALUES (Feature Attribution)

Computing SHAP values...

Top 20 core variables by SHAP importance:



Unnamed: 0,core_variable,variants,shap_importance,shap_importance_pct,n_variants
95,wb_ny_gdp_mktp_kd_zg,"[wb_ny_gdp_mktp_kd_zg_ma10, wb_ny_gdp_mktp_kd_...",1.850665,8.897891,9
9,efw_2a_judicial_independence,"[efw_2a_judicial_independence_t-5, efw_2a_judi...",1.004954,4.831761,5
74,ief_monetary_freedom,"[ief_monetary_freedom, ief_monetary_freedom_ma...",0.821779,3.951066,7
2,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_ma5, efw_1b_tr...",0.755358,3.631717,6
13,efw_2e_legal_integrity,"[efw_2e_legal_integrity_ma10, efw_2e_legal_int...",0.692371,3.328881,6
97,wb_ny_gdp_pcap_kd,"[wb_ny_gdp_pcap_kd, wb_ny_gdp_pcap_kd_delta3, ...",0.553284,2.660158,3
19,efw_3b_standard_deviation_of_inflation,"[efw_3b_standard_deviation_of_inflation_ma10, ...",0.471195,2.265476,8
16,efw_2h_police_and_crime,"[efw_2h_police_and_crime_t-3, efw_2h_police_an...",0.467821,2.249257,4
85,pts_pts_a,"[pts_pts_a_ma10, pts_pts_a, pts_pts_a_delta, p...",0.421205,2.02513,7
58,efw_ie_state_ownership,"[efw_ie_state_ownership_ma10, efw_ie_state_own...",0.408701,1.96501,7



METHOD 2: STABILITY SELECTION

Running stability selection (100 iterations)...

Core variables with stable variants (aggregated from 130 stable features):



Unnamed: 0,core_variable,variants,selection_frequency,mean_coef,std_coef,n_variants
1,efw_1b_transfers_and_subsidies,"[efw_1b_transfers_and_subsidies_ma5, efw_1b_tr...",1.0,0.239347,0.074021,3
58,wb_gc_dod_totl_gd_zs,"[wb_gc_dod_totl_gd_zs_t-1, wb_gc_dod_totl_gd_z...",1.0,-0.18581,0.062025,2
64,wb_ny_gdp_pcap_kd,[wb_ny_gdp_pcap_kd],1.0,-0.301262,0.108617,1
36,efw_5ci_regulatory_burden,"[efw_5ci_regulatory_burden_t-3, efw_5ci_regula...",1.0,0.14462,0.042523,2
8,efw_2f_contracts,[efw_2f_contracts_delta3],1.0,-0.215635,0.046231,1
7,efw_2e_legal_integrity,"[efw_2e_legal_integrity_t-1, efw_2e_legal_inte...",1.0,-0.331201,0.087635,2
52,p5d_durable,[p5d_durable],1.0,-0.335176,0.055965,1
62,wb_ny_gdp_minr_rt_zs,[wb_ny_gdp_minr_rt_zs_ma10],1.0,0.208723,0.056862,1
40,efw_5dii_business_permits,[efw_5dii_business_permits_t-5],1.0,-0.238661,0.05306,1
56,wb_bn_cab_xoka_gd_zs,"[wb_bn_cab_xoka_gd_zs_ma5, wb_bn_cab_xoka_gd_z...",0.993333,-0.177026,0.069874,3



COMPLETED IN 96.2s

