In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd
from regime_ml.data.common.loaders import load_dataframe
from regime_ml.utils.config import load_configs
from regime_ml.regimes.hmm import HMMRegimeDetector
from regime_ml.features.macro.selection import get_top_features

In [118]:
macro_cfg = load_configs()["macro_data"]["regime_universe"]
feat_path = macro_cfg["raw_features_path"]
raw_path = macro_cfg["raw_path"]

In [119]:
df_feat = load_dataframe(feat_path)
df_feat = df_feat.dropna() # drop burn-in period

In [121]:
from regime_ml.regimes.hmm import initialise_emissions
from regime_ml.regimes.hmm import initialise_transitions
from regime_ml.regimes.hmm import initialise_probabilities

fitted_models = {}
count = 0
for n_features in range(5, 15):
    selected_features = get_top_features(n=n_features)
    df_feat_selected = df_feat[selected_features]
    feature_names = df_feat_selected.columns

    for split_date in ['2019-01-01', '2020-01-01', '2021-01-01']:
        df_train = df_feat_selected.loc[:split_date]
        X_train = df_train.values
    
        for n_regimes in range(3, 5):
            init_startprob = initialise_probabilities(n_regimes=n_regimes)

            for covariance_type in ['full', 'diag']:
                init_means, init_covars, scaler = initialise_emissions(df_train, n_clusters=n_regimes, covariance_type=covariance_type)

                for p_stay in [0.9, 0.95, 0.99]:
                    init_transmat = initialise_transitions(n_regimes=n_regimes, p_stay=p_stay)

                    model = HMMRegimeDetector(
                        n_regimes=n_regimes,
                        covariance_type=covariance_type,
                        startprob=init_startprob,
                        transmat=init_transmat,
                        means=init_means,
                        covars=init_covars,
                        feature_names=feature_names, # type: ignore
                    )

                    X_train_scaled = scaler.fit_transform(X_train)
                    fitted_model = model.fit(X_train_scaled)
                    fitted_models[f"{count}"] = {
                        "model": fitted_model,
                        "split_date": split_date,
                        "n_features": n_features,
                        "n_regimes": n_regimes,
                        "covariance_type": covariance_type,
                        "p_stay": p_stay,
                        "scaler": scaler,
                    }
                    count += 1

In [122]:
from regime_ml.regimes.evaluation import compare_hmm_models
results = compare_hmm_models(df_feat, fitted_models)

In [123]:
records = [
    {"model_id": mid, **pd.json_normalize(v, sep=".").iloc[0].to_dict()}
    for mid, v in results.items()
]
df = pd.DataFrame(records).set_index("model_id")

In [224]:
# NOTE: investigate why nearly have of the tv_distances are not valid (does this make sense or is there a fundamental issue with the models)
from regime_ml.data.macro import build_featuregroup_map
_map = build_featuregroup_map(df_feat.columns)

In [124]:
from regime_ml.regimes.selection import select_best_hmm_model
best_id, leaderboard, rejected = select_best_hmm_model(results)
display(leaderboard[["model_id","final_score","macro_score","transition_score","stability_score","oos_macro_score"]])

Unnamed: 0,model_id,final_score,macro_score,transition_score,stability_score,oos_macro_score
0,68,0.784772,0.935811,0.6,0.736655,0.925676
1,6,0.784724,0.989865,0.560144,0.682939,1.0
2,66,0.78315,0.929054,0.601351,0.738007,0.932432
3,8,0.781621,0.983108,0.558809,0.684291,0.993243
4,67,0.780515,0.922297,0.602703,0.735304,0.939189
5,7,0.778513,0.976351,0.557458,0.685642,0.986486
6,71,0.732787,0.837838,0.586486,0.711149,0.878378
7,69,0.73049,0.831081,0.587838,0.7125,0.871622
8,70,0.728193,0.824324,0.589189,0.713851,0.864865
9,27,0.690764,0.658784,0.603502,0.801014,0.918919


In [225]:
best_model = fitted_models["68"]
model = best_model["model"]
selected_features = model.feature_names
split_date = best_model["split_date"]

In [226]:
from regime_ml.regimes.labeling import label_regimes
df_selected = df_feat[selected_features]
X_full = df_selected.values
X_scaled = best_model["scaler"].fit_transform(X_full)

regimes = model.predict(X_scaled)
filtered_proba = model.filter_proba(X_scaled)
smoothed_proba = model.smooth_proba(X_scaled)

labels_info = label_regimes(
    X=X_scaled,
    proba=smoothed_proba,
    feature_names=selected_features
)

df_groups = pd.DataFrame(labels_info["state_group_scores"])
df_groups

Unnamed: 0,0,1,2,3
growth,0.062406,0.435794,0.153498,-1.043365
inflation,0.23821,-0.341378,0.866284,-0.530029
liquidity,0.210569,-0.879987,0.365653,1.022751
rates,-0.86412,0.506346,-0.187092,0.244096
stress,0.026419,-0.120488,-0.080559,0.277386


In [227]:
from regime_ml.regimes.visualisation import plot_regime_timeseries
from regime_ml.regimes.visualisation import plot_ticker_by_regime

fig1 = plot_regime_timeseries(df_selected, regimes, filtered_proba, regime_names=labels_info["state_labels"])
fig2 = plot_ticker_by_regime("SPY", df_selected.index, regimes, regime_names=labels_info["state_labels"])

fig2.show()
fig1.show()

Downloading SPY data from 2004-09-01 to 2026-01-23...


In [228]:
from regime_ml.regimes.evaluation import equity_metrics_by_regime
import yfinance as yf
regimes = list(map(lambda x: labels_info["state_labels"][x], regimes))

df_sap = yf.download("SAP", start=df_feat.index[0], end=df_feat.index[-2], multi_level_index=False)["Close"].reindex(df_feat.index) # type: ignore

df_equity_metrics = equity_metrics_by_regime(df_sap, regimes)
df_equity_metrics

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,regime,n_days,mean_daily_ret,ann_return,ann_vol,sharpe,max_drawdown,up_day_frac
0,Early Expansion / Liquidity Driven,1238,0.000923,0.261591,0.258571,0.899097,-0.249767,0.516963
1,Stagflation,1087,0.000708,0.195115,0.241575,0.738095,-0.275749,0.522539
2,Policy-Contstrained Expansion,1978,0.000521,0.140376,0.244231,0.537983,-0.373861,0.540445
3,Recession / Risk-Off,1029,1.3e-05,0.003192,0.37735,0.008446,-0.538064,0.511176
