In [None]:
# DRO actual data 

In [None]:
# Environment switches read by the setup cells that follow.
#   IN_COLAB: gates the Colab-only cells (Drive mount, installs, repo clone, keep-alive,
#             cache reset) and selects COLAB_PATHS instead of LOCAL_PATHS.
#   REMOUNT:  gates the cell that unmounts and remounts Google Drive.
IN_COLAB: bool = False
REMOUNT: bool = False

In [None]:
# RUN IN COLAB ONLY: mount Google drive
# Skipped entirely when IN_COLAB is False. When enabled, Drive is mounted at
# /content/drive, which is the prefix of the Drive-backed entries in COLAB_PATHS.
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

In [None]:
# RUN IN COLAB ONLY: unmount, remount Google drive - if required
if REMOUNT:
    !fusermount -u /content/drive 2>/dev/null || true
    !rm -rf /content/drive
    from google.colab import auth
    auth.authenticate_user()
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)

In [None]:
# RUN IN COLAB ONLY: install required modules: ssm, openpyxl
# NOTE(review): none of these installs is pinned to a version or commit, so the
# environment can differ between sessions; consider pinning and using %pip.
if IN_COLAB:
    # ssm is installed straight from its GitHub repository. The trailing " #egg=ssm" is
    # preceded by a space, so the shell appears to treat it as a comment rather than
    # passing it to pip as a URL fragment.
    !pip -q install git+https://github.com/lindermanlab/ssm.git #egg=ssm
    !pip -q install openpyxl
    # Also installs filelock and pyarrow (not mentioned in the header comment).
    !pip -q install filelock pyarrow

In [None]:
# RUN IN COLAB ONLY: force-fetch latest files from GitHub (overwrite local copies)
if IN_COLAB:
    # Remove any previous checkout so the clone below always starts from an empty path.
    !rm -rf /content/SLDS
    # Shallow clone (--depth=1) of the repository into /content/SLDS.
    !git clone --depth=1 https://github.com/chrismader1/SLDS.git /content/SLDS
    import sys
    # Make the checkout importable; presumably this is where the `dro` module imported
    # later in the notebook comes from - TODO confirm.
    sys.path.append("/content/SLDS")

In [None]:
# RUN IN COLAB ONLY: prevent timeouts
# Injects a JavaScript snippet into the notebook page. The snippet registers a timer
# that runs every 60 seconds, looks for the Colab connect button (first inside the
# <colab-connect-button> shadow root, then via a plain '#connect' selector) and clicks
# it, logging the outcome to the browser console.
if IN_COLAB:
    from IPython.display import Javascript, display
    display(Javascript("""
    (function keepAlive(){
      function clickConnect(){
        // Try shadow-root button (newer Colab)
        const el = document.querySelector('colab-connect-button');
        if (el && el.shadowRoot){
          const btn = el.shadowRoot.querySelector('#connect');
          if (btn){ btn.click(); console.log('keepAlive: clicked shadow connect'); return; }
        }
        // Fallbacks
        const btn2 = document.querySelector('#connect, button#connect');
        if (btn2){ btn2.click(); console.log('keepAlive: clicked #connect'); return; }
        console.log('keepAlive: connect button not found');
      }
      setInterval(clickConnect, 60 * 1000);
      console.log('keepAlive: armed');
    })();
    """))

In [None]:
# RUN IN COLAB ONLY: clear cache
# NOTE(review): this cell is gated only by IN_COLAB, so it runs on every Colab
# execution. The files it removes live under .../SLDS/Data/ with .csv names, whereas
# COLAB_PATHS points results_csv / segments_parquet at .../SLDS/Out/
# (gridsearch_results.csv and gridsearch_segments.parquet). Confirm that this reset
# still targets the intended files.
if IN_COLAB:
    # -f: do not complain when a file is already absent.
    !rm -f "/content/drive/MyDrive/Colab Notebooks/SLDS/Data/gridsearch_results.csv"
    !rm -f "/content/drive/MyDrive/Colab Notebooks/SLDS/Data/gridsearch_segments.csv"
    !echo "Reset done."

In [None]:
# Standard library
import itertools
import os
import warnings

# Third-party
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Project-local. The star import presumably supplies the helpers called in later cells
# without an explicit import (dro_pipeline, fit_mvo, evaluate_portfolio, ...); it is
# kept after the other imports so any name it re-exports still takes precedence.
from dro import *

# Suppress the "Incorrect array format causing data to be copied" warning.
warnings.filterwarnings(
    "ignore",
    message=r"Argument (sub|subj) .* Incorrect array format causing data to be copied",
)
# Opt in to pandas' future behaviour of not silently downcasting.
pd.set_option('future.no_silent_downcasting', True)

In [None]:
# paths

# Path set used when IN_COLAB is True: inputs and outputs sit on the mounted Google
# Drive, temporary files on the Colab machine's local disk.
COLAB_PATHS = dict(
    data_excel="/content/drive/MyDrive/Colab Notebooks/SLDS/Data/bbg_data.xlsx",  # Google Drive
    ff_dir="/content/drive/MyDrive/Colab Notebooks/SLDS/Data/",  # Google Drive
    # factor-data file names, keyed by factor set
    ff_files=dict(
        ff5="F-F_Research_Data_5_Factors_2x3_daily.csv",
        ff3="F-F_Research_Data_Factors_daily.csv",
        mom="F-F_Momentum_Factor_daily.csv",
    ),
    results_csv="/content/drive/MyDrive/Colab Notebooks/SLDS/Out/gridsearch_results.csv",  # Google Drive
    segments_parquet="/content/drive/MyDrive/Colab Notebooks/SLDS/Out/gridsearch_segments.parquet",  # Google Drive
    tmp_dir="/content/tmp_slds/",  # Colab local
    segments_tmp_csv="/content/tmp_slds/segments_tmp.csv",  # Colab local
)

# Path set used otherwise: same keys, all pointing at the local SLDS project folder.
LOCAL_PATHS = dict(
    data_excel="/Users/chrismader/Python/SLDS/Data/bbg_data.xlsx",
    ff_dir="/Users/chrismader/Python/SLDS/Data/",
    ff_files=dict(
        ff5="F-F_Research_Data_5_Factors_2x3_daily.csv",
        ff3="F-F_Research_Data_Factors_daily.csv",
        mom="F-F_Momentum_Factor_daily.csv",
    ),
    results_csv="/Users/chrismader/Python/SLDS/Output/gridsearch_results1.csv",
    segments_parquet="/Users/chrismader/Python/SLDS/Output/gridsearch_segments1.parquet",
    tmp_dir="/Users/chrismader/Python/SLDS/tmp_slds/",
    segments_tmp_csv="/Users/chrismader/Python/SLDS/tmp_slds/segments_tmp.csv",
)

In [None]:
# -------------------------
# CONFIG
# -------------------------

# Model / grid-search settings. Path entries are added to this dict right after it is
# built. Commented-out rows are alternative settings kept for quick toggling.
CONFIG = dict(

    # Core defaults
    dt=1.0 / 252.0,
    n_iters=50,
    h_z=3.0,  # CUSUM threshold

    # Batch windows
    batch_grid=[
        {"train_window": 1260, "overlap_window": 1008},
    ],

    # Number of regimes
    K_grid=[5],
    # K_grid=[2, 3],

    # Unrestricted models:
    unrestricted_models=[
        # {"label": "[y]",         "channels": ["y"],                "dim_latent": [1]},
        {"label": "[y,h]",       "channels": ["y","h"],            "dim_latent": [2]},
        # {"label": "[g,v]",       "channels": ["g","v"],            "dim_latent": [2]},
        # {"label": "[g,v,h]",     "channels": ["g","v","h"],        "dim_latent": [2,3]},
        # {"label": "[y,g,v,h]",   "channels": ["y","g","v","h"],    "dim_latent": [3,4]},
    ],

    # Restricted models:
    restricted_models=[
        # {"label": "fund1",        "channels": ["y"],                 "dim_latent": [2],    "C_type": "fund1"},
        # {"label": "fund1_vix",    "channels": ["y","h"],             "dim_latent": [3],    "C_type": "fund1_vix"},
        # {"label": "fund2",        "channels": ["y","g"],             "dim_latent": [2],    "C_type": "fund2"},
        # {"label": "fund2_vix",    "channels": ["y","g","h"],         "dim_latent": [3],    "C_type": "fund2_vix"},
        # {"label": "fund3",        "channels": ["y","v","g"],         "dim_latent": [2],    "C_type": "fund3"},
        # {"label": "fund3_vix",    "channels": ["y","v","g","h"],     "dim_latent": [3],    "C_type": "fund3_vix"},

        # {"label": "factor1",      "channels": ["y"],                 "dim_latent": [2],    "C_type": "factor1"},
        # {"label": "factor1_vix",  "channels": ["y","h"],             "dim_latent": [3],    "C_type": "factor1_vix"},

        {"label": "factor2_ff3",   "channels": ["y","mkt","smb","hml"],                   "dim_latent": [3], "C_type": "factor2"},
        # {"label": "factor2_ff3mom","channels": ["y","mkt","smb","hml","mom"],             "dim_latent": [4], "C_type": "factor2"},
        # {"label": "factor2_ff5",   "channels": ["y","mkt","smb","hml","rmw","cma"],       "dim_latent": [5], "C_type": "factor2"},
        # {"label": "factor2_ff5mom","channels": ["y","mkt","smb","hml","rmw","cma","mom"], "dim_latent": [6], "C_type": "factor2"},
    ],

    # Model selection (both families are currently switched off)
    run_unrestricted=False,
    run_restricted=False,

    # Output
    verbose=False,
    display=False,
    predict_oos=True,
)

# Choose the path set for the active environment and copy every entry into CONFIG
# (a path key overwrites any CONFIG entry with the same name).
if IN_COLAB:
    PATHS = COLAB_PATHS
else:
    PATHS = LOCAL_PATHS
CONFIG.update(PATHS)
# per-security temp file templates used by IOManager; the {tmp_dir} and {security}
# placeholders are left unformatted here.
CONFIG.update(
    tmp_results_fmt="{tmp_dir}/tmp_res_{security}.csv",
    tmp_segments_fmt="{tmp_dir}/tmp_seg_{security}.csv",
)


# Settings handed to dro_pipeline alongside CONFIG and DELTA_DEFAULTS.
DRO_CONFIG = dict(

    # Controls
    run_gridsearch=False,

    # Optimizer
    start_dt=None,
    end_dt=None,
    min_assets=3,
    # GLOBAL is also passed on its own to fit_mvo / evaluate_portfolio for the MVO baseline.
    GLOBAL=dict(risk_budget=0.30, risk_free_rate=0.0, epsilon_sigma=1e-6),
    # matches one of the keys of DELTA_DEFAULTS
    delta_name="bootstrap_np",
)

# Default parameters per delta method. Every entry carries its own name under
# "delta_method" (inserted first), followed by the method-specific parameters.
DELTA_DEFAULTS = {
    method: {"delta_method": method, **params}
    for method, params in {
        "kappa_l2": {"kappa": 1.0},
        "kappa_rate": {"kappa": 1.0},
        "bound_ek": {"alpha": 0.05, "c1": 3.0, "c2": 1.0, "a": 2.0},
        "bootstrap_np": {"alpha": 0.05, "B": 100, "seed": 0},
        "bootstrap_gaussian": {"alpha": 0.05, "B": 100, "seed": 0},
    }.items()
}


In [None]:
# Pick the array backend: CuPy (with its cupyx.scipy counterparts) when all three
# imports succeed, otherwise NumPy/SciPy. Whichever is chosen is exposed under the
# same names (xp, linear_sum_assignment, xp_stats); GPU records the choice.
try:
    import cupy as xp
    from cupyx.scipy.optimize import linear_sum_assignment  # GPU Hungarian
    from cupyx.scipy import stats as xp_stats               # for stats
except Exception:
    # Any failure above (including a partial CuPy import) falls back to CPU and
    # rebinds every name.
    import numpy as xp
    from scipy.optimize import linear_sum_assignment
    from scipy import stats as xp_stats
    GPU = False
else:
    GPU = True

In [None]:
# -------------------------
# Execute
# -------------------------

if __name__ == "__main__":
    import os

    # Default universe; used only when no grid-search results file is available.
    securities = ['NVDA', 'AAPL', 'AVGO', 'GOOGL', 'ORCL', 'IBM', 'CSCO', 'CRM']

    # Prefer the securities recorded in the grid-search results of the active
    # environment. CONFIG["results_csv"] is filled from PATHS, so the same cell works
    # on Colab and locally (the path was previously hard-coded to the local machine,
    # where it equals LOCAL_PATHS["results_csv"]).
    filename_results = CONFIG["results_csv"]
    if os.path.exists(filename_results):
        res = pd.read_csv(filename_results)
        securities = list(res["security"].unique())

    # Run the pipeline; later cells read out["PartA"], out["PartB"] and out["series"].
    out = dro_pipeline(securities, CONFIG, DRO_CONFIG, DELTA_DEFAULTS)

    # MVO baseline: fitted and evaluated on the Part A data with the same GLOBAL settings
    fitA_mvo = fit_mvo(out["PartA"]["data"], {}, DRO_CONFIG["GLOBAL"])
    summA_mvo = evaluate_portfolio(fitA_mvo, out["PartA"]["data"], DRO_CONFIG["GLOBAL"])

    # One single-row frame per model, keyed by display name
    results = {
        "DRO (Static)": pd.DataFrame([out["PartA"]["summary"]]),
        "Regime-DRO":   pd.DataFrame([out["PartB"]["summary"]]),
        "MVO":          pd.DataFrame([summA_mvo]),
    }

    # Summary table, restricted to the headline rows
    tbl = oos_summary(results, model_order=["MVO","DRO (Static)","Regime-DRO"])
    display(tbl.loc[["mu_ann","sigma_ann","sharpe_ann","vol_breach","max_drawdown"]])


In [None]:
# Plot Portfolio A vs Portfolio B

import matplotlib.pyplot as plt
import pandas as pd

# Daily series for both portfolios, sorted by index; a missing key yields an empty series
daily_series = out["series"]
sA = pd.Series(daily_series.get("PartA_daily", pd.Series(dtype=float))).sort_index()
sB = pd.Series(daily_series.get("PartB_daily", pd.Series(dtype=float))).sort_index()

if any(s.empty for s in (sA, sB)):
    raise ValueError("Missing series in `out['series']`. Expected 'PartA_daily' and 'PartB_daily'.")

# Compound the daily values into cumulative returns
cumA = sA.add(1).cumprod().sub(1)
cumB = sB.add(1).cumprod().sub(1)

# --- both cumulative curves on one chart ---
fig, ax = plt.subplots(figsize=(8, 4))
for curve, label in ((cumA, "Portfolio A (Static)"), (cumB, "Portfolio B (Regime)")):
    curve.plot(ax=ax, label=label)
ax.set_title("Cumulative Return")
ax.set_xlabel("Date")
ax.set_ylabel("Cumulative return")
ax.grid(True, alpha=0.3)
ax.legend()

In [None]:
# Show deltas got tighter

summA = pd.Series(out["PartA"]["summary"])
summB = pd.Series(out["PartB"]["summary"])

# Part A: single "delta" entry (NaN when absent)
delta_A = float(summA.get("delta", float("nan")))

# Part B: every summary entry whose label starts with "delta_k"
delta_ks = []
for key in summB.index:
    if str(key).startswith("delta_k"):
        delta_ks.append(summB[key])

# Descriptive statistics of the Part B deltas, shown next to the Part A value
stat_names = ["count","mean","min","25%","50%","75%","max"]
delta_k_stats = pd.Series(delta_ks, dtype=float).describe()[stat_names].to_dict()
display(pd.DataFrame({
    "Static δ (Part A)": [delta_A],
    "Regime δ_k (Part B)": [delta_k_stats],
}))


In [None]:
# Compare OOS performance

from dro import oos_summary

# NOTE: this rebinds `results` to the two DRO variants only (no "MVO" entry), which is
# what the hypothesis-test cell below then receives.
model_summaries = {
    "DRO (Static)": out["PartA"]["summary"],
    "Regime-DRO": out["PartB"]["summary"],
}
results = {name: pd.DataFrame([summary]) for name, summary in model_summaries.items()}

tbl = oos_summary(results, model_order=list(results))
headline_rows = ["mu_ann","sigma_ann","sharpe_ann","vol_breach","max_drawdown"]
display(tbl.loc[headline_rows])


In [None]:
# Quick hypothesis checks. The paired tests still run with a single trial (m=1), but
# their p-values will be NaN, which is fine here; with multiple trials they become
# informative.

from dro import hypothesis_tests

# Both tests compare Regime-DRO (A) against static DRO (B).
comparison = {"A": "Regime-DRO", "B": "DRO (Static)"}
hypothesis_tests(
    results,
    tests=[{"kind": kind, **comparison} for kind in ("breach_less", "superiority_sharpe")],
    alpha=0.05,
)

# Optional: add an MVO baseline with fit_mvo(...) exactly like Part-A (set delta=0) and
# append it to results.

In [None]:
# Expected results to confirm:
# - Radii: the distribution of delta_k in Part B should typically lie below the Part A delta; when regimes are stationary, the radii are tighter.
# - OOS: higher Sharpe, lower breach and often lower drawdown for Regime-DRO vs static DRO (and vs MVO), on the same risk budget.