In [1]:
# rSLDS grid search on actual data (driver notebook for gridsearch.pipeline_actual)

In [2]:
# Environment switches; edit by hand before running the notebook.
# IN_COLAB gates every "RUN IN COLAB ONLY" cell and selects COLAB_PATHS over LOCAL_PATHS.
IN_COLAB = False
# REMOUNT gates the cell that unmounts and force-remounts Google Drive.
REMOUNT = False

In [3]:
# RUN IN COLAB ONLY: mount Google Drive at /content/drive
# (the Drive-backed entries of COLAB_PATHS live under this mount point).
if IN_COLAB:
    # Import stays inside the guard so a local run never tries to import google.colab.
    from google.colab import drive
    drive.mount('/content/drive')

In [4]:
# RUN IN COLAB ONLY: unmount, remount Google drive - if required
if REMOUNT:
    !fusermount -u /content/drive 2>/dev/null || true
    !rm -rf /content/drive
    from google.colab import auth
    auth.authenticate_user()
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)

In [5]:
# RUN IN COLAB ONLY: install required modules: ssm (from GitHub), openpyxl, filelock, pyarrow
# NOTE(review): none of these installs is version-pinned, so a fresh Colab session may
# pick up different releases than an earlier run did.
if IN_COLAB:
    !pip -q install git+https://github.com/lindermanlab/ssm.git #egg=ssm
    !pip -q install openpyxl
    !pip -q install filelock pyarrow

In [6]:
# RUN IN COLAB ONLY: force-fetch latest files from GitHub (overwrite local copies)
if IN_COLAB:
    !rm -rf /content/SLDS
    !git clone --depth=1 https://github.com/chrismader1/SLDS.git /content/SLDS
    import sys
    sys.path.append("/content/SLDS")

In [7]:
# RUN IN COLAB ONLY: prevent timeouts
# Emits a browser-side script that, once a minute, looks for Colab's connect button
# (first inside the <colab-connect-button> shadow root, then via a plain #connect
# selector) and clicks it; each attempt is logged to the browser console.
# NOTE(review): every execution of this cell arms an additional setInterval timer;
# nothing here clears timers armed by earlier executions.
if IN_COLAB:
    from IPython.display import Javascript, display
    display(Javascript("""
    (function keepAlive(){
      function clickConnect(){
        // Try shadow-root button (newer Colab)
        const el = document.querySelector('colab-connect-button');
        if (el && el.shadowRoot){
          const btn = el.shadowRoot.querySelector('#connect');
          if (btn){ btn.click(); console.log('keepAlive: clicked shadow connect'); return; }
        }
        // Fallbacks
        const btn2 = document.querySelector('#connect, button#connect');
        if (btn2){ btn2.click(); console.log('keepAlive: clicked #connect'); return; }
        console.log('keepAlive: connect button not found');
      }
      setInterval(clickConnect, 60 * 1000);
      console.log('keepAlive: armed');
    })();
    """))

In [8]:
# RUN IN COLAB ONLY: clear cache
# NOTE(review): the commands below delete gridsearch_results.csv and gridsearch_segments.csv
# under .../SLDS/Data/, whereas COLAB_PATHS writes results to .../SLDS/Out/gridsearch_results.csv
# and segments to .../SLDS/Out/gridsearch_segments.parquet. As written, the results file that
# main() resumes from is NOT removed by this cell - confirm which location is intended.
# NOTE(review): the cell runs whenever IN_COLAB is True; if it is retargeted at the Out/ files,
# every full re-run would discard resumable results unless an explicit opt-in flag is added.
if IN_COLAB:
    !rm -f "/content/drive/MyDrive/Colab Notebooks/SLDS/Data/gridsearch_results.csv"
    !rm -f "/content/drive/MyDrive/Colab Notebooks/SLDS/Data/gridsearch_segments.csv"
    !echo "Reset done."

In [9]:
import numpy as np
import pandas as pd
from gridsearch import pipeline_actual

In [10]:
# paths

# Factor data file names; identical in both environments, so declared once
# and copied into each path set (each set still owns its own dict object).
_FF_FILES = {
    "ff5": "F-F_Research_Data_5_Factors_2x3_daily.csv",
    "ff3": "F-F_Research_Data_Factors_daily.csv",
    "mom": "F-F_Momentum_Factor_daily.csv",
}

# Google Colab layout: inputs and final outputs sit on the mounted Google Drive,
# while scratch files stay on the Colab VM's local disk.
COLAB_PATHS = {
    "data_excel": "/content/drive/MyDrive/Colab Notebooks/SLDS/Data/bbg_data.xlsx",
    "ff_dir": "/content/drive/MyDrive/Colab Notebooks/SLDS/Data/",
    "ff_files": dict(_FF_FILES),
    "results_csv": "/content/drive/MyDrive/Colab Notebooks/SLDS/Out/gridsearch_results.csv",
    "segments_parquet": "/content/drive/MyDrive/Colab Notebooks/SLDS/Out/gridsearch_segments.parquet",
    "tmp_dir": "/content/tmp_slds/",
    "segments_tmp_csv": "/content/tmp_slds/segments_tmp.csv",
}

# Local workstation layout: everything lives under the project checkout.
LOCAL_PATHS = {
    "data_excel": "/Users/chrismader/Python/SLDS/Data/bbg_data.xlsx",
    "ff_dir": "/Users/chrismader/Python/SLDS/Data/",
    "ff_files": dict(_FF_FILES),
    "results_csv": "/Users/chrismader/Python/SLDS/Out/gridsearch_results.csv",
    "segments_parquet": "/Users/chrismader/Python/SLDS/Out/gridsearch_segments.parquet",
    "tmp_dir": "/Users/chrismader/Python/SLDS/tmp_slds/",
    "segments_tmp_csv": "/Users/chrismader/Python/SLDS/tmp_slds/segments_tmp.csv",
}

In [11]:
# --------------------------------------------------------------------------------------
# CONFIG
# --------------------------------------------------------------------------------------

# Grid-search settings handed to pipeline_actual(CONFIG=...). Path entries and the
# temp-file templates are merged in right after this literal. Commented-out entries
# are alternative grid points kept for quick toggling.
CONFIG = {
    
    # Core defaults
    "n_jobs": -1,  # multi-threading
    # Time step; the run log reports it as "1/252" (presumably one trading day in years - confirm).
    "dt": 1.0 / 252.0,
    # Presumably the number of fitting iterations per model - confirm in gridsearch.
    "n_iters": 10,
    "h_z": 3.0,  # CUSUM parameter
    
    # Batch windows: one dict per (train_window, overlap_window) combination to evaluate
    "batch_grid": [
        {"train_window": 756, "overlap_window": 5},
        # {"train_window": 256, "overlap_window": 63},
        # {"train_window": 504, "overlap_window": 63},
        # {"train_window": 756, "overlap_window": 63},
        # {"train_window": 1260, "overlap_window": 63},
    ],

    # Number of regimes: candidate values of K to search over
    "K_grid": [2, 3, 4],
    
    # Unrestricted models: each entry gives a label, the observed channels,
    # and the candidate latent dimensions to try.
    "unrestricted_models": [
        {"label": "[y]",         "channels": ["y"],                "dim_latent": [1]},
        # {"label": "[y,h]",       "channels": ["y","h"],            "dim_latent": [2]},
        # {"label": "[g,v]",       "channels": ["g","v"],            "dim_latent": [2]},
        # {"label": "[g,v,h]",     "channels": ["g","v","h"],        "dim_latent": [2,3]},
        # {"label": "[y,g,v,h]",   "channels": ["y","g","v","h"],    "dim_latent": [3,4]},
    ],

    # Restricted models: same fields as above plus "C_type"
    # (presumably selects the structure imposed on the model - confirm in gridsearch).
    "restricted_models": [
        # {"label": "fund1",        "channels": ["y"],                 "dim_latent": [2],    "C_type": "fund1"},
        # {"label": "fund1_vix",    "channels": ["y","h"],             "dim_latent": [3],    "C_type": "fund1_vix"},
        # {"label": "fund2",        "channels": ["y","g"],             "dim_latent": [2],    "C_type": "fund2"},
        # {"label": "fund2_vix",    "channels": ["y","g","h"],         "dim_latent": [3],    "C_type": "fund2_vix"},
        # {"label": "fund3",        "channels": ["y","v","g"],         "dim_latent": [2],    "C_type": "fund3"},
        # {"label": "fund3_vix",    "channels": ["y","v","g","h"],     "dim_latent": [3],    "C_type": "fund3_vix"},

        # {"label": "factor1",      "channels": ["y"],                 "dim_latent": [2],    "C_type": "factor1"},
        # {"label": "factor1_vix",  "channels": ["y","h"],             "dim_latent": [3],    "C_type": "factor1_vix"},

        {"label": "factor2_ff3",   "channels": ["y","mkt","smb","hml"],                   "dim_latent": [3], "C_type": "factor2"},
        # {"label": "factor2_ff3mom","channels": ["y","mkt","smb","hml","mom"],             "dim_latent": [4], "C_type": "factor2"},
        # {"label": "factor2_ff5",   "channels": ["y","mkt","smb","hml","rmw","cma"],       "dim_latent": [5], "C_type": "factor2"},
        # {"label": "factor2_ff5mom","channels": ["y","mkt","smb","hml","rmw","cma","mom"], "dim_latent": [6], "C_type": "factor2"},
    ],

    # Model selection: switches for the two model families defined above
    "run_unrestricted": True,
    "run_restricted": True,

    # Output
    "verbose": False,
    "display": False,
}

# Choose the path set that matches the runtime and merge it into CONFIG.
PATHS = COLAB_PATHS if IN_COLAB else LOCAL_PATHS
CONFIG.update(PATHS)

# Per-security temp-file name templates used by IOManager.
# NOTE(review): tmp_dir already ends with "/", so these templates produce a doubled
# separator (the run log shows .../tmp_slds//tmp_res_MSFT.csv).
CONFIG.update({
    "tmp_results_fmt": "{tmp_dir}/tmp_res_{security}.csv",
    "tmp_segments_fmt": "{tmp_dir}/tmp_seg_{security}.csv",
})

# Echo the effective I/O settings so the run log records where files go.
for _cfg_key in (
    "data_excel",
    "results_csv",
    "segments_parquet",
    "tmp_dir",
    "tmp_results_fmt",
    "tmp_segments_fmt",
):
    print(f"[CFG] {_cfg_key} =", CONFIG[_cfg_key])


[CFG] data_excel = /Users/chrismader/Python/SLDS/Data/bbg_data.xlsx
[CFG] results_csv = /Users/chrismader/Python/SLDS/Out/gridsearch_results.csv
[CFG] segments_parquet = /Users/chrismader/Python/SLDS/Out/gridsearch_segments.parquet
[CFG] tmp_dir = /Users/chrismader/Python/SLDS/tmp_slds/
[CFG] tmp_results_fmt = {tmp_dir}/tmp_res_{security}.csv
[CFG] tmp_segments_fmt = {tmp_dir}/tmp_seg_{security}.csv


In [25]:
# --------------------------------------------------------------------------------------
# Execute
# --------------------------------------------------------------------------------------

def main():
    """Run the grid search for every ticker not yet present in the results CSV.

    Reads ``CONFIG["results_csv"]``. If that file exists and is non-empty, every
    ticker listed in its ``security`` column is treated as completed and skipped;
    otherwise the whole ticker list is processed. The remaining tickers are
    passed to ``pipeline_actual`` together with ``CONFIG``.

    Returns:
        None. Progress is reported via ``print``.

    Raises:
        AssertionError: if the results CSV exists but has no ``security`` column.
    """
    import os  # not imported at notebook level, so kept local to this cell

    csv_path = CONFIG["results_csv"]

    # Full universe, kept for reference (note: GOOGL is listed twice):
    # securities_master = [
    #     "MSFT","NVDA","AAPL","AVGO","GOOGL","ORCL","IBM","CSCO","CRM","AMD","INTU","NOW","TXN","QCOM",
    #     "ADBE","AMAT","PLTR","ACN","META","NFLX","GOOGL","DIS","VZ","T","UBER","AMZN","TSLA","HD",
    #     "MCD","BKNG","COST","WMT","PG","KO","PM","PEP","BRK/B","JPM","V","MA","BAC","WFC","GS","MS","SPGI",
    #     "SCHW","AXP","BLK","C","PGR","LLY","JNJ","ABBV","UNH","ABT","MRK","ISRG","BSX","TMO","AMGN","RTX",
    #     "XOM","CVX","GE","CAT","BA","HON","LIN","NEE","DUK","SO","AEP","D","AMT","PLD","EQIX","O","CBRE",]

    securities_master = ["MSFT","NVDA","AAPL","AVGO","GOOGL","ORCL","IBM","CSCO"]
    # Drop accidental repeats (first occurrence wins, order preserved) so that no
    # ticker is submitted to the pipeline twice in one run.
    securities_master = list(dict.fromkeys(securities_master))

    # resume if CSV exists; otherwise start from beginning
    if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
        df = pd.read_csv(csv_path)
        assert "security" in df.columns, "results_csv missing 'security' column"
        done_set = set(df["security"].dropna().astype(str))
        done = [s for s in securities_master if s in done_set]
        securities = [s for s in securities_master if s not in done_set]
        print("Resuming from CSV:", csv_path)
        print("Already completed:", done)
    else:
        # Copy so the list handed to the pipeline is independent of the master list.
        securities = list(securities_master)
        print("Resuming from: START (no/empty CSV)")

    print("Remaining tickers:", len(securities), securities)

    if not securities:
        print("Nothing to do.")
        return

    pipeline_actual(securities=securities, CONFIG=CONFIG)

if __name__ == "__main__":
    main()


Resuming from: START (no/empty CSV)
Remaining tickers: 8 ['MSFT', 'NVDA', 'AAPL', 'AVGO', 'GOOGL', 'ORCL', 'IBM', 'CSCO']
[GS][init] creating master results CSV: /Users/chrismader/Python/SLDS/Out/gridsearch_results.csv


ELBO: 1715.9: 100%|██████████| 10/10 [00:00<00:00, 31.67it/s]
ELBO: 1727.2: 100%|██████████| 10/10 [00:00<00:00, 29.22it/s]
ELBO: 1684.3: 100%|██████████| 10/10 [00:00<00:00, 27.06it/s]
ELBO: 1701.4: 100%|██████████| 10/10 [00:00<00:00, 34.62it/s]
ELBO: 1721.7: 100%|██████████| 10/10 [00:00<00:00, 31.47it/s]
ELBO: 1694.4: 100%|██████████| 10/10 [00:00<00:00, 30.07it/s]
ELBO: 1612.7: 100%|██████████| 10/10 [00:00<00:00, 34.59it/s]
ELBO: 1655.7: 100%|██████████| 10/10 [00:00<00:00, 30.63it/s]
ELBO: 1636.6: 100%|██████████| 10/10 [00:00<00:00, 29.37it/s]
ELBO: 1639.6: 100%|██████████| 10/10 [00:00<00:00, 33.81it/s]
ELBO: 1621.4: 100%|██████████| 10/10 [00:00<00:00, 30.35it/s]
ELBO: 1620.7: 100%|██████████| 10/10 [00:00<00:00, 29.41it/s]
ELBO: 1072.4: 100%|██████████| 10/10 [00:00<00:00, 46.84it/s]
ELBO: 1102.4: 100%|██████████| 10/10 [00:00<00:00, 41.69it/s]
ELBO: 1069.9: 100%|██████████| 10/10 [00:00<00:00, 38.43it/s]


[GS][append] MSFT | [y] -> rows=3 cols=26
[IO][results] append -> /Users/chrismader/Python/SLDS/tmp_slds//tmp_res_MSFT.csv rows=3 header=True
[IO][MSFT-tmp_res] path=/Users/chrismader/Python/SLDS/tmp_slds//tmp_res_MSFT.csv size=1483 bytes
[IO][MSFT-tmp_res] L1: security,config,rank,score,dt,n_regimes,dim_latent,single_subspace,train_window,overlap_window,avg_inferred_regime_length,elbo_start (min all runs),elbo_end (max all runs),elbo_delta (max all runs),cpll (max all runs),"max cpll (proxy bound
[IO][MSFT-tmp_res] L2: MSFT,[y],1,-0.02739466601216889,1/252,3,1,True,756,5,880.25,-84557.92926001374,1727.1997056114292,86285.12896562518,-14068.100851238918,9530.07411800884,"{0: 2258, 1: 751, 2: 512}",-0.02739466601216889,0.22238212914188948,0.2568120762097152
[GS][segments] MSFT | [y] -> rows=10563
[IO][segments] append -> /Users/chrismader/Python/SLDS/tmp_slds//tmp_seg_MSFT.csv rows=10563 header=True
[IO][MSFT-tmp_seg] path=/Users/chrismader/Python/SLDS/tmp_slds//tmp_seg_MSFT.csv size=28

ELBO: -8273.9: 100%|██████████| 1/1 [00:00<00:00, 17.32it/s]
ELBO: -8039.1: 100%|██████████| 1/1 [00:00<00:00, 12.76it/s]
ELBO: -7905.4: 100%|██████████| 1/1 [00:00<00:00, 22.62it/s]
ELBO: -7886.6: 100%|██████████| 1/1 [00:00<00:00, 20.90it/s]
ELBO: -7870.4: 100%|██████████| 1/1 [00:00<00:00, 23.23it/s]
ELBO: -7891.3: 100%|██████████| 1/1 [00:00<00:00, 20.75it/s]
ELBO: -7855.4: 100%|██████████| 1/1 [00:00<00:00, 22.73it/s]
ELBO: -7850.1: 100%|██████████| 1/1 [00:00<00:00, 22.25it/s]
ELBO: -7837.3: 100%|██████████| 1/1 [00:00<00:00, 23.31it/s]
ELBO: -7814.6: 100%|██████████| 1/1 [00:00<00:00, 22.26it/s]
ELBO: -7848.0: 100%|██████████| 1/1 [00:00<00:00, 22.52it/s]
ELBO: -7878.3: 100%|██████████| 1/1 [00:00<00:00, 21.90it/s]
ELBO: -7813.9: 100%|██████████| 1/1 [00:00<00:00, 22.97it/s]
ELBO: -7748.6: 100%|██████████| 1/1 [00:00<00:00, 22.18it/s]
ELBO: -7823.7: 100%|██████████| 1/1 [00:00<00:00, 22.94it/s]
ELBO: -7806.8: 100%|██████████| 1/1 [00:00<00:00, 21.88it/s]
ELBO: -7794.9: 100%|████

KeyboardInterrupt: 