In [None]:
# rSLDS grid search on actual data (Colab notebook)

In [None]:
# RUN IN COLAB ONLY: mount Google Drive at /content/drive.
# Later cells read and write files under /content/drive/MyDrive/..., so this
# cell has to run before them.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# RUN IN COLAB ONLY (optional): unmount and remount Google Drive if required - uncomment the lines below to use
# !fusermount -u /content/drive 2>/dev/null || true
# !rm -rf /content/drive
# from google.colab import auth
# auth.authenticate_user()
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

In [None]:
# RUN IN COLAB ONLY: install required modules: ssm, openpyxl
!pip -q install git+https://github.com/lindermanlab/ssm.git #egg=ssm
!pip -q install openpyxl

In [None]:
# RUN IN COLAB ONLY: force-fetch latest files from GitHub (overwrite local copies)
!curl -L -H "Cache-Control: no-cache" -o gridsearch.py https://raw.githubusercontent.com/chrismader1/SLDS/main/gridsearch.py
!curl -L -H "Cache-Control: no-cache" -o rSLDS.py https://raw.githubusercontent.com/chrismader1/SLDS/main/rSLDS.py

In [None]:
# RUN IN COLAB ONLY: prevent timeouts
%%javascript
// Ping the backend + re-click the connect button every 60s
(function keepAlive(){
  const clickConnect = () => {
    const btn = document.querySelector('colab-connect-button') || document.querySelector('#connect');
    if (btn) btn.click();
    console.log('keepAlive: ping');
  };
  setInterval(clickConnect, 60000);
})();

In [None]:
# RUN IN COLAB ONLY: clear cache
# Deletes the grid-search results and segments CSVs under .../SLDS/Data/
# (-f: no error if a file is absent), then prints a confirmation.
# NOTE(review): main() in the "Execute" cell looks for gridsearch_results.csv
# under .../SLDS/Output/, not .../SLDS/Data/ - confirm which folder this reset
# is meant to target.
# NOTE(review): this cell also runs on "Run all"; skip it when the intent is to
# resume a previous run rather than start over.
!rm -f "/content/drive/MyDrive/Colab Notebooks/SLDS/Data/gridsearch_results.csv"
!rm -f "/content/drive/MyDrive/Colab Notebooks/SLDS/Data/gridsearch_segments.csv"
!echo "Reset done."

In [None]:
import numpy as np
import pandas as pd
from gridsearch import pipeline_actual

In [None]:
# --------------------------------------------------------------------------------------
# CONFIG
# --------------------------------------------------------------------------------------

# Settings dictionary handed to pipeline_actual(...) as its CONFIG argument.
# Keys and values are plain Python data; how each one is interpreted is decided
# inside the gridsearch module (not shown in this notebook).
CONFIG = dict(

    # ---- Core defaults ----
    n_jobs=-1,           # multi-threading
    dt=1.0 / 252.0,      # presumably one trading day in years - TODO confirm
    n_iters=50,
    h_z=3.0,             # CUSUM parameter

    # ---- Batch windows: one entry per (train_window, overlap_window) setting ----
    batch_grid=[
        dict(train_window=756, overlap_window=5),
        # dict(train_window=256, overlap_window=63),
        # dict(train_window=504, overlap_window=63),
        # dict(train_window=756, overlap_window=63),
        # dict(train_window=1260, overlap_window=63),
    ],

    # ---- Number of regimes ----
    K_grid=[2, 3, 4],

    # ---- Unrestricted models: label, observed channels, latent dimensions to try ----
    unrestricted_models=[
        dict(label="[y]",       channels=["y"],                 dim_latent=[1]),
        dict(label="[y,h]",     channels=["y", "h"],            dim_latent=[2]),
        dict(label="[g,v]",     channels=["g", "v"],            dim_latent=[2]),
        dict(label="[g,v,h]",   channels=["g", "v", "h"],       dim_latent=[2, 3]),
        dict(label="[y,g,v,h]", channels=["y", "g", "v", "h"],  dim_latent=[3, 4]),
    ],

    # ---- Restricted models: as above, plus a C_type tag per entry ----
    restricted_models=[
        # fund* family
        dict(label="fund1",     channels=["y"],                 dim_latent=[2], C_type="fund1"),
        dict(label="fund1_vix", channels=["y", "h"],            dim_latent=[3], C_type="fund1_vix"),
        dict(label="fund2",     channels=["y", "g"],            dim_latent=[2], C_type="fund2"),
        dict(label="fund2_vix", channels=["y", "g", "h"],       dim_latent=[3], C_type="fund2_vix"),
        dict(label="fund3",     channels=["y", "v", "g"],       dim_latent=[2], C_type="fund3"),
        dict(label="fund3_vix", channels=["y", "v", "g", "h"],  dim_latent=[3], C_type="fund3_vix"),

        # factor1 family
        dict(label="factor1",     channels=["y"],               dim_latent=[2], C_type="factor1"),
        dict(label="factor1_vix", channels=["y", "h"],          dim_latent=[3], C_type="factor1_vix"),

        # factor2 family: all entries share C_type "factor2"
        dict(label="factor2_ff3",
             channels=["y", "mkt", "smb", "hml"],
             dim_latent=[3], C_type="factor2"),
        dict(label="factor2_ff3mom",
             channels=["y", "mkt", "smb", "hml", "mom"],
             dim_latent=[4], C_type="factor2"),
        dict(label="factor2_ff5",
             channels=["y", "mkt", "smb", "hml", "rmw", "cma"],
             dim_latent=[5], C_type="factor2"),
        dict(label="factor2_ff5mom",
             channels=["y", "mkt", "smb", "hml", "rmw", "cma", "mom"],
             dim_latent=[6], C_type="factor2"),
    ],

    # ---- Model selection ----
    run_unrestricted=True,
    run_restricted=True,

    # ---- Output ----
    display=False,
    verbose=False,
)


In [None]:
# --------------------------------------------------------------------------------------
# Execute
# --------------------------------------------------------------------------------------

def _dedupe_preserving_order(items):
    """Return ``items`` as a list with repeated entries dropped, keeping first occurrences in order."""
    return list(dict.fromkeys(items))


def _select_remaining(securities_master, csv_path):
    """Decide which tickers still have to be run, based on the results CSV.

    Parameters
    ----------
    securities_master : list of str
        Duplicate-free ticker universe in processing order.
    csv_path : str
        Path of the results CSV; its ``security`` column is read to find the
        ticker of the last written row.

    Returns
    -------
    tuple of (list of str, str)
        The tickers to run and a short description of the resume point
        (printed by ``main`` after "Resuming from:").
    """
    import os
    import pandas as pd

    if not (os.path.exists(csv_path) and os.path.getsize(csv_path) > 0):
        return list(securities_master), "START (no CSV)"

    results = pd.read_csv(csv_path)
    if "security" not in results.columns or len(results) == 0:
        return list(securities_master), "START (empty CSV or missing column)"

    last_done = results["security"].iloc[-1]
    if last_done in securities_master:
        # Inclusive on purpose: the last ticker in the CSV is run again.
        # Use start_idx + 1 to start after it instead.
        start_idx = securities_master.index(last_done)
        return securities_master[start_idx:], str(last_done)

    # The last ticker in the CSV is not in the current list (e.g. the list was
    # edited between runs). Rather than failing on list.index, skip every
    # ticker that already appears in the CSV.
    already_done = set(results["security"].dropna().astype(str))
    remaining = [s for s in securities_master if s not in already_done]
    skipped = len(securities_master) - len(remaining)
    return remaining, (
        f"START (last ticker {last_done!r} in CSV is not in the list; "
        f"skipping {skipped} tickers already in CSV)"
    )


def main():
    """Run ``pipeline_actual`` over the ticker universe, resuming from the results CSV.

    The universe is a hard-coded ticker list. If ``gridsearch_results.csv``
    exists under ``out_path`` and has a ``security`` column, the run restarts
    at the ticker of its last row (inclusive); otherwise it starts from the
    beginning. Progress messages are printed and the pipeline's return value
    is discarded.
    """
    import os

    filename = "/content/drive/MyDrive/Colab Notebooks/SLDS/Data/bbg_data.xlsx"
    out_path = "/content/drive/MyDrive/Colab Notebooks/SLDS/Output/"
    # os.path.join avoids the doubled "/" that plain concatenation produced,
    # since out_path already ends with a separator.
    csv_path = os.path.join(out_path, "gridsearch_results.csv")

    # Alternative universes kept for reference:
    #   - all columns of the "SPX_PX" sheet:
    #       df = pd.read_excel(filename, sheet_name="SPX_PX", skiprows=4, index_col=0)
    #       df = df.iloc[3:, :]
    #       securities_master = df.columns.to_list()
    #   - a short list:
    #       securities_master = ['MSFT', 'NVDA', 'AAPL', 'AMZN', 'META', 'AVGO', 'GOOGL', 'TSLA']

    # A ticker listed twice would be processed twice and would make the resume
    # lookup (first match wins) jump back to its first position, so the list is
    # de-duplicated defensively even though the literal below has no repeats.
    securities_master = _dedupe_preserving_order([
        "MSFT","NVDA","AAPL","AVGO","GOOGL","ORCL","IBM","CSCO","CRM","AMD","INTU","NOW","TXN","QCOM",
        "ADBE","AMAT","PLTR","ACN","META","NFLX","GOOG","DIS","VZ","T","UBER","AMZN","TSLA","HD",
        "MCD","BKNG","COST","WMT","PG","KO","PM","PEP","BRK/B","JPM","V","MA","BAC","WFC","GS","MS","SPGI",
        "SCHW","AXP","BLK","C","PGR","LLY","JNJ","ABBV","UNH","ABT","MRK","ISRG","BSX","TMO","AMGN","RTX",
        "XOM","CVX","GE","CAT","BA","HON","LIN","NEE","DUK","SO","AEP","D","AMT","PLD","EQIX","O","CBRE"
    ])

    # resume if CSV exists; otherwise start from beginning
    securities, resume_point = _select_remaining(securities_master, csv_path)
    print("Resuming from:", resume_point)
    print("Remaining tickers:", len(securities))

    if not securities:
        # Only reachable through the fallback path, when every listed ticker
        # is already present in the CSV.
        print("Nothing left to run.")
        return

    _ = pipeline_actual(securities=securities, CONFIG=CONFIG, filename=filename, out_path=out_path)

if __name__ == "__main__":
    main()
