In [4]:
# ============================================================
# Imports
# ============================================================

import pandas as pd
import numpy as np
import statsmodels.api as sm

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings("ignore")

# ============================================================
# Utility Functions
# ============================================================

def log_return(series):
    return np.log(series).diff()

def safe_diff(series):
    return series.diff()

# ============================================================
# Data Loading Helper
# ============================================================

def load_excel_series(path, sheet, column=None):
    df = pd.read_excel(path, sheet_name=sheet)
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.set_index("Date").sort_index()
    return df[column] if column else df

# ============================================================
# Load Data
# ============================================================

portfolio = load_excel_series("Workshop Data.xlsx", "Portfolio")

vix = load_excel_series("MOVE Vix prices.xlsx", "VIX", "PX_LAST")

ig_index = load_excel_series(
    "Indexes and Spreads Data 01.09.xlsx", "IG Index"
)
hy_index = load_excel_series(
    "Indexes and Spreads Data 01.09.xlsx", "HY Index"
)

# ============================================================
# SPY Total Return Construction
# ============================================================

if "TotalReturnsSPY" not in portfolio.columns:
    portfolio["SPYCumDiv"] = portfolio["SPY Dividends"][::-1].cumsum()[::-1]
    portfolio["TotalReturnsSPY"] = (
        portfolio["SPY Position"] + portfolio["SPYCumDiv"]
    )

# ============================================================
# Resampling + Model Function
# ============================================================

def run_spy_factor_model(freq_label, freq):

    print(f"\n================ {freq_label.upper()} MODEL =================")

    # ---------------- SPY Return ----------------
    spy_ret = (
        portfolio["TotalReturnsSPY"]
        .resample(freq)
        .last()
        .pct_change()
        .rename("SPY_Return")
    )

    # ---------------- VIX (Volatility Shock) ----------------
    vix_rs = vix.resample(freq).last()
    vix_change = safe_diff(vix_rs).rename("VIX_Change")

    # ---------------- Credit Factors ----------------
    ig_oas = ig_index["OAS_SOVEREIGN_CURVE"].resample(freq).last()
    hy_oas = hy_index["OAS_SOVEREIGN_CURVE"].resample(freq).last()

    ig_credit = safe_diff(ig_oas).rename("IG_Credit")
    credit_stress = (safe_diff(hy_oas) - safe_diff(ig_oas)).rename("Credit_Stress")

    # ---------------- Assemble Factor Matrix ----------------
    factors = pd.concat(
        [vix_change, ig_credit, credit_stress],
        axis=1
    )

    # ---------------- SAFE DATE ALIGNMENT ----------------
    data = pd.concat(
        [spy_ret, factors],
        axis=1,
        join="inner"
    ).dropna()

    y = data["SPY_Return"]
    X = data.drop(columns=["SPY_Return"])

    # ========================================================
    # OLS Regression
    # ========================================================

    X_ols = sm.add_constant(X)
    ols_model = sm.OLS(y, X_ols).fit()

    print("\nOLS Summary:")
    print(ols_model.summary())

    # ========================================================
    # PCA on Factors
    # ========================================================

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    pca = PCA(n_components=min(3, X.shape[1]))
    pcs = pca.fit_transform(X_scaled)

    pca_df = pd.DataFrame(
        pcs,
        index=X.index,
        columns=[f"PC{i+1}" for i in range(pcs.shape[1])]
    )

    explained_var = pd.Series(
        pca.explained_variance_ratio_,
        index=pca_df.columns,
        name="Explained Variance"
    )

    print("\nPCA Explained Variance:")
    print(explained_var)

    # ========================================================
    # PCA Regression
    # ========================================================

    X_pca = sm.add_constant(pca_df)
    pca_model = sm.OLS(y, X_pca).fit()

    print("\nPCA Regression Summary:")
    print(pca_model.summary())

    return {
        "OLS_Model": ols_model,
        "PCA_Model": pca_model,
        "PCA_Loadings": pd.DataFrame(
            pca.components_.T,
            index=X.columns,
            columns=pca_df.columns
        ),
        "Explained_Variance": explained_var
    }

# ============================================================
# Run Models at All Frequencies
# ============================================================

daily_results   = run_spy_factor_model("Daily", "D")
weekly_results  = run_spy_factor_model("Weekly", "W")
monthly_results = run_spy_factor_model("Monthly", "M")




OLS Summary:
                            OLS Regression Results                            
Dep. Variable:             SPY_Return   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                 -0.001
Method:                 Least Squares   F-statistic:                    0.5105
Date:                Fri, 16 Jan 2026   Prob (F-statistic):              0.675
Time:                        16:53:36   Log-Likelihood:                 3114.8
No. Observations:                 994   AIC:                            -6222.
Df Residuals:                     990   BIC:                            -6202.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.0004      0

In [None]:
# ============================================================
# Imports
# ============================================================

import pandas as pd
import numpy as np
import statsmodels.api as sm

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

import warnings
warnings.filterwarnings("ignore")

# ============================================================
# Utility Functions
# ============================================================

def log_return(series):
    return np.log(series).diff()

def safe_diff(series):
    return series.diff()

# ============================================================
# Data Loading Helper
# ============================================================

def load_excel_series(path, sheet, column=None):
    df = pd.read_excel(path, sheet_name=sheet)
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.set_index("Date").sort_index()
    return df[column] if column else df

# ============================================================
# Load Data
# ============================================================

portfolio = load_excel_series("Workshop Data.xlsx", "Portfolio")

spy_vol = load_excel_series(
    "Workshop Data.xlsx", "SPY", "Volume"
)

vix = load_excel_series(
    "MOVE Vix prices.xlsx", "VIX", "PX_LAST"
)

ig_index = load_excel_series(
    "Indexes and Spreads Data 01.09.xlsx", "IG Index"
)

hy_index = load_excel_series(
    "Indexes and Spreads Data 01.09.xlsx", "HY Index"
)

# ============================================================
# SPY Total Return Construction
# ============================================================

if "TotalReturnsSPY" not in portfolio.columns:
    portfolio["SPYCumDiv"] = portfolio["SPY Dividends"][::-1].cumsum()[::-1]
    portfolio["TotalReturnsSPY"] = (
        portfolio["SPY Position"] + portfolio["SPYCumDiv"]
    )

# ============================================================
# Resampling + Model Function
# ============================================================

def run_spy_factor_model(freq_label, freq):

    print(f"\n================ {freq_label.upper()} MODEL =================")

    # ---------------- SPY Return ----------------
    spy_ret = (
        portfolio["TotalReturnsSPY"]
        .resample(freq)
        .last()
        .pct_change()
        .rename("SPY_Return")
    )

    # ---------------- VIX (Volatility) ----------------
    vix_rs = vix.resample(freq).last()
    vix_change = safe_diff(vix_rs).rename("VIX_Change")

    # ---------------- Credit Factors ----------------
    ig_oas = ig_index["OAS_SOVEREIGN_CURVE"].resample(freq).last()
    hy_oas = hy_index["OAS_SOVEREIGN_CURVE"].resample(freq).last()

    ig_credit = safe_diff(ig_oas).rename("IG_Credit")
    credit_stress = (safe_diff(hy_oas) - safe_diff(ig_oas)).rename("Credit_Stress")

    # ---------------- Liquidity Factor (NEW) ----------------
    spy_vol_rs = spy_vol.resample(freq).last()
    liquidity = log_return(spy_vol_rs).rename("Liquidity")

    # ---------------- Assemble Factor Matrix ----------------
    factors = pd.concat(
        [vix_change, ig_credit, credit_stress, liquidity],
        axis=1
    )

    # ---------------- SAFE DATE ALIGNMENT ----------------
    data = pd.concat(
        [spy_ret, factors],
        axis=1,
        join="inner"
    ).dropna()

    y = data["SPY_Return"]
    X = data.drop(columns=["SPY_Return"])

    # ========================================================
    # OLS Regression
    # ========================================================

    X_ols = sm.add_constant(X)
    ols_model = sm.OLS(y, X_ols).fit()

    print("\nOLS Summary:")
    print(ols_model.summary())

    # ========================================================
    # PCA on Factors
    # ========================================================

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    pca = PCA(n_components=min(3, X.shape[1]))
    pcs = pca.fit_transform(X_scaled)

    pca_df = pd.DataFrame(
        pcs,
        index=X.index,
        columns=[f"PC{i+1}" for i in range(pcs.shape[1])]
    )

    explained_var = pd.Series(
        pca.explained_variance_ratio_,
        index=pca_df.columns,
        name="Explained Variance"
    )

    print("\nPCA Explained Variance:")
    print(explained_var)

    # ========================================================
    # PCA Regression
    # ========================================================

    X_pca = sm.add_constant(pca_df)
    pca_model = sm.OLS(y, X_pca).fit()

    print("\nPCA Regression Summary:")
    print(pca_model.summary())

    return {
        "OLS_Model": ols_model,
        "PCA_Model": pca_model,
        "PCA_Loadings": pd.DataFrame(
            pca.components_.T,
            index=X.columns,
            columns=pca_df.columns
        ),
        "Explained_Variance": explained_var
    }

# ============================================================
# Run Models at All Frequencies
# ============================================================

daily_results   = run_spy_factor_model("Daily", "D")
weekly_results  = run_spy_factor_model("Weekly", "W")
monthly_results = run_spy_factor_model("Monthly", "M")




OLS Summary:
                            OLS Regression Results                            
Dep. Variable:             SPY_Return   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                 -0.003
Method:                 Least Squares   F-statistic:                    0.3627
Date:                Fri, 16 Jan 2026   Prob (F-statistic):              0.835
Time:                        17:09:30   Log-Likelihood:                 3032.7
No. Observations:                 971   AIC:                            -6055.
Df Residuals:                     966   BIC:                            -6031.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const             0.0004      0