In [1]:
# ============================================================
# Cross-Asset HYG Factor Model (CORRECTED)
# Notebook-Ready Single File
# ============================================================

import pandas as pd
import numpy as np
import statsmodels.api as sm

# ------------------------------------------------------------
# Utility Functions
# ------------------------------------------------------------

def log_return(series):
    return np.log(series).diff()

def safe_diff(series):
    return series.diff()

# ------------------------------------------------------------
# Data Loading Helper
# ------------------------------------------------------------

def load_excel_series(path, sheet, column=None):
    df = pd.read_excel(path, sheet_name=sheet)
    df = df.set_index("Date").sort_index()
    if column:
        return df[column]
    return df

# ------------------------------------------------------------
# Load Data
# ------------------------------------------------------------

portfolio = load_excel_series("Workshop Data.xlsx", "Portfolio")
hyg_vol   = load_excel_series("Workshop Data.xlsx", "Adj HYG", "Volume")

hy_index  = load_excel_series("Indexes and Spreads Data 01.09.xlsx", "HY Index")
ig_index  = load_excel_series("Indexes and Spreads Data 01.09.xlsx", "IG Index")
ust10     = load_excel_series("Indexes and Spreads Data 01.09.xlsx", "10yUST Yields")
hyg_yas   = load_excel_series("Indexes and Spreads Data 01.09.xlsx", "HYG")

# ------------------------------------------------------------
# Construct HYG Total Return (LEVEL)
# ------------------------------------------------------------

if "TotalReturnsHYG" not in portfolio.columns:
    portfolio["HYGCumDiv"] = portfolio["HYG Dividends"][::-1].cumsum()[::-1]
    portfolio["TotalReturnsHYG"] = (
        portfolio["HYG Position"] + portfolio["HYGCumDiv"]
    )

# ------------------------------------------------------------
# CORRECT Dependent Variable (PERCENT RETURN)
# ------------------------------------------------------------

hyg_return = portfolio["TotalReturnsHYG"].pct_change()

# ------------------------------------------------------------
# Equity Proxy (Risk-On)
# ------------------------------------------------------------

spy_px = portfolio["SPY Position"] / -10
spy_return = log_return(spy_px)

# ------------------------------------------------------------
# Duration
# ------------------------------------------------------------

hyg_duration = hyg_yas["YAS_MOD_DUR"]

# ------------------------------------------------------------
# Factor Construction (STATIONARY & ORTHOGONAL)
# ------------------------------------------------------------

# Credit spread shocks
d_hy_oas = safe_diff(hy_index["OAS_SOVEREIGN_CURVE"])
d_ig_oas = safe_diff(ig_index["OAS_SOVEREIGN_CURVE"])

# Rates shock
d_ust10 = safe_diff(ust10["PX_LAST"])

# Liquidity shock
d_vol = log_return(hyg_vol)

# Orthogonal credit factors
credit_level = 0.5 * d_hy_oas + 0.5 * d_ig_oas
credit_rotation = d_hy_oas - d_ig_oas

# Duration-adjusted rate factor
rate_factor = - hyg_duration * d_ust10

# Assemble factor matrix
factors = pd.DataFrame({
    "Rate": rate_factor,
    "Credit_Level": credit_level,
    "Credit_Rotation": credit_rotation,
    "Equity": spy_return,
    "Liquidity": d_vol
})

# Drop missing values and align Y
factors = factors.dropna()
hyg_return = hyg_return.loc[factors.index]

# ------------------------------------------------------------
# Regression
# ------------------------------------------------------------

X = sm.add_constant(factors)
Y = hyg_return

model = sm.OLS(Y, X).fit()

print(model.summary())

# ------------------------------------------------------------
# Diagnostics
# ------------------------------------------------------------

condition_number = np.linalg.cond(X)

print("\n================ Diagnostics ================")
print(f"Condition Number : {condition_number:.2f}")
print(f"R²              : {model.rsquared:.3f}")
print(f"Adj R²          : {model.rsquared_adj:.3f}")
print("Durbin-Watson   :", sm.stats.stattools.durbin_watson(model.resid))

# ------------------------------------------------------------
# Factor Attribution
# ------------------------------------------------------------

betas = model.params.drop("const")
attribution = factors.mul(betas, axis=1)
attribution["Total"] = attribution.sum(axis=1)

print("\nLatest Factor Attribution:")
display(attribution.tail())

# ------------------------------------------------------------
# Factor Correlation Check
# ------------------------------------------------------------

print("\nFactor Correlations:")
display(factors.corr())


                            OLS Regression Results                            
Dep. Variable:        TotalReturnsHYG   R-squared:                       0.567
Model:                            OLS   Adj. R-squared:                  0.565
Method:                 Least Squares   F-statistic:                     323.7
Date:                Mon, 12 Jan 2026   Prob (F-statistic):          8.24e-222
Time:                        15:27:54   Log-Likelihood:                 5585.7
No. Observations:                1244   AIC:                        -1.116e+04
Df Residuals:                    1238   BIC:                        -1.113e+04
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const              -0.0001   7.73e-05     

Unnamed: 0_level_0,Rate,Credit_Level,Credit_Rotation,Equity,Liquidity,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-12-24,0.000847,0.000517,-7e-06,-9.611853e-07,0.000527,0.001883
2025-12-26,0.000167,-0.001237,0.000613,-3.483918e-06,-2.3e-05,-0.000483
2025-12-29,0.000502,-0.000352,-9.2e-05,-3.11208e-06,-0.000441,-0.000386
2025-12-30,-0.000336,0.001466,-0.00051,1.694518e-06,5.6e-05,0.000678
2025-12-31,-0.00128,0.000164,-0.000235,2.283719e-07,9e-05,-0.001261



Factor Correlations:


Unnamed: 0,Rate,Credit_Level,Credit_Rotation,Equity,Liquidity
Rate,1.0,0.259188,0.264701,0.02121,0.03151
Credit_Level,0.259188,1.0,0.957047,-0.008872,0.133045
Credit_Rotation,0.264701,0.957047,1.0,-0.004137,0.123394
Equity,0.02121,-0.008872,-0.004137,1.0,-0.046599
Liquidity,0.03151,0.133045,0.123394,-0.046599,1.0
