In [4]:
# ============================================================
# Cross-Asset HYG Factor Model (CORRECTED)
# Notebook-Ready Single File
# ============================================================

import pandas as pd
import numpy as np
import statsmodels.api as sm

# ------------------------------------------------------------
# Utility Functions
# ------------------------------------------------------------

def log_return(series):
    return np.log(series).diff()

def safe_diff(series):
    return series.diff()

# ------------------------------------------------------------
# Data Loading Helper
# ------------------------------------------------------------

def load_excel_series(path, sheet, column=None):
    df = pd.read_excel(path, sheet_name=sheet)
    df = df.set_index("Date").sort_index()
    if column:
        return df[column]
    return df

# ------------------------------------------------------------
# Load Data
# ------------------------------------------------------------

portfolio = load_excel_series("Workshop Data.xlsx", "Portfolio")
hyg_vol   = load_excel_series("Workshop Data.xlsx", "Adj HYG", "Volume")

hy_index  = load_excel_series("Indexes and Spreads Data 01.09.xlsx", "HY Index")
ig_index  = load_excel_series("Indexes and Spreads Data 01.09.xlsx", "IG Index")
ust10     = load_excel_series("Indexes and Spreads Data 01.09.xlsx", "10yUST Yields")
hyg_yas   = load_excel_series("Indexes and Spreads Data 01.09.xlsx", "HYG")

# ------------------------------------------------------------
# Construct HYG Total Return (LEVEL)
# ------------------------------------------------------------

if "TotalReturnsHYG" not in portfolio.columns:
    portfolio["HYGCumDiv"] = portfolio["HYG Dividends"][::-1].cumsum()[::-1]
    portfolio["TotalReturnsHYG"] = (
        portfolio["HYG Position"] + portfolio["HYGCumDiv"]
    )

# ------------------------------------------------------------
# CORRECT Dependent Variable (RETURN, NOT LEVEL)
# ------------------------------------------------------------

hyg_return = portfolio["TotalReturnsHYG"].diff() / 100

# ------------------------------------------------------------
# Equity Proxy (Risk-On)
# ------------------------------------------------------------

spy_px = portfolio["SPY Position"] / -10
spy_return = log_return(spy_px)

# ------------------------------------------------------------
# Duration
# ------------------------------------------------------------

hyg_duration = hyg_yas["YAS_MOD_DUR"]

# ------------------------------------------------------------
# Factor Construction (STATIONARY & ORTHOGONAL)
# ------------------------------------------------------------

# Credit spread shocks
d_hy_oas = safe_diff(hy_index["OAS_SOVEREIGN_CURVE"])
d_ig_oas = safe_diff(ig_index["OAS_SOVEREIGN_CURVE"])

# Rates shock
d_ust10 = safe_diff(ust10["PX_LAST"])

# Liquidity shock
d_vol = log_return(hyg_vol)

# Orthogonal credit factors
credit_level = 0.5 * d_hy_oas + 0.5 * d_ig_oas
credit_rotation = d_hy_oas - d_ig_oas

# Duration-adjusted rate factor
rate_factor = - hyg_duration * d_ust10

# Assemble factor matrix
factors = pd.DataFrame({
    "Rate": rate_factor,
    "Credit_Level": credit_level,
    "Credit_Rotation": credit_rotation,
    "Equity": spy_return,
    "Liquidity": d_vol
})

# Drop missing values and align Y
factors = factors.dropna()
hyg_return = hyg_return.loc[factors.index]

# ------------------------------------------------------------
# Regression
# ------------------------------------------------------------

X = sm.add_constant(factors)
Y = hyg_return

model = sm.OLS(Y, X).fit()

print(model.summary())

# ------------------------------------------------------------
# Diagnostics
# ------------------------------------------------------------

condition_number = np.linalg.cond(X)

print("\n================ Diagnostics ================")
print(f"Condition Number : {condition_number:.2f}")
print(f"R²              : {model.rsquared:.3f}")
print(f"Adj R²          : {model.rsquared_adj:.3f}")
print("Durbin-Watson   :", sm.stats.stattools.durbin_watson(model.resid))

# ------------------------------------------------------------
# Factor Attribution
# ------------------------------------------------------------

betas = model.params.drop("const")
attribution = factors.mul(betas, axis=1)
attribution["Total"] = attribution.sum(axis=1)

print("\nLatest Factor Attribution:")
display(attribution.tail())

# ------------------------------------------------------------
# Factor Correlation Check
# ------------------------------------------------------------

print("\nFactor Correlations:")
display(factors.corr())


                            OLS Regression Results                            
Dep. Variable:        TotalReturnsHYG   R-squared:                       0.557
Model:                            OLS   Adj. R-squared:                  0.555
Method:                 Least Squares   F-statistic:                     311.2
Date:                Mon, 12 Jan 2026   Prob (F-statistic):          7.24e-216
Time:                        14:14:03   Log-Likelihood:                -18.191
No. Observations:                1244   AIC:                             48.38
Df Residuals:                    1238   BIC:                             79.14
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
const              -0.0149      0.007     

Unnamed: 0_level_0,Rate,Credit_Level,Credit_Rotation,Equity,Liquidity,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2025-12-24,0.07497,0.045746,-0.000593,-1.4e-05,0.052934,0.173043
2025-12-26,0.01478,-0.109524,0.054293,-5e-05,-0.002294,-0.042795
2025-12-29,0.044391,-0.031151,-0.008156,-4.5e-05,-0.044284,-0.039245
2025-12-30,-0.029696,0.129839,-0.045153,2.4e-05,0.005598,0.060613
2025-12-31,-0.113324,0.014502,-0.020791,3e-06,0.009056,-0.110555



Factor Correlations:


Unnamed: 0,Rate,Credit_Level,Credit_Rotation,Equity,Liquidity
Rate,1.0,0.259188,0.264701,0.02121,0.03151
Credit_Level,0.259188,1.0,0.957047,-0.008872,0.133045
Credit_Rotation,0.264701,0.957047,1.0,-0.004137,0.123394
Equity,0.02121,-0.008872,-0.004137,1.0,-0.046599
Liquidity,0.03151,0.133045,0.123394,-0.046599,1.0
