In [None]:
# Imports

import pandas as pd
import numpy as np
import statsmodels.api as sm

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.model_selection import TimeSeriesSplit

import warnings
warnings.filterwarnings("ignore")

# Utility Functions

def log_return(series):
    """Return the one-period log return ``ln(x_t) - ln(x_{t-1})`` of *series*.

    Non-positive observations have no real logarithm: ``np.log`` maps zero to
    ``-inf`` and negative numbers to ``NaN``.  An ``-inf`` would then be turned
    into ``-inf`` / ``+inf`` by the differencing step, and infinities are not
    removed by ``dropna()``.  To keep them out of the result, every value that
    is not strictly positive is masked to ``NaN`` before the log is taken.

    Parameters
    ----------
    series : pandas.Series or pandas.DataFrame
        Level data (prices, volumes, ...).

    Returns
    -------
    Same type as *series*
        Log differences.  The first row is ``NaN``; so is any row whose
        current or previous observation is missing or not strictly positive.
    """
    positive_only = series.where(series > 0)
    return np.log(positive_only).diff()

def safe_diff(series):
    """Return the first difference of *series* (each value minus the previous one).

    The leading observation has no predecessor, so its difference is NaN.
    """
    one_period_change = series.diff(periods=1)
    return one_period_change

# Data Loading Helper

def load_excel_series(path, sheet, column=None):
    """Load one worksheet, indexed by its ``Date`` column and sorted ascending.

    Parameters
    ----------
    path : str or path-like
        Workbook location, passed straight to ``pandas.read_excel``.
    sheet : str or int
        Worksheet identifier, forwarded as ``sheet_name``.
    column : hashable, optional
        Label of a single column to extract.  ``None`` (the default) returns
        the whole sheet.

    Returns
    -------
    pandas.DataFrame or pandas.Series
        The full date-indexed sheet when *column* is ``None``; otherwise the
        selected column.

    Raises
    ------
    KeyError
        If the sheet has no ``"Date"`` column, or *column* is not present.
    """
    df = pd.read_excel(path, sheet_name=sheet)
    df = df.set_index("Date").sort_index()

    # Test against None explicitly: a truthiness check would treat a valid but
    # falsy label (for example the integer column 0) as "no column requested"
    # and silently hand back the whole frame.
    if column is None:
        return df
    return df[column]

# Load Data
#
# Two workbooks feed the analysis; each sheet is read through
# load_excel_series, which indexes it by date and sorts it ascending.

_workshop_book = "Workshop Data.xlsx"
_market_book = "Indexes and Spreads Data 01.09.xlsx"

# Portfolio positions and the HYG volume column.
portfolio = load_excel_series(_workshop_book, "Portfolio")
hyg_vol = load_excel_series(_workshop_book, "Adj HYG", column="Volume")

# Index, spread, yield and HYG analytics sheets.
hy_index = load_excel_series(_market_book, "HY Index")
ig_index = load_excel_series(_market_book, "IG Index")
ust10 = load_excel_series(_market_book, "10yUST Yields")
hyg_yas = load_excel_series(_market_book, "HYG")

# HYG Total Return Construction
#
# Total-return level = position value + dividends accumulated up to and
# including each date.  The sheet may already carry the column, in which case
# it is used as is.

if "TotalReturnsHYG" not in portfolio.columns:
    # `portfolio` is sorted oldest-to-newest by load_excel_series, so the
    # running total must be a forward cumsum.  Summing over the reversed
    # series would add, on each date, the dividends that are still to come
    # through the end of the sample (look-ahead), and the level would step
    # DOWN after every payment instead of up.
    # NOTE(review): assumes "HYG Dividends" holds the cash amount recorded on
    # each date — confirm against the Portfolio sheet.
    portfolio["HYGCumDiv"] = portfolio["HYG Dividends"].cumsum()
    portfolio["TotalReturnsHYG"] = (
        portfolio["HYG Position"] + portfolio["HYGCumDiv"]
    )


# Simple (arithmetic) one-period return of the total-return level.
hyg_return = portfolio["TotalReturnsHYG"].pct_change()

# Equity Risk
#
# The SPY position is divided by -10 and the result is turned into log returns.
# NOTE(review): presumably the book holds a short of 10 SPY units, so the
# division recovers a positive per-unit price — confirm against the sheet.

equity_px = portfolio["SPY Position"].div(-10)
equity_return = log_return(equity_px)

# Duration (modified duration column of the HYG analytics sheet)

hyg_duration = hyg_yas["YAS_MOD_DUR"]

# Factor Construction

# Credit: equal-weighted blend of the day-over-day OAS changes of the HY and
# IG indexes.
_oas_field = "OAS_SOVEREIGN_CURVE"
d_hy_oas, d_ig_oas = (
    safe_diff(index_frame[_oas_field]) for index_frame in (hy_index, ig_index)
)
credit = d_hy_oas.mul(0.5).add(d_ig_oas.mul(0.5))

# Rates: change in the 10y UST series scaled by minus the HYG duration.
d_ust10 = safe_diff(ust10["PX_LAST"])
rate = (-hyg_duration).mul(d_ust10)

# Liquidity: log change in HYG volume.
liquidity = log_return(hyg_vol)

# Assemble factor matrix.  The DataFrame constructor aligns the four series on
# the union of their indexes, leaving NaN where a factor has no observation.
factors = pd.DataFrame({
    "Rate": rate,
    "Credit": credit,
    "Equity": equity_return,
    "Liquidity": liquidity,
})

# Align and clean the target and the factors TOGETHER.
# Cleaning only the factors leaves two holes that surface later as NaN/inf
# regression output (and warnings are silenced at the top of the notebook):
#   * dropna() keeps +/-inf, which log returns produce whenever a level is 0;
#   * a NaN or inf in the target itself is never looked at.
# Treat +/-inf as missing and keep only dates where every column is finite.
_target = hyg_return.rename("_target")
_sample = factors.join(_target, how="inner")
_sample = _sample.replace([np.inf, -np.inf], np.nan).dropna()

factors = _sample.drop(columns="_target")
# Restore the original series name so model summaries label it correctly.
hyg_return = _sample["_target"].rename(hyg_return.name)

# ============================================================
# --------------------- OLS REGRESSION -----------------------
# ============================================================

# Regress the HYG return on the four factors plus an intercept column.
X_ols = sm.add_constant(factors)
y = hyg_return

ols_model = sm.OLS(endog=y, exog=X_ols).fit()

print("\n================ OLS Regression =================")
print(ols_model.summary())

# Diagnostics: conditioning of the design matrix, goodness of fit, and the
# Durbin-Watson statistic computed on the residuals.
print("\nOLS Diagnostics")
design_condition = np.linalg.cond(X_ols)
print(f"Condition Number : {design_condition:.2f}")
print(f"R²              : {ols_model.rsquared:.3f}")
print(f"Adj R²          : {ols_model.rsquared_adj:.3f}")
residual_dw = sm.stats.stattools.durbin_watson(ols_model.resid)
print("Durbin-Watson   :", residual_dw)

# RIDGE REGRESSION
#
# Factors are standardised first, then an L2-penalised linear model
# (alpha = 10) is fitted on the scaled inputs.

_ridge_steps = [
    ("scaler", StandardScaler()),
    ("ridge", Ridge(alpha=10.0)),
]
ridge_pipeline = Pipeline(steps=_ridge_steps)

# In-sample evaluation: predictions are made on the rows used for fitting.
ridge_pred = ridge_pipeline.fit(factors, y).predict(factors)

print("\n================ Ridge Regression ================")
ridge_r2 = r2_score(y, ridge_pred)
print(f"R²  : {ridge_r2:.3f}")
ridge_mse = mean_squared_error(y, ridge_pred)
print(f"MSE : {ridge_mse:.6f}")

# The scaler precedes the ridge step, so these coefficients apply to the
# standardised factors rather than to the raw ones.
_fitted_ridge = ridge_pipeline.named_steps["ridge"]
ridge_betas = pd.Series(_fitted_ridge.coef_, index=factors.columns)

print("\nRidge Betas:")
display(ridge_betas)

# RANDOM FOREST MODEL
#
# Shallow, heavily regularised forest; the seed is fixed so repeated runs
# build the same trees.

_rf_settings = {
    "n_estimators": 500,
    "max_depth": 5,
    "min_samples_leaf": 20,
    "random_state": 42,
}
rf_model = RandomForestRegressor(**_rf_settings)

# In-sample evaluation: predictions are made on the rows used for fitting.
rf_pred = rf_model.fit(factors, y).predict(factors)

print("\n============== Random Forest ====================")
rf_r2 = r2_score(y, rf_pred)
print(f"R²  : {rf_r2:.3f}")
rf_mse = mean_squared_error(y, rf_pred)
print(f"MSE : {rf_mse:.6f}")

# Label the importances with the factor names, largest first.
_raw_importance = pd.Series(
    data=rf_model.feature_importances_,
    index=factors.columns,
)
rf_importance = _raw_importance.sort_values(ascending=False)

print("\nRandom Forest Feature Importance:")
display(rf_importance)

# FACTOR ATTRIBUTION (OLS)
#
# Each factor's per-date contribution is its value times its OLS slope.  The
# intercept is dropped, so "Total" is the sum of the four factor
# contributions only.

betas = ols_model.params.drop(labels="const")
attribution = factors * betas  # the Series aligns on the column labels
attribution = attribution.assign(Total=attribution.sum(axis=1))

print("\nOLS Factor Attribution:")
display(attribution.tail())

# FACTOR CORRELATION MATRIX
factor_corr = factors.corr()
print("\nFactor Correlation Matrix:")
display(factor_corr)



                            OLS Regression Results                            
Dep. Variable:        TotalReturnsHYG   R-squared:                       0.561
Model:                            OLS   Adj. R-squared:                  0.559
Method:                 Least Squares   F-statistic:                     395.7
Date:                Wed, 14 Jan 2026   Prob (F-statistic):          1.28e-219
Time:                        17:37:56   Log-Likelihood:                 5577.6
No. Observations:                1244   AIC:                        -1.115e+04
Df Residuals:                    1239   BIC:                        -1.112e+04
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.0001   7.78e-05     -1.861      0.

Rate         2.400244e-03
Credit      -2.569286e-03
Equity      -8.347976e-07
Liquidity   -2.269581e-04
dtype: float64


R²  : 0.538
MSE : 0.000008

Random Forest Feature Importance:


Credit       0.532126
Rate         0.423226
Liquidity    0.039839
Equity       0.004809
dtype: float64


OLS Factor Attribution:


Unnamed: 0_level_0,Rate,Credit,Equity,Liquidity,Total
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-12-24,0.000854,0.00037,-2.719716e-07,0.000537,0.00176
2025-12-26,0.000168,-0.000886,-9.857899e-07,-2.3e-05,-0.000742
2025-12-29,0.000506,-0.000252,-8.805767e-07,-0.000449,-0.000196
2025-12-30,-0.000338,0.001051,4.794713e-07,5.7e-05,0.00077
2025-12-31,-0.001291,0.000117,6.461882e-08,9.2e-05,-0.001081



Factor Correlation Matrix:


Unnamed: 0,Rate,Credit,Equity,Liquidity
Rate,1.0,0.259188,0.02121,0.03151
Credit,0.259188,1.0,-0.008872,0.133045
Equity,0.02121,-0.008872,1.0,-0.046599
Liquidity,0.03151,0.133045,-0.046599,1.0
