# Baseline FF5 Factor Regression

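This notebook runs one OLS regression per stock, regressing its monthly excess return on the five Fama-French factors (Mkt-RF, SMB, HML, RMW, CMA). The estimated coefficients and each regression's R² are saved as CSV files under `../data/processed/`.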

In [None]:
import pandas as pd, statsmodels.api as sm

# Load the monthly panel. The first two CSV columns become the row
# MultiIndex (column 0 is parsed as dates) and are labelled (date, ticker).
panel = pd.read_csv(
    "../data/processed/panel_monthly_with_ff5.csv",
    index_col=[0, 1],
    parse_dates=[0],
).rename_axis(index=["date", "ticker"])

# Fit one time-series OLS per ticker: excess_ret ~ const + FF5 factors.
# Results are collected as one Series per ticker and stacked into
# ticker-indexed frames (`coefs`: intercept + slopes, `r2`: R-squared).
FF5_FACTORS = ["Mkt-RF", "SMB", "HML", "RMW", "CMA"]
REG_COLS = ["excess_ret"] + FF5_FACTORS
MIN_OBS = len(FF5_FACTORS) + 2  # intercept + five slopes, plus >= 1 residual d.o.f.

tickers = panel.index.get_level_values("ticker").unique()
rows_coef, rows_r2 = [], []
skipped = []

for t in tickers:
    # Drop only rows that are missing a regression input. A bare dropna()
    # would also discard months where some unrelated panel column is NaN.
    df = panel.xs(t, level="ticker").dropna(subset=REG_COLS)

    # Checked before building X: with too few rows the fit is degenerate
    # (or OLS raises on empty input), so record the ticker and move on.
    if len(df) < MIN_OBS:
        skipped.append(t)
        continue

    y = df["excess_ret"]
    X = sm.add_constant(df[FF5_FACTORS])
    res = sm.OLS(y, X).fit()
    rows_coef.append(pd.Series(res.params, name=t))
    rows_r2.append(pd.Series({"r2": res.rsquared}, name=t))

if skipped:
    print("Skipped (too few complete observations):", ", ".join(map(str, skipped)))

coefs = pd.DataFrame(rows_coef).sort_index()
r2    = pd.DataFrame(rows_r2).sort_index()

# Persist the estimates (one row per ticker in each file).
coefs.to_csv("../data/processed/ff5_betas.csv")
r2.to_csv("../data/processed/ff5_r2.csv")

# Print the summary. The two header lines appear in the notebook's recorded
# output, so they are emitted here to keep a fresh run consistent with it.
print("Number of tickers estimated:", len(coefs))
print("Sample coefficients (first 5):")
print(coefs.head())
print("R² stats: mean =", r2["r2"].mean(), ", min =", r2["r2"].min(), ", max =", r2["r2"].max())


Number of tickers estimated: 10
Sample coefficients (first 5):
         const    Mkt-RF       SMB       HML       RMW       CMA
AAPL  0.007506  1.181501 -0.042462 -0.557948  0.657246 -0.075277
AMZN  0.009574  1.266596 -0.679399 -0.400701 -0.846374 -0.891271
GOOG  0.004401  0.938483 -0.540276  0.128854 -0.019171 -0.872771
HD    0.003296  0.970879  0.289136 -0.178542  0.613778  0.226112
JPM   0.007372  1.072965 -0.168611  1.145773 -0.619134 -0.498653
R² stats: mean = 0.488414384892054 , min = 0.24917704094158222 , max = 0.7719922998285793
