A Five-Factor Asset Pricing Model
Eugene F. Fama & Kenneth R. French (2015)

In [36]:
import pandas as pd
import statsmodels.api as sm
from utils.ff_functions import create_coef_table
from utils.ff_functions import summarise_table
from utils.ff_functions import run_GRS

# First Sample
## 1963-1991

In [37]:
PATH = '../data/processed'

# Sample window for the first sub-period; Series.between is inclusive on both ends
SAMPLE_START = '1963-07-01'
SAMPLE_END = '1991-12-31'

# Loading processed df
ff5 = pd.read_parquet(f'{PATH}/ff5_factors_monthly.parquet')
port = pd.read_parquet(f'{PATH}/ff_portfolios_25_monthly.parquet')

# Filtering both frames to the same date window (.copy() so later column
# assignments act on independent frames rather than slices of the loaded data)
ff5 = ff5[ff5['Date'].between(SAMPLE_START, SAMPLE_END)].copy()
port = port[port['Date'].between(SAMPLE_START, SAMPLE_END)].copy()

# Robustness checks: the later inner merge joins on Date, so equal row counts
# alone are not enough -- the two frames must cover exactly the same dates
assert len(ff5) == len(port), 'Observation sizes do not match'
assert set(ff5['Date']) == set(port['Date']), 'Factor and portfolio dates do not match'

# Creating list from columns except Date
ff5_cols = [c for c in ff5.columns if c != 'Date']
port_cols = [c for c in port.columns if c != 'Date']

# Applying numeric transformations
ff5[ff5_cols] = ff5[ff5_cols].apply(pd.to_numeric, errors='coerce')
port[port_cols] = port[port_cols].apply(pd.to_numeric, errors='coerce')

# errors='coerce' turns anything unparseable into NaN without warning, so fail
# here with a clear message instead of deep inside the regressions
assert not ff5[ff5_cols].isna().any().any(), 'Missing or non-numeric values in factor data'
assert not port[port_cols].isna().any().any(), 'Missing or non-numeric values in portfolio data'

In [38]:
# Inspect the factor frame after the date filter and numeric coercion applied above
ff5

Unnamed: 0,Date,Mkt-RF,SMB,HML,RMW,CMA,RF
0,1963-07-01,-0.39,-0.46,-0.82,0.72,-1.15,0.27
1,1963-08-01,5.07,-0.85,1.63,0.42,-0.33,0.25
2,1963-09-01,-1.57,-0.50,0.19,-0.80,0.27,0.27
3,1963-10-01,2.53,-1.30,-0.11,2.75,-2.24,0.29
4,1963-11-01,-0.85,-0.83,1.66,-0.34,2.22,0.27
...,...,...,...,...,...,...,...
337,1991-08-01,2.32,1.48,-0.80,0.94,-0.55,0.46
338,1991-09-01,-1.59,1.55,-0.98,-1.80,0.15,0.46
339,1991-10-01,1.28,0.90,-0.44,-1.89,-0.30,0.42
340,1991-11-01,-4.19,-0.88,-1.91,0.90,0.13,0.39


# Time-Series FF5 regressions
For each of the 25 portfolios the regression equation is:
$$
R_{i,t} - R_{f,t}
=
\alpha_i
+
\beta_{i,M}\left(R_{M,t} - R_{f,t}\right)
+
\beta_{i,S}\,\mathrm{SMB}_t
+
\beta_{i,H}\,\mathrm{HML}_t
+
\beta_{i,R}\,\mathrm{RMW}_t
+
\beta_{i,C}\,\mathrm{CMA}_t
+
\varepsilon_{i,t}
$$

Where:

$$
\begin{aligned}
R_{i,t} &:\ \text{return on portfolio } i \text{ at time } t \\
R_{f,t} &:\ \text{risk-free rate at time } t \\
R_{M,t} - R_{f,t} &:\ \text{market excess return (Mkt-RF)} \\
\mathrm{SMB}_t &:\ \text{size factor (Small Minus Big)} \\
\mathrm{HML}_t &:\ \text{value factor (High Minus Low)} \\
\mathrm{RMW}_t &:\ \text{profitability factor (Robust Minus Weak)} \\
\mathrm{CMA}_t &:\ \text{investment factor (Conservative Minus Aggressive)} \\
\alpha_i &:\ \text{pricing error (abnormal return)} \\
\varepsilon_{i,t} &:\ \text{regression residual}
\end{aligned}
$$

In [39]:
# Factors that will be used in regression
facts = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']

# Lag length for the HAC covariance estimator, in observations
# (the input files are monthly, so this is 12 months)
HAC_MAXLAGS = 12

# Merging port and ff5 into one regression df (inner join on Date); the factor
# list is reused here so the merged columns cannot drift from the regressors
df = port.merge(ff5[['Date', *facts, 'RF']], on='Date', how='inner')

# Portfolio return columns: every column of port except Date
portfolio_cols = port.columns.drop('Date')

# Converting to excess returns: subtract RF row-wise from each portfolio column
df[portfolio_cols] = df[portfolio_cols].sub(df['RF'], axis=0)

# The regressors are the same for every portfolio, so the design matrix
# (constant + factors) is built once instead of on every loop iteration
X = sm.add_constant(df[facts])

# Creating results dictionary: portfolio name -> fitted regression results
ff5_results = {}

for portfolio in portfolio_cols:
    Y = df[portfolio]

    # HAC covariance: standard errors robust to both heteroskedasticity and
    # autocorrelation; the coefficient estimates themselves are plain OLS
    model = sm.OLS(Y, X).fit(cov_type='HAC', cov_kwds={'maxlags': HAC_MAXLAGS})
    ff5_results[portfolio] = model

In [40]:
# Creating table of coefficients for each portfolio from the fitted models in
# ff5_results (per the saved output: alpha, its t-stat and p-value, R2 and one
# beta column per factor)
ff5_table = create_coef_table(ff5_results, factors=facts)
display(ff5_table.head())

# Summarising table across portfolios (per the saved output: mean absolute
# alpha, % of alphas with p < 0.05, and mean R2)
ff5_summary = summarise_table(ff5_table)
display(ff5_summary)

Unnamed: 0,Portfolio,alpha,t_alpha,p_alpha,R2,betaMkt-RF,betaSMB,betaHML,betaRMW,betaCMA
0,SMALL LoBM,-0.202282,-1.897414,0.0578,0.946151,1.018331,1.328947,-0.498507,-0.427151,-0.093202
1,ME1 BM2,-0.007485,-0.11167,0.9111,0.962582,0.958155,1.218888,-0.087785,-0.239408,-0.026345
2,ME1 BM3,-0.022448,-0.277162,0.7817,0.965596,0.942525,1.110486,0.141791,-0.10293,0.01089
3,ME1 BM4,0.080235,1.222158,0.2216,0.967994,0.899478,1.062905,0.265316,-0.077334,0.029098
4,SMALL HiBM,-0.007039,-0.10915,0.9131,0.962748,0.965655,1.16457,0.462619,0.091959,0.17017


mean[|alpha|)          0.077557
% sig alpha (<0.05)    8.000000
mean(R2)               0.936307
dtype: float64

In [41]:
# Persist the FF5 coefficient table as CSV without the index column; the
# comparison section reloads this file from the results directory
ff5_table.to_csv('../results/ff5_results.csv', index=False)

# Comparing Tables

In [42]:
R_PATH = '../results/'

# Reload the saved coefficient table of each model from the results directory
capm_table = pd.read_csv(f'{R_PATH}/capm_results.csv')
ff3_table = pd.read_csv(f'{R_PATH}/ff3_results.csv')
ff5_table = pd.read_csv(f'{R_PATH}/ff5_results.csv')

# One summary per model, each labelled with the model name so the labels
# become the column headers of the comparison table
capm_summary, ff3_summary, ff5_summary = (
    summarise_table(table).rename(label)
    for table, label in (
        (capm_table, "CAPM"),
        (ff3_table, "FF3"),
        (ff5_table, "FF5"),
    )
)

# Side-by-side comparison: one column per model
summary_compare = pd.concat([capm_summary, ff3_summary, ff5_summary], axis=1)
display(summary_compare)

Unnamed: 0,CAPM,FF3,FF5
mean[|alpha|),0.25785,0.092776,0.077557
% sig alpha (<0.05),36.0,16.0,8.0
mean(R2),0.794427,0.934559,0.936307


# GRS Test

In [43]:
# Build the GRS input frame: portfolio returns joined to the factor columns
# and RF on Date (inner join)
df_grs = pd.merge(
    port,
    ff5[["Date", "Mkt-RF", "SMB", "HML", "RMW", "CMA", "RF"]],
    on="Date",
    how="inner",
)

# Portfolio column names: every column of port except Date (a pandas Index)
portfolio_cols = port.columns.drop("Date")

# Replace raw portfolio returns with excess returns (RF subtracted row-wise)
df_grs[portfolio_cols] = df_grs[portfolio_cols].subtract(df_grs["RF"], axis="index")

In [44]:
# Run the GRS test once per factor specification on the same excess-return
# frame; only the first two returned values (F-statistic, p-value) are kept
F_ff5, p_ff5, _, _ = run_GRS(df_grs, portfolio_cols, ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA'])
F_ff3, p_ff3, _, _ = run_GRS(df_grs, portfolio_cols, ['Mkt-RF', 'SMB', 'HML'])
F_capm, p_capm, _, _ = run_GRS(df_grs, portfolio_cols, ['Mkt-RF'])

# One record per model, ordered from the smallest to the largest factor set;
# .item() extracts the plain Python scalar from each returned statistic
rows = [
    {"Model": label, "GRS_F": f_stat.item(), "p_value": p_val.item()}
    for label, f_stat, p_val in (
        ("CAPM", F_capm, p_capm),
        ("FF3", F_ff3, p_ff3),
        ("FF5", F_ff5, p_ff5),
    )
]

# Results table indexed by model name, shown to four decimals
grs_df = pd.DataFrame(rows).set_index("Model")
display(grs_df.round(4))

Unnamed: 0_level_0,GRS_F,p_value
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
CAPM,2.0289,0.0031
FF3,1.4351,0.0847
FF5,1.1493,0.286


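# Interpretation
For the FF5 model the GRS test comfortably fails to reject the null hypothesis that the 25 intercepts are jointly zero, even at the 10% level of significance (p = 0.286). By contrast, the CAPM is rejected at the 1% level (p = 0.0031), and FF3 is rejected at the 10% level but not at the 5% level (p = 0.0847). Failing to reject does not prove that the alphas are exactly zero, but it indicates that, in this sample, the added RMW and CMA factors reduce the joint pricing errors to a level that is statistically indistinguishable from zero.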

In [48]:
# Inspect the portfolio returns frame.
# NOTE(review): the saved output below runs from 1926-07 to 2025-11, not the
# 1963-07..1991-12 window that `port` was filtered to earlier in this notebook,
# and the execution count jumps from 44 to 48 -- the output appears to come from
# a differently-loaded `port`. Confirm with Restart Kernel -> Run All.
port

Unnamed: 0,Date,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,SMALL HiBM,ME2 BM1,ME2 BM2,ME2 BM3,ME2 BM4,...,ME4 BM1,ME4 BM2,ME4 BM3,ME4 BM4,ME4 BM5,BIG LoBM,ME5 BM2,ME5 BM3,ME5 BM4,BIG HiBM
0,1926-07-01,5.8276,-1.7006,0.5118,-2.1477,1.9583,1.2118,2.4107,0.6056,-2.6082,...,1.5376,1.5460,1.3389,0.2765,2.4678,3.3248,6.0909,2.0285,3.1263,0.5623
1,1926-08-01,-2.0206,-8.0282,1.3968,2.1483,8.5104,2.3620,-0.7525,3.8984,0.2299,...,1.3858,3.8587,1.9738,2.1336,5.3422,1.0169,4.1975,1.9769,5.4924,7.7576
2,1926-09-01,-4.8291,-2.6806,-4.3417,-3.2683,0.8586,-2.6849,-0.5252,1.0789,-3.2877,...,1.6897,-0.5246,-1.7724,1.4806,0.8730,-1.2951,3.6610,0.1384,-0.7497,-2.4284
3,1926-10-01,-9.3633,-3.5519,-3.5024,3.4413,-2.5452,-2.8014,-4.4191,-5.0767,-8.0271,...,-3.9136,-2.6528,-2.1058,-3.2532,-5.3525,-2.7382,-3.0061,-2.2467,-4.6725,-5.8129
4,1926-11-01,5.5888,4.1877,2.4384,-4.4495,0.5110,3.1023,-1.7317,3.0425,4.9538,...,3.4492,2.3823,3.7315,5.1102,1.8213,4.4331,2.5355,1.5280,3.6596,2.5636
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1189,2025-08-01,8.8770,8.9386,9.4083,9.1551,10.0193,9.2309,5.3913,8.5592,8.3905,...,3.6528,1.6166,2.3492,5.8848,7.1342,1.1611,1.1935,3.0568,5.4262,9.0799
1190,2025-09-01,5.0252,3.6843,6.2120,1.8185,1.7839,1.5671,2.5534,2.7634,-1.7018,...,-1.6173,1.9338,2.8504,-1.0616,-1.1749,5.5609,0.7986,2.7747,0.2281,9.7883
1191,2025-10-01,1.6342,4.1585,3.5958,-0.6982,-0.6074,3.3873,5.5502,2.3757,-2.8945,...,1.5596,1.1414,0.7399,-1.9959,-3.8438,3.8071,1.3814,0.7712,-2.2130,5.6126
1192,2025-11-01,-4.5541,6.5321,-0.4316,3.5830,3.6556,-1.3963,4.1876,3.0292,2.5370,...,-1.8818,1.8503,4.8510,5.2554,4.5621,-1.0600,0.3464,2.2197,3.9767,2.6288
