In [27]:
import pandas as pd
import statsmodels.api as sm
from utils.ff_functions import create_coef_table
from utils.ff_functions import summarise_table
from utils.ff_functions import run_GRS

# Filtering Dates
The paper uses the sample period July 1963 – December 1991, so both the factor data and the portfolio returns are restricted to that date range below.

In [28]:
PATH = '../data/processed'

# Sample period used in the paper (July 1963 - December 1991), both ends inclusive
START_DATE = '1963-07-01'
END_DATE = '1991-12-31'

# Loading processed dfs
ff5 = pd.read_parquet(f'{PATH}/ff5_factors_monthly.parquet')
port = pd.read_parquet(f'{PATH}/ff_portfolios_25_monthly.parquet')

# Filtering FF5 columns to convert to FF3
ff3 = ff5[['Date', 'Mkt-RF', 'SMB', 'HML', 'RF']]

# Filtering date to year range.
# The explicit .copy() makes each filtered frame independent of its parent, so
# the column assignments further down do not write into a slice (this is what
# raises SettingWithCopyWarning on pandas versions without copy-on-write).
ff3 = ff3[ff3['Date'].between(START_DATE, END_DATE)].copy()
port = port[port['Date'].between(START_DATE, END_DATE)].copy()

# Robustness check: same number of observations AND the same dates.
# Equal lengths alone would not catch misaligned months, which the inner
# merges later in the notebook would silently drop.
assert len(ff3) == len(port), 'Observation sizes do not match'
assert set(ff3['Date']) == set(port['Date']), 'Dates do not match'

# Creating list from columns except Date
ff3_cols = [c for c in ff3.columns if c != 'Date']
port_cols = [c for c in port.columns if c != 'Date']

# Applying numeric transformation
# NOTE: errors='coerce' turns any value that cannot be parsed into NaN
ff3[ff3_cols] = ff3[ff3_cols].apply(pd.to_numeric, errors='coerce')
port[port_cols] = port[port_cols].apply(pd.to_numeric, errors='coerce')

# Data Exploration

In [29]:
# Date-indexed frames used only for the descriptive statistics.
# set_index without inplace returns a new frame, so ff3 and port keep their
# Date column for the merges later on.
ff3_stat = ff3.set_index('Date')
port_stat = port.set_index('Date')

In [30]:
# Summary statistics of the three factors and the risk-free rate over the sample
ff3_stat.describe()

Unnamed: 0,Mkt-RF,SMB,HML,RF
count,342.0,342.0,342.0,342.0
mean,0.408246,0.302661,0.392105,0.552982
std,4.59266,2.962978,2.560401,0.218826
min,-23.24,-10.08,-9.72,0.25
25%,-2.185,-1.37,-1.07,0.4
50%,0.555,0.115,0.485,0.51
75%,3.535,2.015,1.68,0.6675
max,16.1,12.8,8.6,1.35


In [31]:
# Summary statistics of the 25 portfolio return series over the sample
port_stat.describe()

Unnamed: 0,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,SMALL HiBM,ME2 BM1,ME2 BM2,ME2 BM3,ME2 BM4,ME2 BM5,...,ME4 BM1,ME4 BM2,ME4 BM3,ME4 BM4,ME4 BM5,BIG LoBM,ME5 BM2,ME5 BM3,ME5 BM4,BIG HiBM
count,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,...,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0
mean,0.86601,1.218971,1.29135,1.420761,1.542843,0.946676,1.197346,1.394027,1.451832,1.535109,...,1.012612,0.918682,1.169203,1.345892,1.439576,0.922617,0.890408,0.9088,1.060339,1.081593
std,7.749803,6.779863,6.272839,5.912414,6.266145,7.272491,6.284909,5.723932,5.334407,6.01514,...,5.881375,5.401291,5.074356,4.91583,5.63636,4.894273,4.711281,4.374565,4.302269,4.779814
min,-34.1653,-30.6779,-28.593,-28.3583,-28.4843,-32.4849,-31.9413,-28.6601,-24.6616,-29.2541,...,-25.5471,-28.6858,-24.0245,-18.4999,-23.468,-21.6513,-22.4499,-22.0284,-15.4467,-18.7014
25%,-3.44945,-2.5539,-2.159625,-1.552625,-1.4533,-3.52605,-2.335675,-1.710825,-1.48355,-1.76505,...,-2.63755,-2.1334,-1.978525,-1.655525,-1.77515,-1.78605,-1.685675,-1.682925,-1.39535,-1.83285
50%,1.04755,1.1768,1.2939,1.42555,1.5093,1.07175,1.52805,1.55275,1.5993,1.68675,...,0.93685,1.06095,1.33365,1.497,1.55265,0.9898,0.79455,0.9965,0.8823,1.052
75%,5.431025,5.57,5.077175,4.447575,4.803775,5.48135,5.089625,4.828525,4.84335,5.0208,...,4.843225,4.096025,4.420875,4.227225,4.9958,3.713625,3.798775,3.3026,3.533275,3.9603
max,25.1985,30.069,28.2814,28.2656,33.2035,21.4626,26.6206,26.6153,27.1808,29.6542,...,21.2099,20.246,23.8873,24.0697,27.8541,22.2031,16.4911,18.5374,19.8662,15.6797


# Time-Series FF3 regressions
For each of the 25 portfolios the regression equation is:
$$
\begin{equation}
R_{i,t} - R_{f,t}
=
\alpha_i
+
\beta_{i,M}\left(R_{M,t} - R_{f,t}\right)
+
\beta_{i,S}\,\mathrm{SMB}_t
+
\beta_{i,H}\,\mathrm{HML}_t
+
\varepsilon_{i,t}
\end{equation}
$$

Where:

$$
\begin{aligned}
R_{i,t} &:\ \text{return on portfolio } i \text{ at time } t \\
R_{f,t} &:\ \text{risk-free rate} \\
R_{M,t} - R_{f,t} &:\ \text{market excess return (Mkt-RF)} \\
\mathrm{SMB}_t &:\ \text{size factor (Small Minus Big)} \\
\mathrm{HML}_t &:\ \text{value factor (High Minus Low)} \\
\alpha_i &:\ \text{pricing error (abnormal return)} \\
\varepsilon_{i,t} &:\ \text{regression residual}
\end{aligned}
$$

In [32]:
# Merging port and ff3 to regression df
df = port.merge(ff3[["Date", "Mkt-RF", "SMB", "HML", "RF"]], on="Date", how="inner")

# Robustness check: an inner merge silently drops unmatched dates and
# duplicates rows on repeated dates, so the row count must be unchanged
assert len(df) == len(port), 'Merge changed the number of observations'

# Dropping date from columns
portfolio_cols = port.columns.drop('Date')

# Converting to excess returns
df[portfolio_cols] = df[portfolio_cols].sub(df['RF'], axis=0)

# Creating results dictionary
ff3_results = {}

# Factors that will be used in regression
facts = ['Mkt-RF', 'SMB', 'HML']

# The regressors (constant + three factors) are the same for every portfolio,
# so the design matrix is built once instead of once per iteration
X = sm.add_constant(df[facts])

for portfolio in portfolio_cols:
    Y = df[portfolio]

    # Using HAC (Newey-West) SEs with 12 lags: robust to both
    # heteroskedasticity and autocorrelation in the residuals
    model = sm.OLS(Y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 12})
    ff3_results[portfolio] = model

In [33]:
# Creating table of coefficients for each portfolio
# create_coef_table is a project helper (utils.ff_functions); the displayed
# output has one row per portfolio with alpha, t_alpha, p_alpha, R2 and one
# beta column per factor.
ff3_table = create_coef_table(ff3_results, factors=facts)
display(ff3_table.head())

# Summarising table
# summarise_table (utils.ff_functions) reduces the per-portfolio table to the
# aggregate statistics shown below: mean absolute alpha, percentage of alphas
# significant at 5%, mean R2 and the number of portfolios.
ff3_summary = summarise_table(ff3_table)
display(ff3_summary)

Unnamed: 0,Portfolio,alpha,t_alpha,p_alpha,R2,betaMkt-RF,betaSMB,betaHML
0,SMALL LoBM,-0.369237,-3.052528,0.0023,0.941882,1.03385,1.371117,-0.394751
1,ME1 BM2,-0.097297,-1.477217,0.1396,0.960748,0.965439,1.240546,-0.016106
2,ME1 BM3,-0.057833,-0.778675,0.4362,0.965137,0.944443,1.118101,0.184213
3,ME1 BM4,0.056688,0.924228,0.3554,0.967618,0.899777,1.067029,0.308115
4,SMALL HiBM,0.050713,0.771852,0.4402,0.961708,0.954114,1.144029,0.518693


mean[|alpha|)           0.092776
% sig alpha (<0.05)    16.000000
mean(R2)                0.934559
n_portfolios           25.000000
dtype: float64

In [34]:
# Saving the per-portfolio FF3 coefficient table; index=False leaves the row
# index out of the CSV.
# NOTE(review): assumes the ../results directory already exists - confirm.
ff3_table.to_csv('../results/ff3_results.csv', index=False)

# CAPM
We also estimate the CAPM on the same 25 portfolios, as a benchmark against which to compare the FF3 results.

In [35]:
# Creating results dictionary
capm_results = {}

# The regressors (constant + market excess return) are the same for every
# portfolio, so the design matrix is built once instead of once per iteration
X = sm.add_constant(df['Mkt-RF']) # only CAPM factor

for portfolio in portfolio_cols:
    Y = df[portfolio]

    # Using HAC (Newey-West) SEs with 12 lags: robust to both
    # heteroskedasticity and autocorrelation in the residuals
    CAPM_model = sm.OLS(Y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 12})

    capm_results[portfolio] = CAPM_model

In [36]:
# Creating coefficient table
# create_coef_table is a project helper (utils.ff_functions); with a single
# factor the displayed output has alpha, t_alpha, p_alpha, R2 and one beta
# column per portfolio.
capm_table = create_coef_table(capm_results, factors=['Mkt-RF'])
display(capm_table.head())

# Summarising table
# summarise_table (utils.ff_functions) reduces the per-portfolio table to the
# aggregate statistics shown below: mean absolute alpha, percentage of alphas
# significant at 5%, mean R2 and the number of portfolios.
capm_summary = summarise_table(capm_table)
display(capm_summary)

Unnamed: 0,Portfolio,alpha,t_alpha,p_alpha,R2,betaMkt-RF
0,SMALL LoBM,-0.265484,-0.800503,0.4234,0.699712,1.417067
1,ME1 BM2,0.158852,0.624859,0.5321,0.703454,1.242234
2,ME1 BM3,0.267591,1.139517,0.2545,0.707923,1.153172
3,ME1 BM4,0.430205,2.002894,0.0452,0.687897,1.071841
4,SMALL HiBM,0.540799,2.194483,0.0282,0.644641,1.099978


mean[|alpha|)           0.257850
% sig alpha (<0.05)    36.000000
mean(R2)                0.794427
n_portfolios           25.000000
dtype: float64

In [37]:
# Saving the per-portfolio CAPM coefficient table; index=False leaves the row
# index out of the CSV.
# NOTE(review): assumes the ../results directory already exists - confirm.
capm_table.to_csv('../results/capm_results.csv', index=False)

# CAPM vs FF3 Comparison

In [38]:
# Setting index for alignment
capm_alpha = capm_table.set_index("Portfolio")[["alpha"]]
ff3_alpha = ff3_table.set_index("Portfolio")[["alpha"]]

# Both tables must describe the same portfolios in the same order
assert capm_alpha.index.equals(ff3_alpha.index), 'Portfolio labels do not match'

# Placing the two alpha columns side by side, aligned on the Portfolio index.
# A join is used rather than DataFrame.compare because compare keeps only the
# rows whose values differ: a portfolio with identical CAPM and FF3 alphas
# would be dropped from the table.
alpha_compare = capm_alpha.rename(columns={"alpha": "alpha_CAPM"}).join(
    ff3_alpha.rename(columns={"alpha": "alpha_FF3"})
)

# Finding change in alpha
alpha_compare['alpha_diff'] = (
    alpha_compare['alpha_CAPM'] - alpha_compare['alpha_FF3']
)
display(alpha_compare)

Unnamed: 0_level_0,alpha_CAPM,alpha_FF3,alpha_diff
Portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SMALL LoBM,-0.265484,-0.369237,0.103753
ME1 BM2,0.158852,-0.097297,0.256149
ME1 BM3,0.267591,-0.057833,0.325424
ME1 BM4,0.430205,0.056688,0.373517
SMALL HiBM,0.540799,0.050713,0.490086
ME2 BM1,-0.188727,-0.136321,-0.052406
ME2 BM2,0.142,-0.032283,0.174283
ME2 BM3,0.387247,0.131131,0.256116
ME2 BM4,0.477902,0.13762,0.340283
ME2 BM5,0.52202,0.054721,0.467299


# Gibbons–Ross–Shanken (1989) Test

### GRS Test on FF3

In [39]:
# Test-asset columns: every column of port other than Date
portfolio_cols = port.columns.drop("Date")

# Building the GRS input frame: portfolio returns joined to the factors on Date
df_grs = pd.merge(
    port,
    ff3[["Date", "Mkt-RF", "SMB", "HML", "RF"]],
    on="Date",
    how="inner",
)

# Turning portfolio returns into excess returns by removing RF row by row
df_grs[portfolio_cols] = df_grs[portfolio_cols].subtract(df_grs["RF"], axis="index")

In [40]:
# Factors on the right-hand side of the FF3 time-series regressions
facts = ['Mkt-RF', 'SMB', 'HML']

# run_GRS is a project helper (utils.ff_functions). Only the first two of its
# four return values are used here - presumably the GRS F-statistic and its
# p-value; TODO confirm against the helper.
F_stat, pVal, _, _ = run_GRS(df_grs, portfolio_cols, facts)

# NOTE(review): both values print wrapped in double brackets ([[...]]), i.e.
# they appear to be 1x1 arrays rather than scalars.
print("GRS F:", F_stat)
print("p-value:", pVal)

GRS F: [[1.43505949]]
p-value: [[0.08473135]]


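For FF3, the GRS test fails to reject the null hypothesis that all 25 alphas are jointly zero at the 5% level (F = 1.44, p = 0.085); it rejects only at the 10% level. The alphas are therefore jointly insignificant at the conventional 5% level, meaning the model prices the test portfolios reasonably well on average, although the evidence is marginal.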

### GRS Test on CAPM

In [41]:
# Single-factor specification: market excess return only
capm_factors = ["Mkt-RF"]

# Same helper and same test assets as the FF3 test, with only the factor list
# changed. Only the first two of the four return values are used - presumably
# the GRS F-statistic and its p-value; TODO confirm against the helper.
F_capm, p_capm, _, _ = run_GRS(df_grs, portfolio_cols, capm_factors)

# NOTE(review): both values print wrapped in double brackets ([[...]]), i.e.
# they appear to be 1x1 arrays rather than scalars.
print("CAPM GRS F:", F_capm)
print("CAPM p-value:", p_capm)

CAPM GRS F: [[2.02888155]]
CAPM p-value: [[0.0030554]]


The GRS test strongly rejects the CAPM (F = 2.03, p = 0.003). Rejecting the null means the alphas are not jointly zero: at least some portfolios have systematically non-zero alphas, so the CAPM leaves part of their average returns unpriced.