In [1]:
import pandas as pd
import statsmodels.api as sm
from utils.ff_functions import create_coef_table
from utils.ff_functions import run_GRS

# Filtering Dates
The paper uses the sample period July 1963 – December 1991, so both datasets are restricted to that date range.

In [2]:
PATH = '../data/processed'

# Sample window used for every table below (both endpoints are inclusive)
SAMPLE_START, SAMPLE_END = '1963-07-01', '1991-12-31'

# Loading processed dfs
fact = pd.read_parquet(f'{PATH}/ff_factors_monthly.parquet')
port = pd.read_parquet(f'{PATH}/ff_portfolios_25_monthly.parquet')

# Filtering date to year range.
# The explicit .copy() detaches each filtered frame from the one read from disk,
# so the column assignments further down write to a real frame instead of
# raising SettingWithCopyWarning on a slice.
fact = fact[fact['Date'].between(SAMPLE_START, SAMPLE_END)].copy()
port = port[port['Date'].between(SAMPLE_START, SAMPLE_END)].copy()

# Robustness check: both frames must cover exactly the same months
assert len(fact) == len(port), 'Observation sizes do not match'
assert fact['Date'].equals(port['Date']), "Dates don't match"

# Creating list from columns except Date
fact_cols = [c for c in fact.columns if c != 'Date']
port_cols = [c for c in port.columns if c != 'Date']

# Applying numeric transformation.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
fact[fact_cols] = fact[fact_cols].apply(pd.to_numeric, errors='coerce')
port[port_cols] = port[port_cols].apply(pd.to_numeric, errors='coerce')

# Data Exploration

In [3]:
# Date-indexed frames used only for the descriptive statistics.
# set_index returns a new frame, so fact and port keep Date as a regular column.
fact_stat = fact.set_index('Date')
port_stat = port.set_index('Date')

In [4]:
# Summary statistics of the factor columns over the filtered sample
fact_stat.describe()

Unnamed: 0,Mkt-RF,SMB,HML,RF
count,342.0,342.0,342.0,342.0
mean,0.408538,0.265234,0.380819,0.552982
std,4.59299,2.876615,2.552106,0.218826
min,-23.24,-9.86,-9.87,0.25
25%,-2.185,-1.3025,-1.105,0.4
50%,0.555,0.13,0.445,0.51
75%,3.535,2.04,1.69,0.6675
max,16.1,11.14,8.63,1.35


In [5]:
# Summary statistics of the portfolio return columns over the filtered sample
port_stat.describe()

Unnamed: 0,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,SMALL HiBM,ME2 BM1,ME2 BM2,ME2 BM3,ME2 BM4,ME2 BM5,...,ME4 BM1,ME4 BM2,ME4 BM3,ME4 BM4,ME4 BM5,BIG LoBM,ME5 BM2,ME5 BM3,ME5 BM4,BIG HiBM
count,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,...,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0,342.0
mean,0.86601,1.218971,1.29135,1.420761,1.542843,0.946676,1.197346,1.394027,1.451832,1.535109,...,1.012612,0.918682,1.169203,1.345892,1.439576,0.922617,0.890408,0.9088,1.060339,1.081593
std,7.749803,6.779863,6.272839,5.912414,6.266145,7.272491,6.284909,5.723932,5.334407,6.01514,...,5.881375,5.401291,5.074356,4.91583,5.63636,4.894273,4.711281,4.374565,4.302269,4.779814
min,-34.1653,-30.6779,-28.593,-28.3583,-28.4843,-32.4849,-31.9413,-28.6601,-24.6616,-29.2541,...,-25.5471,-28.6858,-24.0245,-18.4999,-23.468,-21.6513,-22.4499,-22.0284,-15.4467,-18.7014
25%,-3.44945,-2.5539,-2.159625,-1.552625,-1.4533,-3.52605,-2.335675,-1.710825,-1.48355,-1.76505,...,-2.63755,-2.1334,-1.978525,-1.655525,-1.77515,-1.78605,-1.685675,-1.682925,-1.39535,-1.83285
50%,1.04755,1.1768,1.2939,1.42555,1.5093,1.07175,1.52805,1.55275,1.5993,1.68675,...,0.93685,1.06095,1.33365,1.497,1.55265,0.9898,0.79455,0.9965,0.8823,1.052
75%,5.431025,5.57,5.077175,4.447575,4.803775,5.48135,5.089625,4.828525,4.84335,5.0208,...,4.843225,4.096025,4.420875,4.227225,4.9958,3.713625,3.798775,3.3026,3.533275,3.9603
max,25.1985,30.069,28.2814,28.2656,33.2035,21.4626,26.6206,26.6153,27.1808,29.6542,...,21.2099,20.246,23.8873,24.0697,27.8541,22.2031,16.4911,18.5374,19.8662,15.6797


# Time-Series FF3 regressions
For each of the 25 portfolios the regression equation is:
$$
R_{i,t} - R_{f,t}
=
\alpha_i
+
\beta_{i,M}\left(R_{M,t} - R_{f,t}\right)
+
\beta_{i,S}\,\mathrm{SMB}_t
+
\beta_{i,H}\,\mathrm{HML}_t
+
\varepsilon_{i,t}
$$

Where:

$$
\begin{aligned}
R_{i,t} &:\ \text{return on portfolio } i \text{ at time } t \\
R_{f,t} &:\ \text{risk-free rate} \\
R_{M,t} - R_{f,t} &:\ \text{market excess return (Mkt-RF)} \\
\mathrm{SMB}_t &:\ \text{size factor (Small Minus Big)} \\
\mathrm{HML}_t &:\ \text{value factor (High Minus Low)} \\
\alpha_i &:\ \text{pricing error (abnormal return)} \\
\varepsilon_{i,t} &:\ \text{regression residual}
\end{aligned}
$$

In [6]:
# Displaying the filtered portfolio returns; RF has not been subtracted from this frame
port

Unnamed: 0,Date,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,SMALL HiBM,ME2 BM1,ME2 BM2,ME2 BM3,ME2 BM4,...,ME4 BM1,ME4 BM2,ME4 BM3,ME4 BM4,ME4 BM5,BIG LoBM,ME5 BM2,ME5 BM3,ME5 BM4,BIG HiBM
444,1963-07-01,1.1287,-0.3632,0.7223,-0.0413,-1.2447,-1.8076,0.1929,-1.0149,-1.9749,...,-0.9115,-1.7733,-1.9168,-1.5745,-1.8574,0.1391,0.4839,1.1360,-0.4285,-1.1045
445,1963-08-01,4.2396,1.3730,1.4917,2.5068,4.6644,5.5703,4.5220,4.4450,4.4662,...,5.5754,4.7469,6.2516,7.6941,5.3456,5.7823,4.2633,4.6341,8.1704,6.3984
446,1963-09-01,-1.7343,0.6204,-1.0007,-1.5215,-0.3584,-4.0525,-1.5072,-0.8638,-1.4935,...,-2.6644,-2.1463,-1.7882,-3.9641,-2.0002,-1.3752,-0.8081,-0.8497,-0.1912,-3.5033
447,1963-10-01,0.3778,-0.7329,1.3066,0.1904,2.3711,1.1926,4.2411,2.3526,2.3058,...,-0.2415,0.6990,2.5214,4.8524,0.6138,5.3261,1.7420,-0.3354,2.4176,0.4702
448,1963-11-01,-3.3319,-3.8436,-1.7893,-1.0535,-1.1077,-4.2596,-1.7484,-0.7845,-0.0554,...,-0.9083,-0.6311,-0.7516,1.3596,3.5407,-1.2669,1.0080,-1.6914,-2.1316,1.3496
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
781,1991-08-01,3.7308,4.8966,3.3730,0.8023,2.7947,3.9935,3.4400,3.6917,3.2984,...,1.5777,1.9912,3.2530,2.6905,3.1127,4.3134,0.6985,2.6552,1.4422,1.7597
782,1991-09-01,4.4290,2.5734,0.2002,0.2863,-1.5098,0.5632,0.8170,1.9587,0.9642,...,-0.3266,-1.7414,-0.8519,-0.0298,-1.2317,-2.2206,-1.0818,-0.9837,0.5243,-3.8306
783,1991-10-01,6.5752,4.1495,1.8624,3.8383,1.2713,2.3274,1.4029,3.4099,0.5585,...,2.5246,0.6612,1.2885,2.3167,2.8539,1.5326,0.9423,1.9451,3.2662,0.5042
784,1991-11-01,-1.8620,-3.2117,-4.1558,-3.2872,-2.5995,-4.7841,-6.5330,-2.8905,-5.1594,...,-2.3044,-4.8734,-3.9613,-3.1107,-6.1249,-1.0874,-5.5812,-5.5566,-3.7402,-8.3335


In [7]:
# Merging port and fact to regression df.
# validate="one_to_one" makes pandas raise if a Date appears more than once in
# either frame, instead of silently duplicating rows in the regression sample.
df = port.merge(
    fact[["Date", "Mkt-RF", "SMB", "HML", "RF"]],
    on="Date",
    how="inner",
    validate="one_to_one",
)

# Portfolio return columns: every column of port except Date
portfolio_cols = port.columns.drop('Date')

# Converting to excess returns (row-wise subtraction of that month's RF)
df[portfolio_cols] = df[portfolio_cols].sub(df['RF'], axis=0)

# Creating results dictionary: portfolio name -> fitted regression results
ff3_results = {}

# Factors that will be used in regression
facts = ['Mkt-RF', 'SMB', 'HML']

# The regressors are the same for every portfolio, so the design matrix
# (constant + three factors) is built once outside the loop
X = sm.add_constant(df[facts])

for portfolio in portfolio_cols:
    Y = df[portfolio]

    # HAC (Newey-West) standard errors: robust to heteroskedasticity AND to
    # autocorrelation in the residuals up to 12 lags
    model = sm.OLS(Y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 12})
    ff3_results[portfolio] = model

In [8]:
# One row per portfolio with the estimates taken from the fitted FF3 models
ff3_factor_names = ['Mkt-RF', 'SMB', 'HML']
ff3_table = create_coef_table(ff3_results, factors=ff3_factor_names)
ff3_table.head()

Unnamed: 0,Portfolio,alpha,t_alpha,p_alpha,R2,betaMkt-RF,betaSMB,betaHML
0,SMALL LoBM,-0.374281,-2.963325,0.003,0.938311,1.031629,1.411691,-0.28512
1,ME1 BM2,-0.09812,-1.481014,0.1386,0.956171,0.964351,1.273687,0.084841
2,ME1 BM3,-0.058351,-0.828404,0.4074,0.963407,0.942777,1.152698,0.277885
3,ME1 BM4,0.0584,0.944324,0.345,0.966534,0.897335,1.101506,0.395534
4,SMALL HiBM,0.052623,0.828681,0.4073,0.963833,0.950548,1.186659,0.614888


In [9]:
# Saving the full FF3 coefficient table; index=False leaves the integer row index out of the file.
# NOTE(review): the path is relative to the working directory and assumes ../results already exists — confirm
ff3_table.to_csv('../results/ff3_results.csv', index=False)

# CAPM
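We estimate the CAPM on the same portfolios as a benchmark, so that its pricing errors can be compared with those of the FF3 model.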

In [10]:
# Creating results dictionary: portfolio name -> fitted regression results
capm_results = {}

# Only the CAPM factor; the design matrix (constant + market excess return)
# is the same for every portfolio, so it is built once outside the loop
X = sm.add_constant(df['Mkt-RF'])

for portfolio in portfolio_cols:
    Y = df[portfolio]

    # HAC (Newey-West) standard errors: robust to heteroskedasticity AND to
    # autocorrelation in the residuals up to 12 lags
    CAPM_model = sm.OLS(Y, X).fit(cov_type='HAC', cov_kwds={'maxlags': 12})

    capm_results[portfolio] = CAPM_model

In [11]:
# One row per portfolio with the estimates taken from the fitted CAPM models
capm_factor_names = ['Mkt-RF']
capm_table = create_coef_table(capm_results, factors=capm_factor_names)
capm_table.head()

Unnamed: 0,Portfolio,alpha,t_alpha,p_alpha,R2,betaMkt-RF
0,SMALL LoBM,-0.265853,-0.801667,0.4227,0.699704,1.416957
1,ME1 BM2,0.158533,0.623613,0.5329,0.703432,1.242125
2,ME1 BM3,0.26729,1.138217,0.255,0.707916,1.153083
3,ME1 BM4,0.42993,2.001645,0.0453,0.687873,1.071745
4,SMALL HiBM,0.54052,2.193523,0.0283,0.644614,1.099876


In [12]:
# Saving the full CAPM coefficient table; index=False leaves the integer row index out of the file.
# NOTE(review): the path is relative to the working directory and assumes ../results already exists — confirm
capm_table.to_csv('../results/capm_results.csv', index=False)

# CAPM vs FF3 Comparison

In [13]:
# Setting index for alignment
capm_alpha = capm_table.set_index("Portfolio")[["alpha"]]
ff3_alpha  = ff3_table.set_index("Portfolio")[["alpha"]]

# Both tables must describe the same portfolios in the same order
assert capm_alpha.index.equals(ff3_alpha.index), "Portfolio labels don't match"

# Side-by-side alphas, aligned on the Portfolio index.
# join() is used instead of DataFrame.compare(): compare() is a diff tool that
# drops every row whose two values are equal (and returns no columns at all when
# the frames are identical), so it can silently lose portfolios or break the
# column naming. The suffixes turn the shared 'alpha' column into
# 'alpha_CAPM' and 'alpha_FF3'.
alpha_compare = capm_alpha.join(ff3_alpha, lsuffix='_CAPM', rsuffix='_FF3')

# Finding change in alpha (CAPM alpha minus FF3 alpha)
alpha_compare['alpha_diff'] = (
    alpha_compare['alpha_CAPM'] - alpha_compare['alpha_FF3']
)
alpha_compare

Unnamed: 0_level_0,alpha_CAPM,alpha_FF3,alpha_diff
Portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
SMALL LoBM,-0.265853,-0.374281,0.108428
ME1 BM2,0.158533,-0.09812,0.256653
ME1 BM3,0.26729,-0.058351,0.325641
ME1 BM4,0.42993,0.0584,0.37153
SMALL HiBM,0.54052,0.052623,0.487897
ME2 BM1,-0.189115,-0.142114,-0.047001
ME2 BM2,0.141687,-0.033673,0.17536
ME2 BM3,0.386951,0.1321,0.254851
ME2 BM4,0.477632,0.139625,0.338007
ME2 BM5,0.521734,0.059187,0.462547


# Gibbons–Ross–Shanken (1989) Test

### GRS Test on FF3

In [14]:
# GRS test with the three FF3 factors as regressors.
# df is the merged frame in which the portfolio columns already hold excess returns.
facts = ['Mkt-RF', 'SMB', 'HML']
portfolio_cols = port.columns.drop('Date')

grs_ff3 = run_GRS(df, portfolio_cols, facts)
F_stat, pVal, alpha, resids = grs_ff3

for label, value in (("GRS F:", F_stat), ("p-value:", pVal)):
    print(label, value)

342 25 3
GRS F: [[1.45457859]]
p-value: [[0.07710705]]


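For FF3, the GRS test fails to reject the null hypothesis that all alphas are jointly zero at the 5% level (p ≈ 0.077), although it does reject at the 10% level. At the 5% level the alphas are therefore jointly insignificant, meaning the three-factor model prices the test portfolios reasonably well on average.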

### GRS Test on CAPM

In [15]:
# GRS test with the market excess return as the only regressor
capm_factors = ["Mkt-RF"]
portfolio_cols = port.columns.drop("Date")

grs_capm = run_GRS(df, portfolio_cols, capm_factors)
F_capm, p_capm, alpha_capm, resids_capm = grs_capm

for label, value in (("CAPM GRS F:", F_capm), ("CAPM p-value:", p_capm)):
    print(label, value)

342 25 1
CAPM GRS F: [[2.02571381]]
CAPM p-value: [[0.00311613]]


For the CAPM, the GRS test strongly rejects the null hypothesis (p ≈ 0.003). Since the null is rejected, at least some alphas are systematically non-zero, meaning the CAPM leaves part of the average returns on the test portfolios unpriced.