In [2]:
import pandas as pd
import statsmodels.api as sm
from utils.ff_functions import ff_run_regression
from utils.ff_functions import create_coef_table
from utils.ff_functions import summarise_table
from utils.ff_functions import run_GRS

# Second Sample
# 1963-2013
# TODO: add the Size-OP and Size-INV portfolios

Reference:
https://www.sciencedirect.com/science/article/pii/S0304405X14002323

In [3]:
PATH = '../data/processed'

# Sample window applied to every frame (both bounds inclusive).
SAMPLE_START = '1963-07-01'
SAMPLE_END = '2013-12-31'


def _load_sample(file_path, start=SAMPLE_START, end=SAMPLE_END):
    """Read a parquet file and keep the rows whose 'Date' lies in [start, end].

    Parameters
    ----------
    file_path : str
        Location of the parquet file to read.
    start, end : str
        Inclusive bounds handed to ``Series.between`` on the 'Date' column.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the filtered rows, safe to modify in place.
    """
    frame = pd.read_parquet(file_path)
    return frame[frame['Date'].between(start, end)].copy()


def _coerce_numeric(frame, label):
    """Coerce every column of `frame` except 'Date' to numeric, in place.

    Values that cannot be parsed become NaN (``errors='coerce'``). Because that
    happens silently, the number of NaNs added by the coercion is reported so
    bad inputs do not flow unnoticed into the regressions.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame to modify; must contain a 'Date' column that is left untouched.
    label : str
        Name used in the warning message to identify the frame.

    Returns
    -------
    list
        Names of the coerced (non-Date) columns.
    """
    cols = [c for c in frame.columns if c != 'Date']
    nan_before = int(frame[cols].isna().sum().sum())
    frame[cols] = frame[cols].apply(pd.to_numeric, errors='coerce')
    nan_added = int(frame[cols].isna().sum().sum()) - nan_before
    if nan_added:
        print(f'WARNING: {label}: {nan_added} value(s) became NaN during numeric coercion')
    return cols


# Loading processed factor df, restricted to the sample window
ff5 = _load_sample(f'{PATH}/ff5_factors_monthly.parquet')

# Loading processed 25 portfolio dfs, restricted to the sample window
p_SIZE = _load_sample(f'{PATH}/ff_portfolios_25_ME_SIZE.parquet')
p_INV = _load_sample(f'{PATH}/ff_portfolios_25_ME_INV.parquet')
p_OP = _load_sample(f'{PATH}/ff_portfolios_25_ME_OP.parquet')

# Robustness check: every portfolio frame must have as many rows as the factors
for port in [p_SIZE, p_INV, p_OP]:
    assert len(ff5) == len(port), 'Observation sizes do not match'

# Applying numeric transformations; the column lists (everything except Date)
# are kept under their original names
ff5_cols = _coerce_numeric(ff5, 'ff5')
p_SIZE_cols = _coerce_numeric(p_SIZE, 'p_SIZE')
p_INV_cols = _coerce_numeric(p_INV, 'p_INV')
p_OP_cols = _coerce_numeric(p_OP, 'p_OP')

# Checking for pseudo-missing values: show any row containing -999 or -99.99
for port in [p_SIZE, p_INV, p_OP]:
    print(port[port.isin([-999, -99.99]).any(axis=1)])

Empty DataFrame
Columns: [Date, SMALL LoBM, ME1 BM2, ME1 BM3, ME1 BM4, SMALL HiBM, ME2 BM1, ME2 BM2, ME2 BM3, ME2 BM4, ME2 BM5, ME3 BM1, ME3 BM2, ME3 BM3, ME3 BM4, ME3 BM5, ME4 BM1, ME4 BM2, ME4 BM3, ME4 BM4, ME4 BM5, BIG LoBM, ME5 BM2, ME5 BM3, ME5 BM4, BIG HiBM]
Index: []

[0 rows x 26 columns]
Empty DataFrame
Columns: [Date, SMALL LoINV, ME1 INV2, ME1 INV3, ME1 INV4, SMALL HiINV, ME2 INV1, ME2 INV2, ME2 INV3, ME2 INV4, ME2 INV5, ME3 INV1, ME3 INV2, ME3 INV3, ME3 INV4, ME3 INV5, ME4 INV1, ME4 INV2, ME4 INV3, ME4 INV4, ME4 INV5, BIG LoINV, ME5 INV2, ME5 INV3, ME5 INV4, BIG HiINV]
Index: []

[0 rows x 26 columns]
Empty DataFrame
Columns: [Date, SMALL LoOP, ME1 OP2, ME1 OP3, ME1 OP4, SMALL HiOP, ME2 OP1, ME2 OP2, ME2 OP3, ME2 OP4, ME2 OP5, ME3 OP1, ME3 OP2, ME3 OP3, ME3 OP4, ME3 OP5, ME4 OP1, ME4 OP2, ME4 OP3, ME4 OP4, ME4 OP5, BIG LoOP, ME5 OP2, ME5 OP3, ME5 OP4, BIG HiOP]
Index: []

[0 rows x 26 columns]


# Merging and Transforming

In [4]:
# Columns taken from the factor frame for every merge
FACTOR_COLS = ["Date", "Mkt-RF", "SMB", "HML", "RMW", "CMA", "RF"]


def _to_excess_returns(portfolios, factors):
    """Merge a portfolio frame with the factors and subtract RF from the portfolios.

    Parameters
    ----------
    portfolios : pandas.DataFrame
        Frame with a 'Date' column; every other column is treated as a
        portfolio return series.
    factors : pandas.DataFrame
        Frame containing the columns listed in ``FACTOR_COLS``.

    Returns
    -------
    (pandas.DataFrame, list)
        The merged frame, with the portfolio columns reduced by 'RF' row by
        row, and the list of portfolio column names.

    Raises
    ------
    pandas.errors.MergeError
        If 'Date' is not unique in either frame (``validate="one_to_one"``).
    AssertionError
        If the inner join dropped any portfolio row, i.e. a portfolio date has
        no matching factor date.
    """
    return_cols = [c for c in portfolios.columns if c != "Date"]

    merged = portfolios.merge(
        factors[FACTOR_COLS],
        on="Date",
        how="inner",
        validate="one_to_one",
    )
    # An inner join silently discards unmatched dates; equal row counts before
    # the merge do not guarantee that the dates themselves line up.
    assert len(merged) == len(portfolios), 'Merge dropped observations'

    # Converting to excess returns
    merged[return_cols] = merged[return_cols].sub(merged["RF"], axis=0)
    return merged, return_cols


# Merging all portfolio dfs with factors and converting to excess returns
df_SIZE, size_cols = _to_excess_returns(p_SIZE, ff5)
df_INV, inv_cols = _to_excess_returns(p_INV, ff5)
df_OP, op_cols = _to_excess_returns(p_OP, ff5)

# Regressions
We run the FF5 (Fama-French five-factor) regression on every portfolio set.

In [7]:
# Factors that will be used in all regressions
factors_ff5 = ["Mkt-RF", "SMB", "HML", "RMW", "CMA"]

# Merged excess-return frames, keyed by a short label per portfolio set
port_frames = {
    "SIZE" : df_SIZE,
    "INV" : df_INV,
    "OP" : df_OP,
}

# Per-label outputs of the three helper functions
results = {}
tables = {}
summaries = {}

# `label` rather than `id`: rebinding `id` in a notebook would shadow the
# builtin id() for every cell executed afterwards.
for label, frame in port_frames.items():
    results[label] = ff_run_regression(frame, factors=factors_ff5)
    tables[label] = create_coef_table(results[label], factors=factors_ff5)
    summaries[label] = summarise_table(tables[label])

# Unpacking
size_table, inv_table, op_table = tables["SIZE"], tables["INV"], tables["OP"]
size_summary, inv_summary, op_summary = summaries["SIZE"], summaries["INV"], summaries["OP"]

In [9]:
# One row per portfolio set, one column per summary statistic:
# the per-set summaries are first laid out as columns, then transposed.
summary_by_set = {
    "SIZE": size_summary,
    "INV": inv_summary,
    "OP": op_summary,
}
summary_df = pd.DataFrame(summary_by_set).transpose()

summary_df

Unnamed: 0,mean(|alpha|),% sig alpha (<0.05),mean(R2),n_portfolios
SIZE,0.0886,32.0,0.918923,25.0
INV,0.082392,12.0,0.931465,25.0
OP,0.071284,8.0,0.929778,25.0
