### Size and B/M
**Getting Data Together**

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats

factors = pd.read_csv('data/five_factor_model/factors/5f_data.csv', skiprows=4)
factors = factors[pd.to_numeric(factors['Unnamed: 0'], errors='coerce').notna()]
factors = factors.rename(columns={'Unnamed: 0': 'date', 'Mkt-RF': 'MKTRF'})
factors = factors[['date', 'MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']]
factors.iloc[:, 1:] = factors.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

portSBM = pd.read_csv('data/five_factor_model/portfolios/25_Portfolios_5x5_on_size_BM.csv', skiprows=15)
portSBM = portSBM.rename(columns={'Unnamed: 0': 'date'})
portSBM = portSBM[pd.to_numeric(portSBM['date'], errors='coerce').notna()]
portSBM = portSBM[:-1]

portSBM.iloc[:, 1:] = portSBM.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

factors = factors.drop_duplicates(subset='date')
portSBM = portSBM.drop_duplicates(subset='date')

dataSBM = pd.merge(portSBM, factors, on='date', how='inner')
dataSBM['date']
cutoff_row = dataSBM.index[dataSBM['date'] > '201312'][0]
dataSBM = dataSBM.iloc[:cutoff_row]
dataSBM

Unnamed: 0,date,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,SMALL HiBM,ME2 BM1,ME2 BM2,ME2 BM3,ME2 BM4,...,ME5 BM2,ME5 BM3,ME5 BM4,BIG HiBM,MKTRF,SMB,HML,RMW,CMA,RF
0,196307,1.1287,-0.3632,0.7223,-0.0413,-1.2447,-1.8076,0.1929,-1.0149,-1.9749,...,0.4839,1.136,-0.4285,-1.1045,-0.39,-0.48,-0.81,0.64,-1.15,0.27
1,196308,4.2396,1.373,1.4917,2.5068,4.6644,5.5703,4.522,4.445,4.4662,...,4.2633,4.6341,8.1704,6.3984,5.08,-0.8,1.7,0.4,-0.38,0.25
2,196309,-1.7343,0.6204,-1.0007,-1.5215,-0.3584,-4.0525,-1.5072,-0.8638,-1.4935,...,-0.8081,-0.8497,-0.1912,-3.5033,-1.57,-0.43,0.0,-0.78,0.15,0.27
3,196310,0.3778,-0.7329,1.3066,0.1904,2.3711,1.1926,4.2411,2.3526,2.3058,...,1.742,-0.3354,2.4176,0.4702,2.54,-1.34,-0.04,2.79,-2.25,0.29
4,196311,-3.3319,-3.8436,-1.7893,-1.0535,-1.1077,-4.2596,-1.7484,-0.7845,-0.0554,...,1.008,-1.6914,-2.1316,1.3496,-0.86,-0.85,1.73,-0.43,2.27,0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,201308,-2.3217,-2.536,-2.3048,-3.7353,-3.1512,-0.5681,-0.6886,-3.1632,-3.944,...,-2.5292,-3.5744,-3.6714,-4.5134,-2.71,-0.03,-2.73,0.71,-2.17,0.0
602,201309,7.2942,7.5884,7.1963,6.9327,5.8445,6.8252,6.1363,7.5391,6.0436,...,2.6355,2.5945,2.5619,2.2688,3.77,2.65,-1.18,-0.54,-1.32,0.0
603,201310,-0.739,0.7764,2.1975,4.0429,2.87,0.6633,1.5231,1.2683,3.8146,...,4.3822,5.4352,3.2953,3.7175,4.17,-1.5,1.22,2.75,0.89,0.0
604,201311,7.7434,5.3439,5.7074,7.1859,4.9367,6.4781,3.6126,3.311,2.8899,...,3.5527,1.6052,1.9314,6.2891,3.12,1.44,0.19,0.26,0.06,0.0


**Combining Sets and Forming Regression**

In [18]:
cols = ['SMALL LoBM', 'RF', 'MKTRF', 'SMB', 'HML', 'RMW', 'CMA']
dataSBM[cols] = dataSBM[cols].apply(pd.to_numeric, errors='coerce')

dataSBM['excess'] = dataSBM['SMALL LoBM'] - dataSBM['RF']
dataSBM['excess'] = pd.to_numeric(dataSBM['excess'], errors='coerce')

reg = smf.ols('excess ~ MKTRF + SMB + HML + RMW + CMA',
              data=dataSBM).fit(cov_type='HAC', cov_kwds={'maxlags':12})

reg.summary()

0,1,2,3
Dep. Variable:,excess,R-squared:,0.932
Model:,OLS,Adj. R-squared:,0.931
Method:,Least Squares,F-statistic:,1314.0
Date:,"Wed, 19 Nov 2025",Prob (F-statistic):,1.99e-320
Time:,15:55:09,Log-Likelihood:,-1304.6
No. Observations:,606,AIC:,2621.0
Df Residuals:,600,BIC:,2648.0
Df Model:,5,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.2761,0.085,-3.253,0.001,-0.442,-0.110
MKTRF,1.0529,0.023,45.861,0.000,1.008,1.098
SMB,1.2443,0.048,25.704,0.000,1.149,1.339
HML,-0.3881,0.054,-7.225,0.000,-0.493,-0.283
RMW,-0.4770,0.056,-8.479,0.000,-0.587,-0.367
CMA,-0.1720,0.083,-2.074,0.038,-0.335,-0.009

0,1,2,3
Omnibus:,72.497,Durbin-Watson:,2.135
Prob(Omnibus):,0.0,Jarque-Bera (JB):,291.869
Skew:,0.468,Prob(JB):,4.18e-64
Kurtosis:,6.268,Cond. No.,5.27


**Super Regression**

In [None]:
dataSBM.columns = dataSBM.columns.str.strip()

factor_cols = ['MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

dataSBM[factor_cols] = dataSBM[factor_cols].apply(pd.to_numeric, errors='coerce')
X = dataSBM[['MKTRF', 'SMB', 'HML', 'RMW', 'CMA']]
X = sm.add_constant(X)

portfolio_cols = [c for c in dataSBM.columns if c not in factor_cols + ['date']]

results = []

rf = dataSBM['RF']

for port_name in portfolio_cols:
    y = pd.to_numeric(dataSBM[port_name], errors='coerce') - rf

    reg = sm.OLS(y, X, missing='drop').fit(
        cov_type='HAC', cov_kwds={'maxlags': 12}
    )

    results.append({
        'Portfolio': port_name,
        'Alpha': reg.params['const'],
        't(Alpha)': reg.tvalues['const'],
        'b_MKT': reg.params['MKTRF'],
        't(MKT)': reg.tvalues['MKTRF'],
        'b_SMB': reg.params['SMB'],
        't(SMB)': reg.tvalues['SMB'],
        'b_HML': reg.params['HML'],
        't(HML)': reg.tvalues['HML'],
        'b_RMW': reg.params['RMW'],
        't(RMW)': reg.tvalues['RMW'],
        'b_CMA': reg.params['CMA'],
        't(CMA)': reg.tvalues['CMA'],
        'R2': reg.rsquared
    })

resultsSBM = pd.DataFrame(results).set_index('Portfolio')
resultsSBM

Unnamed: 0_level_0,Alpha,t(Alpha),b_MKT,t(MKT),b_SMB,t(SMB),b_HML,t(HML),b_RMW,t(RMW),b_CMA,t(CMA),R2
Portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
SMALL LoBM,-0.276061,-3.253392,1.052896,45.860957,1.244305,25.704363,-0.388149,-7.225223,-0.476987,-8.479191,-0.172032,-2.073814,0.932003
ME1 BM2,0.153988,1.559768,0.929513,40.937171,1.231316,25.722394,-0.118303,-2.4178,-0.402119,-4.702905,-0.063028,-1.346807,0.946274
ME1 BM3,-0.020663,-0.375115,0.937119,68.617076,1.073297,38.888013,0.132015,5.009744,-0.037735,-1.113281,0.04574,1.016887,0.953594
ME1 BM4,0.17904,2.623272,0.887755,47.196876,1.062326,39.793398,0.262747,7.113936,-0.050057,-0.847537,0.068568,1.446207,0.950727
SMALL HiBM,0.103783,1.596958,0.970478,44.652659,1.070861,32.499493,0.493106,14.300735,0.015049,0.312857,0.099278,1.690126,0.946244
ME2 BM1,-0.069492,-1.165435,1.096112,60.205065,0.972321,29.939089,-0.471085,-10.193674,-0.168129,-2.533309,-0.126624,-1.789288,0.959359
ME2 BM2,-0.049703,-0.979706,1.021679,71.017489,0.949824,28.95979,-0.030937,-0.874987,0.086641,1.313798,0.048963,0.94309,0.957036
ME2 BM3,-0.01435,-0.190036,0.979892,42.213016,0.813365,28.046249,0.285132,5.651058,0.24444,3.174413,-0.002097,-0.044117,0.942977
ME2 BM4,0.008849,0.17309,0.968575,63.542817,0.734015,33.341206,0.395266,16.644186,0.127309,3.478361,0.112445,2.32846,0.949974
ME2 BM5,-0.00935,-0.158682,1.076203,81.190348,0.880749,34.104028,0.635264,23.873566,0.023673,0.97486,0.033563,0.832743,0.954008


### Size and Investment

In [28]:
factors = pd.read_csv('data/five_factor_model/factors/5f_data.csv', skiprows=4)
factors = factors[pd.to_numeric(factors['Unnamed: 0'], errors='coerce').notna()]
factors = factors.rename(columns={'Unnamed: 0': 'date', 'Mkt-RF': 'MKTRF'})
factors = factors[['date', 'MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']]
factors.iloc[:, 1:] = factors.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')


portSIN = pd.read_csv(
    'data/five_factor_model/portfolios/25_Portfolios_ME_INV_5x5.csv',
    skiprows=16         
)
portSIN = portSIN.rename(columns={'Unnamed: 0': 'date'})
portSIN.head()
portSIN = portSIN[:-1]
portSIN
portSIN = portSIN[pd.to_numeric(portSIN['date'], errors='coerce').notna()]
portSIN.iloc[:, 1:] = portSIN.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')
factors = factors.drop_duplicates(subset='date')
portSIN = portSIN.drop_duplicates(subset='date')

dataSIN = pd.merge(portSIN, factors, on='date', how='inner')
cutoff_row = dataSIN.index[dataSIN['date'] > '201312'][0]
dataSIN = dataSIN.iloc[:cutoff_row]
dataSIN


Unnamed: 0,date,SMALL LoINV,ME1 INV2,ME1 INV3,ME1 INV4,SMALL HiINV,ME2 INV1,ME2 INV2,ME2 INV3,ME2 INV4,...,ME5 INV2,ME5 INV3,ME5 INV4,BIG HiINV,MKTRF,SMB,HML,RMW,CMA,RF
0,196307,0.2217,-1.9977,-0.0859,0.0899,0.4734,-1.5411,-1.2326,0.0698,0.2809,...,-0.0311,0.9349,0.3087,1.388,-0.39,-0.48,-0.81,0.64,-1.15,0.27
1,196308,4.5368,1.9844,2.3097,2.2577,2.9373,5.8204,5.0459,3.3426,5.282,...,5.2045,5.383,4.5708,6.753,5.08,-0.8,1.7,0.4,-0.38,0.25
2,196309,-0.8673,-0.4042,-1.7032,0.4938,-1.7319,0.0875,-2.6599,-4.157,-2.4081,...,-0.9662,-2.9453,0.0842,-0.6869,-1.57,-0.43,0.0,-0.78,0.15,0.27
3,196310,0.951,-0.837,0.2494,2.3211,-1.6296,4.2487,4.7005,1.0751,3.3033,...,0.9488,0.4054,4.2569,10.5631,2.54,-1.34,-0.04,2.79,-2.25,0.29
4,196311,-2.8287,2.1428,-3.6466,-2.8587,-2.9758,0.0434,-2.0686,0.4388,-0.7462,...,-1.1543,-1.0494,0.9908,-4.9794,-0.86,-0.85,1.73,-0.43,2.27,0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,201308,-3.1578,-3.5358,-3.414,-4.3804,-0.8777,-3.2831,-4.6267,-3.3278,-2.7034,...,-3.5394,-4.4821,-2.09,-0.4063,-2.71,-0.03,-2.73,0.71,-2.17,0.0
602,201309,7.1378,5.7873,6.5806,6.1325,8.1105,6.2765,4.6323,7.4657,5.9761,...,2.8588,2.7137,3.3934,4.2729,3.77,2.65,-1.18,-0.54,-1.32,0.0
603,201310,1.7927,2.632,3.0992,3.1234,-0.3774,1.4572,4.2844,3.6686,2.7481,...,3.6833,3.8322,4.3024,5.5098,4.17,-1.5,1.22,2.75,0.89,0.0
604,201311,7.1739,5.1384,7.4838,4.2343,5.3932,2.8896,5.1696,4.3429,4.8519,...,3.3776,3.6866,3.0759,3.2043,3.12,1.44,0.19,0.26,0.06,0.0


In [29]:
dataSIN.columns = dataSIN.columns.str.strip()

factor_cols = ['MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

dataSIN[factor_cols] = dataSIN[factor_cols].apply(pd.to_numeric, errors='coerce')

X = dataSIN[['MKTRF', 'SMB', 'HML', 'RMW', 'CMA']]
X = sm.add_constant(X)   

portfolio_cols = [c for c in dataSIN.columns if c not in factor_cols + ['date']]

results = []

rf = dataSIN['RF']

for port_name in portfolio_cols:
    y = pd.to_numeric(dataSIN[port_name], errors='coerce') - rf

    reg = sm.OLS(y, X, missing='drop').fit(
        cov_type='HAC', cov_kwds={'maxlags': 12}
    )

    results.append({
        'Portfolio': port_name,
        'Alpha': reg.params['const'],
        't(Alpha)': reg.tvalues['const'],
        'b_MKT': reg.params['MKTRF'],
        't(MKT)': reg.tvalues['MKTRF'],
        'b_SMB': reg.params['SMB'],
        't(SMB)': reg.tvalues['SMB'],
        'b_HML': reg.params['HML'],
        't(HML)': reg.tvalues['HML'],
        'b_RMW': reg.params['RMW'],
        't(RMW)': reg.tvalues['RMW'],
        'b_CMA': reg.params['CMA'],
        't(CMA)': reg.tvalues['CMA'],
        'R2': reg.rsquared
    })

resultsSIN = pd.DataFrame(results).set_index('Portfolio')
resultsSIN

Unnamed: 0_level_0,Alpha,t(Alpha),b_MKT,t(MKT),b_SMB,t(SMB),b_HML,t(HML),b_RMW,t(RMW),b_CMA,t(CMA),R2
Portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
SMALL LoINV,0.216827,1.827064,1.000147,40.817426,1.284126,37.321556,-0.053632,-1.073262,-0.528385,-6.075271,0.286593,4.222605,0.937613
ME1 INV2,0.10168,1.693965,0.906342,59.129786,1.01855,43.495902,0.169485,5.282633,-0.003199,-0.088031,0.195997,4.497588,0.950843
ME1 INV3,0.125712,2.037205,0.908482,46.961675,1.016175,48.520139,0.202925,5.225845,0.075864,1.479185,0.115709,2.084937,0.940129
ME1 INV4,0.029408,0.568211,0.937689,54.923667,1.096775,38.591026,0.132472,3.898574,0.063901,1.818011,0.024412,0.377969,0.947809
SMALL HiINV,-0.355959,-5.61294,1.027585,50.974328,1.209719,30.940625,0.02642,0.538052,-0.187893,-4.382627,-0.320652,-4.970623,0.953006
ME2 INV1,-0.02278,-0.430271,1.108057,65.350472,0.920252,35.858796,0.050381,1.467476,-0.182724,-6.844829,0.423812,10.631603,0.960123
ME2 INV2,0.00076,0.012718,0.977775,58.26983,0.752254,21.576017,0.2443,5.102581,0.197235,3.404103,0.215497,4.359037,0.933062
ME2 INV3,0.070944,1.514223,0.92518,65.75777,0.829979,29.002833,0.167732,5.559186,0.097157,2.674813,0.16656,4.149476,0.942311
ME2 INV4,0.020371,0.40628,1.01716,67.579775,0.857233,36.352829,0.235469,7.689841,0.243248,5.543614,-0.061626,-1.324035,0.957689
ME2 INV5,-0.120621,-2.102966,1.103496,64.254756,0.961629,36.505639,-0.122828,-2.87338,-0.145768,-2.736022,-0.386641,-6.792779,0.969808


### Size and Operating Profit

In [30]:
factors = pd.read_csv('data/five_factor_model/factors/5f_data.csv', skiprows=4)
factors = factors[pd.to_numeric(factors['Unnamed: 0'], errors='coerce').notna()]
factors = factors.rename(columns={'Unnamed: 0': 'date', 'Mkt-RF': 'MKTRF'})
factors = factors[['date', 'MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']]
factors.iloc[:, 1:] = factors.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

port25SOP = pd.read_csv('data/five_factor_model/portfolios/25_Portfolios_ME_OP_5x5.csv', skiprows=22)
port25SOP = port25SOP.rename(columns={'Unnamed: 0': 'date'})
port25SOP = port25SOP[pd.to_numeric(port25SOP['date'], errors='coerce').notna()]
port25SOP = port25SOP[:-1]
port25SOP.iloc[:, 1:] = port25SOP.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')
factors = factors.drop_duplicates(subset='date')
port25SOP = port25SOP.drop_duplicates(subset='date')

dataSOP = pd.merge(port25SOP, factors, on='date', how='inner')
cutoff_row = dataSOP.index[dataSOP['date'] > '201312'][0]
dataSOP = dataSOP.iloc[:cutoff_row]
dataSOP

Unnamed: 0,date,SMALL LoOP,ME1 OP2,ME1 OP3,ME1 OP4,SMALL HiOP,ME2 OP1,ME2 OP2,ME2 OP3,ME2 OP4,...,ME5 OP2,ME5 OP3,ME5 OP4,BIG HiOP,MKTRF,SMB,HML,RMW,CMA,RF
0,196307,-0.6108,1.273,2.529,-0.5216,-1.0972,-1.175,-1.9451,-1.1903,1.2298,...,1.5663,0.0336,-0.9588,0.6651,-0.39,-0.48,-0.81,0.64,-1.15,0.27
1,196308,2.2692,4.0724,2.8528,2.0117,4.2513,6.6139,6.0332,4.198,4.3652,...,4.2175,5.2946,5.6994,5.9952,5.08,-0.8,1.7,0.4,-0.38,0.25
2,196309,-1.2115,-0.876,-2.3407,7.4614,-1.4111,-1.9101,-1.8399,0.3819,-2.129,...,-2.1607,0.9056,-1.8269,-1.2079,-1.57,-0.43,0.0,-0.78,0.15,0.27
3,196310,0.3526,-0.865,0.8505,1.3877,0.3652,3.5288,3.2906,1.6911,-1.3657,...,0.0457,2.393,3.1507,7.7959,2.54,-1.34,-0.04,2.79,-2.25,0.29
4,196311,-2.1603,-1.4852,-2.855,-3.4152,-2.9596,-1.2879,-0.1063,-0.6418,-0.4184,...,-1.0265,1.2647,0.7541,-2.7811,-0.86,-0.85,1.73,-0.43,2.27,0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,201308,-2.7659,-2.8383,-2.9678,-1.537,-4.8959,-2.6271,-3.2617,-4.1475,-0.4728,...,-2.8445,-3.7624,-3.5847,-1.2531,-2.71,-0.03,-2.73,0.71,-2.17,0.0
602,201309,7.68,6.5988,4.1738,5.5973,6.9464,6.1359,5.9733,5.9613,7.8263,...,3.4635,3.2089,2.9352,2.7189,3.77,2.65,-1.18,-0.54,-1.32,0.0
603,201310,0.2223,3.1202,3.8849,4.7615,3.2563,-0.5123,2.0712,3.5594,3.3414,...,3.3616,5.2549,4.2521,5.3259,4.17,-1.5,1.22,2.75,0.89,0.0
604,201311,6.4302,5.9471,5.2543,7.805,5.1449,3.1331,5.4113,4.3481,4.596,...,2.88,1.9089,3.8583,3.5808,3.12,1.44,0.19,0.26,0.06,0.0


In [31]:
dataSIN.columns = dataSIN.columns.str.strip()

factor_cols = ['MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

dataSOP[factor_cols] = dataSOP[factor_cols].apply(pd.to_numeric, errors='coerce')

X = dataSOP[['MKTRF', 'SMB', 'HML', 'RMW', 'CMA']]
X = sm.add_constant(X)  

portfolio_cols = [c for c in dataSOP.columns if c not in factor_cols + ['date']]

results = []

rf = dataSOP['RF']

for port_name in portfolio_cols:
    y = pd.to_numeric(dataSOP[port_name], errors='coerce') - rf

    reg = sm.OLS(y, X, missing='drop').fit(
        cov_type='HAC', cov_kwds={'maxlags': 12}
    )

    results.append({
        'Portfolio': port_name,
        'Alpha': reg.params['const'],
        't(Alpha)': reg.tvalues['const'],
        'b_MKT': reg.params['MKTRF'],
        't(MKT)': reg.tvalues['MKTRF'],
        'b_SMB': reg.params['SMB'],
        't(SMB)': reg.tvalues['SMB'],
        'b_HML': reg.params['HML'],
        't(HML)': reg.tvalues['HML'],
        'b_RMW': reg.params['RMW'],
        't(RMW)': reg.tvalues['RMW'],
        'b_CMA': reg.params['CMA'],
        't(CMA)': reg.tvalues['CMA'],
        'R2': reg.rsquared
    })

resultsSOP = pd.DataFrame(results).set_index('Portfolio')
resultsSOP

Unnamed: 0_level_0,Alpha,t(Alpha),b_MKT,t(MKT),b_SMB,t(SMB),b_HML,t(HML),b_RMW,t(RMW),b_CMA,t(CMA),R2
Portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
SMALL LoOP,-0.098926,-1.060176,0.975273,47.048226,1.253892,33.815591,-0.101226,-2.362422,-0.595449,-10.810498,0.078339,1.294984,0.942277
ME1 OP2,0.068813,1.071959,0.922041,50.271091,1.082414,30.422516,0.261915,5.773815,0.136008,2.733044,-0.035045,-0.490294,0.937557
ME1 OP3,-0.094491,-1.39285,0.951584,52.722571,0.984185,26.024534,0.295109,6.197873,0.300492,4.500587,0.05583,0.861692,0.939982
ME1 OP4,-0.034599,-0.447524,0.970086,45.149171,1.033043,25.995907,0.305436,5.925225,0.380121,5.602443,-0.022586,-0.386865,0.934566
SMALL HiOP,-0.133532,-1.833093,1.092702,49.407275,1.15603,24.613349,0.150862,2.611323,0.333329,5.804796,-0.070374,-0.844189,0.937078
ME2 OP1,-0.029796,-0.430623,1.10223,57.440533,0.923687,34.934422,-0.123624,-2.726414,-0.562726,-9.3981,0.039489,0.691481,0.958242
ME2 OP2,-0.094922,-1.50304,1.029425,48.909583,0.833585,22.964416,0.1471,2.790694,0.126131,2.116021,0.12317,2.227804,0.946024
ME2 OP3,-0.04518,-0.843776,0.954892,74.791726,0.798411,31.778442,0.207458,7.30261,0.236973,4.042469,0.077526,1.393866,0.949319
ME2 OP4,-0.098936,-1.436976,0.997447,55.345145,0.864569,31.837624,0.217813,6.7637,0.388069,9.722067,-0.041235,-0.735509,0.943269
ME2 OP5,0.001869,0.025662,1.107903,44.940708,0.958553,22.025463,0.114474,1.679823,0.542758,6.748735,-0.059793,-0.935505,0.943105


### B/M and Investment

In [32]:
factors = pd.read_csv('data/five_factor_model/factors/5f_data.csv', skiprows=4)
factors = factors[pd.to_numeric(factors['Unnamed: 0'], errors='coerce').notna()]
factors = factors.rename(columns={'Unnamed: 0': 'date', 'Mkt-RF': 'MKTRF'})
factors = factors[['date', 'MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']]
factors.iloc[:, 1:] = factors.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

portBMIN = pd.read_csv('data/five_factor_model/portfolios/32_Portfolios_ME_BEME_INV_2x4x4.csv', skiprows=16)
portBMIN = portBMIN.rename(columns={'Unnamed: 0': 'date'})
portBMIN = portBMIN[pd.to_numeric(portBMIN['date'], errors='coerce').notna()]
portBMIN = portBMIN[:-1]
portBMIN.head()
portBMIN.iloc[:, 1:] = portBMIN.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')
factors = factors.drop_duplicates(subset='date')
portBMIN = portBMIN.drop_duplicates(subset='date')

dataBMIN = pd.merge(portBMIN, factors, on='date', how='inner')
cutoff_row = dataBMIN.index[dataBMIN['date'] > '201312'][0]
dataBMIN = dataBMIN.iloc[:cutoff_row]
dataBMIN

Unnamed: 0,date,SMALL LoBM LoINV,ME1 BM1 INV2,ME1 BM1 INV3,SMALL LoBM HiINV,ME1 BM2 INV1,ME1 BM2 INV2,ME1 BM2 INV3,ME1 BM2 INV4,ME1 BM3 INV1,...,BIG HiBM LoINV,ME2 BM4 INV2,ME2 BM4 INV3,BIG HiBM HiINV,MKTRF,SMB,HML,RMW,CMA,RF
0,196307,2.287,-2.2391,-0.1713,-1.0548,-0.8057,0.2079,-0.4748,0.7722,-1.2985,...,-1.1858,-0.8393,-1.8137,0.8693,-0.39,-0.48,-0.81,0.64,-1.15,0.27
1,196308,5.0568,1.9263,4.0515,5.8317,5.6387,4.4847,3.4174,5.6878,6.4512,...,8.6639,6.3232,9.2646,6.366,5.08,-0.8,1.7,0.4,-0.38,0.25
2,196309,-3.7009,0.5667,-2.078,-2.5026,3.5845,-1.2973,-0.7249,-1.3295,-2.4224,...,-0.5213,-1.3148,1.1074,-2.829,-1.57,-0.43,0.0,-0.78,0.15,0.27
3,196310,0.0727,0.9517,3.1849,0.1872,1.6412,-0.3535,2.359,1.1428,4.143,...,2.7025,2.6194,4.3769,-0.9756,2.54,-1.34,-0.04,2.79,-2.25,0.29
4,196311,-2.0991,-2.469,-2.7259,-2.9307,1.1095,0.3394,-1.9185,-2.8155,-0.1817,...,-0.3948,-2.3059,-0.4382,-1.3641,-0.86,-0.85,1.73,-0.43,2.27,0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,201308,-3.5783,-2.2329,-2.4635,0.8804,-4.1968,-4.206,-3.9074,-2.2125,-5.7413,...,-3.5329,-5.2707,-4.3396,-1.6097,-2.71,-0.03,-2.73,0.71,-2.17,0.0
602,201309,4.7731,6.0205,4.9644,7.6435,7.7901,6.3346,5.4459,7.6683,7.1511,...,3.3589,2.2574,1.9284,3.3571,3.77,2.65,-1.18,-0.54,-1.32,0.0
603,201310,1.4271,1.7605,3.4091,0.415,3.8805,4.3021,1.355,1.69,4.5291,...,5.1603,3.8692,3.6823,3.9169,4.17,-1.5,1.22,2.75,0.89,0.0
604,201311,5.6378,5.6085,3.6017,5.1732,6.3658,2.9422,3.9519,3.8935,4.9825,...,3.1943,6.3227,2.4986,1.9836,3.12,1.44,0.19,0.26,0.06,0.0


In [33]:
dataBMIN.columns = dataBMIN.columns.str.strip()

factor_cols = ['MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

dataBMIN[factor_cols] = dataBMIN[factor_cols].apply(pd.to_numeric, errors='coerce')
X = dataBMIN[['MKTRF', 'SMB', 'HML', 'RMW', 'CMA']]
X = sm.add_constant(X)

portfolio_cols = [c for c in dataBMIN.columns if c not in factor_cols + ['date']]

results = []

rf = dataBMIN['RF']

for port_name in portfolio_cols:
    y = pd.to_numeric(dataBMIN[port_name], errors='coerce') - rf

    reg = sm.OLS(y, X, missing='drop').fit(
        cov_type='HAC', cov_kwds={'maxlags': 12}
    )

    results.append({
        'Portfolio': port_name,
        'Alpha': reg.params['const'],
        't(Alpha)': reg.tvalues['const'],
        'b_MKT': reg.params['MKTRF'],
        't(MKT)': reg.tvalues['MKTRF'],
        'b_SMB': reg.params['SMB'],
        't(SMB)': reg.tvalues['SMB'],
        'b_HML': reg.params['HML'],
        't(HML)': reg.tvalues['HML'],
        'b_RMW': reg.params['RMW'],
        't(RMW)': reg.tvalues['RMW'],
        'b_CMA': reg.params['CMA'],
        't(CMA)': reg.tvalues['CMA'],
        'R2': reg.rsquared
    })

resultsBMIN = pd.DataFrame(results).set_index('Portfolio')
resultsBMIN

Unnamed: 0_level_0,Alpha,t(Alpha),b_MKT,t(MKT),b_SMB,t(SMB),b_HML,t(HML),b_RMW,t(RMW),b_CMA,t(CMA),R2
Portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
SMALL LoBM LoINV,-0.01372,-0.145985,1.07087,57.142979,1.14241,26.058768,-0.431691,-10.667346,-0.394154,-7.277017,0.454305,6.883691,0.929879
ME1 BM1 INV2,0.074432,1.03051,0.992086,43.282581,0.917188,22.82848,-0.208192,-3.568227,-0.023258,-0.305938,0.280636,3.163994,0.928646
ME1 BM1 INV3,0.120806,2.272382,0.974762,70.970444,0.913424,35.860035,-0.173159,-5.617614,0.092869,2.108202,0.084993,1.900101,0.957584
SMALL LoBM HiINV,-0.18047,-3.542489,1.092551,65.747578,1.014468,34.812795,-0.239358,-5.578769,-0.126402,-2.288136,-0.424624,-8.816341,0.976697
ME1 BM2 INV1,0.046799,0.675385,1.053913,33.775275,0.961273,24.429772,-0.034845,-0.717146,-0.079208,-1.402735,0.50051,7.873372,0.922408
ME1 BM2 INV2,0.022265,0.39813,0.938309,53.390121,0.730715,27.797786,0.192855,4.543553,0.236701,4.438554,0.298529,7.529119,0.929367
ME1 BM2 INV3,-0.032886,-0.540514,0.946201,49.583112,0.817473,37.713546,0.250829,5.150692,0.349387,5.794074,0.098684,2.369188,0.94055
ME1 BM2 INV4,-0.053919,-0.846386,1.02666,54.367891,0.932957,32.685098,0.283315,6.772324,0.093932,1.602193,-0.23463,-3.70676,0.946805
ME1 BM3 INV1,0.123246,1.417575,1.031961,43.083592,0.984273,39.444668,0.250638,6.648716,-0.062369,-1.296125,0.388915,5.647961,0.926778
ME1 BM3 INV2,-0.051245,-1.045256,0.934203,61.106598,0.748499,35.640856,0.343362,12.412016,0.211607,7.507443,0.294873,6.605251,0.940901


### B/M Operating Profit

In [34]:
factors = pd.read_csv('data/five_factor_model/factors/5f_data.csv', skiprows=4)
factors = factors[pd.to_numeric(factors['Unnamed: 0'], errors='coerce').notna()]
factors = factors.rename(columns={'Unnamed: 0': 'date', 'Mkt-RF': 'MKTRF'})
factors = factors[['date', 'MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']]
factors.iloc[:, 1:] = factors.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

portBMOP = pd.read_csv('data/five_factor_model/portfolios/32_Portfolios_ME_BEME_OP_2x4x4.csv', skiprows=23)
portBMOP = portBMOP.rename(columns={'Unnamed: 0': 'date'})
portBMOP = portBMOP[pd.to_numeric(portBMOP['date'], errors='coerce').notna()]
portBMOP = portBMOP[:-1]
portBMOP.head()
portBMOP.iloc[:, 1:] = portBMOP.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')
factors = factors.drop_duplicates(subset='date')
portBMOP = portBMOP.drop_duplicates(subset='date')

dataBMOP = pd.merge(portBMOP, factors, on='date', how='inner')
cutoff_row = dataBMOP.index[dataBMOP['date'] > '201312'][0]
dataBMOP = dataBMOP.iloc[:cutoff_row]
dataBMOP

Unnamed: 0,date,SMALL LoBM LoOP,ME1 BM1 OP2,ME1 BM1 OP3,SMALL LoBM HiOP,ME1 BM2 OP1,ME1 BM2 OP2,ME1 BM2 OP3,ME1 BM2 OP4,ME1 BM3 OP1,...,BIG HiBM LoOP,ME2 BM4 OP2,ME2 BM4 OP3,BIG HiBM HiOP,MKTRF,SMB,HML,RMW,CMA,RF
0,196307,-0.758,-1.3712,-0.8586,-0.4613,3.9062,-0.2508,-0.971,2.3822,-1.3823,...,-1.2258,0.1243,-1.9071,-0.6775,-0.39,-0.48,-0.81,0.64,-1.15,0.27
1,196308,2.1338,9.7448,2.6713,5.1957,4.2026,5.3924,4.2391,6.0359,2.0607,...,6.8226,10.3616,5.7373,8.1046,5.08,-0.8,1.7,0.4,-0.38,0.25
2,196309,-1.5143,-2.1044,0.0405,-2.7377,-2.0017,2.0945,-0.5559,-2.3808,-1.53,...,-1.2733,0.2842,-1.5299,-2.4192,-1.57,-0.43,0.0,-0.78,0.15,0.27
3,196310,-2.1998,1.4798,2.148,1.2446,-0.4678,1.864,0.8982,2.9256,1.7367,...,1.0704,6.0695,6.8414,2.8136,2.54,-1.34,-0.04,2.79,-2.25,0.29
4,196311,-5.658,1.0325,-2.4006,-2.6364,-4.3374,-0.8103,-0.6711,0.3397,-0.0757,...,-1.022,-2.6897,4.0117,1.1559,-0.86,-0.85,1.73,-0.43,2.27,0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,201308,-1.3204,-0.7074,-2.0222,-1.0024,-3.8927,-2.1345,-4.7881,-2.8397,-4.5363,...,-3.8894,-5.2529,-2.5608,-6.3112,-2.71,-0.03,-2.73,0.71,-2.17,0.0
602,201309,5.8897,7.5773,5.1148,6.7629,8.2016,6.5037,7.1934,5.2372,7.4236,...,3.1341,2.1107,1.888,-0.2766,3.77,2.65,-1.18,-0.54,-1.32,0.0
603,201310,-3.6549,1.0061,4.9072,3.6872,-0.923,0.8681,3.7618,5.1329,3.5618,...,3.8272,3.5418,5.3153,10.8526,4.17,-1.5,1.22,2.75,0.89,0.0
604,201311,5.2049,2.8923,4.761,5.4742,3.4307,4.6013,3.096,5.0021,4.8183,...,4.04,4.0766,2.0166,6.1541,3.12,1.44,0.19,0.26,0.06,0.0


In [35]:
dataBMOP.columns = dataBMOP.columns.str.strip()

factor_cols = ['MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

dataBMOP[factor_cols] = dataBMOP[factor_cols].apply(pd.to_numeric, errors='coerce')
X = dataBMOP[['MKTRF', 'SMB', 'HML', 'RMW', 'CMA']]
X = sm.add_constant(X)   

portfolio_cols = [c for c in dataBMOP.columns if c not in factor_cols + ['date']]

results = []

rf = dataBMOP['RF']

for port_name in portfolio_cols:
    y = pd.to_numeric(dataBMOP[port_name], errors='coerce') - rf

    reg = sm.OLS(y, X, missing='drop').fit(
        cov_type='HAC', cov_kwds={'maxlags': 12}
    )

    results.append({
        'Portfolio': port_name,
        'Alpha': reg.params['const'],
        't(Alpha)': reg.tvalues['const'],
        'b_MKT': reg.params['MKTRF'],
        't(MKT)': reg.tvalues['MKTRF'],
        'b_SMB': reg.params['SMB'],
        't(SMB)': reg.tvalues['SMB'],
        'b_HML': reg.params['HML'],
        't(HML)': reg.tvalues['HML'],
        'b_RMW': reg.params['RMW'],
        't(RMW)': reg.tvalues['RMW'],
        'b_CMA': reg.params['CMA'],
        't(CMA)': reg.tvalues['CMA'],
        'R2': reg.rsquared
    })

resultsBMOP = pd.DataFrame(results).set_index('Portfolio')
resultsBMOP

Unnamed: 0_level_0,Alpha,t(Alpha),b_MKT,t(MKT),b_SMB,t(SMB),b_HML,t(HML),b_RMW,t(RMW),b_CMA,t(CMA),R2
Portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
SMALL LoBM LoOP,-0.339381,-3.481502,1.063122,39.694115,1.142983,21.613087,-0.528006,-8.962646,-0.929634,-10.845346,-0.158693,-1.717584,0.931811
ME1 BM1 OP2,-0.021124,-0.195335,1.090831,30.654177,0.97078,16.173953,-0.306217,-3.449835,-0.011213,-0.084305,-0.135044,-1.337049,0.904234
ME1 BM1 OP3,-0.116409,-1.703754,1.034581,50.863216,0.92287,38.707648,-0.156944,-4.04516,0.193072,2.741107,-0.001246,-0.018347,0.94992
SMALL LoBM HiOP,-0.108348,-2.066124,1.089756,52.562716,0.957469,27.965496,0.006207,0.115498,0.544114,6.668153,-0.070269,-1.602176,0.968892
ME1 BM2 OP1,0.035909,0.323,1.064485,40.3612,1.055083,17.275647,-0.129902,-2.058059,-0.487268,-3.833004,0.069776,0.571916,0.911309
ME1 BM2 OP2,-0.089288,-1.302731,0.991517,38.936964,0.863347,22.319397,0.156628,2.694597,0.244021,2.893423,0.137168,2.485873,0.923796
ME1 BM2 OP3,-0.061178,-1.218335,0.937578,43.883082,0.730999,24.536662,0.254183,5.515976,0.346763,5.655242,0.180438,4.078072,0.938858
ME1 BM2 OP4,0.010479,0.139835,1.010567,50.434548,0.867658,25.676803,0.483091,8.844805,0.533912,6.219587,-0.028635,-0.548875,0.933593
ME1 BM3 OP1,-0.048044,-0.624557,1.048415,62.971498,1.043319,32.090104,0.148378,4.173976,-0.186207,-2.399046,0.242987,2.930129,0.94114
ME1 BM3 OP2,-0.081836,-1.25907,0.948567,36.485846,0.717019,24.679254,0.403821,9.711899,0.223315,3.91738,0.143249,3.09386,0.934369


### Operating Profit and Investment

In [36]:
factors = pd.read_csv('data/five_factor_model/factors/5f_data.csv', skiprows=4)
factors = factors[pd.to_numeric(factors['Unnamed: 0'], errors='coerce').notna()]
factors = factors.rename(columns={'Unnamed: 0': 'date', 'Mkt-RF': 'MKTRF'})
factors = factors[['date', 'MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']]
factors.iloc[:, 1:] = factors.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

portOPIN = pd.read_csv('data/five_factor_model/portfolios/32_Portfolios_ME_BEME_OP_2x4x4.csv', skiprows=23)
portOPIN = portOPIN.rename(columns={'Unnamed: 0': 'date'})
portOPIN = portOPIN[pd.to_numeric(portOPIN['date'], errors='coerce').notna()]
portOPIN = portOPIN[:-1]
portOPIN.head()
portOPIN.iloc[:, 1:] = portOPIN.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')
factors = factors.drop_duplicates(subset='date')
portOPIN = portOPIN.drop_duplicates(subset='date')

dataOPIN = pd.merge(portOPIN, factors, on='date', how='inner')
cutoff_row = dataOPIN.index[dataOPIN['date'] > '201312'][0]
dataOPIN = dataOPIN.iloc[:cutoff_row]
dataOPIN

Unnamed: 0,date,SMALL LoBM LoOP,ME1 BM1 OP2,ME1 BM1 OP3,SMALL LoBM HiOP,ME1 BM2 OP1,ME1 BM2 OP2,ME1 BM2 OP3,ME1 BM2 OP4,ME1 BM3 OP1,...,BIG HiBM LoOP,ME2 BM4 OP2,ME2 BM4 OP3,BIG HiBM HiOP,MKTRF,SMB,HML,RMW,CMA,RF
0,196307,-0.758,-1.3712,-0.8586,-0.4613,3.9062,-0.2508,-0.971,2.3822,-1.3823,...,-1.2258,0.1243,-1.9071,-0.6775,-0.39,-0.48,-0.81,0.64,-1.15,0.27
1,196308,2.1338,9.7448,2.6713,5.1957,4.2026,5.3924,4.2391,6.0359,2.0607,...,6.8226,10.3616,5.7373,8.1046,5.08,-0.8,1.7,0.4,-0.38,0.25
2,196309,-1.5143,-2.1044,0.0405,-2.7377,-2.0017,2.0945,-0.5559,-2.3808,-1.53,...,-1.2733,0.2842,-1.5299,-2.4192,-1.57,-0.43,0.0,-0.78,0.15,0.27
3,196310,-2.1998,1.4798,2.148,1.2446,-0.4678,1.864,0.8982,2.9256,1.7367,...,1.0704,6.0695,6.8414,2.8136,2.54,-1.34,-0.04,2.79,-2.25,0.29
4,196311,-5.658,1.0325,-2.4006,-2.6364,-4.3374,-0.8103,-0.6711,0.3397,-0.0757,...,-1.022,-2.6897,4.0117,1.1559,-0.86,-0.85,1.73,-0.43,2.27,0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,201308,-1.3204,-0.7074,-2.0222,-1.0024,-3.8927,-2.1345,-4.7881,-2.8397,-4.5363,...,-3.8894,-5.2529,-2.5608,-6.3112,-2.71,-0.03,-2.73,0.71,-2.17,0.0
602,201309,5.8897,7.5773,5.1148,6.7629,8.2016,6.5037,7.1934,5.2372,7.4236,...,3.1341,2.1107,1.888,-0.2766,3.77,2.65,-1.18,-0.54,-1.32,0.0
603,201310,-3.6549,1.0061,4.9072,3.6872,-0.923,0.8681,3.7618,5.1329,3.5618,...,3.8272,3.5418,5.3153,10.8526,4.17,-1.5,1.22,2.75,0.89,0.0
604,201311,5.2049,2.8923,4.761,5.4742,3.4307,4.6013,3.096,5.0021,4.8183,...,4.04,4.0766,2.0166,6.1541,3.12,1.44,0.19,0.26,0.06,0.0


In [37]:
dataOPIN.columns = dataOPIN.columns.str.strip()

factor_cols = ['MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

dataOPIN[factor_cols] = dataOPIN[factor_cols].apply(pd.to_numeric, errors='coerce')
X = dataOPIN[['MKTRF', 'SMB', 'HML', 'RMW', 'CMA']]
X = sm.add_constant(X)   

portfolio_cols = [c for c in dataOPIN.columns if c not in factor_cols + ['date']]

results = []

rf = dataOPIN['RF']

for port_name in portfolio_cols:
    y = pd.to_numeric(dataOPIN[port_name], errors='coerce') - rf

    reg = sm.OLS(y, X, missing='drop').fit(
        cov_type='HAC', cov_kwds={'maxlags': 12}
    )

    results.append({
        'Portfolio': port_name,
        'Alpha': reg.params['const'],
        't(Alpha)': reg.tvalues['const'],
        'b_MKT': reg.params['MKTRF'],
        't(MKT)': reg.tvalues['MKTRF'],
        'b_SMB': reg.params['SMB'],
        't(SMB)': reg.tvalues['SMB'],
        'b_HML': reg.params['HML'],
        't(HML)': reg.tvalues['HML'],
        'b_RMW': reg.params['RMW'],
        't(RMW)': reg.tvalues['RMW'],
        'b_CMA': reg.params['CMA'],
        't(CMA)': reg.tvalues['CMA'],
        'R2': reg.rsquared
    })

resultsOPIN = pd.DataFrame(results).set_index('Portfolio')
resultsOPIN

Unnamed: 0_level_0,Alpha,t(Alpha),b_MKT,t(MKT),b_SMB,t(SMB),b_HML,t(HML),b_RMW,t(RMW),b_CMA,t(CMA),R2
Portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
SMALL LoBM LoOP,-0.339381,-3.481502,1.063122,39.694115,1.142983,21.613087,-0.528006,-8.962646,-0.929634,-10.845346,-0.158693,-1.717584,0.931811
ME1 BM1 OP2,-0.021124,-0.195335,1.090831,30.654177,0.97078,16.173953,-0.306217,-3.449835,-0.011213,-0.084305,-0.135044,-1.337049,0.904234
ME1 BM1 OP3,-0.116409,-1.703754,1.034581,50.863216,0.92287,38.707648,-0.156944,-4.04516,0.193072,2.741107,-0.001246,-0.018347,0.94992
SMALL LoBM HiOP,-0.108348,-2.066124,1.089756,52.562716,0.957469,27.965496,0.006207,0.115498,0.544114,6.668153,-0.070269,-1.602176,0.968892
ME1 BM2 OP1,0.035909,0.323,1.064485,40.3612,1.055083,17.275647,-0.129902,-2.058059,-0.487268,-3.833004,0.069776,0.571916,0.911309
ME1 BM2 OP2,-0.089288,-1.302731,0.991517,38.936964,0.863347,22.319397,0.156628,2.694597,0.244021,2.893423,0.137168,2.485873,0.923796
ME1 BM2 OP3,-0.061178,-1.218335,0.937578,43.883082,0.730999,24.536662,0.254183,5.515976,0.346763,5.655242,0.180438,4.078072,0.938858
ME1 BM2 OP4,0.010479,0.139835,1.010567,50.434548,0.867658,25.676803,0.483091,8.844805,0.533912,6.219587,-0.028635,-0.548875,0.933593
ME1 BM3 OP1,-0.048044,-0.624557,1.048415,62.971498,1.043319,32.090104,0.148378,4.173976,-0.186207,-2.399046,0.242987,2.930129,0.94114
ME1 BM3 OP2,-0.081836,-1.25907,0.948567,36.485846,0.717019,24.679254,0.403821,9.711899,0.223315,3.91738,0.143249,3.09386,0.934369


### Three Factor Model with Size and B/M

In [38]:
tfactors = pd.read_csv('data/three_factor_model/factors/3f_data.csv', skiprows=4)
tfactors = tfactors[pd.to_numeric(tfactors['Unnamed: 0'], errors='coerce').notna()]
tfactors = tfactors.rename(columns={'Unnamed: 0': 'date', 'Mkt-RF': 'MKTRF'})
tfactors = tfactors[['date', 'MKTRF', 'SMB', 'HML', 'RF']]
tfactors.iloc[:, 1:] = tfactors.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')
portSBM = pd.read_csv('data/five_factor_model/portfolios/25_Portfolios_5x5_on_size_BM.csv', skiprows=15)
portSBM = portSBM.rename(columns={'Unnamed: 0': 'date'})
portSBM = portSBM[pd.to_numeric(portSBM['date'], errors='coerce').notna()]
portSBM = portSBM[:-1]
portSBM.iloc[:, 1:] = portSBM.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

tfactors = tfactors.drop_duplicates(subset='date')
portSBM = portSBM.drop_duplicates(subset='date')

data3 = pd.merge(portSBM, tfactors, on='date', how='inner')
cutoff_row = data3.index[data3['date'] > '201312'][0]
data3 = data3.iloc[:cutoff_row]
data3

Unnamed: 0,date,SMALL LoBM,ME1 BM2,ME1 BM3,ME1 BM4,SMALL HiBM,ME2 BM1,ME2 BM2,ME2 BM3,ME2 BM4,...,ME4 BM5,BIG LoBM,ME5 BM2,ME5 BM3,ME5 BM4,BIG HiBM,MKTRF,SMB,HML,RF
0,192607,5.8276,-1.7006,0.5118,-2.1477,1.9583,1.2118,2.4107,0.6056,-2.6082,...,2.4678,3.3248,6.0909,2.0285,3.1263,0.5623,2.89,-2.55,-2.39,0.22
1,192608,-2.0206,-8.0282,1.3968,2.1483,8.5104,2.362,-0.7525,3.8984,0.2299,...,5.3422,1.0169,4.1975,1.9769,5.4924,7.7576,2.64,-1.14,3.81,0.25
2,192609,-4.8291,-2.6806,-4.3417,-3.2683,0.8586,-2.6849,-0.5252,1.0789,-3.2877,...,0.873,-1.2951,3.661,0.1384,-0.7497,-2.4284,0.38,-1.36,0.05,0.23
3,192610,-9.3633,-3.5519,-3.5024,3.4413,-2.5452,-2.8014,-4.4191,-5.0767,-8.0271,...,-5.3525,-2.7382,-3.0061,-2.2467,-4.6725,-5.8129,-3.27,-0.14,0.82,0.32
4,192611,5.5888,4.1877,2.4384,-4.4495,0.511,3.1023,-1.7317,3.0425,4.9538,...,1.8213,4.4331,2.5355,1.528,3.6596,2.5636,2.54,-0.11,-0.61,0.31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1045,201308,-2.3217,-2.536,-2.3048,-3.7353,-3.1512,-0.5681,-0.6886,-3.1632,-3.944,...,-4.3789,-1.4845,-2.5292,-3.5744,-3.6714,-4.5134,-2.71,0.32,-2.73,0.0
1046,201309,7.2942,7.5884,7.1963,6.9327,5.8445,6.8252,6.1363,7.5391,6.0436,...,3.3946,4.3089,2.6355,2.5945,2.5619,2.2688,3.77,2.89,-1.18,0.0
1047,201310,-0.739,0.7764,2.1975,4.0429,2.87,0.6633,1.5231,1.2683,3.8146,...,6.1959,4.8861,4.3822,5.4352,3.2953,3.7175,4.17,-1.56,1.22,0.0
1048,201311,7.7434,5.3439,5.7074,7.1859,4.9367,6.4781,3.6126,3.311,2.8899,...,3.7551,3.1878,3.5527,1.6052,1.9314,6.2891,3.11,1.31,0.19,0.0


In [39]:
data3.columns = data3.columns.str.strip()

factor_cols = ['MKTRF', 'SMB', 'HML', 'RF']

data3[factor_cols] = data3[factor_cols].apply(pd.to_numeric, errors='coerce')
X = data3[['MKTRF', 'SMB', 'HML']]
X = sm.add_constant(X)  

portfolio_cols = [c for c in data3.columns if c not in factor_cols + ['date']]

results = []

rf = data3['RF']

for port_name in portfolio_cols:
    y = pd.to_numeric(data3[port_name], errors='coerce') - rf

    reg = sm.OLS(y, X, missing='drop').fit(
        cov_type='HAC', cov_kwds={'maxlags': 12}
    )

    results.append({
        'Portfolio': port_name,
        'Alpha': reg.params['const'],
        't(Alpha)': reg.tvalues['const'],
        'b_MKT': reg.params['MKTRF'],
        't(MKT)': reg.tvalues['MKTRF'],
        'b_SMB': reg.params['SMB'],
        't(SMB)': reg.tvalues['SMB'],
        'b_HML': reg.params['HML'],
        't(HML)': reg.tvalues['HML'],
        'R2': reg.rsquared
    })

results3 = pd.DataFrame(results).set_index('Portfolio')
results3

Unnamed: 0_level_0,Alpha,t(Alpha),b_MKT,t(MKT),b_SMB,t(SMB),b_HML,t(HML),R2
Portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
SMALL LoBM,-0.741905,-4.779566,1.275119,14.132233,1.462458,7.251228,0.433439,1.934259,0.658069
ME1 BM2,-0.470119,-3.789396,1.075688,43.86375,1.552757,8.568266,0.24047,3.595415,0.822301
ME1 BM3,-0.144915,-2.287044,1.054558,26.428617,1.243709,24.098374,0.527425,13.612385,0.889069
ME1 BM4,0.076479,1.23138,0.947758,43.958603,1.248086,11.631564,0.576957,17.027373,0.927534
SMALL HiBM,0.08465,1.424591,0.987474,40.394389,1.293517,17.803493,0.912637,16.744072,0.940842
ME2 BM1,-0.230767,-3.505146,1.08747,38.115335,1.140145,15.946026,-0.223728,-6.953125,0.909994
ME2 BM2,-0.002461,-0.038344,1.02233,60.358989,1.005546,13.919419,0.127839,2.455571,0.933388
ME2 BM3,0.021917,0.403924,0.985278,39.749415,0.823746,10.176094,0.353005,5.953373,0.932598
ME2 BM4,0.024851,0.51892,0.971764,53.954338,0.820903,11.998791,0.583315,12.457491,0.951548
ME2 BM5,0.034042,0.635685,1.063203,49.759931,0.924641,20.543271,0.891657,21.608364,0.952143


### Mean Absolute Alpha's and GRS Test

In [50]:
factor_cols = ['MKTRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

def avg_excess_return(df):
    # risk-free rate
    rf = pd.to_numeric(df['RF'], errors='coerce')
    
    # portfolio columns = everything that's not a factor or date
    port_cols = [c for c in df.columns if c not in factor_cols + ['date']]
    
    # convert to numeric and compute excess returns
    ports = df[port_cols].apply(pd.to_numeric, errors='coerce')
    excess = ports.sub(rf, axis=0)        # R_it - R_ft for each portfolio
    
    # mean over time, then mean across portfolios
    return excess.mean().mean()


In [51]:
def safe_mean(df, col):
    return df[col].mean() if col in df.columns else np.nan


ff_stats = pd.DataFrame({
    'Avg |Alpha|': [
        resultsSBM['Alpha'].abs().mean(),
        resultsSIN['Alpha'].abs().mean(),
        resultsSOP['Alpha'].abs().mean(),
        resultsBMIN['Alpha'].abs().mean(),
        resultsBMOP['Alpha'].abs().mean(),
        resultsOPIN['Alpha'].abs().mean()
    ],
    'Avg |t(Alpha)|': [
        resultsSBM['t(Alpha)'].abs().mean(),
        resultsSIN['t(Alpha)'].abs().mean(),
        resultsSOP['t(Alpha)'].abs().mean(),
        resultsBMIN['t(Alpha)'].abs().mean(),
        resultsBMOP['t(Alpha)'].abs().mean(),
        resultsOPIN['t(Alpha)'].abs().mean()
    ],
    'Avg b_MKT': [
        safe_mean(resultsSBM, 'b_MKT'),
        safe_mean(resultsSIN, 'b_MKT'),
        safe_mean(resultsSOP, 'b_MKT'),
        safe_mean(resultsBMIN, 'b_MKT'),
        safe_mean(resultsBMOP, 'b_MKT'),
        safe_mean(resultsOPIN, 'b_MKT')
    ],
    'Avg b_SMB': [
        safe_mean(resultsSBM, 'b_SMB'),
        safe_mean(resultsSIN, 'b_SMB'),
        safe_mean(resultsSOP, 'b_SMB'),
        safe_mean(resultsBMIN, 'b_SMB'),
        safe_mean(resultsBMOP, 'b_SMB'),
        safe_mean(resultsOPIN, 'b_SMB')
    ],
    'Avg b_HML': [
        safe_mean(resultsSBM, 'b_HML'),
        safe_mean(resultsSIN, 'b_HML'),
        safe_mean(resultsSOP, 'b_HML'),
        safe_mean(resultsBMIN, 'b_HML'),
        safe_mean(resultsBMOP, 'b_HML'),
        safe_mean(resultsOPIN, 'b_HML')
    ],
    'Avg b_RMW': [
        safe_mean(resultsSBM, 'b_RMW'),
        safe_mean(resultsSIN, 'b_RMW'),
        safe_mean(resultsSOP, 'b_RMW'),
        safe_mean(resultsBMIN, 'b_RMW'),
        safe_mean(resultsBMOP, 'b_RMW'),
        safe_mean(resultsOPIN, 'b_RMW')
    ],
    'Avg b_CMA': [
        safe_mean(resultsSBM, 'b_CMA'),
        safe_mean(resultsSIN, 'b_CMA'),
        safe_mean(resultsSOP, 'b_CMA'),
        safe_mean(resultsBMIN, 'b_CMA'),
        safe_mean(resultsBMOP, 'b_CMA'),
        safe_mean(resultsOPIN, 'b_CMA')
    ],
    'Avg R2': [
        safe_mean(resultsSBM, 'R2'),
        safe_mean(resultsSIN, 'R2'),
        safe_mean(resultsSOP, 'R2'),
        safe_mean(resultsBMIN, 'R2'),
        safe_mean(resultsBMOP, 'R2'),
        safe_mean(resultsOPIN, 'R2')
    ]
},
    index=[
        'Size-BM (25 SMB)',
        'Size-INV (25 SIN)',
        'Size-OP (25 SOP)',
        'ME-BM INV (32 BMIN)',
        'ME-BM OP (32 BMOP)',
        'ME-OP INV (32 OPIN)'
    ]
)

ff_stats.loc['Overall Average'] = ff_stats.mean()

ff_stats


Unnamed: 0,Avg |Alpha|,Avg |t(Alpha)|,Avg b_MKT,Avg b_SMB,Avg b_HML,Avg b_RMW,Avg b_CMA,Avg R2
Size-BM (25 SMB),0.116301,1.498703,1.02509,0.564895,0.183134,0.012657,0.013515,0.919952
Size-INV (25 SIN),0.086367,1.328872,1.01821,0.536705,0.083648,0.026125,0.0862,0.931551
Size-OP (25 SOP),0.065514,0.948407,1.025749,0.525228,0.113027,0.041496,0.016381,0.929944
ME-BM INV (32 BMIN),0.099492,1.283886,1.024315,0.429663,0.204201,0.092312,0.100697,0.886374
ME-BM OP (32 BMOP),0.116674,1.232668,1.041568,0.435498,0.213145,0.100783,0.023032,0.858084
ME-OP INV (32 OPIN),0.116674,1.232668,1.041568,0.435498,0.213145,0.100783,0.023032,0.858084
Overall Average,0.10017,1.2542,1.029417,0.487914,0.168383,0.062359,0.04381,0.897331


In [53]:
def safe_mean(df, col):
    return df[col].mean() if col in df.columns else np.nan

# --- NEW helper for avg excess return ---
def avg_excess(df):
    rf = pd.to_numeric(df['RF'], errors='coerce')
    port_cols = [c for c in df.columns if c not in ['MKTRF','SMB','HML','RMW','CMA','RF','date']]
    ports = df[port_cols].apply(pd.to_numeric, errors='coerce')
    excess = ports.sub(rf, axis=0)
    return excess.mean().mean()

ff_stats = pd.DataFrame({
    # will reorder later
    'Avg |Alpha|': [
        resultsSBM['Alpha'].abs().mean(),
        resultsSIN['Alpha'].abs().mean(),
        resultsSOP['Alpha'].abs().mean(),
        resultsBMIN['Alpha'].abs().mean(),
        resultsBMOP['Alpha'].abs().mean(),
        resultsOPIN['Alpha'].abs().mean()
    ],
    'Avg |t(Alpha)|': [
        resultsSBM['t(Alpha)'].abs().mean(),
        resultsSIN['t(Alpha)'].abs().mean(),
        resultsSOP['t(Alpha)'].abs().mean(),
        resultsBMIN['t(Alpha)'].abs().mean(),
        resultsBMOP['t(Alpha)'].abs().mean(),
        resultsOPIN['t(Alpha)'].abs().mean()
    ],
    'Avg b_MKT': [
        safe_mean(resultsSBM, 'b_MKT'),
        safe_mean(resultsSIN, 'b_MKT'),
        safe_mean(resultsSOP, 'b_MKT'),
        safe_mean(resultsBMIN, 'b_MKT'),
        safe_mean(resultsBMOP, 'b_MKT'),
        safe_mean(resultsOPIN, 'b_MKT')
    ],
    'Avg b_SMB': [
        safe_mean(resultsSBM, 'b_SMB'),
        safe_mean(resultsSIN, 'b_SMB'),
        safe_mean(resultsSOP, 'b_SMB'),
        safe_mean(resultsBMIN, 'b_SMB'),
        safe_mean(resultsBMOP, 'b_SMB'),
        safe_mean(resultsOPIN, 'b_SMB')
    ],
    'Avg b_HML': [
        safe_mean(resultsSBM, 'b_HML'),
        safe_mean(resultsSIN, 'b_HML'),
        safe_mean(resultsSOP, 'b_HML'),
        safe_mean(resultsBMIN, 'b_HML'),
        safe_mean(resultsBMOP, 'b_HML'),
        safe_mean(resultsOPIN, 'b_HML')
    ],
    'Avg b_RMW': [
        safe_mean(resultsSBM, 'b_RMW'),
        safe_mean(resultsSIN, 'b_RMW'),
        safe_mean(resultsSOP, 'b_RMW'),
        safe_mean(resultsBMIN, 'b_RMW'),
        safe_mean(resultsBMOP, 'b_RMW'),
        safe_mean(resultsOPIN, 'b_RMW')
    ],
    'Avg b_CMA': [
        safe_mean(resultsSBM, 'b_CMA'),
        safe_mean(resultsSIN, 'b_CMA'),
        safe_mean(resultsSOP, 'b_CMA'),
        safe_mean(resultsBMIN, 'b_CMA'),
        safe_mean(resultsBMOP, 'b_CMA'),
        safe_mean(resultsOPIN, 'b_CMA')
    ],
    'Avg R2': [
        safe_mean(resultsSBM, 'R2'),
        safe_mean(resultsSIN, 'R2'),
        safe_mean(resultsSOP, 'R2'),
        safe_mean(resultsBMIN, 'R2'),
        safe_mean(resultsBMOP, 'R2'),
        safe_mean(resultsOPIN, 'R2')
    ],

    # NEW COLUMN (will be moved to the front)
    'Avg Excess Return': [
        avg_excess(dataSBM),
        avg_excess(dataSIN),
        avg_excess(dataSOP),
        avg_excess(dataBMIN),
        avg_excess(dataBMOP),
        avg_excess(dataOPIN)
    ]
},
    index=[
        'Size-BM (25 SMB)',
        'Size-INV (25 SIN)',
        'Size-OP (25 SOP)',
        'ME-BM INV (32 BMIN)',
        'ME-BM OP (32 BMOP)',
        'ME-OP INV (32 OPIN)'
    ]
)

# --- NEW COLUMN: Alpha / Avg Excess Return ---
ff_stats['Alpha / Avg Excess'] = ff_stats['Avg |Alpha|'] / ff_stats['Avg Excess Return']

# --- MOVE Avg Excess Return to front ---
new_order = (['Avg Excess Return', 'Avg |Alpha|', 'Avg |t(Alpha)|', 'Alpha / Avg Excess'] +
             [col for col in ff_stats.columns if col not in 
              ['Avg Excess Return', 'Avg |Alpha|', 'Avg |t(Alpha)|', 'Alpha / Avg Excess']])

ff_stats = ff_stats[new_order]

# Recompute the overall average row
ff_stats.loc['Overall Average'] = ff_stats.mean()

ff_stats


Unnamed: 0,Avg Excess Return,Avg |Alpha|,Avg |t(Alpha)|,Alpha / Avg Excess,Avg b_MKT,Avg b_SMB,Avg b_HML,Avg b_RMW,Avg b_CMA,Avg R2
Size-BM (25 SMB),0.697423,0.116301,1.498703,0.166759,1.02509,0.564895,0.183134,0.012657,0.013515,0.919952
Size-INV (25 SIN),0.736065,0.086367,1.328872,0.117336,1.01821,0.536705,0.083648,0.026125,0.0862,0.931551
Size-OP (25 SOP),0.701515,0.065514,0.948407,0.093389,1.025749,0.525228,0.113027,0.041496,0.016381,0.929944
ME-BM INV (32 BMIN),0.736596,0.099492,1.283886,0.13507,1.024315,0.429663,0.204201,0.092312,0.100697,0.886374
ME-BM OP (32 BMOP),0.720147,0.116674,1.232668,0.162014,1.041568,0.435498,0.213145,0.100783,0.023032,0.858084
ME-OP INV (32 OPIN),0.720147,0.116674,1.232668,0.162014,1.041568,0.435498,0.213145,0.100783,0.023032,0.858084
Overall Average,0.718649,0.10017,1.2542,0.13943,1.029417,0.487914,0.168383,0.062359,0.04381,0.897331


In [41]:
summary_results3 = pd.DataFrame({
    'Avg |Alpha|': [results3['Alpha'].abs().mean()],
    'Avg |t(Alpha)|': [results3['t(Alpha)'].abs().mean()],
    'Avg b_MKT': [results3['b_MKT'].mean()],
    'Avg b_SMB': [results3['b_SMB'].mean()],
    'Avg b_HML': [results3['b_HML'].mean()],
    'Avg R2': [results3['R2'].mean()]
}, index=['Three Factor'])

summary_results3


Unnamed: 0,Avg |Alpha|,Avg |t(Alpha)|,Avg b_MKT,Avg b_SMB,Avg b_HML,Avg R2
Three Factor,0.122391,1.495751,1.050253,0.587007,0.373359,0.908303


In [42]:
import numpy as np
from scipy.stats import f
import statsmodels.api as sm

def grs_test(data, portfolio_cols, factor_cols=['MKTRF','SMB','HML','RMW','CMA'], rf_col='RF'):
   
    data = data.copy()
    data[factor_cols + [rf_col]] = data[factor_cols + [rf_col]].apply(pd.to_numeric, errors='coerce')
    data[portfolio_cols] = data[portfolio_cols].apply(pd.to_numeric, errors='coerce')

    
    data = data.dropna(subset=factor_cols + [rf_col])

    F = data[factor_cols].values
    T = F.shape[0]
    K = F.shape[1]

   
    X = sm.add_constant(F)  

    
    alphas = []
    resid_list = []

    for p in portfolio_cols:
        y = (data[p] - data[rf_col]).values    
        mask = ~np.isnan(y)
        y_ = y[mask]
        X_ = X[mask, :]

        res = sm.OLS(y_, X_).fit()
        alphas.append(res.params[0])      
        
        r_full = np.full(T, np.nan)
        r_full[mask] = res.resid
        resid_list.append(r_full)

    alphas = np.array(alphas)                 
    resid_mat = np.column_stack(resid_list)   

    valid_rows = ~np.isnan(resid_mat).any(axis=1)
    resid_mat = resid_mat[valid_rows, :]
    F_valid = F[valid_rows, :]

    T_eff = resid_mat.shape[0]
    N = len(portfolio_cols)

    Sigma = np.cov(resid_mat, rowvar=False)           
    Sf = np.cov(F_valid, rowvar=False)                
    f_bar = F_valid.mean(axis=0)                      

   
    eps = 1e-8
    Sigma_inv = np.linalg.inv(Sigma + eps * np.eye(N))
    Sf_inv = np.linalg.inv(Sf)

   
    num_prefactor = (T_eff - N - K) / N
    alpha_sig = alphas.T @ Sigma_inv @ alphas
    denom = 1.0 + f_bar.T @ Sf_inv @ f_bar

    GRS = num_prefactor * (alpha_sig / denom)

    df1 = N
    df2 = T_eff - N - K
    pval = 1 - f.cdf(GRS, df1, df2)

    return GRS, pval, df1, df2

In [43]:
portfolio_cols_SBM = [c for c in dataSBM.columns if c not in factor_cols + ['date']]

GRS_SBM, p_SBM, df1_SBM, df2_SBM = grs_test(
    dataSBM,
    portfolio_cols_SBM,
    factor_cols=['MKTRF','SMB','HML','RMW','CMA'],
    rf_col='RF'
)

print("SBM GRS:", GRS_SBM, "p-value:", p_SBM)

SBM GRS: 58.47832127204501 p-value: 1.1102230246251565e-16


In [44]:
factor_cols = ['MKTRF','SMB','HML','RMW','CMA','RF']
portfolio_cols_SIN = [c for c in dataSIN.columns if c not in factor_cols + ['date']]

GRS_SIN, p_SIN, df1_SIN, df2_SIN = grs_test(
    dataSIN,
    portfolio_cols_SIN,
    factor_cols=['MKTRF','SMB','HML','RMW','CMA'],
    rf_col='RF'
)

print("SIN GRS:", GRS_SIN, "p-value:", p_SIN, "df:", df1_SIN, df2_SIN)


SIN GRS: 3.5200095739309396 p-value: 3.1925063925797303e-08 df: 25 576


In [45]:
portfolio_cols_SOP = [c for c in dataSOP.columns if c not in factor_cols + ['date']]

GRS_SOP, p_SOP, df1_SOP, df2_SOP = grs_test(
    dataSOP,
    portfolio_cols_SOP,
    factor_cols=['MKTRF','SMB','HML','RMW','CMA'],
    rf_col='RF'
)

print("SOP GRS:", GRS_SOP, "p-value:", p_SOP)

SOP GRS: 1.8933340924061297 p-value: 0.005784913730277008


In [46]:
portfolio_cols_BMIN = [c for c in dataBMIN.columns if c not in factor_cols + ['date']]

GRS_BMIN, p_BMIN, df1_BMIN, df2_BMIN = grs_test(
    dataBMIN,
    portfolio_cols_BMIN,
    factor_cols=['MKTRF','SMB','HML','RMW','CMA'],
    rf_col='RF'
)

print("BMIN GRS:", GRS_BMIN, "p-value:", p_BMIN)

BMIN GRS: 2.1770820518860403 p-value: 0.0002505183057550653


  alpha_sig = alphas.T @ Sigma_inv @ alphas
  alpha_sig = alphas.T @ Sigma_inv @ alphas
  alpha_sig = alphas.T @ Sigma_inv @ alphas


In [47]:
portfolio_cols_BMOP = [c for c in dataBMOP.columns if c not in factor_cols + ['date']]

GRS_BMOP, p_BMOP, df1_BMOP, df2_BMOP = grs_test(
    dataBMOP,
    portfolio_cols_BMOP,
    factor_cols=['MKTRF','SMB','HML','RMW','CMA'],
    rf_col='RF'
)

print("BMOP GRS:", GRS_BMOP, "p-value:", p_BMOP)


BMOP GRS: 1.9947697751843159 p-value: 0.0011192777643614926


  alpha_sig = alphas.T @ Sigma_inv @ alphas
  alpha_sig = alphas.T @ Sigma_inv @ alphas
  alpha_sig = alphas.T @ Sigma_inv @ alphas


In [48]:
portfolio_cols_OPIN = [c for c in dataOPIN.columns if c not in factor_cols + ['date']]

GRS_OPIN, p_OPIN, df1_OPIN, df2_OPIN = grs_test(
    dataOPIN,
    portfolio_cols_OPIN,
    factor_cols=['MKTRF','SMB','HML','RMW','CMA'],
    rf_col='RF'
)



  alpha_sig = alphas.T @ Sigma_inv @ alphas
  alpha_sig = alphas.T @ Sigma_inv @ alphas
  alpha_sig = alphas.T @ Sigma_inv @ alphas


In [49]:
grs_df = pd.DataFrame({
    'GRS Statistic': [
        GRS_SBM, 
        GRS_SIN, 
        GRS_SOP, 
        GRS_BMIN, 
        GRS_BMOP, 
        GRS_OPIN
    ],
    'p-value': [
        p_SBM, 
        p_SIN, 
        p_SOP, 
        p_BMIN, 
        p_BMOP, 
        p_OPIN
    ],
    'df1': [
        df1_SBM, 
        df1_SIN, 
        df1_SOP, 
        df1_BMIN, 
        df1_BMOP, 
        df1_OPIN
    ],
    'df2': [
        df2_SBM, 
        df2_SIN, 
        df2_SOP, 
        df2_BMIN, 
        df2_BMOP, 
        df2_OPIN
    ]
},
    index=[
        'Size-BM (25 SMB)',
        'Size-INV (25 SIN)',
        'Size-OP (25 SOP)',
        'ME-BM INV (32 BMIN)',
        'ME-BM OP (32 BMOP)',
        'ME-OP INV (32 OPIN)'
    ]
)

grs_df


Unnamed: 0,GRS Statistic,p-value,df1,df2
Size-BM (25 SMB),58.478321,1.110223e-16,28,573
Size-INV (25 SIN),3.52001,3.192506e-08,25,576
Size-OP (25 SOP),1.893334,0.005784914,25,576
ME-BM INV (32 BMIN),2.177082,0.0002505183,32,569
ME-BM OP (32 BMOP),1.99477,0.001119278,32,569
ME-OP INV (32 OPIN),1.99477,0.001119278,32,569
