In [72]:
import pandas as pd
import pandas_datareader as web
import statsmodels.api as sm
import urllib.request
import zipfile

In [193]:
def get_fama_french():
    """Download the Fama-French 5-factor (2x3) CSV and return its leading YYYYMM table.

    The zip archive is saved as ``fama_french.zip`` in the current working
    directory and extracted there. The CSV is read twice: a first pass locates
    the first row that contains a missing value, and a second pass reads only
    the rows above it so the columns come back numeric.

    Returns:
        pandas.DataFrame: one row per YYYYMM entry, indexed by the matching
        month-end date, with every column divided by 100.
    """
    ff_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_5_Factors_2x3_CSV.zip"
    csv_name = 'F-F_Research_Data_5_Factors_2x3.csv'
    urllib.request.urlretrieve(ff_url, 'fama_french.zip')
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile('fama_french.zip', 'r') as zip_file:
        zip_file.extractall()

    # First pass: skip the 3-line preamble and read everything to find where
    # the first table ends (presumably a section title row follows it).
    ff_factors = pd.read_csv(csv_name, skiprows=3, index_col=0)
    # Series.nonzero() no longer exists in current pandas and `any` needs the
    # axis as a keyword; argmax on the boolean mask gives the first True.
    null_rows = ff_factors.isnull().any(axis=1).to_numpy()
    # If no row has a missing value, keep the whole file instead of failing.
    ff_row = int(null_rows.argmax()) if null_rows.any() else len(ff_factors)

    # Second pass: read only the rows above the first incomplete row.
    ff_factors = pd.read_csv(csv_name, skiprows=3, nrows=ff_row, index_col=0)
    # The index holds YYYYMM labels; go through str so integer labels parse too.
    ff_factors.index = pd.to_datetime(ff_factors.index.astype(str).str.strip(), format='%Y%m')
    # Shift each first-of-month timestamp to the month-end date.
    ff_factors.index = ff_factors.index + pd.offsets.MonthEnd()
    return ff_factors / 100

In [194]:
# Load the factor table once; later cells reuse `ff_data`.
ff_data = get_fama_french()
print(ff_data.tail(5))

            Mkt-RF     SMB     HML     RMW     CMA      RF
2019-11-30  0.0387  0.0050 -0.0186 -0.0150 -0.0129  0.0012
2019-12-31  0.0277  0.0096  0.0183  0.0021  0.0131  0.0014
2020-01-31 -0.0011 -0.0440 -0.0627 -0.0123 -0.0234  0.0013
2020-02-29 -0.0813 -0.0007 -0.0401 -0.0164 -0.0253  0.0012
2020-03-31 -0.1339 -0.0843 -0.1411 -0.0132  0.0126  0.0012


  


In [75]:
# Calendar date of the final row in the factor table; used to truncate price data.
ff_last = ff_data.index[-1].date()
def get_price_data(ticker, start, end):
    """Fetch Yahoo price history for `ticker` and return its 'Adj Close' column.

    Args:
        ticker: symbol passed through to `web.get_data_yahoo`.
        start: first date requested.
        end: last date requested.

    Returns:
        The 'Adj Close' column of the downloaded frame.
    """
    quotes = web.get_data_yahoo(ticker, start, end)
    # Only the adjusted close is needed downstream.
    return quotes['Adj Close']

In [93]:
# FCNTX adjusted closes, cut off at the last date present in the factor table.
price_data = get_price_data("FCNTX", "2000-01-01", "2020-03-31").loc[:ff_last]
print(price_data.tail(5))

Date
2020-03-25    11.37
2020-03-26    11.95
2020-03-27    11.53
2020-03-30    11.91
2020-03-31    11.76
Name: Adj Close, dtype: float64


In [94]:
def get_return_data(price_data, period = "M"):
    """Resample a price series to period-end values and return simple returns.

    Args:
        price_data: price series with a datetime-like index (anything that
            supports ``.resample``).
        period: resampling rule. The legacy period-end aliases "M", "Q", "A"
            and "Y" are translated to the equivalent offset objects, because
            pandas 2.2 deprecated those strings in favour of "ME"/"QE"/"YE".
            Any other value is handed to ``resample`` unchanged.

    Returns:
        pandas.DataFrame: a single column named 'portfolio' holding the
        period-over-period percentage change, with the first (undefined)
        period dropped.
    """
    # Offset objects behave the same on old and new pandas, unlike the aliases.
    legacy_aliases = {
        "M": pd.offsets.MonthEnd(),
        "Q": pd.offsets.QuarterEnd(startingMonth=12),
        "A": pd.offsets.YearEnd(),
        "Y": pd.offsets.YearEnd(),
    }
    rule = legacy_aliases.get(period, period) if isinstance(period, str) else period

    # The last observed price in each period is that period's closing value.
    price = price_data.resample(rule).last()
    # The first period has no predecessor, so its return is NaN; drop it by position.
    ret_data = pd.DataFrame(price.pct_change().iloc[1:])
    ret_data.columns = ['portfolio']
    return ret_data
# Monthly ("M") returns of the truncated price series.
ret_data = get_return_data(price_data, period="M")
print(ret_data.tail(5))

            portfolio
Date                 
2019-11-30   0.043117
2019-12-31   0.028764
2020-01-31   0.021152
2020-02-29  -0.059435
2020-03-31  -0.100917


In [95]:
# Align returns and factors on their shared dates, then give the market column
# a name that is a valid identifier for the regression formula.
all_data = pd.merge(
    pd.DataFrame(ret_data), ff_data,
    left_index=True, right_index=True, how='inner',
).rename(columns={"Mkt-RF": "mkt_excess"})
# Portfolio return minus the RF column.
all_data['port_excess'] = all_data['portfolio'].sub(all_data['RF'])
print(all_data.tail(5))

            portfolio  mkt_excess     SMB     HML     RMW     CMA      RF  \
2019-11-30   0.043117      0.0387  0.0050 -0.0186 -0.0150 -0.0129  0.0012   
2019-12-31   0.028764      0.0277  0.0096  0.0183  0.0021  0.0131  0.0014   
2020-01-31   0.021152     -0.0011 -0.0440 -0.0627 -0.0123 -0.0234  0.0013   
2020-02-29  -0.059435     -0.0813 -0.0007 -0.0401 -0.0164 -0.0253  0.0012   
2020-03-31  -0.100917     -0.1339 -0.0843 -0.1411 -0.0132  0.0126  0.0012   

            port_excess  
2019-11-30     0.041917  
2019-12-31     0.027364  
2020-01-31     0.019852  
2020-02-29    -0.060635  
2020-03-31    -0.102117  


In [96]:
# OLS of the portfolio excess return on the five factor columns.
model = sm.formula.ols(
    "port_excess~mkt_excess+ SMB+HML+RMW+CMA",
    all_data,
).fit()

print(model.params)

Intercept     0.001405
mkt_excess    0.848544
SMB           0.133896
HML          -0.146161
RMW           0.270631
CMA          -0.070373
dtype: float64


In [97]:
# Full regression report for the model fitted above.
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.850
Model:                            OLS   Adj. R-squared:                  0.847
Method:                 Least Squares   F-statistic:                     268.3
Date:                Wed, 13 May 2020   Prob (F-statistic):           3.37e-95
Time:                        04:11:35   Log-Likelihood:                 671.60
No. Observations:                 242   AIC:                            -1331.
Df Residuals:                     236   BIC:                            -1310.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0014      0.001      1.350      0.1

In [115]:
def run_reg_model(ticker,start,end, ff_data=None):
    """Regress a ticker's monthly excess return on the five Fama-French factors.

    Args:
        ticker: symbol passed to `get_price_data`.
        start: first date of the price history to request.
        end: last date of the price history to request.
        ff_data: optional factor table as returned by `get_fama_french`.
            When omitted the table is downloaded, which matches the previous
            behaviour; passing it in avoids re-downloading the archive for
            every ticker.

    Returns:
        The statsmodels summary object of the fitted OLS model (not the
        fitted model itself).
    """
    if ff_data is None:
        ff_data = get_fama_french()
    # Do not use prices dated after the last row of the factor table.
    ff_last = ff_data.index[-1].date()
    price_data = get_price_data(ticker, start, end).loc[:ff_last]
    ret_data = get_return_data(price_data, "M")
    # Inner join keeps only dates present in both returns and factors.
    all_data = pd.merge(ret_data, ff_data, how='inner', left_index=True, right_index=True)
    # "Mkt-RF" is not a valid name inside a formula, hence the rename.
    all_data = all_data.rename(columns={"Mkt-RF": "mkt_excess"})
    all_data['port_excess'] = all_data['portfolio'] - all_data['RF']
    model = sm.formula.ols(formula = "port_excess ~ mkt_excess + SMB+HML+RMW+CMA", data = all_data).fit()
    return model.summary()

In [116]:
# Regression summary for AGTHX, 2000-01-01 through 2020-03-31
# (the variable holds the summary table that run_reg_model returns).
agthx_model = run_reg_model("AGTHX", "2000-01-01", "2020-03-31")
print(agthx_model)

  


                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.942
Model:                            OLS   Adj. R-squared:                  0.941
Method:                 Least Squares   F-statistic:                     771.0
Date:                Wed, 13 May 2020   Prob (F-statistic):          5.75e-144
Time:                        04:25:05   Log-Likelihood:                 749.01
No. Observations:                 242   AIC:                            -1486.
Df Residuals:                     236   BIC:                            -1465.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0022      0.001      2.868      0.0

In [119]:
# Regression summary for BFOCX, 2000-01-01 through 2020-03-31.
bfocx_model = run_reg_model("BFOCX", "2000-01-01", "2020-03-31")
print(bfocx_model)

  


                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.760
Model:                            OLS   Adj. R-squared:                  0.755
Method:                 Least Squares   F-statistic:                     149.8
Date:                Wed, 13 May 2020   Prob (F-statistic):           4.03e-71
Time:                        05:19:38   Log-Likelihood:                 362.81
No. Observations:                 242   AIC:                            -713.6
Df Residuals:                     236   BIC:                            -692.7
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0066      0.004      1.760      0.0

In [120]:
# Regression summary for FCNTX, 2000-01-01 through 2020-03-31.
fcntx_model = run_reg_model("FCNTX", "2000-01-01", "2020-03-31")
print(fcntx_model)

  


                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.850
Model:                            OLS   Adj. R-squared:                  0.847
Method:                 Least Squares   F-statistic:                     268.3
Date:                Wed, 13 May 2020   Prob (F-statistic):           3.37e-95
Time:                        05:20:55   Log-Likelihood:                 671.60
No. Observations:                 242   AIC:                            -1331.
Df Residuals:                     236   BIC:                            -1310.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0014      0.001      1.350      0.1

In [121]:
# Regression summary for FOCPX, 2000-01-01 through 2020-03-31.
focpx_model = run_reg_model("FOCPX", "2000-01-01", "2020-03-31")
print(focpx_model)

  


                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.896
Model:                            OLS   Adj. R-squared:                  0.893
Method:                 Least Squares   F-statistic:                     405.1
Date:                Wed, 13 May 2020   Prob (F-statistic):          1.28e-113
Time:                        05:22:02   Log-Likelihood:                 590.57
No. Observations:                 242   AIC:                            -1169.
Df Residuals:                     236   BIC:                            -1148.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0031      0.001      2.161      0.0

In [122]:
# Regression summary for PRNHX, 2000-01-01 through 2020-03-31.
prnhx_model = run_reg_model("PRNHX", "2000-01-01", "2020-03-31")
print(prnhx_model)

  


                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.937
Model:                            OLS   Adj. R-squared:                  0.936
Method:                 Least Squares   F-statistic:                     700.4
Date:                Wed, 13 May 2020   Prob (F-statistic):          2.41e-139
Time:                        05:22:40   Log-Likelihood:                 675.66
No. Observations:                 242   AIC:                            -1339.
Df Residuals:                     236   BIC:                            -1318.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0048      0.001      4.711      0.0

In [123]:
# Regression summary for TRBCX, 2000-01-01 through 2020-03-31.
trbcx_model = run_reg_model("TRBCX", "2000-01-01", "2020-03-31")
print(trbcx_model)

  


                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.934
Model:                            OLS   Adj. R-squared:                  0.933
Method:                 Least Squares   F-statistic:                     671.7
Date:                Wed, 13 May 2020   Prob (F-statistic):          2.43e-137
Time:                        05:23:53   Log-Likelihood:                 723.75
No. Observations:                 242   AIC:                            -1435.
Df Residuals:                     236   BIC:                            -1415.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0012      0.001      1.486      0.1

In [124]:
# Regression summary for PXSGX; note the later start date, 2006-07-01.
pxsgx_model = run_reg_model("PXSGX", "2006-07-01", "2020-03-31")
print(pxsgx_model)

  


                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.842
Model:                            OLS   Adj. R-squared:                  0.836
Method:                 Least Squares   F-statistic:                     167.8
Date:                Wed, 13 May 2020   Prob (F-statistic):           2.66e-61
Time:                        05:25:18   Log-Likelihood:                 406.43
No. Observations:                 164   AIC:                            -800.9
Df Residuals:                     158   BIC:                            -782.3
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0041      0.002      2.418      0.0

In [125]:
# Regression summary for PSPTX; note the later start date, 2002-08-01.
psptx_model = run_reg_model("PSPTX", "2002-08-01", "2020-03-31")
print(psptx_model)

  


                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.934
Model:                            OLS   Adj. R-squared:                  0.932
Method:                 Least Squares   F-statistic:                     579.0
Date:                Wed, 13 May 2020   Prob (F-statistic):          8.84e-119
Time:                        05:26:44   Log-Likelihood:                 632.82
No. Observations:                 211   AIC:                            -1254.
Df Residuals:                     205   BIC:                            -1234.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0006      0.001      0.660      0.5

In [126]:
# Regression summary for FXAIX, 2000-01-01 through 2020-03-31.
fxaix_model = run_reg_model("FXAIX", "2000-01-01", "2020-03-31")
print(fxaix_model)

  


                            OLS Regression Results                            
Dep. Variable:            port_excess   R-squared:                       0.997
Model:                            OLS   Adj. R-squared:                  0.997
Method:                 Least Squares   F-statistic:                     6386.
Date:                Wed, 13 May 2020   Prob (F-statistic):          1.45e-123
Time:                        05:27:18   Log-Likelihood:                 504.82
No. Observations:                 106   AIC:                            -997.6
Df Residuals:                     100   BIC:                            -981.7
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept  -5.431e-05      0.000     -0.247      0.8

In [190]:
def port_fama_french():
    """Download the Developed 6 Portfolios (ME x INV) CSV and return its leading YYYYMM table.

    The zip archive is saved as ``fama_french_port.zip`` in the current
    working directory and extracted there. The CSV is read twice: a first pass
    locates the first row that contains a missing value, and a second pass
    reads only the rows above it so the columns come back numeric.

    Returns:
        pandas.DataFrame: one row per YYYYMM entry, indexed by the matching
        month-end date, with every column divided by 100.
    """
    ff_port_url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/Developed_6_Portfolios_ME_INV_CSV.zip"
    csv_name = 'Developed_6_Portfolios_ME_INV.csv'
    urllib.request.urlretrieve(ff_port_url, 'fama_french_port.zip')
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile('fama_french_port.zip', 'r') as zip_file_port:
        zip_file_port.extractall()

    # First pass: skip the 18-line preamble and read everything to find where
    # the first table ends (presumably a section title row follows it).
    ff_port = pd.read_csv(csv_name, skiprows=18, index_col=0)
    # Series.nonzero() no longer exists in current pandas and `any` needs the
    # axis as a keyword; argmax on the boolean mask gives the first True.
    null_rows = ff_port.isnull().any(axis=1).to_numpy()
    # If no row has a missing value, keep the whole file instead of failing.
    ff_port_row = int(null_rows.argmax()) if null_rows.any() else len(ff_port)

    # Second pass: read only the rows above the first incomplete row.
    ff_port = pd.read_csv(csv_name, skiprows=18, nrows=ff_port_row, index_col=0)
    # The index holds YYYYMM labels; go through str so integer labels parse too.
    ff_port.index = pd.to_datetime(ff_port.index.astype(str).str.strip(), format='%Y%m')
    # Shift each first-of-month timestamp to the month-end date.
    ff_port.index = ff_port.index + pd.offsets.MonthEnd()
    return ff_port / 100

In [191]:
# Load the six-portfolio return table and preview its first rows.
ff_port_data = port_fama_french()
print(ff_port_data.head(5))

            SMALL LoINV  ME1 INV2  SMALL HiINV  BIG LoINV  ME2 INV2  BIG HiINV
1990-07-31       0.0190    0.0179       0.0093     0.0223    0.0097     0.0050
1990-08-31      -0.1061   -0.1069      -0.1222    -0.0918   -0.1010    -0.0998
1990-09-30      -0.0973   -0.0958      -0.1059    -0.0966   -0.1076    -0.1311
1990-10-31       0.0280    0.0370       0.0098     0.1116    0.0982     0.1096
1990-11-30      -0.0297   -0.0064       0.0011    -0.0387   -0.0212    -0.0271


  


In [204]:
# Join the six portfolio return columns with the factor table on shared dates,
# renaming the market column so it can be used inside a regression formula.
all_data_port = pd.merge(
    pd.DataFrame(ff_port_data), ff_data,
    left_index=True, right_index=True, how='inner',
).rename(columns={"Mkt-RF": "mkt_excess"})

# Excess return of the 'ME1 INV2' portfolio over the RF column.
all_data_port['port_excess_ff'] = all_data_port['ME1 INV2'].sub(all_data_port['RF'])

print(all_data_port.tail(5))

            SMALL LoINV  ME1 INV2  SMALL HiINV  BIG LoINV  ME2 INV2  \
2019-11-30       0.0225    0.0248       0.0368     0.0257    0.0270   
2019-12-31       0.0478    0.0374       0.0486     0.0366    0.0281   
2020-01-31      -0.0405   -0.0401      -0.0315    -0.0196   -0.0090   
2020-02-29      -0.1028   -0.0972      -0.0925    -0.0932   -0.0827   
2020-03-31      -0.1933   -0.1652      -0.1888    -0.1343   -0.1358   

            BIG HiINV  mkt_excess     SMB     HML     RMW     CMA      RF  \
2019-11-30     0.0351      0.0387  0.0050 -0.0186 -0.0150 -0.0129  0.0012   
2019-12-31     0.0262      0.0277  0.0096  0.0183  0.0021  0.0131  0.0014   
2020-01-31     0.0102     -0.0011 -0.0440 -0.0627 -0.0123 -0.0234  0.0013   
2020-02-29    -0.0695     -0.0813 -0.0007 -0.0401 -0.0164 -0.0253  0.0012   
2020-03-31    -0.1217     -0.1339 -0.0843 -0.1411 -0.0132  0.0126  0.0012   

            port_excess_ff  
2019-11-30          0.0236  
2019-12-31          0.0360  
2020-01-31         -0.0

In [206]:
# Five-factor OLS for the excess-return column currently stored in
# `port_excess_ff` ('ME1 INV2' at this point in the notebook).
me1_ff = sm.formula.ols(
    'port_excess_ff~mkt_excess +SMB+HML+RMW+CMA',
    all_data_port,
).fit()
me1_ff.summary()


0,1,2,3
Dep. Variable:,port_excess_ff,R-squared:,0.761
Model:,OLS,Adj. R-squared:,0.757
Method:,Least Squares,F-statistic:,223.2
Date:,"Wed, 13 May 2020",Prob (F-statistic):,1.14e-106
Time:,06:52:24,Log-Likelihood:,884.76
No. Observations:,357,AIC:,-1758.0
Df Residuals:,351,BIC:,-1734.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0005,0.001,0.418,0.676,-0.002,0.003
mkt_excess,0.7518,0.031,24.601,0.000,0.692,0.812
SMB,0.3761,0.040,9.399,0.000,0.297,0.455
HML,0.1686,0.050,3.359,0.001,0.070,0.267
RMW,0.0017,0.055,0.030,0.976,-0.106,0.109
CMA,0.0812,0.075,1.084,0.279,-0.066,0.228

0,1,2,3
Omnibus:,7.426,Durbin-Watson:,1.705
Prob(Omnibus):,0.024,Jarque-Bera (JB):,11.452
Skew:,0.076,Prob(JB):,0.00326
Kurtosis:,3.864,Cond. No.,79.8


In [211]:
# Rebuild the joined portfolio/factor frame from scratch, renaming the market
# column so it can be used inside a regression formula.
all_data_port = pd.merge(
    pd.DataFrame(ff_port_data), ff_data,
    left_index=True, right_index=True, how='inner',
).rename(columns={"Mkt-RF": "mkt_excess"})

# Excess return of the 'SMALL LoINV' portfolio over the RF column.
all_data_port['port_excess_ff'] = all_data_port['SMALL LoINV'].sub(all_data_port['RF'])

print(all_data_port.tail(5))



            SMALL LoINV  ME1 INV2  SMALL HiINV  BIG LoINV  ME2 INV2  \
2019-11-30       0.0225    0.0248       0.0368     0.0257    0.0270   
2019-12-31       0.0478    0.0374       0.0486     0.0366    0.0281   
2020-01-31      -0.0405   -0.0401      -0.0315    -0.0196   -0.0090   
2020-02-29      -0.1028   -0.0972      -0.0925    -0.0932   -0.0827   
2020-03-31      -0.1933   -0.1652      -0.1888    -0.1343   -0.1358   

            BIG HiINV  mkt_excess     SMB     HML     RMW     CMA      RF  \
2019-11-30     0.0351      0.0387  0.0050 -0.0186 -0.0150 -0.0129  0.0012   
2019-12-31     0.0262      0.0277  0.0096  0.0183  0.0021  0.0131  0.0014   
2020-01-31     0.0102     -0.0011 -0.0440 -0.0627 -0.0123 -0.0234  0.0013   
2020-02-29    -0.0695     -0.0813 -0.0007 -0.0401 -0.0164 -0.0253  0.0012   
2020-03-31    -0.1217     -0.1339 -0.0843 -0.1411 -0.0132  0.0126  0.0012   

            port_excess_ff  
2019-11-30          0.0213  
2019-12-31          0.0464  
2020-01-31         -0.0

In [213]:
# Five-factor OLS for the excess-return column currently stored in
# `port_excess_ff` ('SMALL LoINV' at this point in the notebook).
low_small_ff = sm.formula.ols(
    'port_excess_ff~mkt_excess +SMB+HML+RMW+CMA',
    all_data_port,
).fit()
low_small_ff.summary()

0,1,2,3
Dep. Variable:,port_excess_ff,R-squared:,0.757
Model:,OLS,Adj. R-squared:,0.754
Method:,Least Squares,F-statistic:,219.3
Date:,"Wed, 13 May 2020",Prob (F-statistic):,1.2400000000000001e-105
Time:,06:58:21,Log-Likelihood:,842.73
No. Observations:,357,AIC:,-1673.0
Df Residuals:,351,BIC:,-1650.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-7.687e-05,0.001,-0.060,0.952,-0.003,0.002
mkt_excess,0.8002,0.034,23.275,0.000,0.733,0.868
SMB,0.4432,0.045,9.845,0.000,0.355,0.532
HML,0.1156,0.056,2.047,0.041,0.005,0.227
RMW,-0.1560,0.061,-2.543,0.011,-0.277,-0.035
CMA,0.2939,0.084,3.488,0.001,0.128,0.460

0,1,2,3
Omnibus:,3.125,Durbin-Watson:,1.725
Prob(Omnibus):,0.21,Jarque-Bera (JB):,3.255
Skew:,0.095,Prob(JB):,0.196
Kurtosis:,3.427,Cond. No.,79.8


In [214]:
# Overwrite the shared excess-return column with 'SMALL HiINV' minus RF.
all_data_port['port_excess_ff'] = all_data_port['SMALL HiINV'].sub(all_data_port['RF'])

print(all_data_port.tail(5))
# Five-factor OLS on the freshly overwritten excess-return column.
hi_small_ff = sm.formula.ols(
    'port_excess_ff~mkt_excess +SMB+HML+RMW+CMA',
    all_data_port,
).fit()
hi_small_ff.summary()

            SMALL LoINV  ME1 INV2  SMALL HiINV  BIG LoINV  ME2 INV2  \
2019-11-30       0.0225    0.0248       0.0368     0.0257    0.0270   
2019-12-31       0.0478    0.0374       0.0486     0.0366    0.0281   
2020-01-31      -0.0405   -0.0401      -0.0315    -0.0196   -0.0090   
2020-02-29      -0.1028   -0.0972      -0.0925    -0.0932   -0.0827   
2020-03-31      -0.1933   -0.1652      -0.1888    -0.1343   -0.1358   

            BIG HiINV  mkt_excess     SMB     HML     RMW     CMA      RF  \
2019-11-30     0.0351      0.0387  0.0050 -0.0186 -0.0150 -0.0129  0.0012   
2019-12-31     0.0262      0.0277  0.0096  0.0183  0.0021  0.0131  0.0014   
2020-01-31     0.0102     -0.0011 -0.0440 -0.0627 -0.0123 -0.0234  0.0013   
2020-02-29    -0.0695     -0.0813 -0.0007 -0.0401 -0.0164 -0.0253  0.0012   
2020-03-31    -0.1217     -0.1339 -0.0843 -0.1411 -0.0132  0.0126  0.0012   

            port_excess_ff  
2019-11-30          0.0356  
2019-12-31          0.0472  
2020-01-31         -0.0

0,1,2,3
Dep. Variable:,port_excess_ff,R-squared:,0.814
Model:,OLS,Adj. R-squared:,0.811
Method:,Least Squares,F-statistic:,307.2
Date:,"Wed, 13 May 2020",Prob (F-statistic):,8.33e-126
Time:,06:59:21,Log-Likelihood:,847.02
No. Observations:,357,AIC:,-1682.0
Df Residuals:,351,BIC:,-1659.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0021,0.001,-1.640,0.102,-0.005,0.000
mkt_excess,0.8638,0.034,25.428,0.000,0.797,0.931
SMB,0.4395,0.044,9.881,0.000,0.352,0.527
HML,0.0604,0.056,1.082,0.280,-0.049,0.170
RMW,-0.2747,0.061,-4.531,0.000,-0.394,-0.155
CMA,-0.1654,0.083,-1.986,0.048,-0.329,-0.002

0,1,2,3
Omnibus:,13.569,Durbin-Watson:,1.711
Prob(Omnibus):,0.001,Jarque-Bera (JB):,28.076
Skew:,-0.121,Prob(JB):,8.01e-07
Kurtosis:,4.352,Cond. No.,79.8


In [215]:
# Overwrite the shared excess-return column with 'BIG LoINV' minus RF.
all_data_port['port_excess_ff'] = all_data_port['BIG LoINV'].sub(all_data_port['RF'])

print(all_data_port.tail(5))
# Five-factor OLS on the freshly overwritten excess-return column.
big_lo_ff = sm.formula.ols(
    'port_excess_ff~mkt_excess +SMB+HML+RMW+CMA',
    all_data_port,
).fit()
big_lo_ff.summary()

            SMALL LoINV  ME1 INV2  SMALL HiINV  BIG LoINV  ME2 INV2  \
2019-11-30       0.0225    0.0248       0.0368     0.0257    0.0270   
2019-12-31       0.0478    0.0374       0.0486     0.0366    0.0281   
2020-01-31      -0.0405   -0.0401      -0.0315    -0.0196   -0.0090   
2020-02-29      -0.1028   -0.0972      -0.0925    -0.0932   -0.0827   
2020-03-31      -0.1933   -0.1652      -0.1888    -0.1343   -0.1358   

            BIG HiINV  mkt_excess     SMB     HML     RMW     CMA      RF  \
2019-11-30     0.0351      0.0387  0.0050 -0.0186 -0.0150 -0.0129  0.0012   
2019-12-31     0.0262      0.0277  0.0096  0.0183  0.0021  0.0131  0.0014   
2020-01-31     0.0102     -0.0011 -0.0440 -0.0627 -0.0123 -0.0234  0.0013   
2020-02-29    -0.0695     -0.0813 -0.0007 -0.0401 -0.0164 -0.0253  0.0012   
2020-03-31    -0.1217     -0.1339 -0.0843 -0.1411 -0.0132  0.0126  0.0012   

            port_excess_ff  
2019-11-30          0.0245  
2019-12-31          0.0352  
2020-01-31         -0.0

0,1,2,3
Dep. Variable:,port_excess_ff,R-squared:,0.784
Model:,OLS,Adj. R-squared:,0.781
Method:,Least Squares,F-statistic:,255.1
Date:,"Wed, 13 May 2020",Prob (F-statistic):,1.67e-114
Time:,07:00:41,Log-Likelihood:,903.24
No. Observations:,357,AIC:,-1794.0
Df Residuals:,351,BIC:,-1771.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0017,0.001,-1.573,0.117,-0.004,0.000
mkt_excess,0.9156,0.029,31.553,0.000,0.859,0.973
SMB,-0.0557,0.038,-1.466,0.143,-0.130,0.019
HML,0.0412,0.048,0.865,0.388,-0.052,0.135
RMW,0.0075,0.052,0.145,0.885,-0.094,0.109
CMA,0.3806,0.071,5.352,0.000,0.241,0.520

0,1,2,3
Omnibus:,88.005,Durbin-Watson:,2.118
Prob(Omnibus):,0.0,Jarque-Bera (JB):,689.881
Skew:,0.782,Prob(JB):,1.56e-150
Kurtosis:,9.628,Cond. No.,79.8


In [216]:
# Overwrite the shared excess-return column with 'ME2 INV2' minus RF.
all_data_port['port_excess_ff'] = all_data_port['ME2 INV2'].sub(all_data_port['RF'])

print(all_data_port.tail(5))
# Five-factor OLS on the freshly overwritten excess-return column.
me2_ff = sm.formula.ols(
    'port_excess_ff~mkt_excess +SMB+HML+RMW+CMA',
    all_data_port,
).fit()
me2_ff.summary()

            SMALL LoINV  ME1 INV2  SMALL HiINV  BIG LoINV  ME2 INV2  \
2019-11-30       0.0225    0.0248       0.0368     0.0257    0.0270   
2019-12-31       0.0478    0.0374       0.0486     0.0366    0.0281   
2020-01-31      -0.0405   -0.0401      -0.0315    -0.0196   -0.0090   
2020-02-29      -0.1028   -0.0972      -0.0925    -0.0932   -0.0827   
2020-03-31      -0.1933   -0.1652      -0.1888    -0.1343   -0.1358   

            BIG HiINV  mkt_excess     SMB     HML     RMW     CMA      RF  \
2019-11-30     0.0351      0.0387  0.0050 -0.0186 -0.0150 -0.0129  0.0012   
2019-12-31     0.0262      0.0277  0.0096  0.0183  0.0021  0.0131  0.0014   
2020-01-31     0.0102     -0.0011 -0.0440 -0.0627 -0.0123 -0.0234  0.0013   
2020-02-29    -0.0695     -0.0813 -0.0007 -0.0401 -0.0164 -0.0253  0.0012   
2020-03-31    -0.1217     -0.1339 -0.0843 -0.1411 -0.0132  0.0126  0.0012   

            port_excess_ff  
2019-11-30          0.0258  
2019-12-31          0.0267  
2020-01-31         -0.0

0,1,2,3
Dep. Variable:,port_excess_ff,R-squared:,0.836
Model:,OLS,Adj. R-squared:,0.834
Method:,Least Squares,F-statistic:,358.8
Date:,"Wed, 13 May 2020",Prob (F-statistic):,1.49e-135
Time:,07:01:44,Log-Likelihood:,947.9
No. Observations:,357,AIC:,-1884.0
Df Residuals:,351,BIC:,-1861.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0017,0.001,-1.796,0.073,-0.004,0.000
mkt_excess,0.9305,0.026,36.338,0.000,0.880,0.981
SMB,-0.0971,0.034,-2.896,0.004,-0.163,-0.031
HML,0.0941,0.042,2.238,0.026,0.011,0.177
RMW,0.0019,0.046,0.042,0.966,-0.088,0.092
CMA,0.1029,0.063,1.640,0.102,-0.021,0.226

0,1,2,3
Omnibus:,57.704,Durbin-Watson:,2.13
Prob(Omnibus):,0.0,Jarque-Bera (JB):,454.355
Skew:,0.352,Prob(JB):,2.18e-99
Kurtosis:,8.482,Cond. No.,79.8


In [217]:
# Overwrite the shared excess-return column with 'BIG HiINV' minus RF.
all_data_port['port_excess_ff'] = all_data_port['BIG HiINV'].sub(all_data_port['RF'])

print(all_data_port.tail(5))
# Five-factor OLS on the freshly overwritten excess-return column.
big_high_ff = sm.formula.ols(
    'port_excess_ff~mkt_excess +SMB+HML+RMW+CMA',
    all_data_port,
).fit()
big_high_ff.summary()

            SMALL LoINV  ME1 INV2  SMALL HiINV  BIG LoINV  ME2 INV2  \
2019-11-30       0.0225    0.0248       0.0368     0.0257    0.0270   
2019-12-31       0.0478    0.0374       0.0486     0.0366    0.0281   
2020-01-31      -0.0405   -0.0401      -0.0315    -0.0196   -0.0090   
2020-02-29      -0.1028   -0.0972      -0.0925    -0.0932   -0.0827   
2020-03-31      -0.1933   -0.1652      -0.1888    -0.1343   -0.1358   

            BIG HiINV  mkt_excess     SMB     HML     RMW     CMA      RF  \
2019-11-30     0.0351      0.0387  0.0050 -0.0186 -0.0150 -0.0129  0.0012   
2019-12-31     0.0262      0.0277  0.0096  0.0183  0.0021  0.0131  0.0014   
2020-01-31     0.0102     -0.0011 -0.0440 -0.0627 -0.0123 -0.0234  0.0013   
2020-02-29    -0.0695     -0.0813 -0.0007 -0.0401 -0.0164 -0.0253  0.0012   
2020-03-31    -0.1217     -0.1339 -0.0843 -0.1411 -0.0132  0.0126  0.0012   

            port_excess_ff  
2019-11-30          0.0339  
2019-12-31          0.0248  
2020-01-31          0.0

0,1,2,3
Dep. Variable:,port_excess_ff,R-squared:,0.837
Model:,OLS,Adj. R-squared:,0.835
Method:,Least Squares,F-statistic:,361.4
Date:,"Wed, 13 May 2020",Prob (F-statistic):,5.09e-136
Time:,07:02:45,Log-Likelihood:,889.92
No. Observations:,357,AIC:,-1768.0
Df Residuals:,351,BIC:,-1745.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0008,0.001,-0.672,0.502,-0.003,0.001
mkt_excess,0.9496,0.030,31.524,0.000,0.890,1.009
SMB,-0.0619,0.039,-1.569,0.118,-0.139,0.016
HML,-0.0540,0.049,-1.093,0.275,-0.151,0.043
RMW,-0.1898,0.054,-3.532,0.000,-0.296,-0.084
CMA,-0.2933,0.074,-3.973,0.000,-0.438,-0.148

0,1,2,3
Omnibus:,58.134,Durbin-Watson:,2.221
Prob(Omnibus):,0.0,Jarque-Bera (JB):,665.967
Skew:,-0.005,Prob(JB):,2.4400000000000002e-145
Kurtosis:,9.691,Cond. No.,79.8
