### Problem 2: Momentum factor
Code and notes for subproblem 2: Farma-French Approach

In [23]:
# packages and load data.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import Utils
from scipy.optimize import minimize 
from sklearn.linear_model import LinearRegression as lm
import statsmodels.api as sm
import Backtest as bt   
import matplotlib.dates as mdates

In [24]:
FF = pd.read_csv("Data_clean/FF_cleaned.csv")

MOMexp = pd.read_csv("Data_clean/6_Portfolios_ME_Prior_12_2_returns.csv")
MOMdep = pd.read_csv("Data_clean/25_Portfolios_ME_Prior_12_2_returns.csv")

MOMexp = pd.merge(MOMexp, FF[["Date","RF"]], 'left', on = "Date")
MOMdep = pd.merge(MOMdep, FF[["Date","RF"]], 'left', on = "Date")

### Format Data
That is, we construct SMB and MOM in MOMexp

In [25]:
MOMexp["SMB"] = (MOMexp["SMALL LoPRIOR"]  + MOMexp["ME1 PRIOR2"] + 
                 MOMexp["SMALL HiPRIOR"] ) / 3 - (MOMexp["BIG LoPRIOR"]  + 
                            MOMexp["ME2 PRIOR2"] + MOMexp["BIG HiPRIOR"] ) / 3 

MOMexp["MOM"] = (MOMexp["SMALL HiPRIOR"] + 
                 MOMexp["BIG HiPRIOR"]) / 2 - (MOMexp["SMALL LoPRIOR"] + MOMexp["BIG LoPRIOR"]) / 2 

In [26]:
MOMexp["Mkt-RF"] = MOMexp["Market Return"] - MOMexp["RF"]
MOMexp = MOMexp[["Date","Mkt-RF", "SMB", "MOM", 'Market Return']]
MOMexp = MOMexp.rename(columns = {"Market Return":"Mkt"})

## Constructing equivalent table 1 and 2

### Table 1

In [95]:
pf_25_afs = pd.read_csv("Data_clean/25_Portfolios_ME_Prior_12_2_afs.csv")
pf_25_nf = pd.read_csv("Data_clean/25_Portfolios_ME_Prior_12_2_nf.csv")
pf_25_afs.set_index("Date", inplace = True)
pf_25_nf.set_index("Date", inplace = True)

In [96]:
market_cap_df = pf_25_afs.multiply(pf_25_nf)
market_cap_df['Market Value'] = market_cap_df.sum(axis=1)
market_cap_df = market_cap_df.div(market_cap_df['Market Value'], axis=0)
market_cap_df.drop('Market Value', axis=1, inplace=True)

In [110]:
mean_firm_size = pf_25_afs.mean()
mean_number_of_firms = pf_25_nf.mean()
mean_pct_of_market = market_cap_df.mean()
average_of_annual_averages_matrix = mean_firm_size.values.reshape(5, 5).round(2)
average_number_of_firms_matrix = mean_number_of_firms.values.reshape(5, 5).round(2)
average_pct_of_market_matrix = mean_pct_of_market.values.reshape(5, 5).round(4)

In [111]:
average_pct_of_market_matrix *100

array([[ 0.59,  0.35,  0.3 ,  0.3 ,  0.48],
       [ 0.63,  0.59,  0.56,  0.57,  0.76],
       [ 0.87,  1.07,  1.11,  1.14,  1.33],
       [ 1.63,  2.33,  2.57,  2.68,  2.78],
       [ 7.58, 14.96, 18.09, 20.24, 16.5 ]])

In [106]:
mean_firm_size

SMALL LoPRIOR       42.288625
ME1 PRIOR2          56.999510
ME1 PRIOR3          59.814605
ME1 PRIOR4          61.518522
SMALL HiPRIOR       64.093677
ME2 PRIOR1         288.633668
ME2 PRIOR2         298.394098
ME2 PRIOR3         301.827251
ME2 PRIOR4         301.930292
ME2 PRIOR5         298.836753
ME3 PRIOR1         690.118789
ME3 PRIOR2         705.894888
ME3 PRIOR3         715.259854
ME3 PRIOR4         716.993119
ME3 PRIOR5         708.888754
ME4 PRIOR1        1745.226813
ME4 PRIOR2        1786.864072
ME4 PRIOR3        1782.977294
ME4 PRIOR4        1774.818359
ME4 PRIOR5        1769.097680
BIG LoPRIOR      10772.176060
ME5 PRIOR2       13306.455421
ME5 PRIOR3       13951.992277
ME5 PRIOR4       14236.296314
BIG HiPRIOR      13565.647912
dtype: float64

### Table 2

In [30]:
MOMexp

Unnamed: 0,Date,Mkt-RF,SMB,MOM,Mkt
0,1927-01-31,-0.08,1.396667,0.360,0.17
1,1927-02-28,4.21,1.153333,-2.150,4.47
2,1927-03-31,0.15,-2.303333,3.615,0.45
3,1927-04-30,0.59,0.750000,4.300,0.84
4,1927-05-31,5.61,0.933333,3.005,5.91
...,...,...,...,...,...
1159,2023-08-31,-2.35,-3.273333,3.770,-1.90
1160,2023-09-30,-5.23,-1.773333,0.240,-4.80
1161,2023-10-31,-3.08,-4.016667,1.685,-2.61
1162,2023-11-30,8.86,0.516667,2.760,9.30


In [43]:
# Calculate summary statistics
summary_stats = MOMexp[['SMB', 'MOM', 'Mkt-RF', 'Mkt']].agg(['mean', 'std']).transpose()
summary_stats['t-stat'] = summary_stats['mean'] / (summary_stats['std'] / np.sqrt(len(MOMexp)))
summary_stats = summary_stats.round(2)

# Calculate autocorrelations for lags 1, 2, and 12
lags = [1, 2, 12]
autocorrelations = {f'lag_{lag}': MOMexp[['SMB', 'MOM', 'Mkt-RF', 'Mkt']].apply(lambda x: x.autocorr(lag)) for lag in lags}
autocorr_df = pd.DataFrame(autocorrelations).transpose().round(2)

# Calculate correlations
correlations = MOMexp[['SMB', 'MOM', 'Mkt-RF']].corr().round(2)
print(summary_stats)
print(autocorr_df)
print(correlations)

        mean   std  t-stat
SMB     0.28  3.46    2.77
MOM     0.62  4.70    4.51
Mkt-RF  0.69  5.34    4.40
Mkt     0.96  5.33    6.12
         SMB   MOM  Mkt-RF   Mkt
lag_1   0.12  0.07    0.09  0.09
lag_2   0.07 -0.07   -0.02 -0.03
lag_12  0.12  0.06    0.00  0.00
         SMB   MOM  Mkt-RF
SMB     1.00 -0.20    0.33
MOM    -0.20  1.00   -0.35
Mkt-RF  0.33 -0.35    1.00


In [5]:
MOMdep_25 = MOMdep.drop(columns = ["Date", "Market Return", "RF"]) 
# Find excess return
MOMdep_25_excess = MOMdep_25.copy()
for i in MOMdep_25.columns.values:
    MOMdep_25_excess[i] = MOMdep_25[i]  -  MOMdep["RF"]

Utils.table_2_lower(MOMdep_25_excess).round(2)


Unnamed: 0_level_0,mean,mean,mean,mean,mean,std,std,std,std,std,t-test of mean,t-test of mean,t-test of mean,t-test of mean,t-test of mean
PRIOR,PRIOR1,PRIOR2,PRIOR3,PRIOR4,PRIOR5,PRIOR1,PRIOR2,PRIOR3,PRIOR4,PRIOR5,PRIOR1,PRIOR2,PRIOR3,PRIOR4,PRIOR5
ME,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
ME1,0.62,1.13,1.28,1.34,1.53,10.78,9.51,8.66,8.74,8.72,1.97,4.05,5.04,5.23,5.98
ME2,0.43,0.89,0.98,1.16,1.4,9.84,8.11,7.2,7.21,7.79,1.49,3.75,4.65,5.51,6.14
ME3,0.39,0.79,0.86,0.93,1.28,9.43,7.51,6.79,6.2,6.85,1.4,3.59,4.33,5.11,6.36
ME4,0.4,0.67,0.81,0.92,1.23,9.49,7.21,6.27,6.04,6.41,1.44,3.18,4.4,5.17,6.56
ME5,0.37,0.53,0.62,0.76,0.95,8.53,6.41,5.71,5.23,5.75,1.48,2.83,3.72,4.97,5.66


In [8]:
0.62/(10.78/np.sqrt(1164))

np.float64(1.9622277750879087)

## Regressions
We are now ready to do the regressions.

In [11]:
# Regression: 
def regression(dep,exp,rf):
    var = dep.columns.drop(["Date", "Market Return", "RF"])
    params = pd.DataFrame(columns =["a", "b","s","m","R2/s(e)"])
    tvals =  pd.DataFrame(columns =["a", "b","s","m", "R2/s(e)"])
    X = np.array(exp).reshape(len(exp),len(exp.columns))
    X = sm.add_constant(X)
    for v in var:
        y = np.array(dep[v])  - np.array(rf["RF"]) 
        linmod = sm.OLS(y,X)
        results = linmod.fit()
        coef = pd.DataFrame([np.append(results.params.round(2),results.rsquared.round(2))],
                            columns=["a", "b","s","m","R2/s(e)"])
        ttest = pd.DataFrame([np.append(results.tvalues.round(2),np.std(results.resid).round(4))],
                            columns=["a", "b","s","m", "R2/s(e)"])
        params = pd.concat([params,coef], ignore_index=True)
        tvals =  pd.concat([tvals,ttest], ignore_index=True)
    return params, tvals

# params # Relatively close - > So assume it is correct.

In [12]:
# Create output table: 
# Generate table:
def out_array(param,dep,exp,rf):
    p = param
    params, tvals = regression(dep,exp,rf)
    N = np.sqrt(len(params[p]))
    parameter = np.array(params[p]).reshape((int(N),int(N)))
    ttest = np.array(tvals[p]).reshape((int(N),int(N)))
    tab = np.append(parameter,ttest,axis=1)
    tabdf = pd.DataFrame(tab, columns=pd.MultiIndex.from_tuples(
                          [(r"\beta/R^2","LPrior"), ("","2"), ("","3"), ("","4"), ("","HPrior"),
                           (r"t(\beta)/s(e)","LPrior"), ("","2"), ("","3"), ("","4"), ("","HPrior")]))
    tabdf = tabdf.set_index([pd.Index([param,"", "", "", ""]),pd.Index(["S", "2", "3", "4", "B"])])

    return tabdf

dep = MOMdep.copy()
exp = MOMexp.copy()
rf = FF.copy()
exp = exp.drop(columns = "Date")
rf = FF.drop(columns = "Date")

a = out_array("a", dep, exp, rf)
b = out_array("b", dep, exp, rf)
s = out_array("s", dep, exp, rf)
m = out_array("m", dep, exp, rf)
R2_se = out_array("R2/s(e)", dep, exp, rf)
tab = pd.concat([a,b,s,m, R2_se])


  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


In [14]:
tab

Unnamed: 0_level_0,Unnamed: 1_level_0,\beta/R^2,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,t(\beta)/s(e),Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0
Unnamed: 0_level_1,Unnamed: 1_level_1,LPrior,2,3,4,HPrior,LPrior,2,3,4,HPrior
a,S,-0.09,0.31,0.43,0.27,0.25,-1.01,3.47,4.86,2.74,2.52
,2,-0.23,0.16,0.18,0.18,0.11,-4.02,2.85,2.86,2.96,1.78
,3,-0.13,0.14,0.13,0.04,0.05,-2.01,2.68,2.42,0.83,0.82
,4,-0.02,0.12,0.14,0.07,0.07,-0.2,2.03,2.63,1.37,1.11
,B,0.1,0.19,0.09,0.04,-0.08,1.09,3.59,1.99,0.86,-1.52
b,S,1.04,0.97,0.92,1.0,1.07,57.71,52.9,51.05,49.65,53.89
,2,1.15,1.0,0.97,0.99,1.14,99.48,90.82,76.91,82.12,93.12
,3,1.17,1.05,1.0,0.98,1.12,88.91,97.8,90.8,92.46,96.45
,4,1.23,1.09,1.01,1.05,1.12,72.03,92.15,90.05,94.37,92.86
,B,1.14,0.99,0.99,1.0,1.1,62.82,93.35,105.03,110.51,103.8


In [313]:
# To latex regression tab:
latex_table = tab.to_latex(index=True, float_format="%.2f", 
                           caption=r"Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)", 
                           label="tab:regression")
print(latex_table)


\begin{table}
\caption{Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)}
\label{tab:regression}
\begin{tabular}{llrrrrrrrrrr}
\toprule
 &  & \beta/R^2 & \multicolumn{4}{r}{} & t(\beta)/s(e) & \multicolumn{4}{r}{} \\
 &  & LPrior & 2 & 3 & 4 & HPrior & LPrior & 2 & 3 & 4 & HPrior \\
\midrule
a & S & -0.09 & 0.31 & 0.43 & 0.27 & 0.25 & -1.01 & 3.47 & 4.86 & 2.74 & 2.52 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & -0.23 & 0.16 & 0.18 & 0.18 & 0.11 & -4.02 & 2.85 & 2.86 & 2.96 & 1.78 \\
 & 3 & -0.13 & 0.14 & 0.13 & 0.04 & 0.05 & -2.01 & 2.68 & 2.42 & 0.83 & 0.82 \\
 & 4 & -0.02 & 0.12 & 0.14 & 0.07 & 0.07 & -0.20 & 2.03 & 2.63 & 1.37 & 1.11 \\
 & B & 0.10 & 0.19 & 0.09 & 0.04 & -0.08 & 1.09 & 3.59 & 1.99 & 0.86 & -1.52 \\
\cline{1-12}
b & S & 1.04 & 0.97 & 0.92 & 1.00 & 1.07 & 57.71 & 52.90 & 51.05 & 49.65 & 53.89 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & 1.15 & 1.00 & 0.97 & 0.99 & 1.14 & 99.48 & 90.82 & 76.91 & 82.12 & 93.12 \\
 & 3 & 1.17 & 1.05 & 1.00 & 0.98 & 1.1

Now for the Subperiods. 
192701-196306:

In [314]:
dep = MOMdep.copy()
exp = MOMexp.copy()
rf = FF.copy()
dep = dep[(dep["Date"]<'1963-07-31')]
exp = exp[(exp["Date"]<'1963-07-31')].drop(columns = "Date")
rf = rf[( rf["Date"]<'1963-07-31')].drop(columns = "Date")


a = out_array("a", dep, exp, rf)
b = out_array("b", dep, exp, rf)
s = out_array("s", dep, exp, rf)
m = out_array("m", dep, exp, rf)
R2_se = out_array("R2/s(e)", dep, exp, rf)
tab = pd.concat([a,b,s,m, R2_se])

# To latex regression tab:
latex_table = tab.to_latex(index=True, float_format="%.2f", 
                           caption=r"Subperiod: 192701-196306, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)", 
                           label="tab:regression")
#print(latex_table)

  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


196307-199112


In [315]:
dep = MOMdep.copy()
exp = MOMexp.copy()
rf = FF.copy()
dep = dep[(dep["Date"]>='1963-07-31') & (dep["Date"]<='1991-12-31')]
exp = exp[(exp["Date"]>='1963-07-31')& (exp["Date"]<='1991-12-31')].drop(columns = "Date")
rf = rf[( rf["Date"]>='1963-07-31')& (rf["Date"]<='1991-12-31')].drop(columns = "Date")


a = out_array("a", dep, exp, rf)
b = out_array("b", dep, exp, rf)
s = out_array("s", dep, exp, rf)
m = out_array("m", dep, exp, rf)
R2_se = out_array("R2/s(e)", dep, exp, rf)
tab = pd.concat([a,b,s,m, R2_se])

# To latex regression tab:
latex_table = tab.to_latex(index=True, float_format="%.2f", 
                           caption=r"Subperiod: 196307-199112, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)", 
                           label="tab:regression")
#print(latex_table)

  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


199201-202312

In [316]:
dep = MOMdep.copy()
exp = MOMexp.copy()
rf = FF.copy()
dep = dep[(dep["Date"]>='1992-01-31') & (dep["Date"]<='2023-12-31')]
exp = exp[(exp["Date"]>='1992-01-31')& (exp["Date"]<='2023-12-31')].drop(columns = "Date")
rf = rf[( rf["Date"]>='1992-01-31')& (rf["Date"]<='2023-12-31')].drop(columns = "Date")


a = out_array("a", dep, exp, rf)
b = out_array("b", dep, exp, rf)
s = out_array("s", dep, exp, rf)
m = out_array("m", dep, exp, rf)
R2_se = out_array("R2/s(e)", dep, exp, rf)
tab = pd.concat([a,b,s,m, R2_se])

# To latex regression tab:
latex_table = tab.to_latex(index=True, float_format="%.2f", 
                           caption=r"Subperiod: 199201-202312, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)", 
                           label="tab:regression")
#print(latex_table)

  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


### Appendix: Replicate tables and similarly from FF.
Here, we produce code to do things similarly to the original paper.

In [317]:
# Take july 1963 to december 1991
OFF = FF[(FF["Date"]>='1963-07-31') & ( FF["Date"]<'1992-01-31')] 
OFF["Mkt-RF"], OFF["SMB"], OFF["HML"] = OFF["Mkt-RF"], OFF["SMB"], OFF["HML"] 
RF = OFF["RF"]
OFF = OFF[["Mkt-RF", "SMB", "HML"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  OFF["Mkt-RF"], OFF["SMB"], OFF["HML"] = OFF["Mkt-RF"], OFF["SMB"], OFF["HML"]


In [318]:
# Read subparts of FF data. 
pf_25_vwr = pd.read_csv("Data/25_Portfolios_5x5.csv", skiprows=15, nrows = 1171)
pf_25_vwr = pf_25_vwr.rename(columns = {"Unnamed: 0" : "Date"})
pf_25_vwr["Date"] = pd.to_datetime(pf_25_vwr["Date"], format="%Y%m") + pd.offsets.MonthEnd()
pf_25_vwr = pf_25_vwr[(pf_25_vwr["Date"]<pd.Timestamp('1992-01-31')) & (pf_25_vwr["Date"]>=pd.Timestamp('1963-07-31'))] 
pf_25_vwr = pf_25_vwr.set_index("Date").sort_index(ascending=True).reset_index()

# Note for this i have renamed the variables to easier replicate table.
pf_25_nf = pd.read_csv("Data/25_Portfolios_5x5.csv", skiprows=2581, nrows = 1171)
pf_25_nf = pf_25_nf.rename(columns = {"Unnamed: 0" : "Date"})
pf_25_nf["Date"] = pd.to_datetime(pf_25_nf["Date"], format="%Y%m") + pd.offsets.MonthEnd()
pf_25_nf = pf_25_nf[(pf_25_nf["Date"]<pd.Timestamp('1992-01-31')) & (pf_25_nf["Date"]>=pd.Timestamp('1963-07-31'))] 
pf_25_nf = pf_25_nf.set_index("Date").sort_index(ascending=True).reset_index()

pf_25_nf["Y"] =  pf_25_nf['Date'].dt.year

pf_25_vwr

Unnamed: 0,Date,ME1 PRIOR1,ME1 PRIOR2,ME1 PRIOR3,ME1 PRIOR4,ME1 PRIOR5,ME2 PRIOR1,ME2 PRIOR2,ME2 PRIOR3,ME2 PRIOR4,...,ME4 PRIOR2,ME4 PRIOR3,ME4 PRIOR4,ME4 PRIOR5,ME5 PRIOR1,ME5 PRIOR2,ME5 PRIOR3,ME5 PRIOR4,ME5 PRIOR5,Market Return
0,1963-07-31,1.1307,-0.3091,0.7079,0.1062,-1.3211,-1.8071,0.1899,-1.0105,-1.9644,...,-1.6781,-1.9184,-1.5741,-1.8567,0.1547,0.4833,1.2286,-0.5862,-1.1026,
1,1963-08-31,4.2370,1.3834,1.4977,2.3755,4.7567,5.5665,4.5191,4.4424,4.4188,...,4.7306,6.2332,7.6782,5.3469,5.7691,4.2550,4.5936,8.2831,6.3824,
2,1963-09-30,-2.8878,0.6263,-1.0204,-1.6000,-0.4320,-4.0502,-1.5034,-0.8798,-1.1812,...,-2.0801,-1.7800,-3.9639,-1.9943,-1.3595,-0.8054,-0.8135,-0.2145,-3.4963,
3,1963-10-31,1.2885,-0.7071,1.3132,0.0855,2.3988,1.1916,4.2342,2.3524,2.2012,...,0.6829,2.6229,4.8492,0.6113,5.3339,1.7427,-0.2469,2.3915,0.4857,
4,1963-11-30,-3.3751,-3.7534,-1.8055,-1.0487,-1.0538,-4.2561,-1.7534,-0.7809,-0.1002,...,-0.6434,-0.7933,1.3610,3.5388,-1.2556,1.0072,-1.7425,-2.0838,1.3455,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337,1991-08-31,3.7660,4.9642,3.3036,0.8912,2.5648,3.9931,3.4807,3.7487,3.2501,...,2.4808,2.8089,2.8636,3.1384,4.3292,0.5920,2.6484,1.4335,1.7558,
338,1991-09-30,4.4078,2.4480,0.2229,0.3425,-1.4219,0.5631,0.6227,1.9861,1.1513,...,-1.7675,-0.8980,0.2090,-1.0699,-2.1176,-1.1911,-0.9900,0.5191,-3.8291,
339,1991-10-31,6.7438,4.0105,1.8304,4.0238,1.2581,2.3269,1.6157,3.2404,0.3751,...,0.5155,1.4563,2.6295,3.4508,1.4452,0.9375,1.9438,3.2627,0.4944,
340,1991-11-30,-2.1445,-2.9867,-4.1670,-3.3929,-2.7336,-4.7847,-5.8368,-2.8752,-5.2109,...,-4.8911,-4.0517,-2.8979,-6.2634,-1.1949,-5.4796,-5.5360,-3.7224,-8.3265,


In [319]:
# Subtable 2 latter part.
pf_25_vwr_e = pf_25_vwr.drop(columns = ["Date", "Market Return"]).copy() 
# Find excess return
for i in pf_25_vwr_e.columns.values:
    pf_25_vwr_e[i] = pf_25_vwr[i]  -  FF["RF"] 
Utils.table_2_lower(pf_25_vwr_e)

Unnamed: 0_level_0,mean,mean,mean,mean,mean,std,std,std,std,std,t-test of mean,t-test of mean,t-test of mean,t-test of mean,t-test of mean
PRIOR,PRIOR1,PRIOR2,PRIOR3,PRIOR4,PRIOR5,PRIOR1,PRIOR2,PRIOR3,PRIOR4,PRIOR5,PRIOR1,PRIOR2,PRIOR3,PRIOR4,PRIOR5
ME,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
ME1,0.781475,1.143816,1.210045,1.344637,1.466232,7.756928,6.770919,6.283119,5.926139,6.271099,1.863108,3.124076,3.561553,4.196103,4.323866
ME2,0.865895,1.114516,1.314024,1.377328,1.470441,7.270707,6.283741,5.74012,5.339751,6.015569,2.202428,3.280054,4.233459,4.770122,4.520474
ME3,0.908361,1.126064,1.141475,1.305888,1.407142,6.657011,5.72642,5.166071,4.931202,5.693645,2.523437,3.636577,4.086194,4.897407,4.570468
ME4,0.926296,0.847008,1.069941,1.268404,1.367421,5.894195,5.424643,5.059844,4.938601,5.646482,2.906287,2.887548,3.91053,4.749706,4.478548
ME5,0.842677,0.807513,0.828277,0.97606,1.005841,4.888186,4.709419,4.36533,4.289873,4.779291,3.18806,3.170993,3.508903,4.207704,3.892055


In [320]:
# Regression: 

var = pf_25_vwr.columns.drop(["Date", "Market Return"])
params = pd.DataFrame(columns =["a", "b","s","h","R2/s(e)"])
tvals =  pd.DataFrame(columns =["a", "b","s","h", "R2/s(e)"])
X = np.array(OFF).reshape(len(OFF),len(OFF.columns))
X = sm.add_constant(X)
for v in var:
    y = np.array(pf_25_vwr[v])  - np.array(RF) * 100
    linmod = sm.OLS(y,X)
    results = linmod.fit()
    coef = pd.DataFrame([np.append(results.params.round(2),results.rsquared.round(2))],
                        columns=["a", "b","s","h","R2/s(e)"])
    ttest = pd.DataFrame([np.append(results.tvalues.round(2),np.std(results.resid).round(4))],
                         columns=["a", "b","s","h", "R2/s(e)"])
    params = pd.concat([params,coef], ignore_index=True)
    tvals =  pd.concat([tvals,ttest], ignore_index=True)

#params # Relatively close - > So assume it is correct. 



  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


In [321]:
# Generate table:
def out_array(param):
    p = param
    N = np.sqrt(len(params[p]))
    parameter = np.array(params[p]).reshape((int(N),int(N)))
    ttest = np.array(tvals[p]).reshape((int(N),int(N)))
    tab = np.append(parameter,ttest,axis=1)
    tabdf = pd.DataFrame(tab, columns=pd.MultiIndex.from_tuples(
                          [(r"\beta/R^2","L"), ("","2"), ("","3"), ("","4"), ("","H"),
                           (r"t(\beta)/s(e)","L"), ("","2"), ("","3"), ("","4"), ("","H")]))
    tabdf = tabdf.set_index([pd.Index([param,"", "", "", ""]),pd.Index(["S", "2", "3", "4", "B"])])

    return tabdf

a = out_array("a")
b = out_array("b")
s = out_array("s")
h = out_array("h")
R2_se = out_array("R2/s(e)")
tab = pd.concat([a,b,s,h,R2_se])


In [322]:
latex_table = tab.to_latex(index=True, float_format="%.2f", caption="Original FF", label="tab:sample_table")
print(latex_table)

\begin{table}
\caption{Original FF}
\label{tab:sample_table}
\begin{tabular}{llrrrrrrrrrr}
\toprule
 &  & \beta/R^2 & \multicolumn{4}{r}{} & t(\beta)/s(e) & \multicolumn{4}{r}{} \\
 &  & L & 2 & 3 & 4 & H & L & 2 & 3 & 4 & H \\
\midrule
a & S & -55.23 & -54.94 & -54.92 & -54.79 & -54.80 & -45.58 & -45.94 & -45.93 & -45.71 & -45.46 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & -54.99 & -54.89 & -54.72 & -54.71 & -54.78 & -46.06 & -46.15 & -45.64 & -45.93 & -45.76 \\
 & 3 & -54.89 & -54.80 & -54.84 & -54.73 & -54.80 & -46.09 & -45.99 & -45.81 & -45.55 & -46.01 \\
 & 4 & -54.74 & -55.00 & -54.86 & -54.76 & -54.81 & -46.05 & -46.12 & -45.93 & -45.39 & -45.75 \\
 & B & -54.66 & -54.88 & -54.90 & -54.92 & -55.02 & -45.54 & -46.18 & -45.75 & -45.78 & -45.99 \\
\cline{1-12}
b & S & 1.45 & 1.38 & 1.36 & 1.31 & 1.36 & 4.87 & 4.69 & 4.62 & 4.46 & 4.61 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & 1.51 & 1.44 & 1.38 & 1.38 & 1.48 & 5.15 & 4.92 & 4.68 & 4.72 & 5.03 \\
 & 3 & 1.51 & 1.43 & 1.39 & 1.39 & 1