### Problem 2: Momentum factor
Code and notes for subproblem 2: Farma-French Approach

In [77]:
# packages and load data.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import Utils
from scipy.optimize import minimize 
from sklearn.linear_model import LinearRegression as lm
import statsmodels.api as sm
import Backtest as bt   
import matplotlib.dates as mdates

FF = pd.read_csv("Data_clean/FF_cleaned.csv")
bond = pd.read_csv("Data_clean/bond_returns.csv")
#FF["RF"] = FF["RF"] 
MOMexp = pd.read_csv("Data_clean/6_Portfolios_ME_Prior_12_2_returns.csv")
MOMdep = pd.read_csv("Data_clean/25_Portfolios_ME_Prior_12_2_returns.csv")
#MOMexp["Market Return"] = MOMexp["Market Return"] 
#MOMdep["Market Return"] = MOMdep["Market Return"] 
# Keep only RF to create new SMB and MOM
MOMexp = pd.merge(MOMexp, FF[["Date","RF"]], 'left', on = "Date")
MOMdep = pd.merge(MOMdep, FF[["Date","RF"]], 'left', on = "Date")
# MOMdep = pd.merge(MOMdep, bond[["Date","10YrReturns"]], 'left', on = 'Date' )

### Format Data
That is, we construct SMB and MOM in MOMexp

In [78]:
MOMexp["SMB"] = (MOMexp["SMALL LoPRIOR"]  + MOMexp["ME1 PRIOR2"] + 
                 MOMexp["SMALL HiPRIOR"] ) / 3 - (MOMexp["BIG LoPRIOR"]  + 
                            MOMexp["ME2 PRIOR2"] + MOMexp["BIG HiPRIOR"] ) / 3 

MOMexp["MOM"] = (MOMexp["SMALL HiPRIOR"] + 
                 MOMexp["BIG HiPRIOR"]) / 2 - (MOMexp["SMALL LoPRIOR"] + MOMexp["BIG LoPRIOR"]) / 2 
# SMB and MOM is returns - normalize to fit returns on market
MOMexp["MOM"],MOMexp["SMB"] =MOMexp["MOM"] ,MOMexp["SMB"] 

MOMexp["Mkt-RF"] = MOMexp["Market Return"] - MOMexp["RF"]
# Clean data:
MOMexp = MOMexp.rename(columns = {"Market Return":"Mkt"})

## Constructing equivalent table 1 and 2

### Table 7

In [79]:
pf_25_afs = pd.read_csv("Data_clean/25_Portfolios_ME_Prior_12_2_afs.csv")
pf_25_nf = pd.read_csv("Data_clean/25_Portfolios_ME_Prior_12_2_nf.csv")
pf_25_afs.set_index("Date", inplace = True)
pf_25_nf.set_index("Date", inplace = True)

In [80]:
market_cap_df = pf_25_afs.multiply(pf_25_nf)
market_cap_df['Market Value'] = market_cap_df.sum(axis=1)
market_cap_df = market_cap_df.div(market_cap_df['Market Value'], axis=0)
market_cap_df.drop('Market Value', axis=1, inplace=True)

In [81]:
mean_firm_size = pf_25_afs.mean()
mean_number_of_firms = pf_25_nf.mean()
mean_pct_of_market = market_cap_df.mean()
average_of_annual_averages_matrix = mean_firm_size.values.reshape(5, 5).round(2)
average_number_of_firms_matrix = mean_number_of_firms.values.reshape(5, 5).round(2)
average_pct_of_market_matrix = mean_pct_of_market.values.reshape(5, 5).round(4)

In [82]:
average_pct_of_market_matrix *100

array([[ 0.59,  0.35,  0.3 ,  0.3 ,  0.48],
       [ 0.63,  0.59,  0.56,  0.57,  0.76],
       [ 0.87,  1.07,  1.11,  1.14,  1.33],
       [ 1.63,  2.33,  2.57,  2.68,  2.78],
       [ 7.58, 14.96, 18.09, 20.24, 16.5 ]])

In [83]:
mean_firm_size

SMALL LoPRIOR       42.288625
ME1 PRIOR2          56.999510
ME1 PRIOR3          59.814605
ME1 PRIOR4          61.518522
SMALL HiPRIOR       64.093677
ME2 PRIOR1         288.633668
ME2 PRIOR2         298.394098
ME2 PRIOR3         301.827251
ME2 PRIOR4         301.930292
ME2 PRIOR5         298.836753
ME3 PRIOR1         690.118789
ME3 PRIOR2         705.894888
ME3 PRIOR3         715.259854
ME3 PRIOR4         716.993119
ME3 PRIOR5         708.888754
ME4 PRIOR1        1745.226813
ME4 PRIOR2        1786.864072
ME4 PRIOR3        1782.977294
ME4 PRIOR4        1774.818359
ME4 PRIOR5        1769.097680
BIG LoPRIOR      10772.176060
ME5 PRIOR2       13306.455421
ME5 PRIOR3       13951.992277
ME5 PRIOR4       14236.296314
BIG HiPRIOR      13565.647912
dtype: float64

Upper part of table 8

In [84]:
# Calculate summary statistics
summary_stats = MOMexp[['SMB', 'MOM', 'Mkt-RF', 'Mkt']].agg(['mean', 'std']).transpose()
summary_stats['t-stat'] = summary_stats['mean'] / (summary_stats['std'] / np.sqrt(len(MOMexp)))
summary_stats = summary_stats.round(2)

# Calculate autocorrelations for lags 1, 2, and 12
lags = [1, 2, 12]
autocorrelations = {f'lag_{lag}': MOMexp[['SMB', 'MOM', 'Mkt-RF', 'Mkt']].apply(lambda x: x.autocorr(lag)) for lag in lags}
autocorr_df = pd.DataFrame(autocorrelations).transpose().round(2)

# Calculate correlations
correlations = MOMexp[['SMB', 'MOM', 'Mkt-RF']].corr().round(2)
print(summary_stats)
print(autocorr_df)
print(correlations)

        mean   std  t-stat
SMB     0.28  3.46    2.77
MOM     0.62  4.70    4.51
Mkt-RF  0.69  5.34    4.40
Mkt     0.96  5.33    6.12
         SMB   MOM  Mkt-RF   Mkt
lag_1   0.12  0.07    0.09  0.09
lag_2   0.07 -0.07   -0.02 -0.03
lag_12  0.12  0.06    0.00  0.00
         SMB   MOM  Mkt-RF
SMB     1.00 -0.20    0.33
MOM    -0.20  1.00   -0.35
Mkt-RF  0.33 -0.35    1.00


Construction of lower part of table 8

In [85]:
MOMdep_25 = MOMdep.drop(columns = ["Date", "Market Return", "RF"]) 
# Find excess return
MOMdep_25_excess = MOMdep_25.copy()
for i in MOMdep_25.columns.values:
    MOMdep_25_excess[i] = MOMdep_25[i]  -  MOMdep["RF"]
MOMdep_25_excess = MOMdep_25_excess.rename(columns = {"SMALL LoPRIOR": "ME1 PRIOR1",
                                            "SMALL HiPRIOR": "ME1 PRIOR5",
                                            "BIG LoPRIOR": "ME5 PRIOR1",
                                            "BIG HiPRIOR": "ME5 PRIOR5"})
Utils.table_2_lower(MOMdep_25_excess).round(2)
MOMexp = MOMexp[["Date","Mkt-RF", "SMB", "MOM"]]


## Regressions
We are now ready to do the regressions.

This is table 9

In [86]:
# Regression: 
def regression(dep,exp,rf):
    var = dep.columns.drop(["Date", "Market Return", "RF"])
    params = pd.DataFrame(columns =["a", "b","s","m","R2/s(e)"])
    tvals =  pd.DataFrame(columns =["a", "b","s","m", "R2/s(e)"])
    X = np.array(exp).reshape(len(exp),len(exp.columns))
    X = sm.add_constant(X)
    for v in var:
        y = np.array(dep[v])  - np.array(rf["RF"]) 
        linmod = sm.OLS(y,X)
        results = linmod.fit()
        coef = pd.DataFrame([np.append(results.params.round(2),results.rsquared.round(2))],
                            columns=["a", "b","s","m","R2/s(e)"])
        ttest = pd.DataFrame([np.append(results.tvalues.round(2),np.sqrt(results.mse_resid).round(4))],
                            columns=["a", "b","s","m", "R2/s(e)"])
        params = pd.concat([params,coef], ignore_index=True)
        tvals =  pd.concat([tvals,ttest], ignore_index=True)
    return params, tvals

# params # Relatively close - > So assume it is correct.

In [87]:
# Create output table: 
# Generate table:
def out_array(param,dep,exp,rf):
    p = param
    params, tvals = regression(dep,exp,rf)
    N = np.sqrt(len(params[p]))
    parameter = np.array(params[p]).reshape((int(N),int(N)))
    ttest = np.array(tvals[p]).reshape((int(N),int(N)))
    tab = np.append(parameter,ttest,axis=1)
    tabdf = pd.DataFrame(tab, columns=pd.MultiIndex.from_tuples(
                          [(r"\beta/R^2","LPrior"), ("","2"), ("","3"), ("","4"), ("","HPrior"),
                           (r"t(\beta)/s(e)","LPrior"), ("","2"), ("","3"), ("","4"), ("","HPrior")]))
    tabdf = tabdf.set_index([pd.Index([param,"", "", "", ""]),pd.Index(["S", "2", "3", "4", "B"])])

    return  tabdf

dep = MOMdep.copy()
exp = MOMexp.copy()
rf = FF.copy()
exp = exp.drop(columns = "Date")
rf = FF.drop(columns = "Date")

a = out_array("a", dep, exp, rf)
b = out_array("b", dep, exp, rf)
s = out_array("s", dep, exp, rf)
m = out_array("m", dep, exp, rf)
R2_se = out_array("R2/s(e)", dep, exp, rf)
tab = pd.concat([a,b,s,m, R2_se])

# This checks thas sqrt MSE of residuals is essentially s(e)
# print(results.mse_resid, np.sum(results.resid**2) / results.df_resid)
# So to get standard error one needs a squareroot.


  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


In [88]:
# To latex regression tab:
latex_table = tab.to_latex(index=True, float_format="%.2f", 
                           caption=r"Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)", 
                           label="tab:regression")
print(latex_table)


\begin{table}
\caption{Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)}
\label{tab:regression}
\begin{tabular}{llrrrrrrrrrr}
\toprule
 &  & \beta/R^2 & \multicolumn{4}{r}{} & t(\beta)/s(e) & \multicolumn{4}{r}{} \\
 &  & LPrior & 2 & 3 & 4 & HPrior & LPrior & 2 & 3 & 4 & HPrior \\
\midrule
a & S & -0.09 & 0.31 & 0.43 & 0.27 & 0.25 & -1.01 & 3.47 & 4.86 & 2.74 & 2.52 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & -0.23 & 0.16 & 0.18 & 0.18 & 0.11 & -4.02 & 2.85 & 2.86 & 2.96 & 1.78 \\
 & 3 & -0.13 & 0.14 & 0.13 & 0.04 & 0.05 & -2.01 & 2.68 & 2.42 & 0.83 & 0.82 \\
 & 4 & -0.02 & 0.12 & 0.14 & 0.07 & 0.07 & -0.20 & 2.03 & 2.63 & 1.37 & 1.11 \\
 & B & 0.10 & 0.19 & 0.09 & 0.04 & -0.08 & 1.09 & 3.59 & 1.99 & 0.86 & -1.52 \\
\cline{1-12}
b & S & 1.04 & 0.97 & 0.92 & 1.00 & 1.07 & 57.71 & 52.90 & 51.05 & 49.65 & 53.89 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & 1.15 & 1.00 & 0.97 & 0.99 & 1.14 & 99.48 & 90.82 & 76.91 & 82.12 & 93.12 \\
 & 3 & 1.17 & 1.05 & 1.00 & 0.98 & 1.1

Now for the Subperiods. 
192701-196306:

Table 10

In [89]:
dep = MOMdep.copy()
exp = MOMexp.copy()
rf = FF.copy()
dep = dep[(dep["Date"]<'1963-07-31')]
exp = exp[(exp["Date"]<'1963-07-31')].drop(columns = "Date")
rf = rf[( rf["Date"]<'1963-07-31')].drop(columns = "Date")


a = out_array("a", dep, exp, rf)
b = out_array("b", dep, exp, rf)
s = out_array("s", dep, exp, rf)
m = out_array("m", dep, exp, rf)
R2_se = out_array("R2/s(e)", dep, exp, rf)
tab = pd.concat([m])

# To latex regression tab:
latex_table = tab.to_latex(index=True, float_format="%.2f", 
                           caption=r"Subperiod: 192701-196306, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)", 
                           label="tab:regression")
print(latex_table)

  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)


\begin{table}
\caption{Subperiod: 192701-196306, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)}
\label{tab:regression}
\begin{tabular}{llrrrrrrrrrr}
\toprule
 &  & \beta/R^2 & \multicolumn{4}{r}{} & t(\beta)/s(e) & \multicolumn{4}{r}{} \\
 &  & LPrior & 2 & 3 & 4 & HPrior & LPrior & 2 & 3 & 4 & HPrior \\
\midrule
m & S & -0.74 & -0.48 & -0.35 & 0.04 & 0.24 & -19.42 & -13.01 & -9.79 & 0.80 & 5.17 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & -0.69 & -0.38 & -0.19 & 0.12 & 0.32 & -31.73 & -17.99 & -7.03 & 4.52 & 14.38 \\
 & 3 & -0.78 & -0.38 & -0.17 & 0.15 & 0.44 & -34.54 & -19.19 & -8.29 & 8.07 & 20.61 \\
 & 4 & -0.83 & -0.39 & -0.12 & 0.14 & 0.45 & -25.52 & -17.56 & -5.94 & 6.80 & 20.74 \\
 & B & -0.78 & -0.44 & -0.24 & 0.12 & 0.42 & -21.40 & -22.80 & -15.42 & 7.60 & 21.70 \\
\cline{1-12}
\bottomrule
\end{tabular}
\end{table}



  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


196307-199112


Table 11

In [90]:
dep = MOMdep.copy()
exp = MOMexp.copy()
rf = FF.copy()
dep = dep[(dep["Date"]>='1963-07-31') & (dep["Date"]<='1991-12-31')]
exp = exp[(exp["Date"]>='1963-07-31')& (exp["Date"]<='1991-12-31')].drop(columns = "Date")
rf = rf[( rf["Date"]>='1963-07-31')& (rf["Date"]<='1991-12-31')].drop(columns = "Date")


a = out_array("a", dep, exp, rf)
b = out_array("b", dep, exp, rf)
s = out_array("s", dep, exp, rf)
m = out_array("m", dep, exp, rf)
R2_se = out_array("R2/s(e)", dep, exp, rf)
tab = pd.concat([s])

# To latex regression tab:
latex_table = tab.to_latex(index=True, float_format="%.2f", 
                           caption=r"Subperiod: 196307-199112, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)", 
                           label="tab:regression")
print(latex_table)

  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


\begin{table}
\caption{Subperiod: 196307-199112, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)}
\label{tab:regression}
\begin{tabular}{llrrrrrrrrrr}
\toprule
 &  & \beta/R^2 & \multicolumn{4}{r}{} & t(\beta)/s(e) & \multicolumn{4}{r}{} \\
 &  & LPrior & 2 & 3 & 4 & HPrior & LPrior & 2 & 3 & 4 & HPrior \\
\midrule
s & S & 1.39 & 1.16 & 1.13 & 1.11 & 1.26 & 39.54 & 44.99 & 45.19 & 44.81 & 37.96 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & 0.96 & 0.83 & 0.78 & 0.84 & 0.89 & 30.09 & 33.46 & 33.62 & 35.36 & 26.31 \\
 & 3 & 0.65 & 0.55 & 0.55 & 0.55 & 0.68 & 17.60 & 20.44 & 21.36 & 21.45 & 20.60 \\
 & 4 & 0.31 & 0.21 & 0.28 & 0.26 & 0.39 & 7.16 & 6.99 & 10.09 & 9.44 & 11.24 \\
 & B & -0.17 & -0.27 & -0.20 & -0.22 & -0.02 & -3.94 & -9.19 & -7.28 & -9.04 & -0.49 \\
\cline{1-12}
\bottomrule
\end{tabular}
\end{table}



  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


In [91]:
# Comparison to Farma-French table:
tst = np.abs(np.array(tab).reshape((5,10))[:,:5] - np.array([
                                                [1.46,1.26,1.19,1.17,1.23],
                                                [1.00,0.98,0.88,0.73,0.89],
                                                [0.76,0.65,0.60,0.48,0.66],
                                                [0.37,0.33,0.29,0.24,0.41],
                                                [-0.17,-0.12,-0.23,-0.17,-0.05]
                                                ]))
tst

array([[0.07, 0.1 , 0.06, 0.06, 0.03],
       [0.04, 0.15, 0.1 , 0.11, 0.  ],
       [0.11, 0.1 , 0.05, 0.07, 0.02],
       [0.06, 0.12, 0.01, 0.02, 0.02],
       [0.  , 0.15, 0.03, 0.05, 0.03]])

199201-202312

Table 12

In [92]:
dep = MOMdep.copy()
exp = MOMexp.copy()
rf = FF.copy()
dep = dep[(dep["Date"]>='1992-01-31') & (dep["Date"]<='2023-12-31')]
exp = exp[(exp["Date"]>='1992-01-31')& (exp["Date"]<='2023-12-31')].drop(columns = "Date")
rf = rf[( rf["Date"]>='1992-01-31')& (rf["Date"]<='2023-12-31')].drop(columns = "Date")


a = out_array("a", dep, exp, rf)
b = out_array("b", dep, exp, rf)
s = out_array("s", dep, exp, rf)
m = out_array("m", dep, exp, rf)
R2_se = out_array("R2/s(e)", dep, exp, rf)
tab = pd.concat([m])

# To latex regression tab:
latex_table = tab.to_latex(index=True, float_format="%.2f", 
                           caption=r"Subperiod: 199201-202312, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)", 
                           label="tab:regression")
print(latex_table)

  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


\begin{table}
\caption{Subperiod: 199201-202312, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)}
\label{tab:regression}
\begin{tabular}{llrrrrrrrrrr}
\toprule
 &  & \beta/R^2 & \multicolumn{4}{r}{} & t(\beta)/s(e) & \multicolumn{4}{r}{} \\
 &  & LPrior & 2 & 3 & 4 & HPrior & LPrior & 2 & 3 & 4 & HPrior \\
\midrule
m & S & -0.71 & -0.26 & -0.10 & 0.02 & 0.29 & -23.62 & -13.33 & -4.97 & 1.06 & 11.18 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & -0.73 & -0.32 & -0.09 & 0.03 & 0.40 & -36.53 & -16.77 & -4.62 & 1.37 & 17.84 \\
 & 3 & -0.77 & -0.33 & -0.17 & 0.03 & 0.42 & -29.51 & -16.10 & -8.09 & 1.21 & 21.42 \\
 & 4 & -0.85 & -0.39 & -0.16 & 0.04 & 0.46 & -27.99 & -18.04 & -7.95 & 1.98 & 20.56 \\
 & B & -0.80 & -0.45 & -0.13 & 0.10 & 0.44 & -24.90 & -23.57 & -7.67 & 5.96 & 24.11 \\
\cline{1-12}
\bottomrule
\end{tabular}
\end{table}



  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


Selected period 1980-2023

Table 13

In [93]:
dep = MOMdep.copy()
exp = MOMexp.copy()
rf = FF.copy()
dep = dep[(dep["Date"]>='1980-01-31') & (dep["Date"]<='2023-12-31')]
exp = exp[(exp["Date"]>='1980-01-31') & (exp["Date"]<='2023-12-31')].drop(columns = "Date")
rf = rf[ (rf["Date"]>='1980-01-31')& (rf["Date"]<='2023-12-31')].drop(columns = "Date")


a = out_array("a", dep, exp, rf)
b = out_array("b", dep, exp, rf)
s = out_array("s", dep, exp, rf)
m = out_array("m", dep, exp, rf)
R2_se = out_array("R2/s(e)", dep, exp, rf)
tab = pd.concat([m])

# To latex regression tab:
latex_table = tab.to_latex(index=True, float_format="%.2f", 
                           caption=r"Subperiod: 198001-202312, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)", 
                           label="tab:regression")
print(latex_table)

  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)
  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


\begin{table}
\caption{Subperiod: 198001-202312, Regression: R(t)-RF(t)=a + b [RM(t) - RF(t)] + sSMB(t) + m MOM(t) +e(t)}
\label{tab:regression}
\begin{tabular}{llrrrrrrrrrr}
\toprule
 &  & \beta/R^2 & \multicolumn{4}{r}{} & t(\beta)/s(e) & \multicolumn{4}{r}{} \\
 &  & LPrior & 2 & 3 & 4 & HPrior & LPrior & 2 & 3 & 4 & HPrior \\
\midrule
m & S & -0.67 & -0.25 & -0.09 & 0.05 & 0.29 & -27.00 & -15.93 & -5.08 & 2.81 & 13.94 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & -0.71 & -0.31 & -0.08 & 0.05 & 0.37 & -41.41 & -20.09 & -5.21 & 3.00 & 20.16 \\
 & 3 & -0.76 & -0.34 & -0.16 & 0.05 & 0.41 & -35.13 & -20.07 & -9.28 & 2.64 & 23.94 \\
 & 4 & -0.86 & -0.40 & -0.16 & 0.07 & 0.45 & -32.82 & -21.72 & -9.44 & 4.00 & 24.06 \\
 & B & -0.82 & -0.45 & -0.11 & 0.14 & 0.47 & -29.86 & -28.31 & -7.97 & 9.93 & 29.57 \\
\cline{1-12}
\bottomrule
\end{tabular}
\end{table}



  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


### Appendix: Replicate tables and similarly from FF.
Here, we produce code to do things similarly to the original paper.
Not included in project!!

In [94]:
# Take july 1963 to december 1991
OFF = FF[(FF["Date"]>='1963-07-31') & ( FF["Date"]<'1992-01-31')] 
OFF["Mkt-RF"], OFF["SMB"], OFF["HML"] = OFF["Mkt-RF"], OFF["SMB"], OFF["HML"] 
RF = OFF["RF"]
OFF = OFF[["Mkt-RF", "SMB", "HML"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  OFF["Mkt-RF"], OFF["SMB"], OFF["HML"] = OFF["Mkt-RF"], OFF["SMB"], OFF["HML"]


In [95]:
# Read subparts of FF data. 
pf_25_vwr = pd.read_csv("Data/25_Portfolios_5x5.csv", skiprows=15, nrows = 1171)
pf_25_vwr = pf_25_vwr.rename(columns = {"Unnamed: 0" : "Date"})
pf_25_vwr["Date"] = pd.to_datetime(pf_25_vwr["Date"], format="%Y%m") + pd.offsets.MonthEnd()
pf_25_vwr = pf_25_vwr[(pf_25_vwr["Date"]<pd.Timestamp('1992-01-31')) & (pf_25_vwr["Date"]>=pd.Timestamp('1963-07-31'))] 
pf_25_vwr = pf_25_vwr.set_index("Date").sort_index(ascending=True).reset_index()

# Note for this i have renamed the variables to easier replicate table.
pf_25_nf = pd.read_csv("Data/25_Portfolios_5x5.csv", skiprows=2581, nrows = 1171)
pf_25_nf = pf_25_nf.rename(columns = {"Unnamed: 0" : "Date"})
pf_25_nf["Date"] = pd.to_datetime(pf_25_nf["Date"], format="%Y%m") + pd.offsets.MonthEnd()
pf_25_nf = pf_25_nf[(pf_25_nf["Date"]<pd.Timestamp('1992-01-31')) & (pf_25_nf["Date"]>=pd.Timestamp('1963-07-31'))] 
pf_25_nf = pf_25_nf.set_index("Date").sort_index(ascending=True).reset_index()

pf_25_nf["Y"] =  pf_25_nf['Date'].dt.year

OFF

Unnamed: 0,Mkt-RF,SMB,HML
438,-0.39,-0.45,-0.97
439,5.07,-0.98,1.80
440,-1.57,-0.33,0.13
441,2.53,-0.58,-0.10
442,-0.85,-1.17,1.75
...,...,...,...
775,2.32,1.58,-0.78
776,-1.59,1.64,-1.08
777,1.29,0.81,-0.47
778,-4.19,-0.50,-1.89


In [96]:
# Subtable 2 latter part.
pf_25_vwr_e = pf_25_vwr.drop(columns = ["Date", "Market Return"]).copy() 
# Find excess return
for i in pf_25_vwr_e.columns.values:
    pf_25_vwr_e[i] = pf_25_vwr[i]  -  FF["RF"] 
Utils.table_2_lower(pf_25_vwr_e)

Unnamed: 0_level_0,mean,mean,mean,mean,mean,std,std,std,std,std,t-test of mean,t-test of mean,t-test of mean,t-test of mean,t-test of mean
PRIOR,PRIOR1,PRIOR2,PRIOR3,PRIOR4,PRIOR5,PRIOR1,PRIOR2,PRIOR3,PRIOR4,PRIOR5,PRIOR1,PRIOR2,PRIOR3,PRIOR4,PRIOR5
ME,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
ME1,0.781475,1.143816,1.210045,1.344637,1.466232,7.756928,6.770919,6.283119,5.926139,6.271099,1.863108,3.124076,3.561553,4.196103,4.323866
ME2,0.865895,1.114516,1.314024,1.377328,1.470441,7.270707,6.283741,5.74012,5.339751,6.015569,2.202428,3.280054,4.233459,4.770122,4.520474
ME3,0.908361,1.126064,1.141475,1.305888,1.407142,6.657011,5.72642,5.166071,4.931202,5.693645,2.523437,3.636577,4.086194,4.897407,4.570468
ME4,0.926296,0.847008,1.069941,1.268404,1.367421,5.894195,5.424643,5.059844,4.938601,5.646482,2.906287,2.887548,3.91053,4.749706,4.478548
ME5,0.842677,0.807513,0.828277,0.97606,1.005841,4.888186,4.709419,4.36533,4.289873,4.779291,3.18806,3.170993,3.508903,4.207704,3.892055


In [97]:
# Regression: 

var = pf_25_vwr.columns.drop(["Date", "Market Return"])
params = pd.DataFrame(columns =["a", "b","s","h","R2/s(e)"])
tvals =  pd.DataFrame(columns =["a", "b","s","h", "R2/s(e)"])
X = np.array(OFF).reshape(len(OFF),len(OFF.columns))
X = sm.add_constant(X)
for v in var:
    y = np.array(pf_25_vwr[v])  - np.array(RF) 
    linmod = sm.OLS(y,X)
    results = linmod.fit()
    coef = pd.DataFrame([np.append(results.params.round(2),results.rsquared.round(2))],
                        columns=["a", "b","s","h","R2/s(e)"])
    ttest = pd.DataFrame([np.append(results.tvalues.round(2),np.sqrt(results.mse_resid).round(4))],
                         columns=["a", "b","s","h", "R2/s(e)"])
    params = pd.concat([params,coef], ignore_index=True)
    tvals =  pd.concat([tvals,ttest], ignore_index=True)

#params # Relatively close - > So assume it is correct. 



  params = pd.concat([params,coef], ignore_index=True)
  tvals =  pd.concat([tvals,ttest], ignore_index=True)


In [98]:
# Generate table:
def out_array(param):
    p = param
    N = np.sqrt(len(params[p]))
    parameter = np.array(params[p]).reshape((int(N),int(N)))
    ttest = np.array(tvals[p]).reshape((int(N),int(N)))
    tab = np.append(parameter,ttest,axis=1)
    tabdf = pd.DataFrame(tab, columns=pd.MultiIndex.from_tuples(
                          [(r"\beta/R^2","L"), ("","2"), ("","3"), ("","4"), ("","H"),
                           (r"t(\beta)/s(e)","L"), ("","2"), ("","3"), ("","4"), ("","H")]))
    tabdf = tabdf.set_index([pd.Index([param,"", "", "", ""]),pd.Index(["S", "2", "3", "4", "B"])])

    return tabdf

a = out_array("a")
b = out_array("b")
s = out_array("s")
h = out_array("h")
R2_se = out_array("R2/s(e)")
tab = pd.concat([a,b,s,h,R2_se])


In [99]:
latex_table = tab.to_latex(index=True, float_format="%.2f", caption="Original FF", label="tab:sample_table")
print(latex_table)

\begin{table}
\caption{Original FF}
\label{tab:sample_table}
\begin{tabular}{llrrrrrrrrrr}
\toprule
 &  & \beta/R^2 & \multicolumn{4}{r}{} & t(\beta)/s(e) & \multicolumn{4}{r}{} \\
 &  & L & 2 & 3 & 4 & H & L & 2 & 3 & 4 & H \\
\midrule
a & S & -0.37 & -0.09 & -0.06 & 0.06 & 0.06 & -3.46 & -1.10 & -0.88 & 1.02 & 0.87 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & -0.14 & -0.03 & 0.14 & 0.15 & 0.08 & -1.66 & -0.50 & 2.05 & 2.37 & 1.12 \\
 & 3 & -0.03 & 0.06 & 0.01 & 0.13 & 0.06 & -0.44 & 0.78 & 0.18 & 1.86 & 0.71 \\
 & 4 & 0.12 & -0.14 & -0.01 & 0.10 & 0.05 & 1.52 & -1.69 & -0.07 & 1.19 & 0.50 \\
 & B & 0.20 & -0.02 & -0.04 & -0.06 & -0.16 & 2.98 & -0.34 & -0.49 & -0.80 & -1.52 \\
\cline{1-12}
b & S & 1.04 & 0.97 & 0.94 & 0.90 & 0.95 & 39.29 & 49.91 & 58.25 & 59.85 & 58.02 \\
\cline{1-12}
\multirow[t]{4}{*}{} & 2 & 1.10 & 1.02 & 0.97 & 0.97 & 1.07 & 53.85 & 60.04 & 59.48 & 63.28 & 63.37 \\
 & 3 & 1.10 & 1.02 & 0.97 & 0.98 & 1.06 & 59.46 & 57.17 & 54.26 & 58.77 & 50.69 \\
 & 4 & 1.06 & 1.07 