# Cross-Sectional Fama-MacBeth (FMB) Regressions

## Imports

In [1]:
# Enable the autoreload extension in mode 2 so that edits to imported modules
# (e.g. the `mypack` helpers imported below) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import scipy as sp
import pandas as pd

import warnings
import multiprocessing as mp
import dill
import tqdm

import pandas_datareader.data as web
import datetime as dt

import statsmodels.api as sm
import linearmodels as lm

import matplotlib.pyplot as plt
import seaborn as sns

import mypack.data as data
import mypack.calc as calc
import mypack.plot as plot
import mypack.clas as clas

In [3]:
def _four_decimals(value):
    """Format a number with four digits after the decimal point."""
    return '%.4f' % value


# Sets the number of decimals pandas displays for floats.
pd.set_option('display.float_format', _four_decimals)

In [4]:
# Apply seaborn's default theme together with a wide default figure size.
# A single call is enough: `sns.set(rc=...)` re-applies every default before
# layering the rc overrides, so a preceding bare `sns.set()` has no extra effect.
sns.set(rc={'figure.figsize': (17, 6)})

In [5]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Data

#### Market level

In [6]:
# Load the market-level (time-series) predictions and estimates from disk.
df_ts_predictions = pd.read_pickle('df_ts_predictions.pkl')
df_ts_estimates = pd.read_pickle('df_ts_estimates.pkl')

# Keep the five prediction columns keyed (1, <statistic>) and attach the
# ('raw_data', 'return') series from the estimates frame.
ts_moment_columns = [(1, stat) for stat in ('mean', 'var', 'skew', 'kurt', 'entropy')]
ts_return = df_ts_estimates[('raw_data', 'return')]
df_ts_factors = df_ts_predictions[ts_moment_columns].join(ts_return)

# Drop the outer column level so each factor is addressed by its short name.
df_ts_factors.columns = df_ts_factors.columns.droplevel(0)

#### Industry level

In [7]:
# Load the industry-level (cross-sectional) predictions and estimates from disk.
df_xs_predictions = pd.read_pickle('df_xs_predictions.pkl')
df_xs_estimates = pd.read_pickle('df_xs_estimates.pkl')

# Keep the five prediction columns keyed (1, <statistic>) plus ('data', 'return').
xs_columns = [(1, stat) for stat in ('mean', 'var', 'skew', 'kurt', 'entropy')]
xs_columns.append(('data', 'return'))
df_xs_data = df_xs_predictions[xs_columns]

# Drop the outer column level so each column is addressed by its short name.
df_xs_data.columns = df_xs_data.columns.droplevel(0)

## Estimate Loadings

In [8]:
# Length of the rolling estimation window, in calendar years.
win_yrs = 1

# Loadings are estimated only for factor dates strictly after the first factor
# date shifted forward by `win_yrs` years.
fact_dates = df_ts_factors.index
first_fact_date = fact_dates[0]
first_est_cutoff = first_fact_date.replace(year=first_fact_date.year + win_yrs)
est_dates = fact_dates[fact_dates > first_est_cutoff]

# Container for all loadings: one row per (Date, Industry) pair and, for now,
# an empty two-level column index that later cells extend by merging.
loadings_index = pd.MultiIndex.from_product(
    [est_dates, df_xs_data.index.get_level_values(1).unique()],
    names=['Date', 'Industry'],
)
df_loadings = pd.DataFrame(
    index=loadings_index,
    columns=pd.MultiIndex.from_product([[], []]),
)

##### Multivariate rolling-window regressions

In [9]:
%%time
factors = ['mean','var','skew','kurt','entropy']
df_betas = pd.DataFrame(index=df_xs_data.index, columns=factors)

dep = df_xs_data['return'].unstack()
beta_list = []

for iDate in tqdm.tqdm(est_dates):
    window = fact_dates[(fact_dates>[iDate+dt.timedelta(1) if (iDate.day==29)&(iDate.month==2) else iDate][0].replace(year=iDate.year-win_yrs)) & (fact_dates<=iDate)]
    x = df_ts_factors.loc[window,factors]
    x['constant'] = 1
    
    for iInd in df_xs_data.index.get_level_values(1).unique():
        y = dep.loc[window,iInd]
        
        try:
            betas = np.dot(np.linalg.inv(np.dot(x.T,x)),np.dot(x.T,y))[:-1]
        except:
            betas = np.full(len(factors), np.nan)
        
        betas.shape = (len(factors),)
        beta_list += [betas]

100%|████████████████████████████████████████████████████████████████████████████| 24195/24195 [59:58<00:00,  7.26it/s]


Wall time: 59min 59s


In [12]:
# Stack the per-(date, industry) coefficient vectors into one float frame.
# `beta_list` was filled date-major / industry-minor, which is the order
# MultiIndex.from_product generates here. np.vstack replaces the deprecated
# np.matrix and avoids filling an object-dtype placeholder frame afterwards.
beta_index = pd.MultiIndex.from_product(
    [est_dates, df_xs_data.index.get_level_values(1).unique()],
    names=['Date', 'Industry'],
)
df_betas = pd.DataFrame(
    np.vstack(beta_list),
    index=beta_index,
    columns=pd.MultiIndex.from_product([['multi'], factors]),
)

# NOTE(review): df_loadings is reassigned from itself, so this cell is not
# idempotent; rebuild df_loadings before re-running it.
df_loadings = df_loadings.merge(df_betas, left_index=True, right_on=['Date','Industry'])

# Persist the loadings so the long-running estimation need not be repeated.
df_loadings.to_pickle('df_loadings.pkl')

In [14]:
%%time
factors = ['mean','var','skew','kurt','entropy','return']
df_betas = pd.DataFrame(index=df_xs_data.index, columns=factors)

dep = df_xs_data['return'].unstack()
beta_list = []

for iDate in tqdm.tqdm(est_dates):
    window = fact_dates[(fact_dates>[iDate+dt.timedelta(1) if (iDate.day==29)&(iDate.month==2) else iDate][0].replace(year=iDate.year-win_yrs)) & (fact_dates<=iDate)]
    x = df_ts_factors.loc[window,factors]
    x['constant'] = 1
    
    for iInd in df_xs_data.index.get_level_values(1).unique():
        y = dep.loc[window,iInd]
        
        try:
            betas = np.dot(np.linalg.inv(np.dot(x.T,x)),np.dot(x.T,y))[:-1]
        except:
            betas = np.full(len(factors), np.nan)
        
        betas.shape = (len(factors),)
        beta_list += [betas]

100%|████████████████████████████████████████████████████████████████████████████| 24195/24195 [56:01<00:00,  7.09it/s]


Wall time: 56min 3s


In [15]:
# Stack the per-(date, industry) coefficient vectors into one float frame.
# `beta_list` was filled date-major / industry-minor, which is the order
# MultiIndex.from_product generates here. np.vstack replaces the deprecated
# np.matrix and avoids filling an object-dtype placeholder frame afterwards.
beta_index = pd.MultiIndex.from_product(
    [est_dates, df_xs_data.index.get_level_values(1).unique()],
    names=['Date', 'Industry'],
)
df_betas = pd.DataFrame(
    np.vstack(beta_list),
    index=beta_index,
    columns=pd.MultiIndex.from_product([['multi_mkt'], factors]),
)

# NOTE(review): df_loadings is reassigned from itself, so this cell is not
# idempotent; rebuild df_loadings before re-running it.
df_loadings = df_loadings.merge(df_betas, left_index=True, right_on=['Date','Industry'])

# Persist the loadings so the long-running estimation need not be repeated.
df_loadings.to_pickle('df_loadings.pkl')

##### Univariate rolling-window regressions

In [38]:
%%time
factors = ['mean','var','skew','kurt','entropy','return']
df_betas = pd.DataFrame(index=df_xs_data.index, columns=factors)

beta_list = []

for iFact in factors:
    dep = df_xs_data[iFact].unstack()
    indep = df_ts_factors[iFact]
    
    beta_list = []
    
    for iDate in tqdm.tqdm(est_dates):
        window = fact_dates[(fact_dates>[iDate+dt.timedelta(1) if (iDate.day==29)&(iDate.month==2) else iDate][0].replace(year=iDate.year-win_yrs)) & (fact_dates<=iDate)]
        x = indep.loc[window].to_frame()
        x['constant'] = 1
    
        for iInd in df_xs_data.index.get_level_values(1).unique():
            y = dep.loc[window,iInd]
        
            try:
                betas = np.dot(np.linalg.inv(np.dot(x.T,x)),np.dot(x.T,y))[0]
            except:
                betas = np.full(1, np.nan)
        
            beta_list += [betas]
    
    s_betas = pd.DataFrame(beta_list, index=pd.MultiIndex.from_product([est_dates,df_xs_data.index.get_level_values(1).unique()], names=['Date','Industry']), columns=pd.MultiIndex.from_tuples([('uni',iFact)]))  
    df_loadings = df_loadings.merge(s_betas, left_index=True, right_on=['Date','Industry'])

100%|██████████████████████████████████████████████████████████████████████████| 24195/24195 [2:10:23<00:00,  7.41it/s]
100%|██████████████████████████████████████████████████████████████████████████| 24195/24195 [1:04:21<00:00,  6.16it/s]
100%|████████████████████████████████████████████████████████████████████████████| 24195/24195 [59:13<00:00,  7.19it/s]
100%|██████████████████████████████████████████████████████████████████████████| 24195/24195 [1:04:12<00:00,  6.38it/s]
100%|██████████████████████████████████████████████████████████████████████████| 24195/24195 [1:06:18<00:00,  4.79it/s]
100%|██████████████████████████████████████████████████████████████████████████| 24195/24195 [1:06:23<00:00,  6.42it/s]


Wall time: 7h 31min 10s


In [39]:
# Persist the accumulated loadings to 'df_loadings.pkl' so the multi-hour
# rolling estimations do not have to be repeated.
df_loadings.to_pickle('df_loadings.pkl')

### FMB regressions

#### Multivariate

In [127]:
# set up data
data_fmb = df_xs_data['return'].to_frame()
data_fmb.columns = pd.MultiIndex.from_tuples([('data','return')])
data_fmb = data_fmb.merge(df_loadings, left_index=True, right_on=['Date','Industry'])
data_fmb = data_fmb[data_fmb[('data','return')].notna()]

# select
y = data_fmb[('data','return')]

In [128]:
# set up output table
column_labels = ['$\\beta_{r_i,\mathbb{E}_M}$','$\\beta_{r_i,\mathbb{V}_M}$','$\\beta_{r_i,\mathbb{S}_M}$','$\\beta_{r_i,\mathbb{K}_M}$','$\\beta_{r_i,\mathbb{H}_M}$','$\\beta_{r_i,r_M}$','$\\alpha$','R$^2$']
fmb_table = pd.DataFrame(index=pd.MultiIndex.from_product([[1,2],['est.','t-stat','t-stat (HAC)']]), columns=column_labels)

In [129]:
# set up independents
x = data_fmb['multi']
x['constant'] = 1

# run regressions
reg = lm.FamaMacBeth(y.swaplevel(0,1),x.swaplevel(0,1)).fit()
fmb_table.loc[(1,'est.'),[column_labels[l] for l in [0,1,2,3,4,6]]] = reg.params.values
fmb_table.loc[(1,'est.'),'R$^2$'] = reg.rsquared
fmb_table.loc[(1,'t-stat'),[column_labels[l] for l in [0,1,2,3,4,6]]] = reg.tstats.values
reg = lm.FamaMacBeth(y.swaplevel(0,1),x.swaplevel(0,1)).fit(cov_type='kernel')
fmb_table.loc[(1,'t-stat (HAC)'),[column_labels[l] for l in [0,1,2,3,4,6]]] = reg.tstats.values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
Inputs contain missing values. Dropping rows with missing observations.
  return list(index.levels[0][index.labels[0]].unique())
  return np.asarray(self._frame.index.labels[0])[:, None]
  return np.asarray(self._frame.index.labels[1])[:, None]


In [130]:
# set up independents
x = data_fmb['multi_mkt']
x['constant'] = 1

# run regressions
reg = lm.FamaMacBeth(y.swaplevel(0,1),x.swaplevel(0,1)).fit()
fmb_table.loc[(2,'est.'),column_labels[0:7]] = reg.params.values
fmb_table.loc[(2,'est.'),'R$^2$'] = reg.rsquared
fmb_table.loc[(2,'t-stat'),column_labels[0:7]] = reg.tstats.values
reg = lm.FamaMacBeth(y.swaplevel(0,1),x.swaplevel(0,1)).fit(cov_type='kernel')
fmb_table.loc[(2,'t-stat (HAC)'),column_labels[0:7]] = reg.tstats.values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
Inputs contain missing values. Dropping rows with missing observations.
  return list(index.levels[0][index.labels[0]].unique())
  return np.asarray(self._frame.index.labels[0])[:, None]
  return np.asarray(self._frame.index.labels[1])[:, None]


In [131]:
# Blank out the cells that were never filled (still NaN) so they show as empty.
fmb_table = fmb_table.replace(to_replace=np.nan, value='')

# Hand the table to the project helper under the name 'fmb_table'
# (presumably writes a LaTeX version -- see mypack.calc), then display it.
calc.export_df_to_latex(fmb_table, 'fmb_table')

fmb_table

Unnamed: 0,Unnamed: 1,"$\beta_{r_i,\mathbb{E}_M}$","$\beta_{r_i,\mathbb{V}_M}$","$\beta_{r_i,\mathbb{S}_M}$","$\beta_{r_i,\mathbb{K}_M}$","$\beta_{r_i,\mathbb{H}_M}$","$\beta_{r_i,r_M}$",$\alpha$,R$^2$
1,est.,-0.0,0.0,0.0053,-0.0163,-0.0084,,0.0003,-0.0121
1,t-stat,-1.1719,0.2558,2.1362,-1.2667,-1.3274,,6.3146,
1,t-stat (robust),-0.695,0.1545,1.3636,-0.8602,-0.951,,5.5018,
2,est.,-0.0,0.0,0.0048,-0.0159,-0.0042,0.0001,0.0003,-0.0024
2,t-stat,-1.3455,0.6831,1.9735,-1.269,-0.6885,0.8746,4.4335,
2,t-stat (robust),-0.7633,0.3993,1.2289,-0.8326,-0.4723,0.7742,3.6161,


#### Univariate

In [132]:
# Dependent variable for the univariate specification: the ('data', 'return') column of data_fmb.
y = data_fmb[('data','return')]

In [133]:
# set up output table
column_labels = ['$\\beta_{\mathbb{E}_i,\mathbb{E}_M}$','$\\beta_{\mathbb{V}_i,\mathbb{V}_M}$','$\\beta_{\mathbb{S}_i,\mathbb{S}_M}$','$\\beta_{\mathbb{K}_i,\mathbb{K}_M}$','$\\beta_{\mathbb{H}_i,\mathbb{H}_M}$','$\\beta_{r_i,r_M}$','$\\alpha$','R$^2$']
comom_table = pd.DataFrame(index=['est.','t-stat','t-stat (HAC)'], columns = column_labels)

In [134]:
# set up independents
x = data_fmb['uni']
x['constant'] = 1

# run regressions
reg = lm.FamaMacBeth(y.swaplevel(0,1),x.swaplevel(0,1)).fit()
comom_table.loc['est.',column_labels[0:7]] = reg.params.values
comom_table.loc['est.','R$^2$'] = reg.rsquared
comom_table.loc['t-stat',column_labels[0:7]] = reg.tstats.values
reg = lm.FamaMacBeth(y.swaplevel(0,1),x.swaplevel(0,1)).fit(cov_type='kernel')
comom_table.loc['t-stat (HAC)',column_labels[0:7]] = reg.tstats.values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
Inputs contain missing values. Dropping rows with missing observations.
  return list(index.levels[0][index.labels[0]].unique())
  return np.asarray(self._frame.index.labels[0])[:, None]
  return np.asarray(self._frame.index.labels[1])[:, None]


In [135]:
# Blank out the cells that were never filled (still NaN) so they show as empty.
comom_table = comom_table.replace(to_replace=np.nan, value='')

# Hand the table to the project helper under the name 'comom_table'
# (presumably writes a LaTeX version -- see mypack.calc), then display it.
calc.export_df_to_latex(comom_table, 'comom_table')

comom_table

Unnamed: 0,"$\beta_{\mathbb{E}_i,\mathbb{E}_M}$","$\beta_{\mathbb{V}_i,\mathbb{V}_M}$","$\beta_{\mathbb{S}_i,\mathbb{S}_M}$","$\beta_{\mathbb{K}_i,\mathbb{K}_M}$","$\beta_{\mathbb{H}_i,\mathbb{H}_M}$","$\beta_{r_i,r_M}$",$\alpha$,R$^2$
est.,0.0,-0.0,0.0,0.0,-0.0002,0.0001,0.0003,-0.0
t-stat,0.3121,-0.6419,0.0848,0.0077,-1.9837,0.8761,4.6669,
t-stat (robust),0.2937,-0.6254,0.0811,0.0071,-1.9233,0.9077,3.9954,
