In [None]:
# imports
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import re
from datetime import datetime
from tqdm import tqdm

# Notebook display configuration: ask the inline backend for 'retina' figure output.
%config InlineBackend.figure_format = 'retina'
# Apply matplotlib's 'ggplot' style sheet to every figure created below.
plt.style.use('ggplot')

In [None]:
# NAV returns
# Location of the mutual-fund workbook. It is empty here and must be filled in
# before this cell can run.
MF_link = ''

# Fund metadata sheet, with completely empty rows removed.
mf_meta = pd.read_excel(MF_link, sheet_name='meta').dropna(how='all')

# Fund return sheet: drop empty rows, index by the 'date' column and clear the
# index name.
mf = (
    pd.read_excel(MF_link, sheet_name='rolling_returns')
    .dropna(how='all')
    .set_index('date')
    .rename_axis(None)
)

# Factor returns; the first spreadsheet column becomes the index.
factors = pd.read_excel('f_capW-quintiles.xlsx', index_col=0)

In [None]:
# column names
m_cols = mf.columns  # every column of the fund-return sheet is treated as one fund
f_cols = factors.columns[:-1]  # last col is RF
car_cols = ['Mkt-RF', 'SMB', 'HML', 'MOM']  # regressors for the Carhart four-factor fit

# Global settings
# Alternative analysis window, kept for reference:
#analysis_start = datetime(year=2008, month=6, day=30)
#analysis_end = datetime(year=2021, month=1, day=31)

# Analysis window in effect; both bounds are applied inclusively when df is filtered.
analysis_start = datetime(year=2016, month=1, day=1)
analysis_end = datetime(year=2021, month=1, day=1)

# seed random numbers
# Seeds NumPy's global RNG, which the residual bootstrap draws from via np.random.choice.
np.random.seed(5)

# number of bootstraps
# Read as a global inside kowsowski_bootstrap, where it also sets the width of
# the bootstrap table.
m = 1000

In [None]:
# Factor returns on the left, fund returns on the right, aligned on the factor index.
df = factors.join(mf)

# create mutual fund excess return: subtract the risk-free rate from every fund column
fund_cols = [name for name in df.columns if name in m_cols]
df[fund_cols] = df[fund_cols].sub(df['RF'], axis=0)

# annualized (scaled by 100 and then by 12)
df = df.mul(100).mul(12)

# drop observations outside scope (both window bounds are inclusive)
in_scope = (df.index >= analysis_start) & (df.index <= analysis_end)
df = df.loc[in_scope]

df

In [None]:
# for split periods
# Columns that are entirely NaN inside the analysis window carry no information
# for the regressions, so drop them.
df = df.dropna(how='all', axis=1)

# Pick the surviving fund columns by name instead of by a hard-coded position
# (previously `df.columns[8:]`), so the selection stays correct if the number of
# factor columns changes or a factor column is itself dropped above.
m_cols = df.columns[df.columns.isin(mf.columns)]

In [None]:
def kowsowski_bootstrap(mf_factor, factor_cols, mf_cols, n_boot=None):
    """Fit a factor model per fund and bootstrap the cross-section of alphas.

    For every fund in ``mf_cols`` the fund's return series is regressed on
    ``factor_cols`` plus an intercept (named ``alpha``) using OLS with HAC
    standard errors (12 lags). The fitted residuals are then resampled with
    replacement to generate return series under the null ``alpha = 0``, and the
    model is re-fitted on each generated series to collect bootstrapped alphas.
    (The name presumably refers to Kosowski et al.'s bootstrap -- confirm.)

    Parameters
    ----------
    mf_factor : pandas.DataFrame
        Frame holding both the factor columns and the fund columns.
    factor_cols : list-like
        Column labels used as regressors. Must contain ``'Mkt-RF'``, because its
        t-statistic is computed against a coefficient of 1.
    mf_cols : iterable
        Column labels of the funds to evaluate.
    n_boot : int, optional
        Number of bootstrap iterations per fund. Defaults to the notebook-level
        constant ``m``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``mdl_results``: one row per fund with the coefficient estimates, their
        t-statistics (labels prefixed with ``'t: '``), the adjusted R2 (``'R2'``),
        the number of observations (``'n'``) and the 5% / 95% quantiles of the
        rank-matched bootstrap distribution (columns ``0.05`` and ``0.95``).
        ``bootstrap_results``: one row per fund, labelled in order of increasing
        estimated alpha, and one column per bootstrap iteration; each column is
        sorted ascending across funds.

    Raises
    ------
    KeyError
        If ``'Mkt-RF'`` is not among the fitted coefficients.
    """
    # Fall back to the notebook-level bootstrap count when none is given.
    n_boot = m if n_boot is None else n_boot

    def fit_model(X, Y):
        """Fit one fund's model and return ``(fit, list of bootstrapped alphas)``."""
        # for missing returns
        Y = Y.dropna()

        # adjust X to fit time-series regression
        X = X.reindex(Y.index)
        X = sm.add_constant(X)
        X = X.rename(columns={'const': 'alpha'})

        # fit model
        mdl_fit = sm.OLS(endog=Y, exog=X).fit(cov_type='HAC', cov_kwds={'maxlags': 12})

        def residual_bootstrap():
            """Re-estimate alpha on ``n_boot`` series generated with alpha forced to zero."""
            T = Y.shape[0]  # number of TS observations

            # drop alpha coef (alpha=0)
            coefs = mdl_fit.params.drop('alpha').values

            # actual time-series
            ts = X.drop(columns='alpha').values

            # returns implied by the factor loadings alone
            h0_returns = np.dot(coefs, ts.T)

            bootstrap_alpha = []
            for _ in range(n_boot):
                # The draws come from NumPy's global RNG, so results depend on
                # the seed set in the notebook.
                resid_sample = np.random.choice(mdl_fit.resid, T, replace=True)
                data_gen = h0_returns + resid_sample

                bootstrap_mdl_fit = sm.OLS(endog=data_gen, exog=X).fit()
                bootstrap_alpha.append(bootstrap_mdl_fit.params['alpha'])

            return bootstrap_alpha

        return mdl_fit, residual_bootstrap()

    # main
    # Collect per-fund results in dicts and build each table once at the end,
    # instead of growing DataFrames column by column.
    fund_estimates = {}
    fund_bootstraps = {}

    for fund in tqdm(mf_cols):
        mdl, bootstrap = fit_model(Y=mf_factor[fund], X=mf_factor[factor_cols])

        estimates = mdl.params.rename(fund)
        bse = mdl.bse.rename(fund)

        # t-statistics; the market loading is tested against 1 rather than 0
        t_vals = estimates.copy()
        t_vals['Mkt-RF'] = t_vals['Mkt-RF'] - 1
        t_vals = t_vals / bse
        t_vals.index = t_vals.index.map(lambda x: f't: {x}')

        fit_stats = pd.Series({'R2': mdl.rsquared_adj, 'n': mdl.nobs}, name=fund)

        # pd.concat replaces Series.append, which was removed in pandas 2.0
        fund_estimates[fund] = pd.concat([estimates, t_vals, fit_stats])
        fund_bootstraps[fund] = bootstrap

    mdl_results = pd.DataFrame(fund_estimates).transpose()
    bootstrap_results = pd.DataFrame(fund_bootstraps, index=range(n_boot)).transpose()

    # save ordering of index
    idx = mdl_results.sort_values(by='alpha').index

    # Sort bootstrapped alphas across funds within every bootstrap iteration, so
    # that row k holds the k-th smallest alpha of each iteration.
    bootstrap_results = pd.DataFrame(np.sort(bootstrap_results.values, axis=0),
                                     index=bootstrap_results.index,
                                     columns=bootstrap_results.columns)

    # Relabel the rows so the k-th smallest bootstrapped alphas line up with the
    # fund that has the k-th smallest estimated alpha.
    bootstrap_results.index = idx
    mdl_results = mdl_results.join(bootstrap_results.quantile(0.05, axis=1))
    mdl_results = mdl_results.join(bootstrap_results.quantile(0.95, axis=1))

    return (mdl_results, bootstrap_results)

# estimates for the carhart 4-factor model
# (disabled; note that the Carhart table cell further down reads `fit_car`)
#fit_car, fit_car_boot = kowsowski_bootstrap(df, car_cols, m_cols)
#fit_car['sig_up'] = fit_car['alpha'] > fit_car[0.95]
#fit_car['sig_down'] = fit_car['alpha'] < fit_car[0.05]

# estimates for the S-CAPM
fit_sus, fit_sus_boot = kowsowski_bootstrap(mf_factor=df, factor_cols=f_cols, mf_cols=m_cols)

# Flag funds whose estimated alpha lies outside the 5%-95% band of the
# bootstrapped alphas.
fit_sus['sig_up'] = fit_sus['alpha'].gt(fit_sus[0.95])
fit_sus['sig_down'] = fit_sus['alpha'].lt(fit_sus[0.05])

In [None]:
# Display the S-CAPM results table (one row per fund).
fit_sus

In [None]:
# Fund(s) with the lowest estimated alpha.
t = fit_sus.loc[fit_sus['alpha'] == fit_sus['alpha'].min()]

# Density of the bootstrapped alphas stored under that fund's label ...
sns.kdeplot(fit_sus_boot.loc[t.index[0]])
# ... with the estimated alpha marked. `.iloc[0]` selects by position explicitly;
# `t['alpha'][0]` relied on pandas' deprecated fallback of treating an integer
# key as a position on a label-indexed Series.
plt.axvline(t['alpha'].iloc[0])

In [None]:
# Fund(s) with the highest estimated alpha.
t = fit_sus.loc[fit_sus['alpha'] == fit_sus['alpha'].max()]

# Density of the bootstrapped alphas stored under that fund's label ...
sns.kdeplot(fit_sus_boot.loc[t.index[0]])
# ... with the estimated alpha marked. `.iloc[0]` selects by position explicitly;
# `t['alpha'][0]` relied on pandas' deprecated fallback of treating an integer
# key as a position on a label-indexed Series.
plt.axvline(t['alpha'].iloc[0])

In [None]:
# Quantile break points reported in the result tables: each entry pairs the
# label shown in the table ('text') with the quantile level ('tile').
splits = [
    {'text': label, 'tile': level}
    for label, level in (
        ('1%', 0.01),
        ('3%', 0.03),
        ('5%', 0.05),
        ('10%', 0.1),
        ('20%', 0.2),
        ('30%', 0.3),
        ('40%', 0.4),
        ('Median', 0.5),
        ('60%', 0.6),
        ('70%', 0.7),
        ('80%', 0.8),
        ('90%', 0.9),
        ('95%', 0.95),
        ('97%', 0.97),
        ('99%', 0.99),
    )
]

In [None]:
# start panel - for table (fit_sus)
# Builds the summary table of estimated alphas: the five lowest funds, the fund
# at each quantile listed in `splits`, and the five highest funds, together with
# the 5% / 95% bootstrap quantiles stored in fit_sus.
sus_alphas = fit_sus['alpha']

# five lowest alphas (labels sized to the rows present, in case of fewer than five funds)
bottom = sus_alphas.sort_values().iloc[:5].to_frame()
bottom['split'] = [f'Bottom {i}' for i in range(1, len(bottom) + 1)]
pieces = [bottom]

# fund(s) whose alpha equals the observed value nearest to each quantile
for s in splits:
    # Series.quantile replaces np.quantile(..., interpolation='nearest'), whose
    # `interpolation` keyword is deprecated in NumPy.
    nearest_alpha = sus_alphas.quantile(s['tile'], interpolation='nearest')
    p = sus_alphas.loc[sus_alphas == nearest_alpha].to_frame()
    p['split'] = s['text']
    pieces.append(p)

# five highest alphas, ordered 'Top 5' ... 'Top 1' so alphas keep increasing down the table
top = sus_alphas.sort_values(ascending=False).iloc[:5].to_frame()
top['split'] = [f'Top {i}' for i in range(1, len(top) + 1)]
top = top.sort_values(by='split', ascending=False)
pieces.append(top)

# One concat instead of repeated DataFrame.append (removed in pandas 2.0).
panel = pd.concat(pieces)

panel = panel.join(fit_sus[0.05])
panel = panel.join(fit_sus[0.95])

# keep the fund identifier as a regular row of the final table
panel['Lipper'] = panel.index

panel = panel.set_index('split', drop=True)

panel = panel.transpose()

panel = panel.reindex(['Lipper', 'alpha', 0.05, 0.95])

# display results
display(panel)
print(panel.transpose().to_latex(float_format=lambda x: '%.2f' % x))

In [None]:
# start panel - for table (fit_car)
# Same table as for the S-CAPM, built from the Carhart estimates. The result is
# stored as `panel_car` so that it does not overwrite `panel`, which the figure
# cells below combine with the S-CAPM bootstrap (`fit_sus_boot`).
if 'fit_car' not in globals():
    # The Carhart fit is commented out in the estimation cell; without this
    # guard the cell raises NameError on a fresh Restart & Run All.
    print('Skipping Carhart table: `fit_car` is not defined (enable the Carhart fit first).')
else:
    car_alphas = fit_car['alpha']

    # five lowest alphas (labels sized to the rows present, in case of fewer than five funds)
    bottom_car = car_alphas.sort_values().iloc[:5].to_frame()
    bottom_car['split'] = [f'Bottom {i}' for i in range(1, len(bottom_car) + 1)]
    pieces_car = [bottom_car]

    # fund(s) whose alpha equals the observed value nearest to each quantile
    for s in splits:
        # Series.quantile replaces np.quantile(..., interpolation='nearest'),
        # whose `interpolation` keyword is deprecated in NumPy.
        nearest_alpha = car_alphas.quantile(s['tile'], interpolation='nearest')
        p = car_alphas.loc[car_alphas == nearest_alpha].to_frame()
        p['split'] = s['text']
        pieces_car.append(p)

    # five highest alphas, ordered 'Top 5' ... 'Top 1' so alphas keep increasing down the table
    top_car = car_alphas.sort_values(ascending=False).iloc[:5].to_frame()
    top_car['split'] = [f'Top {i}' for i in range(1, len(top_car) + 1)]
    top_car = top_car.sort_values(by='split', ascending=False)
    pieces_car.append(top_car)

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    panel_car = pd.concat(pieces_car)

    panel_car = panel_car.join(fit_car[0.05])
    panel_car = panel_car.join(fit_car[0.95])

    # keep the fund identifier as a regular row of the final table
    panel_car['Lipper'] = panel_car.index

    panel_car = panel_car.set_index('split', drop=True)

    panel_car = panel_car.transpose()

    panel_car = panel_car.reindex(['Lipper', 'alpha', 0.05, 0.95])

    # display results
    display(panel_car)
    print(panel_car.transpose().to_latex(float_format=lambda x: '%.2f' % x))

In [None]:
# Compare the density of the row-wise mean bootstrapped alphas with the density
# of the estimated alphas, both drawn on the current axes.
ax = plt.gca()

sns.kdeplot(fit_sus_boot.mean(axis=1), label='Bootstrapped alphas', fill=True, ax=ax)
sns.kdeplot(fit_sus['alpha'], label='Estimated alphas', fill=True, ax=ax)

ax.legend(loc='upper left')

plt.show()

In [None]:
def top_5_percent(meta, metric):
    """Return the funds whose ``metric`` falls in the highest of 20 quantile bins.

    Parameters
    ----------
    meta : pandas.DataFrame
        Table with a ``'Symbol'`` column and a column named ``metric``.
    metric : str
        Column to rank on; rows where it is missing are ignored.

    Returns
    -------
    pandas.Series
        Indexed by ``'Symbol'``; the values are the (identical) top-bin interval
        assigned by ``pd.qcut``.
    """
    scores = meta.set_index('Symbol')[metric].dropna()

    # 20 equal-frequency bins; the largest bin is the top 5%
    bins = pd.qcut(scores, q=20)
    top_bin = bins.max()

    return bins.loc[bins == top_bin]

# Top-5% funds for each of the three pillar scores (environment, social, governance).
top_env, top_soc, top_gov = [
    top_5_percent(mf_meta, pillar)
    for pillar in ('Environment Pillar Score', 'Social Pillar Score', 'Governance Pillar Score')
]

In [None]:
# Note: super_top is the intersection of the mutual funds that appear in the
# top 5% of every pillar. It can be used as a kind of control group.
super_top = set(top_env.index).intersection(top_soc.index, top_gov.index)

# metadata rows of the funds in that intersection
in_super_top = mf_meta['Symbol'].isin(super_top)
mf_meta.loc[in_super_top]

In [None]:
# Show the table columns for the 1% and 99% quantile funds, which the figure below uses.
panel[['1%', '99%']]

In [None]:
# Three stacked panels sharing the x-axis: the 1% quantile fund, the 99%
# quantile fund, and the whole cross-section of alphas.
fig, axes = plt.subplots(3, 1, figsize = (8, 8), sharey=False, sharex=True)
ax_low, ax_high, ax_cross = axes

# fund at the 1% quantile: bootstrapped density vs. its estimated alpha
low_fund = panel['1%']
sns.kdeplot(fit_sus_boot.loc[low_fund['Lipper']], ax=ax_low, fill=True, label='Bootstrapped alphas')
ax_low.axvline(low_fund['alpha'], color='black', linestyle='--', label='Estimated alpha')
ax_low.set_ylabel(None)
ax_low.set_title('Bottom 1% fund')

# fund at the 99% quantile (this panel keeps its y-label)
high_fund = panel['99%']
sns.kdeplot(fit_sus_boot.loc[high_fund['Lipper']], ax=ax_high, fill=True, label='Bootstrapped alphas')
ax_high.axvline(high_fund['alpha'], color='black', linestyle='--', label='Estimated alpha')
ax_high.set_title('Top 99% fund')

# cross-section: row-wise mean bootstrapped alphas vs. estimated alphas
sns.kdeplot(fit_sus_boot.mean(axis=1), label='Bootstrapped alphas', fill=True, ax=ax_cross)
sns.kdeplot(fit_sus['alpha'], label='Estimated alphas', fill=True, ax=ax_cross)
ax_cross.set_ylabel(None)
ax_cross.set_title('Cross-sectional alphas')

ax_low.legend(loc='upper right')
ax_high.legend(loc='upper left')
ax_cross.legend(loc='upper left')

# The x-axis is shared, so the limits set on the last panel apply to all three.
ax_cross.set_xlim(left=-20, right=20)
ax_cross.set_xlabel('Annualized alphas')

fig.tight_layout()

fig.savefig('bootstrap_dist.png', format='png', dpi=300)
plt.show()