In [None]:
# imports
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import re
from datetime import datetime
from tqdm import tqdm

%config InlineBackend.figure_format = 'retina'
plt.style.use('ggplot')

In [None]:
# Inputs: fund metadata and rolling NAV returns live in one workbook,
# the factor returns in a separate file.
MF_link = ''

# Fund metadata; rows that are empty across every column are discarded.
mf_meta = pd.read_excel(MF_link, sheet_name='meta').dropna(how='all')

# Rolling fund returns, keyed by the 'date' column.
mf = (
    pd.read_excel(MF_link, sheet_name='rolling_returns')
    .dropna(how='all')
    .set_index('date')
)
mf.index.name = None  # drop the index label so displays/joins stay clean

# Factor returns; the first spreadsheet column becomes the index.
factors = pd.read_excel('f_capW-quintiles.xlsx', index_col=0)

In [None]:
# Column groups used throughout the notebook
m_cols = mf.columns                          # every fund return series
f_cols = factors.columns[:-1]                # all factor columns except the last one (RF)
car_cols = ['Mkt-RF', 'SMB', 'HML', 'MOM']   # regressors of the Carhart specification

# Global settings: sample window, applied with inclusive bounds further down
analysis_start = datetime(2008, 6, 30)
analysis_end = datetime(2021, 1, 31)

In [None]:
# NOTE: `mf` is rebound at the end of this cell to the processed slice, so
# re-running this cell on its own would apply the adjustments a second time.

# Put factor and fund returns side by side, aligned on the factor index.
df = factors.join(mf)

# Fund excess returns: subtract the risk-free rate from every fund column at once.
df[m_cols] = df[m_cols].sub(df['RF'], axis=0)

# Annualised scale (x100, x12) — presumably monthly decimal returns; confirm against the data.
df = df * 100 * 12

# Keep only observations inside the analysis window (both ends inclusive).
in_window = (df.index >= analysis_start) & (df.index <= analysis_end)
df = df.loc[in_window]

# Split back into factor and fund blocks.
fc = df[f_cols]
mf = df[m_cols]

display(mf.head())
display(fc.head())

In [None]:
def quintile_split(meta, metric, top, q=20):
    """Split funds into the highest quantile bucket of a score versus the rest.

    Despite the name, the default ``q=20`` produces 5% buckets, so the
    result is a "top 5%" / "bottom 95%" split.

    Parameters
    ----------
    meta : pandas.DataFrame
        Fund metadata containing a 'Symbol' column and the ``metric`` column.
    metric : str
        Name of the score column to rank on. Funds with a missing score
        are excluded from both groups.
    top : bool
        If truthy, return the funds in the highest bucket; otherwise
        return every fund outside the highest bucket.
    q : int, default 20
        Number of equal-frequency buckets handed to ``pandas.qcut``.

    Returns
    -------
    pandas.Series
        Bucket label (an interval) per selected fund, indexed by 'Symbol'.
    """
    scores = meta.set_index('Symbol')[metric].dropna()

    buckets = pd.qcut(scores, q=q)

    # qcut returns an ordered categorical, so max() is the highest bucket.
    in_highest = buckets == buckets.max()

    return buckets.loc[in_highest if top else ~in_highest]

# For each pillar score: the funds in the highest bucket and everyone else.
top_env, bot_env = (
    quintile_split(mf_meta, 'Environment Pillar Score', pick_top) for pick_top in (True, False)
)

top_soc, bot_soc = (
    quintile_split(mf_meta, 'Social Pillar Score', pick_top) for pick_top in (True, False)
)

top_gov, bot_gov = (
    quintile_split(mf_meta, 'Governance Pillar Score', pick_top) for pick_top in (True, False)
)

In [None]:
# Sanity check: number of funds in the top governance bucket vs. all remaining funds.
print(top_gov.count(), bot_gov.count())

In [None]:
# Portfolio label -> the funds that belong to it.
portfolio_members = {
    'Top 5%, Environment': top_env,
    'Bottom 95%, Environment': bot_env,
    'Top 5%, Social': top_soc,
    'Bottom 95%, Social': bot_soc,
    'Top 5%, Governance': top_gov,
    'Bottom 95%, Governance': bot_gov,
}

# Each portfolio return is the per-date cross-sectional mean of its member funds.
pf = pd.DataFrame({
    label: mf[members.index].mean(axis=1)
    for label, members in portfolio_members.items()
})

pf.head()

In [None]:
# Maximum lag handed to the HAC covariance estimator inside fit().
nw_lags = 12

# Result tables; the loop at the end of this cell adds one column per portfolio.
sus_res = pd.DataFrame()
car_res = pd.DataFrame()
# NOTE(review): CI is created here but is not read or written anywhere else in the visible cells.
CI = pd.DataFrame(index=[0, 1])

def fit(x_mat, y_mat):
    """Run an OLS regression with HAC standard errors and flatten the output.

    Parameters
    ----------
    x_mat : pandas.DataFrame
        Regressors. Must contain a 'Mkt-RF' column; an 'alpha' column is
        required whenever the matrix has exactly eight columns (see below).
    y_mat : pandas.Series
        Dependent variable, aligned with ``x_mat``.

    Returns
    -------
    pandas.Series
        Coefficients labelled '<name>:p', t statistics labelled '<name>:t'
        and the adjusted R-squared under 'R2_adj'. The 'Mkt-RF:t' entry
        tests the coefficient against 1 rather than against 0.

    Notes
    -----
    Reads the module-level ``nw_lags`` for the HAC lag length. As a side
    effect, prints the confidence interval of 'alpha' when ``x_mat`` has
    eight columns.
    """
    mdl_fit = sm.OLS(endog=y_mat, exog=x_mat).fit(cov_type='HAC', cov_kwds={'maxlags': nw_lags})  # NW errors

    # Only the eight-column design (presumably seven factors plus the intercept) reports its alpha CI.
    if x_mat.shape[1] == (7 + 1):
        ci = mdl_fit.conf_int().loc['alpha']
        ci.name = y_mat.name
        print(ci)

    coefs = mdl_fit.params
    std_errs = mdl_fit.bse

    # Replace the default market t statistic (H0: beta = 0) with a test of beta = 1.
    tvals = mdl_fit.tvalues.copy()
    tvals['Mkt-RF'] = (coefs['Mkt-RF'] - 1) / std_errs['Mkt-RF']

    # pd.concat instead of Series.append, which no longer exists in pandas >= 2.0.
    return pd.concat([
        coefs.rename(lambda x: f'{x}:p'),
        tvals.rename(lambda x: f'{x}:t'),
        pd.Series({'R2_adj': mdl_fit.rsquared_adj}),
    ])


# The regressors do not depend on the portfolio, so each design matrix
# (factors plus an intercept renamed to 'alpha') is assembled once.
X_sus = sm.add_constant(fc[f_cols]).rename(columns={'const': 'alpha'})
X_car = sm.add_constant(fc[car_cols]).rename(columns={'const': 'alpha'})

for sample in pf:
    Y = pf[sample]

    ### SCAPM
    sus_res[sample] = fit(X_sus, Y)

    ### Carhart
    car_res[sample] = fit(X_car, Y)


display(sus_res)
display(car_res)

In [None]:
# Emit the sus_res table as LaTeX source, floats formatted with four decimals.
print(sus_res.to_latex(float_format='%.4f'))

In [None]:
# Emit the car_res table as LaTeX source, floats formatted with four decimals.
print(car_res.to_latex(float_format='%.4f'))

In [None]:
# Table of the funds in each pillar's top bucket: symbol and asset name side by side.
# Fund names keyed by symbol, looked up once for all three pillars.
fund_names = mf_meta.set_index('Symbol')['Asset Name']

pillar_tables = []
for pillar, members in (('Environment', top_env), ('Social', top_soc), ('Governance', top_gov)):
    symbol_col = f'{pillar}, Lipper'
    pillar_tables.append(
        pd.DataFrame({symbol_col: members.index.values})
        .join(fund_names.rename(f'{pillar}, Name'), on=symbol_col)
        .reset_index(drop=True)
    )

# Column-wise concat pads shorter pillars with NaN; assigning the symbol lists
# directly as columns would raise when the top buckets differ in size.
top_mf = pd.concat(pillar_tables, axis=1)

print(top_mf.to_latex(index=False))

In [None]:
# Calendar-year view of the fund panel, reused for every statistic below.
yearly = mf.resample('1Y')

# Per year: average number of funds with a return on a given date, and
# average number of non-missing observations per fund.
data = pd.DataFrame(
    [
        {
            'date': stamp,
            'Active funds': int(chunk.count(axis=1).mean()),
            'Mean observations': chunk.count().mean(),
        }
        for stamp, chunk in yearly
    ]
).set_index('date', drop=True)
data.index.name = None

# Cross-fund average of each fund's yearly mean return.
m = yearly.mean().mean(axis=1).rename('Mean monthly return')

# Cross-fund average of each fund's yearly standard error (std / sqrt(n)).
s = (yearly.std() / np.sqrt(yearly.count())).mean(axis=1).rename('Mean monthly std. error')

data = data.join(m).join(s)

data['t statistic'] = data['Mean monthly return'] / data['Mean monthly std. error']

# Label rows by calendar year instead of the period-end timestamp.
data.index = data.index.map(lambda x: x.year)

print(data.to_latex(float_format='%.4f'))

In [None]:
# Restrict the fund panel to observations dated after 2015-01-01.
t = mf.loc[mf.index > '2015-01-01']

# Funds (columns) that have at least one missing value in that window.
t.columns[t.isnull().any()]


In [None]:
# NOTE(review): repeats the nw_lags value set in an earlier cell; consider defining it once.
nw_lags = 12

# Container for the per-fund regression output produced later in this cell.
sus_fit = pd.DataFrame()


def fit(x_mat, y_mat):
    """Run an OLS regression with HAC standard errors and flatten the output.

    NOTE(review): this redefines the ``fit`` declared in an earlier cell;
    this version additionally reports the residual degrees of freedom and a
    critical t value, and does not print a confidence interval.

    Parameters
    ----------
    x_mat : pandas.DataFrame
        Regressors; must contain a 'Mkt-RF' column.
    y_mat : pandas.Series
        Dependent variable, aligned with ``x_mat``.

    Returns
    -------
    pandas.Series
        Coefficients labelled '<name>:p', t statistics labelled '<name>:t'
        (with 'Mkt-RF:t' testing the coefficient against 1), then 'R2_adj',
        'DF' (residual degrees of freedom) and 't_crit' (the 0.95 quantile
        of the Student t distribution with 'DF' degrees of freedom).

    Notes
    -----
    Reads the module-level ``nw_lags`` for the HAC lag length.
    """
    mdl_fit = sm.OLS(endog=y_mat, exog=x_mat).fit(cov_type='HAC', cov_kwds={'maxlags': nw_lags})  # NW errors

    coefs = mdl_fit.params
    std_errs = mdl_fit.bse

    # Replace the default market t statistic (H0: beta = 0) with a test of beta = 1.
    tvals = mdl_fit.tvalues.copy()
    tvals['Mkt-RF'] = (coefs['Mkt-RF'] - 1) / std_errs['Mkt-RF']

    # pd.concat instead of Series.append, which no longer exists in pandas >= 2.0.
    return pd.concat([
        coefs.rename(lambda x: f'{x}:p'),
        tvals.rename(lambda x: f'{x}:t'),
        pd.Series({'R2_adj': mdl_fit.rsquared_adj}),
        pd.Series({'DF': mdl_fit.df_resid}),
        pd.Series({'t_crit': stats.t.ppf(0.95, mdl_fit.df_resid)}),
    ])


# The regressors are the same for every fund, so the design matrix
# (factors plus an intercept renamed to 'alpha') is assembled once.
X_all = sm.add_constant(fc[f_cols]).rename(columns={'const': 'alpha'})

# Collect results in a dict and build the frame in one go; inserting one
# column per fund into a DataFrame fragments it and gets slow for many funds.
fund_fits = {}

for fund in mf.columns:
    ### SCAPM
    Y = mf[fund].dropna()

    # Use only the dates on which this fund has a return.
    X = X_all.reindex(Y.index)

    fund_fits[fund] = fit(X, Y)

sus_fit = pd.DataFrame(fund_fits)

display(sus_fit)

In [None]:
# Reorient so that each row is a fund and each column a statistic.
# Guarded so that re-running this cell does not flip the table back again.
if 'alpha:p' not in sus_fit.columns:
    sus_fit = sus_fit.transpose()

In [None]:
# Keep the per-fund t statistics together with the degrees of freedom and critical value.
s = sus_fit[['alpha:t', 'SUS_soc:t', 'SUS_env:t', 'SUS_gov:t', 'Mkt-RF:t', 'SMB:t', 'HML:t', 'MOM:t', 'DF', 't_crit']]

In [None]:
# Every column of `s` except the two helper columns is a t statistic to test.
# Filtering (rather than stopping at the first helper column) keeps this
# correct regardless of the column order in `s`.
stat_cols = [col for col in s.columns if col not in ('DF', 't_crit')]

# Per fund: does the statistic exceed the fund's own critical value,
# on the positive side ('up_') or the negative side ('down_')?
flags = {}
for col in stat_cols:
    flags[f'up_{col}'] = (s[col] > s['t_crit'])
    flags[f'down_{col}'] = (s[col] < -s['t_crit'])

r = pd.DataFrame(flags)

# Number and share of funds flagged for each test.
t = r.astype('int').sum(axis=0).to_frame('Count')
t['Percentage'] = (t['Count'] / r.shape[0]) * 100
print(t.to_latex(float_format='%.1f'))

In [None]:
# Funds whose SUS_gov t statistic is flagged on the positive side.
r.loc[r['up_SUS_gov:t']].index

In [None]:

# How many of the positively flagged SUS_gov funds also sit in the top governance bucket.
gov_flagged = r.loc[r['up_SUS_gov:t']].index
sum(1 for fund in gov_flagged if fund in top_gov.index)

In [None]:
# Kernel density of the estimated fund alphas, top environment bucket vs. the rest.
fig, ax = plt.subplots()

sns.kdeplot(sus_fit.loc[top_env.index, 'alpha:p'], label='Environment, Top 5%', ax=ax)
sns.kdeplot(sus_fit.loc[bot_env.index, 'alpha:p'], label='Environment, Bottom 95%', ax=ax)

ax.set_xlabel(r'$\widehat{\alpha}$')
ax.set_title('Distribution of fund alphas')

ax.legend()
fig.tight_layout()

fig.savefig('alphadistribution_mututal.png', format='png', dpi=300)

In [None]:
# Same density comparison for the social (left) and governance (right) pillars.
fig, axes = plt.subplots(1, 2, figsize = (10, 6), sharey=True)

panels = (
    (top_soc, 'Social, Top 5%', bot_soc, 'Social, Bottom 95%'),
    (top_gov, 'Governance, Top 5%', bot_gov, 'Governance, Bottom 95%'),
)

for panel_ax, (top_grp, top_label, bot_grp, bot_label) in zip(axes, panels):
    # Draw the top bucket first so both panels share the same colour order.
    sns.kdeplot(sus_fit.loc[top_grp.index, 'alpha:p'], label=top_label, ax=panel_ax)
    sns.kdeplot(sus_fit.loc[bot_grp.index, 'alpha:p'], label=bot_label, ax=panel_ax)
    panel_ax.set_xlabel(r'$\widehat{\alpha}$')

plt.ylabel('Density')

for panel_ax in axes:
    panel_ax.legend(loc='upper left')
plt.tight_layout()

plt.savefig('alphadistribution_mututal_rest.png', format='png', dpi=300)

In [None]:
def bot_top(idx, name):
    """Summarise the per-fund regression output for one group of funds.

    Parameters
    ----------
    idx : index-like
        Row labels of the module-level ``sus_fit`` that make up the group.
    name : str
        Prefix for the two output column names.

    Returns
    -------
    pandas.DataFrame
        One row per ``sus_fit`` column, with the group mean in
        '<name> - mean' and the group standard deviation in '<name> - std'.
    """
    group = sus_fit.loc[idx]

    return pd.concat(
        [
            group.mean().rename(name + ' - mean'),
            group.std().rename(name + ' - std'),
        ],
        axis=1,
    )


# Environment pillar: top bucket next to the remaining funds.
env = bot_top(top_env.index, 'Environment, Top 5%').join(
    bot_top(bot_env.index, 'Environment, Bottom 95%')
)

# Social and governance pillars share one wider table.
rest = bot_top(top_soc.index, 'Social, Top 5%')
for members, label in (
    (bot_soc, 'Social, Bottom 95%'),
    (top_gov, 'Governance, Top 5%'),
    (bot_gov, 'Governance, Bottom 95%'),
):
    rest = rest.join(bot_top(members.index, label))

print(env.to_latex(float_format='%.4f', column_format='lcccc'))
print(rest.to_latex(float_format='%.4f', column_format='lcccccccc'))