In [None]:
import pandas as pd
import numpy as np
import re
from datetime import datetime
from dateutil.relativedelta import relativedelta
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Plot configuration for the whole notebook: request the 'retina' figure format
# from the inline backend and apply matplotlib's 'ggplot' style sheet.
%config InlineBackend.figure_format = 'retina'
plt.style.use('ggplot')

In [None]:
# Path/URL of the industry-portfolio workbook (blank here; must be set before running).
ip_link = ''

# Read the value-weighted returns sheet, discard rows that are completely empty,
# and use the unnamed first column as an (unnamed) index.
# NOTE(review): the sheet name is spelled 'ValueWeigtedReturns' -- presumably this
# matches the workbook; verify before "fixing" the spelling.
ip = (
    pd.read_excel(ip_link, sheet_name='ValueWeigtedReturns')
    .dropna(how='all')
    .set_index('Unnamed: 0')
    .rename_axis(None)
)

# map from first to last day of month
month_end = pd.tseries.offsets.MonthEnd(0)
ip.index = ip.index.map(lambda stamp: stamp + month_end)

# Divide every return by 100 (presumably percent -> decimal; confirm against the data source).
ip = ip.div(100)

ip.head()

In [None]:
def _read_factor_sheet(filename):
    """Read one factor workbook (first column as index) and drop rows with any missing value."""
    return pd.read_excel(filename, index_col=0).dropna(how='any')


# Cap weight
f_capW_quintiles = _read_factor_sheet('f_capW-quintiles.xlsx')
f_capW_median = _read_factor_sheet('f_capW-median.xlsx')
f_capW_mean = _read_factor_sheet('f_capW-mean.xlsx')

# Eq. weight
f_eqW_quintiles = _read_factor_sheet('f_eqW-quintiles.xlsx')
f_eqW_median = _read_factor_sheet('f_eqW-median.xlsx')
f_eqW_mean = _read_factor_sheet('f_eqW-mean.xlsx')

In [None]:
# Restrict the industry returns to the dates of the cap-weighted quintile factors.
ip = ip.reindex(f_capW_quintiles.index)

# Regressor columns (the first seven columns of the factor file) and the
# industry-portfolio column names used as dependent variables.
f_cols = f_capW_quintiles.columns[:7]
ip_cols = ip.columns

# Newey West: maximum lag passed to the HAC covariance estimator.
nw_lags = 12


# Excess returns: attach the risk-free rate, subtract it from every industry
# column in one vectorised step, and keep only the industry columns (drops RF).
ip_with_rf = ip.join(f_capW_quintiles['RF'])
ip = ip_with_rf[ip_cols].sub(ip_with_rf['RF'], axis=0)


# Append the industry excess returns to each factor data set.
(
    f_capW_quintiles,
    f_capW_median,
    f_capW_mean,
    f_eqW_quintiles,
    f_eqW_median,
    f_eqW_mean,
) = (
    factors.join(ip)
    for factors in (
        f_capW_quintiles,
        f_capW_median,
        f_capW_mean,
        f_eqW_quintiles,
        f_eqW_median,
        f_eqW_mean,
    )
)

In [None]:
def estimate_model(df, capm_fac, indu_pf, name):
    """Regress every portfolio in ``indu_pf`` on the factors in ``capm_fac``.

    Each regression is an OLS fit with an intercept (reported as ``'alpha'``)
    and HAC standard errors whose maximum lag is the module-level ``nw_lags``.

    Parameters
    ----------
    df : pandas.DataFrame
        Holds both the factor columns and the portfolio columns.
    capm_fac : list-like of column labels
        Regressor columns. Must contain ``'Mkt-RF'``; a ``KeyError`` is raised
        otherwise because its t-statistic is recomputed below.
    indu_pf : iterable of column labels
        Dependent-variable columns; one regression is run per label.
    name : object
        Value written to the ``'Type'`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        One row per (portfolio, coefficient), indexed by coefficient name, with
        columns ``'Estimates'``, ``'T'``, ``'std. error'``, ``'R2'``,
        ``'Portfolio'`` and ``'Type'``.  ``'R2'`` holds the *adjusted* R-squared.
        The ``'T'`` entry for ``'Mkt-RF'`` tests the coefficient against 1
        instead of 0.  An empty ``indu_pf`` yields an empty frame that only has
        the ``'Type'`` column.
    """
    # The regressors do not depend on the portfolio, so build them once.
    X = sm.add_constant(df[capm_fac]).rename(columns={'const': 'alpha'})

    per_portfolio = []
    for industry in indu_pf:
        fit = sm.OLS(endog=df[industry], exog=X).fit(
            cov_type='HAC', cov_kwds={'maxlags': nw_lags}
        )

        # Work on a copy so the results object's own t-statistics are not overwritten.
        tvalues = fit.tvalues.copy()
        # t-statistic for H0: beta_mkt = 1 (instead of the default H0: beta = 0)
        tvalues['Mkt-RF'] = (fit.params['Mkt-RF'] - 1) / fit.bse['Mkt-RF']

        parameters = pd.DataFrame({
            'Estimates': fit.params,
            'T': tvalues,
            'std. error': fit.bse,
        })
        parameters['R2'] = fit.rsquared_adj  # adjusted R^2
        parameters['Portfolio'] = industry

        per_portfolio.append(parameters)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead of
    # growing the frame inside the loop.  pd.concat([]) raises, hence the guard.
    estimates = pd.concat(per_portfolio) if per_portfolio else pd.DataFrame()
    estimates['Type'] = name

    return estimates

In [None]:
# Every factor construction to estimate: (label stored in the 'Type' column, data set).
model_variants = [
    ('Mkt. Cap. - Quintiles', f_capW_quintiles),
    ('Mkt. Cap. - Median', f_capW_median),
    ('Mkt. Cap. - Mean', f_capW_mean),
    ('Equal - Quintiles', f_eqW_quintiles),
    ('Equal - Median', f_eqW_median),
    ('Equal - Mean', f_eqW_mean),
]

# DataFrame.append was removed in pandas 2.0, so stack all results with a single pd.concat.
estimates = pd.concat(
    [estimate_model(frame, f_cols, ip_cols, label) for label, frame in model_variants]
)

# Rows are indexed by coefficient name: pull out the intercepts and the SUS_env loadings.
alpha = estimates.loc['alpha']
sus_env = estimates.loc['SUS_env']

In [None]:
# Kernel-density estimate of the alpha t-statistics, one curve per value of 'Type'.
fig, ax = plt.subplots(figsize=(8, 6))

for split_label, split_alphas in alpha.groupby('Type'):
    sns.kdeplot(split_alphas['T'], label=split_label, ax=ax)

ax.legend(loc='upper left', title='Split & weighting')
ax.set_xlabel(r'$t(\widehat{\alpha})$')
ax.set_title('Variations of sustainability factors')

fig.tight_layout()
fig.savefig('variations.png', format='png', dpi=300)

plt.show()