In [None]:
# imports
import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import re
from tqdm import tqdm
from datetime import datetime
from dateutil.relativedelta import relativedelta

%config InlineBackend.figure_format = 'retina'
plt.style.use('ggplot')

In [None]:
# Factor returns, indexed by the first column of the sheet; the 'RF' column is
# not needed for the factor summary below and is dropped.
factors = pd.read_excel('f_capW-quintiles.xlsx', index_col=0)
factors = factors.drop(columns=['RF'])

# load all data from master Google Sheets file
ESG_link = ''

# Ask for every sheet in one call: read_excel then opens (and, for a URL,
# downloads) the workbook once and returns a {sheet name: DataFrame} dict,
# instead of re-reading the whole file for each of the seven sheets.
_esg_sheets = pd.read_excel(
    ESG_link,
    sheet_name=[
        'MetaData',
        'FreeFloat Mkt. Cap. CM',
        '1-CM Total Return (incl. div)',
        'SPX Historical Constituents CY',
        'Re_Soc FY Absolute',
        'Re_Gov FY Absolute',
        'Re_Env FY Absolute',
    ],
)

# rows that are empty in every column are discarded from each sheet
esg_meta = _esg_sheets['MetaData'].dropna(how='all')
esg_cap = _esg_sheets['FreeFloat Mkt. Cap. CM'].dropna(how='all')
esg_returns = _esg_sheets['1-CM Total Return (incl. div)'].dropna(how='all')
esg_snp = _esg_sheets['SPX Historical Constituents CY'].dropna(how='all')
esg_social = _esg_sheets['Re_Soc FY Absolute'].dropna(how='all')
esg_governance = _esg_sheets['Re_Gov FY Absolute'].dropna(how='all')
esg_environment = _esg_sheets['Re_Env FY Absolute'].dropna(how='all')

# the unfiltered copies are no longer needed once the cleaned frames exist
del _esg_sheets

In [None]:
# display the factor frame (last expression of the cell, so it is rendered)
factors

In [None]:
### Table 1 ###
# Summary statistics and correlation matrix of the factors, printed as LaTeX.

# select the factor columns in the order they should appear in the table
factors = factors.reindex(['Mkt-RF', 'SMB', 'HML', 'MOM', 'SUS_env', 'SUS_soc', 'SUS_gov'], axis=1)

mean = factors.mean()
# Standard error of the mean. DataFrame.sem divides each column's sample
# standard deviation by the square root of that column's non-missing count;
# dividing by sqrt(number of rows) understates the error (and inflates the
# t statistic) whenever a column contains missing observations.
std = factors.sem()
t_stat = mean / std

table1 = pd.DataFrame({
    'Mean': mean,
    'Mean std. error': std,
    't statistic': t_stat,
})

# append the pairwise correlations, one column per factor
table1 = table1.join(factors.corr())

print(table1.to_latex(float_format='%.4f'))

In [None]:
ip_link = ''

ip = pd.read_excel(ip_link, sheet_name='ValueWeigtedReturns').dropna(how='all')
factors = pd.read_excel('factors-NonFixedCap_NonFixedESG.xlsx', index_col=0)

# the first (unnamed) column of the sheet becomes the index
ip = ip.set_index('Unnamed: 0')
ip.index.name = None

# map from first to last day of month
ip.index = ip.index.map(lambda x: x + pd.tseries.offsets.MonthEnd(0))

# NOTE(review): presumably converts percent returns to decimals - confirm against the sheet
ip = ip / 100

ip_cols = ip.columns

# Left-join onto the factor frame so every portfolio return sits next to the
# 'RF' value of the same date, then subtract 'RF' from all portfolio columns
# at once and keep only those columns.
ip = factors.join(ip)
ip = ip[ip_cols].sub(ip['RF'], axis=0)

factors = factors.drop(columns=['RF'])

### Table industry ###
mean = ip.mean()
# Standard error of the mean based on each column's non-missing count: the
# left join above yields NaN rows for factor dates that have no portfolio
# return, and those rows must not enter the denominator.
std = ip.sem()
t_stat = mean / std

indu = pd.DataFrame({
    'Mean': mean,
    'Mean std. error': std,
    't statistic': t_stat,
})

print(indu.to_latex(float_format='%.4f'))

In [None]:
# this cell parses RIC (ticker.exchange) to (ticker) - e.g. "GOOG.OQ" -> "GOOG"
def parse_ric(ric):
    """Strip the exchange suffix from a RIC, e.g. "GOOG.OQ" -> "GOOG".

    Parameters:
    ric (str): column label, either the literal 'Date' or a RIC of the form
        ticker.exchange

    Returns:
    str: the first run of letters that is followed by a dot and at least one
        more character. 'Date' and labels with no such pattern are returned
        unchanged.

    """
    if ric == 'Date':
        return ric

    # raw string: '\.' in a normal string literal is an invalid escape sequence
    match = re.search(r'([A-Za-z]+)\..+', ric)

    # no exchange suffix found - hand the label back instead of failing with
    # an IndexError that does not say which label was the problem
    if match is None:
        return ric

    return match.group(1)


# Replace every RIC column label with its bare ticker: first the market-cap
# and return sheets, then the three ESG metric sheets (social, governance,
# environment). Assigning to `.columns` relabels each frame in place.
for _sheet in (esg_cap, esg_returns, esg_social, esg_governance, esg_environment):
    _sheet.columns = [parse_ric(label) for label in _sheet.columns]


def get_snp_tickers(snp, period, halted=None):
    """Gets vectors of tickers at `period` year-end

    Parameters:
    snp (DataFrame): DataFrame of SNP constituents with a 'Date' column and
        one ticker per remaining cell
    period ('YYYY-MM-DD' formatted string): year-end
    halted (iterable of str, optional): tickers to leave out. Defaults to
        ['TIF', 'CXO'].

    Returns:
    list: List of SNP constituents at specified year-end, without duplicates,
        in the order they appear in the sheet, followed by the label 'Date'

    Raises:
    IndexError: if no row of `snp` has `period` in its 'Date' column

    """
    # some tickers have halted trading
    if halted is None:
        halted = ['TIF', 'CXO']
    excluded = set(halted)

    # get tickers
    constituents = snp.loc[snp['Date'] == period]
    if constituents.empty:
        raise IndexError(f"no constituent row found for period {period!r}")

    # columns that are empty for this period carry no ticker
    constituents = constituents.dropna(how='any', axis=1).drop(columns='Date')

    # clean tickers: dict.fromkeys removes duplicates but keeps sheet order,
    # so the result is the same in every kernel session (a set difference
    # yields an order that depends on string hash randomisation)
    tickers = [
        ticker
        for ticker in dict.fromkeys(constituents.iloc[0].tolist())
        if ticker not in excluded
    ]
    tickers.append('Date')

    return tickers

# constituent tickers (plus the 'Date' label) for 2020-12-31; clean_esg reads this global
snp_tickers = get_snp_tickers(esg_snp, '2020-12-31')

def clean_esg(df, tickers=None, start='2007-12-31'):
    """Restrict a raw sheet to the chosen columns and index it by month-end date.

    Parameters:
    df (DataFrame): raw sheet with a 'Date' column of timestamps and one
        column per ticker
    tickers (list, optional): column labels to keep, including 'Date'.
        Defaults to the module-level `snp_tickers`.
    start (date-like, optional): earliest date to keep (inclusive).
        Defaults to 2007-12-31.

    Returns:
    DataFrame: new frame indexed by month-end dates; `df` is not modified

    """
    if tickers is None:
        tickers = snp_tickers

    # selecting with a list already returns a new frame, so no explicit copy
    # of the caller's data is needed
    cleaned = df[list(tickers)].set_index('Date')
    cleaned.index.name = None

    # explicit Timestamp built from an ISO date: no reliance on how an
    # ambiguous day-first string would be parsed
    cleaned = cleaned.loc[cleaned.index >= pd.Timestamp(start)]

    # roll each date forward to the last day of its month (dates that are
    # already a month end stay where they are)
    cleaned.index = cleaned.index + pd.offsets.MonthEnd(0)

    return cleaned

# Run every raw sheet through clean_esg, in the order
# social, environment, governance, returns, market cap.
soc, env, gov, returns, cap = [
    clean_esg(raw_sheet)
    for raw_sheet in (esg_social, esg_environment, esg_governance, esg_returns, esg_cap)
]

# set ESG-rebalance at June of the following year (FY2019 -> 30-06-2020):
# each of the three score frames gets its index moved six months forward
for _pillar in (gov, soc, env):
    _pillar.index = _pillar.index.map(lambda stamp: stamp + relativedelta(months=+6))

In [None]:
# For each pillar: take all rows but the last, then carry the most recent
# available score forward into missing cells.
scores_env = env.iloc[:-1].ffill()
scores_gov = gov.iloc[:-1].ffill()
scores_soc = soc.iloc[:-1].ffill()

# number of companies with an environment score in each row
table2 = scores_env.count(axis=1).to_frame('Scored companies')
table2['Total companies'] = 503
table2['% covered of sample'] = table2['Scored companies'].div(table2['Total companies']).mul(100)

# 20% and 80% cross-sectional quantiles of each pillar, row by row
table2 = table2.assign(**{
    '1. Quintile (Social)': scores_soc.quantile(0.2, axis=1),
    '5. Quintile (Social)': scores_soc.quantile(0.8, axis=1),
    '1. Quintile (Governance)': scores_gov.quantile(0.2, axis=1),
    '5. Quintile (Governance)': scores_gov.quantile(0.8, axis=1),
    '1. Quintile (Environment)': scores_env.quantile(0.2, axis=1),
    '5. Quintile (Environment)': scores_env.quantile(0.8, axis=1),
})

# label the rows by calendar year only, with no index name
table2.index = table2.index.map(lambda stamp: stamp.year)
table2.index.name = None

table2

In [None]:
# emit Table 2 as LaTeX source, formatting floats with one decimal
print(table2.to_latex(float_format='%.1f'))