In [None]:
### JUPYTERLAB ###

# matplotlib inline plotting
%matplotlib inline
# make inline plotting higher resolution
%config InlineBackend.figure_format = 'svg'
plt.style.use('ggplot')

In [None]:
### DEEP-NOTE ###

%config InlineBackend.figure_format = 'retina'
plt.style.use('ggplot')

In [None]:
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
from datetime import datetime
from dateutil.relativedelta import relativedelta

In [None]:
# Global settings: start and end dates of the analysis, as 'YYYY-MM-DD' strings
analysis_start, analysis_end = '2007-12-31', '2021-01-31'

In [None]:
# Location of the factor workbook (blank in this copy of the notebook)
FF_link = ''

# Read the sheet, discard fully empty rows, index by 'date' and divide every
# value by 100 (presumably percent -> decimal; confirm against the source file).
ff = (
    pd.read_excel(FF_link)
    .dropna(how='all')
    .set_index('date', drop=True)
    .div(100)
)

# keep only these columns, in this order
factor_columns = ['Mkt-RF', 'SMB', 'HML', 'MOM', 'RF']
ff = ff[factor_columns]

ff.tail()

In [None]:
# load all data from the master Google Sheets file
ESG_link = ''

# Fetch and parse the workbook once. Passing a list as `sheet_name` makes
# `read_excel` return a dict {sheet name: DataFrame}; the previous version
# called `read_excel` seven times and re-read the same file each time.
_esg_sheet_names = [
    'MetaData',
    'FreeFloat Mkt. Cap. CM',
    '1-CM Total Return (incl. div)',
    'SPX Historical Constituents CY',
    'Re_Soc FY Absolute',
    'Re_Gov FY Absolute',
    'Re_Env FY Absolute',
]
# rows that are empty in every column are dropped from each sheet
_esg_sheets = {
    _name: _frame.dropna(how='all')
    for _name, _frame in pd.read_excel(ESG_link, sheet_name=_esg_sheet_names).items()
}

esg_meta = _esg_sheets['MetaData']
esg_cap = _esg_sheets['FreeFloat Mkt. Cap. CM']
esg_returns = _esg_sheets['1-CM Total Return (incl. div)']
esg_snp = _esg_sheets['SPX Historical Constituents CY']
esg_social = _esg_sheets['Re_Soc FY Absolute']
esg_governance = _esg_sheets['Re_Gov FY Absolute']
esg_environment = _esg_sheets['Re_Env FY Absolute']

In [None]:
# This cell converts RICs (ticker.exchange) to plain tickers, e.g. "GOOG.OQ" -> "GOOG"
def parse_ric(ric):
    """Strip the exchange suffix from a RIC, e.g. "GOOG.OQ" -> "GOOG".

    Parameters:
    ric (str): column label, either a RIC (ticker.exchange) or 'Date'

    Returns:
    str: the first run of ASCII letters that is followed by a dot and at
    least one more character; `ric` itself when it is 'Date' or when it
    contains no such run (e.g. a label without an exchange suffix)

    """
    if ric == 'Date':
        return ric

    # raw string: '\.' inside a normal literal is an invalid escape sequence
    match = re.search(r'([A-Za-z]+)\..+', ric)

    # labels without a suffix used to fail with a bare IndexError; pass them
    # through unchanged instead
    if match is None:
        return ric

    return match.group(1)


# Rename the columns of the market-cap, return and ESG-score tables
# (social, governance, environment) from RIC to plain ticker.
for _frame in (esg_cap, esg_returns, esg_social, esg_governance, esg_environment):
    _frame.columns = [parse_ric(_label) for _label in _frame.columns]

In [None]:
def get_snp_tickers(snp, period, halted=('TIF', 'CXO')):
    """Gets the list of tickers at `period` year-end

    Parameters:
    snp (DataFrame): DataFrame of SNP constituents with a 'Date' column; the
        remaining columns hold one ticker per cell
    period ('YYYY-MM-DD' formatted string): year-end
    halted (iterable of str): tickers to leave out; defaults to TIF and CXO,
        which have halted trading

    Returns:
    list: SNP constituents at the specified year-end, in sheet order and
    without duplicates, followed by the literal 'Date'

    Raises:
    IndexError: if `snp` has no row whose 'Date' equals `period`

    """
    rows = snp.loc[snp['Date'] == period]
    if rows.empty:
        # same exception type as before, but with an actionable message
        raise IndexError(f"no constituent row found for period {period!r}")

    # drop columns that are empty for this period, then the date itself;
    # only the first matching row is used
    rows = rows.dropna(how='any', axis=1).drop(columns='Date')
    tickers = rows.values[0]

    # dict.fromkeys de-duplicates while keeping sheet order. The previous
    # set difference returned the tickers in an arbitrary order that could
    # change between kernel sessions (string hashing is randomized).
    excluded = set(halted)
    tickers = [ticker for ticker in dict.fromkeys(tickers) if ticker not in excluded]
    tickers.append('Date')

    return tickers

# constituents as of the 2020 year-end (plus the 'Date' label)
snp_tickers = get_snp_tickers(snp=esg_snp, period='2020-12-31')

In [None]:
def clean_esg(df, tickers=None, start=None):
    """Restrict a raw table to the chosen tickers and sample start.

    Parameters:
    df (DataFrame): raw table with a 'Date' column and one column per ticker
    tickers (list of str): columns to keep; must include 'Date'. Defaults to
        the notebook-level `snp_tickers`
    start ('YYYY-MM-DD' formatted string): first date to keep (inclusive).
        Defaults to the notebook-level `analysis_start`

    Returns:
    DataFrame: new frame indexed by date (index name removed), holding only
    rows on or after `start`, with every index date rolled forward to its
    month end. `df` itself is not modified.

    """
    # fall back to the notebook globals so existing `clean_esg(df)` calls
    # behave exactly as before
    if tickers is None:
        tickers = snp_tickers
    if start is None:
        start = analysis_start

    # selecting with a list already returns a new frame, so no separate
    # copy of the full input is needed to protect the caller's data
    cleaned = df[list(tickers)].set_index('Date')
    cleaned.index.name = None
    cleaned = cleaned.loc[cleaned.index >= start]

    # MonthEnd(0) leaves month-end dates alone and rolls others forward
    month_end = pd.tseries.offsets.MonthEnd(0)
    cleaned.index = cleaned.index.map(lambda stamp: stamp + month_end)

    return cleaned

# Run the same cleaning over every input table
# (order: social, environment, governance, returns, market cap).
soc, env, gov, returns, cap = (
    clean_esg(_table)
    for _table in (esg_social, esg_environment, esg_governance, esg_returns, esg_cap)
)

In [None]:
# set ESG-rebalance at June of the following year (FY2019 -> 30-06-2020)
#
# The index was rolled to month-end dates when the tables were cleaned.
# MonthEnd(6) moves a month-end date to the month-end six months later, so
# 31 Dec -> 30 Jun as before. The previous relativedelta(months=+6) kept the
# day of month, so e.g. 30 Sep became 30 Mar and 28 Feb became 28 Aug, which
# are no longer month-end dates.
#
# NOTE: this cell is not idempotent; re-running it shifts the index by a
# further six months.
_rebalance_lag = pd.tseries.offsets.MonthEnd(6)
for _scores in (gov, soc, env):
    _scores.index = _scores.index.map(lambda stamp: stamp + _rebalance_lag)

In [None]:
# Show, per ESG pillar, the tickers whose column is NaN in every period
for _heading, _scores in (
    ('Missing for Social:', soc),
    ('Missing for Environment:', env),
    ('Missing for Governance:', gov),
):
    print(_heading)
    display(_scores.columns[_scores.isna().all()])

# then remove those all-NaN columns from each table
soc, env, gov = (
    _scores.dropna(how='all', axis=1) for _scores in (soc, env, gov)
)

In [None]:
# Forward-fill missing scores down each column
soc, env, gov = (_scores.ffill() for _scores in (soc, env, gov))

In [None]:
# Market cap per ticker, taken from the first row of the raw cap table
# (not the cleaned `cap` frame), coerced to numeric and named 'mkt_cap'.
_first_cap_row = esg_cap.iloc[0]
mkt_caps = pd.to_numeric(_first_cap_row.drop('Date')).rename('mkt_cap')

mkt_caps

In [None]:
def _latest_scores(scores, label):
    """Return the row of `scores` at its largest index value, named `label`."""
    latest = scores.loc[scores.index.max()]
    latest.name = label
    return latest


gov_caps = _latest_scores(gov, 'Governance')
env_caps = _latest_scores(env, 'Environment')
soc_caps = _latest_scores(soc, 'Social')

In [None]:
# Start from the Social series and join the other columns onto its index
# one at a time (default left join): Environment, Governance, mkt_cap.
caps = pd.DataFrame(soc_caps)
for _column in (env_caps, gov_caps, mkt_caps):
    caps = caps.join(_column)

In [None]:
# Scatter each ESG pillar against market cap, one stacked panel per pillar
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(7, 8), sharex=True)

for _ax, _pillar in zip(axes, ('Environment', 'Governance', 'Social')):
    _ax.scatter(caps['mkt_cap'], caps[_pillar], alpha=0.8, edgecolors='none')
    _ax.set_ylabel(_pillar)

# the panels share their x-axis (sharex=True); the scale is set on the first
axes[0].set_xscale('log')
axes[-1].set_xlabel('Log Market Cap')

plt.tight_layout()
#plt.savefig('test.png', format='png', dpi=300)

plt.show()

In [None]:
# Log market cap, always derived from the raw `mkt_caps` series (aligned to
# the rows of `caps`). The previous version overwrote the column with its own
# log, so every re-run of this cell applied the log once more.
caps['mkt_cap'] = np.log(mkt_caps.reindex(caps.index))

# correlation matrix as a LaTeX table
print(caps.corr().to_latex(float_format='%.4f'))

In [None]:
# Regress every other column of `caps` on caps['mkt_cap'] plus a constant and
# show the fitted summary. The regressor matrix is the same for each fit, so
# it is built once up front.
X = sm.add_constant(caps['mkt_cap'])

for col in caps.columns.drop('mkt_cap'):
    Y = caps[col]
    display(sm.OLS(endog=Y, exog=X).fit().summary())