In [None]:
import pandas as pd
import numpy as np
import re
from datetime import datetime
from dateutil.relativedelta import relativedelta
import seaborn as sns

In [None]:
# Global settings: bounds of the analysis window, written as ISO date strings.
# analysis_start is the lower bound that clean_esg() compares each frame's
# date index against.
analysis_start = '2007-12-31'
# NOTE(review): analysis_end is not referenced by any cell visible here —
# confirm whether an upper-bound filter is intended somewhere.
analysis_end = '2021-01-31'

In [None]:
# Load all data from the master Google Sheets file.
# ESG_link must be set to the workbook's path/URL before running this cell
# (it is blank in this copy of the notebook).
ESG_link = ''

# Open the workbook once and read each sheet from the same handle instead of
# re-opening (and, for a URL, re-downloading) the file for every sheet.
# Rows that are empty in every column are dropped from each sheet.
with pd.ExcelFile(ESG_link) as esg_workbook:
    esg_meta = pd.read_excel(esg_workbook, sheet_name='MetaData').dropna(how='all')
    esg_cap = pd.read_excel(esg_workbook, sheet_name='FreeFloat Mkt. Cap. CM').dropna(how='all')
    esg_returns = pd.read_excel(esg_workbook, sheet_name='1-CM Total Return (incl. div)').dropna(how='all')
    esg_snp = pd.read_excel(esg_workbook, sheet_name='SPX Historical Constituents CY').dropna(how='all')
    esg_social = pd.read_excel(esg_workbook, sheet_name='Re_Soc FY Absolute').dropna(how='all')
    esg_governance = pd.read_excel(esg_workbook, sheet_name='Re_Gov FY Absolute').dropna(how='all')
    esg_environment = pd.read_excel(esg_workbook, sheet_name='Re_Env FY Absolute').dropna(how='all')

In [None]:
# This cell parses a RIC (ticker.exchange) to its ticker - e.g. "GOOG.OQ" -> "GOOG"
def parse_ric(ric):
    """Return the ticker part of a RIC-style column label.

    Parameters
    ----------
    ric : object
        A column label. Strings of the form ``"<letters>.<suffix>"`` are
        reduced to the run of letters immediately before the dot.

    Returns
    -------
    object
        The extracted ticker, or ``ric`` unchanged when it is ``'Date'``,
        is not a string, or contains no ``<letters>.<suffix>`` pattern
        (previously such labels raised ``IndexError``/``TypeError``).
    """
    if not isinstance(ric, str) or ric == 'Date':
        return ric

    # Raw string so that "\." is a regex escape rather than an (invalid)
    # Python string escape. search() returns the first match, which is the
    # same element findall(...)[0] used to pick.
    match = re.search(r'([A-Za-z]+)\..+', ric)
    if match is None:
        # No exchange suffix present: leave the label as it is.
        return ric

    return match.group(1)


# Reduce every column label to its bare ticker: first the market-cap and
# return frames, then the three ESG metric frames (social, governance,
# environment).
for frame in (esg_cap, esg_returns, esg_social, esg_governance, esg_environment):
    frame.columns = [parse_ric(label) for label in frame.columns]

In [None]:
def clean_esg(df, start=None):
    """Index a raw sheet by date, trim it to the analysis window and snap
    every date to its month end.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'Date'`` column. It is not modified; ``set_index``
        returns a new frame.
    start : str or timestamp-like, optional
        Earliest date to keep (inclusive). Defaults to the notebook-level
        ``analysis_start`` setting.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by the (unnamed) dates. Rows dated before
        ``start`` are removed *before* the month-end roll, and each
        remaining date is then rolled forward to the last day of its month
        (dates already on a month end are unchanged).
    """
    if start is None:
        start = analysis_start

    cleaned = df.set_index('Date').rename_axis(None)
    cleaned = cleaned.loc[cleaned.index >= start]

    # Vectorised roll to month end instead of a per-element lambda.
    cleaned.index = cleaned.index + pd.offsets.MonthEnd(0)

    return cleaned

# Clean every raw sheet in one pass; order is
# social, environment, governance, returns, market cap.
soc, env, gov, returns, cap = map(
    clean_esg,
    (esg_social, esg_environment, esg_governance, esg_returns, esg_cap),
)

In [None]:
def tiles(esg):
    """Assign each column of ``esg`` to a decile group based on its latest score.

    The frame is forward-filled and its last row taken as the current
    screening. Missing and zero scores are discarded (zeros are treated as
    data issues). Bin edges are the 0%, 10%, ..., 100% quantiles of the
    remaining scores, with the outermost edges overwritten by 0 and 100.

    Returns a one-column DataFrame named ``'deciles'`` (labels 1..10),
    indexed by the surviving column labels of ``esg``.
    """
    # Latest available screening per column; the input frame is untouched
    # because ffill() returns a new object.
    latest = esg.ffill().iloc[-1]

    # Drop columns without any screening, then those whose score is zero.
    scores = latest.dropna().replace(0, np.nan).dropna()

    # Decile boundaries, widened so the lowest and highest scores fall
    # inside the first and last bins.
    edges = scores.quantile(np.linspace(0, 1, 11))
    edges.iloc[0] = 0
    edges.iloc[-1] = 100

    binned = pd.cut(scores, edges, labels=range(1, 11))

    return binned.rename('deciles').to_frame()

# Replace each cleaned score frame with its decile assignment.
soc, env, gov = map(tiles, (soc, env, gov))

In [None]:
def calc_sorted_pf(esg, esg_category, returns_df=None, cap_df=None):
    """Build cap-weighted return series for each ESG decile group.

    Parameters
    ----------
    esg : pandas.DataFrame
        Indexed by ticker, with a ``'deciles'`` column giving each ticker's
        group. Rows whose decile is missing are ignored.
    esg_category : str
        Prefix used in the output column names (``'<prefix>_Q:<decile>'``).
    returns_df, cap_df : pandas.DataFrame, optional
        Return and market-cap frames with dates as rows and tickers as
        columns. Default to the notebook-level ``returns`` and ``cap``.

    Returns
    -------
    pandas.DataFrame
        One row per date, one column per decile 1..10. Each value is the
        sum over the group's tickers of ``weight * return``, where the
        weight is the ticker's market cap divided by the group's total cap
        on the same date. Deciles without a computed portfolio are NaN.

    Notes
    -----
    Weights use the market cap of the same date as the return.
    NOTE(review): confirm whether beginning-of-period (lagged) caps are
    intended instead.
    """
    import warnings

    if returns_df is None:
        returns_df = returns
    if cap_df is None:
        cap_df = cap

    # Tickers as rows so they can be joined onto each group's ticker list.
    returns_by_ticker = returns_df.transpose()
    cap_by_ticker = cap_df.transpose()

    weighted_returns = {}

    # TODO: consider whether these portfolios should be rebalanced
    # periodically, i.e. every June.
    # dropna(): a missing decile label must not create a "..._Q:nan" column.
    for decile in esg['deciles'].dropna().unique():
        tickers_in_group = esg.loc[esg['deciles'] == decile].index
        members = pd.DataFrame(index=tickers_in_group)

        sort_return = members.join(returns_by_ticker)
        sort_cap = members.join(cap_by_ticker)

        # Safety for dimensions of matrices: skip the group (leaving its
        # column NaN) but say so instead of failing silently.
        if sort_cap.shape != sort_return.shape:
            warnings.warn(
                f'{esg_category} decile {decile}: market-cap shape '
                f'{sort_cap.shape} does not match return shape '
                f'{sort_return.shape}; portfolio left empty'
            )
            continue

        # Per-date weights: each cap divided by the group's total cap on
        # that date (NaN caps are skipped in the total).
        weights = sort_cap.div(sort_cap.sum(axis=0), axis='columns')

        weighted_returns[decile] = (weights * sort_return).sum(axis=0)

    # Always expose deciles 1..10, followed by any other labels encountered.
    standard_deciles = range(1, 11)
    column_order = list(standard_deciles) + [
        label for label in weighted_returns if label not in standard_deciles
    ]

    sorted_portfolios = pd.DataFrame(weighted_returns).reindex(columns=column_order)
    sorted_portfolios.columns = [f'{esg_category}_Q:{i}' for i in column_order]

    return sorted_portfolios

# Turn each decile assignment into its table of sorted-portfolio returns.
soc, gov, env = (
    calc_sorted_pf(deciles, label)
    for deciles, label in ((soc, 'soc'), (gov, 'gov'), (env, 'env'))
)

In [None]:
# Write each sorted-portfolio table to its own workbook in the working directory.
for portfolio, filename in (
    (gov, 'gov_sorted.xlsx'),
    (soc, 'soc_sorted.xlsx'),
    (env, 'env_sorted.xlsx'),
):
    portfolio.to_excel(filename)