In [None]:
import pandas as pd 
import numpy as np 
import wrds 
import logging
import statsmodels.api as sm
import importlib
import utils.backtest_performance
import utils.backtest_signal
import json
from pathlib import Path

In [None]:
# Open the WRDS session that the price-history and earnings-call helpers below receive as `db`.
WRDS_USERNAME = 'codywan'
db = wrds.Connection(wrds_username=WRDS_USERNAME)
# db.create_pgpass_file() # first-time use

In [None]:
# Linking table used to translate a CIK into the identifiers needed further down:
#   CIK -> PERMNO (for retrieving price history from wrds)
#   CIK -> ticker (for retrieving price history after 12/31/2019 from yahoo finance)
#   CIK -> SIC    (for benchmarking using industry level returns from ken french website)
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
LINKING_FILE_PATH = '/Users/codywan/Data/WRDS Data/crspa_ccmlinktable.csv'

# keep_default_na=False reads blank cells as "", which are then converted to NaN explicitly.
# np.nan is used rather than the np.NaN alias, which no longer exists in NumPy >= 2.0.
link = (
    pd.read_csv(LINKING_FILE_PATH, header=0, keep_default_na=False)
    .replace("", np.nan)
    .dropna(subset=['LPERMNO', 'cik'])
)
link[['LPERMNO', 'cik']] = link[['LPERMNO', 'cik']].astype('int')
link['tic'] = link['tic'].astype('str')

# Group once instead of re-filtering the whole table for every CIK.
# sort=False keeps the CIKs in order of first appearance, like link['cik'].unique().
_links_by_cik = link.groupby('cik', sort=False)

# CIK -> set of PERMNOs linked to it
CIK_PERMNO_mapping = {cik: set(permnos) for cik, permnos in _links_by_cik['LPERMNO']}
# CIK -> list of distinct tickers linked to it
CIK_TICKER_mapping = {cik: list(set(tickers)) for cik, tickers in _links_by_cik['tic']}
# CIK -> SIC code taken from the first row listed for that CIK
_first_row_per_cik = link.drop_duplicates(subset='cik')
CIK_SIC_mapping = {
    cik: int(sic)
    for cik, sic in zip(_first_row_per_cik['cik'], _first_row_per_cik['sic'])
}

In [None]:
# Collapse CIK -> [ticker] into CIK -> ticker, but only when every CIK has exactly one ticker
# (total number of tickers equals number of CIKs); otherwise leave the lists in place and warn.
# NOTE(review): not idempotent - after a successful collapse the values are strings, so
# re-running this cell measures string lengths instead of list lengths.
_ticker_total = sum(len(tickers) for tickers in CIK_TICKER_mapping.values())
if _ticker_total == len(CIK_TICKER_mapping):
    CIK_TICKER_mapping = {cik: tickers[0] for cik, tickers in CIK_TICKER_mapping.items()}
else:
    print("multiple tickers for CIK")

In [None]:
# Build two parallel lists from the NAICS sector file:
#   NAICS_sector_code[k] - the JSON key split on "," (list of code strings)
#   NAICS_sector_name[k] - the value stored under that key
with open("data/industry_classification_and_portfolio/NACIS_sectors.json", "r") as f:
    NAICS_sectors = json.load(f)
NAICS_sector_code = [codes.split(",") for codes in NAICS_sectors]
NAICS_sector_name = [name for name in NAICS_sectors.values()]

In [None]:
# Load the SIC -> industry portfolio lookup from JSON.
# JSON object keys are always strings, so the keys of this dict are str.
SIC_portfolio_mapping = json.loads(
    Path("data/industry_classification_and_portfolio/SIC_portfl_mapping_Siccodes49.json").read_text()
)

In [None]:
from utils.backtest_performance import add_to_return_stats, get_price_history_yahoo_finance, get_price_history, get_return_stats



# get price history and performance metrics for each signal event
#
# For every basket in MODEL_PARAM and every NAICS sector that has a signal.csv, this cell
# builds a per-sector return_stats table, saves it as <sector>/performance.csv and finally
# saves the concatenation of all sectors as <basket>/performance.csv.

# MODEL_PARAM = {
#     "A": ['weather'],
#     "B": ['artificial intelligence', 'cloud based', 'machine learning', 'big data'],
#     "C": ['eyeball', 'view'],
#     "D": ['total addressable market', 'go-to market'],
#     "E": ['ecosystems'],
#     "F": ['disruptor']
# }

backtests_name = 'LDA'
# backtests_name = "key_word_search"

MODEL_PARAM = {
    "ESG_baseline": [],
}

HOLDING_PERIOD = 30*5 # holding period in days

for basket_name in MODEL_PARAM:

    # # debugging
    # if basket_name != 'D' and basket_name != 'E' and basket_name != 'F':
    #     continue

    print(basket_name, MODEL_PARAM[basket_name])
    # The basket folder does not depend on the sector, so it is set once here; this also
    # guarantees it is defined for the basket-level save even when no sector is processed.
    repository_path = f"backtests/{backtests_name}/{basket_name}"
    master_df = list()

    for sector_name in NAICS_sector_name:

        # # debugging
        # if sector_name != "Information":
        #     continue

        file_path = f"{repository_path}/{sector_name}/signal.csv"
        if not Path(file_path).is_file():
            # no signal file for this sector in this basket
            continue
        print(sector_name)

        # get signal events: one column per header (parsed as int), NaN padding dropped
        # NOTE(review): headers are presumably CIKs, since the dict is passed alongside the CIK_* mappings - confirm
        df = pd.read_csv(file_path)
        BUY_SIGNAL = {int(col): df[col].dropna().to_list() for col in df}

        # get price history from wrds
        price_history, volume_on_event_day, no_price_history_signal = get_price_history(db, CIK_PERMNO_mapping, BUY_SIGNAL, HOLDING_PERIOD)
        # get price history of those not available from wrds, from yahoo finance
        ex_price_history, ex_volume_on_event_day, _ = get_price_history_yahoo_finance(CIK_TICKER_mapping, no_price_history_signal, HOLDING_PERIOD)
        # put price history from wrds and yahoo finance together
        price_history[ex_price_history.columns] = ex_price_history
        volume_on_event_day = pd.concat([volume_on_event_day, ex_volume_on_event_day])

        # compute return statistics
        return_stats = get_return_stats(db, price_history, CIK_SIC_mapping, SIC_portfolio_mapping)

        # add volume data to return stats
        add_to_return_stats(index=3, df=volume_on_event_day, return_stats=return_stats)

        # get earnings call flag and add to return stats
        earnings_call_date, _ = utils.backtest_performance.get_earnings_call_date(db, CIK_TICKER_mapping, BUY_SIGNAL)
        add_to_return_stats(index=4, df=earnings_call_date, return_stats=return_stats)

        # append to master variables
        master_df.append(return_stats)
        display(return_stats.sort_values("R", ascending=False).iloc[:5])
        # save to local
        return_stats.to_csv(f"{repository_path}/{sector_name}/performance.csv")

    # save basket_name level performance table to local
    # (pd.concat raises on an empty list, so skip the save when no sector had a signal file)
    if master_df:
        pd.concat(master_df).to_csv(f"{repository_path}/performance.csv")
    else:
        print(f"no signal.csv found under {repository_path}; basket-level performance.csv not written")
# R is the return from day -1 to day 1 relative to the event day, which is day 0.
# EA date is a categorical variable: if there is an earnings announcement within three days of the signal event,
# the value is the day relative to the signal event (-1 being the day before the event day, 0 the event day, 1 the day after);
# if there is no earnings announcement within three days, the value is np.nan.

In [None]:
# TODO: IBES doesn't have all earnings call dates
# TODO: add M&A dates; Thomson Reuters has the SDC Mergers and Acquisitions Events database, but I don't have access to it
# TODO: analysis on performance

In [None]:
# Print each sector's position next to its name; the t-stat tables below label rows by this position.
for i, sector_name in enumerate(NAICS_sector_name):
    print(f"{i} {sector_name}")

In [9]:
import importlib
import pandas as pd 
import numpy as np
from pathlib import Path
from scipy.stats import ttest_1samp
from utils.backtest_performance import get_NAICS_code_sector_name

# generate pair of NAICS sector code and name

NAICS_sector_code, NAICS_sector_name = get_NAICS_code_sector_name()

# test the statistical significance of performance of signal events
#
# For every basket and every time partition this cell writes one CSV with a row per sector
# (labelled by the sector's position in NAICS_sector_name) plus a "Total" row, where each cell
# is "<mean return> (<two-sided one-sample t-test p-value in %>)" for the columns in COLUMNS.

# backtests_name = "LDA"
backtests_name = "key_word_search"

MODEL_PARAM = {
    # "A": ['weather'],
    # "B": ['artificial intelligence', 'cloud based', 'machine learning', 'big data'],
    # "C": ['eyeball', 'view'],
    # "D": ['total addressable market', 'go-to market'],
    # "E": ['ecosystems'],
    # "F": ['disruptor'],
    "ESG_baseline": []
}

# events whose 'vol' value exceeds this threshold are excluded
vol_threshold = 1.25
COLUMNS = ['1d - MKT', '10d - MKT', '30d - MKT', '60d - MKT', '90d - MKT', '120d - MKT', '1d - IDT', '10d - IDT', '30d - IDT', '60d - IDT', '90d - IDT', '120d - IDT']

# TIME_PARTITION = [['1995-01-01', '1999-12-31'], ['2000-01-01', '2004-12-31'], 
#                   ['2005-01-01', '2009-12-31'], ['2010-01-01', '2014-12-31'], 
#                   ['2015-01-01', '2019-12-31']]

TIME_PARTITION = [['1995-01-01', '1999-12-31'], ['2000-01-01', '2005-12-31'], 
                ['2006-01-01', '2007-12-31'], ['2008-01-01', '2009-12-31'],
                ['2010-01-01', '2011-12-31'], ['2012-01-01', '2013-12-31'], 
                ['2014-01-01', '2015-12-31'], ['2016-01-01', '2017-12-31'],
                ['2018-01-01', '2019-12-31']]

sub_folder = f"{len(TIME_PARTITION)}_time_partitions"


def _select_events(performance, t0, t1, max_vol):
    """Return the rows of a performance table that are kept for the t-tests.

    A row is kept when its 'day 0' lies in [t0, t1] (both ends inclusive, so an event
    dated on a partition's first day is not lost between partitions), its 'EA' value is
    null, and its 'vol' value is at most max_vol. The EA/vol conditions try to remove
    earnings call, merger etc. events.
    """
    in_period = performance['day 0'].between(t0, t1)
    no_known_event = performance['EA'].isnull() & (performance['vol'] <= max_vol)
    return performance[in_period & no_known_event]


for basket_name in MODEL_PARAM:
    repository_path = f"backtests/{backtests_name}/{basket_name}"

    # # debugging
    # if basket_name != 'F':
    #     continue

    print(basket_name, MODEL_PARAM[basket_name])

    # pathlib is used because `os` is not imported in this notebook; like os.mkdir, this
    # creates a single level and fails if the basket folder itself is missing.
    Path(f"{repository_path}/{sub_folder}").mkdir(exist_ok=True)

    for (t0_str, t1_str) in TIME_PARTITION:
        t0, t1 = pd.to_datetime(t0_str), pd.to_datetime(t1_str)
        print(f"{t0_str} - {t1_str}")
        master_t_stats = list()

        for i, sector_name in enumerate(NAICS_sector_name):

            # # debugging
            # if sector_name != "Information":
            #     continue

            file_path = f"{repository_path}/{sector_name}/performance.csv"
            if not Path(file_path).is_file():
                continue

            # get performance df for this sector (the third CSV column is parsed as dates)
            # and keep only the events of this period that pass the EA/vol filter
            sector_perf = _select_events(pd.read_csv(file_path, parse_dates=[2]), t0, t1, vol_threshold)

            if len(sector_perf) == 0:
                continue

            # compute t-stats: mean as a raw fraction, p-value in percent
            t_stats = sector_perf[COLUMNS].apply(lambda col: f"{col.mean():.4f} ({ttest_1samp(list(col), popmean=0)[1]*100:.2f}%)", axis=0).to_frame().T
            t_stats.insert(0, 'sample size', len(sector_perf))
            t_stats.index = [f"{i}"]
            master_t_stats.append(t_stats)

        # if there's signal events in this time period
        if len(master_t_stats) != 0:
            # compute performance for all signal events in this basket
            basket_perf = pd.read_csv(f"{repository_path}/performance.csv", parse_dates=[2])
            basket_perf = _select_events(basket_perf, t0, t1, vol_threshold)[COLUMNS]
            # string format for returns and p-value
            # NOTE(review): this row shows the mean in percent while the sector rows show a raw fraction
            total_row = basket_perf.apply(lambda col: f"{np.round(col.mean()*100, 2)}% ({np.round(ttest_1samp(list(col), popmean=0)[1]*100, 2)}%)", axis=0).to_frame().T
            total_row.index = ["Total"]
            # add to master variable
            master_t_stats.append(total_row)
            all_sector_returns = pd.concat(master_t_stats)
            # the Total sample size is the sum of the sector rows above it
            all_sector_returns.at['Total', 'sample size'] = all_sector_returns.iloc[:-1]['sample size'].sum()
            all_sector_returns.to_csv(f"{repository_path}/{sub_folder}/performance_evaluation_{t0_str}_{t1_str}.csv")
            # display to console
            display(all_sector_returns)
        else:
            print(f"{len(master_t_stats)} events from {t0_str} - {t1_str}")


ESG_baseline []
1995-01-01 - 1999-12-31
0 events from 1995-01-01 - 1999-12-31
2000-01-01 - 2005-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
0,2.0,-0.0039 (87.23%),0.0712 (7.03%),0.0748 (26.69%),0.0543 (20.69%),0.0714 (23.01%),0.1353 (12.75%),0.0071 (86.60%),0.0741 (13.49%),0.0593 (32.46%),0.0481 (46.39%),0.0621 (7.58%),0.1750 (24.19%)
1,4.0,-0.0047 (27.73%),0.0250 (42.42%),0.0559 (13.26%),0.2114 (4.20%),0.3486 (0.94%),0.3991 (0.36%),-0.0028 (69.86%),0.0096 (60.14%),-0.0054 (81.88%),0.0870 (35.29%),0.1699 (17.64%),0.2280 (2.30%)
2,5.0,-0.0126 (38.53%),-0.0467 (28.36%),-0.0727 (5.48%),-0.1228 (24.86%),-0.0972 (54.03%),-0.0414 (82.04%),-0.0098 (41.41%),-0.0551 (15.02%),-0.0733 (5.16%),-0.1041 (18.28%),-0.0952 (45.59%),-0.0787 (61.53%)
3,1.0,-0.0432 (nan%),0.0040 (nan%),0.6002 (nan%),1.2056 (nan%),1.5032 (nan%),1.4551 (nan%),-0.0683 (nan%),-0.0029 (nan%),0.5264 (nan%),0.9766 (nan%),1.2230 (nan%),1.1344 (nan%)
4,75.0,0.0066 (1.91%),0.0043 (59.89%),0.0313 (4.86%),0.0356 (6.27%),0.0655 (1.56%),0.0991 (0.06%),0.0053 (5.55%),0.0036 (60.50%),0.0292 (3.34%),0.0416 (2.27%),0.0596 (1.66%),0.0818 (0.15%)
5,3.0,0.0082 (66.22%),0.0188 (36.90%),0.0012 (97.53%),0.0087 (89.75%),-0.0204 (62.05%),-0.0007 (98.78%),0.0093 (60.17%),0.0200 (21.53%),0.0014 (96.61%),-0.0033 (95.80%),-0.0329 (38.76%),-0.0211 (53.09%)
6,15.0,0.0055 (39.22%),0.0142 (20.79%),0.0349 (17.37%),0.0441 (19.84%),0.0852 (5.21%),0.0572 (19.88%),0.0055 (40.48%),0.0144 (19.94%),0.0391 (11.83%),0.0383 (22.53%),0.0790 (6.77%),0.0510 (26.59%)
7,7.0,0.0093 (27.98%),-0.0083 (59.48%),-0.0173 (71.50%),-0.0145 (85.43%),-0.0641 (42.20%),-0.0242 (78.48%),0.0041 (50.75%),-0.0119 (45.50%),-0.0141 (72.71%),-0.0408 (54.67%),-0.0740 (27.78%),-0.0702 (37.83%)
8,39.0,-0.0045 (53.38%),-0.0110 (29.32%),-0.0007 (95.65%),0.0040 (84.46%),0.0097 (75.47%),-0.0226 (48.52%),-0.0047 (53.05%),-0.0145 (18.18%),-0.0043 (76.65%),0.0018 (91.94%),0.0193 (50.76%),-0.0041 (89.19%)
9,49.0,0.0018 (58.55%),0.0087 (10.72%),0.0183 (12.81%),0.0517 (1.47%),0.0877 (0.70%),0.1115 (0.41%),0.0016 (59.75%),0.0092 (13.76%),0.0125 (31.31%),0.0297 (17.97%),0.0518 (10.58%),0.0660 (7.84%)


2006-01-01 - 2007-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
1,7.0,-0.0067 (65.56%),0.0059 (80.39%),0.0115 (82.74%),0.0103 (83.68%),0.0027 (96.65%),0.0328 (60.11%),-0.0003 (97.14%),0.0035 (80.20%),0.0069 (78.57%),-0.0374 (17.37%),-0.0703 (8.36%),-0.0540 (28.79%)
2,13.0,0.0035 (58.30%),-0.0034 (79.04%),0.0107 (54.60%),0.0988 (1.10%),0.1384 (1.12%),0.2150 (0.56%),0.0003 (95.42%),-0.0081 (55.12%),-0.0069 (67.69%),0.0552 (7.93%),0.0714 (12.68%),0.1274 (7.34%)
3,1.0,0.0200 (nan%),0.0581 (nan%),-0.1092 (nan%),0.0283 (nan%),0.1249 (nan%),0.3997 (nan%),0.0058 (nan%),0.1137 (nan%),-0.1041 (nan%),0.0301 (nan%),0.0925 (nan%),0.3217 (nan%)
4,93.0,0.0099 (2.58%),0.0073 (37.18%),0.0059 (61.09%),0.0311 (5.83%),0.0509 (1.99%),0.0806 (0.11%),0.0076 (8.42%),0.0041 (59.96%),-0.0036 (73.92%),0.0032 (83.23%),0.0056 (77.78%),0.0140 (52.40%)
5,3.0,0.0091 (59.57%),-0.0101 (56.01%),-0.0337 (58.89%),-0.0384 (55.53%),-0.0240 (73.78%),-0.0312 (77.38%),0.0099 (57.63%),-0.0052 (83.37%),-0.0337 (61.18%),-0.0366 (51.88%),-0.0027 (97.22%),-0.0175 (87.49%)
6,19.0,-0.0068 (9.41%),-0.0218 (2.36%),0.0047 (83.31%),-0.0054 (87.43%),-0.0362 (27.09%),-0.0195 (58.14%),-0.0086 (4.39%),-0.0127 (20.03%),0.0107 (64.49%),0.0057 (87.13%),-0.0156 (64.09%),0.0054 (87.87%)
7,10.0,-0.0039 (71.39%),0.0020 (89.42%),0.0425 (2.83%),0.1037 (0.42%),0.1503 (0.84%),0.1710 (2.18%),0.0016 (84.85%),0.0097 (48.42%),0.0484 (1.34%),0.0894 (0.37%),0.1075 (2.38%),0.1173 (5.62%)
8,45.0,0.0000 (99.55%),0.0008 (93.36%),0.0052 (70.07%),0.0169 (46.40%),0.0332 (31.03%),0.0285 (42.15%),-0.0012 (76.65%),-0.0073 (41.88%),-0.0056 (66.13%),0.0002 (99.18%),0.0023 (94.09%),-0.0118 (71.10%)
9,43.0,0.0072 (13.49%),0.0082 (34.93%),0.0049 (65.31%),0.0139 (42.98%),0.0036 (84.35%),-0.0079 (70.04%),0.0049 (31.09%),0.0035 (62.12%),0.0026 (77.94%),0.0181 (24.59%),0.0087 (63.20%),0.0031 (87.81%)
10,3.0,0.0222 (31.00%),0.0152 (68.85%),0.0233 (14.92%),0.1483 (29.21%),0.1799 (34.42%),-0.0319 (87.80%),0.0088 (35.58%),0.0135 (67.01%),0.0134 (20.64%),0.0843 (32.10%),0.1391 (15.10%),-0.0011 (98.07%)


2008-01-01 - 2009-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
1,16.0,-0.0047 (74.86%),0.0155 (70.53%),0.0612 (24.36%),0.2000 (1.20%),0.1715 (12.57%),0.2596 (8.20%),0.0005 (97.14%),0.0030 (93.26%),0.0354 (30.60%),0.1428 (2.17%),0.1403 (7.79%),0.2499 (2.84%)
2,16.0,0.0050 (61.61%),-0.0126 (31.96%),0.0002 (99.60%),0.0235 (62.19%),0.0338 (63.16%),0.0310 (69.62%),0.0023 (79.75%),-0.0158 (33.31%),0.0163 (59.85%),0.0214 (65.93%),0.0413 (54.88%),0.0528 (49.25%)
3,2.0,0.0092 (88.78%),-0.1087 (54.61%),-0.0399 (36.09%),-0.0845 (51.68%),0.0358 (61.18%),0.0197 (86.24%),0.0149 (76.52%),-0.0365 (82.91%),0.0604 (29.53%),0.0218 (52.53%),0.0782 (45.90%),0.1111 (41.90%)
4,123.0,-0.0007 (85.82%),-0.0027 (72.45%),0.0122 (25.98%),0.0051 (71.45%),0.0081 (67.05%),0.0028 (89.43%),-0.0001 (98.56%),-0.0007 (92.01%),0.0103 (29.52%),-0.0060 (63.21%),-0.0092 (59.84%),-0.0200 (28.02%)
6,12.0,-0.0053 (74.99%),-0.0303 (32.70%),0.0062 (91.61%),-0.0165 (74.79%),0.0017 (97.68%),0.0102 (89.93%),-0.0102 (54.61%),-0.0460 (12.52%),-0.0166 (78.02%),-0.0342 (43.60%),-0.0263 (62.76%),-0.0294 (71.62%)
7,4.0,-0.0055 (25.97%),0.0455 (49.00%),0.0628 (25.06%),0.0111 (87.81%),0.0042 (93.58%),-0.0297 (67.19%),-0.0034 (12.49%),0.0598 (37.04%),0.0258 (58.52%),-0.0250 (76.98%),-0.0283 (73.27%),-0.0636 (54.05%)
8,40.0,0.0087 (16.37%),0.0291 (5.56%),0.0349 (4.26%),0.0454 (4.84%),0.0419 (8.58%),0.0588 (7.95%),0.0080 (16.68%),0.0294 (4.08%),0.0312 (4.88%),0.0281 (14.84%),0.0228 (35.43%),0.0411 (19.40%)
9,50.0,-0.0034 (67.93%),-0.0438 (6.10%),-0.0547 (10.10%),0.0091 (85.92%),0.0374 (47.90%),-0.0114 (82.69%),0.0001 (98.81%),-0.0232 (19.80%),-0.0187 (48.09%),0.0422 (33.48%),0.0511 (27.15%),0.0111 (80.42%)
10,8.0,0.0175 (18.12%),-0.0209 (45.62%),0.0589 (37.96%),0.0816 (66.38%),0.1199 (60.48%),0.1459 (64.19%),0.0117 (16.90%),0.0014 (92.73%),0.0675 (16.18%),0.0606 (59.49%),0.0796 (59.43%),0.1067 (58.22%)
11,12.0,0.0065 (89.48%),0.0148 (79.76%),0.0601 (28.69%),0.0664 (20.53%),0.0801 (26.76%),0.0566 (38.71%),0.0059 (90.32%),0.0099 (86.18%),0.0564 (31.00%),0.0409 (44.72%),0.0373 (59.82%),-0.0010 (98.80%)


2010-01-01 - 2011-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
0,6.0,-0.0047 (63.15%),0.0032 (87.87%),-0.0262 (55.18%),-0.0732 (16.68%),-0.0379 (40.51%),-0.0365 (41.58%),-0.0061 (63.60%),0.0011 (84.93%),-0.0308 (22.20%),-0.0680 (18.69%),-0.0441 (18.01%),-0.0627 (22.01%)
1,20.0,0.0044 (64.34%),0.0077 (60.06%),0.0049 (83.86%),0.0069 (84.88%),0.0274 (66.06%),-0.0099 (89.07%),0.0043 (61.65%),0.0057 (66.30%),0.0034 (87.86%),0.0000 (99.92%),0.0384 (49.05%),-0.0057 (92.79%)
2,41.0,0.0010 (58.03%),0.0063 (15.32%),0.0142 (1.37%),0.0243 (3.57%),0.0252 (9.59%),0.0083 (67.50%),0.0028 (16.19%),0.0040 (39.91%),0.0027 (69.36%),0.0090 (41.98%),0.0058 (68.17%),-0.0077 (65.70%)
3,8.0,0.0041 (61.01%),0.0181 (35.16%),0.0232 (58.56%),0.0098 (87.72%),-0.0277 (65.57%),-0.0068 (92.15%),-0.0022 (81.19%),0.0125 (48.44%),0.0303 (24.73%),0.0438 (16.42%),0.0008 (97.84%),0.0254 (64.94%)
4,217.0,0.0021 (33.25%),-0.0028 (48.66%),-0.0013 (84.10%),-0.0015 (88.26%),0.0049 (64.80%),0.0104 (42.32%),0.0025 (19.40%),-0.0032 (39.18%),-0.0050 (38.68%),-0.0106 (24.25%),-0.0085 (38.10%),-0.0044 (70.97%)
5,1.0,0.0260 (nan%),0.0208 (nan%),-0.0779 (nan%),-0.1267 (nan%),-0.1064 (nan%),0.0089 (nan%),0.0314 (nan%),0.0154 (nan%),-0.0873 (nan%),-0.1446 (nan%),-0.0875 (nan%),0.0172 (nan%)
6,15.0,-0.0132 (14.24%),-0.0181 (29.17%),-0.0160 (53.17%),-0.0233 (46.68%),-0.0220 (58.06%),-0.0034 (93.99%),-0.0125 (16.18%),-0.0149 (31.39%),-0.0101 (62.76%),-0.0206 (44.11%),-0.0368 (31.81%),-0.0242 (58.09%)
7,15.0,0.0163 (8.56%),0.0137 (28.18%),0.0132 (39.74%),0.0196 (53.87%),0.0291 (49.54%),0.0763 (19.16%),0.0132 (16.10%),0.0040 (71.79%),0.0055 (70.70%),0.0112 (69.19%),0.0221 (55.24%),0.0600 (27.45%)
8,75.0,-0.0032 (34.97%),-0.0053 (39.82%),-0.0024 (84.19%),0.0076 (66.55%),-0.0160 (41.82%),-0.0177 (46.80%),-0.0042 (23.21%),-0.0092 (14.79%),-0.0168 (16.60%),-0.0095 (58.70%),-0.0360 (6.85%),-0.0354 (13.77%)
9,68.0,0.0041 (17.16%),0.0049 (48.41%),-0.0092 (34.81%),0.0057 (82.33%),-0.0470 (1.36%),-0.0672 (0.15%),0.0040 (18.10%),0.0097 (15.94%),0.0068 (47.60%),0.0391 (12.35%),0.0047 (79.74%),-0.0064 (75.27%)


2012-01-01 - 2013-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
0,3.0,-0.0020 (78.68%),-0.0181 (19.60%),-0.0553 (4.76%),-0.0764 (8.83%),-0.0759 (13.34%),-0.0694 (34.30%),-0.0005 (23.52%),-0.0028 (18.67%),-0.0051 (23.90%),-0.0119 (23.90%),-0.0145 (22.70%),-0.0171 (24.23%)
1,33.0,-0.0024 (65.35%),-0.0186 (10.04%),-0.0158 (45.59%),-0.0481 (4.27%),-0.0536 (3.24%),-0.0554 (10.20%),-0.0014 (77.36%),-0.0073 (45.20%),0.0004 (98.30%),-0.0232 (22.00%),-0.0358 (8.52%),-0.0398 (22.73%)
2,40.0,-0.0039 (19.98%),-0.0074 (22.97%),-0.0127 (27.24%),0.0079 (53.30%),-0.0146 (34.53%),0.0019 (91.99%),-0.0011 (68.16%),0.0045 (40.86%),0.0069 (50.54%),0.0282 (4.64%),0.0174 (25.92%),0.0314 (10.73%)
3,8.0,-0.0085 (13.00%),-0.0070 (69.49%),0.0061 (83.28%),0.0109 (77.22%),-0.0287 (52.31%),-0.0266 (67.07%),-0.0106 (17.93%),-0.0005 (96.86%),0.0168 (56.82%),0.0211 (50.95%),-0.0431 (34.06%),-0.0565 (36.40%)
4,227.0,-0.0015 (47.39%),0.0059 (20.50%),0.0040 (51.37%),0.0174 (6.49%),0.0275 (1.42%),0.0489 (0.03%),-0.0004 (84.08%),0.0075 (9.35%),0.0070 (24.78%),0.0207 (1.96%),0.0313 (0.33%),0.0505 (0.01%)
5,8.0,-0.0018 (64.47%),-0.0056 (68.24%),-0.0166 (54.13%),-0.0417 (47.14%),-0.0364 (52.08%),-0.0107 (87.65%),-0.0016 (64.38%),-0.0036 (79.63%),-0.0243 (40.87%),-0.0558 (37.10%),-0.0612 (31.55%),-0.0407 (56.21%)
6,14.0,-0.0125 (9.82%),-0.0012 (93.94%),0.0125 (69.81%),0.0287 (50.11%),0.0176 (70.62%),0.0595 (26.68%),-0.0116 (12.01%),-0.0025 (87.39%),0.0148 (63.70%),0.0222 (58.00%),0.0093 (84.14%),0.0477 (39.31%)
7,5.0,0.0072 (16.75%),0.0235 (36.49%),-0.0639 (17.89%),-0.0478 (56.50%),-0.0574 (58.06%),-0.0386 (69.19%),0.0035 (42.36%),0.0197 (27.78%),-0.0496 (15.62%),-0.0522 (48.26%),-0.0625 (54.24%),-0.0479 (61.42%)
8,81.0,0.0038 (14.20%),0.0173 (0.55%),0.0299 (3.85%),0.0734 (0.32%),0.0724 (1.71%),0.0762 (1.56%),0.0028 (26.38%),0.0142 (2.23%),0.0254 (7.92%),0.0635 (0.85%),0.0571 (5.10%),0.0555 (7.07%)
9,67.0,0.0043 (3.78%),0.0113 (3.42%),0.0175 (6.99%),0.0236 (13.12%),0.0423 (4.60%),0.0596 (0.42%),0.0033 (8.73%),0.0043 (38.00%),0.0048 (56.53%),0.0084 (55.71%),0.0236 (21.68%),0.0317 (8.81%)


2014-01-01 - 2015-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
0,3.0,0.0003 (94.69%),-0.0053 (68.73%),-0.0288 (20.45%),-0.0607 (11.07%),-0.0569 (36.42%),-0.0889 (22.55%),0.0014 (32.47%),0.0028 (47.38%),-0.0030 (48.33%),-0.0106 (14.90%),-0.0155 (15.73%),-0.0237 (6.95%)
1,45.0,-0.0075 (30.25%),-0.0274 (1.54%),-0.0882 (0.03%),-0.1654 (0.00%),-0.1763 (0.00%),-0.2716 (0.00%),-0.0074 (25.94%),-0.0200 (6.25%),-0.0602 (0.11%),-0.0935 (0.07%),-0.0817 (0.71%),-0.1421 (0.00%)
2,31.0,-0.0001 (98.16%),-0.0137 (11.15%),-0.0280 (6.47%),-0.0286 (18.46%),-0.0316 (19.58%),-0.0325 (20.18%),0.0009 (79.59%),-0.0068 (41.90%),-0.0158 (18.69%),-0.0098 (64.32%),-0.0106 (66.39%),-0.0193 (46.35%)
3,7.0,0.0121 (1.75%),0.0027 (86.14%),-0.0394 (20.32%),-0.0680 (9.52%),-0.1277 (2.70%),-0.1357 (4.98%),0.0092 (5.34%),0.0026 (86.50%),-0.0256 (18.35%),-0.0425 (8.78%),-0.0873 (1.70%),-0.0692 (11.02%)
4,287.0,-0.0001 (95.40%),-0.0007 (85.97%),-0.0053 (36.43%),-0.0130 (9.17%),-0.0151 (9.22%),-0.0164 (14.04%),-0.0006 (70.77%),0.0001 (98.36%),-0.0039 (49.01%),-0.0060 (44.64%),-0.0055 (55.94%),-0.0059 (59.96%)
5,21.0,0.0044 (48.92%),-0.0006 (94.61%),-0.0033 (86.10%),-0.0299 (27.53%),-0.0459 (15.88%),-0.0929 (3.55%),0.0048 (44.56%),-0.0010 (90.89%),0.0009 (96.12%),-0.0152 (56.19%),-0.0315 (31.89%),-0.0713 (10.27%)
6,23.0,0.0120 (15.78%),0.0222 (10.25%),0.0505 (2.19%),0.0594 (6.59%),0.0587 (21.99%),0.0546 (30.40%),0.0116 (17.26%),0.0172 (18.98%),0.0351 (9.10%),0.0298 (34.47%),0.0134 (77.14%),-0.0019 (97.06%)
7,11.0,-0.0005 (92.58%),-0.0047 (61.23%),0.0115 (58.75%),0.0208 (66.18%),0.0298 (41.98%),0.0290 (60.11%),-0.0009 (83.92%),-0.0053 (56.77%),0.0146 (43.28%),0.0136 (77.43%),0.0196 (62.00%),-0.0049 (92.06%)
8,112.0,-0.0005 (87.28%),-0.0046 (40.59%),-0.0159 (10.55%),-0.0244 (11.31%),-0.0339 (6.57%),-0.0320 (10.48%),-0.0001 (96.02%),-0.0048 (37.56%),-0.0194 (5.39%),-0.0298 (5.72%),-0.0415 (2.68%),-0.0396 (5.08%)
9,101.0,0.0028 (9.05%),0.0071 (11.16%),0.0082 (24.66%),0.0147 (26.38%),-0.0034 (83.10%),-0.0124 (49.19%),0.0022 (17.41%),0.0027 (51.68%),-0.0034 (60.25%),0.0009 (94.33%),-0.0166 (28.57%),-0.0333 (6.79%)


2016-01-01 - 2017-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
0,4.0,-0.0030 (30.32%),-0.0159 (35.74%),-0.0094 (61.99%),0.0395 (40.40%),0.0636 (4.98%),0.0346 (14.88%),-0.0016 (40.85%),-0.0047 (27.28%),-0.0095 (38.46%),-0.0104 (26.94%),-0.0029 (76.89%),-0.0133 (15.08%)
1,58.0,-0.0060 (49.26%),-0.0090 (59.40%),-0.0221 (33.29%),-0.0402 (20.19%),-0.0363 (29.64%),-0.0306 (41.73%),-0.0081 (31.68%),-0.0031 (83.35%),-0.0147 (44.83%),-0.0153 (57.15%),-0.0135 (66.12%),-0.0046 (89.11%)
2,41.0,-0.0052 (31.33%),-0.0075 (46.42%),0.0104 (56.51%),0.0123 (56.44%),-0.0045 (88.05%),-0.0051 (87.98%),-0.0041 (44.30%),0.0020 (83.92%),0.0258 (13.95%),0.0423 (4.84%),0.0461 (10.43%),0.0494 (13.79%)
3,6.0,-0.0125 (12.90%),-0.0212 (10.79%),-0.0023 (95.11%),-0.0238 (61.92%),-0.0022 (96.52%),-0.0173 (84.01%),0.0028 (72.34%),-0.0152 (10.70%),-0.0195 (50.51%),-0.0294 (57.20%),-0.0120 (82.71%),-0.0291 (67.38%)
4,247.0,0.0008 (58.50%),0.0013 (73.88%),-0.0073 (19.50%),-0.0062 (43.31%),-0.0033 (74.81%),-0.0101 (46.86%),0.0008 (55.79%),0.0013 (73.79%),-0.0065 (24.04%),-0.0039 (61.20%),-0.0001 (98.87%),-0.0044 (74.71%)
5,27.0,0.0075 (10.66%),0.0160 (9.13%),0.0220 (32.38%),0.0061 (84.05%),0.0107 (75.54%),0.0149 (67.22%),0.0086 (8.16%),0.0203 (3.37%),0.0330 (11.89%),0.0271 (36.47%),0.0381 (27.01%),0.0550 (11.35%)
6,26.0,-0.0043 (64.38%),0.0142 (30.23%),-0.0013 (95.18%),-0.0275 (33.82%),-0.0550 (9.58%),-0.0386 (31.18%),-0.0046 (64.61%),0.0157 (26.35%),0.0034 (85.88%),-0.0195 (46.30%),-0.0497 (13.30%),-0.0453 (21.36%)
7,25.0,-0.0068 (10.92%),-0.0072 (59.50%),-0.0223 (12.23%),-0.0386 (3.22%),-0.0322 (21.56%),-0.0271 (31.17%),-0.0063 (11.73%),-0.0070 (55.15%),-0.0264 (6.33%),-0.0228 (23.12%),-0.0234 (37.39%),-0.0122 (65.04%)
8,123.0,0.0008 (74.14%),-0.0026 (61.73%),0.0077 (29.87%),0.0073 (52.10%),-0.0099 (48.50%),-0.0031 (85.14%),0.0009 (68.77%),0.0004 (92.46%),0.0038 (58.12%),-0.0016 (87.94%),-0.0152 (28.33%),-0.0071 (66.19%)
9,85.0,-0.0014 (46.20%),-0.0062 (17.91%),-0.0149 (3.74%),-0.0040 (70.58%),0.0018 (88.04%),0.0116 (46.50%),-0.0036 (5.13%),-0.0094 (2.83%),-0.0148 (3.28%),-0.0124 (24.70%),-0.0214 (7.08%),-0.0316 (3.89%)


2018-01-01 - 2019-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
1,45.0,-0.0062 (27.46%),-0.0359 (0.30%),-0.0606 (0.07%),-0.1383 (0.00%),-0.1831 (0.00%),-0.2631 (0.00%),-0.0037 (49.29%),-0.0214 (4.24%),-0.0281 (7.99%),-0.0733 (0.38%),-0.1111 (0.01%),-0.1518 (0.00%)
2,40.0,0.0035 (34.95%),-0.0075 (49.36%),-0.0179 (30.84%),-0.0073 (68.90%),-0.0215 (35.25%),-0.0116 (68.88%),0.0005 (86.40%),-0.0123 (18.18%),-0.0258 (9.41%),-0.0208 (20.20%),-0.0475 (5.94%),-0.0398 (17.22%)
3,8.0,-0.0124 (21.78%),0.0170 (38.60%),0.0197 (53.52%),-0.0729 (48.31%),-0.1190 (27.82%),-0.1047 (46.19%),-0.0006 (94.50%),0.0252 (16.27%),0.0501 (7.72%),-0.0413 (68.92%),-0.0799 (48.40%),-0.0446 (74.43%)
4,211.0,-0.0004 (88.63%),-0.0036 (46.60%),-0.0133 (11.93%),-0.0353 (0.11%),-0.0609 (0.00%),-0.0725 (0.00%),-0.0015 (55.02%),-0.0035 (47.33%),-0.0047 (55.01%),-0.0215 (3.87%),-0.0400 (0.11%),-0.0498 (0.08%)
5,24.0,0.0087 (13.03%),0.0135 (35.41%),0.0252 (30.62%),-0.0093 (82.49%),-0.0266 (59.01%),-0.0285 (66.89%),0.0067 (21.93%),0.0116 (44.65%),0.0314 (18.98%),0.0011 (97.81%),-0.0006 (98.95%),-0.0068 (91.05%)
6,26.0,0.0024 (66.51%),0.0112 (30.31%),-0.0147 (57.22%),-0.0558 (16.30%),-0.0786 (11.67%),-0.0978 (12.30%),0.0043 (42.49%),0.0104 (31.03%),-0.0257 (29.25%),-0.0728 (4.96%),-0.1063 (2.69%),-0.1403 (3.95%)
7,18.0,0.0003 (94.32%),0.0069 (55.01%),-0.0015 (93.06%),0.0071 (74.81%),-0.0089 (82.38%),-0.0711 (21.13%),-0.0000 (99.48%),0.0136 (17.22%),0.0145 (28.92%),0.0261 (19.55%),0.0179 (62.65%),-0.0318 (52.45%)
8,80.0,0.0039 (31.76%),0.0066 (37.00%),0.0095 (39.40%),-0.0040 (82.96%),-0.0155 (51.20%),0.0062 (81.27%),0.0024 (50.89%),-0.0001 (98.60%),-0.0068 (52.42%),-0.0239 (20.11%),-0.0331 (14.34%),-0.0182 (46.94%)
9,87.0,-0.0035 (2.39%),-0.0020 (63.51%),-0.0182 (0.21%),-0.0340 (0.06%),-0.0524 (0.00%),-0.0557 (0.02%),-0.0049 (0.03%),-0.0067 (10.45%),-0.0155 (0.57%),-0.0292 (0.25%),-0.0373 (0.08%),-0.0438 (0.13%)
10,13.0,0.0065 (28.39%),0.0076 (61.50%),0.0144 (52.69%),0.0306 (25.39%),-0.0511 (34.10%),-0.0769 (32.34%),0.0020 (68.68%),0.0104 (49.62%),0.0123 (60.44%),0.0314 (21.16%),-0.0295 (50.52%),-0.0590 (39.12%)


In [10]:
import glob

# display performance by time partitions
#
# For each basket, collect the last row of every per-partition evaluation file into one table
# indexed by (t0, t1), save it as performance_evaluation_total.csv and display it.

for basket_name in MODEL_PARAM:

    print(basket_name, MODEL_PARAM[basket_name])

    repository_path = f"backtests/{backtests_name}/{basket_name}"
    # the two wildcards require "<t0>_<t1>", so performance_evaluation_total.csv is not picked up
    file_path = f"{repository_path}/{sub_folder}/performance_evaluation_*_*.csv"

    master_df = list()
    for filename in glob.glob(file_path):
        # file stem is "performance_evaluation_<t0>_<t1>"; Path handles any path separator
        t0, t1 = Path(filename).stem.split("_")[2:]
        # last row of the file, as a one-row frame
        # NOTE(review): presumably the "Total" row written by the evaluation cell - confirm
        df = pd.read_csv(filename, index_col=0).iloc[-1].to_frame().T
        cols = df.columns.tolist()
        df['t0'], df['t1'] = t0, t1
        df = df[['t0', 't1'] + cols]
        master_df.append(df)

    if master_df:
        # glob order is arbitrary; sorting the (t0, t1) index puts the partitions in date order
        total_df = pd.concat(master_df).set_index(['t0', 't1']).sort_index()
        total_df.to_csv(f"{repository_path}/{sub_folder}/performance_evaluation_total.csv")
        display(total_df)
    else:
        # pd.concat raises on an empty list
        print(f"no performance_evaluation files found in {repository_path}/{sub_folder}")
    print('\n')



ESG_baseline []


Unnamed: 0_level_0,Unnamed: 1_level_0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
t0,t1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-01,2005-12-31,224,0.36% (9.89%),0.49% (22.03%),2.13% (0.6%),3.84% (0.09%),6.3% (0.01%),7.86% (0.0%),0.35% (10.79%),0.41% (29.09%),1.8% (1.18%),3.03% (0.49%),4.95% (0.07%),5.79% (0.02%)
2006-01-01,2007-12-31,252,0.47% (5.13%),0.28% (50.8%),0.71% (24.47%),2.67% (0.33%),3.64% (0.22%),5.16% (0.02%),0.3% (20.22%),-0.02% (95.79%),-0.0% (99.41%),0.89% (28.66%),0.76% (49.02%),1.3% (29.23%)
2008-01-01,2009-12-31,294,0.14% (67.14%),-0.41% (55.3%),0.92% (31.37%),2.45% (6.47%),3.42% (3.18%),3.29% (7.82%),0.18% (53.14%),-0.08% (89.67%),1.3% (9.47%),1.72% (12.46%),1.94% (15.42%),1.78% (24.65%)
2010-01-01,2011-12-31,529,0.24% (6.27%),0.07% (78.39%),0.08% (84.35%),0.38% (55.49%),-0.32% (63.95%),-0.43% (61.16%),0.22% (7.37%),-0.05% (83.62%),-0.31% (38.85%),-0.14% (82.03%),-0.79% (21.55%),-0.87% (26.23%)
2012-01-01,2013-12-31,566,-0.04% (71.12%),0.61% (1.65%),0.73% (10.94%),1.65% (1.54%),2.33% (0.75%),3.62% (0.01%),-0.01% (91.52%),0.66% (0.58%),0.8% (5.9%),1.69% (0.72%),2.18% (0.68%),2.99% (0.07%)
2014-01-01,2015-12-31,756,0.0% (97.08%),-0.27% (25.7%),-1.25% (0.2%),-2.45% (0.0%),-3.52% (0.0%),-4.93% (0.0%),-0.02% (86.27%),-0.23% (33.02%),-1.11% (0.39%),-1.83% (0.06%),-2.69% (0.0%),-3.89% (0.0%)
2016-01-01,2017-12-31,766,0.02% (89.62%),-0.32% (18.77%),-0.6% (8.84%),-0.78% (11.17%),-0.97% (10.17%),-0.74% (30.65%),0.02% (86.68%),-0.16% (47.01%),-0.51% (12.23%),-0.55% (23.88%),-0.91% (11.33%),-0.99% (16.16%)
2018-01-01,2019-12-31,651,-0.02% (90.07%),-0.23% (34.53%),-0.94% (2.36%),-3.11% (0.0%),-5.5% (0.0%),-6.76% (0.0%),-0.1% (36.91%),-0.27% (26.38%),-0.68% (8.07%),-2.5% (0.0%),-4.2% (0.0%),-5.2% (0.0%)






In [13]:
""" end of performance calculation """

In [None]:
""" generate mapping from SIC to french's industry portfolio """
import re
import json 
def _parse_french_siccodes(lines):
    """Parse the lines of a French Siccodes definition file.

    Two kinds of lines are recognised; everything else (blank lines etc.) is ignored:
      * portfolio header: optional leading whitespace, a portfolio number, whitespace and
        the portfolio short name, e.g. " 1 Agric  Agriculture" or "10 Clths  Apparel"
      * SIC range: leading whitespace followed by "<first>-<last>", e.g. "  0100-0199 ..."

    Returns a dict mapping each portfolio short name to the list of all SIC codes
    (ints, range ends inclusive) listed under it.

    Raises ValueError if a SIC range appears before any portfolio header.
    """
    # The number field may or may not be padded with a leading space, so headers with
    # two-digit numbers (no padding) must be accepted as well.
    header_pattern = re.compile(r"^\s*\d+\s+(\S+)")
    range_pattern = re.compile(r"^\s+(\d+)-(\d+)")

    portfolios = dict()
    current = None
    for line in lines:
        range_match = range_pattern.match(line)
        if range_match:
            if current is None:
                raise ValueError(f"SIC range before any portfolio header: {line!r}")
            first, last = int(range_match.group(1)), int(range_match.group(2))
            portfolios[current].extend(range(first, last + 1))
            continue
        header_match = header_pattern.match(line)
        if header_match:
            current = header_match.group(1)
            portfolios[current] = list()
    return portfolios


industry_portfolio_type=49
with open(f"data/French_Data_Siccodes{industry_portfolio_type}.txt", "r") as reader:
    # portfolio short name -> list of SIC codes
    master = _parse_french_siccodes(reader)

# Invert to SIC -> portfolio short name; if a SIC is listed under several portfolios the
# last one wins.
SIC_Portfolio_mapping = {
    SIC: portfolio
    for portfolio, SICs in master.items()
    for SIC in SICs
}
# json.dump writes the int keys as strings.
# NOTE(review): the loader cell reads this file name from
# data/industry_classification_and_portfolio/ - confirm the file is moved there after generation.
with open(f"data/SIC_portfl_mapping_Siccodes{industry_portfolio_type}.json", "w") as f:
    json.dump(SIC_Portfolio_mapping, f, indent=4)

In [None]:
""" plot histogram of returns """
# Histogram of the 'Annual return' column, drawn with plotly.
# NOTE(review): relies on `return_stats` left over from the performance loop in an earlier
# cell (i.e. the last sector processed), and switches the pandas plotting backend session-wide.
pd.set_option("plotting.backend", "plotly")
series = return_stats['Annual return']
df = series
fig = series.plot.hist(width=1000)
fig.show()