In [3]:
import pandas as pd 
import numpy as np 
import wrds
import os
import requests
import unicodedata

In [4]:
# Open a WRDS session; `db.raw_sql` is used further down to query the Compustat company table.
db = wrds.Connection(wrds_username='codywan')

Loading library list...
Done


In [3]:
""" prepare for scraping corporate annoucments/articles/press on sustainability """

' prepare for scraping corporate annoucments/articles/press on sustainability '

In [4]:
# Load the PERMNO universe (first row of a header-less CSV of integers);
# the PERMNOs are later mapped to GVKEYs to look up each company's website URL.
permno_list = (
    pd.read_csv(
        "data/permno/filtered_permno_list_19960105_20200101.csv",
        header=None,
        dtype='int',
    )
    .iloc[0]
    .values
)
len(permno_list)

775

In [5]:
# get linking table from permno to gvkey
LINKING_FILE_PATH = '/Users/codywan/Data/WRDS Data/crspa_ccmlinktable.csv'
# keep_default_na=False reads blank cells as "", which are then converted to NaN explicitly
# so that rows missing either identifier can be dropped.
# np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
link = (
    pd.read_csv(LINKING_FILE_PATH, header=0, keep_default_na=False)
    .replace("", np.nan)
    .dropna(subset=['LPERMNO', 'GVKEY'])
)
link[['LPERMNO', 'GVKEY']] = link[['LPERMNO', 'GVKEY']].astype('int')
# PERMNO -> list of distinct GVKEYs, built in one groupby pass
# (instead of re-filtering the whole link table once per PERMNO)
permno_gvkey_mapping = {
    int(permno): gvkeys.tolist()
    for permno, gvkeys in link.groupby('LPERMNO')['GVKEY'].unique().items()
}
# get gvkey_list from permno_list (to fetch company weburl data)
gvkey_list = [permno_gvkey_mapping[permno] for permno in permno_list if permno in permno_gvkey_mapping]
# a few have no match (e.g. 16267 ARROWSTREET CAPITAL,
# which is confusing -- it looks like a private asset management firm)
# see db.raw_sql("SELECT * FROM crsp.msf WHERE permno=16267")
# and a few have one permno matched to multiple gvkeys:
print(len(gvkey_list), sum(len(gvkey) for gvkey in gvkey_list))
# flatten gvkey_list to get company name and url from wrds
gvkey_list = [a for b in gvkey_list for a in b]
print(len(gvkey_list))

772 781
781


In [6]:
# Fetch legal name (conml) and website (weburl) for every GVKEY, keep only the
# companies that have a website, index by integer gvkey, and add a market-cap
# placeholder column (mkvalt) initialised to 0.
res = (
    db.raw_sql(
        "SELECT gvkey, conml, weburl FROM comp.company "
        f"where CAST(gvkey AS INT) in ({','.join(map(str, gvkey_list))})"
    )
    .dropna(subset=['weburl'], axis=0, how='any')
    .astype({'gvkey': 'int64'})
    .set_index('gvkey')
    .assign(mkvalt=0)
)
len(res), res.head()
# res.to_csv("data/web_scraping/seeds.txt", index=False)

(566,
                                   conml                   weburl  mkvalt
 gvkey                                                                   
 1072                          AVX Corp.              www.avx.com       0
 1414                      Primerica Inc        www.primerica.com       0
 1177                         Aetna Inc.            www.aetna.com       0
 1487   American International Group Inc              www.aig.com       0
 1447                American Express Co  www.americanexpress.com       0)

In [8]:
# One-off WRDS query that produced the local cache read below (kept for reference):
# df = db.raw_sql(f"SELECT gvkey, fyear, mkvalt FROM compa.funda "
#                 f"where CAST(gvkey AS INT) in ({','.join(str(g) for g in gvkey_list)})"
#                 f"AND mkvalt IS NOT NULL")
# df.to_csv("data/web_scraping/gvkey_mkvalt.csv")

# read gvkey to mkvalt (market cap) from the local cache; the first CSV column is the saved index
df = pd.read_csv("data/web_scraping/gvkey_mkvalt.csv", index_col=0, dtype={'gvkey':'int64'})

In [9]:
# map mkvalt to dataframe
for row in res.itertuples():
    gvkey = getattr(row, 'Index')
    df_gvkey = df[df['gvkey']==gvkey]
    if not df_gvkey.empty:
        res.at[gvkey, 'mkvalt']=df_gvkey.iloc[-1]['mkvalt']

In [10]:
# rank companies by market cap, largest first, then preview the top rows
res = res.sort_values('mkvalt', ascending=False)
res.reset_index().head()

Unnamed: 0,gvkey,conml,weburl,mkvalt
0,160329,Alphabet Inc,www.abc.xyz,729439
1,170617,Facebook Inc,investor.fb.com,512792
2,12141,Microsoft Corp,www.microsoft.com,354392
3,179534,Visa Inc,usa.visa.com,333174
4,4503,Exxon Mobil Corp,corporate.exxonmobil.com,323960


In [222]:
# shuffle rows of the dataframe
# for a different order of scraping websites
# (no random_state is passed, so each run yields a different order)
res = res.sample(frac=1)

In [224]:
# NOTE: don't run, unless prepping for scraping
# Spread "company name,website" lines round-robin over n seed files; a separate
# python script reads those files to do the scraping. Files are opened in append
# mode, so running this cell again adds the same lines a second time.
n = 14
count = 0
for i, (conml, weburl) in enumerate(zip(res['conml'], res['weburl'])):
    # if count < n * 5:
    #     count += 1
    #     continue
    file_num = i % n + 1
    with open(f"data/web_scraping/seeds_part{file_num}.txt", 'a') as f:
        # the American Express site is skipped
        if weburl != 'www.americanexpress.com':
            f.write(f"{conml},{weburl}\n")

In [11]:
""" test a few functionalities used for scraping """

' test a few functionalities used for scraping '

In [None]:
# NOTE: test dragnet, for extracting meaningful content of a web page

from dragnet import extract_content
# url = r"https://www.microsoft.com/en-us/corporate-responsibility/sustainability"
url = r"https://www.microsoft.com/en-us/corporate-responsibility/sustainability/operations"
# download the page and hand the raw response body to dragnet
response = requests.get(url)
content = extract_content(response.content)

In [None]:
# NFKD-normalize the extracted text, turn newlines into spaces, drop tabs,
# and replace each double space with a single one (one pass only)
unicodedata.normalize("NFKD", content).replace("\n", " ").replace("\t", "").replace("  ", " ")

In [None]:
# dragnet is pretty accurate at getting the content but it also misses a lot of things compared to
# good old-fashioned soup.find_all('p') using the same url
from bs4 import BeautifulSoup
soup = BeautifulSoup(requests.get(url).content, 'html5')
text = soup.find_all('p')
for t in text:
    t = t.get_text()
    # NFKD-decompose, drop every character without an ASCII form, then decode back to str.
    # Wrapping the encoded bytes in str() would produce the literal "b'...'" repr, not the text.
    page_text_norm = unicodedata.normalize('NFKD', t).encode(encoding='ascii', errors='ignore').decode('ascii')
    # print(page_text_norm)

In [None]:
# However, we only need to identify some texts on sustainability with high confidence. Let's use dragnet on
# every company in our list and see whether, collectively, their word distribution skews towards sustainability, CSR, etc.

In [None]:
# NOTE: test whitelist and blacklist words

from scrapy.spiders import Spider
from scrapy.http import Request
from scrapy.http.response.html import HtmlResponse
from scrapy.linkextractors import LinkExtractor
from urllib.parse import urlparse
import re

In [None]:
# Build a scrapy Request for the company's home page and tag it with the
# (whitespace-stripped) company name through the request's meta dict.
company = "microsoft"
url = r"https://www.microsoft.com"
request = Request(url)
request.meta['company'] = company.strip()

In [None]:
# URL keywords to test link filtering with: `whitelist` words mark a link as relevant,
# `blacklist` words mark it as off-topic.
# A comma was missing after 'conservation'; Python's implicit string concatenation then
# merged it with the next item into the single, useless keyword 'conservationenvironment'.
whitelist = ['csr', 'esg', 'responsibility', 'sustainability', 'climate', 'conservation',
                          'environment', 'renewable', 'carbon-footprint', 'clean-energy', 'carbon-neutral',
                          'corporate-governance', 'diverse-workforce', 'climate-change', 'customer-privacy',
                          'community-impact', 'clean-water']
blacklist = ['document', 'blog', 'product', 'profit', 'revenue', 'archive', 'search', 'login',
                          'accessories', 'shop', 'support', 'developer', 'dmg', 'forums']

In [None]:
link_str = ' https://support.apple.com/es-us/guide/iphone/iph3dd5fc7e/12.0/ios/12.0'
# Regex alternation (| means OR) takes the first alternative that matches, so a short word
# listed before a longer one it prefixes (e.g. 'climate' before 'climate-change') would always
# win and the longer keyword could never be reported. Sorting longest-first avoids that;
# re.escape keeps every word a literal match.
whitelist_pattern = "|".join(re.escape(word) for word in sorted(whitelist, key=len, reverse=True))
blacklist_pattern = "|".join(re.escape(word) for word in sorted(blacklist, key=len, reverse=True))
# keywords stores the distinct whitelist matches, flashcards the distinct blacklist matches;
# re.I means ignore case
keywords = list(set(re.findall(whitelist_pattern, link_str, flags=re.I)))
flashcards = list(set(re.findall(blacklist_pattern, link_str, flags=re.I)))

In [12]:
""" after scraping, process scraped content stored in local mongodb database """

' process scraped content stored in local mongodb database '

In [31]:
from pymongo import MongoClient

class Connect(object):
    """Factory for the client of the local MongoDB instance that stores the scraped pages."""

    @staticmethod
    def get_connection():
        """Return a MongoClient for port 27017, authenticating against the "admin" database.

        Credentials are read from the MONGO_USERNAME / MONGO_PASSWORD environment
        variables so they do not have to live in the notebook; the previous
        hard-coded values remain the fallback when the variables are unset.
        """
        import os  # local import: keeps this cell runnable on its own
        return MongoClient(port=27017,
                           username=os.environ.get('MONGO_USERNAME', 'codywan'),
                           password=os.environ.get('MONGO_PASSWORD', 'password'),
                           authSource="admin")

# connect to local mongodb
connection = Connect.get_connection()
# NOTE: this rebinds `db`, which earlier in the notebook held the WRDS connection
db = connection['admin']

In [32]:
# get whitelist used in scraping, for compiling statistics
keywords_dict = dict()

# further process scraped content

from langid.langid import LanguageIdentifier, model
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)

def convert_unicode(content):
    """Return `content` NFKD-normalized, with newlines turned into spaces, tabs removed,
    and each double space replaced by a single one (one pass only)."""
    return unicodedata.normalize("NFKD", content).replace("\n", " ").replace("\t", "").replace("  ", " ")

# MongoDB system collections that hold no scraped content
filter_list = ["system.users", 'system.version']
master_texts = list()
# list_collection_names() replaces Database.collection_names(), which PyMongo 4 removed
for collection_name in db.list_collection_names():
    if collection_name not in filter_list:
        # only store unique texts/str (uniqueness is per collection)
        sub_texts = set()
        cursor = db[collection_name].find({})
        for item in cursor:
            # convert any leftover unicode
            text = convert_unicode(item['content'])
            # skip non-English texts
            lang, prob = identifier.classify(text)
            if prob > 0.8 and lang == 'en':
                sub_texts.add(text)
            # get count for keywords (counted for every item, including skipped ones)
            for keyword in item['keywords']:
                keywords_dict[keyword] = keywords_dict.get(keyword, 0) + 1
        master_texts.extend(list(sub_texts))

In [34]:
# number of scraped items tagged with each keyword
keywords_dict

{'sustainability': 46875,
 'carbon-neutral': 114,
 'corporate-responsibility': 425,
 'climate-change': 749,
 'corporate-governance': 140,
 'social-responsibility': 252,
 'community-impact': 36,
 'clean-energy': 203,
 'global-warming': 26,
 'clean-water': 65,
 'social-justice': 14,
 'social-impact': 98,
 'carbon-footprint': 34,
 'inclusive-environment': 4,
 'customer-privacy': 3,
 'diverse-workforce': 2}

In [37]:
# save processed text corpora to local
import pickle
with open("data/web_scraping/sustainability_corpora.pickle", "wb") as f:
    pickle.dump(master_texts, f)

In [38]:
""" after saving processed scraped content to local as corpora, prepare corpora for LDA """

' after saving processed scraped content to local as corpora, prepare corpora for LDA '

In [1]:
import pickle
# load text corpora
# NOTE: unpickling can execute arbitrary code; only load pickle files written by this notebook
with open("data/web_scraping/sustainability_corpora.pickle", "rb") as f:
    texts = pickle.load(f)

In [2]:
# number of documents in the loaded corpus
len(texts)

15407

In [41]:
# convert text to bag of words:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer

wordlemmatizer = WordNetLemmatizer()
# Load the stop-word list once, as a set: calling stopwords.words('english') inside the
# comprehension re-built the list (and scanned it linearly) for every single token.
english_stopwords = set(stopwords.words('english'))
list_of_words = list()

for text in texts:
    # tokenize text as English (lowercase language name: the capitalised 'English'
    # only resolves to the tokenizer data on case-insensitive filesystems)
    text_tokenized = word_tokenize(text, language='english')
    # convert to lowercase and remove punctuation / non-alphabetic tokens
    text_tokenized = [word.lower() for word in text_tokenized if word.isalpha()]
    # drop stop words, then lemmatize the remaining tokens
    text_tokenized = [wordlemmatizer.lemmatize(word) for word in text_tokenized if word not in english_stopwords]
    list_of_words.append(text_tokenized)

In [42]:
# how many documents have at most 20 tokens (likely off-topic content)
sum(1 for words in list_of_words if len(words) <= 20)

81

In [43]:
# keep only documents with more than 20 tokens; shorter ones are likely scraped from
# comment or survey sections with no substantial information
list_of_words = list(filter(lambda words: len(words) > 20, list_of_words))

In [44]:
# save processed text tokenized to local
# (list_of_words: one list of tokens per retained document)
with open("data/web_scraping/sustainability_corpora_tokenized.pickle", "wb") as f:
    pickle.dump(list_of_words, f)

In [45]:
""" after saving tokenized corpora to local, load and prepare tokenized corpora for LDA """

' after saving tokenized corpora to local, load and prepare tokenized corpora for LDA '

In [1]:
import pickle
# load the tokenized corpus
# NOTE: unpickling can execute arbitrary code; only load pickle files written by this notebook
with open("data/web_scraping/sustainability_corpora_tokenized.pickle", "rb") as f:
    list_of_words = pickle.load(f)

In [2]:
# number of tokenized documents
len(list_of_words)

15326

In [3]:
# NOTE: run LDA
from gensim.test.utils import common_texts
from gensim.corpora.dictionary import Dictionary
from gensim import corpora, models
import gensim
import pyLDAvis.gensim

# create dictionary (token <-> integer id) based on list of words
dictionary = Dictionary(list_of_words)
# drop tokens that appear in more than 80% of the documents (no_above is a fraction of the
# corpus, not a percentage); the rare-token threshold no_below is left at gensim's default
dictionary.filter_extremes(no_above=0.8)
# bag-of-words representation of every document
bow_corpus = [dictionary.doc2bow(doc) for doc in list_of_words]

In [5]:
# apply tf-idf
# fit the TF-IDF model on the bag-of-words corpus, then apply it to that same corpus
tfidf = models.TfidfModel(bow_corpus)
corpus_tfidf = tfidf[bow_corpus]

In [6]:
# fit with bag of words (without tf-idf)
num_topics=6
path=f"data/LDA/lda_model_{num_topics}topics"
# makedirs also creates a missing parent ("data/LDA"), where os.mkdir would raise,
# and exist_ok makes re-running the cell a no-op for the directory
os.makedirs(path, exist_ok=True)
lda_model = gensim.models.LdaMulticore(bow_corpus, num_topics=num_topics, id2word=dictionary, workers=7)
# save model specifications to local
lda_model.save(f"{path}/topic_model.model")
dictionary.save(f"{path}/dictionary.dict")
corpora.MmCorpus.serialize(f"{path}/corpus.mm", bow_corpus)
# visualize LDA
res = pyLDAvis.gensim.prepare(lda_model, bow_corpus, dictionary)
pyLDAvis.save_html(res,f'{path}/vis.html')

In [21]:
# show distribution of words for each topic (-1 choose all topics), ordered by significance
# for idx, topic in lda_model.print_topics(-1):
#     print(f'Topic: {idx} \nWords: {topic}')

In [22]:
# fit with bag of words with tf-idf
path="data/LDA/lda_model_tfidf"
# the output directory was never created before saving into it
os.makedirs(path, exist_ok=True)
lda_model_tfidf = gensim.models.LdaMulticore(corpus_tfidf, num_topics=20, id2word=dictionary, workers=7)
# save model specifications to local
# The TF-IDF model and TF-IDF corpus are what get saved and visualised here; this cell
# previously wrote the bag-of-words `lda_model` / `bow_corpus` into the tf-idf directory.
lda_model_tfidf.save(f"{path}/topic_model.model")
dictionary.save(f"{path}/dictionary.dict")
corpora.MmCorpus.serialize(f"{path}/corpus.mm", corpus_tfidf)
# visualize LDA
res = pyLDAvis.gensim.prepare(lda_model_tfidf, corpus_tfidf, dictionary)
pyLDAvis.save_html(res,f'{path}/vis.html')

In [None]:
# for idx, topic in lda_model_tfidf.print_topics(-1):
#     print(f'Topic: {idx} \nWords: {topic}')

In [99]:
# Sanity check: infer the topic mixture of one sample document (index 2) with the
# bag-of-words LDA model and print the topics from highest to lowest score,
# each with its 10 most significant words.
for index, score in sorted(
    lda_model[dictionary.doc2bow(list_of_words[2])],
    key=lambda topic: topic[1],
    reverse=True,
):
    print(f"\nScore: {score}\t \nTopic: {lda_model.print_topic(index, 10)}")


Score: 0.9526092410087585	 
Topic: 0.013*"company" + 0.011*"energy" + 0.009*"climate" + 0.006*"sustainability" + 0.006*"said" + 0.006*"change" + 0.006*"business" + 0.005*"community" + 0.005*"environmental" + 0.005*"report"


In [24]:
""" load fitted lda model from local """

# NOTE(review): this reads from "data/LDA/lda_model", whereas the fitting cells in this notebook
# save under "data/LDA/lda_model_{num_topics}topics" and "data/LDA/lda_model_tfidf" -- confirm
# that this directory holds the intended model.
path="data/LDA/lda_model"
# restore the fitted model, its dictionary and the serialized corpus
lda_model = gensim.models.LdaModel.load(f'{path}/topic_model.model')
dictionary = gensim.corpora.Dictionary.load(f'{path}/dictionary.dict')
bow_corpus = gensim.corpora.MmCorpus(f'{path}/corpus.mm')

In [35]:
# show distribution of words for each topic (-1 choose all topics), ordered by significance
# for idx, topic in lda_model.print_topics(-1):
#     print(f'Topic: {idx+1} \nWords: {topic}')

In [46]:
# apply LDA model to signal filings from keywords matching
# !python utils_LDA.py

In [None]:
""" evaluate LDA signal """

In [13]:
import importlib
import wrds
import numpy as np 
import pandas as pd
import json
import utils.backtest_performance
importlib.reload(utils.backtest_performance)
from utils.backtest_performance import price_history_to_cumu, add_to_return_stats, get_price_history_yahoo_finance, get_price_history, get_return_stats, get_NACIS_code_sector_name

# open a WRDS session; it is passed to the price-history, return-stats and earnings-call helpers below
db = wrds.Connection(wrds_username='codywan')
# db.create_pgpass_file() # first-time use
def get_CIK_mapping(linking_file_path=None):
    """
    Build CIK-keyed lookup tables from the CRSP/Compustat linking table.

    Parameters
    ----------
    linking_file_path : str, optional
        CSV with at least the columns LPERMNO, cik, tic and sic. When omitted,
        the local WRDS export used by this notebook is read.

    Returns
    -------
    CIK_PERMNO_mapping : dict
        CIK -> set of PERMNOs (for retrieving price history from wrds).
    CIK_TICKER_mapping : dict
        CIK -> ticker (for retrieving price history from yahoo finance) when every
        CIK has exactly one ticker; otherwise CIK -> list of tickers, and
        "multiple tickers for CIK" is printed.
    CIK_SIC_mapping : dict
        CIK -> SIC code as int, taken from the first row of that CIK
        (for benchmarking against industry-level returns).

    Raises
    ------
    ValueError
        If the first SIC value of some CIK cannot be converted to int.
    """
    if linking_file_path is None:
        linking_file_path = '/Users/codywan/Data/WRDS Data/crspa_ccmlinktable.csv'

    # Blank cells are read as "" (keep_default_na=False) and converted to NaN explicitly,
    # so rows missing a PERMNO or CIK can be dropped.
    # np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed.
    link = (
        pd.read_csv(linking_file_path, header=0, keep_default_na=False)
        .replace("", np.nan)
        .dropna(subset=['LPERMNO', 'cik'])
    )
    link[['LPERMNO', 'cik']] = link[['LPERMNO', 'cik']].astype('int')
    link['tic'] = link['tic'].astype('str')

    # One pass over the rows fills all three tables; the table is not re-filtered per CIK.
    CIK_PERMNO_mapping = {}
    CIK_TICKER_mapping = {}
    first_sic = {}
    for CIK, permno, ticker, sic in zip(link['cik'], link['LPERMNO'], link['tic'], link['sic']):
        CIK_PERMNO_mapping.setdefault(CIK, set()).add(permno)
        tickers = CIK_TICKER_mapping.setdefault(CIK, [])
        if ticker not in tickers:
            tickers.append(ticker)
        first_sic.setdefault(CIK, sic)

    # check if the mapping between CIK and TICKER is one-to-one
    if len(CIK_TICKER_mapping) == sum(len(tickers) for tickers in CIK_TICKER_mapping.values()):
        CIK_TICKER_mapping = {CIK: tickers[0] for CIK, tickers in CIK_TICKER_mapping.items()}
    else:
        print("multiple tickers for CIK")

    CIK_SIC_mapping = {CIK: int(sic) for CIK, sic in first_sic.items()}
    return CIK_PERMNO_mapping, CIK_TICKER_mapping, CIK_SIC_mapping

Loading library list...
Done


In [14]:
# read SIC to industry portfolio mapping
# (parsed from JSON; later passed to get_return_stats together with the CIK -> SIC table)
with open("data/industry_classification_and_portfolio/SIC_portfl_mapping_Siccodes49.json", "r") as f:
    SIC_portfolio_mapping = json.load(f)

In [15]:
# build the CIK -> PERMNO / ticker / SIC lookup tables from the linking table
CIK_PERMNO_mapping, CIK_TICKER_mapping, CIK_SIC_mapping = get_CIK_mapping()

In [16]:
# sector codes and names; the names are used below as per-sector sub-directories of the backtest repository
NACIS_sector_code, NACIS_sector_name = get_NACIS_code_sector_name()

In [97]:
import logging
from pathlib import Path  # used for the log directory and the per-sector file check below
# my_logger = logging.getLogger()
# my_logger.disabled = True

# logging.basicConfig cannot create missing directories, so make sure the log folder exists
Path("logs/generate_price_history").mkdir(parents=True, exist_ok=True)
logging.basicConfig(filename="logs/generate_price_history/log.txt",
                    filemode='a',
                    level=logging.INFO,
                    format='%(levelname)s: %(asctime)s - %(message)s',
                    datefmt='%m/%d/%Y %I:%M:%S')


repository_path = "backtests/LDA"

# get price history and performance metrics for each signal event
HOLDING_PERIOD = 30*5 # in days

# one return-stats table per sector that has a signal file
master_df = list()
for sector_name in NACIS_sector_name:

    # sectors without a signal file are skipped silently
    file_path = f"{repository_path}/{sector_name}/signal.csv"
    if not Path(file_path).is_file():
        continue
    print(sector_name)

    # get signal events: every column name is parsed as an int key, its non-null cells are the events
    # NOTE(review): keys are presumably CIKs, given the CIK-keyed mappings passed below -- confirm
    df = pd.read_csv(file_path)
    BUY_SIGNAL = { int(col): df[col].dropna().to_list() for col in df}

    # get price history from wrds
    price_history, volume_on_event_day, no_price_history_signal = get_price_history(db, CIK_PERMNO_mapping, BUY_SIGNAL, HOLDING_PERIOD)
    # get price history of those not available from wrds, from yahoo finance
    ex_price_history, ex_volume_on_event_day, _ = get_price_history_yahoo_finance(CIK_TICKER_mapping, no_price_history_signal, HOLDING_PERIOD)
    # put price history from wrds and yahoo finance together
    price_history[ex_price_history.columns] = ex_price_history
    volume_on_event_day = pd.concat([volume_on_event_day, ex_volume_on_event_day])

    # compute return statistics
    return_stats = get_return_stats(db, price_history, CIK_SIC_mapping, SIC_portfolio_mapping)

    # add volume data to return stats
    add_to_return_stats(index=3, df=volume_on_event_day, return_stats=return_stats)

    # get earnings call flag and add to return stats
    earnings_call_date, _ = utils.backtest_performance.get_earnings_call_date(db, CIK_TICKER_mapping, BUY_SIGNAL)
    add_to_return_stats(index=4, df=earnings_call_date, return_stats=return_stats)

    # append to master variables
    master_df.append(return_stats)
    display(return_stats.sort_values("R", ascending=False).iloc[:5])
    # save to local
    return_stats.to_csv(f"{repository_path}/{sector_name}/performance.csv")

    # break

# sector-aggregated performance table to local
# (pd.concat raises on an empty list, so only write when at least one sector was processed)
if master_df:
    pd.concat(master_df).to_csv(f"{repository_path}/performance.csv")
else:
    print(f"no signal.csv found under {repository_path}; nothing to aggregate")

Agriculture, Forestry, Fishing and Hunting
8/8 price history generated from wrds/crsp
0/0 price history generated from yahoo finance
8/8 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
18169,2012-09-17,2012-09-18,1.015328,1.000297,0.015031,4.063723,,0.023594,-0.060028,-0.109767,-0.219718,-0.302071,Agric,-0.008352,-0.060351,-0.138513,-0.324356,-0.372845
1110783,2008-03-31,2008-04-01,1.004484,1.03277,-0.028285,1.191451,1.0,0.090976,0.017837,0.171132,-0.004116,0.096612,Agric,0.006831,-0.004927,0.015511,-0.010566,-0.004276
1110783,2013-01-30,2013-01-31,0.995522,1.009092,-0.01357,0.769629,,-0.024049,-0.031472,-0.02914,-0.078926,-0.1533,Agric,-0.001248,-0.000581,-0.004012,-0.014729,-0.026945
18169,2012-09-20,2012-09-21,0.994425,0.997799,-0.003374,2.400726,,-0.066822,-0.141302,-0.193622,-0.273133,-0.328859,Agric,-0.069676,-0.138451,-0.207239,-0.338833,-0.365333
1110783,2017-01-26,2017-01-27,0.988798,0.992008,-0.00321,0.72279,,-0.029457,0.007649,0.022494,0.010466,-0.016598,Agric,-0.000824,0.010097,0.007999,0.010048,0.005113


Mining, Quarrying, and Oil and Gas Extraction
212/258 price history generated from wrds/crsp
9/11 price history generated from yahoo finance
245/258 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
773910,2014-04-02,2014-04-03,1.168613,0.982246,0.186368,0.851952,,0.164832,0.164451,0.232864,0.210824,0.162857,Oil,0.11063,0.10899,0.16456,0.173723,0.192149
1518832,2019-12-13,2019-12-16,1.163153,1.008407,0.154746,1.970733,,0.240296,0.072927,-0.43311,-0.47173,-0.316202,Oil,0.221876,0.165123,-0.130052,-0.178056,-0.080214
1070412,2016-02-26,2016-02-29,1.154337,1.016339,0.137998,1.112579,,0.475397,0.661811,0.705788,0.944704,1.194114,Oil,0.430704,0.58954,0.605133,0.844409,1.099941
831259,2016-11-08,2016-11-09,1.141439,1.017847,0.123592,0.780734,,0.295063,0.070498,0.196267,-0.047473,-0.168796,Mines,0.174731,0.019104,0.074487,-0.036726,-0.109522
1524741,2015-10-26,2015-10-27,1.131396,1.01004,0.121356,0.618072,0.0,0.252973,0.273716,0.125774,0.251364,0.577935,Mines,0.294259,0.387221,0.295162,0.27731,0.51928


Utilities
223/355 price history generated from wrds/crsp
11/12 price history generated from yahoo finance
332/355 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1379895,2016-02-23,2016-02-24,1.213555,1.016358,0.197197,1.728756,0.0,0.453239,0.773437,1.250187,1.075562,0.46127,Oil,0.405391,0.738588,1.130609,0.937765,0.389769
1599947,2017-10-16,2017-10-17,1.17474,1.0015,0.17324,1.73272,,0.154127,0.028985,-0.107634,-0.072377,-0.089068,Util,0.14665,0.027516,0.03292,0.083368,0.015199
1379895,2016-02-29,2016-03-01,1.173611,1.028926,0.144685,1.960314,,0.196714,0.400595,0.660995,0.582245,0.052505,Oil,0.142417,0.333028,0.543317,0.476972,-0.040246
1379895,2016-06-27,2016-06-28,1.164843,1.035917,0.128926,1.003176,,0.145246,-0.243393,-0.196788,-0.521295,-0.564684,Oil,0.15918,-0.171752,-0.127497,-0.497298,-0.586999
1379895,2013-03-13,2013-03-14,1.134787,1.003889,0.130898,4.187053,0.0,0.178709,0.190189,0.080704,0.024129,-0.134212,Oil,0.181981,0.226424,0.111575,0.059815,-0.094318


Construction
25/26 price history generated from wrds/crsp
0/0 price history generated from yahoo finance
26/26 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1124198,2011-05-05,2011-05-06,1.099406,1.010125,0.089281,0.998076,-1.0,0.024651,-0.02148,-0.018665,0.008205,-0.067233,Cnstr,0.035199,0.010634,0.048044,0.115567,0.033915
1124198,2018-08-02,2018-08-03,1.088781,1.007714,0.081067,0.732614,-1.0,0.090653,0.092561,-0.084562,-0.267086,-0.22279,Cnstr,0.099724,0.11978,-0.001168,-0.174987,-0.171399
1124198,2017-11-01,2017-11-02,1.066454,1.003702,0.062752,0.645956,0.0,0.053419,0.108274,0.281696,0.228015,0.324636,Cnstr,0.048044,0.077278,0.300239,0.301158,0.368217
1124198,2007-12-20,2007-12-21,1.044484,1.024531,0.019953,1.135959,,0.05808,-0.109195,0.028284,0.124859,0.399727,Cnstr,0.113669,-0.104134,0.030115,0.092503,0.321715
1357615,2011-05-23,2011-05-24,1.039493,1.003094,0.036399,0.633402,,0.072612,0.08255,-0.090431,-0.172019,-0.136328,Cnstr,0.075575,0.085043,0.012817,-0.035118,-0.040436


Manufacturing
1255/1493 price history generated from wrds/crsp
35/38 price history generated from yahoo finance
1426/1493 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1609351,2019-02-22,2019-02-25,2.200349,0.999898,1.200451,14.623877,,1.21674,1.157118,1.096636,0.926713,0.9392,Drugs,1.212526,1.169531,1.140298,0.961948,0.98835
1651235,2018-10-31,2018-11-01,1.29823,1.007532,0.290699,0.963224,0.0,0.296933,0.2228,0.264915,0.53081,0.611571,Chips,0.352335,0.298457,0.373217,0.582045,0.626957
1408356,2013-10-09,2013-10-10,1.270602,1.029558,0.241044,1.004837,,0.57239,0.20971,0.598241,0.911686,0.478406,ElcEq,0.553421,0.192517,0.575646,0.906874,0.454753
1307954,2007-06-22,2007-06-25,1.26357,0.992614,0.270956,0.797463,,0.44118,0.323961,0.36959,0.354159,0.303165,Chems,0.433709,0.300424,0.322982,0.274751,0.175078
1644440,2017-03-01,2017-03-02,1.252372,0.993894,0.258478,1.223336,0.0,0.279218,0.275653,0.160194,0.14684,0.067871,Chems,0.27726,0.26815,0.157853,0.115911,0.051608


Wholesale Trade
110/138 price history generated from wrds/crsp
6/9 price history generated from yahoo finance
131/138 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1599617,2018-04-30,2018-05-01,1.140973,0.996085,0.144888,0.93264,1.0,0.17781,0.07123,0.112703,0.214043,0.197159,Whlsl,0.191633,0.076755,0.130991,0.227192,0.238419
1600438,2018-10-31,2018-11-01,1.097324,1.007532,0.089792,1.041496,,0.055843,0.068865,0.154118,-0.035835,-0.007173,Whlsl,0.04143,0.067204,0.154808,-0.0168,0.013969
1600438,2017-09-05,2017-09-06,1.087299,1.002098,0.085201,1.399358,0.0,0.06191,0.020853,0.088609,0.041597,-0.14199,Whlsl,0.057446,0.001018,0.091634,0.002543,-0.144197
1577916,2019-05-03,2019-05-06,1.073448,0.979266,0.094182,1.255155,1.0,0.137516,0.093999,0.123177,0.009525,-0.031747,Whlsl,0.122555,0.093334,0.125267,-0.025996,-0.093437
1439095,2019-11-01,2019-11-04,1.065714,1.003699,0.062015,3.113164,,-0.024029,-0.052104,-0.250738,-0.486794,-0.618368,Whlsl,-0.026529,-0.041332,-0.2071,-0.412566,-0.525778


Retail Trade
89/110 price history generated from wrds/crsp
2/3 price history generated from yahoo finance
108/110 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1610092,2015-02-25,2015-02-26,1.175549,0.995603,0.179946,0.723223,,0.117126,0.360446,0.515172,0.772516,0.783683,Rtail,0.098042,0.343004,0.529354,0.765857,0.744885
1419852,2012-04-09,2012-04-10,1.120448,0.989843,0.130605,2.879444,0.0,0.053594,-0.136273,-0.155651,-0.240349,-0.328553,Rtail,0.057331,-0.176997,-0.211952,-0.301462,-0.400938
1419852,2014-09-03,2014-09-04,1.118817,1.002792,0.116025,0.988721,0.0,0.098575,0.08026,0.218416,0.029966,0.0069,Rtail,0.097472,0.053115,0.140398,-0.084956,-0.123682
1326380,2014-03-27,2014-03-28,1.100991,1.01384,0.087151,1.802533,-1.0,0.106909,-0.032187,0.026252,0.077953,0.0981,Rtail,0.108637,-0.020408,0.066689,0.116497,0.126109
1573516,2014-05-08,2014-05-09,1.080788,1.01453,0.066258,2.680823,,0.096363,0.039558,0.054836,0.140927,0.19663,Rtail,0.103535,0.062977,0.073018,0.143771,0.18868


Transportation and Warehousing
54/66 price history generated from wrds/crsp
2/4 price history generated from yahoo finance
62/66 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1614436,2016-04-01,2016-04-04,1.417224,0.986539,0.430685,8.20506,,0.409342,0.426455,0.45304,0.374042,0.294909,Trans,0.403695,0.469186,0.523974,0.440882,0.345429
1090727,2019-07-23,2019-07-24,1.123302,1.000258,0.123044,2.575551,0.0,0.139655,0.17821,0.145649,0.115991,0.044136,Trans,0.141215,0.211594,0.185251,0.179604,0.125693
1066107,2011-05-23,2011-05-24,1.112223,1.003094,0.10913,1.161312,,0.081006,0.052069,0.075378,0.067775,0.347933,Util,0.064039,0.060756,-0.025865,-0.069833,0.235496
1492691,2011-10-26,2011-10-27,1.088506,1.035296,0.053209,1.765881,-1.0,-0.008496,0.075568,0.158265,0.153678,0.036359,Trans,-0.014349,0.055254,0.126347,0.178767,0.054647
1066107,2006-02-15,2006-02-16,1.087237,1.005888,0.081349,1.073787,,0.040513,-0.042451,0.186511,0.243662,0.225207,Util,0.026419,-0.014493,0.185447,0.176198,0.094655


Information
509/605 price history generated from wrds/crsp
22/25 price history generated from yahoo finance
582/605 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
732712,2008-10-24,2008-10-27,1.261962,1.060817,0.201144,1.075221,0.0,0.166284,0.314378,0.2535,0.254809,0.237681,Telcm,0.109915,0.193301,0.15313,0.171887,0.13165
1265888,2016-12-13,2016-12-14,1.19349,0.996362,0.197128,1.187609,,0.217124,0.199269,0.159023,0.145848,0.119249,Telcm,0.188456,0.156023,0.149112,0.135003,0.168279
1101215,2001-09-21,2001-09-24,1.1925,1.045883,0.146617,0.59277,,0.100957,0.17111,0.244513,0.573618,0.714595,BusSv,0.113972,0.11995,0.133151,0.590996,0.71411
1108524,2005-05-18,2005-05-19,1.167298,1.003394,0.163904,0.865111,-1.0,0.284605,0.215336,0.346934,0.454717,0.61956,Softw,0.287232,0.245046,0.342504,0.461134,0.5616
1091667,2019-01-29,2019-01-30,1.142418,1.024439,0.117979,0.84235,1.0,0.123041,0.153296,0.166087,0.278803,0.27676,Telcm,0.15161,0.159019,0.160757,0.246316,0.250667


Finance and Insurance
325/397 price history generated from wrds/crsp
7/7 price history generated from yahoo finance
377/397 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
777001,2000-09-27,2000-09-28,1.113812,1.010308,0.103504,1.004685,,0.015509,0.07181,-0.005088,0.154798,0.061183,Fin,0.066861,0.088177,-0.028969,0.066642,0.063825
1295172,2009-02-18,2009-02-19,1.095363,0.977033,0.11833,1.617912,,0.08085,0.243713,0.267083,0.32362,0.124522,Insur,0.21863,0.335674,0.291785,0.386299,0.144768
1267238,2009-03-06,2009-03-09,1.092496,1.051908,0.040588,0.923283,,0.20179,0.346287,0.019576,0.032786,0.180985,Insur,0.100544,0.186241,-0.149038,-0.088853,-0.125607
1430592,2012-11-05,2012-11-06,1.085652,0.984715,0.100937,0.780834,0.0,0.176717,0.184273,0.396816,0.370272,0.371688,Fin,0.189227,0.168527,0.291491,0.247431,0.285595
1141391,2009-01-01,2009-01-02,1.08172,1.005876,0.075844,0.338429,,-0.07356,0.207195,0.182693,0.18007,0.113843,Banks,0.042741,0.443016,0.37153,0.27661,0.240049


Real Estate and Rental and Leasing
50/58 price history generated from wrds/crsp
1/1 price history generated from yahoo finance
57/58 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1138118,2009-05-15,2009-05-18,1.117397,1.030784,0.086613,1.046415,,0.117864,0.275005,0.436158,0.444112,0.292204,RlEst,0.03695,0.186653,0.208556,0.235911,0.10926
1138118,2011-03-07,2011-03-08,1.075678,1.007984,0.067694,0.545934,,0.116264,0.168485,0.053671,-0.070586,-0.323735,RlEst,0.060996,0.105743,0.030336,-0.026054,-0.156423
1657853,2016-07-06,2016-07-07,1.059488,1.016203,0.043285,3.161081,,0.1477,0.053171,0.072964,0.216671,0.255359,PerSv,0.115495,0.067666,0.107224,0.179193,0.199277
1138118,2011-09-05,2011-09-06,1.051576,1.018439,0.033137,1.161865,,-0.053595,-0.007172,0.108812,0.088928,0.124613,RlEst,-0.041723,0.028873,0.09123,0.057669,0.038095
1138118,2010-12-01,2010-12-02,1.044025,1.015843,0.028182,0.876925,,0.038619,0.074826,0.202063,0.341556,0.284896,RlEst,0.026966,0.034186,0.105951,0.177584,0.157373


Professional, Scientific, and Technical Services
104/118 price history generated from wrds/crsp
3/3 price history generated from yahoo finance
116/118 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1169264,2008-05-05,2008-05-06,1.317302,0.990776,0.326526,1.235704,,0.387165,0.17293,0.099001,0.261057,0.319947,Softw,0.377442,0.133943,0.077779,0.250054,0.285489
1169264,2008-01-14,2008-01-15,1.131466,0.971122,0.160344,0.584567,,0.166752,0.306075,0.328245,0.524056,0.145842,Softw,0.188537,0.366749,0.366418,0.526242,0.117554
1571123,2014-06-09,2014-06-10,1.121349,0.996301,0.125048,2.117503,0.0,0.126988,0.076536,0.124324,0.196765,0.21339,Softw,0.12306,0.05641,0.091033,0.174758,0.210019
868857,2008-02-11,2008-02-12,1.089961,1.020789,0.069172,0.61601,0.0,0.016555,0.010273,0.111523,0.206964,0.127233,BusSv,0.000413,0.003942,0.11542,0.182212,0.081645
70866,2013-04-29,2013-04-30,1.087523,0.992069,0.095455,1.067469,0.0,0.100295,0.205597,0.238678,0.300595,0.384937,Softw,0.094848,0.194216,0.25522,0.2869,0.358444


Administrative and Support and Waste Management and Remediation Services
50/66 price history generated from wrds/crsp
1/1 price history generated from yahoo finance
64/66 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1324424,2016-02-09,2016-02-10,1.115825,0.988399,0.127426,2.148261,0.0,0.076626,0.054303,0.094501,0.009005,0.036344,Trans,0.061682,0.028492,0.121429,0.064116,0.124949
1062047,2014-01-06,2014-01-07,1.053212,1.007203,0.046009,0.723724,,0.032059,-0.05673,0.030006,0.036677,-0.046493,BusSv,0.030965,-0.039178,0.059969,0.079619,-0.006878
1395942,2011-05-16,2011-05-17,1.049299,1.009089,0.040211,0.827347,,0.076552,-0.017165,-0.174019,-0.213445,-0.270638,BusSv,0.071255,-0.014734,-0.131096,-0.171023,-0.248744
1324424,2015-05-21,2015-05-22,1.044858,0.987722,0.057136,0.616125,,0.009006,0.029771,0.17888,0.209969,0.243554,Trans,0.019993,0.060444,0.189253,0.209906,0.254311
1082754,2011-05-26,2011-05-27,1.03512,1.015351,0.019769,0.47614,,-0.007027,0.056909,-0.017349,-0.064849,-0.037569,BusSv,-0.009763,0.057216,0.01922,-0.028929,-0.013583


Educational Services
1/1 price history generated from wrds/crsp
0/0 price history generated from yahoo finance
1/1 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1434588,2017-06-13,2017-06-14,1.005741,0.995305,0.010436,0.774767,,-0.013617,-0.121103,-0.012347,0.047605,0.058428,PerSv,-0.039131,-0.155627,-0.07243,-0.002919,0.011621


Health Care and Social Assistance
15/33 price history generated from wrds/crsp
1/4 price history generated from yahoo finance
30/33 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1320414,2017-10-20,2017-10-23,1.071348,0.997491,0.073857,1.483208,,-0.049977,-0.040646,-0.114386,-0.055194,-0.031206,Hlth,0.014671,0.006044,-0.100912,-0.102114,-0.06051
1045829,2011-11-14,2011-11-15,1.061111,0.989108,0.072003,0.766931,,0.140153,0.124289,0.04934,-0.023483,-0.141217,Hlth,0.158227,0.174692,0.091286,0.046946,-0.092784
1437578,2013-04-10,2013-04-11,1.017148,1.00069,0.016458,0.358566,,-0.005626,-0.012806,-0.021312,0.01636,-0.016514,PerSv,0.003261,-0.040289,-0.061149,0.01469,-0.005359
860730,2019-01-16,2019-01-17,1.016854,1.020497,-0.003642,0.793586,,0.022133,-0.040774,-0.146461,-0.135743,-0.087984,Hlth,0.020493,-0.005419,-0.053147,-0.048448,-0.032019
1320414,2016-09-27,2016-09-28,1.005327,0.995745,0.009582,1.100299,,0.056548,-0.126088,-0.033499,-0.12975,-0.176082,Hlth,0.063584,-0.029172,0.05928,-0.052323,-0.092782


Arts, Entertainment, and Recreation
24/31 price history generated from wrds/crsp
1/1 price history generated from yahoo finance
31/31 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
701374,2011-12-19,2011-12-20,1.080043,1.033062,0.046981,0.673755,,0.028587,0.045598,0.066155,0.088994,0.235092,Fun,0.009532,-0.074887,-0.104348,-0.026459,0.241854
701374,2010-09-10,2010-09-13,1.038945,1.012392,0.026553,0.591688,,0.052957,0.19283,0.224,0.201989,0.326795,Fun,0.035403,0.129236,0.049928,0.064094,0.212719
1564902,2015-08-05,2015-08-06,1.037931,0.987632,0.050299,1.530385,0.0,0.112555,0.079425,0.16329,0.084794,0.209264,Fun,0.164825,0.152916,0.221021,0.152378,0.289041
1656239,2017-12-19,2017-12-20,1.030711,1.0025,0.028211,0.956442,,-0.008109,-0.003761,-0.066473,0.021434,-0.011662,Fun,-0.011698,-0.141719,-0.306346,-0.235856,-0.398724
701374,2010-08-17,2010-08-18,1.024483,0.984663,0.039819,2.36103,,0.14765,0.227516,0.406509,0.439672,0.539067,Fun,0.166571,0.199279,0.187701,0.29482,0.371449


Accommodation and Food Services
190/227 price history generated from wrds/crsp
9/12 price history generated from yahoo finance
217/227 earnings call date checked


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,R,MKT,R - MKT,vol,EA,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
CIK,day -1,day 0,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
858339,2014-11-11,2014-11-12,1.482047,1.0004,0.481647,0.76672,,0.520094,0.361173,-0.047886,-0.181175,-0.141596,Fun,0.536483,0.43606,-0.022,-0.136845,-0.11296
858339,2013-04-22,2013-04-23,1.315452,1.011707,0.303746,2.324882,,0.185665,0.022828,0.223311,0.674142,0.635222,Fun,0.143411,-0.023512,0.188519,0.591865,0.462559
858339,2016-07-29,2016-08-01,1.150725,0.991411,0.159313,0.498378,,0.068352,-0.02581,0.05575,0.131462,0.252755,Fun,0.049604,-0.087516,-0.100688,-0.029959,0.110385
858339,2014-12-18,2014-12-19,1.138952,1.008016,0.130936,2.08616,,0.038639,-0.137135,-0.311356,-0.327851,-0.510855,Fun,0.026276,-0.208316,-0.373273,-0.413537,-0.631239
858339,2014-12-19,2014-12-22,1.129726,1.005808,0.123918,1.412747,,-0.000844,-0.147795,-0.332249,-0.344986,-0.544683,Fun,-0.001818,-0.19342,-0.366452,-0.410255,-0.64113


In [9]:
# Evaluate returns by time period, excluding earnings-call events

from scipy.stats import ttest_1samp
import pandas as pd 
import numpy as np
from pathlib import Path

from utils.backtest_performance import price_history_to_cumu, add_to_return_stats, get_price_history_yahoo_finance, get_price_history, get_return_stats, get_NAICS_code_sector_name
# generate pair of NAICS sector code and name
NAICS_sector_code, NAICS_sector_name = get_NAICS_code_sector_name()

# events whose 'vol' value exceeds this threshold are excluded from the evaluation
vol_threshold = 1.25
# return columns that are summarised (mean and p-value) for every sector / period
COLUMNS = ['1d - MKT', '10d - MKT', '30d - MKT', '60d - MKT', '90d - MKT', '120d - MKT',
           '1d - IDT', '10d - IDT', '30d - IDT', '60d - IDT', '90d - IDT', '120d - IDT']

# alternative partition into 5-year windows (kept for reference):
# TIME_PARTITION = [['1995-01-01', '1999-12-31'], ['2000-01-01', '2004-12-31'],
#                 ['2005-01-01', '2009-12-31'], ['2010-01-01', '2014-12-31'],
#                 ['2015-01-01', '2019-12-31']]

# [start, end] date strings of each evaluation window; both ends are inclusive
TIME_PARTITION = [['1995-01-01', '1999-12-31'], ['2000-01-01', '2005-12-31'],
                  ['2006-01-01', '2007-12-31'], ['2008-01-01', '2009-12-31'],
                  ['2010-01-01', '2011-12-31'], ['2012-01-01', '2013-12-31'],
                  ['2014-01-01', '2015-12-31'], ['2016-01-01', '2017-12-31'],
                  ['2018-01-01', '2019-12-31']]

repository_path = "backtests/LDA/ESG_baseline"


def _load_filtered_events(csv_path, t0, t1):
    """Read a performance CSV and keep only the events used for evaluation.

    Args:
        csv_path: path of a ``performance.csv`` file with at least the columns
            'day 0', 'EA', 'vol' and those listed in ``COLUMNS``.
        t0, t1: first and last day of the period (both inclusive).

    Returns:
        DataFrame with the rows whose 'day 0' lies in [t0, t1], whose 'EA' is
        null and whose 'vol' is at most ``vol_threshold``. The EA/vol filter
        tries to remove earnings call, merger etc. events.
    """
    perf = pd.read_csv(csv_path, parse_dates=['day 0'])
    # inclusive on both ends, so an event dated exactly t0 is not lost
    in_period = perf['day 0'].between(t0, t1)
    keep = perf['EA'].isnull() & (perf['vol'] <= vol_threshold)
    return perf[in_period & keep]


def _format_mean_pvalue(col):
    """Format one return column as ``"<mean> (<p-value in %>%)"``.

    The p-value comes from a one-sample t-test of the column against a
    population mean of 0. NaN entries are dropped first so that the mean and
    the p-value describe the same observations; with fewer than two
    observations the p-value is reported as nan.
    """
    values = col.dropna()
    if len(values) > 1:
        p_value = ttest_1samp(values.to_numpy(), popmean=0)[1]
    else:
        p_value = np.nan
    return f"{values.mean():.4f} ({p_value * 100:.2f}%)"


def _summary_row(events, label):
    """Build a one-row DataFrame (index ``label``) summarising ``events``.

    The first column, 'sample size', is the number of events; the remaining
    columns are ``COLUMNS`` formatted by ``_format_mean_pvalue``.
    """
    row = events[COLUMNS].apply(_format_mean_pvalue, axis=0).to_frame().T
    row.insert(0, 'sample size', len(events))
    row.index = [label]
    return row


# the first partition (1995-1999) is not evaluated
for (t0_str, t1_str) in TIME_PARTITION[1:]:
    t0, t1 = pd.to_datetime(t0_str), pd.to_datetime(t1_str)
    print(f"{t0_str} - {t1_str}")
    master_t_stats = []

    # one summary row per sector, labelled with the sector's position in NAICS_sector_name
    for i, sector_name in enumerate(NAICS_sector_name):
        file_path = f"{repository_path}/{sector_name}/performance.csv"
        if not Path(file_path).is_file():
            continue

        sector_events = _load_filtered_events(file_path, t0, t1)
        if len(sector_events) == 0:
            continue

        master_t_stats.append(_summary_row(sector_events, f"{i}"))

    # no signal events in this time period
    if len(master_t_stats) == 0:
        print(f"{len(master_t_stats)} events from {t0_str} - {t1_str}")
        continue

    # summary over all signal events of the whole basket; its sample size is the
    # number of rows the statistics were actually computed on
    basket_events = _load_filtered_events(f"{repository_path}/performance.csv", t0, t1)
    master_t_stats.append(_summary_row(basket_events, "Total"))

    all_sector_returns = pd.concat(master_t_stats)
    all_sector_returns.to_csv(f"{repository_path}/performance_evaluation_{t0_str}_{t1_str}.csv")
    # display to console
    display(all_sector_returns)


2000-01-01 - 2005-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
2,1.0,0.0236 (nan%),-0.0246 (nan%),-0.0705 (nan%),-0.0988 (nan%),-0.0422 (nan%),-0.0171 (nan%),0.0121 (nan%),-0.0602 (nan%),-0.1108 (nan%),-0.1840 (nan%),-0.1311 (nan%),-0.1714 (nan%)
4,23.0,0.0097 (0.43%),0.0104 (39.52%),0.0242 (31.84%),0.0229 (44.63%),0.1095 (1.02%),0.1717 (0.03%),0.0063 (3.06%),0.0086 (44.87%),0.0296 (24.53%),0.0195 (53.43%),0.0886 (3.25%),0.1343 (0.26%)
5,2.0,0.0041 (90.50%),0.0041 (79.58%),-0.0321 (10.80%),-0.0511 (9.27%),-0.0439 (51.20%),-0.0246 (72.55%),0.0028 (92.40%),0.0120 (53.68%),-0.0283 (10.40%),-0.0591 (12.93%),-0.0511 (43.23%),-0.0407 (44.85%)
6,2.0,-0.0100 (9.33%),0.0385 (35.65%),0.0668 (34.94%),0.0486 (24.43%),0.1311 (2.83%),0.0486 (45.02%),-0.0085 (2.13%),0.0410 (48.84%),0.0592 (56.75%),0.0407 (27.36%),0.1559 (3.65%),0.0471 (54.26%)
7,3.0,0.0143 (47.81%),0.0014 (92.47%),0.0269 (61.48%),0.1169 (18.41%),0.0518 (26.69%),0.0777 (39.14%),0.0060 (66.79%),-0.0119 (67.72%),0.0055 (92.35%),0.0387 (57.09%),-0.0191 (61.73%),-0.0395 (47.35%)
8,12.0,-0.0094 (65.59%),-0.0195 (32.56%),0.0029 (92.59%),0.0464 (28.08%),0.0663 (40.13%),0.0014 (98.35%),-0.0114 (60.82%),-0.0304 (20.69%),-0.0150 (68.09%),0.0149 (62.12%),0.0620 (35.15%),0.0160 (81.25%)
9,17.0,0.0043 (57.95%),0.0195 (2.68%),0.0296 (12.69%),0.0693 (9.58%),0.1111 (6.96%),0.1356 (6.80%),0.0021 (72.60%),0.0220 (3.10%),0.0280 (11.81%),0.0616 (15.17%),0.0819 (17.41%),0.0976 (17.68%)
11,2.0,0.0059 (74.12%),0.0044 (91.65%),-0.0075 (97.38%),-0.0077 (97.81%),0.1389 (68.28%),0.3461 (51.78%),0.0069 (78.14%),0.0065 (83.33%),0.0067 (97.46%),-0.0250 (92.23%),0.1158 (68.84%),0.3249 (52.06%)
13,1.0,-0.0127 (nan%),0.0611 (nan%),-0.0373 (nan%),-0.0070 (nan%),0.0664 (nan%),0.1412 (nan%),-0.0030 (nan%),0.0598 (nan%),-0.0383 (nan%),-0.0210 (nan%),0.0260 (nan%),0.1091 (nan%)
17,5.0,0.0202 (32.15%),0.0124 (31.61%),0.0304 (37.64%),0.0475 (41.49%),0.0437 (40.87%),0.0629 (43.93%),0.0213 (35.96%),0.0147 (20.40%),0.0265 (35.58%),0.0404 (30.25%),0.0438 (30.79%),0.0524 (42.89%)


2006-01-01 - 2007-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
1,3.0,0.0262 (27.84%),0.0222 (64.14%),0.0540 (60.80%),0.0259 (42.95%),-0.0250 (87.11%),0.0783 (63.56%),0.0192 (27.90%),0.0131 (60.87%),0.0290 (64.07%),-0.0474 (34.07%),-0.0902 (39.39%),-0.0162 (87.27%)
2,6.0,0.0045 (68.70%),0.0067 (78.85%),0.0290 (29.39%),0.1147 (11.21%),0.1887 (10.87%),0.3272 (4.14%),0.0040 (69.21%),0.0071 (78.09%),0.0198 (40.75%),0.0602 (34.16%),0.1130 (26.76%),0.2265 (13.46%)
3,1.0,0.0200 (nan%),0.0581 (nan%),-0.1092 (nan%),0.0283 (nan%),0.1249 (nan%),0.3997 (nan%),0.0058 (nan%),0.1137 (nan%),-0.1041 (nan%),0.0301 (nan%),0.0925 (nan%),0.3217 (nan%)
4,35.0,0.0166 (8.48%),0.0255 (14.75%),0.0333 (14.48%),0.0729 (1.94%),0.0994 (1.47%),0.1291 (0.29%),0.0128 (18.25%),0.0185 (25.98%),0.0105 (62.60%),0.0301 (28.46%),0.0425 (23.49%),0.0489 (18.07%)
5,3.0,0.0091 (59.57%),-0.0101 (56.01%),-0.0337 (58.89%),-0.0384 (55.53%),-0.0240 (73.78%),-0.0312 (77.38%),0.0099 (57.63%),-0.0052 (83.37%),-0.0337 (61.18%),-0.0366 (51.88%),-0.0027 (97.22%),-0.0175 (87.49%)
6,5.0,-0.0059 (39.46%),0.0164 (33.71%),0.0794 (29.85%),0.0808 (15.23%),0.0571 (45.10%),0.0204 (75.98%),-0.0108 (11.62%),0.0215 (40.04%),0.0923 (21.29%),0.1086 (7.17%),0.1065 (20.31%),0.0838 (15.96%)
7,8.0,-0.0019 (88.47%),0.0009 (96.23%),0.0418 (7.94%),0.0887 (1.69%),0.1495 (2.71%),0.2059 (1.63%),0.0040 (69.89%),0.0085 (62.83%),0.0486 (4.44%),0.0849 (1.11%),0.1217 (3.51%),0.1490 (4.30%)
8,24.0,0.0022 (74.50%),-0.0103 (49.42%),-0.0120 (47.19%),-0.0228 (33.79%),-0.0200 (56.53%),-0.0130 (76.92%),0.0001 (98.77%),-0.0150 (26.82%),-0.0192 (20.23%),-0.0423 (4.68%),-0.0540 (9.77%),-0.0615 (11.94%)
9,14.0,0.0073 (16.63%),-0.0066 (72.76%),-0.0104 (67.32%),-0.0110 (77.68%),-0.0339 (39.67%),-0.0409 (34.12%),0.0012 (79.64%),-0.0101 (50.46%),-0.0141 (46.30%),-0.0031 (91.75%),-0.0266 (38.55%),-0.0406 (24.11%)
10,2.0,0.0071 (64.35%),-0.0011 (98.58%),0.0186 (44.50%),0.0442 (24.98%),0.0418 (70.69%),-0.2083 (25.38%),0.0015 (57.00%),-0.0053 (90.17%),0.0062 (13.97%),0.0201 (30.80%),0.0827 (29.39%),-0.0301 (62.96%)


2008-01-01 - 2009-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
1,6.0,-0.0262 (6.94%),0.0745 (21.16%),0.1174 (11.51%),0.1951 (26.65%),0.2745 (30.66%),0.3815 (29.60%),-0.0193 (17.54%),0.0319 (30.13%),0.0541 (16.30%),0.1472 (26.01%),0.2298 (20.94%),0.3559 (16.14%)
2,11.0,0.0051 (67.82%),-0.0111 (41.48%),0.0197 (65.53%),0.0448 (49.97%),0.0554 (57.99%),0.0248 (82.61%),0.0006 (96.11%),-0.0273 (18.79%),0.0294 (47.32%),0.0474 (49.74%),0.0621 (53.71%),0.0589 (59.62%)
3,1.0,0.0608 (nan%),0.0170 (nan%),-0.0145 (nan%),0.0046 (nan%),0.0871 (nan%),0.1096 (nan%),0.0535 (nan%),0.0962 (nan%),0.0905 (nan%),0.0455 (nan%),0.1470 (nan%),0.1970 (nan%)
4,61.0,-0.0034 (49.54%),0.0031 (76.40%),0.0102 (50.09%),-0.0005 (97.81%),0.0008 (97.69%),-0.0175 (58.44%),-0.0019 (61.40%),0.0042 (64.91%),0.0087 (54.29%),-0.0116 (46.75%),-0.0111 (62.14%),-0.0239 (36.39%)
6,3.0,-0.0297 (23.91%),-0.0837 (14.83%),-0.1617 (9.39%),-0.0952 (38.86%),-0.1571 (2.65%),-0.1066 (26.15%),-0.0277 (24.95%),-0.0758 (12.82%),-0.1694 (8.24%),-0.1102 (20.88%),-0.1662 (0.57%),-0.1697 (2.48%)
7,2.0,0.0004 (91.02%),0.1205 (39.79%),0.0857 (55.88%),0.0846 (61.96%),0.0348 (80.06%),0.0616 (60.55%),-0.0041 (46.52%),0.1272 (42.28%),0.0533 (67.70%),0.0723 (68.26%),0.0262 (90.15%),0.0420 (84.57%)
8,17.0,0.0051 (59.49%),-0.0024 (90.35%),0.0060 (79.06%),0.0163 (58.20%),-0.0025 (92.97%),0.0345 (43.28%),0.0047 (61.01%),0.0015 (93.67%),-0.0040 (82.93%),-0.0040 (86.61%),-0.0365 (24.38%),0.0127 (76.73%)
9,14.0,0.0008 (93.75%),-0.0031 (92.79%),0.0064 (91.96%),-0.0405 (70.02%),-0.0627 (49.92%),-0.1142 (22.38%),0.0132 (19.20%),0.0115 (62.24%),0.0560 (29.51%),0.0277 (75.02%),-0.0065 (93.40%),-0.0627 (42.09%)
10,7.0,0.0228 (11.12%),-0.0304 (32.98%),0.0441 (55.46%),0.0776 (72.17%),0.0954 (72.03%),0.1147 (75.08%),0.0148 (11.35%),-0.0036 (83.01%),0.0634 (24.83%),0.0683 (60.51%),0.0766 (65.83%),0.1009 (65.34%)
11,10.0,0.0108 (85.72%),0.0232 (73.67%),0.0789 (22.08%),0.0875 (13.49%),0.1196 (11.72%),0.1061 (7.96%),0.0093 (87.50%),0.0161 (81.29%),0.0765 (23.78%),0.0699 (24.10%),0.0827 (27.41%),0.0541 (38.21%)


2010-01-01 - 2011-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
0,2.0,-0.0262 (23.23%),0.0141 (50.17%),-0.0717 (58.36%),-0.1296 (46.48%),-0.1208 (16.78%),-0.1596 (9.60%),-0.0352 (39.07%),0.0001 (99.62%),-0.0955 (18.72%),-0.1987 (18.32%),-0.1304 (13.10%),-0.1803 (30.77%)
1,10.0,-0.0024 (81.94%),-0.0019 (91.83%),-0.0240 (45.07%),-0.0293 (53.45%),-0.0613 (24.78%),-0.1185 (10.89%),-0.0009 (91.59%),-0.0035 (79.05%),-0.0275 (23.43%),-0.0495 (10.69%),-0.0730 (3.48%),-0.1236 (0.97%)
2,23.0,-0.0003 (88.69%),0.0076 (24.29%),0.0079 (26.23%),0.0074 (56.55%),-0.0011 (95.03%),-0.0105 (68.76%),0.0022 (43.22%),0.0053 (46.34%),0.0025 (79.19%),0.0089 (54.93%),-0.0083 (66.09%),-0.0185 (46.32%)
3,3.0,0.0194 (18.88%),0.0341 (21.94%),0.0064 (93.32%),-0.0742 (41.56%),-0.1279 (18.78%),-0.0149 (93.07%),0.0220 (4.14%),0.0422 (13.18%),0.0435 (48.30%),0.0203 (72.05%),-0.0224 (72.69%),0.0431 (76.73%)
4,113.0,0.0012 (63.31%),-0.0032 (55.43%),0.0040 (64.46%),-0.0063 (64.77%),-0.0041 (78.33%),0.0077 (67.75%),0.0025 (26.31%),-0.0040 (41.40%),-0.0021 (79.90%),-0.0171 (17.94%),-0.0176 (18.32%),-0.0061 (70.69%)
5,1.0,0.0260 (nan%),0.0208 (nan%),-0.0779 (nan%),-0.1267 (nan%),-0.1064 (nan%),0.0089 (nan%),0.0314 (nan%),0.0154 (nan%),-0.0873 (nan%),-0.1446 (nan%),-0.0875 (nan%),0.0172 (nan%)
6,8.0,-0.0177 (29.28%),-0.0251 (19.15%),-0.0347 (20.63%),-0.0245 (49.42%),-0.0160 (76.49%),-0.0049 (94.22%),-0.0200 (23.27%),-0.0268 (15.95%),-0.0324 (22.85%),-0.0206 (49.54%),-0.0284 (59.21%),-0.0261 (70.61%)
7,7.0,0.0123 (49.35%),-0.0005 (98.21%),0.0097 (76.93%),-0.0053 (89.73%),-0.0119 (76.88%),0.0866 (37.70%),0.0106 (56.16%),-0.0009 (95.55%),0.0054 (86.11%),-0.0062 (86.45%),-0.0141 (70.33%),0.0758 (40.37%)
8,33.0,-0.0023 (56.25%),0.0084 (29.99%),-0.0010 (92.85%),0.0083 (69.67%),-0.0014 (95.86%),-0.0331 (31.81%),-0.0027 (49.38%),0.0053 (47.69%),-0.0130 (27.52%),-0.0078 (71.21%),-0.0247 (34.79%),-0.0532 (9.89%)
9,37.0,0.0010 (77.91%),-0.0045 (54.34%),-0.0167 (11.23%),0.0118 (77.34%),-0.0476 (3.87%),-0.0635 (1.62%),0.0025 (52.76%),0.0054 (42.90%),0.0058 (56.46%),0.0530 (18.70%),0.0115 (61.35%),0.0019 (94.00%)


2012-01-01 - 2013-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
0,1.0,-0.0136 (nan%),-0.0240 (nan%),-0.0315 (nan%),-0.0291 (nan%),-0.0789 (nan%),-0.1533 (nan%),-0.0006 (nan%),-0.0012 (nan%),-0.0006 (nan%),-0.0040 (nan%),-0.0147 (nan%),-0.0269 (nan%)
1,23.0,-0.0056 (42.03%),-0.0241 (12.78%),0.0029 (91.60%),-0.0206 (41.86%),-0.0238 (35.78%),-0.0324 (46.32%),-0.0046 (47.08%),-0.0105 (43.58%),0.0171 (50.16%),-0.0010 (96.21%),-0.0146 (51.22%),-0.0249 (56.28%)
2,33.0,-0.0042 (24.10%),-0.0049 (46.59%),-0.0122 (34.26%),0.0088 (54.83%),-0.0164 (34.65%),0.0004 (98.44%),-0.0014 (67.19%),0.0031 (62.49%),0.0069 (58.16%),0.0343 (4.36%),0.0199 (28.13%),0.0348 (12.71%)
3,4.0,-0.0074 (31.09%),-0.0064 (73.81%),0.0536 (1.12%),0.0117 (57.17%),-0.0227 (76.22%),-0.0167 (87.84%),-0.0038 (67.95%),0.0015 (83.11%),0.0717 (1.45%),0.0472 (1.39%),-0.0171 (82.38%),-0.0146 (89.79%)
4,122.0,0.0012 (70.99%),0.0075 (28.68%),0.0032 (69.99%),0.0212 (8.52%),0.0302 (5.28%),0.0520 (0.59%),0.0022 (44.07%),0.0089 (18.21%),0.0068 (41.03%),0.0251 (3.57%),0.0363 (1.58%),0.0587 (0.16%)
5,6.0,0.0009 (82.42%),0.0002 (98.91%),-0.0393 (23.29%),-0.0775 (23.99%),-0.0851 (15.62%),-0.0705 (27.99%),0.0009 (79.62%),-0.0002 (99.12%),-0.0470 (20.02%),-0.0894 (23.34%),-0.1024 (15.87%),-0.0916 (21.66%)
6,3.0,-0.0234 (52.40%),-0.0051 (93.68%),-0.0628 (55.78%),-0.0112 (92.55%),-0.0227 (84.76%),0.0226 (69.94%),-0.0243 (52.78%),-0.0026 (97.01%),-0.0445 (64.25%),-0.0052 (95.12%),-0.0159 (81.61%),0.0308 (25.22%)
7,1.0,0.0064 (nan%),0.0353 (nan%),0.0257 (nan%),0.0354 (nan%),-0.1260 (nan%),-0.1419 (nan%),0.0022 (nan%),0.0135 (nan%),0.0178 (nan%),-0.0317 (nan%),-0.2201 (nan%),-0.2076 (nan%)
8,46.0,0.0028 (46.35%),0.0125 (11.38%),0.0176 (37.41%),0.0651 (6.06%),0.0584 (12.86%),0.0767 (7.99%),0.0021 (57.62%),0.0088 (24.75%),0.0134 (47.83%),0.0578 (8.04%),0.0482 (18.00%),0.0642 (12.67%)
9,30.0,0.0040 (14.34%),0.0174 (4.21%),0.0267 (5.60%),0.0104 (69.81%),0.0266 (49.47%),0.0255 (48.57%),0.0033 (12.42%),0.0108 (17.58%),0.0127 (29.02%),-0.0039 (87.81%),0.0144 (68.40%),0.0086 (79.59%)


2014-01-01 - 2015-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
1,33.0,-0.0100 (30.85%),-0.0316 (3.30%),-0.1002 (0.15%),-0.1916 (0.01%),-0.1860 (0.03%),-0.3067 (0.00%),-0.0103 (24.46%),-0.0250 (6.31%),-0.0723 (0.27%),-0.1179 (0.07%),-0.0906 (1.73%),-0.1739 (0.01%)
2,19.0,-0.0026 (59.54%),-0.0234 (2.63%),-0.0327 (10.54%),-0.0464 (13.96%),-0.0452 (22.75%),-0.0433 (24.71%),-0.0007 (87.58%),-0.0184 (8.70%),-0.0191 (19.53%),-0.0317 (28.31%),-0.0245 (49.98%),-0.0259 (49.84%)
3,2.0,0.0209 (1.82%),-0.0135 (28.04%),-0.0142 (74.84%),-0.0059 (93.21%),-0.0536 (25.02%),-0.0935 (23.53%),0.0190 (9.58%),-0.0373 (16.85%),-0.0104 (33.66%),0.0039 (5.53%),-0.0278 (31.97%),-0.0319 (57.41%)
4,149.0,0.0024 (30.91%),0.0046 (43.52%),-0.0040 (63.56%),-0.0142 (18.46%),-0.0259 (2.33%),-0.0215 (13.68%),0.0014 (53.68%),0.0033 (57.01%),-0.0089 (28.95%),-0.0168 (12.65%),-0.0268 (2.02%),-0.0182 (20.40%)
5,13.0,0.0101 (12.69%),0.0006 (95.48%),-0.0065 (77.33%),-0.0271 (35.43%),-0.0352 (32.89%),-0.0738 (19.20%),0.0099 (13.80%),-0.0009 (93.00%),-0.0058 (78.57%),-0.0134 (63.63%),-0.0232 (51.48%),-0.0563 (33.88%)
6,18.0,0.0141 (19.09%),0.0254 (12.36%),0.0475 (8.07%),0.0484 (21.35%),0.0427 (44.99%),0.0555 (39.99%),0.0137 (20.60%),0.0209 (18.80%),0.0335 (19.91%),0.0213 (58.68%),0.0032 (95.55%),0.0104 (87.05%)
7,3.0,-0.0068 (63.44%),-0.0292 (16.28%),-0.0141 (75.85%),-0.0159 (77.28%),0.0376 (43.86%),0.0312 (64.29%),-0.0071 (57.91%),-0.0251 (27.47%),-0.0132 (72.33%),-0.0416 (56.71%),0.0129 (85.10%),-0.0142 (72.17%)
8,54.0,-0.0056 (31.44%),-0.0048 (55.47%),-0.0173 (20.90%),-0.0223 (28.36%),-0.0219 (41.46%),-0.0336 (17.56%),-0.0052 (33.26%),-0.0049 (55.88%),-0.0207 (15.11%),-0.0300 (17.18%),-0.0328 (24.57%),-0.0384 (15.80%)
9,49.0,0.0031 (18.74%),0.0096 (11.65%),0.0015 (88.20%),0.0068 (73.65%),-0.0107 (63.86%),-0.0323 (23.76%),0.0020 (40.03%),0.0035 (55.40%),-0.0097 (29.47%),-0.0079 (68.00%),-0.0281 (21.10%),-0.0560 (4.59%)
10,2.0,-0.0028 (73.52%),-0.0019 (92.60%),-0.0366 (73.24%),-0.1008 (58.25%),-0.1016 (40.51%),-0.1007 (52.93%),0.0006 (89.59%),0.0001 (98.96%),-0.0064 (85.42%),-0.0483 (39.02%),-0.0472 (12.06%),-0.0154 (36.95%)


2016-01-01 - 2017-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
0,1.0,-0.0032 (nan%),-0.0295 (nan%),0.0076 (nan%),0.0225 (nan%),0.0105 (nan%),-0.0166 (nan%),-0.0009 (nan%),-0.0008 (nan%),0.0101 (nan%),0.0080 (nan%),0.0100 (nan%),0.0051 (nan%)
1,40.0,-0.0072 (55.53%),-0.0073 (75.72%),-0.0169 (59.22%),-0.0358 (41.86%),-0.0256 (59.77%),-0.0117 (82.12%),-0.0099 (38.57%),-0.0026 (90.12%),-0.0041 (87.94%),-0.0083 (82.65%),0.0006 (98.87%),0.0167 (72.04%)
2,22.0,-0.0047 (57.32%),-0.0082 (60.50%),0.0375 (22.35%),0.0309 (41.68%),0.0187 (71.21%),0.0294 (60.80%),-0.0039 (62.28%),0.0007 (96.06%),0.0457 (12.11%),0.0633 (10.06%),0.0683 (18.22%),0.0778 (18.86%)
3,2.0,0.0061 (49.69%),-0.0322 (13.92%),-0.0113 (81.91%),-0.0269 (44.93%),0.0619 (60.35%),0.1154 (68.07%),0.0210 (15.23%),-0.0134 (1.85%),-0.0207 (65.70%),-0.0320 (46.79%),0.0397 (66.06%),0.0461 (79.98%)
4,112.0,0.0013 (60.66%),0.0035 (51.77%),-0.0027 (74.19%),-0.0018 (88.20%),0.0111 (50.46%),0.0058 (80.37%),0.0020 (37.99%),0.0062 (25.41%),0.0005 (94.90%),0.0063 (59.78%),0.0186 (24.55%),0.0187 (41.06%)
5,20.0,0.0040 (27.00%),0.0125 (10.65%),0.0245 (33.78%),-0.0121 (66.46%),-0.0190 (58.59%),-0.0028 (94.10%),0.0054 (16.16%),0.0180 (0.77%),0.0366 (11.53%),0.0030 (91.56%),0.0066 (85.33%),0.0272 (46.35%)
6,13.0,-0.0135 (46.37%),0.0088 (64.74%),0.0062 (83.18%),-0.0200 (57.55%),-0.0608 (17.08%),-0.0403 (37.61%),-0.0146 (46.44%),0.0085 (67.03%),-0.0016 (94.85%),-0.0205 (55.46%),-0.0632 (20.02%),-0.0601 (22.13%)
7,11.0,-0.0108 (15.06%),-0.0121 (58.24%),-0.0249 (29.13%),-0.0477 (11.46%),-0.0302 (51.29%),-0.0493 (30.92%),-0.0099 (11.21%),-0.0104 (61.59%),-0.0315 (17.34%),-0.0338 (31.71%),-0.0230 (62.90%),-0.0228 (62.18%)
8,54.0,0.0020 (67.22%),0.0026 (73.91%),0.0108 (23.95%),0.0117 (42.87%),-0.0103 (59.09%),0.0024 (91.58%),0.0031 (48.69%),0.0044 (51.88%),0.0069 (42.80%),0.0023 (86.48%),-0.0114 (54.10%),0.0027 (89.40%)
9,36.0,-0.0018 (46.37%),-0.0113 (11.73%),-0.0223 (7.43%),-0.0181 (21.83%),-0.0081 (67.98%),0.0056 (81.60%),-0.0030 (25.42%),-0.0131 (7.52%),-0.0243 (6.24%),-0.0290 (8.43%),-0.0332 (12.33%),-0.0332 (18.14%)


2018-01-01 - 2019-12-31


Unnamed: 0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
1,19.0,-0.0227 (5.41%),-0.0632 (0.19%),-0.0781 (0.82%),-0.1723 (0.42%),-0.2224 (0.06%),-0.2948 (0.04%),-0.0183 (10.40%),-0.0451 (1.41%),-0.0432 (7.98%),-0.1006 (4.40%),-0.1353 (1.12%),-0.1688 (0.84%)
2,22.0,0.0005 (92.45%),-0.0151 (38.64%),-0.0307 (29.52%),-0.0144 (65.36%),-0.0522 (19.26%),-0.0597 (21.98%),0.0004 (93.84%),-0.0164 (29.61%),-0.0437 (10.10%),-0.0389 (16.35%),-0.0878 (4.15%),-0.0799 (11.47%)
3,3.0,-0.0019 (89.64%),0.0371 (45.31%),0.0684 (20.52%),-0.0218 (93.32%),-0.0647 (79.46%),-0.1006 (78.35%),0.0081 (54.09%),0.0563 (21.42%),0.0965 (1.42%),-0.0081 (97.47%),-0.0492 (84.65%),-0.0738 (82.45%)
4,81.0,-0.0068 (26.22%),-0.0036 (69.58%),-0.0102 (52.77%),-0.0402 (2.94%),-0.0653 (0.28%),-0.0767 (0.45%),-0.0064 (27.54%),0.0012 (88.94%),0.0022 (88.19%),-0.0183 (28.90%),-0.0318 (11.88%),-0.0398 (11.71%)
5,17.0,0.0140 (4.73%),0.0228 (22.70%),0.0587 (5.19%),0.0224 (69.01%),-0.0145 (82.15%),-0.0081 (92.65%),0.0115 (8.83%),0.0222 (25.96%),0.0631 (3.29%),0.0306 (56.90%),0.0114 (84.88%),0.0107 (89.37%)
6,4.0,-0.0100 (52.05%),0.0095 (78.73%),0.0143 (88.38%),0.1313 (34.53%),0.1421 (42.31%),0.1279 (51.96%),-0.0082 (67.49%),0.0120 (77.09%),-0.0019 (98.27%),0.0971 (47.95%),0.1246 (48.90%),0.1292 (55.40%)
7,3.0,-0.0262 (34.30%),-0.0207 (38.51%),-0.0502 (50.55%),0.0025 (90.00%),0.0107 (84.20%),-0.0077 (93.20%),-0.0226 (40.53%),-0.0119 (34.04%),-0.0203 (70.57%),0.0229 (30.73%),0.0429 (47.00%),0.0088 (92.90%)
8,34.0,0.0032 (50.90%),-0.0100 (33.00%),-0.0211 (14.73%),-0.0066 (84.27%),-0.0386 (38.76%),-0.0245 (57.99%),0.0021 (64.31%),-0.0152 (13.04%),-0.0387 (0.52%),-0.0238 (48.91%),-0.0420 (33.00%),-0.0371 (37.70%)
9,23.0,-0.0061 (3.18%),-0.0017 (84.93%),-0.0057 (59.64%),-0.0288 (12.00%),-0.0501 (6.43%),-0.0718 (0.79%),-0.0062 (1.55%),-0.0091 (28.01%),-0.0045 (65.29%),-0.0341 (4.53%),-0.0400 (9.45%),-0.0564 (1.03%)
10,4.0,0.0134 (26.01%),0.0324 (46.62%),0.0675 (30.68%),0.0687 (32.82%),-0.0881 (46.49%),-0.1469 (29.05%),0.0128 (11.83%),0.0287 (47.16%),0.0550 (43.26%),0.0469 (54.61%),-0.1106 (29.69%),-0.1610 (23.42%)


In [12]:
import glob

# Gather the last row of every per-period evaluation file into a single table
# indexed by the period boundaries (t0, t1), display it, and save it to disk.
repository_path = "backtests/LDA/ESG_baseline"
file_path = os.path.join(repository_path, "performance_evaluation_*_*.csv")
output_dir = os.path.join(repository_path, "9_time_partitions")

filenames = glob.glob(file_path)
if not filenames:
    # Fail with a clear message; pd.concat on an empty list would otherwise
    # raise an opaque "No objects to concatenate" ValueError.
    raise FileNotFoundError(f"No evaluation files match {file_path!r}")

master_df = list()
for filename in filenames:
    # File names follow performance_evaluation_<t0>_<t1>.csv; basename/splitext
    # keep the parsing independent of the platform's path separator.
    stem = os.path.splitext(os.path.basename(filename))[0]
    t0, t1 = stem.split("_")[2:]

    # Keep only the last row of the file, as a one-row frame.
    last_row = pd.read_csv(filename, index_col=0).iloc[-1].to_frame().T
    # Column position is irrelevant: t0/t1 are moved into the index below.
    last_row['t0'], last_row['t1'] = t0, t1
    master_df.append(last_row)

# glob returns files in arbitrary order, so sort by (t0, t1) for a stable table.
df = pd.concat(master_df).set_index(['t0', 't1']).sort_index()
display(df)

# Make sure the target directory exists before writing the combined table.
os.makedirs(output_dir, exist_ok=True)
df.to_csv(os.path.join(output_dir, "performance_evaluation_total.csv"))

Unnamed: 0_level_0,Unnamed: 1_level_0,sample size,1d - MKT,10d - MKT,30d - MKT,60d - MKT,90d - MKT,120d - MKT,1d - IDT,10d - IDT,30d - IDT,60d - IDT,90d - IDT,120d - IDT
t0,t1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2000-01-01,2005-12-31,68,0.005 (27.32%),0.0078 (19.3%),0.0187 (11.52%),0.04 (2.25%),0.089 (0.05%),0.113 (0.01%),0.0027 (55.54%),0.0053 (41.86%),0.0155 (21.51%),0.025 (13.89%),0.0687 (0.43%),0.0836 (0.31%)
2006-01-01,2007-12-31,112,0.0054 (18.39%),0.0063 (40.95%),0.016 (12.26%),0.0305 (2.69%),0.0413 (2.88%),0.0634 (0.54%),0.0028 (47.27%),0.0031 (66.32%),0.0049 (60.96%),0.0072 (55.97%),0.0096 (57.69%),0.0183 (35.63%)
2008-01-01,2009-12-31,137,0.0003 (95.54%),0.0033 (70.54%),0.0187 (13.02%),0.0164 (40.57%),0.0201 (41.14%),0.0187 (53.52%),0.0017 (72.79%),0.0042 (57.88%),0.0201 (6.78%),0.0108 (49.62%),0.0091 (64.62%),0.0114 (62.76%)
2010-01-01,2011-12-31,281,0.0018 (25.26%),0.0013 (65.5%),-0.0005 (91.16%),-0.0037 (68.0%),-0.0151 (8.46%),-0.0139 (21.28%),0.0021 (16.4%),0.0005 (86.63%),-0.004 (36.38%),-0.0076 (36.29%),-0.0181 (2.08%),-0.0168 (9.58%)
2012-01-01,2013-12-31,314,-0.0003 (86.91%),0.0052 (14.29%),0.0013 (81.59%),0.0079 (36.04%),0.009 (38.8%),0.0238 (5.26%),0.0006 (70.65%),0.0062 (6.12%),0.004 (46.22%),0.0124 (13.29%),0.0129 (18.21%),0.0256 (2.76%)
2014-01-01,2015-12-31,389,0.0009 (68.69%),-0.0024 (52.12%),-0.0176 (0.26%),-0.0352 (0.0%),-0.0454 (0.0%),-0.0661 (0.0%),0.0005 (81.88%),-0.0032 (38.22%),-0.0176 (0.13%),-0.0317 (0.0%),-0.0401 (0.0%),-0.0544 (0.0%)
2016-01-01,2017-12-31,370,-0.0002 (91.33%),-0.0028 (44.48%),-0.0036 (51.72%),-0.0078 (31.05%),-0.0055 (55.43%),0.0013 (90.93%),-0.0 (98.62%),-0.0001 (97.4%),-0.0023 (64.99%),-0.004 (58.41%),-0.0029 (74.47%),0.0036 (74.5%)
2018-01-01,2019-12-31,250,-0.0033 (17.89%),-0.0052 (25.51%),-0.0102 (18.33%),-0.031 (0.58%),-0.0582 (0.0%),-0.0723 (0.0%),-0.003 (20.9%),-0.0034 (44.09%),-0.0061 (38.42%),-0.0213 (3.95%),-0.0377 (0.23%),-0.0471 (0.12%)




