In [1]:
import os
import numpy as np
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
from kagglehub import dataset_download
from cleanco.clean import custom_basename
from cleanco.clean import prepare_default_terms
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
client = OpenAI(api_key = api_key)

  from .autonotebook import tqdm as notebook_tqdm


## Load NASDAQ traded stock symbols

In [2]:
datadir = 'stock-market-dataset'
fn = 'symbols_valid_meta.csv'
nasdaq = pd.read_csv(os.path.join(datadir, fn))
nasdaq

Unnamed: 0,Nasdaq Traded,Symbol,Security Name,Listing Exchange,Market Category,ETF,Round Lot Size,Test Issue,Financial Status,CQS Symbol,NASDAQ Symbol,NextShares
0,Y,A,"Agilent Technologies, Inc. Common Stock",N,,N,100.0,N,,A,A,N
1,Y,AA,Alcoa Corporation Common Stock,N,,N,100.0,N,,AA,AA,N
2,Y,AAA,Alternative Access First Priority CLO Bond ETF,P,,Y,100.0,N,,AAA,AAA,N
3,Y,AAAU,Goldman Sachs Physical Gold ETF Shares,Z,,Y,100.0,N,,AAAU,AAAU,N
4,Y,AACG,ATA Creativity Global - American Depositary Sh...,Q,S,N,100.0,N,N,,AACG,N
...,...,...,...,...,...,...,...,...,...,...,...,...
8654,Y,ZVOL,Volatility Premium Plus ETF,Z,,Y,100.0,N,,ZVOL,ZVOL,N
8655,Y,ZVRA,"Zevra Therapeutics, Inc. - Common Stock",Q,Q,N,100.0,N,N,,ZVRA,N
8656,Y,ZWS,Zurn Elkay Water Solutions Corporation Common ...,N,,N,100.0,N,,ZWS,ZWS,N
8657,Y,ZYME,Zymeworks Inc. - Common Stock,Q,Q,N,100.0,N,N,,ZYME,N


In [3]:
nasdaq_stocks = nasdaq.loc[nasdaq.ETF == 'N'].copy()
nasdaq_stocks

Unnamed: 0,Nasdaq Traded,Symbol,Security Name,Listing Exchange,Market Category,ETF,Round Lot Size,Test Issue,Financial Status,CQS Symbol,NASDAQ Symbol,NextShares
0,Y,A,"Agilent Technologies, Inc. Common Stock",N,,N,100.0,N,,A,A,N
1,Y,AA,Alcoa Corporation Common Stock,N,,N,100.0,N,,AA,AA,N
4,Y,AACG,ATA Creativity Global - American Depositary Sh...,Q,S,N,100.0,N,N,,AACG,N
5,Y,AACT,Ares Acquisition Corporation II Class A Ordina...,N,,N,100.0,N,,AACT,AACT,N
7,Y,AAL,"American Airlines Group, Inc. - Common Stock",Q,Q,N,100.0,N,N,,AAL,N
...,...,...,...,...,...,...,...,...,...,...,...,...
8653,Y,ZVIA,Zevia PBC Class A Common Stock,N,,N,100.0,N,,ZVIA,ZVIA,N
8655,Y,ZVRA,"Zevra Therapeutics, Inc. - Common Stock",Q,Q,N,100.0,N,N,,ZVRA,N
8656,Y,ZWS,Zurn Elkay Water Solutions Corporation Common ...,N,,N,100.0,N,,ZWS,ZWS,N
8657,Y,ZYME,Zymeworks Inc. - Common Stock,Q,Q,N,100.0,N,N,,ZYME,N


In [4]:
nasdaq_stocks_health = pd.read_csv(os.path.join(datadir, 'nasdaq_screener_healthcare.csv'))
nasdaq_stocks_health = nasdaq_stocks.loc[nasdaq_stocks.Symbol.isin(nasdaq_stocks_health.Symbol)]
nasdaq_stocks_health

Unnamed: 0,Nasdaq Traded,Symbol,Security Name,Listing Exchange,Market Category,ETF,Round Lot Size,Test Issue,Financial Status,CQS Symbol,NASDAQ Symbol,NextShares
23,Y,ABBV,AbbVie Inc. Common Stock,N,,N,100.0,N,,ABBV,ABBV,N
25,Y,ABCL,AbCellera Biologics Inc. - Common Shares,Q,Q,N,100.0,N,N,,ABCL,N
27,Y,ABEO,Abeona Therapeutics Inc. - Common Stock,Q,S,N,100.0,N,N,,ABEO,N
39,Y,ABOS,"Acumen Pharmaceuticals, Inc. - Common Stock",Q,Q,N,100.0,N,N,,ABOS,N
41,Y,ABP,"Abpro Holdings, Inc - Common Stock",Q,G,N,100.0,N,D,,ABP,N
...,...,...,...,...,...,...,...,...,...,...,...,...
8650,Y,ZTS,Zoetis Inc. Class A Common Stock,N,,N,100.0,N,,ZTS,ZTS,N
8652,Y,ZURA,Zura Bio Limited - Class A Ordinary shares,Q,S,N,100.0,N,N,,ZURA,N
8655,Y,ZVRA,"Zevra Therapeutics, Inc. - Common Stock",Q,Q,N,100.0,N,N,,ZVRA,N
8657,Y,ZYME,Zymeworks Inc. - Common Stock,Q,Q,N,100.0,N,N,,ZYME,N


## Load FDA product recalls

In [5]:
pth = dataset_download('mexwell/fda-product-recalls')
recalls = pd.read_csv(os.path.join(pth, 'fda_product_recalls.csv'))
datecol = 'center_classification_date'
recalls[datecol] = pd.to_datetime(recalls[datecol])
recalls

Unnamed: 0,fei_number,recalling_firm_name,product_type,product_classification,status,distribution_pattern,recalling_firm_city,recalling_firm_state,recalling_firm_country,center_classification_date,reason_for_recall,product_description,event_id,event_classification,product_id,center,recall_details
0,3.002602e+09,Lamb Weston Sales,Food/Cosmetics,Class I,Ongoing,"Distributed in CA, IA, IL, KS, LA MO, MS, NM, ...",Kennewick,Washington,United States,2023-04-21,Undeclared Wheat in foodservice item Hashbrown...,"G5300 Lamb's Supreme Hash Brown Patties, Froze...",92014,Class I,199418,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
1,3.012438e+09,Fresh Express Incorpated,Food/Cosmetics,Class I,Ongoing,Product was shipped to the following states: F...,Windermere,Florida,United States,2023-04-21,The firm was notified by one of their customer...,Fresh EXPRESS Chopped Kit Caesar Romaine Lettu...,92068,Class I,199573,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
2,3.012438e+09,Fresh Express Incorpated,Food/Cosmetics,Class I,Ongoing,Product was shipped to the following states: F...,Windermere,Florida,United States,2023-04-21,The firm was notified by one of their customer...,Fresh Express Chopped Kit Chipotle Cheddar TOT...,92068,Class I,199574,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
3,3.012438e+09,Fresh Express Incorpated,Food/Cosmetics,Class I,Ongoing,Product was shipped to the following states: F...,Windermere,Florida,United States,2023-04-21,The firm was notified by one of their customer...,PREMIUM MAKOTO HONEY GINGER SALAD KIT TOTAL NE...,92068,Class I,199575,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
4,1.000222e+09,"Blood Bank Computer Systems, Inc",Biologics,Class II,Terminated,"GA, DE, TX, MO, PA, CA, FL, KY, IA, MI, IL, an...",Auburn,Washington,United States,2023-04-21,Blood Bank Computer Systems has discovered in ...,"ABO Wheels, Version 1.1.0",91219,Class II,197268,CBER,https://www.accessdata.fda.gov/scripts/ires/?P...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83150,3.004404e+09,Panera Bread LLC,Food/Cosmetics,Class II,Terminated,Nationwide,Saint Louis,Missouri,United States,2012-06-08,Product ingredient statement reversed for Red...,"Panera ,HAZELNUT CREAM CHEESE SPREAD Reduced F...",61831,Class II,109200,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
83151,3.004162e+09,"DSM Nutritional Products, Inc.",Food/Cosmetics,Class II,Terminated,"NJ, WI, IL",Parsippany,New Jersey,United States,2012-06-08,Flavor is contaminated with Salmonella,GB Select Roast Meat Type Flavor Net Wt. 55 lb...,61936,Class II,109523,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
83152,3.002727e+09,Best West Foods,Food/Cosmetics,Class II,Terminated,NV only.,Las Vegas,Nevada,United States,2012-06-08,Soy was not included in the ingredient stateme...,"Florentine Lasagna Rolls;\r\nPerishable, keep ...",61968,Class II,109609,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
83153,3.002727e+09,Best West Foods,Food/Cosmetics,Class II,Terminated,NV only.,Las Vegas,Nevada,United States,2012-06-08,Soy was not included in the ingredient stateme...,"Cheese Lasagna Rolls;\r\nPerishable, keep froz...",61968,Class II,109610,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...


## Select class I medical recalls

In [6]:
msk_med = recalls.product_type.isin(['Drugs', 'Devices', 'Biologics'])
msk_c1 = (recalls.event_classification == 'Class I')
recalls_medc1 = recalls.loc[msk_med & msk_c1].copy()
recalls_medc1

Unnamed: 0,fei_number,recalling_firm_name,product_type,product_classification,status,distribution_pattern,recalling_firm_city,recalling_firm_state,recalling_firm_country,center_classification_date,reason_for_recall,product_description,event_id,event_classification,product_id,center,recall_details
26,2.936999e+06,Covidien,Devices,Class I,Ongoing,Worldwide - US Nationwide distribution includi...,Boulder,Colorado,United States,2023-04-19,"A manufacturing error, resulted in a less than...",Shiley Adult Flexible Tracheostomy Tube with T...,91943,Class I,199257,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
157,3.014732e+09,"Fresenius Kabi USA, LLC",Devices,Class I,Ongoing,"Domestic: CA, CO, NJ, WI, & UT. No foreign dis...",North Andover,Massachusetts,United States,2023-04-11,Fluid ingress that can cause a loss of electri...,"Ivenix Infusion System (IIS), Large Volume Pum...",91783,Class I,198841,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
185,3.002803e+09,"Abbott Diabetes Care, Inc.",Devices,Class I,Ongoing,U.S. Nationwide.,Alameda,California,United States,2023-04-06,Lithium-ion batteries in glucose monitoring sy...,"FreeStyle Libre Reader, REF: 71525-01, 71701-0...",91756,Class I,198772,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
186,3.002803e+09,"Abbott Diabetes Care, Inc.",Devices,Class I,Ongoing,U.S. Nationwide.,Alameda,California,United States,2023-04-06,Lithium-ion batteries in glucose monitoring sy...,"FreeStyle Libre Reader, REF: 71936-01, 71937-0...",91756,Class I,198773,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
187,3.002803e+09,"Abbott Diabetes Care, Inc.",Devices,Class I,Ongoing,U.S. Nationwide.,Alameda,California,United States,2023-04-06,Lithium-ion batteries in glucose monitoring sy...,"FreeStyle Libre Reader, REF: 71951-01, 71952-0...",91756,Class I,198813,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82896,1.039215e+06,Nidek Medical Products Inc,Devices,Class I,Terminated,Worldwide Distribution-USA (nationwide) and th...,Birmingham,Alabama,United States,2012-06-19,Capacitor failure may result in a fire hazard ...,"NIDEK Medical MARK5 NUVO / M5C5, 115 V ~60Hz -...",61843,Class I,109227,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
82908,1.641527e+06,"Physicians Total Care, Inc",Drugs,Class I,Terminated,FL,Tulsa,Oklahoma,United States,2012-06-18,Labeling: Label mix-up; Bottles labeled to con...,"Morphine Sulfate Extended Release tablet, 30 m...",61233,Class I,107624,CDER,https://www.accessdata.fda.gov/scripts/ires/?P...
82909,1.641527e+06,"Physicians Total Care, Inc",Drugs,Class I,Terminated,FL,Tulsa,Oklahoma,United States,2012-06-18,Labeling: Label mix-up; Bottles labeled to con...,"Morphine Sulfate Immediate Release tablet, 30 ...",61233,Class I,107625,CDER,https://www.accessdata.fda.gov/scripts/ires/?P...
82998,2.126677e+06,"GE Healthcare, LLC",Devices,Class I,Terminated,Nationwide Distribution - including the states...,Waukesha,Wisconsin,United States,2012-06-17,GE Healthcare has recently become aware of a p...,"GE Healthcare, Aestiva/5 7900 SmartVent, anest...",61639,Class I,108604,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...


In [7]:
colnames = ['event_id', 'recalling_firm_name', datecol,
            'reason_for_recall', 'recall_details']
recalls_events = (recalls_medc1.loc[:, colnames]
    .drop_duplicates(colnames[:-2]).set_index('event_id'))
recalls_events

Unnamed: 0_level_0,recalling_firm_name,center_classification_date,reason_for_recall,recall_details
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
91943,Covidien,2023-04-19,"A manufacturing error, resulted in a less than...",https://www.accessdata.fda.gov/scripts/ires/?P...
91783,"Fresenius Kabi USA, LLC",2023-04-11,Fluid ingress that can cause a loss of electri...,https://www.accessdata.fda.gov/scripts/ires/?P...
91756,"Abbott Diabetes Care, Inc.",2023-04-06,Lithium-ion batteries in glucose monitoring sy...,https://www.accessdata.fda.gov/scripts/ires/?P...
91837,"Philips Respironics, Inc.",2023-04-06,A limited number of remediated Philips DreamSt...,https://www.accessdata.fda.gov/scripts/ires/?P...
91832,"Philips Respironics, Inc.",2023-04-03,The accuracy of delivered oxygen may deviate b...,https://www.accessdata.fda.gov/scripts/ires/?P...
...,...,...,...,...
62108,"Fresenius Medical Care Holdings, Inc.",2012-06-25,Risk of Alkalosis with acetate containing dial...,https://www.accessdata.fda.gov/scripts/ires/?P...
61843,Nidek Medical Products Inc,2012-06-19,Capacitor failure may result in a fire hazard ...,https://www.accessdata.fda.gov/scripts/ires/?P...
61233,"Physicians Total Care, Inc",2012-06-18,Labeling: Label mix-up; Bottles labeled to con...,https://www.accessdata.fda.gov/scripts/ires/?P...
61639,"GE Healthcare, LLC",2012-06-17,GE Healthcare has recently become aware of a p...,https://www.accessdata.fda.gov/scripts/ires/?P...


## Keep recalling firms with multiple recalls

In [8]:
recalling_firm_name = recalls_events.recalling_firm_name.copy()
basename = lambda x: custom_basename(x, prepare_default_terms(), middle = True)
for i in range(2):
    for _ in range(3): recalling_firm_name = recalling_firm_name.map(basename)
    recalling_firm_name = (recalling_firm_name
                           .str.lower()
                           .str.replace(r"\/|\.", ' ', regex = True)
                           .str.replace(r'[^\w\s\-]', '', regex = True)
                           .str.replace(r'\s+', ' ', regex = True)
                           .str.replace(r'(?<=\b\w) (?=\w\b)', '', regex = True)
                           .str.strip())
recalls_events['recalling_firm_name'] = recalling_firm_name.copy() 
recalls_events.sort_values('recalling_firm_name', inplace = True)
recalls_events

Unnamed: 0_level_0,recalling_firm_name,center_classification_date,reason_for_recall,recall_details
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
86022,4e brands north america,2020-10-07,Chemical Contamination and Subpotent Drug: Pro...,https://www.accessdata.fda.gov/scripts/ires/?P...
87436,a-s medication solutions,2021-04-30,Labeling: Label Mix-up; The bottle of over-the...,https://www.accessdata.fda.gov/scripts/ires/?P...
86037,aaa cosmetica,2020-09-11,Chemical Contamination: Product contains metha...,https://www.accessdata.fda.gov/scripts/ires/?P...
79893,abbott,2018-05-16,Reports of outflow graft twist occlusions. Pa...,https://www.accessdata.fda.gov/scripts/ires/?P...
66886,abbott diabetes care,2013-12-17,Certain lots of FreeStyle and FreeStyle Lite B...,https://www.accessdata.fda.gov/scripts/ires/?P...
...,...,...,...,...
81127,zimmer biomet,2018-11-01,Lack of adequate validation and controls to en...,https://www.accessdata.fda.gov/scripts/ires/?P...
75971,zimmer biomet,2017-02-10,Higher than anticipated rate of fracturing due...,https://www.accessdata.fda.gov/scripts/ires/?P...
67080,zions rx formulations services dba rx formuati...,2014-04-07,Non-Sterility: RX Formulation initiated this r...,https://www.accessdata.fda.gov/scripts/ires/?P...
65394,zydus pharmaceuticals usa,2013-08-02,Failed Tablet/Capsule Specifications: A produc...,https://www.accessdata.fda.gov/scripts/ires/?P...


In [9]:
msk = recalls_events.duplicated('recalling_firm_name', keep = False)
recalls_events = recalls_events.loc[msk]; recalls_events

Unnamed: 0_level_0,recalling_firm_name,center_classification_date,reason_for_recall,recall_details
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
66886,abbott diabetes care,2013-12-17,Certain lots of FreeStyle and FreeStyle Lite B...,https://www.accessdata.fda.gov/scripts/ires/?P...
91756,abbott diabetes care,2023-04-06,Lithium-ion batteries in glucose monitoring sy...,https://www.accessdata.fda.gov/scripts/ires/?P...
67472,abbott diabetes care,2014-03-18,Abbott Diabetes Care has identified through in...,https://www.accessdata.fda.gov/scripts/ires/?P...
64876,abbott diabetes care,2013-05-09,"AT rare, extremely high glucose levels (1024 m...",https://www.accessdata.fda.gov/scripts/ires/?P...
73243,abbott vascular,2016-03-07,Abbott Vascular has recently received reports ...,https://www.accessdata.fda.gov/scripts/ires/?P...
...,...,...,...,...
71272,zimmer,2015-06-08,Zimmer is initiating a voluntary recall of 64 ...,https://www.accessdata.fda.gov/scripts/ires/?P...
81127,zimmer biomet,2018-11-01,Lack of adequate validation and controls to en...,https://www.accessdata.fda.gov/scripts/ires/?P...
75971,zimmer biomet,2017-02-10,Higher than anticipated rate of fracturing due...,https://www.accessdata.fda.gov/scripts/ires/?P...
65394,zydus pharmaceuticals usa,2013-08-02,Failed Tablet/Capsule Specifications: A produc...,https://www.accessdata.fda.gov/scripts/ires/?P...


In [10]:
recalls_firms = recalls_events.recalling_firm_name.drop_duplicates()
recalls_firms

event_id
66886                abbott diabetes care
73243                     abbott vascular
68283               abrams royal pharmacy
80850                   accord healthcare
85721              acella pharmaceuticals
                       ...               
74111                      vyaire medical
82393    vyaire medical carefusion viasys
63683                              zimmer
81127                       zimmer biomet
65394           zydus pharmaceuticals usa
Name: recalling_firm_name, Length: 161, dtype: object

## Infer recalling firms' stock symbols

In [31]:
fn = 'recalling_firm_symbols.csv'

# def get_symbol(name, model = 'gpt-4o', ref = nasdaq_stocks_health.Symbol.tolist()):
#     prompt = (f"{name}:\nWITH GREAT CARE AND SKEPTICISM RETURN THE CORPORATION'S STOCK TICKER "
#               f"SYMBOL FOR THE BIOPHARMA OR HEALTHCARE COMPANY AND NOTHING ELSE\n{name}")
#     out = client.responses.create(
#         model = model,
#         input = prompt,
#         temperature = 0
#         ).output[0].content
#     symbol = out[0].text.strip() if out else out
#     symbol = symbol if symbol in ref else None
#     print(name, f'({symbol})')
#     return symbol

# with open(os.path.join(datadir, fn), 'w') as f:
#     f.write('recalling_firm_name,Symbol\n')
#     for ix in recalls_firms.index:
#         name = recalls_firms.loc[ix]
#         symbol = get_symbol(name)
#         f.write(f'{name},{symbol}\n')

recalls_symbols = pd.read_csv(os.path.join(datadir, fn))
recalls_symbols = recalls_symbols.loc[~recalls_symbols.Symbol.isna()]
recalls_symbols.set_index('recalling_firm_name', inplace = True)
recalls_symbols

Unnamed: 0_level_0,Symbol
recalling_firm_name,Unnamed: 1_level_1
abbott diabetes care,ABT
abbott vascular,ABT
alcon research,ALC
avanos medical,AVNS
baxter englewood,BAX
baxter healthcare,BAX
becton dickinson,BDX
boston scientific,BSX
bristol-myers squibb,BMY
cardinal health,CAH


In [32]:
recalls_events = recalls_events.join(
    other = recalls_symbols,
    on = 'recalling_firm_name',
    how = 'inner')
recalls_events

Unnamed: 0_level_0,recalling_firm_name,center_classification_date,reason_for_recall,recall_details,Symbol
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
66886,abbott diabetes care,2013-12-17,Certain lots of FreeStyle and FreeStyle Lite B...,https://www.accessdata.fda.gov/scripts/ires/?P...,ABT
91756,abbott diabetes care,2023-04-06,Lithium-ion batteries in glucose monitoring sy...,https://www.accessdata.fda.gov/scripts/ires/?P...,ABT
67472,abbott diabetes care,2014-03-18,Abbott Diabetes Care has identified through in...,https://www.accessdata.fda.gov/scripts/ires/?P...,ABT
64876,abbott diabetes care,2013-05-09,"AT rare, extremely high glucose levels (1024 m...",https://www.accessdata.fda.gov/scripts/ires/?P...,ABT
73243,abbott vascular,2016-03-07,Abbott Vascular has recently received reports ...,https://www.accessdata.fda.gov/scripts/ires/?P...,ABT
...,...,...,...,...,...
71799,teva pharmaceuticals usa,2015-08-24,Presence of Particulate Matter: silcone rubber...,https://www.accessdata.fda.gov/scripts/ires/?P...,TEVA
63683,zimmer,2012-12-21,Zimmer Spine has received reports of the PEEK ...,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH
71272,zimmer,2015-06-08,Zimmer is initiating a voluntary recall of 64 ...,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH
81127,zimmer biomet,2018-11-01,Lack of adequate validation and controls to en...,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH


## Compile market-adjusted daily returns

In [33]:
start_date, end_date = '2009-01-01', '2023-12-31'
ix = pd.date_range(start_date, end_date, freq = 'B')
cols = nasdaq_stocks.Symbol.copy()
X = pd.DataFrame(index = ix, columns = cols, dtype = np.float64)
datadir = os.path.join(datadir, 'stocks')
for col in X.columns:
    pth = os.path.join(datadir, col + '.csv')
    df = pd.read_csv(pth, header = [0, 1, 2], index_col = 0)
    df.index = pd.to_datetime(df.index)
    X[col] = df['Adj Close']
R = X.pct_change().dropna(how = 'all')
R = R.sub(R.mean(1), 0); R

  R = X.pct_change().dropna(how = 'all')


Symbol,A,AA,AACG,AACT,AAL,AAME,AAMI,AAOI,AAON,AAP,...,ZTO,ZTR,ZTS,ZUMZ,ZURA,ZVIA,ZVRA,ZWS,ZYME,ZYXI
2009-01-05,0.019879,-0.034632,-0.053988,,-0.021139,-0.013988,,,-0.011593,-0.027462,...,,0.004504,,0.032766,,,,,,-0.091910
2009-01-06,0.052366,-0.005520,-0.067125,,0.060192,-0.204658,,,-0.028876,-0.025958,...,,-0.018798,,-0.008833,,,,,,0.099317
2009-01-07,0.028395,-0.078607,-0.022576,,0.025086,0.038263,,,-0.005829,-0.016848,...,,0.000021,,0.117885,,,,,,-0.033371
2009-01-08,-0.004338,0.032239,-0.026071,,0.043045,-0.010920,,,-0.005502,-0.007524,...,,-0.007995,,0.010215,,,,,,-0.004297
2009-01-09,0.030094,-0.027585,0.031819,,-0.038731,0.308709,,,0.005642,-0.001324,...,,0.014999,,0.001222,,,,,,0.060304
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-25,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2023-12-26,-0.009391,-0.008149,0.025927,-0.009193,-0.025087,-0.020589,-0.007924,0.080756,-0.001030,-0.016498,...,0.005523,-0.001953,-0.008443,-0.005013,-0.070046,0.033890,-0.015134,-0.003762,0.075360,-0.004531
2023-12-27,-0.006246,0.021435,0.243682,-0.009189,-0.014823,0.027175,0.009034,-0.019464,-0.005509,0.004188,...,-0.011276,-0.004503,0.000843,0.002268,-0.026520,0.003251,0.100753,-0.013944,0.000236,-0.000716
2023-12-28,-0.003775,-0.010887,0.082296,-0.003418,-0.004133,0.015101,0.000232,-0.030949,-0.003148,-0.004555,...,0.036443,-0.005229,-0.002097,0.021620,-0.013727,-0.017636,0.036728,-0.012105,-0.008069,0.007724


## Find FDA recalls' market impact dates

In [34]:
days_window, days_baseline, z_thresh = 60, 3*360, 3
recalls_events['market_impact_date'] = pd.NaT
for ix in recalls_events.index:
    symbol = recalls_events.loc[ix].Symbol
    win_end = recalls_events.loc[ix, datecol]
    win_start = win_end - pd.Timedelta(days = days_window)
    bl_start = win_start - pd.Timedelta(days = days_baseline)
    Rwin = R.loc[win_start : win_end, symbol]
    Rbl = R.loc[bl_start : win_start, symbol]
    z = (Rwin - Rbl.mean()) / Rbl.std()
    dt_impact = Rwin.index[z < -z_thresh].max()
    recalls_events.loc[ix, 'market_impact_date'] = dt_impact
recalls_events

Unnamed: 0_level_0,recalling_firm_name,center_classification_date,reason_for_recall,recall_details,Symbol,market_impact_date
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
66886,abbott diabetes care,2013-12-17,Certain lots of FreeStyle and FreeStyle Lite B...,https://www.accessdata.fda.gov/scripts/ires/?P...,ABT,NaT
91756,abbott diabetes care,2023-04-06,Lithium-ion batteries in glucose monitoring sy...,https://www.accessdata.fda.gov/scripts/ires/?P...,ABT,NaT
67472,abbott diabetes care,2014-03-18,Abbott Diabetes Care has identified through in...,https://www.accessdata.fda.gov/scripts/ires/?P...,ABT,NaT
64876,abbott diabetes care,2013-05-09,"AT rare, extremely high glucose levels (1024 m...",https://www.accessdata.fda.gov/scripts/ires/?P...,ABT,NaT
73243,abbott vascular,2016-03-07,Abbott Vascular has recently received reports ...,https://www.accessdata.fda.gov/scripts/ires/?P...,ABT,NaT
...,...,...,...,...,...,...
71799,teva pharmaceuticals usa,2015-08-24,Presence of Particulate Matter: silcone rubber...,https://www.accessdata.fda.gov/scripts/ires/?P...,TEVA,NaT
63683,zimmer,2012-12-21,Zimmer Spine has received reports of the PEEK ...,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH,2012-11-30
71272,zimmer,2015-06-08,Zimmer is initiating a voluntary recall of 64 ...,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH,NaT
81127,zimmer biomet,2018-11-01,Lack of adequate validation and controls to en...,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH,2018-10-26


In [35]:
recalls_events = recalls_events.loc[~recalls_events.market_impact_date.isna()]
recalls_events

Unnamed: 0_level_0,recalling_firm_name,center_classification_date,reason_for_recall,recall_details,Symbol,market_impact_date
event_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
91506,baxter healthcare,2023-02-24,There is the potential for patient desaturatio...,https://www.accessdata.fda.gov/scripts/ires/?P...,BAX,2023-02-09
61355,baxter healthcare,2012-06-13,Recall expansion; the infusion pumps have the ...,https://www.accessdata.fda.gov/scripts/ires/?P...,BAX,2012-04-16
90730,baxter healthcare,2022-09-09,Firm noted an increase in customer reports of ...,https://www.accessdata.fda.gov/scripts/ires/?P...,BAX,2022-07-28
72409,boston scientific,2015-11-12,Boston Scientific is recalling its recently re...,https://www.accessdata.fda.gov/scripts/ires/?P...,BSX,2015-09-17
87014,boston scientific,2021-01-19,There is potential for fractures which results...,https://www.accessdata.fda.gov/scripts/ires/?P...,BSX,2020-12-03
86947,boston scientific,2021-01-09,Failure to execute the visual inspection corre...,https://www.accessdata.fda.gov/scripts/ires/?P...,BSX,2020-12-03
69931,boston scientific,2015-01-13,Lotus valve became unlocked during release fro...,https://www.accessdata.fda.gov/scripts/ires/?P...,BSX,2014-12-26
88266,cardinal health,2021-08-16,The current Argyle UVC Insertion Tray does not...,https://www.accessdata.fda.gov/scripts/ires/?P...,CAH,2021-08-05
88421,cardinal health,2021-08-21,Potential for the plunger to draw back after t...,https://www.accessdata.fda.gov/scripts/ires/?P...,CAH,2021-08-05
81441,dr reddys laboratories,2019-02-11,Labeling: Label Error on Declared Strength; th...,https://www.accessdata.fda.gov/scripts/ires/?P...,RDY,2019-02-11


## Compile ground-truth spillover network

In [None]:
days_window, edgelist = 2, list()
in_symbols = nasdaq_stocks_health.Symbol.copy()
for ix in recalls_events.index:
    symbol_i = recalls_events.loc[ix].Symbol
    win_start = recalls_events.loc[ix].market_impact_date
    win_end = win_start + pd.Timedelta(days = days_window)
    bl_start = win_start - pd.Timedelta(days = days_baseline)
    Rwin = R.loc[win_start : win_end, in_symbols]
    Rbl = R.loc[bl_start : win_start, in_symbols]
    z = (Rwin - Rbl.mean(0)) / Rbl.std(0)
    symbols_j = ((abs(z) > z_thresh).sum(0) > 0)
    symbols_j = symbols_j.index[symbols_j].tolist()
    edgelist.extend([(symbol_i, j) for j in symbols_j])
edgelist = list(set(edgelist))
refNetwork = pd.DataFrame(edgelist, columns = ['Gene1', 'Gene2'])
refNetwork.sort_values(['Gene1', 'Gene2'], inplace = True)
refNetwork.to_csv(os.path.join('DELAY', 'refNetwork.csv'), index = False)
refNetwork

Unnamed: 0,Gene1,Gene2
1308,BAX,AKBA
3112,BAX,ALBT
1220,BAX,BAX
1,BAX,BGM
3633,BAX,BHC
...,...,...
3632,ZBH,XRAY
95,ZBH,XTLB
2705,ZBH,XTNT
1626,ZBH,ZBH


In [71]:
n_nodes_in = refNetwork.Gene2.unique().size
C_norm = refNetwork.groupby('Gene1').size() / n_nodes_in
C_norm.sort_values(ascending = False, inplace = True)
print(C_norm, '\n', C_norm.mean())

Gene1
BSX     0.922256
MDT     0.807927
FMS     0.804878
PEN     0.774390
EW      0.753049
ZBH     0.443598
ICUI    0.414634
TFX     0.347561
LMAT    0.114329
VTRS    0.074695
BAX     0.071646
RDY     0.030488
CAH     0.028963
PODD    0.024390
NEPH    0.018293
PHG     0.013720
dtype: float64 
 0.3528010670731707
