In [1]:
import os
import numpy as np
import pandas as pd
from openai import OpenAI
from dotenv import load_dotenv
from kagglehub import dataset_download
from cleanco.clean import custom_basename
from cleanco.clean import prepare_default_terms
# Load variables from a local .env file into the process environment, then build the
# OpenAI client from OPENAI_API_KEY. In this notebook the client is only referenced by
# the (commented-out) LLM labelling cells.
load_dotenv()
api_key = os.environ.get('OPENAI_API_KEY')
client = OpenAI(api_key=api_key)

  from .autonotebook import tqdm as notebook_tqdm


## Match FDA recalls to Nasdaq stock symbols

In [2]:
# Load the symbol metadata table from the local stock-market dataset directory.
# NOTE(review): pandas' default NA parsing turns a literal "NA" cell into NaN;
# confirm no ticker symbol in this file is affected.
nasdaq_meta_path = os.path.join('stock-market-dataset', 'symbols_valid_meta.csv')
nasdaq = pd.read_csv(nasdaq_meta_path)
nasdaq

Unnamed: 0,Nasdaq Traded,Symbol,Security Name,Listing Exchange,Market Category,ETF,Round Lot Size,Test Issue,Financial Status,CQS Symbol,NASDAQ Symbol,NextShares
0,Y,A,"Agilent Technologies, Inc. Common Stock",N,,N,100.0,N,,A,A,N
1,Y,AA,Alcoa Corporation Common Stock,N,,N,100.0,N,,AA,AA,N
2,Y,AAA,Alternative Access First Priority CLO Bond ETF,P,,Y,100.0,N,,AAA,AAA,N
3,Y,AAAU,Goldman Sachs Physical Gold ETF Shares,Z,,Y,100.0,N,,AAAU,AAAU,N
4,Y,AACG,ATA Creativity Global - American Depositary Sh...,Q,S,N,100.0,N,N,,AACG,N
...,...,...,...,...,...,...,...,...,...,...,...,...
8140,Y,ZVOL,Volatility Premium Plus ETF,Z,,Y,100.0,N,,ZVOL,ZVOL,N
8141,Y,ZVRA,"Zevra Therapeutics, Inc. - Common Stock",Q,Q,N,100.0,N,N,,ZVRA,N
8142,Y,ZWS,Zurn Elkay Water Solutions Corporation Common ...,N,,N,100.0,N,,ZWS,ZWS,N
8143,Y,ZYME,Zymeworks Inc. - Common Stock,Q,Q,N,100.0,N,N,,ZYME,N


In [3]:
# Keep only the rows whose ETF flag is 'N'; copy so later edits never touch `nasdaq`.
is_not_etf = nasdaq['ETF'].eq('N')
nasdaq_stocks = nasdaq.loc[is_not_etf].copy()
nasdaq_stocks

Unnamed: 0,Nasdaq Traded,Symbol,Security Name,Listing Exchange,Market Category,ETF,Round Lot Size,Test Issue,Financial Status,CQS Symbol,NASDAQ Symbol,NextShares
0,Y,A,"Agilent Technologies, Inc. Common Stock",N,,N,100.0,N,,A,A,N
1,Y,AA,Alcoa Corporation Common Stock,N,,N,100.0,N,,AA,AA,N
4,Y,AACG,ATA Creativity Global - American Depositary Sh...,Q,S,N,100.0,N,N,,AACG,N
6,Y,AAL,"American Airlines Group, Inc. - Common Stock",Q,Q,N,100.0,N,N,,AAL,N
7,Y,AAME,Atlantic American Corporation - Common Stock,Q,G,N,100.0,N,N,,AAME,N
...,...,...,...,...,...,...,...,...,...,...,...,...
8139,Y,ZVIA,Zevia PBC Class A Common Stock,N,,N,100.0,N,,ZVIA,ZVIA,N
8141,Y,ZVRA,"Zevra Therapeutics, Inc. - Common Stock",Q,Q,N,100.0,N,N,,ZVRA,N
8142,Y,ZWS,Zurn Elkay Water Solutions Corporation Common ...,N,,N,100.0,N,,ZWS,ZWS,N
8143,Y,ZYME,Zymeworks Inc. - Common Stock,Q,Q,N,100.0,N,N,,ZYME,N


In [4]:
# Symbol -> normalised security name lookup table.
nasdaq_stocks_ref = nasdaq_stocks.loc[:, ['Symbol', 'Security Name']].copy()

# cleanco's default term list, extended with share-class boilerplate words
# that appear in the security names.
terms = prepare_default_terms()
terms.extend(
    (1, [word])
    for word in ('stock', 'common', 'class', 'shares', 'units', 'holding'))

# custom_basename is applied twice, presumably so that terms exposed by the
# first pass are stripped as well.
names_pass1 = nasdaq_stocks_ref['Security Name'].map(
    lambda name: custom_basename(name, terms, middle=True))
names_pass2 = names_pass1.map(
    lambda name: custom_basename(name, terms, middle=True))

# Lower-case, drop punctuation, collapse whitespace runs, trim the ends.
names_norm = names_pass2.str.lower()
names_norm = names_norm.str.replace(r'[^\w\s]', '', regex=True)
names_norm = names_norm.str.replace(r'\s+', ' ', regex=True)
nasdaq_stocks_ref['Security Name'] = names_norm.str.strip()
nasdaq_stocks_ref

Unnamed: 0,Symbol,Security Name
0,A,agilent technologies
1,AA,alcoa
4,AACG,ata creativity global american depositary each...
6,AAL,american airlines group
7,AAME,atlantic american
...,...,...
8139,ZVIA,zevia pbc a
8141,ZVRA,zevra therapeutics
8142,ZWS,zurn elkay water solutions
8143,ZYME,zymeworks


In [5]:
# Download the FDA recalls dataset via kagglehub, parse the classification date
# column to datetimes and order the rows by firm name.
recalls_pth = dataset_download('mexwell/fda-product-recalls')
recalls = (
    pd.read_csv(os.path.join(recalls_pth, 'fda_product_recalls.csv'))
    .assign(center_classification_date=lambda df: pd.to_datetime(df['center_classification_date']))
    .sort_values('recalling_firm_name')
)
recalls

Unnamed: 0,fei_number,recalling_firm_name,product_type,product_classification,status,distribution_pattern,recalling_firm_city,recalling_firm_state,recalling_firm_country,center_classification_date,reason_for_recall,product_description,event_id,event_classification,product_id,center,recall_details
45208,3.012894e+09,"1 Epic, LLC",Food/Cosmetics,Class II,Terminated,"AL, CA, FL, GA, IL, KY, MD, MI, NY, OH, PA, TN...",Gilbert,Arizona,United States,2016-11-08,1 Epic is recalling specific lots of various F...,"Fruit 66 Mixed Berry, 8 FL OZ (240 mL) 100% Ju...",75501,Class II,150557,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
45209,3.012894e+09,"1 Epic, LLC",Food/Cosmetics,Class II,Terminated,"AL, CA, FL, GA, IL, KY, MD, MI, NY, OH, PA, TN...",Gilbert,Arizona,United States,2016-11-08,1 Epic is recalling specific lots of various F...,"Fruit 66 Strawberry Melon, 8 FL OZ (240 mL) 10...",75501,Class II,150723,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
45210,3.012894e+09,"1 Epic, LLC",Food/Cosmetics,Class II,Terminated,"AL, CA, FL, GA, IL, KY, MD, MI, NY, OH, PA, TN...",Gilbert,Arizona,United States,2016-11-08,1 Epic is recalling specific lots of various F...,"Fruit 66 Tropical Punch, 8 FL OZ (240 mL) 100%...",75501,Class II,150724,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
45211,3.012894e+09,"1 Epic, LLC",Food/Cosmetics,Class II,Terminated,"AL, CA, FL, GA, IL, KY, MD, MI, NY, OH, PA, TN...",Gilbert,Arizona,United States,2016-11-08,1 Epic is recalling specific lots of various F...,"Fruit 66 Passion Fruit Mango, 8 FL OZ (240 mL)...",75501,Class II,150726,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
26958,3.010573e+09,1908 Brands,Food/Cosmetics,Class II,Terminated,Distributed to One consignee. No foreign/VA/go...,Longmont,Colorado,United States,2019-02-13,Bundle Organics snack bars are recalled due to...,"Bundle Organics Bar Chocolate Chip, 1.9 Oz sna...",82067,Class II,170832,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17888,3.005533e+09,ulrich medical USA Inc,Devices,Class II,Ongoing,"AZ, TX, GA, FL, AR, CA",Chesterfield,Missouri,United States,2020-02-24,"Due to a production error, the weld seam of th...","Ulrich medical neon3 OCT spinal stabilization,...",84638,Class II,178852,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
20860,3.015801e+09,weaver fundraising,Food/Cosmetics,Class II,Terminated,MN,Indianapolis,Indiana,United States,2019-10-24,Inaccurate Nutrition Facts and ingredient decl...,"Trails End Sweet and Savory Collection, contai...",83920,Class II,176744,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
21280,3.015801e+09,weaver fundraising,Food/Cosmetics,Class II,Terminated,Boy Scout troops in Houston and Corpus Christi TX,Indianapolis,Indiana,United States,2019-10-07,"undeclared almonds, cashews, and pecans","Trail's End Chocolatey Carmel Crunch, in 15oz ...",83810,Class II,176405,CFSAN,https://www.accessdata.fda.gov/scripts/ires/?P...
34689,3.013369e+09,www.blankterrmall.com,Drugs,Class I,Terminated,Product was distributed in the United States.,Olean,New York,United States,2018-02-20,Marketed without an approved NDA/ANDA: Product...,"Hard Times For Men capsules, packaged in a 23-...",78643,Class I,160058,CDER,https://www.accessdata.fda.gov/scripts/ires/?P...


In [6]:
# Restrict to medical product types and to events not classified as 'Class III'.
medical_types = ['Drugs', 'Devices', 'Biologics']
msk_medical = recalls['product_type'].isin(medical_types)
msk_class2p = recalls['event_classification'].ne('Class III')
recalls_medical2p = recalls[msk_medical & msk_class2p].copy()
recalls_medical2p

Unnamed: 0,fei_number,recalling_firm_name,product_type,product_classification,status,distribution_pattern,recalling_firm_city,recalling_firm_state,recalling_firm_country,center_classification_date,reason_for_recall,product_description,event_id,event_classification,product_id,center,recall_details
41251,3.010477e+09,2k Innovations Inc.,Devices,Class II,Terminated,Nationwide Distribution,Saint Augustine,Florida,United States,2017-04-12,During an FDA inspection it was found that the...,Volt Resistance Unisex Rechargeable Heated Sli...,76787,Class II,154201,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
41250,3.010477e+09,2k Innovations Inc.,Devices,Class II,Terminated,Nationwide Distribution,Saint Augustine,Florida,United States,2017-04-12,During an FDA inspection it was found that the...,Volt Resistance Heated Slipper (All sizes)\r\...,76787,Class II,154200,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
41248,3.010477e+09,2k Innovations Inc.,Devices,Class II,Terminated,Nationwide Distribution,Saint Augustine,Florida,United States,2017-04-12,During an FDA inspection it was found that the...,Volt Resistance Heated Slipper Black (All size...,76787,Class II,154198,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
41241,3.010477e+09,2k Innovations Inc.,Devices,Class II,Terminated,Nationwide Distribution,Saint Augustine,Florida,United States,2017-04-12,During an FDA inspection it was found that the...,Volt Resistance Heated Lower Back Garment\r\nD...,76787,Class II,154190,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
41242,3.010477e+09,2k Innovations Inc.,Devices,Class II,Terminated,Nationwide Distribution,Saint Augustine,Florida,United States,2017-04-12,During an FDA inspection it was found that the...,Volt Resistance Heated Therapy Shoulder Wrap\r...,76787,Class II,154192,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53749,3.004505e+09,the Compounder,Drugs,Class II,Terminated,Nationwide,Aurora,Illinois,United States,2015-11-10,Recalled products were made using an active in...,"ESTRIOL (E3) 2.5MG suppositories, Rx only, The...",71535,Class II,140411,CDER,https://www.accessdata.fda.gov/scripts/ires/?P...
25419,3.005533e+09,ulrich medical USA Inc,Devices,Class II,Terminated,"AZ, IL, LA, MI, MO, NV, OH, PA, TN, and TX.",Chesterfield,Missouri,United States,2019-04-11,Certain Torque Limiting Handles in the field a...,ULRICH Torque Limiting Handle for use with a S...,82497,Class II,171706,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
40916,3.005533e+09,ulrich medical USA Inc,Devices,Class II,Terminated,"US Nationwide in the states of: AL, AR, AZ, CA...",Chesterfield,Missouri,United States,2017-04-25,Surgical technique was revised to prevent stat...,uNion Cervical Plate System\r\n\r\nProduct Usa...,76740,Class II,154069,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...
17888,3.005533e+09,ulrich medical USA Inc,Devices,Class II,Ongoing,"AZ, TX, GA, FL, AR, CA",Chesterfield,Missouri,United States,2020-02-24,"Due to a production error, the weld seam of th...","Ulrich medical neon3 OCT spinal stabilization,...",84638,Class II,178852,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...


In [7]:
# Normalise recalling firm names and collect the distinct names.
# The term list is reset to cleanco's defaults here.
terms = prepare_default_terms()

firm_names = recalls_medical2p['recalling_firm_name'].map(
    lambda firm: custom_basename(firm, terms, middle=True))
# Lower-case and turn slashes into spaces before punctuation is removed.
firm_names = firm_names.str.lower().str.replace('/', ' ')
firm_names = firm_names.str.replace(r'[^\w\s]', '', regex=True)
firm_names = firm_names.str.replace(r'\s+', ' ', regex=True)
# Remove the space between two adjacent single-character tokens (e.g. "a b" -> "ab").
firm_names = firm_names.str.replace(r'(?<=\b\w) (?=\w\b)', '', regex=True)
recalls_medical2p['recalling_firm_name'] = firm_names.str.strip()

recalls_medical2p = recalls_medical2p.sort_values('recalling_firm_name')
recalls_firms = recalls_medical2p['recalling_firm_name'].drop_duplicates()
recalls_firms

41251                        2k innovations
6406                              3d matrix
72405    3m company 3m espe dental products
55244               3m health care business
2577                 3m healthcare business
                        ...                
66894                          zoll medical
23761             zydus pharmaceuticals usa
65453                         zynex medical
75937                          zyno medical
48721                     zyto technologies
Name: recalling_firm_name, Length: 3462, dtype: object

In [8]:
# def get_nasdaq_symbol(name, symbols_ref = nasdaq_stocks_ref, n_iter = 3):
#     symbols = [None] * n_iter
#     for i in range(n_iter):
#         prompt = ""
#         if i > 0:
#             if symbols[i - 1]:
#                 prompt += f"""
#                 Your last response was: {symbols[i - 1]}. This is the Nasdaq symbol for {security}. Is {security}
#                 the same company as {name}? Or is {security} the correct parent company for {name}?"""
#             else:
#                 prompt += f"""
#                 You previously returned no response. Is {name} or its parent company listed on the Nasdaq exchange?"""
#         prompt += f"""
#         {name} is an FDA-regulated food, drug, medical device, biophamaceutical, or diagnostics company,
#         or one of its subsidiaries. If either {name} or its parent company is listed on the Nasdaq exchange,
#         return ONLY the ticker symbol.
#         """
#         resp = client.chat.completions.create(
#             messages = [{'role' : 'user', 'content' : prompt}],
#             model = 'gpt-4.1',
#             temperature = 0, seed = 42,
#             max_completion_tokens = 8)
#         symbol_i = resp.choices[0].message.content.strip()
#         symbol_i = symbol_i if symbol_i in symbols_ref.Symbol.tolist() else None
#         symbols[i] = symbol_i
#         if symbol_i:
#             msk = symbols_ref.Symbol.isin([symbol_i])
#             security = symbols_ref.loc[msk, 'Security Name'].values[0]
#         else:
#             security = None
#     symbol = max(set(symbols), key = symbols.count)
#     print(name, f'({symbol})')
#     return symbol

# with open(os.path.join('fda-product-recalls', 'recalls_nasdaq_symbols.csv'), 'w') as f:
#     f.write('recalling_firm_name,Symbol\n')
#     for ix in recalls_firms.index:
#         name = recalls_firms.loc[ix]
#         symbol = get_nasdaq_symbol(name)
#         f.write(f'{name},{symbol}\n')

# Load the cached firm-name -> symbol table and keep only rows that have a symbol.
symbols_csv = os.path.join('fda-product-recalls', 'recalls_nasdaq_symbols.csv')
recalls_nasdaq = pd.read_csv(symbols_csv)
recalls_nasdaq = recalls_nasdaq[recalls_nasdaq['Symbol'].notna()]
recalls_nasdaq

Unnamed: 0,recalling_firm_name,Symbol
2,3m company 3m espe dental products,MMM
3,3m health care business,MMM
4,3m healthcare business,MMM
19,abbott,ABT
20,abbott diabetes care,ABT
...,...,...
3447,zimmer spine,ZBH
3448,zimmer surgical,ZBH
3449,zimmer trabecular metal technology,ZBH
3453,zmedica,ICUI


In [9]:
# Inner join on the normalised firm name: recalls whose firm has no row in
# `recalls_nasdaq` are dropped, the rest gain a `Symbol` column.
recalls_medical2p = recalls_medical2p.merge(
    recalls_nasdaq, how='inner', on='recalling_firm_name')
# recalls_medical2p.to_csv(os.path.join('fda-product-recalls', 'fda_product_recalls_medical2p.csv'))
recalls_medical2p

Unnamed: 0,fei_number,recalling_firm_name,product_type,product_classification,status,distribution_pattern,recalling_firm_city,recalling_firm_state,recalling_firm_country,center_classification_date,reason_for_recall,product_description,event_id,event_classification,product_id,center,recall_details,Symbol
0,3.005174e+09,3m company 3m espe dental products,Devices,Class II,Terminated,Worldwide Distribution - USA Nationwide in the...,Saint Paul,Minnesota,United States,2013-08-27,3M ESPE is recalling Unitek Primary Stainless ...,3M ESPE Unitek Primary Stainless Steel Crowns ...,65913,Class II,120837,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM
1,2.110898e+06,3m health care business,Drugs,Class II,Terminated,"Nationwide, Puerto Rico, and to foreign distri...",Saint Paul,Minnesota,United States,2015-09-08,Chemical contamination: Product may be contami...,DuraPrep Surgical Solution Iodine Povacrylex (...,71837,Class II,139184,CDER,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM
2,2.110898e+06,3m health care business,Devices,Class II,Terminated,Worldwide Distribution - US: Nationwide and th...,Saint Paul,Minnesota,United States,2015-07-08,3M is initiating a Field Correction to remove...,Block Versions Used by Dental Offices often re...,71456,Class II,137871,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM
3,2.110898e+06,3m health care business,Devices,Class II,Terminated,"CA, MI, IL, MO, MN, GA, and Canada, Switzerland",Saint Paul,Minnesota,United States,2017-11-18,"During a recent investigation, 3M confirmed th...","3M Bair Hugger(TM) Normothermia System, Temper...",78327,Class II,159193,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM
4,2.110898e+06,3m health care business,Devices,Class II,Terminated,"AL, AR, AZ, CA, CO, CT, DE, FL, GA, IA, IL, IN...",Saint Paul,Minnesota,United States,2018-02-07,Mold was found on the non-patient contact\r\ns...,"ACE(TM) BRAND, ULTRA LITE ANKLE brace: (a) Siz...",78444,Class II,159601,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15589,3.005751e+09,zimmer trabecular metal technology,Devices,Class II,Terminated,Nationwide and Foreign.,Parsippany,New Jersey,United States,2016-06-10,Zimmer Biomet is initiating a recall of specif...,"TM CR TIB SZ 3 C-H, 10MM- 00588604310\r\n\r\n\...",73536,Class II,145299,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH
15590,3.004139e+09,zmedica,Devices,Class II,Terminated,Worldwide Distribution - US (Nationwide)\r\nFo...,Wallingford,Connecticut,United States,2017-06-19,Packaging breach may compromise sterility,"QuikClot TraumaPad, , sterile, soft, white, X-...",77316,Class II,155692,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,ICUI
15591,3.004139e+09,zmedica,Devices,Class II,Terminated,Nationally,Wallingford,Connecticut,United States,2018-03-08,A customer complained that one of the pouches ...,"QuikClot TraumaPad, Part# 460 a topical dressi...",79407,Class II,162235,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,ICUI
15592,3.004139e+09,zmedica,Devices,Class II,Ongoing,\nUS Nationwide distribution in the state of NC.,Wallingford,Connecticut,United States,2021-11-05,Lack of Packaging seal integrity may result in...,QuikClot Combat Gauze-For Temporary External U...,88818,Class II,189630,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,ICUI


In [10]:
# Load the symbol audit table and discard exact duplicate rows.
audit_csv = os.path.join('fda-product-recalls', 'recalls_nasdaq_symbols_audit_gpt5.csv')
recalls_nasdaq_audit = pd.read_csv(audit_csv).drop_duplicates()
recalls_nasdaq_audit

Unnamed: 0,event_id,recalling_firm_name,Symbol_original,Symbol_audit
0,62293,alere san diego,ABT,ALR
1,61927,alere san diego,ABT,ALR
3,63175,alere san diego,ABT,ALR
4,63091,alere san diego,ABT,ALR
5,64701,alere san diego,ABT,ALR
...,...,...,...,...
1716,69866,zimmer trabecular metal technology,ZBH,ZMH
1717,70605,zimmer trabecular metal technology,ZBH,ZMH
1718,77316,zmedica,ICUI,
1719,79407,zmedica,ICUI,


In [11]:
# Events absent from the audit table keep their original symbol; audited events
# take the audit's symbol (which may be missing).
msk_original = ~recalls_medical2p['event_id'].isin(recalls_nasdaq_audit['event_id'])
audit_dict = recalls_nasdaq_audit.set_index('event_id')['Symbol_audit'].to_dict()
audited_symbol = recalls_medical2p['event_id'].map(audit_dict)
recalls_medical2p['Symbol_audit'] = audited_symbol.mask(msk_original, recalls_medical2p['Symbol'])

# Keep only rows whose final symbol appears in the non-ETF symbol table.
is_listed = recalls_medical2p['Symbol_audit'].isin(nasdaq_stocks['Symbol'])
recalls_medical2p = recalls_medical2p.loc[is_listed]
# recalls_medical2p.to_csv(os.path.join('fda-product-recalls', 'fda_product_recalls_medical2p_audit.csv'))
recalls_medical2p

Unnamed: 0,fei_number,recalling_firm_name,product_type,product_classification,status,distribution_pattern,recalling_firm_city,recalling_firm_state,recalling_firm_country,center_classification_date,reason_for_recall,product_description,event_id,event_classification,product_id,center,recall_details,Symbol,Symbol_audit
0,3.005174e+09,3m company 3m espe dental products,Devices,Class II,Terminated,Worldwide Distribution - USA Nationwide in the...,Saint Paul,Minnesota,United States,2013-08-27,3M ESPE is recalling Unitek Primary Stainless ...,3M ESPE Unitek Primary Stainless Steel Crowns ...,65913,Class II,120837,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM,MMM
1,2.110898e+06,3m health care business,Drugs,Class II,Terminated,"Nationwide, Puerto Rico, and to foreign distri...",Saint Paul,Minnesota,United States,2015-09-08,Chemical contamination: Product may be contami...,DuraPrep Surgical Solution Iodine Povacrylex (...,71837,Class II,139184,CDER,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM,MMM
2,2.110898e+06,3m health care business,Devices,Class II,Terminated,Worldwide Distribution - US: Nationwide and th...,Saint Paul,Minnesota,United States,2015-07-08,3M is initiating a Field Correction to remove...,Block Versions Used by Dental Offices often re...,71456,Class II,137871,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM,MMM
3,2.110898e+06,3m health care business,Devices,Class II,Terminated,"CA, MI, IL, MO, MN, GA, and Canada, Switzerland",Saint Paul,Minnesota,United States,2017-11-18,"During a recent investigation, 3M confirmed th...","3M Bair Hugger(TM) Normothermia System, Temper...",78327,Class II,159193,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM,MMM
4,2.110898e+06,3m health care business,Devices,Class II,Terminated,"AL, AR, AZ, CA, CO, CT, DE, FL, GA, IA, IL, IN...",Saint Paul,Minnesota,United States,2018-02-07,Mold was found on the non-patient contact\r\ns...,"ACE(TM) BRAND, ULTRA LITE ANKLE brace: (a) Siz...",78444,Class II,159601,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,MMM,MMM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15587,3.005751e+09,zimmer trabecular metal technology,Devices,Class II,Terminated,Nationwide and Foreign.,Parsippany,New Jersey,United States,2016-06-10,Zimmer Biomet is initiating a recall of specif...,TM MONO TIB STR GRN SZ 5 - 00588606510\r\nTM M...,73536,Class II,145915,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH,ZBH
15588,3.005751e+09,zimmer trabecular metal technology,Devices,Class II,Terminated,Nationwide and Foreign.,Parsippany,New Jersey,United States,2016-06-10,Zimmer Biomet is initiating a recall of specif...,POROUS PATELLA 32MM X 10MM - 00587806532\r\n\r...,73536,Class II,144426,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH,ZBH
15589,3.005751e+09,zimmer trabecular metal technology,Devices,Class II,Terminated,Nationwide and Foreign.,Parsippany,New Jersey,United States,2016-06-10,Zimmer Biomet is initiating a recall of specif...,"TM CR TIB SZ 3 C-H, 10MM- 00588604310\r\n\r\n\...",73536,Class II,145299,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,ZBH,ZBH
15592,3.004139e+09,zmedica,Devices,Class II,Ongoing,\nUS Nationwide distribution in the state of NC.,Wallingford,Connecticut,United States,2021-11-05,Lack of Packaging seal integrity may result in...,QuikClot Combat Gauze-For Temporary External U...,88818,Class II,189630,CDRH,https://www.accessdata.fda.gov/scripts/ires/?P...,ICUI,TFX


In [None]:
# def get_event_importance(event_id, recalls_df = recalls_medical2p, nasdaq_symbols_df = nasdaq_stocks_ref):
#     event_df = recalls_df.loc[recalls_df.event_id == event_id].copy()
#     event_firm = event_df.iloc[0].recalling_firm_name
#     event_class = event_df.iloc[0].event_classification
#     event_symbol = event_df.iloc[0].Symbol_audit
#     event_security = nasdaq_symbols_df.loc[nasdaq_symbols_df.Symbol == event_symbol, 'Security Name'].values[0]
    
#     prompt = f"""
#     Below is a list of FDA-regulated medical products manufactured or sold by {event_firm}, which led to a {event_class} recall.
#     Entry Format:
#     Product Type
#     Product Classification
#     Product Description
#     Distribution Pattern
#     Recall Reason
#     Status"""

#     for ix in event_df.index:
#         df_ix = event_df.loc[ix]
#         prompt += f"""
#         \n{df_ix.product_type}
#         {df_ix.product_classification}
#         {df_ix.product_description}
#         {df_ix.distribution_pattern}
#         {df_ix.reason_for_recall}
#         {df_ix.status}"""

#     prompt += f"""
#     \nThe Nasdaq symbol for {event_firm} or its parent company {event_security} is {event_symbol}. Due to the
#     geographic, economic, and safety scope of the {event_class} recall, as well as the size of {event_firm}
#     and {event_security}, predict the importance of the recall on {event_symbol}'s near-term price performance.
#     Return ONLY: High, Medium, or Low."""

#     resp = client.chat.completions.create(
#         messages = [{'role' : 'user', 'content' : prompt}],
#         model = 'gpt-4.1',
#         temperature = 0, seed = 42,
#         max_completion_tokens = 8)
    
#     importance = resp.choices[0].message.content.strip()
#     importance = importance if importance in ['High', 'Medium', 'Low'] else None
#     print(event_id, importance)
#     return importance

# with open(os.path.join('fda-product-recalls', 'recalls_event_importance.csv'), 'w') as f:
#     f.write('event_id,importance\n')
#     for event_id in recalls_medical2p.event_id.unique():
#         f.write(f'{event_id},{get_event_importance(event_id)}\n')

## Preprocess training data for DELAY

In [13]:
# Build a (calendar day x symbol) matrix, fill it from the per-symbol CSVs,
# drop the days on which the row sum is not positive, and apply log1p.
start_date = '2012-06-08'
end_date = '2023-04-21'
ix_date = pd.date_range(start=start_date, end=end_date, freq='D')
col_names = recalls_medical2p['Symbol_audit'].sort_values().unique()
X = pd.DataFrame(0., index=ix_date, columns=col_names)

for symbol in X.columns:
    prices_path = os.path.join('stock-market-dataset', 'stocks', f'{symbol}.csv')
    # The per-symbol files are read with a three-row header.
    prices_raw = pd.read_csv(prices_path, header=[0, 1, 2])
    # assumes column 0 holds the dates and column 1 the price series to use — TODO confirm
    price_dates = prices_raw.iloc[:, 0].values
    X.loc[price_dates, symbol] = prices_raw.iloc[:, 1].values

has_prices = X.sum(axis=1) > 0
X = np.log1p(X.loc[has_prices])
# X.T.to_csv('NormalizedData.csv'); X

In [14]:
# Evenly spaced pseudo-time: 0 for the first row of X, 1 for the last.
n_days = X.shape[0]
t = pd.Series(np.arange(n_days) / (n_days - 1), index=X.index, name='PseudoTime')
# t.to_csv('PseudoTime.csv'); t

In [None]:
# Write the symbols (the columns of X) to TranscriptionFactors.csv, one per line,
# with no header and no index column.
# Series.to_csv returns None when given a path, so bind `tf` to the Series itself
# rather than to the return value of the write.
tf = pd.Series(X.columns)
tf.to_csv('TranscriptionFactors.csv', index=False, header=False)

In [None]:
# Seed NumPy's global RNG so the sampled subset is reproducible.
np.random.seed(1)
# Every symbol starts with label 1; a random 20% of them are relabelled 2.
# NOTE(review): presumably 1/2 denote the two data splits — confirm against the consumer of splitLabels.csv.
split = pd.Series(1, index=X.columns, name='Split')
held_out = split.sample(frac=0.2).index
split.loc[held_out] = 2
split.to_csv('splitLabels.csv')
split

## Construct ground-truth network from recalls

In [None]:
# Build one row per distinct (firm, classification date, symbol) recall event and
# align each to the first date in X.index on or after its classification date.
#
# This cell previously looked symbols up in `results`, a name that is never defined
# in this notebook (NameError on Restart & Run All), keyed on the raw firm names in
# `recalls`. `recalls_medical2p` already carries the audited symbol for every recall
# row, and the columns of X are built from its `Symbol_audit` values, so the events
# are taken from it directly.
events = (
    recalls_medical2p[['recalling_firm_name', 'center_classification_date', 'Symbol_audit']]
    .rename(columns={'Symbol_audit': 'Symbol'})
    # merge_asof rejects null keys; rows without a symbol cannot be matched to X
    .dropna(subset=['Symbol', 'center_classification_date'])
    .drop_duplicates()
    # merge_asof requires the left key to be sorted
    .sort_values('center_classification_date')
)
events = pd.merge_asof(
    events, pd.Series(X.index, name='date'),
    left_on='center_classification_date',
    right_on='date', direction='forward')
# NOTE(review): events dated after the last row of X get date = NaT — confirm how
# downstream code should treat them.
events

In [None]:
# plt.figure()
# for N in range(1, 15):
#     day0 = X.values[X.index.get_indexer(events.date), X.columns.get_indexer(events.Symbol)]
#     ixN = X.index.get_indexer(events.date) + N
#     valid = ixN < X.shape[0]
#     day0 = day0[valid]
#     dayN = X.values[ixN[valid], X.columns.get_indexer(events.Symbol)[valid]]
#     events.loc[valid, f'change_{N}'] = dayN - day0
#     plt.scatter([N], [np.quantile(dayN - day0, .2)])

#     # events.hist(f'change_{N}', bins = np.linspace(-1, 1, 50), alpha = .25, ax = plt.gca(), label = N)
# # plt.yscale('log')
# plt.legend()
# plt.show()