In [14]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import os, sys, time
from tqdm.notebook import tqdm

# url for 510k SaMD details page
eq_url = 'https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpmn/pmn.cfm?ID='
# url for De Novo SaMD details page
denovo_url = 'https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpmn/denovo.cfm?id='
# url for product codes
product_code_url = 'https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode='

base_fda_data_url = 'www.accessdata.fda.gov'

In [7]:
# download base submission data (requires manual table extraction to kickstart)
#   - TODO: provide instructions for table extraction.

cols = []
data = []

soup = BeautifulSoup(open('aiml_samd_09222021.htm'))
for header in soup.findAll("th"):
    cols.append(header.string.lower().replace(' ', "_")) 

cols.append('submission_type')

for i, row in enumerate(soup.findAll('tr')):
    if i == 0: continue
    row_data = []
    for cell in row.findAll('td'):
        row_data.append(cell.string)

    if row_data[1].startswith('K'):
        row_data.append('510k')
    elif row_data[1].startswith('DEN'):
        row_data.append('DENOVO')
    else:
        row_data.append(None)
        
    data.append(row_data)

df_submissions = pd.DataFrame(data, columns=cols)

df_submissions.head(10)

['JAK', 'LLZ', 'MXD', 'PIB', 'QPF', 'QAS', 'KPS', 'QIH', 'QKB', 'MUJ', 'QFM', 'QNP', 'MQB', 'QOK', 'OEB', 'LNH', 'QME', 'POK', 'QNV', 'NQQ', 'QNL', 'QDQ', 'POV', 'JOY', 'DQD', 'QJU', 'DQK', 'PRH', 'IYN', 'MLO', 'QBS', 'IYO', 'DSI', 'DPS', 'IYE', 'MWI', 'BZG', 'QEK', 'HAW', 'QJB', 'MRZ', 'MUD', 'GKZ', 'JAA', 'PIW', 'GXY', 'PLB', 'PJA', 'JFY', 'PBH', 'QER', 'QAQ', 'PPU', 'OZE', 'DQA', 'DXH', 'NDC', 'QEA', 'POS', 'PCS', 'OMB', 'GWN', 'JIL', 'OLO', 'QCC', 'NAY', 'OBH', 'PNN', 'DSB', 'DRG', 'DSH', 'DPT', 'MNR', 'PTA', 'OLZ', 'NBW', 'MYN', 'PBZ', 'OWB', 'NFJ', 'KPR', 'PEX', 'OIW', 'MHX']


Unnamed: 0,date_of_final_decision,submission_number,device,company,panel_(lead),primary_product_code,submission_type
0,06/17/2021,K203514,Precise Position,"Philips Healthcare (Suzhou) Co., Ltd.",Radiology,JAK,510k
1,06/16/2021,K202718,Qmenta Care Platform Family,"Mint Labs, Inc., D/B/A. QMENTA",Radiology,LLZ,510k
2,06/11/2021,K210484,"LINQ II Insertable Cardiac Monitor, Zelda AI E...","Medtronic, Inc.",Cardiovascular,MXD,510k
3,06/10/2021,K203629,IDx-DR,Digital Diagnostics Inc.,Ophthalmic,PIB,510k
4,06/02/2021,DEN200069,Cognoa Asd Diagnosis Aid,"Cognoa, Inc.",Neurology,QPF,DENOVO
5,05/19/2021,K210237,CINA CHEST,Avicenna.AI,Radiology,QAS,510k
6,04/30/2021,K210001,HYPER AiR,"Shanghai United Imaging Healthcare Co.,Ltd.",Radiology,KPS,510k
7,04/23/2021,K203314,Cartesion Prime (PCD-1000A/3) V10.8,Canon Medical Systems Corporation,Radiology,KPS,510k
8,04/23/2021,K203502,MEDO-Thyroid,MEDO DX Pte. Ltd.,Radiology,QIH,510k
9,04/21/2021,K210556,Preview Shoulder,Genesis Software Innovations,Radiology,QIH,510k


In [8]:
# download submission pages
with tqdm(total=len(df_submissions), desc='Submission HTML Download') as pbar:
    for i, row in df_submissions.iterrows():
        if row.submission_type == '510k':
            url = eq_url + row.submission_number
        if row.submission_type == 'DENOVO':
            url = denovo_url + row.submission_number 
        
        res = requests.get(url)
        soup = BeautifulSoup(res.text, 'html.parser')
        
        data_path = os.path.join('data', 'submission_html')
        os.makedirs(data_path, exist_ok=True)

        with open(os.path.join(data_path, f'{row.submission_number}.htm'), 'w') as f:
            f.write(str(soup))

        pbar.update(1)

  0%|          | 0/343 [00:00<?, ?it/s]

In [10]:
# download product code HTML
product_codes = df_submissions.primary_product_code.unique().tolist()

with tqdm(total=len(product_codes), desc='Product Code HTML Download') as pbar:
    for i, pc in enumerate(product_codes):

        url = product_code_url + pc
        res = requests.get(url)
        soup = BeautifulSoup(res.text, 'html.parser')
        
        data_path = os.path.join('data', 'product_code_html')
        os.makedirs(data_path, exist_ok=True)
        
        with open(os.path.join(data_path, f'{pc}.htm'), 'w') as f:
            f.write(str(soup))

        pbar.update(1) 

Product Code HTML Download:   0%|          | 0/84 [00:00<?, ?it/s]

In [13]:
# extract submission tables
def extract_fda_data_table(data_path, id, table_chars):
    soup = BeautifulSoup(open(os.path.join(data_path, f'{id}.htm')))
    t = soup.find('table', table_chars)

    if t is not None:
        with open(os.path.join(os.path.join(data_path, f'{id}_table.htm')), 'w') as f:
            f.write(str(t.prettify()))

data_path = os.path.join('data', 'submission_html')
os.makedirs(data_path, exist_ok=True)

with tqdm(total=len(df_submissions), desc='Submissions Extraction') as pbar:
    for i, row in df_submissions.iterrows():
        if row.submission_type == 'DENOVO':
            extract_fda_data_table(
                data_path, 
                row.submission_number, 
                {'style': 'text-transform: capitalize; table-layout:fixed; width:500px'}
            )
        if row.submission_type == '510k':
            extract_fda_data_table(
                data_path,
                row.submission_number,
                {'style': 'text-transform: none'}
            )
        
        pbar.update(1)

data_path = os.path.join('data', 'product_code_html')
os.makedirs(data_path, exist_ok=True)
with tqdm(total=len(product_codes), desc='Product Codes Extraction') as pbar:
    for code in product_codes: 
        extract_fda_data_table(
            data_path, 
            code, 
            {'width': '600', 'cellspacing': '5'}
        )
        pbar.update(1)

Submissions Extraction:   0%|          | 0/343 [00:00<?, ?it/s]

Product Codes Extraction:   0%|          | 0/84 [00:00<?, ?it/s]

In [106]:
# generate full submission table

# extract submission headers
df_submission_denovo = df_submissions[df_submissions.submission_type == 'DENOVO']

submission_denovo_headers = set()
for i, row in df_submission_denovo.iterrows():
    soup = BeautifulSoup(open(os.path.join('data', 'submission_html', f'{row.submission_number}_table.htm')))
    headers = []
    for header in soup.findAll('th'):
        headers.append(header.string.replace('\n', '').strip())
    submission_denovo_headers = submission_denovo_headers | set(headers)

submission_denovo_headers = list(submission_denovo_headers)

denovo_data = []
url_map = {}
for i, sub in df_submission_denovo.iterrows():
    soup = BeautifulSoup(open(os.path.join('data', 'submission_html', f'{sub.submission_number}_table.htm')))
    obj = { key: None for key in submission_denovo_headers }
    obj['id'] = i

    table = soup.table
    # print(table)
    for row in table.findAll('tr'):
        # try: 
        if row.th is None: continue
        header = row.th.string.replace('\n', '').strip()
        data = row.td
        
        if data.a is not None:
            link = data.a.get('href')
            url = requests.compat.urlparse(link)

            if url.scheme is None or url.scheme == '':
                url = url._replace(scheme='https')
            if url.netloc is None or url.netloc == '':
                url = url._replace(netloc=base_fda_data_url)

            if url_map.get(f'{header} URL', None) is None:
                url_map[f'{header} URL'] = []
            
            url_map[f'{header} URL'].append(
                {
                    'id': i, 
                    f'{header} URL': url.geturl()
                }
            )

            data = data = f'{data.get_text()}'

        elif data.table is not None:
            # data = ' '.join([x.text for x in data.table.findAll('td')])
            data = data.get_text()
        else:
            data = data.get_text()

        data = data \
            .replace('\r', '') \
            .replace('\n', '') \
            .replace('\t', '') \
            .strip()

        obj[header] = data
        # except: continue
    
    denovo_data.append(obj)
    
df_denovo = pd.DataFrame.from_dict(denovo_data)
df_denovo = df_denovo.set_index('id')

df_denovo_urls: pd.DataFrame = None
for i, col in enumerate(url_map):
    df = pd.DataFrame.from_dict(url_map[col])
    df = df.set_index('id')
    df_denovo_urls = df if df_denovo_urls is None else df_denovo_urls.join(df)

df_denovo = pd.merge(df_denovo, df_denovo_urls, on='id')
df_denovo.head()


Unnamed: 0_level_0,Device Name,FDA Review,Classification Advisory Committee,Type,Reclassification Order,Expedited Review,Date Received,Requester,Regulation Number,510(K) Number,...,Classification Product Code,Decision,Review Advisory Committee,Device Classification Name,Decision Date,Device Classification Name URL,Regulation Number URL,Classification Product Code URL,Reclassification Order URL,FDA Review URL
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4,Cognoa ASD Diagnosis Aid,,Neurology,Direct,Reclassification Order,,11/03/2020,"Cognoa, Inc. 2185 park blvd. ...",882.1491,,...,QPF,granted (DENG),Neurology,pediatric autism spectrum disorder diagnosis aid,06/02/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,
14,GI Genius,,Gastroenterology/Urology,Direct,Reclassification Order,,09/08/2020,"Cosmo Artificial Intelligence - AI, LTD ...",876.152,,...,QNP,granted (DENG),Gastroenterology/Urology,gastrointesinal lesion software detection system,04/09/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,
16,Gili Pro BioSensor (also known as “Gili BioSen...,Decision Summary,Cardiovascular,Direct,Reclassification Order,,06/12/2020,ContinUse Biometrics Ltd. habarzel 3...,870.2786,,...,QOK,granted (DENG),Cardiovascular,hardware and software for optical camera-based...,04/01/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,http://www.accessdata.fda.gov/cdrh_docs/review...
20,Oxehealth Vital Signs,Decision Summary,Cardiovascular,Direct,Reclassification Order,,03/27/2020,Oxehealth Limited magdalen center no...,870.2785,,...,QME,granted (DENG),Cardiovascular,software for optical camera-based measurement ...,03/26/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,http://www.accessdata.fda.gov/cdrh_docs/review...
26,Analytic for Hemodynamic Instability (AHI),,Cardiovascular,Direct,Reclassification Order,,04/03/2020,"Fifth Eye Inc. 110 miller avenue, su...",870.222,,...,QNV,granted (DENG),Cardiovascular,adjunctive hemodynamic indicator with decision...,03/01/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,


In [95]:
url_map.keys()

dict_keys(['Device Classification Name URL', 'Regulation Number URL', 'Classification Product Code URL', 'Reclassification Order URL', 'FDA Review URL'])

In [125]:
urls_df = None
for i, col in enumerate(url_map):
    for row in url_map[col]:
        print(requests.compat.urlparse(row[col]).path)

/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfpcd/classification.cfm
/scripts/cdrh/cfdocs/cfcfr/cfrsearch.cfm
/scripts/cdrh/cfdocs/cfcfr/cfrsearch.cfm
/scripts/cdrh/cfdocs/cfcfr/cfrsearch.cfm
/scripts/cdrh/cfdocs/cfcfr/cfrsearch.cfm
/scripts/cdrh/cfdocs/cfcfr/cfrsearch.cfm
/scripts/cdrh/cfdocs/cfcfr/cfrsearch.cfm
/scripts/cdrh/cfdo

In [98]:
pd.merge(df_denovo, urls_df, on='id')

Unnamed: 0_level_0,Device Name,FDA Review,Classification Advisory Committee,Type,Reclassification Order,Expedited Review,Date Received,Requester,Regulation Number,510(K) Number,...,Classification Product Code,Decision,Review Advisory Committee,Device Classification Name,Decision Date,Device Classification Name URL,Regulation Number URL,Classification Product Code URL,Reclassification Order URL,FDA Review URL
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
4,Cognoa ASD Diagnosis Aid,,Neurology,Direct,Reclassification Order,,11/03/2020,"Cognoa, Inc. 2185 park blvd. ...",882.1491,,...,QPF,granted (DENG),Neurology,pediatric autism spectrum disorder diagnosis aid,06/02/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,
14,GI Genius,,Gastroenterology/Urology,Direct,Reclassification Order,,09/08/2020,"Cosmo Artificial Intelligence - AI, LTD ...",876.152,,...,QNP,granted (DENG),Gastroenterology/Urology,gastrointesinal lesion software detection system,04/09/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,
16,Gili Pro BioSensor (also known as “Gili BioSen...,Decision Summary,Cardiovascular,Direct,Reclassification Order,,06/12/2020,ContinUse Biometrics Ltd. habarzel 3...,870.2786,,...,QOK,granted (DENG),Cardiovascular,hardware and software for optical camera-based...,04/01/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,http://www.accessdata.fda.gov/cdrh_docs/review...
20,Oxehealth Vital Signs,Decision Summary,Cardiovascular,Direct,Reclassification Order,,03/27/2020,Oxehealth Limited magdalen center no...,870.2785,,...,QME,granted (DENG),Cardiovascular,software for optical camera-based measurement ...,03/26/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,http://www.accessdata.fda.gov/cdrh_docs/review...
26,Analytic for Hemodynamic Instability (AHI),,Cardiovascular,Direct,Reclassification Order,,04/03/2020,"Fifth Eye Inc. 110 miller avenue, su...",870.222,,...,QNV,granted (DENG),Cardiovascular,adjunctive hemodynamic indicator with decision...,03/01/2021,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf20...,
132,Caption Guidance,Decision Summary,Radiology,Direct,Reclassification Order,,08/27/2019,"Bay Labs, Inc. 290 king street ...",892.21,,...,QJU,granted (DENG),Radiology,image acquisition and/or optimization guided b...,02/07/2020,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf19...,http://www.accessdata.fda.gov/cdrh_docs/review...
213,EyeBOX,Decision Summary,Neurology,Direct,Reclassification Order,,12/22/2017,"Oculogica, Inc. 33 irving place ...",882.1455,,...,QEA,granted (DENG),Neurology,brain injury adjunctive interpretive oculomoto...,12/28/2018,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf17...,http://www.accessdata.fda.gov/cdrh_docs/review...
251,DreaMed Advisor Pro,Decision Summary,Clinical Chemistry,Direct,Reclassification Order,,08/17/2017,"DreaMed Diabetes, Ltd. 3 shimshon st...",862.1358,,...,QCC,granted (DENG),Clinical Chemistry,insulin pump therapy adjustment calculator for...,06/12/2018,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf17...,http://www.accessdata.fda.gov/cdrh_docs/review...
254,OsteoDetect,Decision Summary,Radiology,Direct,Reclassification Order,,02/05/2018,"Imagen Technologies, Inc. 33 east 33...",892.209,,...,QBS,granted (DENG),Radiology,radiological computer assisted detection/diagn...,05/24/2018,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf18...,http://www.accessdata.fda.gov/cdrh_docs/review...
261,IDx-DR,Decision Summary,Ophthalmic,Direct,Reclassification Order,,01/12/2018,"IDx, LLC 458 highway 1 west ...",886.11,,...,PIB,granted (DENG),Ophthalmic,diabetic retinopathy detection device,04/11/2018,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/scripts/cdrh/cf...,https://www.accessdata.fda.gov/cdrh_docs/pdf18...,http://www.accessdata.fda.gov/cdrh_docs/review...


In [None]:
df_submission_510k = df_submissions[df_submissions.submission_type == '510k']
submission_510k_headers = set()
for i, row in df_submission_510k.iterrows():
    soup = BeautifulSoup(open(os.path.join('data', 'submission_html', f'{row.submission_number}_table.htm')))
    headers = []
    for header in soup.findAll('th'):
        headers.append(header.string)
    submission_510k_headers = submission_510k_headers | set(headers)

print(list(submission_510k_headers))

In [17]:
# download submission pdfs (DOES NOT WORK, LINK IS DYNAMIC BY FIRST SUB DATE?)
# import requests

# url = 'http://www.hrecos.org//images/Data/forweb/HRTVBSH.Metadata.pdf'
# r = requests.get(url, stream=True)

# with open('/tmp/metadata.pdf', 'wb') as fd:
#     for chunk in r.iter_content(chunk_size):
#         fd.write(chunk)

for i, row in df_submissions.iterrows():

    year = row.date_of_final_decision[-2:]

    url = f'https://www.accessdata.fda.gov/cdrh_docs/pdf{year}/{row.submission_number}.pdf'
    res = requests.get(url)

    data_path = os.path.join('data', 'submission_pdf')
    os.makedirs(data_path, exist_ok=True)

    with open(os.path.join(data_path, f'{row.submission_number}.pdf'), 'wb') as f:
        f.write(res.content)
    
    print(f'{url} {i+1}/{len(df_submissions)}')
    time.sleep(2)
    
        

https://www.accessdata.fda.gov/cdrh_docs/pdf21/K203514.pdf 1/343
https://www.accessdata.fda.gov/cdrh_docs/pdf21/K202718.pdf 2/343
https://www.accessdata.fda.gov/cdrh_docs/pdf21/K210484.pdf 3/343
https://www.accessdata.fda.gov/cdrh_docs/pdf21/K203629.pdf 4/343
https://www.accessdata.fda.gov/cdrh_docs/pdf21/DEN200069.pdf 5/343
https://www.accessdata.fda.gov/cdrh_docs/pdf21/K210237.pdf 6/343
https://www.accessdata.fda.gov/cdrh_docs/pdf21/K210001.pdf 7/343


KeyboardInterrupt: 

In [4]:
cols = []
data = []



#     tables = soup.findAll('table')
#     table = tables[4]

#     if i == 0:
#         for row in table.findAll('tr'):
#             for header in row.findAll('th'):
#                 cols.append(header.text)

#     row_data = []
#     for row in table.findAll('tr'):
#         for header in row.findAll('th'):
#             row_data.append(header.findNext('td').text.replace('\t', '').replace('\n', '').replace('\r', ''))
        
#     data.append(row_data)
#     print(url, len(cols), len(row_data))
#     print(cols)
#     print(row_data)
#     print('---')

# df_products = pd.DataFrame(data, columns=cols)
# df_products.head()

https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode=JAK
https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode=LLZ
https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode=MXD
https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode=PIB
https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode=QPF
https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode=QAS
https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode=KPS
https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode=QIH
https://www.accessdata.fda.gov/scripts/cdrh/cfdocs/cfpcd/classification.cfm?start_search=1&amp;productcode=QKB
h