In [1]:
import pandas as pd
import requests
import json
import io
import xml.etree.ElementTree as ET


In [2]:
data = pd.read_csv("../data/drugs.txt", sep="\t", low_memory=False)

In [3]:
data['Highest Development Phase'].value_counts()

No development reported (Preclinical)        422
Discontinued (Preclinical)                   240
Discontinued (II)                            196
Marketed                                     191
Preclinical                                  171
Phase II                                     133
No development reported (I)                  117
Discontinued (I)                             105
No development reported (Research)            77
Phase I                                       76
Research                                      76
No development reported (II)                  43
Discontinued (III)                            43
Phase III                                     42
Phase Unknown                                 19
Clinical Phase Unknown                        15
No development reported (Clinical)            14
Preregistration                               14
Phase I/II                                    14
No development reported (III)                 12
Discontinued (Prereg

In [4]:
drug_list = data[data['Highest Development Phase'].str.contains("Phase")]["Drug Name"]

In [5]:
drug_list

0                    Ganaxolone - Marinus Pharmaceuticals
4                                                 STR 324
6                       Nicotinamide riboside - ChromaDex
7                                                AMG 0101
9                                                KAND 567
                              ...                        
1455    Buprenorphine/naloxone - Aoxing Pharmaceutical...
1462         Doxepin intranasal - Winston Pharmaceuticals
1468                                  Danazol transdermal
1478                                              NXN 188
1483                       Morphine-6-glucuronide - PAION
Name: Drug Name, Length: 303, dtype: object

## List of drugs from R

In [6]:
drug_list = pd.read_csv("../data/drug_list_20210503.csv")

In [7]:
drug_list = list(drug_list.transpose().values.flatten())

## Fields available per drug on clinical trials

In [8]:
fields = requests.get("https://clinicaltrials.gov/api/info/study_fields_list")

In [9]:
tree = ET.parse(io.StringIO(fields.text))

In [10]:
fieldlist = []
for i in tree.find('FieldList'):
    fieldlist.append(i.items()[0][1])

print(fieldlist)
print(len(fieldlist))

['Acronym', 'AgreementOtherDetails', 'AgreementPISponsorEmployee', 'AgreementRestrictionType', 'AgreementRestrictiveAgreement', 'ArmGroupDescription', 'ArmGroupInterventionName', 'ArmGroupLabel', 'ArmGroupType', 'AvailIPDComment', 'AvailIPDId', 'AvailIPDType', 'AvailIPDURL', 'BaselineCategoryTitle', 'BaselineClassDenomCountGroupId', 'BaselineClassDenomCountValue', 'BaselineClassDenomUnits', 'BaselineClassTitle', 'BaselineDenomCountGroupId', 'BaselineDenomCountValue', 'BaselineDenomUnits', 'BaselineGroupDescription', 'BaselineGroupId', 'BaselineGroupTitle', 'BaselineMeasureCalculatePct', 'BaselineMeasureDenomCountGroupId', 'BaselineMeasureDenomCountValue', 'BaselineMeasureDenomUnits', 'BaselineMeasureDenomUnitsSelected', 'BaselineMeasureDescription', 'BaselineMeasureDispersionType', 'BaselineMeasureParamType', 'BaselineMeasurePopulationDescription', 'BaselineMeasureTitle', 'BaselineMeasureUnitOfMeasure', 'BaselineMeasurementComment', 'BaselineMeasurementGroupId', 'BaselineMeasurementLow

In [11]:
def get_clinical_trials_data(drugname, fields):
    query0 = "https://clinicaltrials.gov/api/query/study_fields?expr="
    query1 = "&fmt=csv&field=InterventionName&fields="
    
    response = requests.get(query0 + drugname + query1 + fields)
    # find location of empty line; data follows that
    posn = response.text.find("\n\n")
    df = pd.read_csv(io.StringIO(response.text[posn+2:]))
    if df.empty:
        print(f'{drugname} not found')
        pass
    else:
        df['Name'] = drugname
        return df
    

In [12]:
fields = "OverallStatus,Condition,Phase,LastUpdatePostDate,StudyType,BriefSummary"

# test
get_clinical_trials_data('AV 101', fields).shape
get_clinical_trials_data('XEN 2174', fields)

XEN 2174 not found


In [13]:
clinical_trials_df = []

for name in drug_list:
    ctdf = get_clinical_trials_data(name, fields)
    clinical_trials_df.append(ctdf)

clinical_trials_df = pd.concat(clinical_trials_df)

Tramadol - Aytu BioPharma not found
Oxycodone/promethazine - Charleston Laboratories not found
Buprenorphine sublingual - Benuvia Therapeutics not found
Buprenorphine/naloxone sublingual - Benuvia Therapeutics not found
Oxycodone hydrochloride - SpecGx not found
Diclofenac - Benuvia Therapeutics not found
Zonisamide - Eisai Co Ltd not found
PTI 555 not found
Dextromethadone - Cornell University/Relmada Therapeutics not found
Tramadol controlled release - e-Therapeutics not found
Tramadol orodispersible - Ethypharm not found
Oxycodone controlled release - Teikoku Seiyaku not found
JNJ 38488502 not found
Fentanyl intranasal spray - Kyowa Kirin International not found
Oxycodone controlled release - Cassava Sciences not found
Efipladib not found
Nitroglycerin topical - Kyowa Kirin International not found
Capsaicin dermal patch - Grunenthal not found
Rufinamide - Novartis not found
Memantine - Children's Medical Center Corporation/Merz Pharma not found
Eliapixant - Bayer not found
Celecoxib

NCX 701 not found
Hydrocodone/paracetamol/alvimopan not found
CEP 28190 not found
Anpirtoline not found
Buprenorphine transdermal - Samyang not found
Tramadol controlled release - Toray not found
Tramadol - TheraQuest Biosciences not found
ONO 8711 not found
OT 7100 not found
Morphine - Ethypharm not found
LAS 34475 not found
WAY 195725 not found
Butorphanol intranasal not found
Fentanyl transdermal - Acrux not found
M 40419 not found
VANH 36 not found
CGX 1160 not found
Prosaptide TX14 A not found
GPI 5693 not found
Devazepide not found
Dextromethorphan/methadone - Endo not found
MH 15E not found
BMS 347070 not found
CGX 1007 not found
ADL 21294 not found
Fentanyl transdermal - 3M Drug Delivery Systems/Purdue Pharma not found
CO 102862 not found
CJC 1008 not found
Morphine/lidocaine topical spray - EpiCept not found
Clonidine gel - Curatek Pharmaceuticals not found
Morphine inhalation - Aradigm Corporation not found
BL 1832 not found
BL 1834 not found
JTC 801 not found
NGX 5020 not fo

## Check what drugs we got data for

In [14]:
returned_names = list(clinical_trials_df['Name'].unique())

In [15]:
len(drug_list)

431

## Names of drugs with no data on clinical trials

In [16]:
drugs_with_no_info = list(set(drug_list) ^ set(returned_names))

In [17]:
drugs_with_no_info

['Oxymorphone transdermal - Avecho Biotechnology',
 'Buprenorphine implant - Molteni/Titan Pharmaceuticals',
 'BMS 347070',
 'Cannabinol Topical - InMed Pharmaceuticals',
 'Levacecarnine',
 'Fentanyl patch - Kyukyu Pharmaceutical/Maruishi Pharmaceutical',
 'AM 336',
 'Paracetamol intravenous - Bristol-Myers Squibb/Mallinckrodt',
 'Nabilone controlled release - AOP Orphan Pharmaceuticals',
 'Flupirtine - Synthetic Biologics',
 'GR 253035',
 'Butibufen',
 'Tramadol intravenous - Revogenex/Avenue Therapeutics',
 'Morphine - Ethypharm',
 'Fentanyl intranasal spray - Kyowa Kirin International',
 'Morphine buccal - Generex Biotechnology',
 'Tramadol controlled release - Toray',
 'Oxycodone transdermal - Avecho Biotechnology',
 'Abuse resistant oxycodone immediate release - Inspirion Delivery Sciences',
 'Tilidine - Aoxing',
 'Tapentadol intranasal - Torrent Pharmaceuticals',
 'Cannabidiol/tetrahydrocannabinol - AusCann',
 'Fentanyl transdermal - Altea Therapeutics',
 'GR 79236',
 'PTI 501',
