In [1]:
from textwrap import wrap
import pandas as pd
import requests
import json
from pandas import json_normalize # tranform JSON file into a pandas dataframe
import time
import re

pd.set_option('display.max_colwidth', 100)

In [22]:
def build_url_from_query(query):
    
    query_tokenized = query.split()
    
    field_values = ['NCTId', 'LeadSponsorName', 'BriefTitle', 'Condition', 'Phase', 'StudyType',
                    'EnrollmentCount', 'StartDate', 'PrimaryCompletionDate', 'EligibilityCriteria', 'InterventionName', 
                    'ArmGroupInterventionName', 'ArmGroupDescription', 'InterventionArmGroupLabel', 'OutcomeMeasureType', 'OutcomeMeasureTitle',
                    'OutcomeMeasureDescription', 'OutcomeMeasureTimeFrame', 'OutcomeMeasurementValue', 'OutcomeMeasureUnitOfMeasure']
    
    max_rank = 1000             # max number of items returned by the API query (max for the clinicaltrials.gov API is 1000)
    
    
    url = 'https://clinicaltrials.gov/api/query/study_fields?expr='
    
    for i, word in enumerate(query_tokenized):              # build query URL by adding all search terms and field values to the query URL, following the appropriate format
        if i == 0:
            url = url + word
        else:
            url = url + '+' + word
        
    url = url + '&fields='
    
    for i, word in enumerate(field_values):
        if i == 0:
            url = url + word
        else:
            url = url + '%2C' + word
    
    url = url + '&min_rnk=1&max_rnk=' + str(max_rank) + '&fmt=json' 
    url = url.strip()
    
    # print('\n\nQuerying up to 1,000 trials from clinicaltrials.gov with the following url...\n\n'+url+'\n\n')
    
    return url

In [80]:
def build_study_table(url):
        
    ## convert the clinicaltrials.gov JSON response to a pandas dataframe
    
    result = requests.get(url).json()
    
    # loop through the list and identify ONLY studies with outcome measures reported 
    # result_list = [result for result in result['StudyFieldsResponse']['StudyFields'] if result['OutcomeMeasureType']]     
    # result_list = [result for result in result['StudyFieldsResponse']['StudyFields']]
    
    result_list = [result for result in result['StudyFieldsResponse']['StudyFields'] if result['StudyType'][0] == 'Interventional']
    
    
    df_master = json_normalize(result_list[0])      # initialize dataframe using JSON result

    df_master = clean_columns(df_master)

    try:
        for study in result_list[1:]:        # concatenate each study to the dataframe to generate master dataframe
            df = json_normalize(study)
            df = clean_columns(df)
            df_master = pd.concat([df_master, df], axis=0, ignore_index=True)
    except:
        print("There was an error!")
    
#     df_master = df_master[(df_master['Phase'].str.match('.*3.*')) | (df_master['Phase'].str.match('.*2.*'))]
#     df_master = df_master[df_master['Phase'].str.match('.*3.*')].drop('Rank', 1).reset_index().drop('index', 1)
    df_master = df_master[df_master['Phase'].str.match('.*3.*')]
    
    return df_master

In [35]:
def clean_columns(df_master):      # this is a single-row dataframe
    for col in df_master.columns[1:]:    
        if len(df_master[col][0]) == 0:
            df_master[col] = 'Unreported'
        elif (len(df_master[col][0]) == 1) and (not re.search('Outcome.*', col)):
            df_master[col] = df_master[col][0]
        elif (len(df_master[col][0]) > 1) and (not re.search('Outcome.*', col)):
            df_master[col] = ', '.join(df_master[col][0])
    
    return df_master

# is it an outcome column?
# is length greater than 0?

# scenarios
# is any column, 0 length - confirmed
# is outcome column, any length - confirmed
# is not outcome column, 1 length - confirmed
# is not outcome column, >1 length - confirmed

In [4]:
def map_measure_values(df_master):      # this is a single-row dataframe
    if df_master
    num_of_outcomes = len(df_master['OutcomeMeasureType'][0])
    temp_list = []
    try:                            
        measures_per_outcome = len(df_master['OutcomeMeasurementValue'][0])//len(df_master['OutcomeMeasureTitle'][0])
    except:
        measures_per_outcome = 1
    
    beginning = 0
    increment = measures_per_outcome               # maps correct number of outcome measures reported in the study, as multiple performance values may pertain to each endpoint (i.e. value for each intervention arm + placebo)
    try:
        for i in range(num_of_outcomes):
            try:
                temp_list.append([df_master['OutcomeMeasurementValue'][0][beginning:beginning+increment]])
                beginning += increment
            except:
                print('Errors!')
        df_master['OutcomeMeasurementValue'] = temp_list
    except:
        print('Errors!')
    
    return df_master

In [23]:
query_term = input("Enter search query: \n")
start_time = time.process_time()
url = build_url_from_query(query_term)
print(f"Time to build url: {time.process_time()-start_time} seconds")
print(url)

Enter search query: 
eosinophilic asthma
Time to build url: 0.0 seconds
https://clinicaltrials.gov/api/query/study_fields?expr=eosinophilic+asthma&fields=NCTId%2CLeadSponsorName%2CBriefTitle%2CCondition%2CPhase%2CStudyType%2CEnrollmentCount%2CStartDate%2CPrimaryCompletionDate%2CEligibilityCriteria%2CInterventionName%2CArmGroupInterventionName%2CArmGroupDescription%2CInterventionArmGroupLabel%2COutcomeMeasureType%2COutcomeMeasureTitle%2COutcomeMeasureDescription%2COutcomeMeasureTimeFrame%2COutcomeMeasurementValue%2COutcomeMeasureUnitOfMeasure&min_rnk=1&max_rnk=1000&fmt=json


In [78]:
start_time = time.process_time()
study_table = build_study_table(url)
print(f"Time to build table: {time.process_time()-start_time} seconds.")

Time to build table: 0.6875 seconds.


  df_master = df_master[df_master['Phase'].str.match('.*3.*')].drop('Rank', 1).reset_index().drop('index', 1)


In [79]:
study_table

Unnamed: 0,NCTId,LeadSponsorName,BriefTitle,Condition,Phase,StudyType,EnrollmentCount,StartDate,PrimaryCompletionDate,EligibilityCriteria,InterventionName,ArmGroupInterventionName,ArmGroupDescription,InterventionArmGroupLabel,OutcomeMeasureType,OutcomeMeasureTitle,OutcomeMeasureDescription,OutcomeMeasureTimeFrame,OutcomeMeasurementValue,OutcomeMeasureUnitOfMeasure
0,NCT01285323,"Teva Branded Pharmaceutical Products R&D, Inc.",A Study to Evaluate the Efficacy and Safety of Reslizumab in Patients With Eosinophilic Asthma,Eosinophilic Asthma,Phase 3,Interventional,464,March 2011,April 2014,"Inclusion Criteria:\n\nThe patient is male or female, 12 through 75 years of age, with a previou...","Reslizumab, Placebo","Drug: Placebo, Drug: Reslizumab","Placebo administered intravenously once every 4 weeks ( +-7 days) for a total of 13 doses., Resl...","Reslizumab 3.0 mg/kg, Placebo","[Primary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary...","[Frequency of Clinical Asthma Exacerbations (CAEs) During 12 Months of Treatment, Change From Ba...",[An exacerbation event was considered a CAE if the patient met either or both of the criteria li...,"[Day 1 to Month 12, Day 1 (baseline, pre-dose), Week 16, Day 1 (baseline, pre-dose), Weeks 4, 8,...","[2.115, 0.859, 0.122, 0.223, 0.094, 0.187, 0.777, 0.987, -0.660, -0.857, NA, NA, 0.080, 0.115, -...","[CAEs in 52 weeks, liters, liters, units on a scale, units on a scale, weeks, units on a scale, ..."
1,NCT01287039,"Teva Branded Pharmaceutical Products R&D, Inc.",A Study to Evaluate the Efficacy and Safety of Reslizumab (3.0 mg/kg) in the Reduction of Clinic...,Eosinophilic Asthma,Phase 3,Interventional,489,April 2011,December 2013,"Inclusion Criteria:\n\nThe patient is male or female, 12 through 75 years of age, with a previou...","Reslizumab, Placebo","Drug: Placebo, Drug: Reslizumab","Placebo administered intravenously once every 4 weeks ( +-7 days) for a total of 13 doses., Resl...","Reslizumab 3.0 mg/kg, Placebo","[Primary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary...","[Frequency of Clinical Asthma Exacerbations (CAEs) During 12 Months of Treatment, Change From Ba...",[An exacerbation event was considered a CAE if the patient met either or both of the criteria li...,"[Day 1 to Week 52, Day 1 (baseline, pre-dose), Weeks 4, 8, 12 and 16, Day 1 (baseline, pre-dose)...","[1.804, 0.904, 0.110, 0.248, 0.695, 0.933, -0.676, -0.941, 34.9, NA, 0.109, 0.167, -0.36, -0.64,...","[CAEs in 52 weeks, liters, units on a scale, units on a scale, weeks, units on a scale, puffs/da..."
2,NCT01270464,"Teva Branded Pharmaceutical Products R&D, Inc.",A Study to Evaluate the Efficacy and Safety of Reslizumab (0.3 or 3.0 mg/kg) as Treatment for Pa...,Eosinophilic Asthma,Phase 3,Interventional,315,February 2011,September 2013,"Inclusion Criteria:\n\nThe patient is male or female, 12 through 75 years of age, with a previou...","Reslizumab, Placebo","Drug: Placebo, Drug: Reslizumab, Drug: Reslizumab","Placebo administered intravenously (iv) once every 4 weeks, for a total of 4 doses., 0.3 mg/kg, ...","Reslizumab - 0.3 mg/kg, Reslizumab - 3.0 mg/kg, Placebo","[Primary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary...",[Change From Baseline In Forced Expiratory Volume In 1 Second (FEV1) Over 16 Weeks Using Mixed M...,[FEV1 is a standard measurement of air movement in the lungs of patients with asthma obtained fr...,"[Day 0 (baseline, pre-dose), Weeks 4, 8, 12 and 16, Day 1 (baseline, pre-dose), Weeks 4, 8, 12, ...","[0.126, 0.242, 0.286, 0.172, 0.220, 0.301, -0.145, -0.114, 0.089, 0.8, 4.9, 7.5, 0.8, 5.5, 6.7, ...","[liters, liters, liters/second, percentage of predicted FEV1, units on a scale, units on a scale..."
3,NCT01290887,"Teva Branded Pharmaceutical Products R&D, Inc.",Open-Label Extension Study to Evaluate the Long-Term Safety and Efficacy of Reslizumab (3.0 mg/k...,Eosinophilic Asthma,Phase 3,Interventional,1052,June 2011,January 2015,Inclusion Criteria:\n\nWritten informed consent is obtained.\nPatient must have completed treatm...,Reslizumab,Drug: Reslizumab,Reslizumab 3.0 mg/kg administered intravenously once every 4 weeks ( +-7 days) for up to 24 months.,Reslizumab 3.0 mg/kg,"[Primary, Primary, Secondary, Secondary, Secondary, Secondary, Secondary, Primary, Secondary, Se...","[Participants With Treatment-Emergent Adverse Events, Participants With Treatment-Emergent Poten...",[An adverse event was defined in the protocol as any untoward medical occurrence that develops o...,[Day 1 (post-dose) to Week 65. The endpoint for adverse events was the last postbaseline observa...,"[359, 385, 744, 31, 47, 78, 49, 41, 90, 6, 12, 18, 33, 45, 78, 1, 2, 3, 344, 367, 711, 78, 82, 1...","[participants, participants, liters, percentage of predicted FEV1, liters, liters/second, # puff..."
4,NCT02594332,Johannes Gutenberg University Mainz,Effects of Mepolizumab Compared to Placebo on Airway Physiology in Patients With Eosinophilic As...,Asthma,Phase 3,Interventional,29,"November 17, 2015","May 22, 2017",Inclusion Criteria:\n\nPatients must be able to give written informed consent prior to participa...,"Mepolizumab, Placebo","Drug: Mepolizumab, Drug: Placebo","100 mg SC every 4 weeks for 13 injections, Amount of Placebo corresponding to mepolizumab dose S...","Mepolizumab, Placebo",Unreported,Unreported,Unreported,Unreported,Unreported,Unreported
5,NCT02555371,GlaxoSmithKline,Cessation Versus Continuation of Long-term Mepolizumab in Severe Eosinophilic Asthma Patients,Asthma,Phase 3,Interventional,306,"January 7, 2016","July 24, 2019","Inclusion Criteria:\n\nInformed Consent: Prior to commencing any study related activities, subje...","Mepolizumab 100mg, Placebo","Biological: Mepolizumab 100mg, Biological: Mepolizumab 100mg, Drug: Placebo",There will be 4 parts during the study. Part A will be Variable Open-Label Run-in (maximum up to...,"Arm Mepolizumab 100 mg, Arm Placebo, Arm Placebo","[Primary, Secondary, Secondary, Secondary]","[Percentage of Participants With First Clinically Significant Exacerbation in Part C, Ratio to B...",[Clinically significant exacerbation was defined as worsening of asthma which requires use of sy...,"[Weeks 12, 24, 36 and 52, Baseline and Weeks 12, 24, 36 and 52, Baseline and Weeks 12, 24, 36 an...","[31.8, 20.2, 49.3, 32.3, 56.0, 40.3, 60.7, 47.1, 6.03, 1.16, 6.58, 1.03, 6.48, 1.20, 6.17, 1.00,...","[Percentage of participants, Ratio, Percentage of participants, Percentage of participants]"
6,NCT02281318,GlaxoSmithKline,Efficacy and Safety Study of Mepolizumab Adjunctive Therapy in Participants With Severe Eosinoph...,Asthma,Phase 3,Interventional,556,"December 11, 2014","June 10, 2016",Inclusion Criteria:\n\nAge: At least 12 years of age at the time of signing the informed consent...,"Mepolizumab, Placebo, SOC","Biological: Mepolizumab, Drug: SOC, Drug: Placebo, Drug: SOC",Participants will receive Mepolizumab 100 mg subcutaneously (SC) into the upper arm or thigh eve...,"Mepolizumab SC, Placebo SC, Mepolizumab SC, Placebo SC","[Primary, Secondary, Secondary, Secondary]",[Mean Change From Baseline (BL) in St. George's Respiratory Questionnaire (SGRQ) Score at Week 2...,[SGRQ consisted of 50 questions (scored from 0 to 100 where 0 indicates best and 100 indicates w...,"[Baseline and Week 24, Baseline and Week 24, Baseline (Visit 2-latest pre-dose assessment) and W...","[-7.9, -15.6, 56, 176, 55, 73, -0.40, -0.80]","[Scores on a scale, Milliliters (mL), Percentage of participants, Scores on a scale]"
7,NCT04305405,AstraZeneca,PK/PD and Long Term Safety Study of Benralizumab in Children With Severe Eosinophilic Asthma,Severe Uncontrolled Asthma,Phase 3,Interventional,30,"November 21, 2019","October 31, 2022",Inclusion Criteria:\n\nPatients are eligible to be included in the study only if all of the foll...,Benralizumab,"Drug: Benralizumab, Drug: Benralizumab","Below 35 kilos, Greater than/equal to 35 kilos","Dose 1, Dose 2",Unreported,Unreported,Unreported,Unreported,Unreported,Unreported
8,NCT03052725,"Teva Branded Pharmaceutical Products R&D, Inc.",A Study of Reslizumab in Patients 12 Years of Age and Older With Severe Eosinophilic Asthma,"Eosinophils, Asthma",Phase 3,Interventional,391,"March 10, 2017","February 22, 2018",Inclusion Criteria:\n\n• Patient with eosinophilic asthma who completed the treatment period of ...,reslizumab,Drug: reslizumab,"Reslizumab was administered as 110 mg subcutaneous (sc) injection in the thigh, abdomen, or uppe...",reslizumab 110 mg,"[Primary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary...","[Participants With Treatment-Emergent Adverse Events (TEAEs), Participants With Potentially Clin...","[An adverse event is any untoward medical occurrence, regardless of whether it has a causal rela...",[Day 1 to up to Day 269; for participants who discontinued early for reasons other than study te...,"[102, 114, 7, 6, 5, 11, 0, 0, 2, 0, 0, 0, 8, 5, 1, 0, 4, 2, 2, 0, 0, 1, 2, 0, 1, 1, 0, 1, 12, 10...","[Participants, Participants, Participants, Participants, Participants, CAEs / year, CAEs / year,..."
9,NCT02559791,McMaster University,Anti-Interleukin-5 (IL5) Monoclonal Antibody (MAb) in Prednisone-dependent Eosinophilic Asthma,"Severe Persistent Asthma, Eosinophilic Bronchitis","Phase 2, Phase 3",Interventional,10,October 2015,April 2017,"Inclusion Criteria:\n\nInformed consent Prior to the beginning of the study, patients must be wi...","Reslizumab, Placebo","Biological: Reslizumab, Drug: Placebo",All study participants will receive 2 monthly doses of placebo followed by 4 monthly doses of IV...,"Study participants, Study participants",Unreported,Unreported,Unreported,Unreported,Unreported,Unreported
