In [32]:
from textwrap import wrap
import pandas as pd
import requests
import json
from pandas import json_normalize # tranform JSON file into a pandas dataframe
import time
import re

pd.set_option('display.max_colwidth', 100)

In [3]:
def build_url_from_query(query):
    
    query_tokenized = query.split()
    
    field_values = ['NCTId', 'BriefTitle', 'Condition', 'Phase', 'StudyType',
                    'EnrollmentCount', 'StartDate', 'PrimaryCompletionDate', 'EligibilityCriteria', 'InterventionName', 
                    'ArmGroupInterventionName', 'ArmGroupDescription', 'InterventionArmGroupLabel', 'OutcomeMeasureType', 'OutcomeMeasureTitle',
                    'OutcomeMeasureDescription', 'OutcomeMeasureTimeFrame', 'OutcomeMeasurementValue', 'OutcomeMeasureUnitOfMeasure']
    
    max_rank = 1000             # max # of items returned by the API query (max for the clinicaltrials.gov API is 1000)
    
    
    url = 'https://clinicaltrials.gov/api/query/study_fields?expr='
    
    for i, word in enumerate(query_tokenized):              # build query URL by adding all search terms and field values to the query URL, following the appropriate format
        if i == 0:
            url = url + word
        else:
            url = url + '+' + word
        
    url = url + '&fields='
    
    for i, word in enumerate(field_values):
        if i == 0:
            url = url + word
        else:
            url = url + '%2C' + word
    
    url = url + '&min_rnk=1&max_rnk=' + str(max_rank) + '&fmt=json' 
    url = url.strip()
    
    # print('\n\nQuerying up to 1,000 trials from clinicaltrials.gov with the following url...\n\n'+url+'\n\n')
    
    return url

In [29]:
def clean_columns(df_master):
    for col in df_master.columns[1:]:    
        if not re.search('Outcome.*', col):
            if len(df_master[col][0]) == 1:                                            
                df_master[col] = df_master[col][0]
            elif len(df_master[col][0]) > 1:
                df_master[col] = ', '.join(df_master[col][0])
            else:
                df_master[col] = 'Unknown'    
    return df_master

In [18]:
def map_measure_values(df_master):
    num_of_outcomes = len(df_master['OutcomeMeasureType'][0])
    temp_list = []
    try:                            
        measures_per_outcome = len(df_master['OutcomeMeasurementValue'][0])//len(df_master['OutcomeMeasureTitle'][0])
    except:
        measures_per_outcome = 1
    
    beginning = 0
    increment = measures_per_outcome               # maps correct number of outcome measures reported in the study, as multiple performance values may pertain to each endpoint (i.e. value for each intervention arm + placebo)
    try:
        for i in range(num_of_outcomes):
            try:
                temp_list.append([df_master['OutcomeMeasurementValue'][0][beginning:beginning+increment]])
                beginning += increment
            except:
                print('Errors!')
        df_master['OutcomeMeasurementValue'] = temp_list
    except:
        print('Errors!')
    
    return df_master

In [30]:
def build_study_table(url):
        
    ## convert the clinicaltrials.gov JSON response to a pandas dataframe
    
    result = requests.get(url).json()
    
    result_list = [result for result in result['StudyFieldsResponse']['StudyFields'] if result['OutcomeMeasureType']]     #loop through the list and identify ONLY studies with outcome measures reported 
    df_master = json_normalize(result_list[0])      # initialize dataframe using JSON result

    df_master = clean_columns(df_master)

    try:
        for study in result_list[1:]:        # concatenate each study to the dataframe to generate master dataframe
            df = json_normalize(study)
            df = clean_columns(df)
            df_master = pd.concat([df_master, df], axis=0, ignore_index=True)
    except:
        print("There was an error!")
        
    return df_master

In [6]:
query_term = input("Enter search query: \n")
start_time = time.process_time()
url = build_url_from_query(query_term)
print(f"Time to build url: {time.process_time()-start_time} seconds")
print(url)

Enter search query: 
psoriasis
Time to build url: 0.0 seconds
https://clinicaltrials.gov/api/query/study_fields?expr=psoriasis&fields=NCTId%2CBriefTitle%2CCondition%2CPhase%2CStudyType%2CEnrollmentCount%2CStartDate%2CPrimaryCompletionDate%2CEligibilityCriteria%2CInterventionName%2CArmGroupInterventionName%2CArmGroupDescription%2CInterventionArmGroupLabel%2COutcomeMeasureType%2COutcomeMeasureTitle%2COutcomeMeasureDescription%2COutcomeMeasureTimeFrame%2COutcomeMeasurementValue%2COutcomeMeasureUnitOfMeasure&min_rnk=1&max_rnk=1000&fmt=json


In [33]:
start_time = time.process_time()
study_table = build_study_table(url)
print(f"Time to build table: {time.process_time()-start_time} seconds.")

Time to build table: 5.046875 seconds.


In [34]:
study_table

Unnamed: 0,Rank,NCTId,BriefTitle,Condition,Phase,StudyType,EnrollmentCount,StartDate,PrimaryCompletionDate,EligibilityCriteria,InterventionName,ArmGroupInterventionName,ArmGroupDescription,InterventionArmGroupLabel,OutcomeMeasureType,OutcomeMeasureTitle,OutcomeMeasureDescription,OutcomeMeasureTimeFrame,OutcomeMeasurementValue,OutcomeMeasureUnitOfMeasure
0,3,NCT00800982,Open Label Study Etanercept's Maintenance Dose in Obese Patients With Moderate to Severe Plaque ...,Psoriasis,Not Applicable,Interventional,30,October 2008,January 2012,Inclusion Criteria:\n\nNonimmunocompromised males or females 18 years of age or older.\nMust be ...,"Narrow band (310-312 nm) ultraviolet light B phototherapy, etanercept","Drug: etanercept, Procedure: Narrow band (310-312 nm) ultraviolet light B phototherapy, Drug: et...",Subjects will only be treated with etanercept. This is given at the standard FDA approved dosage...,"2 (Etanercept + nb-UVB), 1 (Etanercept only), 2 (Etanercept + nb-UVB)",[Primary],"[Psoriasis Area Severity Index. This Scale Ranges From 0-72, 0 Being no Disease, and 72 Being Mo...",[Psoriasis area severity index was used to determine the number of patients in each treatment ar...,[Weeks 12-24],"[6, 6]",[participants]
1,5,NCT01916629,Photocil (Topical) for the Treatment of Psoriasis Vulgaris,Psoriasis,Not Applicable,Interventional,12,August 2013,December 2014,Inclusion Criteria:\n\nDiagnosed with psoriasis vulgaris confirmed by a dermatologist\nPsoriasis...,"Photocil for Psoriasis, Placebo - Sunscreen (SPF 2)","Drug: Photocil for Psoriasis, Other: Placebo - Sunscreen (SPF 2)","Active Drug - Photocil for Psoriasis, Placebo - Sunscreen (SPF 2)","Photocil for Psoriasis, Placebo - Sunscreen (SPF 2)",[Primary],[Percent Lesion Clearance],[],[90 days],"[75.7, 8]",[Percent Clearance]
2,8,NCT00115076,"Study of the Drug Efalizumab (Raptiva), for Adult Patients With Moderate to Severe Plaque Psoriasis",Psoriasis,Phase 3,Interventional,31,"August 4, 2003","May 18, 2009",Inclusion Criteria:\n\nSigned informed consent\n\nPlaque psoriasis covering >10% of total BSA\nD...,Efalizumab,Drug: Efalizumab,moderate to severe plaque psoriasis,psoriasis,"[Primary, Secondary]","[Number of Participants With Clinical Improvement of Target Lesions, Assessment of Overall Clini...","[a single composite score based on quantitative measurement of epidermal acanthosis, qualitative...","[week 12, Day 0, day 14, day 42, day 84, Days 112, 140, and 168. PASI has been measured at those...","[18, 34.58, 12.94]","[Participants, score on a scale]"
3,11,NCT00195507,Study Evaluating Etanercept in the Treatment of Subjects With Psoriasis,Psoriasis,Phase 4,Interventional,720,December 2004,Unknown,"Inclusion Criteria:\n\nStable, active plaque psoriasis\nFailure to respond to the following syst...",Etanercept,Unknown,Unknown,Unknown,"[Primary, Secondary, Secondary, Secondary]","[Physician Global Assessment of Psoriasis (PGA) Score - Mean Value Over 54 Weeks, Patient Global...",[Physician Global Assessment of Psoriasis (PGA) is a 7-point scale used to assess severity of ps...,"[54 weeks, 54 weeks, 54 weeks, 54 weeks]","[1.98, 2.51, 57.03, 40.92, 127, 168, 70, 80, 231, 198]","[units on scale, percentage improvement, days, participants]"
4,13,NCT01126619,A Study of Effectiveness and Safety of Tumor Necrosis Factor (TNF) Inhibitors in Patients With M...,Psoriasis,Unknown,Observational,103,May 2010,September 2011,Inclusion Criteria:\n\nMale or female patients ≥18 years old with moderate-to-severe psoriasis.\...,Unknown,Unknown,Participants with moderate to severe psoriasis who were prescribed an Anti Tumor Necrosis Factor...,Unknown,"[Primary, Secondary, Secondary, Secondary, Secondary, Primary, Secondary, Secondary]","[Percent Change From Baseline in Psoriasis Area and Severity Index (PASI) Score, Dynamic Physici...",[The Psoriasis Area and Severity Index (PASI) score is a combination of the intensity of psorias...,"[Baseline and Weeks 4, 8, 16 and 24, Baseline and Week 24, Baseline and Week 24, Baseline and We...","[48.65, 75.70, 87.85, 90.29, 1.51, 1.51, 0, 14, 0, 38, 0, 19, 1, 4, 35, 8, 27, 3, 23, 0, 53.13, ...","[Percent change, scores on a scale, scores on a scale, participants, percent change, percentage ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,987,NCT02407041,"An Open-Label, Phase 2a Study to Evaluate Safety and Efficacy of GR-MD-02 for Treatment of Psori...",Psoriasis,Phase 2,Interventional,5,September 2015,March 2018,Inclusion Criteria:\n\nEach subject must meet all of the following criteria to be enrolled in th...,GR-MD-02,Drug: GR-MD-02,active arm,GR-MD-02,[Primary],"[Number of Participants With PASI-75, or a 75% Improvement From Baseline in PASI Score]","[The primary endpoint will be the number of participants with PASI-75, or a 75% improvement from...",[6 months],[1],[Participants]
353,989,NCT02533375,Study to Investigate Efficacy and Safety of Adalimumab in Japanese Subjects With Generalized Pus...,"Generalized Pustular Psoriasis (GPP), Adalimumab, Japanese",Phase 3,Interventional,10,"September 28, 2015","September 15, 2016",Inclusion Criteria:\n\nDiagnosis of generalized pustular psoriasis\nTotal skin score of at least...,Adalimumab,Drug: Adalimumab,"80 mg at Week 0 by subcutaneous (SC) injection, followed by 40 mg every other week (eow) on and ...",Participants receiving adalimumab,"[Primary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary...","[Proportion of Participants Achieving Clinical Response at Week 16, Number of Participants Achie...",[Clinical Response was defined as reduction of the Generalized Pustular Psoriasis (GPP) total sk...,"[Baseline and Week 16, Baseline, Week 2, Week 4, Week 8, Week 12, Week 24, Week 36, Week 52, Wee...","[7, 5, 6, 6, 5, 6, 6, 5, 0, 1, 0, 0, 0, 0, 0, 0, -2.9, -3.8, -3.2, -3.1, -4.6, -3.8, -5.5, -6.0,...","[Participants, participants, participants, units on a scale, Participants, units on a scale, per..."
354,991,NCT00704262,Effect of Calcipotriol Plus Hydrocortisone Ointment on the Adrenal Hormone Balance and Calcium M...,Psoriasis Vulgaris,Phase 2,Interventional,33,May 2008,December 2009,Inclusion Criteria:\n\nClinical diagnosis of psoriasis vulgaris involving the face and the inter...,Calcipotriol plus hydrocortisone (LEO 80190),Drug: Calcipotriol plus hydrocortisone (LEO 80190),Unknown,Calcipotriol plus hydrocortisone (LEO 80190),"[Primary, Primary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, Secondary, ...",[The Adrenal Response to the ACTH Challenge Test Defined as the Serum Cortisol Concentration Obt...,[The adrenal function was assessed by a rapid standard dose ACTH (tetracosactid/cosyntropin) cha...,"[At Week 4 (Day 28) and Week 8 (Day 56), From baseline to Week 4, Week 8, and end of treatment (...","[29, 1, 2.215, 0.002, -0.045, -0.033, 29, 1, 30, 0, 25.29, 25.93, 28.81, 28.82, 2.215, 0.018, -0...","[Participants, mmol/L, Participants, Participants, mcg/dL, mcg/dL, mmol/L, mmol/L, Participants,..."
355,993,NCT02748863,Study of Secukinumab With 2 mL Pre-filled Syringes,Psoriasis,Phase 3,Interventional,214,"December 12, 2016","August 8, 2017",Inclusion Criteria:\n\nSubjects eligible for inclusion in this study must fulfill all of the fol...,"Placebo, Secukinumab 2 mL form, Secukinumab 1 mL form","Drug: Placebo, Drug: Secukinumab 2 mL form, Drug: Secukinumab 1 mL form","Placebo, provided in a 2 mL pre-filled syringe Placebo, provided in a 1 mL pre-filled syringe, S...","Placebo, Secukinumab 2 mL form, Secukinumab 1 mL form","[Primary, Secondary, Secondary, Secondary, Primary]",[Participants With Psoriasis Area and Severity Index (PASI) 75 Response After 12 Weeks of Treatm...,[Number of participants who achieved ≥ 75% reduction in PASI compared to baseline\n\nPASI is a c...,"[12 weeks, 12 weeks, 12 weeks, up to week 52, 12 weeks]","[64, 1, 48, 1, 28, 26, 0, 1, 0, 0, 0, 0, 5, 5, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...","[participants, participants, participants, participants, participants]"


In [20]:
map_measure_values(study_table)

Errors!


Unnamed: 0,Rank,NCTId,BriefTitle,Condition,Phase,StudyType,EnrollmentCount,StartDate,PrimaryCompletionDate,EligibilityCriteria,InterventionName,ArmGroupInterventionName,ArmGroupDescription,InterventionArmGroupLabel,OutcomeMeasureType,OutcomeMeasureTitle,OutcomeMeasureDescription,OutcomeMeasureTimeFrame,OutcomeMeasurementValue,OutcomeMeasureUnitOfMeasure
0,3,NCT00800982,Open Label Study Etanercept's Maintenance Dose in Obese Patients With Moderate to Severe Plaque ...,Psoriasis,Not Applicable,Interventional,30,October 2008,January 2012,Inclusion Criteria:\n\nNonimmunocompromised males or females 18 years of age or older.\nMust be ...,"Narrow band (310-312 nm) ultraviolet light B phototherapy, etanercept","Drug: etanercept, Procedure: Narrow band (310-312 nm) ultraviolet light B phototherapy, Drug: et...",Subjects will only be treated with etanercept. This is given at the standard FDA approved dosage...,"2 (Etanercept + nb-UVB), 1 (Etanercept only), 2 (Etanercept + nb-UVB)",Primary,"Psoriasis Area Severity Index. This Scale Ranges From 0-72, 0 Being no Disease, and 72 Being Mos...",Psoriasis area severity index was used to determine the number of patients in each treatment arm...,Weeks 12-24,"6, 6",participants
