In [1]:
import requests

# rate limiting is important to avoid accidental service abuse of the OpenFDA API provider
from ratelimit import limits, sleep_and_retry

# cache API calls in a sqllite file to reduce the number of requests to openfda server
import requests_cache
requests_cache.install_cache('openfda_cache')

OPENFDA_API = "https://api.fda.gov/drug/event.json"

@sleep_and_retry
@limits(calls=40, period=60)
def call_api(params):
    """
    OpenFDA API call. Respects rate limit. Overrides default data limit
    Input: dictionary with API parameters {search: '...', count: '...'}
    Output: nested dictionary representation of the JSON results section
    
    OpenFDA API rate limits:
         With no API key: 40 requests per minute, per IP address. 1000 requests per day, per IP address.
         With an API key: 240 requests per minute, per key. 120000 requests per day, per key.
    """
    if not params:
        params = {}
    params['limit'] = params.get('limit', 1000)
    response = requests.get(OPENFDA_API, params=params)
    print(response.url)

    if response.status_code != 200:
        raise Exception('API response: {}'.format(response.status_code))
    return response.json()['results']

OPENFDA_METADATA_YAML = "https://open.fda.gov/fields/drugevent.yaml"
# munch is a yaml parser with javascript-style object access
from munch import Munch

def api_meta():
    """
    YAML file with field description and other metadata retrieved from the OpenFDA website
    Parses YAML file and provides syntactic sugar for accessing nested dictionaries
    Example: .patient.properties.patientagegroup.possible_values.value
    Note: reserved words, such as count and items still have to be accessed via ['count'], ['items']
    """
    response = requests.get(OPENFDA_METADATA_YAML)
    if response.status_code != 200:
        raise Exception('Could not retrieve YAML file with drug event API fields')
    y = Munch.fromYAML(response.text)
    return y['properties']

In [2]:
api_meta().patient.properties.keys()

dict_keys(['drug', 'patientagegroup', 'patientdeath', 'patientonsetage', 'patientonsetageunit', 'patientsex', 'patientweight', 'reaction', 'summary'])

In [3]:
import pandas as pd
import numpy as np
import datetime

start_date = '20101101'
end_date = '20101130'


# For example, 20100729 is 07/29/2010"
#start_date = input("Enter the beginning of your desired date range: " )
#end_date = input("Enter the end of your desired date range: ")

#country_list = input("Enter the countries you would like to limit your search to: ")

# create a range of all dates between start and end date
my_range = pd.date_range(start=start_date, end=end_date)

# reformat the dates to match FAERS
f_range = []
for dt in my_range:
    y = str(dt)[0:4]
    m = str(dt)[5:7]
    d = str(dt)[8:10]
    new_dt = y + m + d
    f_range.append(new_dt)



In [4]:
#safetyreportid of test entry: 10574310

columbia_api_key = 'Og4jAa0KIhPJkiwaxXVD6VHp3DGqoQf37JFPeRct'

# test strings for API query search fields
date_query = 'patient.summary.narrativeincludeclinical:('

d_sub = '"CASE EVENT DATE:'

# add each date in range to date_query
date_ind = 0
num_dates = len(f_range)
for ymd in f_range:
    date_ind += 1
    if date_ind < num_dates:
        date_query += d_sub + ymd + '"' + " OR "
    else:
        date_query += d_sub + ymd + '"' + ")"
country_query = 'primarysource.reportercountry:("FR" OR "FRANCE")' 

#'search': "receivedate:[20040101 TO 20200101] AND patient.reaction.reactionmeddrapt.exact: {}".format(


test_out = call_api({
    'api_key': columbia_api_key,
    'limit': 5,
    'search': country_query + ' AND ' + date_query
})


https://api.fda.gov/drug/event.json?api_key=Og4jAa0KIhPJkiwaxXVD6VHp3DGqoQf37JFPeRct&limit=5&search=primarysource.reportercountry%3A%28%22FR%22+OR+%22FRANCE%22%29+AND+patient.summary.narrativeincludeclinical%3A%28%22CASE+EVENT+DATE%3A20101101%22+OR+%22CASE+EVENT+DATE%3A20101102%22+OR+%22CASE+EVENT+DATE%3A20101103%22+OR+%22CASE+EVENT+DATE%3A20101104%22+OR+%22CASE+EVENT+DATE%3A20101105%22+OR+%22CASE+EVENT+DATE%3A20101106%22+OR+%22CASE+EVENT+DATE%3A20101107%22+OR+%22CASE+EVENT+DATE%3A20101108%22+OR+%22CASE+EVENT+DATE%3A20101109%22+OR+%22CASE+EVENT+DATE%3A20101110%22+OR+%22CASE+EVENT+DATE%3A20101111%22+OR+%22CASE+EVENT+DATE%3A20101112%22+OR+%22CASE+EVENT+DATE%3A20101113%22+OR+%22CASE+EVENT+DATE%3A20101114%22+OR+%22CASE+EVENT+DATE%3A20101115%22+OR+%22CASE+EVENT+DATE%3A20101116%22+OR+%22CASE+EVENT+DATE%3A20101117%22+OR+%22CASE+EVENT+DATE%3A20101118%22+OR+%22CASE+EVENT+DATE%3A20101119%22+OR+%22CASE+EVENT+DATE%3A20101120%22+OR+%22CASE+EVENT+DATE%3A20101121%22+OR+%22CASE+EVENT+DATE%3A20101122%2

In [5]:
testDF=pd.DataFrame(test_out)

#for item in test_out:
#    print(item)

#call_api returns results, which is a list of dictionaries: [ {...}, {...}, {...} ]



write_obj = open('results.txt', 'w')
write_obj.close()
results_obj = open('results.txt', 'a')
#results_obj.write(str(x['patient']['drug']))


safetyreportid_list = []
# iterate through each entry
# store safetyreportid, demographic info, rxcuis

for entry in test_out:
    # safetyreportid
    results_obj.write("report ID: ")
    results_obj.write(str(entry['safetyreportid']) + "\n")
    safetyreportid_list.append(str(entry['safetyreportid']))

    # reportercountry
    results_obj.write("\treportercountry: " + str(entry['primarysource']['reportercountry']) + "\n")

    # sex
    if 'patientsex' in entry['patient']:
        results_obj.write("\tsex: " + str(entry['patient']['patientsex']) + "\n")

    # date
    results_obj.write("\t" + str(entry['patient']['summary']['narrativeincludeclinical']) + "\n")
    
    '''
    # drugs 
    for drug in entry['patient']['drug']:
        
        results_obj.write("\t" + "medicinal product: ")
        results_obj.write(str(drug['medicinalproduct']) + "\n")
        if 'openfda' in drug:
            if 'rxcui' in drug['openfda']:
                results_obj.write("\t\t" + "rxcuis: ")
                results_obj.write(str(drug['openfda']['rxcui']) + "\n")
                if 'drugindication' in drug:
                    results_obj.write("\t\t" + "indication: ")
                    results_obj.write(str(drug['drugindication']) + "\n")
                else: results_obj.write("\t\t" + "no indication" + "\n")
            else: results_obj.write("\t\t" + "no rxcuis" + "\n")
        else: results_obj.write("\t" + 'no openfda'  + "\n")
    results_obj.write("\n\n")
    '''
results_obj.close()

In [6]:

# get lines in file that start with the given string
# used to search standard_case_drug to find all drugs in a given reportid 
def generate_last_cols_lines_that_equal(string, fp):
    # iterate through each line in the given file
    for line in fp:
        # if there is a match
        if line.startswith(string):
            # split line by tabs
            tab_split = line.split('\t')
            # get the last column
            lc = tab_split[(len(tab_split)-1)].replace('\n', '')
            yield lc

# get lines in file that contain the given string
# used for searching RxNorm concepts
def generate_lines_that_equal(string, fp):
    # iterate through each line in the given file
    for line in fp:
        # if there is a match
        if line.startswith(string):
            # split line by tabs
            tab_split = line.split('\t')
            # get the second column (concept name)
            lc = tab_split[2]
            yield lc


In [7]:
drug_template = { "drug_concept_id": None, "drug_name": None, "KEGG": [ { "get_URL": None, "D_number": None, "Target": [ ], "Pathway": None } ] }

sub_dict = { "drugs": [  ] }
# creates a dictionary entry for each id
results_dict = dict.fromkeys(safetyreportid_list, sub_dict)
print(results_dict)

# the value for each id is a list of all corresponding drug standard_concept_ids from standard_case_drug.tsv
for id in results_dict:
    # search standard_case_drug for ids
    with open("doi_10.5061_dryad.8q0s4__v1/aeolus_v1/standard_case_drug.tsv", "r") as fp:
        id_vals = []
        for i in generate_last_cols_lines_that_equal(str(id), fp):
            id_vals.append(i)
            x = drug_template.copy()
            x["drug_concept_id"] = i
            y = results_dict[id]['drugs'].copy()
            y.append(x)
            results_dict[id]['drugs'] = y.copy()
        print('\nid_vals for ' + id, id_vals)
        '''
        for k in id_vals:
            x = drug_template.copy()
            x["drug_concept_id"] = k
            y = results_dict[id]['drugs'].copy()
            y.append(x)
            print(y)
        '''
        #drug_concepts_dict = dict.fromkeys(id_vals)
        #results_dict[id]['drugs'] = results_dict[id] + drug_concepts_dict


{'10366267': {'drugs': []}, '10397359': {'drugs': []}, '10443691': {'drugs': []}, '10565575': {'drugs': []}, '10572706': {'drugs': []}}

id_vals for 10366267 ['1710281', '1727223']

id_vals for 10397359 ['19059796', '40226742', '1713332', '1742253', '1125315', '1750500']

id_vals for 10443691 ['40171288', '40171288', '904453', '1551099', '1551099', '40171288', '1125315', '1551099', '40171288', '40171288', '40171288', '1551099', '1551099', '1551099', '1550557', '40171288', '40171288', '40171288', '1101898', '40171288', '40171288', '1551099', '40171288', '40171288', '40171288', '1551099', '1101898', '1550557', '40171288', '40171288', '1101898', '40171288', '1551099', '744740', '1550557', '40171288', '40171288', '1550557', '1551099', '1551099', '40171288', '40171288', '1551099', '1551099', '1101898', '1551099', '40171288', '1551099', '1551099', '1551099', '1551099', '40171288', '1551099', '1551099', '1551099', '40171288', '40171288', '1551099', '40173582', '1551099', '40171288', '1550557'

In [8]:
for item in results_dict['10397359']['drugs']:
    print(item)

for item in results_dict['10366267']['drugs']:
    print(item)




{'drug_concept_id': '1710281', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1727223', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '19059796', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '40226742', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1713332', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1742253', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1125315', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1750500', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target

In [9]:
for item in results_dict['10366267']['drugs']:
    print(item)

{'drug_concept_id': '1710281', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1727223', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '19059796', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '40226742', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1713332', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1742253', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1125315', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1750500', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target

In [10]:
RxConcepts_results_dict = {}

# creates a dictionary entry for each id
# the value for each id is a list of all corresponding drug standard_concept_ids from standard_case_drug.tsv
for id in safetyreportid_list:
    id_vals = []
    # search standard_case_drug for ids
    with open("RxNorm_vocab/CONCEPT.csv", "r") as fp:
        for i in generate_lines_that_equal(str(id), fp):
            id_vals.append(i)
        scd_results_dict[str(id)] = id_vals

NameError: name 'scd_results_dict' is not defined

In [None]:
#KEGG_API = 'https://rest.kegg.jp/find/drug/'

#KEGG_response = requests.get('https://rest.kegg.jp/find/drug/')

# KEGG API request
#def KEGG_request(params):



In [None]:
# todo: finish fixing the generate lines function for the RxNorm data
# look up drug concept IDs in drugs.csv, get the drug names
# make a KEGG API request for the drug names

abc = [ {'drug_concept_id': '1727223', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}, {'drug_concept_id': '1727223', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}]
for item in abc:
    print(item)

{'drug_concept_id': '1727223', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
{'drug_concept_id': '1727223', 'drug_name': None, 'KEGG': [{'get_URL': None, 'D_number': None, 'Target': [], 'Pathway': None}]}
