In [1]:
import pandas as pd
import requests
import json
import time
import copy
import pickle

In [2]:
#Setting params for API. Search is handled in the URL itself because I couldn't get it working in the parameters.
#NOTE: You'll need to get and set your own API key.
parameters = {
    "api_key": "",
    "limit": 100,
    "skip":0
}

In [3]:
#Initial test API call to see if everything is working
response = requests.get('https://api.fda.gov/drug/event.json?search=(aripiprazol+aripiprazole+abilify+aristada+ariprazol+aripirazol)+AND+transmissiondate:[2002-01-01+TO+2019-12-31]', params=parameters)

#A response status code of 200 is good. Everything else is bad. More info here: https://restfulapi.net/http-status-codes/
print("Response status code: " + str(response.status_code))

#Prints out the "total" field of the response to see how many records were retrieved
print("Number of records: " + str(response.json()['meta']['results']['total']))

Response status code: 200
Number of records: 80915


In [4]:
# Here we're collecting all the records and dumping them in an empty list. Since the max skip per call is 25K,
# I've batched the records by year. 
# 

resultList = []
year = 2003
url1 = "https://api.fda.gov/drug/event.json?search=(aripiprazol+aripiprazole+abilify+aristada+ariprazol)+AND+transmissiondate:["
url2 = "-01-01+TO+"
url3 = "-12-31]"

#Loop that iterates thru all the years
while year < 2020:
    strYear = str(year)
    print("year: " + strYear)
    
    #Reset skip
    parameters['skip'] = 0
    i = 0
    
    #Get number of results for given year
    yearUrl = url1 + strYear + url2 + strYear +url3
    print("URL: " + yearUrl)
    yearResponse = requests.get(yearUrl, params=parameters)
    
    if (yearResponse.status_code) == 200:
        yearJson = yearResponse.json()
        yearResults = yearJson['meta']['results']['total']
        print("Year results: " + str(yearResults))

        #Add the year's results to resultList, incrementing the "skip" parameter 
        #until all results have been collected
        while i <= yearResults:
            query = requests.get(yearUrl, params=parameters)

            resultList.extend(query.json()['results'])
            i = i + parameters['limit']
            parameters['skip'] = i
        
    else:
        print("Records for " + strYear + " not collected. Status code: " + str(yearResponse.status_code))
    
    print("List length for 2003-" + strYear + ": " + str(len(resultList))) #just printing to see how long it is
    
    year += 1
    
    

year: 2003
URL: https://api.fda.gov/drug/event.json?search=(aripiprazol+aripiprazole+abilify+aristada+ariprazol)+AND+transmissiondate:[2003-01-01+TO+2003-12-31]
Records for 2003 not collected. Status code: 404
List length for 2003-2003: 0
year: 2004
URL: https://api.fda.gov/drug/event.json?search=(aripiprazol+aripiprazole+abilify+aristada+ariprazol)+AND+transmissiondate:[2004-01-01+TO+2004-12-31]
Year results: 351
List length for 2003-2004: 351
year: 2005
URL: https://api.fda.gov/drug/event.json?search=(aripiprazol+aripiprazole+abilify+aristada+ariprazol)+AND+transmissiondate:[2005-01-01+TO+2005-12-31]
Year results: 2040
List length for 2003-2005: 2391
year: 2006
URL: https://api.fda.gov/drug/event.json?search=(aripiprazol+aripiprazole+abilify+aristada+ariprazol)+AND+transmissiondate:[2006-01-01+TO+2006-12-31]
Year results: 1632
List length for 2003-2006: 4023
year: 2007
URL: https://api.fda.gov/drug/event.json?search=(aripiprazol+aripiprazole+abilify+aristada+ariprazol)+AND+transmissi

In [5]:
pickle.dump( resultList, open( "/Users/jjeffrey-wilensky/Sites/abilify-aes-1/data/FAERSAPI_Data.pckl", "wb" ) )

In [6]:
resultList = pickle.load( open( "/Users/jjeffrey-wilensky/Sites/abilify-aes-1/data/FAERSAPI_Data.pckl", "rb" ) )

In [7]:
#Filter out rows with missing age info or missing/scrambled age onset info
resultsAgeAvailable = []
noAgeInfo = []
badAgeInfo = []
possibilitiesList = ["800","801","802","803","804","805"]
for event in resultList:
        if 'patientonsetage' in event['patient'] and 'patientonsetageunit' in event['patient']:
            #Catches records where the patientonset age unit is "None" or otherwise bad
            if (event['patient']['patientonsetageunit'] in possibilitiesList):
                resultsAgeAvailable.append(event)
            else:
                badAgeInfo.append(event)
        else:
            noAgeInfo.append(event)
            
#Print the lengths of all the lists to check that everything adds up
print("resultsAgeAvailable length: " + str(len(resultsAgeAvailable)))
print("noAgeInfo length: "+ str(len(noAgeInfo)))
print("Total length: " + str(len(resultsAgeAvailable) + len(noAgeInfo) + len(badAgeInfo)))
      

resultsAgeAvailable length: 48603
noAgeInfo length: 32310
Total length: 80915


In [8]:
#Standardize ages not in years. Multipliers for weeks, days and hours found here:
#https://www.inchcalculator.com/convert/time/
resultsAgesStandardized = []

for event in resultsAgeAvailable:
    standardizedEvent = {}
    
    #Automatically adds ages already in years
    if (event['patient']['patientonsetageunit'] == "801"):
        resultsAgesStandardized.append(event)
    else:
        
        #Decade
        if (event['patient']['patientonsetageunit'] == "800"):
            standardizedEvent = copy.deepcopy(event)
            standardizedEvent['patient']['patientonsetage'] = str(int(event['patient']['patientonsetage'])*10)
            resultsAgesStandardized.append(event)
            
        #Month
        if (event['patient']['patientonsetageunit'] == "802"):
            standardizedEvent = copy.deepcopy(event)
            standardizedEvent['patient']['patientonsetage'] = str(int(int(event['patient']['patientonsetage'])/12))
            resultsAgesStandardized.append(event)
            
        #Week
        if (event['patient']['patientonsetageunit'] == "803"):
            standardizedEvent = copy.deepcopy(event)
            standardizedEvent['patient']['patientonsetage'] = str(int(int(event['patient']['patientonsetage'])*0.019165))
            resultsAgesStandardized.append(event)
            
        #Day
        if (event['patient']['patientonsetageunit'] == "804"):
            standardizedEvent = copy.deepcopy(event)
            standardizedEvent['patient']['patientonsetage'] = str(int(int(event['patient']['patientonsetage'])*0.002738))
            resultsAgesStandardized.append(event)
        
        #Hour
        if (event['patient']['patientonsetageunit'] == "805"):
            standardizedEvent = copy.deepcopy(event)
            standardizedEvent['patient']['patientonsetage'] = str(int(int(event['patient']['patientonsetage'])*0.000114))
            resultsAgesStandardized.append(event)  
            
#Checks length of resulting list just to be sure we got everything
print("Results with ages standardized: " + str(len(resultsAgesStandardized)))


Results with ages standardized: 48603


In [9]:
#Limit to kids ages 3-17. This one takes a while to run and could stand to be streamlined.
resultsKidsOnly = []
resultsEveryoneElse = []
resultsNone = []

for event in resultsAgesStandardized:
    
    #Makes a copy to avoid Python reference/assignment weirdness
    AgeTestEvent = copy.deepcopy(event)
    if AgeTestEvent['patient']['patientonsetage'] is not None:
        ageInt = int(AgeTestEvent['patient']['patientonsetage'])
        if (3 <= ageInt <= 17):
            resultsKidsOnly.append(event)
        else:
            resultsEveryoneElse.append(event)
    else:
        resultsNone.append(event)

print("Kids list length: " + str(len(resultsKidsOnly)))
print("Adults list length: " + str(len(resultsEveryoneElse)))
print("None list length: " + str(len(resultsNone)))
print("Total: " + str(len(resultsKidsOnly) + len(resultsEveryoneElse) + len(resultsNone)))

Kids list length: 6750
Adults list length: 41800
None list length: 53
Total: 48603


In [10]:
#Checking that the records have aripirazole or one of its variants in the "medicinal product" field
medicinalProduct = []
medicinalOther = []
drugList = ["ABILIFY","ARIPIPRAZOL","ARISTADA","ARIPIRAZOL","ARIPIRAZOLE",'abilify', 'aripiprazol',
          'aristada', 'aripirazol', 'aripirazole'] #inc a few typos

for event in resultsKidsOnly:

    addVar = False

    for drug in event['patient']['drug']:
        if any(ele in drug['medicinalproduct'] for ele in drugList):
            addVar = True
    
    if addVar:
        medicinalProduct.append(event)
    
    else:
        medicinalOther.append(event)
                
print("Medicinal Product length: " + str(len(medicinalProduct)))
print("Other length: " + str(len(medicinalOther)))        

Medicinal Product length: 6730
Other length: 20


In [11]:
#If there are drugs without aripiprazole or one of its known variants in the "medicinal product" field, this will
#print their indications. If the indication contains autism, it will print the entire record. That way, you can
#see how the drug is labeled and consider modifying your stringList so that it includes all instances of
#aripiprazole prescribed for autism.
for event in medicinalOther:
    for drug in event['patient']['drug']:
        if 'drugindication' in drug:
            print(drug['drugindication'])
            if 'AUTISM' in drug['drugindication']:
                print(event)

PRODUCT USED FOR UNKNOWN INDICATION
PRODUCT USED FOR UNKNOWN INDICATION
PRODUCT USED FOR UNKNOWN INDICATION
PRODUCT USED FOR UNKNOWN INDICATION
OBSESSIVE-COMPULSIVE DISORDER
ABNORMAL BEHAVIOUR
BIPOLAR DISORDER
WEIGHT CONTROL
BIPOLAR DISORDER
PRODUCT USED FOR UNKNOWN INDICATION
PRODUCT USED FOR UNKNOWN INDICATION
ATTENTION DEFICIT/HYPERACTIVITY DISORDER
PRODUCT USED FOR UNKNOWN INDICATION
CHRONIC TIC DISORDER
HYPOMANIA
EPILEPSY
PRODUCT USED FOR UNKNOWN INDICATION
PRODUCT USED FOR UNKNOWN INDICATION
ATTENTION DEFICIT/HYPERACTIVITY DISORDER
AGGRESSION
MOOD SWINGS
PRODUCT USED FOR UNKNOWN INDICATION
PRECOCIOUS PUBERTY
ABNORMAL BEHAVIOUR
AFFECTIVE DISORDER
BIPOLAR DISORDER
PSYCHIATRIC SYMPTOM
PRODUCT USED FOR UNKNOWN INDICATION
DEPRESSION
IMPAIRED GASTRIC EMPTYING
ABNORMAL BEHAVIOUR
MOOD ALTERED
ABNORMAL BEHAVIOUR
PSYCHOTIC SYMPTOM
ABNORMAL BEHAVIOUR
PSYCHOTIC SYMPTOM
PSYCHIATRIC SYMPTOM
AGGRESSION
PSYCHIATRIC SYMPTOM
MOOD ALTERED
PSYCHOTIC SYMPTOM
MOOD ALTERED
SCHIZOPHRENIA
ANTIRETROVIRAL 

In [12]:
#Including only events where aripiprazole has a drugcharacterization of 1, AKA is suspected to have caused the AE

abilifySuspect = []
abilifyNotSuspect = []

for event in resultsKidsOnly:
    
    addVar1 = False
    
    for drug in event['patient']['drug']:
        if any(ele in drug['medicinalproduct'] for ele in drugList):
            if drug['drugcharacterization'] == "1":
                addVar1 = True
                
    if addVar1:
        abilifySuspect.append(event)
    else:
        abilifyNotSuspect.append(event)
    
print("Abilify suspected: " + (str(len(abilifySuspect))))
print("Abilify not suspected: " + (str(len(abilifyNotSuspect))))  
    
    

Abilify suspected: 5684
Abilify not suspected: 1066


In [13]:
#Limiting to autism only. For some reason, comes up with about 100 fewer than FDA public dash? Maybe bc we limited
#ourselves to cases where Abilify was solely responsible?
autismIndicated = []
autismNotIndicated = []

for event in abilifySuspect:
    
    addVar2 = False
    
    for drug in event['patient']['drug']:
        if any(ele in drug['medicinalproduct'] for ele in drugList):
            if 'drugindication' in drug:
                if 'autis' in drug['drugindication'].lower():
                    addVar2 = True
    
    if addVar2:
        autismIndicated.append(event)
    else:
        autismNotIndicated.append(event)
    
print("Autism indicated: " + (str(len(autismIndicated))))
print("Autism not indicated: " + (str(len(autismNotIndicated))))           

Autism indicated: 371
Autism not indicated: 5313


In [14]:
#Turns "reaction" into explorable df
rxndf = pd.DataFrame(autismIndicated[90]['patient']['reaction'])
rxndf.index = rxndf.index + 1
rxndf

Unnamed: 0,reactionoutcome,reactionmeddraversionpt,reactionmeddrapt
1,6,16.0,Tardive dyskinesia
2,6,16.0,Dizziness
3,6,16.0,Weight increased


In [15]:
#Putting rxns into a df
rxnList = []
for event in autismIndicated:
    for reaction in event['patient']['reaction']:
        rxnList.append(reaction)

rxndf = pd.DataFrame(rxnList)
rxndf.index = rxndf.index + 1
len(rxndf)

976

In [16]:
#Standardizing caps so everything is nice and neat
rxndf['reactionmeddrapt'] = rxndf['reactionmeddrapt'].str.title()
rxndf

Unnamed: 0,reactionmeddrapt,reactionoutcome,reactionmeddraversionpt
1,Weight Decreased,,
2,Fatigue,,
3,Autistic Disorder,,
4,Dehydration,,
5,Dysphagia,,
...,...,...,...
972,Product Use In Unapproved Indication,6,22.0
973,Condition Aggravated,6,22.0
974,Irritability,6,22.0
975,Weight Increased,6,22.0


In [18]:
#Export to CSV
rxndf.to_csv ('/Users/jjeffrey-wilensky/Sites/abilify-aes/2020-01-27_JupyterAEs_Final.csv', index=None, header=True)