# Import libraries

In [1]:
import numpy as np
import pandas as pd
import requests
import operator
import matplotlib.pyplot as plt

# Play with r.json()--the json decoding function

In [2]:
# Link to API and get the version of .json file
# r = requests.get("https://rxnav.nlm.nih.gov/REST/version.json")
# r.json()

In [3]:
# retrieve a specific drug by its NDC id
# r = requests.get("https://rxnav.nlm.nih.gov/REST/rxcui.json?idtype=NDC&id=11523-7020-1")
# r.json()

In [4]:
# retrieve a specific drug by its name
# note that with another name, e.g. Plavix, r.json() also returns an rxnormId
# r = requests.get("https://rxnav.nlm.nih.gov/REST/rxcui.json?name=morphine")#liptor
# r.json()

In [5]:
# retrieve a specific drug's class
# r = requests.get("https://rxnav.nlm.nih.gov/REST/rxclass/class/byDrugName.json?drugName=metformin&relaSource=MEDRT")
# r.json()

In [6]:
# retrieve all info for a specific drug by its rxnormId
# r = requests.get("https://rxnav.nlm.nih.gov/REST/rxcui/7052/allProperties.json?prop=all")
# r.json()

# Read data

In [7]:
# Load the drug records. drugs.csv is expected to sit in the same directory
# as this script. Every later cell reads `data`, so the load must actually run
# for the notebook to execute top-to-bottom.
data = pd.read_csv('./drugs.csv')

# Get a glance at the dataframe

In [8]:
# # list first 10 rows of the dataframe
# data.head(10)

# Print dataframe headers for further processing

In [9]:
# # list data headers
# list(data)

# Start retrieval with generic name and brand name respectively

## Extract the generic_name column

In [10]:
# Count how many rows carry each generic name, then list the most frequent ones.
# `generic` is a Series indexed by generic name whose values are row counts;
# later cells reuse it for lookups.
generic = data.groupby('generic_name').size()
freqs, names = generic.values, generic.keys()

# Sort (count, name) pairs in descending order; ties fall back to the name.
ranked = sorted(zip(freqs, names), reverse=True)
# zip(*[]) yields nothing to unpack, so fall back to empty tuples when the
# column has no values at all.
freqs_sorted, names_sorted = zip(*ranked) if ranked else ((), ())

topNum = 10
# Never index past the end when there are fewer than topNum distinct names.
for i in range(min(topNum, len(names_sorted))):
    print('Top',topNum,'Generic names',names_sorted[i],'Frequency(ies)',freqs_sorted[i])

## Retrieve drugs in RxNorm API by their generic names

In [11]:
# Look up every generic name in the RxNorm rxcui endpoint and measure, weighted
# by how many rows use each name, how often the API
#   * echoes back the same name                  -> num_Match
#   * echoes the name AND returns an rxnormId    -> num_ExactM
rxcui_url = "https://rxnav.nlm.nih.gov/REST/rxcui.json"

num_Total = 0
num_Match = 0
num_ExactM = 0
for name, number in generic.items():
    # Pass the name through `params` so requests percent-encodes it; gluing it
    # onto the URL breaks for names containing '&', '#', '+' and similar.
    r = requests.get(rxcui_url, params={'name': name}, timeout=30)
    r.raise_for_status()
    # Decode the body once; tolerate a response without an idGroup object.
    id_group = r.json().get('idGroup') or {}
    name_RxNorm = id_group.get('name')
    id_RxNorm = id_group.get('rxnormId')

    if name_RxNorm == name:
        num_Match += number
        if id_RxNorm is not None:
            num_ExactM += number

    num_Total += number

# Guard against an empty name column (nothing to divide by).
percent_Match = num_Match / num_Total * 100 if num_Total else 0.0
percent_ExactM = num_ExactM / num_Total * 100 if num_Total else 0.0
# Uncomment to report the results:
# print("Percent of having a match in RxNorm RESTful API by generic names: ",percent_Match,'%')
# print("Percent of having an exact match in RxNorm RESTful API by generic names: ",percent_ExactM,'%')
# print("Percent of having an approximate match in RxNorm RESTful API by generic names: ",percent_Match-percent_ExactM,'%')

## Retrieve drug class in RxNorm API by generic names

Query all MED-RT relations rather than only 'may_treat' when retrieving drug classes.
See: https://rxnav.nlm.nih.gov/REST/rxclass/class/byDrugName.json?drugName=clopidogrel&relaSource=MEDRT

In [12]:
# Retrieve MED-RT drug classes for every generic name.
#
# Each result is a (drug name, className, classType) tuple. Names for which the
# rxcui lookup returned an rxnormId ("exact match") go to classInfo_Exa and are
# queried under the name RxNorm echoed back; all other names go to
# classInfo_App and are queried under the dataset's own spelling. A drug with
# no class information contributes a single (name, 'NA', 'NA') placeholder.
rxcui_url = "https://rxnav.nlm.nih.gov/REST/rxcui.json"
rxclass_url = "https://rxnav.nlm.nih.gov/REST/rxclass/class/byDrugName.json"


def _class_rows(drug_name):
    """Return the (drug_name, className, classType) tuples RxClass lists for a drug.

    Falls back to a single (drug_name, 'NA', 'NA') placeholder when the
    response carries no class entries.
    """
    # `params` lets requests escape the drug name; restricting to
    # relas=may_treat is intentionally not applied.
    resp = requests.get(
        rxclass_url,
        params={'drugName': drug_name, 'relaSource': 'MEDRT'},
        timeout=30,
    )
    resp.raise_for_status()
    info_list = resp.json().get('rxclassDrugInfoList') or {}
    rows = []
    for item in info_list.get('rxclassDrugInfo') or []:
        concept = item.get('rxclassMinConceptItem')
        rows.append((drug_name, concept.get('className'), concept.get('classType')))
    return rows or [(drug_name, 'NA', 'NA')]


classInfo_Exa = []
classInfo_App = []
for name in generic.keys():
    r = requests.get(rxcui_url, params={'name': name}, timeout=30)
    r.raise_for_status()
    id_group = r.json().get('idGroup') or {}
    name_RxNorm = id_group.get('name')
    id_RxNorm = id_group.get('rxnormId')

    if id_RxNorm is not None:  # indicator for exact match
        # Fall back to the dataset spelling if RxNorm did not echo a name.
        classInfo_Exa.extend(_class_rows(name_RxNorm or name))
    else:
        # Always record the dataset's own name here; the echoed name may be
        # missing when no concept was found.
        classInfo_App.extend(_class_rows(name))

In [17]:
# print(classInfo_Exa)

## Count frequency of drug class by generic names

RxClass includes six sets of NDFRT drug classes:
1. Established Pharmacologic Classes (EPC)
2. Chemical Structure (Chem)
3. Disease
4. Mechanism of Action (MoA)
5. Physiologic Effect (PE)
6. Pharmacokinetics (PK)

In [16]:
# Count, per class type, how many dataset rows fall under each drug class
# (exact-match generic names only), print the ten most frequent classes of
# each type and scatter the counts in a 2x3 grid of subplots.
numClassType = 6
stats = {'EPC':{},'CHEM':{},'DISEASE':{},'MOA':{},'PE':{},'PK':{} }

for drug, class_name, class_type in classInfo_Exa:
    subDict = stats.get(class_type)
    if subDict is None:
        # 'NA' placeholders (drug without class info) and any class type
        # outside the six tracked ones are not counted; indexing stats
        # directly would raise KeyError for them.
        continue
    count = generic.get(drug)
    if count is None:
        # The stored name is not a key of `generic`, so there is nothing to add.
        continue
    subDict[class_name] = subDict.get(class_name, 0) + count

# print(stats)

#plot
plt.figure(figsize=(15, 8))
for i, (key, counts) in enumerate(stats.items()):
    print (i+1,'.',key)
    if not counts:
        continue
    x = list(counts.keys())
    y = list(counts.values())
    # Rank classes by descending count for the textual summary.
    freqs_sorted, names_sorted = zip(*sorted(zip(y, x), reverse=True))
    topNum = min(10, len(x))
    for j in range(topNum):
        print('Top',j+1,'Class',names_sorted[j],'Frequency(ies)',freqs_sorted[j])

    # One panel per class type; counts are plotted in insertion order.
    plt.subplot(2, 3, i+1)
    plt.plot(list(range(len(y))), y, 'x')

## Extract the brand_name column

In [13]:
# Count how many rows carry each brand name, then list the most frequent ones.
# `brand` is a Series indexed by brand name whose values are row counts;
# later cells reuse it for lookups.
brand = data.groupby('brand_name').size()
freqs, names = brand.values, brand.keys()

# Sort (count, name) pairs in descending order; ties fall back to the name.
ranked = sorted(zip(freqs, names), reverse=True)
# zip(*[]) yields nothing to unpack, so fall back to empty tuples when the
# column has no values at all.
freqs_sorted, names_sorted = zip(*ranked) if ranked else ((), ())

topNum = 10
# Never index past the end when there are fewer than topNum distinct names.
for i in range(min(topNum, len(names_sorted))):
    print('Top',topNum,'Brand names',names_sorted[i],'Frequency(ies)',freqs_sorted[i])

## Retrieve drugs in RxNorm API by their brand names

In [14]:
# Look up every brand name in the RxNorm rxcui endpoint and measure, weighted
# by how many rows use each name, how often the API
#   * echoes back the same name                  -> num_Match
#   * echoes the name AND returns an rxnormId    -> num_ExactM
rxcui_url = "https://rxnav.nlm.nih.gov/REST/rxcui.json"

num_Total = 0
num_Match = 0
num_ExactM = 0
for name, number in brand.items():
    # Pass the name through `params` so requests percent-encodes it; gluing it
    # onto the URL breaks for names containing '&', '#', '+' and similar.
    r = requests.get(rxcui_url, params={'name': name}, timeout=30)
    r.raise_for_status()
    # Decode the body once; tolerate a response without an idGroup object.
    id_group = r.json().get('idGroup') or {}
    name_RxNorm = id_group.get('name')
    id_RxNorm = id_group.get('rxnormId')

    if name_RxNorm == name:
        num_Match += number
        if id_RxNorm is not None:
            num_ExactM += number

    num_Total += number

# Guard against an empty name column (nothing to divide by).
percent_Match = num_Match / num_Total * 100 if num_Total else 0.0
percent_ExactM = num_ExactM / num_Total * 100 if num_Total else 0.0
# Uncomment to report the results:
# print("Percent of having a match in RxNorm RESTful API by brand names: ",percent_Match,'%')
# print("Percent of having an exact match in RxNorm RESTful API by brand names: ",percent_ExactM,'%')
# print("Percent of having an approximate match in RxNorm RESTful API by brand names: ",percent_Match-percent_ExactM,'%')

## Retrieve drug class in RxNorm API by brand names

In [129]:
# Retrieve MED-RT drug classes for every brand name.
#
# Each result is a (drug name, className, classType) tuple. Names for which the
# rxcui lookup returned an rxnormId ("exact match") go to classInfo_Exa and are
# queried under the name RxNorm echoed back; all other names go to
# classInfo_App and are queried under the dataset's own spelling. A drug with
# no class information contributes a single (name, 'NA', 'NA') placeholder.
#
# NOTE: this rebinds classInfo_Exa / classInfo_App, so any results previously
# stored under those names are replaced.
rxcui_url = "https://rxnav.nlm.nih.gov/REST/rxcui.json"
rxclass_url = "https://rxnav.nlm.nih.gov/REST/rxclass/class/byDrugName.json"


def _class_rows(drug_name):
    """Return the (drug_name, className, classType) tuples RxClass lists for a drug.

    Falls back to a single (drug_name, 'NA', 'NA') placeholder when the
    response carries no class entries.
    """
    # `params` lets requests escape the drug name; restricting to
    # relas=may_treat is intentionally not applied.
    resp = requests.get(
        rxclass_url,
        params={'drugName': drug_name, 'relaSource': 'MEDRT'},
        timeout=30,
    )
    resp.raise_for_status()
    info_list = resp.json().get('rxclassDrugInfoList') or {}
    rows = []
    for item in info_list.get('rxclassDrugInfo') or []:
        concept = item.get('rxclassMinConceptItem')
        rows.append((drug_name, concept.get('className'), concept.get('classType')))
    return rows or [(drug_name, 'NA', 'NA')]


classInfo_Exa = []
classInfo_App = []
for name in brand.keys():
    r = requests.get(rxcui_url, params={'name': name}, timeout=30)
    r.raise_for_status()
    id_group = r.json().get('idGroup') or {}
    name_RxNorm = id_group.get('name')
    id_RxNorm = id_group.get('rxnormId')

    if id_RxNorm is not None:  # indicator for exact match
        # Fall back to the dataset spelling if RxNorm did not echo a name.
        classInfo_Exa.extend(_class_rows(name_RxNorm or name))
    else:
        # Always record the dataset's own name here; the echoed name may be
        # missing when no concept was found.
        classInfo_App.extend(_class_rows(name))

## Count frequency of drug class for drugs with exact match by brand names

RxClass includes six sets of NDFRT drug classes:
1. Established Pharmacologic Classes (EPC)
2. Chemical Structure (Chem)
3. Disease
4. Mechanism of Action (MoA)
5. Physiologic Effect (PE)
6. Pharmacokinetics (PK)

In [15]:
# Count, per class type, how many dataset rows fall under each drug class
# (exact-match brand names only), print the ten most frequent classes of
# each type and scatter the counts in a 2x3 grid of subplots.
numClassType = 6
stats = {'EPC':{},'CHEM':{},'DISEASE':{},'MOA':{},'PE':{},'PK':{} }

for drug, class_name, class_type in classInfo_Exa:
    subDict = stats.get(class_type)
    if subDict is None:
        # 'NA' placeholders (drug without class info) and any class type
        # outside the six tracked ones are not counted; indexing stats
        # directly would raise KeyError for them.
        continue
    count = brand.get(drug)
    if count is None:
        # The stored name is not a key of `brand`, so there is nothing to add.
        continue
    subDict[class_name] = subDict.get(class_name, 0) + count

# print(stats)

#plot
plt.figure(figsize=(15, 8))
for i, (key, counts) in enumerate(stats.items()):
    print (i+1,'.',key)
    if not counts:
        continue
    x = list(counts.keys())
    y = list(counts.values())
    # Rank classes by descending count for the textual summary.
    freqs_sorted, names_sorted = zip(*sorted(zip(y, x), reverse=True))
    topNum = min(10, len(x))
    for j in range(topNum):
        print('Top',j+1,'Class',names_sorted[j],'Frequency(ies)',freqs_sorted[j])

    # One panel per class type; counts are plotted in insertion order.
    plt.subplot(2, 3, i+1)
    plt.plot(list(range(len(y))), y, 'x')