## code for generating ATC codes from rxcui and NDC
Ultimate goal: build a list of all antibiotics in the prescription table that a physician would prescribe when suspecting that the patient has a serious bacterial infection.


* ATC codes are obtained via two step conversion:
 * national drug codes (NDC) found in the PRESCRIPTION table of the MIMIC-III dataset -> RxNorm concept unique identifier (RXCUI) codes. 
 * -> RXCUI codes into ATC codes.

* ATC codes with prefix 'J01' are considered to be antibiotics. 
* Regular expressions were used on prescription names to further filter out erroneous entries and entries with missing NDC/RXCUI codes. 
 * note: the text analysis via manual review and regular expressions is located in https://github.com/geickelb/Abx_patients_outcome. In this notebook, all erroneous values are hardcoded in a list to filter out.

In [1]:
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
import collections
import asyncio
import getpass
import re
from datetime import datetime as dt
import os,sys,re
import urllib3
import prettytable
from collections import Counter
import seaborn as sns
import random

%matplotlib inline
plt.style.use('ggplot')

from notebook.services.config import ConfigManager
# Update the 'livereveal' section of the notebook configuration with slide
# dimensions and scrolling (presumably read by the RISE slideshow extension — confirm).
cm = ConfigManager()
cm.update('livereveal', {
        'width': 1024,
        'height': 768,
        'scroll': True,
})

%load_ext autotime

## Fetch all responses within one Client session first

In [2]:
import asyncio
from aiohttp import ClientSession
import time
import json

async def fetch(url, session):
    """Request a single URL through an open session and return its body.

    Parameters
    ----------
    url : str
        Address to request.
    session
        Object whose ``get(url)`` is an async context manager yielding a
        response with an awaitable ``read()`` (``run`` passes an aiohttp
        ``ClientSession``).

    Returns
    -------
    Whatever ``response.read()`` produces for this request.
    """
    async with session.get(url) as resp:
        # Read the full body while the response is still open.
        body = await resp.read()
    return body

async def run(url_lst):
    """Fetch every URL in ``url_lst`` concurrently and return the bodies.

    All requests share one ``ClientSession`` so the connection is kept alive
    across them. The elapsed wall-clock time is printed once all requests
    have finished.

    Parameters
    ----------
    url_lst : iterable of str
        URLs to request.

    Returns
    -------
    list
        One response body per URL, as gathered by ``asyncio.gather``.
    """
    started = time.perf_counter()

    # A single session is reused for every request.
    async with ClientSession() as session:
        pending = [asyncio.ensure_future(fetch(url, session)) for url in url_lst]
        # All response bodies end up in this list.
        responses = await asyncio.gather(*pending)

    elapsed = time.perf_counter() - started
    print('total time: {}'.format(elapsed))
    return responses

time: 175 ms


## Access MIMIC database and convert it to dataframe in Pandas

In [3]:
# note, all server information is stored in a config.py file that is present in the .gitignore
import config 
# Connect to the PostgreSQL server using the settings kept in config.py.
conn = psycopg2.connect(
    dbname=config.dbname,
    user=config.user,
    host=config.host,
    port=config.port,
    password=config.password,
)
cur = conn.cursor()

# Statement selecting the mimiciii schema; it is only built here, not executed.
query_schema = 'SET search_path to ' + "mimiciii" + ';'

time: 146 ms


## Prescription
* grabbing the entire prescription table from MIMIC

In [4]:
#code for generating ATC codes from rxcui and NDC

time: 590 µs


In [5]:
# Load every row and column of the prescriptions table into memory.
# This is by far the slowest step of the notebook (see the cell timing below).
cur.execute('Select * from mimiciii.prescriptions')
rows = cur.fetchall()

time: 38min 23s


In [6]:
# The first field of each cursor.description entry is the column name.
col_name = [column[0] for column in cur.description]
print(col_name)

['row_id', 'subject_id', 'hadm_id', 'icustay_id', 'startdate', 'enddate', 'drug_type', 'drug', 'drug_name_poe', 'drug_name_generic', 'formulary_drug_cd', 'gsn', 'ndc', 'prod_strength', 'dose_val_rx', 'dose_unit_rx', 'form_val_disp', 'form_unit_disp', 'route']
time: 2.37 ms


In [7]:
# Build the prescriptions DataFrame from the fetched rows, labelled with the cursor's column names.
prescriptionTable = pd.DataFrame(rows,columns=col_name)

time: 21.8 s


In [8]:
# Preview the first ten prescription rows.
prescriptionTable.head(10)

Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,startdate,enddate,drug_type,drug,drug_name_poe,drug_name_generic,formulary_drug_cd,gsn,ndc,prod_strength,dose_val_rx,dose_unit_rx,form_val_disp,form_unit_disp,route
0,2065012,17932,146366,,2116-09-22,2116-09-25,MAIN,Enoxaparin Sodium,Enoxaparin Sodium,Enoxaparin Sodium,LOVE40I,39482,75062040,40mg Syringe,40,mg,1,SYR,SC
1,2065010,17932,146366,,2116-09-23,2116-09-21,MAIN,Enoxaparin Sodium,Enoxaparin Sodium,Enoxaparin Sodium,LOVE40I,39482,75062040,40mg Syringe,40,mg,1,SYR,SC
2,2065708,17932,146366,,2116-09-23,2116-09-24,MAIN,Oxycodone-Acetaminophen,Oxycodone-Acetaminophen,Oxycodone-Acetaminophen,PERC,4222,54465025,5mg/325mg Tab,1-2,TAB,1-2,TAB,PO
3,2480732,17947,193730,284413.0,2110-07-10,2110-07-10,MAIN,Metoprolol,Metoprolol,Metoprolol,METO25,50631,51079025520,25mg Tablet,25,mg,1,TAB,PO
4,2480731,17947,193730,284413.0,2110-07-10,2110-07-10,MAIN,Metoprolol,Metoprolol,Metoprolol,METO5I,19808,55390007310,5mg/5mL Vial,2.5,mg,0.5,VIAL,IV
5,2480733,17947,193730,284413.0,2110-07-10,2110-07-10,MAIN,Furosemide,Furosemide,Furosemide,FURO40I,8205,409610204,40mg/4mL Vial,20,mg,0.5,VIAL,IV
6,2480734,17947,193730,284413.0,2110-07-10,2110-07-10,MAIN,Metoclopramide,Metoclopramide,Metoclopramide HCl,METO10I,5229,60977045101,5mg/mL-2mL,5,mg,0.5,VIAL,IV
7,2480749,17947,193730,284413.0,2110-07-10,2110-07-10,MAIN,Metoprolol,Metoprolol,Metoprolol,METO25,50631,51079025520,25mg Tablet,12.5,mg,0.5,TAB,PO
8,2480752,17947,193730,284413.0,2110-07-10,2110-07-11,MAIN,Metoprolol,Metoprolol,Metoprolol,METO25,50631,51079025520,25mg Tablet,25,mg,1,TAB,PO
9,2480753,17947,193730,284413.0,2110-07-10,2110-07-11,MAIN,Metoprolol,Metoprolol,Metoprolol,METO25,50631,51079025520,25mg Tablet,25,mg,1,TAB,PO


time: 167 ms


## Convert NDC to Rxcui and add it in to prescription table
* This requires querying the NLM RxNav REST API (one request per unique NDC).

In [9]:
# Each distinct NDC is looked up only once.
ndc_lst = prescriptionTable['ndc'].unique()

# One RxNav ndcstatus request per NDC.
# (alternative endpoint kept for reference:
#  'http://rxnav.nlm.nih.gov/REST/rxcui.json?idtype=NDC&id={0}')
url_lst = list(map('https://rxnav.nlm.nih.gov/REST/ndcstatus.json?ndc={0}'.format, ndc_lst))

# Run the download coroutine to completion on the current event loop.
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run(url_lst))
responses = loop.run_until_complete(future)

total time: 255.66725896304706
time: 4min 16s


In [11]:
#rxcui
def get_rxnormId_by_ndc(response):
    """Extract the RXCUI from the JSON body of one ``ndcstatus`` response.

    Parameters
    ----------
    response : str or bytes
        JSON text returned for a single NDC lookup.

    Returns
    -------
    The value stored at ``ndcStatus -> rxcui`` in the decoded JSON, or ``''``
    when the body cannot be decoded or does not contain that path.
    """
    try:
        return json.loads(response)['ndcStatus']['rxcui']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not valid JSON (json.JSONDecodeError subclasses it).
        # KeyError:   an expected key is absent.
        # TypeError:  the body or an intermediate node has the wrong type.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return ''

time: 2.12 ms


In [12]:
# RXCUI parsed from every response (list of rxnorm codes).
Rx_code_lst = list(map(get_rxnormId_by_ndc, responses))

# Lookup table of ndc_code -> rxcui, pairing the two lists position by position.
ndc_rc_code_lst_map = dict(zip(ndc_lst, Rx_code_lst))


time: 56.4 ms


In [13]:
# Add an 'rxcui' column by mapping each row's NDC code through the NDC -> RXCUI lookup.
prescriptionTable = prescriptionTable.assign(rxcui = prescriptionTable['ndc'].map(ndc_rc_code_lst_map))

time: 3.53 s


## Convert RXCUI to ATC, check whether the drug is an antibiotic, and add the result to the table

In [14]:
# Each distinct RXCUI is looked up only once.
rxcui_lst = prescriptionTable['rxcui'].unique()

# One RxClass request per RXCUI, restricted to the ATC relationship source.
url_lst = list(map(
    'https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.json?rxcui={}&relaSource=ATC'.format,
    rxcui_lst,
))

# Run the download coroutine to completion on the current event loop.
loop = asyncio.get_event_loop()
future = asyncio.ensure_future(run(url_lst))
responses = loop.run_until_complete(future)

total time: 181.71388792601647
time: 3min 1s


In [15]:
def get_ATC_code_lst(response):
    """Extract the class ids listed in the JSON body of one RxClass response.

    Parameters
    ----------
    response : str or bytes
        JSON text returned for a single RXCUI lookup.

    Returns
    -------
    list
        The ``classId`` of every ``rxclassMinConceptItem`` found under
        ``rxclassDrugInfoList -> rxclassDrugInfo``, or ``[]`` when the body
        cannot be decoded or does not have that structure.
    """
    try:
        drug_info = json.loads(response)['rxclassDrugInfoList']['rxclassDrugInfo']
        return [entry['rxclassMinConceptItem']['classId'] for entry in drug_info]
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not valid JSON (json.JSONDecodeError subclasses it).
        # KeyError:   an expected key is absent.
        # TypeError:  the body or an intermediate node has the wrong type.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return []

# One list of ATC class codes per response.
ATC_code_lst_lst = list(map(get_ATC_code_lst, responses))

# QC: eyeball the first few rxcui / ATC pairs.
pd.DataFrame(
    {'rxcui': rxcui_lst, 'ATC_code_lst': ATC_code_lst_lst}
).head(5)

time: 66.1 ms


In [18]:
# An entry is flagged as an antibiotic when at least one of its ATC codes starts with 'J01'.
ab_lst = [
    any(code.startswith('J01') for code in codes)
    for codes in ATC_code_lst_lst
]

# QC: eyeball the first few rxcui / ATC / flag triples.
pd.DataFrame(
    dict(rxcui=rxcui_lst, ATC_code_lst=ATC_code_lst_lst, ab=ab_lst)
).head(10)

Unnamed: 0,rxcui,ATC_code_lst,ab
0,854236,[B01AB],False
1,1050490,"[N02BE, N02AA]",False
2,866924,[C07AB],False
3,866508,[C07AB],False
4,1719291,[C03CA],False
5,207263,[],False
6,1658221,"[A06AD, A12CC, B05XA, D11AX, V04CC]",False
7,630208,"[R03AC, R03CC]",False
8,1807508,"[A07AA, J01XA]",True
9,201828,[],False


time: 15.2 ms


In [19]:
# Lookup table of rxcui -> antibiotic flag, pairing the two lists position by position.
ab_map = dict(zip(rxcui_lst, ab_lst))

# Add an 'Antibiotics' column by mapping each row's rxcui through the lookup.
prescriptionTable = prescriptionTable.assign(
    Antibiotics=prescriptionTable['rxcui'].map(ab_map)
)

time: 2.57 ms


In [22]:
# This cell is the end result of the code in: https://github.com/geickelb/Abx_patients_outcome
# As part of a class project, our group reviewed all antibiotics present in the prescriptionTable with Antibiotics == True (based on the ATC code).
# The review combined manual inspection and regular expressions to identify and filter out any unwanted antibiotics in our dataset.

# Some rows share the same drug name but differ in their true/false antibiotic status; the function below marks all such rows as antibiotics.

# input: prescription df annotated with ATC-based antibiotic flags.
# output: updated prescription df where more true antibiotics are covered.

def prescription_update_fxn(prescription_df, drugs_to_exclude=None):
    """Select the prescription rows to treat as antibiotics of interest.

    Some rows share a drug name but differ in their true/false antibiotic
    status. This function starts from every drug name that is flagged at
    least once, collects all NDC codes used with those names, and keeps every
    row carrying one of those NDC codes. Rows whose drug name appears in the
    exclusion list (case-insensitive) are then dropped, because we only want
    antibiotics that would be prescribed if the physician suspected the
    patient of having a serious bacterial infection.

    Parameters
    ----------
    prescription_df : pandas.DataFrame
        Prescription table with at least the columns 'drug', 'ndc' and
        'Antibiotics'. It is not modified.
    drugs_to_exclude : iterable of str, optional
        Drug names to drop from the result, compared case-insensitively.
        When omitted, the hard-coded list from the manual review is used.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame holding the selected rows, with 'Antibiotics' set to
        True on every row.
    """
    if drugs_to_exclude is None:
        drugs_to_exclude = ['Furosemide','Dextrose 50%','Vancomycin Oral Liquid',
                     'Erythromycin 0.5% Ophth Oint','NEO*IV*Furosemide',
                     'Nystatin','Orthopedic Solution','Neomycin-Polymyxin-Bacitracin Ophth. Oint',
                     'Bacitracin Ophthalmic Oint','Bacitracin Ointment','Lasix',
                     'dextrose','MetronidAZOLE Topical 1 % Gel','Enalaprilat',
                     'NEO*PO*Furosemide (10mg/1ml)','Metronidazole Gel 0.75%-Vaginal','Spironolactone',
                     'Heparin',
                    'voriconazole','valgancyclovir','chloroquine','tamiflu','mefloquine','foscarnet',
                     'fluconazole','vorconazole','quinine','ribavirin','gancyclovir','chloroquine',
                     'atovaquone','ambisome', 'acyclovir', 'Acyclovir']

    # Names are compared in lower case so the match ignores capitalisation.
    excluded_names = [name.lower() for name in drugs_to_exclude]

    prescriptions = prescription_df

    # Every drug name that carries the antibiotic flag on at least one row.
    flagged_drugs = prescriptions.loc[prescriptions["Antibiotics"] == True, 'drug'].unique()

    # Every NDC code that appears on a row with one of those drug names.
    # NOTE(review): if one of those rows carries a placeholder NDC that is
    # shared by unrelated drugs (e.g. a missing-value code), the next step
    # selects every row with that placeholder, and only the exclusion list
    # removes them again — confirm this expansion is intended.
    true_ndc = prescriptions.loc[prescriptions["drug"].isin(flagged_drugs), 'ndc'].unique()

    ABrx2 = prescriptions.loc[prescriptions["ndc"].isin(true_ndc), :]
    keep = ~ABrx2["drug"].str.lower().isin(excluded_names)  # tilde turns isin into "not in"

    # Work on an explicit copy so the column assignment below targets an
    # independent frame instead of a slice (avoids SettingWithCopyWarning).
    ABrx2 = ABrx2.loc[keep, :].copy()
    ABrx2['Antibiotics'] = True
    return ABrx2



time: 23.4 ms


In [23]:
# This cell takes a while to run; only run it when the prescription table (which is saved in GitHub) needs updating.
# Not shown: antibiotic annotations were imported into the prescription table using code provided by
# Dr. Luo's postdoc, which pings the API.
# NOTE(review): within this cell, `dtype` is referenced only by the commented-out read_csv call below.
dtype = {'icustay_id': str,
         'NDC': str,
         'rxcui': str,
         'ingredient': str}
#prescriptions = pd.read_csv('/Users/geickelb1/Desktop/PhD_Misc/HSIP_442_Yuan_Lao/project/Newprescription.csv', index_col=0, dtype=dtype)

# Problem: some rows share the same drug name but differ in their true/false antibiotic status; every such row is marked as an antibiotic.


ABrx = prescription_update_fxn(prescriptionTable)
# prescription_update_fxn sets 'Antibiotics' to True on every row it returns, so this filter keeps all of them.
ABrx = ABrx.loc[ABrx['Antibiotics'] == True, :]

#save csv:
#pd.DataFrame(ABrx).to_csv('/Users/geickelb1/Documents/GitHub/mimiciii-antibiotics-modeling/data/raw/csv/02082018_ABrx_updated.csv')


time: 2.21 s


In [163]:
ABrx

Unnamed: 0,row_id,subject_id,hadm_id,icustay_id,startdate,enddate,drug_type,drug,drug_name_poe,drug_name_generic,...,prod_strength,dose_val_rx,dose_unit_rx,form_val_disp,form_unit_disp,route,rxcui,Antibiotics,rxcui2,Antibiotics2
32,2089125,9,150750,220597,2149-11-10,2149-11-11,MAIN,Levofloxacin,Levofloxacin,Levofloxacin,...,500mg Tab,500,mg,1,TAB,NG,211816,True,211816,True
35,2968761,2,163353,243653,2138-07-18,2138-07-20,MAIN,NEO*IV*Gentamicin,,,...,10mg/mL-2mL,15.5,mg,0.775,VIAL,IV,1870676,True,1870676,True
37,2968762,2,163353,243653,2138-07-18,2138-07-21,MAIN,Ampicillin Sodium,,,...,500mg Vial,500,mg,1,VIAL,IV,1721474,True,1721474,True
53,1213116,4,185777,294638,2191-03-16,2191-03-22,MAIN,Clindamycin,,,...,600mg Premix,600,mg,1,BAG,IV,685578,True,685578,True
65,1213118,4,185777,294638,2191-03-17,2191-03-18,MAIN,Vancomycin HCl,,,...,500mg Vial,750,mg,1.5,VIAL,IV,1807516,True,1807516,True
71,1213119,4,185777,294638,2191-03-18,2191-03-20,MAIN,Vancomycin HCl,,,...,500mg Vial,750,mg,1.5,VIAL,IV,1807516,True,1807516,True
137,616184,12,112213,232669,2104-08-08,2104-08-09,MAIN,Metronidazole,,,...,500mg Premix Bag,500,mg,1,BAG,IV,311683,True,311683,True
165,616190,12,112213,232669,2104-08-11,2104-08-11,MAIN,Metronidazole,,,...,500mg Premix Bag,500,mg,1,BAG,IV,311683,True,311683,True
176,616851,12,112213,232669,2104-08-12,2104-08-12,MAIN,Piperacillin-Tazobactam Na,,,...,4.5g Frozen Bag,4.5,gm,1,BAG,IV,884254,True,884254,True
179,616192,12,112213,232669,2104-08-12,2104-08-13,MAIN,Piperacillin-Tazobactam Na,,,...,4.5g Frozen Bag,4.5,gm,1,BAG,IV,884254,True,884254,True


time: 395 ms
