In [1]:
# Import libraries
import numpy as np
import pandas as pd
import psycopg2
import getpass
import time
import os.path
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the database connection details.
# NOTE(review): allow_pickle=True unpickles the file contents, and unpickling
# can execute arbitrary code -- only load a db_details.npy you created yourself.
_data = np.load('data/db_details.npy', allow_pickle=True).tolist()
db_details = _data['db_details']
user = db_details['user']
host = db_details['host']
port = db_details['port']
dbname = db_details['dbname']
schema = db_details['schema']

# Connect to the database. The password is requested interactively so that it
# is never stored in the notebook. The prompt contains no placeholder, so it is
# passed as a plain string (formatting it with `user` had no effect).
conn = psycopg2.connect(
    dbname=dbname,
    user=user,
    host=host,
    port=port,
    password=getpass.getpass(prompt='Password:'),
)
cur = conn.cursor()

# Make the configured schema the default for every query on this connection.
# NOTE(review): `schema` is interpolated straight into the statement, so it
# must come from a trusted configuration file.
cur.execute('SET search_path TO {}'.format(schema))
conn.commit()

Password: ··········


In [3]:
# Load the appropriate query. The context manager closes the file even if
# reading it raises, so the handle cannot leak.
with open('queries/ce_itemids.sql', 'r') as query_file:
    sql_text = query_file.read()

# NOTE: the recorded run of this cell took roughly 169 minutes.
start = time.time()

# Store the result of the query as a pandas dataframe
chartevents = pd.read_sql_query(sql_text, conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 168.64 minutes


In [14]:
# Preview the 50 chartevents rows with the largest `count` values
chartevents.sort_values('count', ascending=False).iloc[:50]

Unnamed: 0,itemid,label,count
4239,646,SpO2,2144767
4238,211,Heart Rate,2131455
4237,742,calprevflg,2119399
4236,618,Respiratory Rate,2077843
4235,212,Heart Rhythm,2021687
4234,128,Code Status,1985482
4233,161,Ectopy Type,1978345
4232,550,Precautions,1976848
4231,1125,Service Type,1796117
4230,51,Arterial BP [Systolic],1557735


In [5]:
# Load the appropriate query. The context manager closes the file even if
# reading it raises, so the handle cannot leak.
with open('queries/le_itemids.sql', 'r') as query_file:
    sql_text = query_file.read()

start = time.time()

# Store the result of the query as a pandas dataframe
labevents = pd.read_sql_query(sql_text, conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 0.47 minutes


In [6]:
# Preview the five labevents rows with the largest `count` values.
# NOTE(review): the recorded output for this cell shows only the header row --
# confirm that the labevents query actually returned data.
labevents.sort_values('count', ascending=False).head(5)

Unnamed: 0,itemid,label,count


In [7]:
# Load the appropriate query. The context manager closes the file even if
# reading it raises, so the handle cannot leak.
with open('queries/oe_itemids.sql', 'r') as query_file:
    sql_text = query_file.read()

start = time.time()

# Store the result of the query as a pandas dataframe
outputevents = pd.read_sql_query(sql_text, conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 0.16 minutes


In [8]:
# Preview the five outputevents rows with the largest `count` values
outputevents.sort_values('count', ascending=False).head(5)

Unnamed: 0,itemid,label,count
679,40055,Urine Out Foley,1293789
678,226559,Foley,707116
677,40076,Chest Tubes CTICU CT 1,134011
676,226588,Chest Tube #1,66911
675,40054,Stool Out Stool,48084


In [9]:
# Load the appropriate query. The context manager closes the file even if
# reading it raises, so the handle cannot leak.
with open('queries/iecv_itemids.sql', 'r') as query_file:
    sql_text = query_file.read()

start = time.time()

# Store the result of the query as a pandas dataframe
inputevents_cv = pd.read_sql_query(sql_text, conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 1.08 minutes


In [15]:
# Preview the ten inputevents_cv rows with the largest `count` values
inputevents_cv.sort_values('count', ascending=False).iloc[:10]

Unnamed: 0,itemid,label,count
1436,30013,D5W,1789655
1435,30018,.9% Normal Saline,1611230
1434,30131,Propofol,772595
1433,30045,Insulin,627391
1432,30118,Fentanyl,616161
1431,30128,Neosynephrine-k,422977
1430,30124,Midazolam,394081
1429,30025,Heparin,380185
1428,30120,Levophed-k,332016
1427,30140,,310960


In [11]:
# Load the appropriate query. The context manager closes the file even if
# reading it raises, so the handle cannot leak.
with open('queries/iemv_itemids.sql', 'r') as query_file:
    sql_text = query_file.read()

start = time.time()

# Store the result of the query as a pandas dataframe
inputevents_mv = pd.read_sql_query(sql_text, conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 0.22 minutes


In [16]:
# Preview the five inputevents_mv rows with the largest `count` values
inputevents_mv.sort_values('count', ascending=False).head(5)

Unnamed: 0,itemid,label,count
261,225158,NaCl 0.9%,322811
260,220949,Dextrose 5%,254207
259,225943,Solution,189128
258,222168,Propofol,145822
257,223258,Insulin - Regular,84766


In [17]:
# Save to item_ids.npy
# exist_ok=True creates the output folder only when it is missing, without a
# separate check-then-create step.
os.makedirs('./data', exist_ok=True)

# Bundle the five item-id tables under the name of the table they came from.
tosave = {
    'chartevents': chartevents,
    'labevents': labevents,
    'outputevents': outputevents,
    'inputevents_cv': inputevents_cv,
    'inputevents_mv': inputevents_mv,
}

# NOTE: a dict is not a plain array, so np.save stores it as a pickled object;
# reading it back requires np.load(..., allow_pickle=True).
np.save('data/item_ids.npy', tosave)
print("Saved!")

Saved!


In [24]:
# Look up the chartevents row(s) whose itemid is 220179
chartevents.loc[chartevents['itemid'] == 220179]

Unnamed: 0,itemid,label,count
4182,220179,Non Invasive Blood Pressure systolic,530876
