In [1]:
# Import libraries
import numpy as np
import pandas as pd
import psycopg2
import getpass
import time
import os.path
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Move to the project root so the relative 'data/' and 'queries/' paths used by
# the later cells resolve. Set the VENTILATION_PROJECT_DIR environment variable
# to run the notebook on another machine; the original location is the fallback.
PROJECT_DIR = os.environ.get(
    'VENTILATION_PROJECT_DIR',
    'C:\\Users\\anear\\OneDrive - National University of Ireland, Galway\\PhD\\Research Projects\\Ventilation Project',
)
os.chdir(PROJECT_DIR)

In [5]:
# Load the database connection details
# NOTE(review): allow_pickle=True unpickles the file contents, which can execute
# arbitrary code -- only load db_details.npy from a trusted location.
# .tolist() unwraps the stored object so it can be indexed by key below.
_data = np.load('data/db_details.npy', allow_pickle=True).tolist()
db_details = _data['db_details']
user = db_details['user']
host = db_details['host']
port = db_details['port']
dbname = db_details['dbname']
schema = db_details['schema']

# Connect to the database; the password is prompted for interactively so it is
# never written into the notebook. (The prompt has no placeholder, so the former
# no-op .format(user) call was dropped.)
conn = psycopg2.connect(
    dbname=dbname,
    user=user,
    host=host,
    port=port,
    password=getpass.getpass(prompt='Password:'),
)
cur = conn.cursor()

# Point the session at the configured schema for the queries run later.
# NOTE(review): schema is interpolated straight into the SQL text; it comes from
# the local config file above, so keep that file trusted.
cur.execute('SET search_path TO {}'.format(schema))
conn.commit()

Password: ··········


In [6]:
# Read the previously saved patient data back from disk.
# allow_pickle=True is required because the file stores a Python object (indexed
# by key below) rather than a plain numeric array; .tolist() unwraps it.
patients_file = 'data/final_patients.npy'
_data = np.load(patients_file, allow_pickle=True).tolist()
patients = _data['patients']
print("Loaded the patient data!")

Loaded the patient data!


In [9]:
# Load the appropriate query; the context manager closes the SQL file even if
# the database call below raises.
# NOTE: the recorded run of this cell took about 96 minutes -- consider caching
# the resulting frame to disk before re-running the notebook.
with open('queries/ce_itemids.sql', 'r') as query:
    start = time.time()

    # Store the result of the query as a pandas dataframe
    chartevents = pd.read_sql_query(query.read(), conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 95.63 minutes


In [10]:
# Preview the 50 rows with the largest `count`, highest first
chartevents.sort_values(by='count', ascending=False).head(50)

Unnamed: 0,itemid,label,count
3917,646,SpO2,1557059
3916,211,Heart Rate,1527556
3915,742,calprevflg,1519420
3914,618,Respiratory Rate,1485080
3913,212,Heart Rhythm,1448041
3912,128,Code Status,1447415
3911,550,Precautions,1443421
3910,161,Ectopy Type,1421957
3909,220045,Heart Rate,1298288
3908,1125,Service Type,1295689


In [11]:
# Load the appropriate query; the context manager closes the SQL file even if
# the database call below raises.
with open('queries/le_itemids.sql', 'r') as query:
    start = time.time()

    # Store the result of the query as a pandas dataframe
    labevents = pd.read_sql_query(query.read(), conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 0.37 minutes


In [12]:
# Preview the 5 rows with the largest `count`, highest first
# NOTE(review): the saved output of this cell shows only the column header and
# no rows -- confirm that queries/le_itemids.sql actually returns data.
labevents.sort_values(by='count', ascending=False).head(5)

Unnamed: 0,itemid,label,count


In [13]:
# Load the appropriate query; the context manager closes the SQL file even if
# the database call below raises.
with open('queries/oe_itemids.sql', 'r') as query:
    start = time.time()

    # Store the result of the query as a pandas dataframe
    outputevents = pd.read_sql_query(query.read(), conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 0.14 minutes


In [14]:
# Preview the 5 rows with the largest `count`, highest first
outputevents.sort_values(by='count', ascending=False).head(5)

Unnamed: 0,itemid,label,count
613,40055,Urine Out Foley,894324
612,226559,Foley,599936
611,40054,Stool Out Stool,40829
610,40286,Ultrafiltrate Ultrafiltrate,36571
609,40076,Chest Tubes CTICU CT 1,31123


In [15]:
# Load the appropriate query; the context manager closes the SQL file even if
# the database call below raises.
with open('queries/iecv_itemids.sql', 'r') as query:
    start = time.time()

    # Store the result of the query as a pandas dataframe
    inputevents_cv = pd.read_sql_query(query.read(), conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 0.90 minutes


In [16]:
# Preview the 10 rows with the largest `count`, highest first
inputevents_cv.sort_values(by='count', ascending=False).head(10)

Unnamed: 0,itemid,label,count
1189,30018,.9% Normal Saline,1285745
1188,30013,D5W,1188492
1187,30131,Propofol,561212
1186,30118,Fentanyl,533541
1185,30045,Insulin,420732
1184,30124,Midazolam,335609
1183,30025,Heparin,290513
1182,30120,Levophed-k,238281
1181,30140,,232579
1180,30128,Neosynephrine-k,196642


In [17]:
# Load the appropriate query; the context manager closes the SQL file even if
# the database call below raises.
with open('queries/iemv_itemids.sql', 'r') as query:
    start = time.time()

    # Store the result of the query as a pandas dataframe
    inputevents_mv = pd.read_sql_query(query.read(), conn)

end = time.time()

# Print the run time
print('Run time: {:.2f} minutes'.format((end-start)/60))

Run time: 0.21 minutes


In [18]:
# Preview the 5 rows with the largest `count`, highest first
inputevents_mv.sort_values(by='count', ascending=False).head(5)

Unnamed: 0,itemid,label,count
267,225158,NaCl 0.9%,295791
266,220949,Dextrose 5%,229813
265,225943,Solution,175033
264,222168,Propofol,126013
263,225799,Gastric Meds,73139


In [27]:
# Save to item_ids.npy
# exist_ok=True creates the folder only when it is missing, replacing the
# separate exists() check with a single call.
os.makedirs('./data', exist_ok=True)

# Bundle the five item-id frames under the name of the table they came from.
# NOTE: np.save stores this dict by pickling it, so the file has to be read back
# with np.load(..., allow_pickle=True) and should only be shared with trusted users.
tosave = {
    'chartevents': chartevents,
    'labevents': labevents,
    'outputevents': outputevents,
    'inputevents_cv': inputevents_cv,
    'inputevents_mv': inputevents_mv,
}
np.save('data/item_ids.npy', tosave)
print("Saved!")

Saved!
