In [1]:
# Import libraries
import numpy as np
import pandas as pd
import psycopg2
import getpass
import os.path
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the database connection details
_data = np.load('data/db_details.npy', allow_pickle=True).tolist()
db_details = _data['db_details']
user = db_details['user']
host = db_details['host']
port = db_details['port']
dbname = db_details['dbname']
schema = db_details['schema']
    
# Connect to the database
conn = psycopg2.connect(dbname=dbname, user=user, host=host, port=port, password=getpass.getpass(prompt='Password:'.format(user)))
cur = conn.cursor()
cur.execute('SET search_path TO {}'.format(schema))
conn.commit()

Password: ··········


In [3]:
# Load the patient data
_data = np.load('data/patients.npy', allow_pickle=True).tolist()
patients = _data['patients']
mech_vent = _data['mech_vent']
print("Loaded!")

Loaded!


In [4]:
patients.head()

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,dob,age,admittime,dischtime,intime,outtime,los,diagnosis,hospital_expire_flag,ventnum
0,268,110404,280836,F,2132-02-21,65.98,2198-02-11 13:40:00,2198-02-18 03:55:00,2198-02-14 23:27:38,2198-02-18 05:26:11,3.249,DYSPNEA,1,1
1,273,158689,241507,M,2107-08-10,33.69,2141-04-19 06:11:00,2141-04-20 17:00:00,2141-04-19 06:12:05,2141-04-20 17:52:11,1.4862,POLYSUBSTANCE OVERDOSE,0,1
2,275,129886,219649,M,2088-08-07,82.16,2170-10-06 03:09:00,2170-10-19 15:35:00,2170-10-07 11:28:53,2170-10-14 14:38:07,7.1314,UROSEPSIS-CHANGE IN MENTAL STATUS,1,1
3,281,111199,257572,F,2041-10-12,60.02,2101-10-18 04:42:00,2101-10-25 19:10:00,2101-10-18 04:45:22,2101-10-25 22:29:25,7.7389,"PNEUMONIA,HYPONATREMIA",1,2
4,283,109185,231490,M,2090-09-05,75.93,2166-08-12 22:02:00,2166-09-12 14:41:00,2166-08-12 22:03:26,2166-09-12 14:41:42,30.6932,PNEUMONIA,0,2


In [5]:
# Load the FiO2 query
query = open('queries/fio2.sql', 'r')

# Store the result of the query as a pandas dataframe
fio2 = pd.read_sql_query(query.read(), conn)
query.close()

# Show some sample data
fio2.head()

Unnamed: 0,hadm_id,itemid,charttime,valueuom,valuenum
0,161773.0,50816,2100-06-09 01:18:00,%,40.0
1,161773.0,190,2100-06-09 02:00:00,torr,80.000001
2,161773.0,50816,2100-06-09 06:16:00,%,80.0
3,161773.0,190,2100-06-09 07:00:00,torr,50.0
4,161773.0,190,2100-06-09 11:00:00,torr,50.0


In [6]:
# Load the SpO2 query
query = open('queries/spo2.sql', 'r')

# Store the result of the query as a pandas dataframe
spo2 = pd.read_sql_query(query.read(), conn)
query.close()

# Show some sample data
spo2.head()

Unnamed: 0,hadm_id,itemid,charttime,valueuom,valuenum
0,,50817,2099-07-06 11:59:00,%,96.0
1,,50817,2100-01-21 22:04:00,%,98.0
2,,50817,2100-01-30 10:19:00,%,99.0
3,118464.0,220277,2100-06-08 00:19:00,%,98.0
4,118464.0,220277,2100-06-08 01:00:00,%,97.0


In [7]:
# Load the PaO2 query
query = open('queries/pao2.sql', 'r')

# Store the result of the query as a pandas dataframe
pao2 = pd.read_sql_query(query.read(), conn)
query.close()

# Show some sample data
pao2.head()

Unnamed: 0,hadm_id,itemid,charttime,valueuom,valuenum
0,,50821,2096-02-26 05:52:00,mm Hg,76.0
1,,50821,2099-07-06 11:59:00,mm Hg,91.0
2,,50821,2099-10-01 02:25:00,mm Hg,67.0
3,,50821,2100-01-12 00:35:00,mm Hg,75.0
4,,50821,2100-01-20 10:42:00,mm Hg,55.0


In [8]:
# Select data for one patient
patient_id = int(patients[patients['ventnum']==2].sample(n=1)['hadm_id']) # randmly select a hadm_id
pat = patients[patients['hadm_id']==patient_id] # patient details
mv = mech_vent[mech_vent['icustay_id']==float(pat['icustay_id'])]
p = pao2[pao2['hadm_id']==patient_id] # PaO2
s = spo2[spo2['hadm_id']==patient_id] # SpO2
f = fio2[fio2['hadm_id']==patient_id] # FiO2
adm = pat['intime'].iloc[0] # ICU admission time
dis = pat['outtime'].iloc[0] # ICU discharge time

# Re-format discharge time to hours since ICU admission
end = dis - adm
days = end.days
seconds = end.seconds
dis_time = days*24 + seconds/(60*60)

# Re-format ventilator start and end times to hours since ICU admission
start1, start2 = mv['starttime']
s1_diff = start1 - adm
days = s1_diff.days
seconds = s1_diff.seconds
s1 = days*24 + seconds/(60*60)
s2_diff = start2 - adm
days = s2_diff.days
seconds = s2_diff.seconds
s2 = days*24 + seconds/(60*60)
end1, end2 = mv['endtime']
e1_diff = end1 - adm
days = e1_diff.days
seconds = e1_diff.seconds
e1 = days*24 + seconds/(60*60)
e2_diff = end2 - adm
days = e2_diff.days
seconds = e2_diff.seconds
e2 = days*24 + seconds/(60*60)

# Re-format PaO2 time to hours since ICU admission
p_time = p['charttime'] - adm
days = np.array([x.days for x in p_time])
seconds = np.array([x.seconds for x in p_time])
corrected_p_time = days*24 + seconds/(60*60)

# Re-format SpO2 time to hours since ICU admission
s_time = s['charttime'] - adm
days = np.array([x.days for x in s_time])
seconds = np.array([x.seconds for x in s_time])
corrected_s_time = days*24 + seconds/(60*60)

# Re-format FiO2 time to hours since ICUl admission
f_time = f['charttime'] - adm
days = np.array([x.days for x in f_time])
seconds = np.array([x.seconds for x in f_time])
corrected_f_time = days*24 + seconds/(60*60)

# Plot the raw data
plt.figure(figsize=(25,10))
plt.plot(corrected_p_time, p['pao2'], '-o')
plt.plot(corrected_s_time, s['spo2'], '-o')
plt.plot(corrected_f_time, f['fio2'], '-o')
plt.axvspan(s1, e1, alpha=0.1, color='red')
plt.axvspan(s2, e2, alpha=0.1, color='red')
plt.title('PaO2, SpO2 and FiO2 for Hospital Admission ID {}'.format(patient_id))
plt.xlabel('Time (hours since ICU admission)')
plt.xlim([0,dis_time])
plt.ylabel('Measurement value')
plt.legend(['PaO2 (mmHg)', 'SpO2 (%)', 'FiO2 (%)']);

KeyError: 'pao2'

<Figure size 1800x720 with 0 Axes>