In [1]:
# Import libraries
import numpy as np
import pandas as pd
import psycopg2
import getpass
import os.path
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the database connection details from a local NumPy pickle.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so this
# file must only ever come from a trusted source.
_data = np.load('data/db_details.npy', allow_pickle=True).tolist()
db_details = _data['db_details']
user = db_details['user']
host = db_details['host']
port = db_details['port']
dbname = db_details['dbname']
schema = db_details['schema']

# Connect to the database. The password is requested interactively so that it
# is never stored in the notebook or in the details file.
conn = psycopg2.connect(
    dbname=dbname,
    user=user,
    host=host,
    port=port,
    password=getpass.getpass(prompt='Password:'),
)
cur = conn.cursor()

# Set the schema search path for this connection.
# NOTE(review): `schema` is interpolated straight into the statement; that is
# only acceptable because it comes from the local details file loaded above.
cur.execute('SET search_path TO {}'.format(schema))
conn.commit()

Password: ··········


In [3]:
# Read the saved patient bundle and unpack the two tables stored in it:
# `patients` and `mech_vent`.
_data = np.load('data/patients.npy', allow_pickle=True).tolist()
patients, mech_vent = _data['patients'], _data['mech_vent']
print("Loaded!")

Loaded!


In [4]:
# Preview the first five rows of the patient table
patients.head(n=5)

Unnamed: 0,subject_id,hadm_id,icustay_id,gender,dob,age,admittime,dischtime,intime,outtime,los,diagnosis,hospital_expire_flag,ventnum
0,3,145834,211552,M,2025-04-11,76.52,2101-10-20 19:08:00,2101-10-31 13:58:00,2101-10-20 19:10:11,2101-10-26 20:43:09,6.0646,HYPOTENSION,0,1
1,9,150750,220597,M,2108-01-26,41.79,2149-11-09 13:06:00,2149-11-14 10:15:00,2149-11-09 13:07:02,2149-11-14 20:52:14,5.3231,HEMORRHAGIC CVA,1,1
2,12,112213,232669,M,2032-03-24,72.37,2104-08-07 10:15:00,2104-08-20 02:57:00,2104-08-08 02:08:17,2104-08-15 17:22:25,7.6348,PANCREATIC CANCER/SDA,1,1
3,13,143045,263738,F,2127-02-27,39.86,2167-01-08 18:43:00,2167-01-15 15:15:00,2167-01-08 18:44:25,2167-01-12 10:43:31,3.666,CORONARY ARTERY DISEASE,0,1
4,17,194023,277042,F,2087-07-14,47.45,2134-12-27 07:15:00,2134-12-31 16:05:00,2134-12-27 16:21:48,2134-12-29 18:04:03,2.071,PATIENT FORAMEN OVALE\ PATENT FORAMEN OVALE MI...,0,1


In [5]:
# Load the FiO2 query; the context manager closes the file even if reading fails
with open('queries/fio2.sql', 'r') as query:
    fio2_sql = query.read()

# Run the query and store the result as a pandas dataframe
fio2 = pd.read_sql_query(fio2_sql, conn)

# Show some sample data
fio2.head()

Unnamed: 0,hadm_id,itemid,charttime,valueuom,valuenum
0,161773.0,50816,2100-06-09 01:18:00,%,40.0
1,161773.0,190,2100-06-09 02:00:00,torr,80.000001
2,161773.0,50816,2100-06-09 06:16:00,%,80.0
3,161773.0,190,2100-06-09 07:00:00,torr,50.0
4,161773.0,190,2100-06-09 11:00:00,torr,50.0


In [None]:
# Load the SpO2 query; the context manager closes the file even if reading fails
with open('queries/spo2.sql', 'r') as query:
    spo2_sql = query.read()

# Run the query and store the result as a pandas dataframe
spo2 = pd.read_sql_query(spo2_sql, conn)

# Show some sample data
spo2.head()

In [None]:
# Load the PaO2 query; the context manager closes the file even if reading fails
with open('queries/pao2.sql', 'r') as query:
    pao2_sql = query.read()

# Run the query and store the result as a pandas dataframe
pao2 = pd.read_sql_query(pao2_sql, conn)

# Show some sample data
pao2.head()

In [None]:
def hours_since(times, origin):
    """Return the time elapsed from `origin` to `times`, in fractional hours.

    `times` may be a single timestamp or a pandas Series of timestamps.
    A single timestamp yields a float; a Series yields a NumPy float array
    with one entry per row. Times earlier than `origin` come out negative.
    """
    elapsed = pd.to_timedelta(times - origin)
    if isinstance(elapsed, pd.Series):
        return elapsed.dt.total_seconds().values / 3600.0
    return elapsed.total_seconds() / 3600.0


# Select data for one patient: randomly pick one row with ventnum == 2 and use
# its hospital admission ID. The draw is unseeded, so every run of this cell
# shows a different admission.
sampled = patients[patients['ventnum'] == 2].sample(n=1)
patient_id = int(sampled['hadm_id'].iloc[0])
pat = patients[patients['hadm_id'] == patient_id] # patient details

# Take the ICU stay from the sampled row itself, so that an admission matching
# several rows of `patients` cannot make the scalar conversion fail.
icustay_id = float(sampled['icustay_id'].iloc[0])
mv = mech_vent[mech_vent['icustay_id'] == icustay_id] # ventilation episodes
p = pao2[pao2['hadm_id'] == patient_id] # PaO2
s = spo2[spo2['hadm_id'] == patient_id] # SpO2
f = fio2[fio2['hadm_id'] == patient_id] # FiO2
adm = sampled['intime'].iloc[0] # ICU admission time
dis = sampled['outtime'].iloc[0] # ICU discharge time

# Re-format discharge time to hours since ICU admission
dis_time = hours_since(dis, adm)

# Re-format ventilator start and end times to hours since ICU admission.
# One entry per row of `mv`, so any number of episodes is supported.
vent_starts = hours_since(mv['starttime'], adm)
vent_ends = hours_since(mv['endtime'], adm)

# Re-format PaO2, SpO2 and FiO2 chart times to hours since ICU admission
corrected_p_time = hours_since(p['charttime'], adm)
corrected_s_time = hours_since(s['charttime'], adm)
corrected_f_time = hours_since(f['charttime'], adm)

# Plot the raw data.
# NOTE(review): the stored FiO2 preview in this notebook lists a `valuenum`
# column and no `fio2` column; confirm that the queries alias their value
# columns to `pao2` / `spo2` / `fio2`, otherwise these lookups raise KeyError.
fig, ax = plt.subplots(figsize=(25, 10))
ax.plot(corrected_p_time, p['pao2'], '-o', label='PaO2 (mmHg)')
ax.plot(corrected_s_time, s['spo2'], '-o', label='SpO2 (%)')
ax.plot(corrected_f_time, f['fio2'], '-o', label='FiO2 (%)')

# Shade every mechanical ventilation episode of the selected ICU stay
for vent_start, vent_end in zip(vent_starts, vent_ends):
    ax.axvspan(vent_start, vent_end, alpha=0.1, color='red')

ax.set_title('PaO2, SpO2 and FiO2 for Hospital Admission ID {}'.format(patient_id))
ax.set_xlabel('Time (hours since ICU admission)')
ax.set_xlim([0, dis_time])
ax.set_ylabel('Measurement value')
ax.legend();