# Acute Airway Obstruction and Treatment

This jupyter notebook is to extract the data related to Acute Airway Obstruction from MIMIC III dataset.

In [1]:
# Import libraries
import pandas as pd
import os
import psycopg2
import getpass
import pandas as pd
import numpy as np

In [2]:
# Path to save the extracted values
export_dir = '../extracted_data_airway'
if not os.path.isdir(export_dir):
    os.mkdir(export_dir)

In [3]:
# Create a database connection
user = 'dhruv.sharma'
host = 'localhost'
dbname = 'mimic'
schema = 'mimiciii'

## Loading the MIMIC III data

In [4]:
# Connect to the database
con = psycopg2.connect(dbname=dbname, user=user, host=host, 
                       password=getpass.getpass(prompt='Password:'.format(user)))
cur = con.cursor()
cur.execute('SET search_path to {}'.format(schema))

Password:········


## Extracting Patients related to Acute Airway Obstruction and Treatment

In [5]:
query = \
"""
SELECT subject_id, hadm_id
FROM diagnoses_icd
where icd9_code like '9950%' or icd9_code like '9956%'
order by subject_id

"""

data = pd.read_sql_query(query,con)
data.head(n=15)

Unnamed: 0,subject_id,hadm_id
0,2003,181620
1,3078,103639
2,3178,123818
3,3178,131757
4,4159,131034
5,4429,179758
6,4745,151356
7,6202,114344
8,7482,189637
9,7613,196983


## Extracting the required information - demographics, vital signs, mechanical ventilation

### Demographic information

Height: 1394(CareVue), 226706(MetaVision)

Weight: 580,581(CareVue), 226512(MetaVision)

In [6]:
query = '''
with meta_data as
(
    with data as
    (
        with sub_ids as 
        (
            SELECT subject_id, hadm_id
            FROM diagnoses_icd diag
            where icd9_code like '9950%' or icd9_code like '9956%'
        ),
        age_gender as
        (
            SELECT
              p.subject_id, p.gender
              , MIN( ROUND( (cast(admittime as date) - cast(dob as date)) / 365.242,2) )
                  AS age
            FROM patients p
            INNER JOIN admissions a
            ON p.subject_id = a.subject_id
            GROUP BY p.subject_id, p.dob, p.gender
            ORDER BY p.subject_id
        )
        select sub_ids.subject_id, sub_ids.hadm_id, age_gender.age, age_gender.gender
        from sub_ids
        left join age_gender
        on sub_ids.subject_id = age_gender.subject_id
        order by sub_ids.subject_id
    ),
    wt_tab as
    (
        select pat.subject_id, min(ce.value) as weight
        from patients pat
        left join chartevents ce
        on pat.subject_id = ce.subject_id
        where ce.itemid in (580,581,226512)
        group by pat.subject_id
        order by pat.subject_id
    )
    select data.subject_id, data.hadm_id, data.age, data.gender, wt_tab.weight
    from data
    left join wt_tab
    on data.subject_id = wt_tab.subject_id
    order by data.subject_id
),
ht_tab as
(
    select subject_id, min(value) as height
    from chartevents
    where itemid in (1394, 226707)
    group by subject_id
    order by subject_id
)
select meta_data.subject_id, meta_data.hadm_id, meta_data.age, meta_data.gender, meta_data.weight, ht_tab.height
from meta_data
left join ht_tab
on meta_data.subject_id = ht_tab.subject_id
order by meta_data.subject_id 
'''

demog = pd.read_sql_query(query,con)
demog.to_csv(os.path.join(export_dir,'demographics_pulse.csv'),index=False,sep=',')
demog.head(n=15)

Unnamed: 0,subject_id,hadm_id,age,gender,weight,height
0,2003,181620,60.3,M,,
1,3078,103639,45.82,M,75.0999984741211,
2,3178,131757,79.67,M,100.8000030517578,
3,3178,123818,79.67,M,100.8000030517578,
4,4159,131034,63.36,M,92.0,
5,4429,179758,61.31,F,80.0999984741211,
6,4745,151356,45.19,F,,
7,6202,114344,76.03,M,81.0999984741211,
8,7482,189637,64.43,F,90.0,66.0
9,7613,196983,67.78,M,166.60000610351562,


### Vitals signs

Heart Rate: 211(CareVue), 220045(MetaVision)<br/>
Respiratory Rate: 618,619(CareVue), 220210(MetaVision)<br/>
Diastolic: 8364,8368,8555(CareVue), 220051,225310(MetaVision)<br/>
Systolic: 6,51,6701(CareVue), 220050,225309(MetaVision)<br/>
Mean Arterial BP: 52(CareVue), 220052(MetaVision)<br/>
CVP: 1103,113(CareVue), 22074(MetaVision)<br/>
FIO2: 3420,2981(CareVue), 227009,227010,226754(MetaVision)<br/>
SPO2: 646(CareVue)<br/>
Tidal Volume: 681,2400,2420,2534(CareVue), 224685(MetaVision)<br/>
SVR: 1373,626(CareVue), 226865(MetaVision)<br/>
Temperature: 676(CareVue), 223762(MetaVision)<br/>
Minute Volume: 448(CareVue), 224687(MetaVision)<br/>
pCO2: 3784,3835(CareVue), 227036(MetaVision)<br/>



In [7]:
query='''
with data as
(
    with sub_ids as 
    (
        SELECT subject_id, hadm_id
        FROM diagnoses_icd diag
        where icd9_code like '9950%' or icd9_code like '9956%'
    )
    select sub_ids.subject_id, sub_ids.hadm_id, icustay_id, extract(epoch from charttime) as charttime, itemid, value, valueuom
    from sub_ids
    left join chartevents ce
    on sub_ids.subject_id = ce.subject_id and sub_ids.hadm_id = ce.hadm_id
    where itemid in (211,220045,          --heart rate
                     618,619,220210,      --respiratoty rate
                     8364,8368,8555,220051,225310,     --diastolic
                     6,51,6701,220050,225309,          --systolic
                     52,220052,           --mean arterial BP
                     1103,113,22074,      --CVP
                     3420,2981,227009,227010,226754,   --fio2
                     646,                 --spo2
                     681,2400,2420,2534,224685,        --tidal volume
                     1373,626,226865,     --SVR
                     676,223762,          --temperature
                     448,224687,          --minute volume
                     3784,3835,227036     --pCO2
                     )
    order by sub_ids.subject_id, sub_ids.hadm_id, icustay_id, charttime
)
select subject_id, hadm_id, icustay_id, charttime, data.itemid, d.label, data.value, valueuom
from data
left join d_items d
on data.itemid = d.itemid
order by subject_id, hadm_id, icustay_id, charttime
'''

data = pd.read_sql_query(query,con)
data.to_csv(os.path.join(export_dir,'vital_pulse.csv'),index=False,sep=',')
data.head(n=15)

Unnamed: 0,subject_id,hadm_id,icustay_id,charttime,itemid,label,value,valueuom
0,2003,181620,275551,5336125000.0,618,Respiratory Rate,,BPM
1,2003,181620,275551,5336125000.0,211,Heart Rate,,BPM
2,2003,181620,275551,5336127000.0,211,Heart Rate,,BPM
3,2003,181620,275551,5336127000.0,618,Respiratory Rate,,BPM
4,2003,181620,275551,5336127000.0,646,SpO2,,%
5,2003,181620,275551,5336129000.0,8368,Arterial BP [Diastolic],,mmHg
6,2003,181620,275551,5336129000.0,52,Arterial BP Mean,,mmHg
7,2003,181620,275551,5336129000.0,646,SpO2,,%
8,2003,181620,275551,5336129000.0,211,Heart Rate,,BPM
9,2003,181620,275551,5336129000.0,51,Arterial BP [Systolic],,mmHg


In [8]:
def get_vital(itemid):
    '''
    This function returns the vital sign corresponding to the the itemid
    Args:
        itemid: The itemid for the vital sign
    Returns:
        vital: a string for the vital sign
    '''
    if(itemid == 211 or itemid == 220045):
        return 'heart_rate(BPM)'
    if(itemid == 618 or itemid == 619 or itemid == 220210):
        return 'respiratory_rate'
    if(itemid == 8364 or itemid == 8368 or itemid == 8555 or itemid == 220051 or itemid == 225310):
        return 'diastolic(mmHg)'
    if(itemid == 6 or itemid == 51 or itemid == 6701 or itemid == 220056 or itemid == 225309):
        return 'systolic(mmHg)'
    if(itemid == 52 or itemid == 220052):
        return 'mean_arterial_bp(mmHg)'
    if(itemid == 1103 or itemid == 113 or itemid == 22074):
        return 'cvp(mmHg)'
    if(itemid == 3420 or itemid == 2981 or itemid == 227009 or itemid == 227010 or itemid == 226754):
        return 'fio2'
    if(itemid == 646):
        return 'spo2(%)'
    if(itemid == 681 or itemid == 2400 or itemid == 2420 or itemid == 2534 or itemid == 224685):
        return 'tidal_volume(mL)'
    if(itemid == 1373 or itemid == 626 or itemid == 226865):
        return 'svr'
    if(itemid == 676 or itemid == 223761):
        return 'temperature(deg_cel)'
    if(itemid == 448 or itemid == 224687):
        return 'minute_volume(L/min)'
    if(itemid == 3784 or itemid == 3835 or itemid == 227036):
        return 'pCO2'

In [9]:
vital_new = pd.DataFrame()
vital_new['heart_rate(BPM)'] = np.float('nan')
vital_new['respiratory_rate'] = np.float('nan')
vital_new['diastolic(mmHg)'] = np.float('nan')
vital_new['systolic(mmHg)'] = np.float('nan')
vital_new['mean_arterial_bp(mmHg)'] = np.float('nan')
vital_new['cvp(mmHg)'] = np.float('nan')
vital_new['fio2'] = np.float('nan')
vital_new['spo2(%)'] = np.float('nan')
vital_new['tidal_volume(mL)'] = np.float('nan')
vital_new['svr'] = np.float('nan')
vital_new['temperature(deg_cel)'] = np.float('nan')
vital_new['minute_volume(L/min)'] = np.float('nan')
vital_new['pCO2'] = np.float('nan')

count = data.shape[0]

In [10]:
prev_charttime = None
prev_df = None
for i in range(count):
    this_data = data.iloc[i]
    if(this_data.charttime == prev_charttime):
        prev_df[get_vital(this_data.itemid)] = str(this_data.value)
    else:
        if(prev_df is not None):
            vital_new = vital_new.append(prev_df, ignore_index=True)
        this_data[get_vital(this_data.itemid)] = this_data.value
        this_data = this_data.drop(['itemid', 'label', 'value', 'valueuom'])
        prev_df = this_data
        prev_charttime = this_data.charttime

if prev_df is not None:
    vital_new = vital_new.append(prev_df, ignore_index=True)

columns = ['subject_id', 'hadm_id', 'icustay_id', 'charttime', 'mean_arterial_bp(mmHg)', 'heart_rate(BPM)', 
           'respiratory_rate', 'temperature(deg_cel)', 'diastolic(mmHg)', 'systolic(mmHg)', 'cvp(mmHg)', 'fio2', 
           'spo2(%)', 'tidal_volume(mL)', 'svr', 'minute_volume(L/min)', 'pCO2']
vital_new = vital_new.reindex(columns=columns)
vital_new.to_csv(os.path.join(export_dir, 'vitals_final.csv'),index=False,sep=',')

vital_new.head(n=15)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.loc[key] = value


Unnamed: 0,subject_id,hadm_id,icustay_id,charttime,mean_arterial_bp(mmHg),heart_rate(BPM),respiratory_rate,temperature(deg_cel),diastolic(mmHg),systolic(mmHg),cvp(mmHg),fio2,spo2(%),tidal_volume(mL),svr,minute_volume(L/min),pCO2
0,2003.0,181620.0,275551.0,5336125000.0,,,,,,,,,,,,,
1,2003.0,181620.0,275551.0,5336127000.0,,,,,,,,,,,,,
2,2003.0,181620.0,275551.0,5336129000.0,,,,,,,,,,,,,
3,2003.0,181620.0,275551.0,5336132000.0,,,,,,,,,,,,,
4,2003.0,181620.0,275551.0,5336133000.0,,,,,,,,,,,,,
5,2003.0,181620.0,275551.0,5336133000.0,,,,,,,,,,,,,
6,2003.0,181620.0,275551.0,5336134000.0,,,,,,,,,,,,,
7,2003.0,181620.0,275551.0,5336136000.0,,,,,,,,,,,,,
8,2003.0,181620.0,275551.0,5336140000.0,,,,,,,,,,,,,
9,2003.0,181620.0,275551.0,5336143000.0,,,,,,,,,,,,,


### Mechanical Ventilation

I:E Ratio: 221(CareVue)<br/>
Mean Airway Pressure: 444(CareVue), 224697(MetaVision)<br/>
Tidal Volume Set: 683(CareVue), 224684(MetaVision)<br/>
Tidal Volume Observed: 682(CareVue), 224685(MetaVision)<br/>
Ventilator Mode: 720(CareVue), 223849(MetaVision)<br/>

In [11]:
query='''
with data as
(
    with sub_ids as 
    (
        SELECT subject_id, hadm_id
        FROM diagnoses_icd diag
        where icd9_code like '9950%' or icd9_code like '9956%'
    )
    select sub_ids.subject_id, sub_ids.hadm_id, icustay_id, extract(epoch from charttime) as charttime, itemid, value, valueuom
    from sub_ids
    left join chartevents ce
    on sub_ids.subject_id = ce.subject_id and sub_ids.hadm_id = ce.hadm_id
    where itemid in (221,          --I:E Ratio
                     444,224697,      --mean airway pressure
                     683,224684,     --tidal volume set
                     682,224685,          --tidal volume observed
                     720,223849           --ventilator mode
                     )
    order by sub_ids.subject_id, sub_ids.hadm_id, icustay_id, charttime
)
select subject_id, hadm_id, icustay_id, charttime, data.itemid, d.label, data.value, valueuom
from data
left join d_items d
on data.itemid = d.itemid
order by subject_id, hadm_id, icustay_id, charttime
'''

data = pd.read_sql_query(query,con)
data.to_csv(os.path.join(export_dir,'mech_vent_pulse.csv'),index=False,sep=',')
data.head(n=15)

Unnamed: 0,subject_id,hadm_id,icustay_id,charttime,itemid,label,value,valueuom
0,3078,103639,264379,6493862000.0,682,Tidal Volume (Obser),643,ml/B
1,3078,103639,264379,6493862000.0,720,Ventilator Mode,Assist Control,
2,3078,103639,264379,6493862000.0,683,Tidal Volume (Set),600,ml/B
3,3078,103639,264379,6493862000.0,444,Mean Airway Pressure,11,cmH2O
4,3078,103639,264379,6493885000.0,683,Tidal Volume (Set),600,ml/B
5,3078,103639,264379,6493885000.0,720,Ventilator Mode,Assist Control,
6,3078,103639,264379,6493885000.0,444,Mean Airway Pressure,9,cmH2O
7,3078,103639,264379,6493885000.0,682,Tidal Volume (Obser),583,ml/B
8,3078,103639,264379,6493891000.0,720,Ventilator Mode,CPAP+PS,
9,3078,103639,264379,6493894000.0,720,Ventilator Mode,CPAP+PS,


In [12]:
def get_mechvent_item(itemid):
    '''
    This function returns the mechanical ventilation item corresponding to the the itemid
    Args:
        itemid: The itemid for the mechanical ventilation item
    Returns:
        mechvent: a string for the mechanical ventilation item
    '''
    if(itemid == 221):
        return 'ie_ratio'
    if(itemid == 444 or itemid == 224697):
        return 'mean_airway_pressure(cmH2O)'
    if(itemid == 683 or itemid == 224684):
        return 'tidal_volume_set(ml/B)'
    if(itemid == 682 or itemid == 224685):
        return 'tidal_volume_observed(ml/B)'
    if(itemid == 720 or itemid == 223849):
        return 'ventilator_mode'

In [13]:
mechvent_new = pd.DataFrame()
mechvent_new['ie_ratio'] = np.float('nan')
mechvent_new['mean_airway_pressure(cmH2O)'] = np.float('nan')
mechvent_new['tidal_volume_set(ml/B)'] = np.float('nan')
mechvent_new['tidal_volume_observed(ml/B)'] = np.float('nan')
mechvent_new['ventilator_mode'] = np.float('nan')

count = data.shape[0]

In [14]:
prev_charttime = None
prev_df = None
for i in range(count):
    this_data = data.iloc[i]
    if(this_data.charttime == prev_charttime and this_data.value):
        prev_df[get_mechvent_item(this_data.itemid)] = str(this_data.value)
    else:
        if(prev_df is not None):
            mechvent_new = mechvent_new.append(prev_df, ignore_index=True)
        this_data[get_mechvent_item(this_data.itemid)] = this_data.value
        this_data = this_data.drop(['itemid', 'label', 'value', 'valueuom'])
        prev_df = this_data
        prev_charttime = this_data.charttime

if prev_df is not None:
    mechvent_new = mechvent_new.append(prev_df, ignore_index=True)

columns = ['subject_id', 'hadm_id', 'icustay_id', 'charttime', 'ie_ratio', 'mean_airway_pressure(cmH2O)', 
           'tidal_volume_set(ml/B)', 'tidal_volume_observed(ml/B)', 'ventilator_mode']
mechvent_new = mechvent_new.reindex(columns=columns)
mechvent_new.to_csv(os.path.join(export_dir, 'mechvent_final.csv'),index=False,sep=',')

mechvent_new.head(n=15)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.


Unnamed: 0,subject_id,hadm_id,icustay_id,charttime,ie_ratio,mean_airway_pressure(cmH2O),tidal_volume_set(ml/B),tidal_volume_observed(ml/B),ventilator_mode
0,3078.0,103639.0,264379.0,6493862000.0,,11.0,600.0,643.0,Assist Control
1,3078.0,103639.0,264379.0,6493885000.0,,9.0,600.0,583.0,Assist Control
2,3078.0,103639.0,264379.0,6493891000.0,,,,,CPAP+PS
3,3078.0,103639.0,264379.0,6493894000.0,,,,,CPAP+PS
4,4745.0,151356.0,288685.0,4941663000.0,1:2.8,9.0,550.0,590.0,Assist Control
5,4745.0,151356.0,288685.0,4941666000.0,1:2.3,,,,
6,4745.0,151356.0,288685.0,4941673000.0,1:2.3,10.0,550.0,587.0,Assist Control
7,4745.0,151356.0,288685.0,4941687000.0,1:2.3,6.0,550.0,663.0,Assist Control
8,4745.0,151356.0,288685.0,4941706000.0,1:2.3,15.0,550.0,648.0,Assist Control
9,4745.0,151356.0,288685.0,4941708000.0,1:2,14.0,450.0,488.0,Assist Control


## Combining Data

In [15]:
combined1 = pd.merge(vital_new, mechvent_new, on = ['subject_id', 'hadm_id', 'icustay_id', 'charttime'], how = 'outer')
combined1.head(n=15)

Unnamed: 0,subject_id,hadm_id,icustay_id,charttime,mean_arterial_bp(mmHg),heart_rate(BPM),respiratory_rate,temperature(deg_cel),diastolic(mmHg),systolic(mmHg),...,spo2(%),tidal_volume(mL),svr,minute_volume(L/min),pCO2,ie_ratio,mean_airway_pressure(cmH2O),tidal_volume_set(ml/B),tidal_volume_observed(ml/B),ventilator_mode
0,2003.0,181620.0,275551.0,5336125000.0,,,,,,,...,,,,,,,,,,
1,2003.0,181620.0,275551.0,5336127000.0,,,,,,,...,,,,,,,,,,
2,2003.0,181620.0,275551.0,5336129000.0,,,,,,,...,,,,,,,,,,
3,2003.0,181620.0,275551.0,5336132000.0,,,,,,,...,,,,,,,,,,
4,2003.0,181620.0,275551.0,5336133000.0,,,,,,,...,,,,,,,,,,
5,2003.0,181620.0,275551.0,5336133000.0,,,,,,,...,,,,,,,,,,
6,2003.0,181620.0,275551.0,5336134000.0,,,,,,,...,,,,,,,,,,
7,2003.0,181620.0,275551.0,5336136000.0,,,,,,,...,,,,,,,,,,
8,2003.0,181620.0,275551.0,5336140000.0,,,,,,,...,,,,,,,,,,
9,2003.0,181620.0,275551.0,5336143000.0,,,,,,,...,,,,,,,,,,


In [16]:
combined_data = pd.merge(demog, combined1, on = ['subject_id', 'hadm_id'], how='outer')
combined_data.head(n=15)

Unnamed: 0,subject_id,hadm_id,age,gender,weight,height,icustay_id,charttime,mean_arterial_bp(mmHg),heart_rate(BPM),...,spo2(%),tidal_volume(mL),svr,minute_volume(L/min),pCO2,ie_ratio,mean_airway_pressure(cmH2O),tidal_volume_set(ml/B),tidal_volume_observed(ml/B),ventilator_mode
0,2003,181620,60.3,M,,,275551.0,5336125000.0,,,...,,,,,,,,,,
1,2003,181620,60.3,M,,,275551.0,5336127000.0,,,...,,,,,,,,,,
2,2003,181620,60.3,M,,,275551.0,5336129000.0,,,...,,,,,,,,,,
3,2003,181620,60.3,M,,,275551.0,5336132000.0,,,...,,,,,,,,,,
4,2003,181620,60.3,M,,,275551.0,5336133000.0,,,...,,,,,,,,,,
5,2003,181620,60.3,M,,,275551.0,5336133000.0,,,...,,,,,,,,,,
6,2003,181620,60.3,M,,,275551.0,5336134000.0,,,...,,,,,,,,,,
7,2003,181620,60.3,M,,,275551.0,5336136000.0,,,...,,,,,,,,,,
8,2003,181620,60.3,M,,,275551.0,5336140000.0,,,...,,,,,,,,,,
9,2003,181620,60.3,M,,,275551.0,5336143000.0,,,...,,,,,,,,,,


In [17]:
combined_data.to_csv(os.path.join(export_dir, 'pulse_final_data.csv'),index=False,sep=',')