# Identify Alarms

## Get unique ICU stays in CHARTEVENTS.csv

In [None]:
# Load CHARTEVENTS lazily as a Dask DataFrame (a parallel DataFrame made up of many
# smaller pandas DataFrames), so the CSV is not read into memory as a whole.
import dask.dataframe as dd

# Column dtypes are fixed up front instead of being inferred by Dask.
# NOTE(review): presumably needed because inference on a sample fails for these
# columns (missing values / mixed content) - confirm.
chartevents = dd.read_csv(
    './mimic/CHARTEVENTS.csv',
    dtype={
        **dict.fromkeys(['ICUSTAY_ID', 'CGID', 'ERROR', 'WARNING'], 'float64'),
        **dict.fromkeys(['STOPPED', 'VALUE', 'RESULTSTATUS'], 'object'),
    },
)
chartevents.head()

In [None]:
import pandas as pd
from dask.diagnostics import ProgressBar

# Collect the distinct ICUSTAY_ID values of the full CHARTEVENTS table.
# The Dask computation is triggered by .compute(); its result is wrapped in a pandas Series.
# Computing duration on Marius' laptop (Intel i5-5200U CPU @ 2.20GHz): 11min 15.4s
# Note: The progress bar does not advance evenly but jumps, e.g. in the second half of this calculation.
with ProgressBar():
    unique_icustays = pd.Series(chartevents['ICUSTAY_ID'].unique().compute())

display(unique_icustays)

In [None]:
import numpy as np
import pandas as pd
import pyarrow as pa

# Write the unique ICU stay IDs to a parquet file (via a DataFrame wrapper)
# so that later cells can reload them from disk.
pd.DataFrame(unique_icustays).to_parquet(
    path='./icustays/unique_icustays.parquet',
    engine='pyarrow',
)

In [None]:
# Reload the unique ICU stay IDs from disk into a pandas DataFrame.
import pandas as pd

unique_icustays = pd.read_parquet(
    path='./icustays/unique_icustays.parquet',
    engine='pyarrow',
)

In [None]:
# Alternative to the pandas variant: reload the unique ICU stay IDs as a Dask DataFrame.
import dask.dataframe as dd

unique_icustays = dd.read_parquet(
    path='./icustays/unique_icustays.parquet',
    engine='pyarrow',
)

## Create subset of CHARTEVENTS by filtering for selected ITEMIDs

In [None]:
# ITEMIDs to keep when building the CHARTEVENTS subset:
# three vital parameters with their high/low alarm thresholds, plus diastolic NBP.
itemid_filter = [
    220045,  # Heart Rate
    220046,  # Heart rate Alarm - High
    220047,  # Heart Rate Alarm - Low
    220179,  # Non Invasive Blood Pressure systolic
    223751,  # Non-Invasive Blood Pressure Alarm - High
    223752,  # Non-Invasive Blood Pressure Alarm - Low
    220180,  # Non Invasive Blood Pressure diastolic
    220277,  # O2 saturation pulseoxymetry
    223769,  # O2 Saturation Pulseoxymetry Alarm - High
    223770,  # O2 Saturation Pulseoxymetry Alarm - Low
]

import pandas as pd
from dask.diagnostics import ProgressBar

# Keep only the rows whose ITEMID is in itemid_filter and materialise the result
# as a pandas DataFrame (.compute() triggers the Dask computation).
# Computing duration on Marius' laptop (Intel i5-5200U CPU @ 2.20GHz): 12min 26.5s
with ProgressBar():
    chartevents_subset = chartevents[chartevents['ITEMID'].isin(itemid_filter)].compute()

display(chartevents_subset)

In [None]:
import numpy as np
import pandas as pd
import pyarrow as pa

# Write the filtered CHARTEVENTS rows to a parquet file so that later cells
# can reload them from disk.
pd.DataFrame(chartevents_subset).to_parquet(
    path='./icustays/chartevents_subset.parquet',
    engine='pyarrow',
)

In [None]:
# Reload the filtered CHARTEVENTS rows from disk into a pandas DataFrame.
import pandas as pd

chartevents_subset = pd.read_parquet(
    path='./icustays/chartevents_subset.parquet',
    engine='pyarrow',
)

In [None]:
# Alternative to the pandas variant: reload the filtered CHARTEVENTS rows as a Dask DataFrame.
import dask.dataframe as dd

chartevents_subset = dd.read_parquet(
    path='./icustays/chartevents_subset.parquet',
    engine='pyarrow',
)

## Get unique ICU stays in chartevents_subset

In [None]:
# Load the filtered CHARTEVENTS rows into a pandas DataFrame as input for this section.
import pandas as pd

chartevents_subset = pd.read_parquet(
    path='./icustays/chartevents_subset.parquet',
    engine='pyarrow',
)

In [None]:
# Distinct ICU stays that occur in chartevents_subset, as a Series named 'ICUSTAY_ID'.
# The name is set explicitly because .unique() on a pandas Series returns a bare array.
unique_icustays_in_chartevents_subset = pd.Series(
    chartevents_subset['ICUSTAY_ID'].unique(),
    name='ICUSTAY_ID',
)

In [None]:
import numpy as np
import pandas as pd
import pyarrow as pa

# Write unique_icustays_in_chartevents_subset to a parquet file (via a DataFrame wrapper).
pd.DataFrame(unique_icustays_in_chartevents_subset).to_parquet(
    path='./icustays/unique_icustays_in_chartevents_subset.parquet',
    engine='pyarrow',
)

In [None]:
# Reload the ICU stays contained in chartevents_subset into a pandas DataFrame.
import pandas as pd

unique_icustays_in_chartevents_subset = pd.read_parquet(
    path='./icustays/unique_icustays_in_chartevents_subset.parquet',
    engine='pyarrow',
)

## Identify alarms

In [None]:
# Follow-up: What happens if the ITEMID is not available for the ICU stay?
# Note: Writing to parquet requires converting int64 column names to strings, which must be taken into account when the columns are used later on.

In [None]:
import pandas as pd

# Lookup table: one row per vital parameter, holding the ITEMID of the measured value
# and the ITEMIDs of its high and low alarm thresholds.
# This could later be moved to a CSV file and read from there.
parameters = pd.DataFrame(
    [
        # (LABEL, VALUE, THRESHOLD_HIGH, THRESHOLD_LOW)
        ('HR', 220045, 220046, 220047),
        ('NBPs', 220179, 223751, 223752),
        ('SpO2', 220277, 223769, 223770),
    ],
    columns=['LABEL', 'VALUE', 'THRESHOLD_HIGH', 'THRESHOLD_LOW'],
)

display(parameters)

In [None]:
import numpy as np
import pandas as pd
import pyarrow as pa

# Positional window [BATCH_START, BATCH_STOP) of ICU stays processed by this run.
# The stop is exclusive: 3000/4000 covers positions 3000..3999, i.e. 1000 stays.
# The previous slice [3000:3999] silently skipped position 3999. The same applies to the
# earlier batches [0:999], [1000:1999], [2000:2999]: positions 999, 1999 and 2999 were
# never processed and must be re-run if those batches were produced with the old bounds.
BATCH_START = 3000
BATCH_STOP = 4000

# Suffixes of the three series built per vital parameter; they are also the column
# names of `parameters` that hold the respective ITEMID.
SERIES_KINDS = ('VALUE', 'THRESHOLD_HIGH', 'THRESHOLD_LOW')

# Column order of the resulting alarm table.
ALARM_COLUMNS = [
    'ICUSTAY_ID',
    'PARAMETER_ITEMID',
    'CHARTTIME',
    'PARAMETER_VALUENUM',
    'CROSSED_THRESHOLD_ITEMID',
    'CROSSED_THRESHOLD_TYPE',
]


def extract_item_series(stay_events, itemid):
    """Return the VALUENUM of one ITEMID as a Series indexed by CHARTTIME.

    Parameters:
        stay_events: chart events of a single ICU stay; must provide the columns
            ITEMID, CHARTTIME and VALUENUM.
        itemid: ITEMID to select; also used as the name of the returned Series.

    Returns:
        Series sorted by CHARTTIME with a unique index (empty if the ITEMID does
        not occur in stay_events).
    """
    item_rows = stay_events.loc[stay_events['ITEMID'] == itemid, ['CHARTTIME', 'VALUENUM']]
    # Sort chronologically; the stable sort keeps the original row order among equal timestamps.
    item_rows = item_rows.sort_values(by='CHARTTIME', kind='stable')
    # Several rows may share one CHARTTIME. A duplicated index makes pd.concat(axis=1) fail with
    # "ValueError: cannot reindex from a duplicate axis", so only one row per timestamp is kept.
    # NOTE(review): keeping the last row is a deterministic but arbitrary choice - confirm
    # whether e.g. rows flagged in the ERROR column should be preferred/excluded instead.
    item_rows = item_rows.drop_duplicates(subset='CHARTTIME', keep='last')
    return item_rows.set_index('CHARTTIME')['VALUENUM'].rename(itemid)


def build_stay_time_series(stay_events, parameters):
    """Merge value and threshold series of all parameters into one forward-filled frame.

    Parameters:
        stay_events: chart events of a single ICU stay.
        parameters: frame with the columns LABEL, VALUE, THRESHOLD_HIGH, THRESHOLD_LOW.

    Returns:
        DataFrame indexed by CHARTTIME (datetime, ascending) with the columns
        '<LABEL>_VALUE', '<LABEL>_THRESHOLD_HIGH' and '<LABEL>_THRESHOLD_LOW' per parameter.
    """
    series_by_column = {}
    for _, parameter in parameters.iterrows():
        for kind in SERIES_KINDS:
            column = f"{parameter['LABEL']}_{kind}"
            series_by_column[column] = extract_item_series(stay_events, parameter[kind])

    merged = pd.concat(series_by_column, axis=1)
    merged.index = pd.to_datetime(merged.index)
    # The outer join of differently indexed series is not guaranteed to be in chronological
    # order, but the forward fill below relies on row order. Sort explicitly.
    merged = merged.rename_axis('CHARTTIME').sort_index()
    # Carry the last available value forward; cells without a preceding value stay NaN.
    # Note: Vital parameters and alarm thresholds are not differentiated here, so vital
    # parameters are forward-filled as well. This may need to be reconsidered, since vital
    # parameters can change between measurements (unlike alarm limits, whose changes are
    # always recorded).
    return merged.ffill()


def detect_stay_alarms(time_series, icustay, parameters):
    """Identify all threshold violations of one ICU stay.

    For every parameter two columns are ADDED IN PLACE to time_series:
    'DIF_<LABEL>_VALUE_THRESHOLD_HIGH' and 'DIF_<LABEL>_VALUE_THRESHOLD_LOW'
    (value minus threshold). A high alarm is a row with difference >= 0,
    a low alarm a row with difference <= 0.

    Parameters:
        time_series: frame as returned by build_stay_time_series (mutated, see above).
        icustay: ICUSTAY_ID written to the alarm rows.
        parameters: frame with the columns LABEL, VALUE, THRESHOLD_HIGH, THRESHOLD_LOW.

    Returns:
        DataFrame with the columns ALARM_COLUMNS, sorted by CHARTTIME.
    """
    alarm_frames = []
    for _, parameter in parameters.iterrows():
        label = parameter['LABEL']
        value_column = f'{label}_VALUE'
        for threshold_type in ('HIGH', 'LOW'):
            threshold_kind = f'THRESHOLD_{threshold_type}'
            dif_column = f'DIF_{label}_VALUE_{threshold_kind}'
            time_series[dif_column] = time_series[value_column] - time_series[f'{label}_{threshold_kind}']
            # Comparisons with NaN are False, so rows without value or threshold never alarm.
            if threshold_type == 'HIGH':
                crossed = time_series[dif_column] >= 0
            else:
                crossed = time_series[dif_column] <= 0
            alarm_frames.append(
                time_series.loc[crossed, [value_column]]
                .reset_index()
                .rename(columns={value_column: 'PARAMETER_VALUENUM'})
                .assign(
                    ICUSTAY_ID=icustay,
                    PARAMETER_ITEMID=parameter['VALUE'],
                    CROSSED_THRESHOLD_ITEMID=parameter[threshold_kind],
                    CROSSED_THRESHOLD_TYPE=threshold_type,
                )
            )
    alarms = pd.concat(alarm_frames, axis=0, ignore_index=True)
    return alarms[ALARM_COLUMNS].sort_values(by=['CHARTTIME'], ignore_index=True)


# Read chartevents from parquet file to pandas data frame
chartevents = pd.read_parquet('./icustays/chartevents_subset.parquet', engine='pyarrow')

# Read unique_icustays from parquet file and select the batch of this run (by position)
unique_icustays = pd.read_parquet('./icustays/unique_icustays_in_chartevents_subset.parquet', engine='pyarrow')
unique_icustays = unique_icustays['ICUSTAY_ID'].iloc[BATCH_START:BATCH_STOP]

# unique_icustays = [269851.0] # For test purposes

# Path of the folder in which the individual files are stored (one parquet file per ICU Stay)
path_to_dir = './icustays/'

# Split the events of the batch by ICU stay once, instead of scanning the whole table
# nine times per stay. CHARTTIME is parsed up front so that sorting and de-duplication
# operate on real timestamps.
batch_events = chartevents[chartevents['ICUSTAY_ID'].isin(unique_icustays)].copy()
batch_events['CHARTTIME'] = pd.to_datetime(batch_events['CHARTTIME'])
events_by_stay = {stay_id: events for stay_id, events in batch_events.groupby('ICUSTAY_ID')}
# Used for stays without any matching event (e.g. a missing/NaN ICUSTAY_ID).
no_events = batch_events.iloc[0:0]

# Merged time series per ICU stay (kept for inspection).
merged_time_series = dict()

# Triggered alarms per ICU stay.
merged_alarm_series = dict()

for icustay in unique_icustays:
    stay_events = events_by_stay.get(icustay, no_events)
    merged_time_series[icustay] = build_stay_time_series(stay_events, parameters)

    # Optional (disabled): save the merged time series of the ICU stay as parquet file.
    # The int64 column names would have to be converted to strings first, because parquet
    # requires string column names.
    # merged_time_series[icustay].columns = merged_time_series[icustay].columns.astype(str)
    # merged_time_series[icustay].to_parquet(f'{path_to_dir}{icustay}.parquet', engine='pyarrow')

    merged_alarm_series[icustay] = detect_stay_alarms(merged_time_series[icustay], icustay, parameters)

# pd.concat raises on an empty collection, so an empty batch yields an empty alarm table.
if merged_alarm_series:
    all_triggered_alarms = pd.concat(list(merged_alarm_series.values()), axis=0, ignore_index=True)
else:
    all_triggered_alarms = pd.DataFrame(columns=ALARM_COLUMNS)
all_triggered_alarms

In [None]:
# Show the alarm table built by the preceding cell.
all_triggered_alarms

In [None]:
# Write the alarms of the current batch to a parquet file inside path_to_dir.
# The batch range is part of the file name.
all_triggered_alarms.to_parquet(
    path=f'{path_to_dir}triggered_alarms_3000_3999.parquet',
    engine='pyarrow',
)

In [None]:
# Read triggered alarms from parquet file
# import pandas as pd
# triggered_alarms = pd.read_parquet('./icustays/triggered_alarms_2000_2999.parquet', engine='pyarrow')

In [None]:
## Debug
"""
Failed with icustay 269851.0
ValueError: cannot reindex from a duplicate axis

pd.concat(single_time_series, axis=1) does not work
"""