# Time Series Analysis

This document is still in progress. It currently includes a mixture of a) time series visualization and b) an approach to extract alarm violations (different from what Jonas did, as far as I know).

I will tidy it up later and move the time series plot into the templates for plots document.

In [None]:
# Load CHARTEVENTS as a Dask DataFrame (a parallel DataFrame composed of many smaller pandas DataFrames).
import dask.dataframe as dd

# Column dtypes are passed explicitly to read_csv for the columns listed here.
chartevents_dtypes = {
    'ICUSTAY_ID': 'float64',
    'CGID': 'float64',
    'ERROR': 'float64',
    'STOPPED': 'object',
    'VALUE': 'object',
    'WARNING': 'float64',
    'RESULTSTATUS': 'object',
}
chartevents = dd.read_csv('./mimic/CHARTEVENTS.csv', dtype=chartevents_dtypes)

# Preview the first rows
chartevents.head()

In [None]:
# Materialise a single partition of the chartevents Dask DataFrame as a pandas DataFrame
# (to save computation time when testing the plot).
# NOTE: get_partition(1) selects the partition with index 1; Dask partitions are counted
# from 0, so this is the second partition rather than the very first one.
import pandas as pd
single_partition = chartevents.get_partition(1).compute()
chartevents_subset = pd.DataFrame(single_partition)
display(chartevents_subset)

In [None]:
# Keep only the rows that belong to the heart rate ITEMIDs:
# the heart rate values themselves (220045) and the high/low alarm thresholds (220046, 220047).
heart_rate_itemids = [220045, 220046, 220047]
is_heart_rate_item = chartevents_subset["ITEMID"].isin(heart_rate_itemids)
chartevents_subset_HR = chartevents_subset[is_heart_rate_item]

In [None]:
# For testing the visualization, an ICU stay is now selected that has enough data points for a meaningful visualization.

# Create data frame with new column EVENTCOUNT, which counts how often an ITEMID has occurred per ICUSTAY_ID.
events_per_icustay = (
    chartevents_subset_HR
    .groupby(['ICUSTAY_ID', 'ITEMID'])
    .size()
    .reset_index(name='EVENTCOUNT')
)

# Arrange data using pivot: one row per ICUSTAY_ID, one column per ITEMID.
# `index` and `columns` are passed by keyword because pandas >= 2.0 no longer accepts them positionally.
# `values` is deliberately left unset so that the resulting columns form a MultiIndex
# of the form ('EVENTCOUNT', <ITEMID>).
events_per_icustay = events_per_icustay.pivot(index='ICUSTAY_ID', columns='ITEMID')

display(events_per_icustay)

In [None]:
# Show those ICU stays where more than 50 and fewer than 500 heart rate values have been recorded and ...
# ... more than 2 HIGH heart rate alarm thresholds have been set (not triggered!) and ...
# ... more than 2 LOW heart rate alarm thresholds have been set (not triggered!).
# Sort descending by the number of recorded heart rate values.
hr_value_count = events_per_icustay[('EVENTCOUNT', 220045)]
high_threshold_count = events_per_icustay[('EVENTCOUNT', 220046)]
low_threshold_count = events_per_icustay[('EVENTCOUNT', 220047)]

has_suitable_value_count = (hr_value_count > 50) & (hr_value_count < 500)
has_enough_thresholds = (high_threshold_count > 2) & (low_threshold_count > 2)

events_per_icustay[has_suitable_value_count & has_enough_thresholds].sort_values(by=('EVENTCOUNT', 220045), ascending=False)

In [None]:
# ICUSTAY_ID "208809.0" is selected for further visualization.

# Other ICU stays that are useful as examples:
# - some high and low heart rate alarms: ICUSTAY_ID "231056.0"
# - many high and low heart rate alarms: ICUSTAY_ID "208809.0"
# - single high heart rate alarm: ICUSTAY_ID "227109.0"
# - many high heart rate alarms: ICUSTAY_ID "238757.0" and "203317.0"
# - many low heart rate alarms: ICUSTAY_ID "266167.0"
# - low sample rate for vital parameter: ICUSTAY_ID "211792.0"
# - flipped alarm thresholds (probably wrong input by ICU staff): ICUSTAY_ID "234196.0"

selected_icustay_id = 208809.0
belongs_to_selected_stay = chartevents_subset_HR["ICUSTAY_ID"] == selected_icustay_id
# Work on a copy so that columns can be added later without touching chartevents_subset_HR.
selected_icustay = chartevents_subset_HR[belongs_to_selected_stay].copy()
display(selected_icustay)

In [None]:
# Add new column with ITEMID_LABEL, which can be used for the legend of the plot.
import numpy as np

# Human-readable legend label for each heart rate ITEMID.
itemid_labels = {
    220045: "Heart Rate (bpm)",
    220046: "Alarm threshold: high heart rate (bpm)",
    220047: "Alarm threshold: low heart rate (bpm)",
}
# A dictionary lookup replaces the previous chain of np.where calls: their extra
# `!= np.nan` condition was always True (NaN never compares equal to anything), so it
# had no effect. Rows with an ITEMID that is not listed above get NaN as label.
selected_icustay['ITEMID_LABEL'] = selected_icustay['ITEMID'].map(itemid_labels)
display(selected_icustay)

# Convert CHARTTIME to datetime
selected_icustay['CHARTTIME'] = pd.to_datetime(selected_icustay['CHARTTIME'])

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Set variables
title = "History of heart rate of ICU stay <insert>"
xlabel = "Time"
ylabel = "Beats per minute"
plotdata = selected_icustay
xvalue = "CHARTTIME"
yvalue = "VALUENUM"
huevalue = "ITEMID_LABEL"

# The first three colours of the colorblind palette are used, one per ITEMID_LABEL.
colorblind_palette = sns.color_palette("colorblind")

# Config figure
sns.set_style("whitegrid")
fig, ax = plt.subplots(
    figsize=(11, 5),
    dpi=72,  # e.g. 72 for screen, 300 for print
)
# Draw explicitly on the axes created above instead of relying on the implicit "current axes".
sns.lineplot(
    data=plotdata,
    x=xvalue,
    y=yvalue,
    hue=huevalue,
    style=huevalue,
    drawstyle='steps-post',  # Interpolate missing values by using the last available value
    markers=['p', '^', 'v'],
    markersize=5,
    dashes=False,
    palette=colorblind_palette[:3],
    ax=ax,
)

ax.legend(title=None, bbox_to_anchor=(1.02, 0.3), loc='upper left', borderaxespad=0)
ax.set_title(title, fontweight='bold', color='black', fontsize=14, y=1.05)
ax.set_xlabel(xlabel, fontsize=12, labelpad=15)
ax.set_ylabel(ylabel, fontsize=12, labelpad=15)
plt.xticks(rotation=90)

# Plot figure
# plt.show() displays all open figures; it does not take a figure argument, so `fig` is not passed.
plt.show()

## Identify Alarm Violations

In [None]:
# Create a time-indexed data frame that allows to identify intersections, i.e. triggered alarms a.k.a. alarm violations


def _item_series(frame, itemid, name):
    """Return the VALUENUM entries of one ITEMID as a CHARTTIME-indexed Series called *name*.

    The column is selected explicitly instead of calling ``squeeze()``: squeezing a frame
    with exactly one row yields a scalar rather than a Series, which would break the
    subsequent ``rename`` and the concatenation below.
    """
    item_rows = frame[frame["ITEMID"] == itemid]
    return item_rows.set_index('CHARTTIME')['VALUENUM'].rename(name)


# Firstly, create time-indexed pandas series
value_series = _item_series(selected_icustay, 220045, "VALUE")
threshold_high_series = _item_series(selected_icustay, 220046, "THRESHOLD_HIGH")
threshold_low_series = _item_series(selected_icustay, 220047, "THRESHOLD_LOW")
# Secondly, merge series to data frame using pd.concat (aligned on the CHARTTIME index)
timeseries = pd.concat([value_series, threshold_high_series, threshold_low_series], axis=1).copy()
display(timeseries)

In [None]:
# Fill missing threshold values by carrying the last available value forward.
# ffill() is used instead of interpolate('pad'), which is deprecated in recent pandas versions.
# If there is no previous value available, nothing is filled in and the value remains NaN. This is to be expected for alarm limits in the beginning of the time series, since they are likely to be set after the first vital sign values have been recorded.
timeseries['THRESHOLD_HIGH'] = timeseries['THRESHOLD_HIGH'].ffill()
timeseries['THRESHOLD_LOW'] = timeseries['THRESHOLD_LOW'].ffill()
# I am not sure if we should fill the values of the vital parameter as well. I think rather not, since these may have changed since the last recorded measurement (as opposed to the alarm limits for which changes are always recorded).
# timeseries['VALUE'] = timeseries['VALUE'].ffill()
display(timeseries)

In [None]:
# Add columns holding the difference between the measured value and the threshold valid at that time.
# A difference >= 0 to the HIGH threshold or <= 0 to the LOW threshold is later treated as an alarm violation.
measured_value = timeseries['VALUE']
timeseries['DIF_VALUE_HIGH'] = measured_value - timeseries['THRESHOLD_HIGH']
timeseries['DIF_VALUE_LOW'] = measured_value - timeseries['THRESHOLD_LOW']
display(timeseries)

In [None]:
# Identify triggered alarms (a.k.a. alarm violations) for threshold of type HIGH:
# rows where the measured value is equal to or above the HIGH threshold.
high_alarm_columns = ['VALUE', 'THRESHOLD_HIGH', 'DIF_VALUE_HIGH']
reached_high_threshold = timeseries["DIF_VALUE_HIGH"] >= 0
alarm_too_high = timeseries.loc[reached_high_threshold, high_alarm_columns]
alarm_too_high

In [None]:
# Identify triggered alarms (a.k.a. alarm violations) for threshold of type LOW:
# rows where the measured value is equal to or below the LOW threshold.
low_alarm_columns = ['VALUE', 'THRESHOLD_LOW', 'DIF_VALUE_LOW']
reached_low_threshold = timeseries["DIF_VALUE_LOW"] <= 0
alarm_too_low = timeseries.loc[reached_low_threshold, low_alarm_columns]
alarm_too_low

In [None]:
# Complete plot by displaying the triggered alarms (only possible if the alarms have been identified and are available as data frame)

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Set variables
title = "History of heart rate of ICU stay <insert>"
xlabel = "Time"
ylabel = "Beats per minute"
plotdata = selected_icustay
xvalue = "CHARTTIME"
yvalue = "VALUENUM"
huevalue = "ITEMID_LABEL"

# Alarm lines are only drawn for 1 to MAX_ALARM_LINES alarms per type (otherwise the diagram gets too busy).
MAX_ALARM_LINES = 10

# The first three colours of the colorblind palette are used, one per ITEMID_LABEL;
# the alarm lines reuse the colour of the corresponding threshold.
colorblind_palette = sns.color_palette("colorblind")


def _draw_alarm_lines(axes, alarm_times, color, label, max_lines=MAX_ALARM_LINES):
    """Draw one dotted vertical line per triggered alarm on *axes*.

    Nothing is drawn if there are no alarms or more than *max_lines* alarms.
    Only the first line carries *label*, so the alarm type shows up exactly once in the legend.
    """
    if not 0 < len(alarm_times) <= max_lines:
        return
    for position, alarm_time in enumerate(alarm_times):
        axes.axvline(
            alarm_time,
            linestyle='dotted',
            color=color,
            alpha=0.8,
            zorder=0,
            # '_nolegend_' keeps the remaining lines out of the legend
            label=label if position == 0 else '_nolegend_',
        )


# Config figure
sns.set_style("whitegrid")
fig, ax = plt.subplots(
    figsize=(11, 5),
    dpi=72,  # e.g. 72 for screen, 300 for print
)
# Draw explicitly on the axes created above instead of relying on the implicit "current axes".
sns.lineplot(
    data=plotdata,
    x=xvalue,
    y=yvalue,
    hue=huevalue,
    style=huevalue,
    drawstyle='steps-post',  # Interpolate missing values by using the last available value
    markers=['p', '^', 'v'],
    markersize=5,
    dashes=False,
    palette=colorblind_palette[:3],
    ax=ax,
)

# Add HIGH and LOW alarm indicators as vertical lines
_draw_alarm_lines(ax, alarm_too_high.index, colorblind_palette[1], 'Triggered alarm: heart rate too high')
_draw_alarm_lines(ax, alarm_too_low.index, colorblind_palette[2], 'Triggered alarm: heart rate too low')

# TODO: Optionally add an x axis tick at the alarm time if exactly one alarm of a type occurs.
# Idea from an earlier draft: append the alarm time to ax.get_xticks() after converting it to the
# float representation (in days) used by the date axis, then call ax.set_xticks() with the result.

ax.legend(title=None, bbox_to_anchor=(1.02, 0.3), loc='upper left', borderaxespad=0)
ax.set_title(title, fontweight='bold', color='black', fontsize=14, y=1.05)
ax.set_xlabel(xlabel, fontsize=12, labelpad=15)
ax.set_ylabel(ylabel, fontsize=12, labelpad=15)
plt.xticks(rotation=90)

# Plot figure
# plt.show() displays all open figures; it does not take a figure argument, so `fig` is not passed.
plt.show()