## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
%matplotlib inline

## Load and Clean Signals

* Load physio data
* Cleaning:
    * Timestamps/timezones -- physio data CSVs are time local (EST) but alamrs come with UTC offset.
        * Bokeh visualizer and pandas treat times as UTC, so must explicitly declare TZ intent.
    * Merge duplicate timestamp entries into 1 row

In [2]:
fname = '../../original_data/5-25-5-26'
df = pd.read_json(fname, lines=True)

physio_df = df.set_index("timestamp")
physio_df

Unnamed: 0_level_0,Airway,ECG,Heart Rate,Non-invasive Blood Pressure,Pleth,Respiration Rate,SpO2,alarms,qos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-05-24 23:56:16.760,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2723, 2807, 2874, 2925, 2961, 2985, 2994, 299...",,,,1
2017-05-24 23:56:17.016,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1718, 1695, 1673, 1652, 1611, 1560, 1524, 149...",,,,1
2017-05-24 23:56:18.680,"{'Respiration Rate': None, 'etCO2': None}",,Not a number,"{'mean': 88, 'systolic': 139, 'diastolic': 73}",,Not a number,,,1
2017-05-24 23:56:18.680,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,96.7,,1
2017-05-24 23:56:18.680,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,,1
2017-05-24 23:56:18.712,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,"{'Alarm_T_0': {'source': 'NOM_RESP', 'state': ...",1
2017-05-24 23:56:17.272,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1208, 1320, 1460, 1621, 1771, 1914, 2071, 222...",,,,1
2017-05-24 23:56:17.528,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2191, 2102, 2015, 1933, 1859, 1794, 1737, 168...",,,,1
2017-05-24 23:56:17.784,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1173, 1147, 1119, 1097, 1079, 1062, 1053, 106...",,,,1
2017-05-24 23:56:18.040,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2849, 2809, 2762, 2707, 2643, 2570, 2490, 240...",,,,1


In [3]:
physio_df = physio_df.tz_localize('Etc/GMT+4')
physio_df

Unnamed: 0_level_0,Airway,ECG,Heart Rate,Non-invasive Blood Pressure,Pleth,Respiration Rate,SpO2,alarms,qos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-05-24 23:56:16.760000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2723, 2807, 2874, 2925, 2961, 2985, 2994, 299...",,,,1
2017-05-24 23:56:17.016000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1718, 1695, 1673, 1652, 1611, 1560, 1524, 149...",,,,1
2017-05-24 23:56:18.680000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,Not a number,"{'mean': 88, 'systolic': 139, 'diastolic': 73}",,Not a number,,,1
2017-05-24 23:56:18.680000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,96.7,,1
2017-05-24 23:56:18.680000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,,1
2017-05-24 23:56:18.712000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,"{'Alarm_T_0': {'source': 'NOM_RESP', 'state': ...",1
2017-05-24 23:56:17.272000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1208, 1320, 1460, 1621, 1771, 1914, 2071, 222...",,,,1
2017-05-24 23:56:17.528000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2191, 2102, 2015, 1933, 1859, 1794, 1737, 168...",,,,1
2017-05-24 23:56:17.784000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1173, 1147, 1119, 1097, 1079, 1062, 1053, 106...",,,,1
2017-05-24 23:56:18.040000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2849, 2809, 2762, 2707, 2643, 2570, 2490, 240...",,,,1


In [4]:
'''
Collapse multiple entries for a single timestamp to one row.

Example, this:

        value_1 value_2 value_3
time_1    1        1      NaN
time_1    NaN     NaN      1
time_1    NaN     NaN      1

Becomes:
        value_1 value_2 value_3
time_1     1       1       1

'''
merged_df = physio_df.groupby("timestamp").first()
merged_df

Unnamed: 0_level_0,Airway,ECG,Heart Rate,Non-invasive Blood Pressure,Pleth,Respiration Rate,SpO2,alarms,qos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-05-24 23:56:16.760000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2723, 2807, 2874, 2925, 2961, 2985, 2994, 299...",,,,1
2017-05-24 23:56:17.016000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1718, 1695, 1673, 1652, 1611, 1560, 1524, 149...",,,,1
2017-05-24 23:56:17.272000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1208, 1320, 1460, 1621, 1771, 1914, 2071, 222...",,,,1
2017-05-24 23:56:17.528000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2191, 2102, 2015, 1933, 1859, 1794, 1737, 168...",,,,1
2017-05-24 23:56:17.784000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1173, 1147, 1119, 1097, 1079, 1062, 1053, 106...",,,,1
2017-05-24 23:56:18.040000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2849, 2809, 2762, 2707, 2643, 2570, 2490, 240...",,,,1
2017-05-24 23:56:18.296000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1397, 1382, 1368, 1350, 1330, 1309, 1287, 126...",,,,1
2017-05-24 23:56:18.552000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2914, 2934, 2943, 2942, 2933, 2917, 2894, 286...",,,,1
2017-05-24 23:56:18.680000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,Not a number,"{'mean': 88, 'systolic': 139, 'diastolic': 73}",,Not a number,,,1
2017-05-24 23:56:18.712000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,"{'Alarm_T_0': {'source': 'NOM_RESP', 'state': ...",1


Drop machine generated alarms, since we are going to use the manually annotated alarms. Numerics values like airway values (including Respiration rate),etc. can be ignored for current analysis as well.

In [5]:
cleaned_df = merged_df.drop(["alarms", "Respiration Rate", "Airway"],axis = 1)
cleaned_df

Unnamed: 0_level_0,ECG,Heart Rate,Non-invasive Blood Pressure,Pleth,SpO2,qos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-05-24 23:56:16.760000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2723, 2807, 2874, 2925, 2961, 2985, 2994, 299...",,1
2017-05-24 23:56:17.016000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1718, 1695, 1673, 1652, 1611, 1560, 1524, 149...",,1
2017-05-24 23:56:17.272000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1208, 1320, 1460, 1621, 1771, 1914, 2071, 222...",,1
2017-05-24 23:56:17.528000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2191, 2102, 2015, 1933, 1859, 1794, 1737, 168...",,1
2017-05-24 23:56:17.784000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1173, 1147, 1119, 1097, 1079, 1062, 1053, 106...",,1
2017-05-24 23:56:18.040000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2849, 2809, 2762, 2707, 2643, 2570, 2490, 240...",,1
2017-05-24 23:56:18.296000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1397, 1382, 1368, 1350, 1330, 1309, 1287, 126...",,1
2017-05-24 23:56:18.552000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2914, 2934, 2943, 2942, 2933, 2917, 2894, 286...",,1
2017-05-24 23:56:18.680000-04:00,,Not a number,"{'mean': 88, 'systolic': 139, 'diastolic': 73}",,,1
2017-05-24 23:56:18.712000-04:00,,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,1


## Load and Clean Alarms

* Load alarm data
* Cleaning:
    * Timestamps/timezones

In [6]:
alarms_fname = '../../original_data/5-25-5-26-ecg-alarms.csv'
alarms_df = pd.read_csv(alarms_fname,parse_dates=[0])
alarms_df = alarms_df.set_index("timestamp").tz_localize("UTC").tz_convert('Etc/GMT+4')
alarms_df

Unnamed: 0_level_0,alarm_type
timestamp,Unnamed: 1_level_1
2017-05-25 05:52:54.992000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)
2017-05-25 05:53:51.312000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)
2017-05-25 05:58:09.360000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)
2017-05-25 05:58:10.384000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)
2017-05-25 06:00:32.720000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)
2017-05-25 06:00:55.720000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)
2017-05-25 06:09:29.296000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)
2017-05-25 09:50:49.584000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)
2017-05-25 09:56:07.024000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)
2017-05-25 09:56:08.016000-04:00,Clin. SIGNIFICANT EKG alarm (URGENT)


## Putting it together

* Pipeline:
    1. Choose an alarm
    2. Apply it to the orignial dataset
    3. Unpack the dictionary containing non-invasive blood pressure values.

Slice dataframe based on window of time around alarm (isolated_physio_df)

In [7]:
alarms = alarms_df.index.to_pydatetime()
alarms

array([ datetime.datetime(2017, 5, 25, 5, 52, 54, 992000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 5, 53, 51, 312000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 5, 58, 9, 360000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 5, 58, 10, 384000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 6, 0, 32, 720000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 6, 0, 55, 720000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 6, 9, 29, 296000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 9, 50, 49, 584000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 9, 56, 7, 24000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 9, 56, 8, 16000, tzinfo=<StaticTzInfo 'Etc/GMT+4'>),
       datetime.datetime(2017, 5, 25, 10, 0, 40, 400000, tzinfo=<StaticTzInfo

In [8]:
len(alarms)

25

In [9]:
for i in range(len(alarms)):
    alarm = alarms[i]
    isolated_df = cleaned_df[alarm - pd.Timedelta("500 seconds"):alarm]
    indices = isolated_df.index.to_pydatetime()
    
    tot_cnt = len(isolated_df)
    if (tot_cnt == 0):
        continue
        
    # Create a new dataframe indicating "real" alarms
    l = [0 for i in range(tot_cnt - 1)] # 0 indicates NO alarm
    l.append(1)                         # 1 indicates Real alarm
    temp_df = pd.DataFrame(np.array(l), index = indices, columns = ["Annotated ECG alarm"])
    isolated_df = pd.concat([isolated_df, temp_df], axis = 1)
        
    if i == 0:
        sliced_df = isolated_df.copy()
    else:
        sliced_df = pd.concat([sliced_df, isolated_df], axis = 0)

In [10]:
sliced_df

Unnamed: 0_level_0,ECG,Heart Rate,Non-invasive Blood Pressure,Pleth,SpO2,qos,Annotated ECG alarm
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-05-25 05:44:35.216000-04:00,"[-0.065000000000004, -0.08000000000000501, -0....",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2996, 2986, 2966, 2937, 2901, 2862, 2821, 277...",,1,0
2017-05-25 05:44:35.472000-04:00,"[-0.025000000000005, -0.020000000000010003, -0...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1775, 1779, 1780, 1777, 1769, 1757, 1741, 172...",,1,0
2017-05-25 05:44:35.728000-04:00,"[-0.120000000000004, -0.11500000000000901, -0....",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1218, 1210, 1201, 1196, 1193, 1186, 1177, 116...",,1,0
2017-05-25 05:44:35.792000-04:00,,75,"{'mean': 89, 'systolic': 134, 'diastolic': 74}",,,1,0
2017-05-25 05:44:35.984000-04:00,"[0.034999999999989005, 0.009999999999990001, -...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2972, 2980, 2978, 2967, 2948, 2931, 2913, 288...",,1,0
2017-05-25 05:44:36.240000-04:00,"[-0.08000000000000501, -0.08000000000000501, -...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1713, 1713, 1714, 1713, 1710, 1704, 1695, 168...",,1,0
2017-05-25 05:44:36.496000-04:00,"[-0.140000000000007, -0.135000000000005, -0.13...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1163, 1150, 1138, 1127, 1116, 1105, 1094, 108...",,1,0
2017-05-25 05:44:36.752000-04:00,"[-0.035000000000003, -0.045000000000008006, -0...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2922, 2955, 2981, 2998, 3005, 3002, 2990, 297...",,1,0
2017-05-25 05:44:36.816000-04:00,,75,"{'mean': 89, 'systolic': 134, 'diastolic': 74}",,,1,0
2017-05-25 05:44:37.008000-04:00,"[-0.15000000000000502, -0.15000000000000502, -...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1728, 1724, 1723, 1723, 1722, 1721, 1719, 171...",,1,0


In [11]:
sliced_df[["diastolic_bp","mean_bp","systolic_bp"]] = sliced_df["Non-invasive Blood Pressure"].apply(pd.Series).apply(pd.to_numeric,errors='coerce')
sliced_df = sliced_df.drop("Non-invasive Blood Pressure", axis=1)
sliced_df

Unnamed: 0_level_0,ECG,Heart Rate,Pleth,SpO2,qos,Annotated ECG alarm,diastolic_bp,mean_bp,systolic_bp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-05-25 05:44:35.216000-04:00,"[-0.065000000000004, -0.08000000000000501, -0....",,"[2996, 2986, 2966, 2937, 2901, 2862, 2821, 277...",,1,0,,,
2017-05-25 05:44:35.472000-04:00,"[-0.025000000000005, -0.020000000000010003, -0...",,"[1775, 1779, 1780, 1777, 1769, 1757, 1741, 172...",,1,0,,,
2017-05-25 05:44:35.728000-04:00,"[-0.120000000000004, -0.11500000000000901, -0....",,"[1218, 1210, 1201, 1196, 1193, 1186, 1177, 116...",,1,0,,,
2017-05-25 05:44:35.792000-04:00,,75,,,1,0,74.0,89.0,134.0
2017-05-25 05:44:35.984000-04:00,"[0.034999999999989005, 0.009999999999990001, -...",,"[2972, 2980, 2978, 2967, 2948, 2931, 2913, 288...",,1,0,,,
2017-05-25 05:44:36.240000-04:00,"[-0.08000000000000501, -0.08000000000000501, -...",,"[1713, 1713, 1714, 1713, 1710, 1704, 1695, 168...",,1,0,,,
2017-05-25 05:44:36.496000-04:00,"[-0.140000000000007, -0.135000000000005, -0.13...",,"[1163, 1150, 1138, 1127, 1116, 1105, 1094, 108...",,1,0,,,
2017-05-25 05:44:36.752000-04:00,"[-0.035000000000003, -0.045000000000008006, -0...",,"[2922, 2955, 2981, 2998, 3005, 3002, 2990, 297...",,1,0,,,
2017-05-25 05:44:36.816000-04:00,,75,,,1,0,74.0,89.0,134.0
2017-05-25 05:44:37.008000-04:00,"[-0.15000000000000502, -0.15000000000000502, -...",,"[1728, 1724, 1723, 1723, 1722, 1721, 1719, 171...",,1,0,,,


In [12]:
sliced_df = sliced_df.reindex_axis(["ECG", "Heart Rate", "Pleth", "SpO2", "diastolic_bp","mean_bp","systolic_bp", "qos", "Annotated ECG alarm"], axis=1)
sliced_df

Unnamed: 0_level_0,ECG,Heart Rate,Pleth,SpO2,diastolic_bp,mean_bp,systolic_bp,qos,Annotated ECG alarm
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-05-25 05:44:35.216000-04:00,"[-0.065000000000004, -0.08000000000000501, -0....",,"[2996, 2986, 2966, 2937, 2901, 2862, 2821, 277...",,,,,1,0
2017-05-25 05:44:35.472000-04:00,"[-0.025000000000005, -0.020000000000010003, -0...",,"[1775, 1779, 1780, 1777, 1769, 1757, 1741, 172...",,,,,1,0
2017-05-25 05:44:35.728000-04:00,"[-0.120000000000004, -0.11500000000000901, -0....",,"[1218, 1210, 1201, 1196, 1193, 1186, 1177, 116...",,,,,1,0
2017-05-25 05:44:35.792000-04:00,,75,,,74.0,89.0,134.0,1,0
2017-05-25 05:44:35.984000-04:00,"[0.034999999999989005, 0.009999999999990001, -...",,"[2972, 2980, 2978, 2967, 2948, 2931, 2913, 288...",,,,,1,0
2017-05-25 05:44:36.240000-04:00,"[-0.08000000000000501, -0.08000000000000501, -...",,"[1713, 1713, 1714, 1713, 1710, 1704, 1695, 168...",,,,,1,0
2017-05-25 05:44:36.496000-04:00,"[-0.140000000000007, -0.135000000000005, -0.13...",,"[1163, 1150, 1138, 1127, 1116, 1105, 1094, 108...",,,,,1,0
2017-05-25 05:44:36.752000-04:00,"[-0.035000000000003, -0.045000000000008006, -0...",,"[2922, 2955, 2981, 2998, 3005, 3002, 2990, 297...",,,,,1,0
2017-05-25 05:44:36.816000-04:00,,75,,,74.0,89.0,134.0,1,0
2017-05-25 05:44:37.008000-04:00,"[-0.15000000000000502, -0.15000000000000502, -...",,"[1728, 1724, 1723, 1723, 1722, 1721, 1719, 171...",,,,,1,0


## Next step

Data preparation for Goal #2 (as described in project description)
Since the waveform signals (ECG and PPG) are asynchronized with other signals (Heart Rate, SpO2, diastolic_bp, mean_bp, systolic_bp), meaning that the devices cannot collect them at the same timestamp.

### Create functions for assigning timestamps to signals

Data arrives in packets of 64 (ecg) or 32 (pleth) with a single timestamp. Therefore, each array needs to manually be assigned individual timestamps.

In [13]:
def expand_pleth_times(timestamp):
    x = pd.date_range(timestamp, periods=32,freq='8L',closed="left")
    return x

def expand_ecg_times(timestamp):
    x = pd.date_range(timestamp, periods=64,freq='4L',closed="left")
    return x

In [14]:
x = np.hstack(sliced_df["Pleth"].dropna().index.to_series().apply(expand_pleth_times).values)
y = np.hstack(sliced_df["Pleth"].dropna().values)
print(len(x))
print(len(y))

1289664
1289664


Create a new dataframe to store the expanded Pleth values. Note that there might be some duplicate indices(timestamp values) that need to be dropped.

In [15]:
ecg_df = pd.DataFrame(y, index=x, columns = ['PPG'])
ecg_df.index.name = 'timestamp'
ecg_df = ecg_df.reset_index().drop_duplicates(subset='timestamp', keep='last').set_index('timestamp')
ecg_df

Unnamed: 0_level_0,PPG
timestamp,Unnamed: 1_level_1
2017-05-25 09:44:35.216,2996
2017-05-25 09:44:35.224,2986
2017-05-25 09:44:35.232,2966
2017-05-25 09:44:35.240,2937
2017-05-25 09:44:35.248,2901
2017-05-25 09:44:35.256,2862
2017-05-25 09:44:35.264,2821
2017-05-25 09:44:35.272,2776
2017-05-25 09:44:35.280,2728
2017-05-25 09:44:35.288,2677


Repeat the above two steps for expanding ECG signals.

In [16]:
X = np.hstack(sliced_df["ECG"].dropna().index.to_series().apply(expand_pleth_times).values)
Y = np.hstack(sliced_df["ECG"].dropna().values)
Y = [Y[i]  for i in range(len(Y)) if i % 2 == 1]
print(len(X))
print(len(Y))

1289664
1289664


In [17]:
ppg_df = pd.DataFrame(Y, index = X, columns = ['ECG'])
ppg_df.index.name = 'timestamp'
ppg_df = ppg_df.reset_index().drop_duplicates(subset='timestamp', keep='last').set_index('timestamp')
ppg_df

Unnamed: 0_level_0,ECG
timestamp,Unnamed: 1_level_1
2017-05-25 09:44:35.216,-0.080
2017-05-25 09:44:35.224,-0.080
2017-05-25 09:44:35.232,-0.075
2017-05-25 09:44:35.240,-0.080
2017-05-25 09:44:35.248,-0.080
2017-05-25 09:44:35.256,-0.080
2017-05-25 09:44:35.264,-0.080
2017-05-25 09:44:35.272,-0.080
2017-05-25 09:44:35.280,-0.070
2017-05-25 09:44:35.288,-0.085


First merge two waveform signals dataframe into one.

In [18]:
signals_df = pd.merge(ecg_df, ppg_df, how='inner', left_index=True, right_index=True)
signals_df

Unnamed: 0_level_0,PPG,ECG
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-05-25 09:44:35.216,2996,-0.080
2017-05-25 09:44:35.224,2986,-0.080
2017-05-25 09:44:35.232,2966,-0.075
2017-05-25 09:44:35.240,2937,-0.080
2017-05-25 09:44:35.248,2901,-0.080
2017-05-25 09:44:35.256,2862,-0.080
2017-05-25 09:44:35.264,2821,-0.080
2017-05-25 09:44:35.272,2776,-0.080
2017-05-25 09:44:35.280,2728,-0.070
2017-05-25 09:44:35.288,2677,-0.085


Create a new dataframe to store our training targets —— High priority ECG alarms.

In [19]:
annot_timestamps = sliced_df["Annotated ECG alarm"].dropna().index.to_series()
annot_alarms = sliced_df["Annotated ECG alarm"].dropna().values
annoalarms_df = pd.DataFrame(annot_alarms, index=annot_timestamps, columns = ['Annotated ECG alarm'])
annoalarms_df = annoalarms_df.reset_index().drop_duplicates(subset='timestamp', keep='last').set_index('timestamp')
annoalarms_df

Unnamed: 0_level_0,Annotated ECG alarm
timestamp,Unnamed: 1_level_1
2017-05-25 09:44:35.216,0
2017-05-25 09:44:35.472,0
2017-05-25 09:44:35.728,0
2017-05-25 09:44:35.792,0
2017-05-25 09:44:35.984,0
2017-05-25 09:44:36.240,0
2017-05-25 09:44:36.496,0
2017-05-25 09:44:36.752,0
2017-05-25 09:44:36.816,0
2017-05-25 09:44:37.008,0


Merge the target dataframe with our features dataframe.

In [20]:
final_df = pd.merge(signals_df, annoalarms_df, how='left', left_index=True, right_index=True)
final_df

Unnamed: 0_level_0,PPG,ECG,Annotated ECG alarm
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-05-25 09:44:35.216,2996,-0.080,0.0
2017-05-25 09:44:35.224,2986,-0.080,
2017-05-25 09:44:35.232,2966,-0.075,
2017-05-25 09:44:35.240,2937,-0.080,
2017-05-25 09:44:35.248,2901,-0.080,
2017-05-25 09:44:35.256,2862,-0.080,
2017-05-25 09:44:35.264,2821,-0.080,
2017-05-25 09:44:35.272,2776,-0.080,
2017-05-25 09:44:35.280,2728,-0.070,
2017-05-25 09:44:35.288,2677,-0.085,


In [21]:
final_df["Annotated ECG alarm"].value_counts()

0.0    33858
1.0        9
Name: Annotated ECG alarm, dtype: int64

Only 9 rows are annotated alarms of high priority. Let fill the NaN's with zeros.

In [22]:
final_df["Annotated ECG alarm"].fillna(0, inplace = True)
final_df["Annotated ECG alarm"].value_counts()

0.0    806071
1.0         9
Name: Annotated ECG alarm, dtype: int64

The final dataframe is shown below. We intend to predict high priority alarms (indicated in the last column) using the waveforms signals.

In [23]:
final_df

Unnamed: 0_level_0,PPG,ECG,Annotated ECG alarm
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2017-05-25 09:44:35.216,2996,-0.080,0.0
2017-05-25 09:44:35.224,2986,-0.080,0.0
2017-05-25 09:44:35.232,2966,-0.075,0.0
2017-05-25 09:44:35.240,2937,-0.080,0.0
2017-05-25 09:44:35.248,2901,-0.080,0.0
2017-05-25 09:44:35.256,2862,-0.080,0.0
2017-05-25 09:44:35.264,2821,-0.080,0.0
2017-05-25 09:44:35.272,2776,-0.080,0.0
2017-05-25 09:44:35.280,2728,-0.070,0.0
2017-05-25 09:44:35.288,2677,-0.085,0.0


In [26]:
final_df.to_csv("../../clean_data/Model_Sherry/waveform_anno.csv", encoding='utf-8', index=True)

## Further steps
Import annotations file for Goal #3:
   1. import window annotations for both PPG and ECG signal
   2. import qos annotations for PPG signal