# Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
%matplotlib inline

## Load and Clean Signals

* Load physio data
* Cleaning:
    * Timestamps/timezones -- physio data CSVs are in local time (US Eastern, UTC-4 on these dates), but alarms come with a UTC offset.
        * The Bokeh visualizer and pandas treat times as UTC, so we must explicitly declare the intended time zone.
    * Merge duplicate timestamp entries into 1 row

In [3]:
# The input file is parsed as line-delimited JSON (one record per line).
fname = '../../original_data/5-25-5-26'
df = pd.read_json(path_or_buf=fname, lines=True)

# Use the record timestamp as the index; later cells localize and group on it.
physio_df = df.set_index(keys="timestamp")
physio_df

Unnamed: 0_level_0,Airway,ECG,Heart Rate,Non-invasive Blood Pressure,Pleth,Respiration Rate,SpO2,alarms,qos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-05-24 23:56:16.760,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2723, 2807, 2874, 2925, 2961, 2985, 2994, 299...",,,,1
2017-05-24 23:56:17.016,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1718, 1695, 1673, 1652, 1611, 1560, 1524, 149...",,,,1
2017-05-24 23:56:18.680,"{'Respiration Rate': None, 'etCO2': None}",,Not a number,"{'mean': 88, 'systolic': 139, 'diastolic': 73}",,Not a number,,,1
2017-05-24 23:56:18.680,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,96.7,,1
2017-05-24 23:56:18.680,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,,1
2017-05-24 23:56:18.712,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,"{'Alarm_T_0': {'source': 'NOM_RESP', 'state': ...",1
2017-05-24 23:56:17.272,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1208, 1320, 1460, 1621, 1771, 1914, 2071, 222...",,,,1
2017-05-24 23:56:17.528,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2191, 2102, 2015, 1933, 1859, 1794, 1737, 168...",,,,1
2017-05-24 23:56:17.784,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1173, 1147, 1119, 1097, 1079, 1062, 1053, 106...",,,,1
2017-05-24 23:56:18.040,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2849, 2809, 2762, 2707, 2643, 2570, 2490, 240...",,,,1


In [4]:
# Attach a time zone to the (tz-naive) timestamp index.
# 'Etc/GMT+4' is a fixed UTC-4 offset (the Etc/ zone names use an inverted sign),
# which is why the index below renders with "-04:00".
#
# The localized frame is rebuilt from `df` rather than from `physio_df` itself so
# that this cell can be re-run safely: calling tz_localize on an index that is
# already tz-aware raises a TypeError.
physio_df = df.set_index("timestamp").tz_localize('Etc/GMT+4')
physio_df

Unnamed: 0_level_0,Airway,ECG,Heart Rate,Non-invasive Blood Pressure,Pleth,Respiration Rate,SpO2,alarms,qos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-05-24 23:56:16.760000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2723, 2807, 2874, 2925, 2961, 2985, 2994, 299...",,,,1
2017-05-24 23:56:17.016000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1718, 1695, 1673, 1652, 1611, 1560, 1524, 149...",,,,1
2017-05-24 23:56:18.680000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,Not a number,"{'mean': 88, 'systolic': 139, 'diastolic': 73}",,Not a number,,,1
2017-05-24 23:56:18.680000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,96.7,,1
2017-05-24 23:56:18.680000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,,1
2017-05-24 23:56:18.712000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,"{'Alarm_T_0': {'source': 'NOM_RESP', 'state': ...",1
2017-05-24 23:56:17.272000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1208, 1320, 1460, 1621, 1771, 1914, 2071, 222...",,,,1
2017-05-24 23:56:17.528000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2191, 2102, 2015, 1933, 1859, 1794, 1737, 168...",,,,1
2017-05-24 23:56:17.784000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1173, 1147, 1119, 1097, 1079, 1062, 1053, 106...",,,,1
2017-05-24 23:56:18.040000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2849, 2809, 2762, 2707, 2643, 2570, 2490, 240...",,,,1


In [5]:
# Collapse every group of rows that share a timestamp into a single row.
# For each column, the first non-null value found within the group is kept.
#
# Example input:
#
#         value_1 value_2 value_3
# time_1    1        1      NaN
# time_1    NaN     NaN      1
# time_1    NaN     NaN      1
#
# Result:
#
#         value_1 value_2 value_3
# time_1     1       1       1
#
# "timestamp" is the name of the index level, so group on that level explicitly.
merged_df = (
    physio_df
    .groupby(level="timestamp")
    .first()
)
merged_df

Unnamed: 0_level_0,Airway,ECG,Heart Rate,Non-invasive Blood Pressure,Pleth,Respiration Rate,SpO2,alarms,qos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2017-05-24 23:56:16.760000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2723, 2807, 2874, 2925, 2961, 2985, 2994, 299...",,,,1
2017-05-24 23:56:17.016000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1718, 1695, 1673, 1652, 1611, 1560, 1524, 149...",,,,1
2017-05-24 23:56:17.272000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1208, 1320, 1460, 1621, 1771, 1914, 2071, 222...",,,,1
2017-05-24 23:56:17.528000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2191, 2102, 2015, 1933, 1859, 1794, 1737, 168...",,,,1
2017-05-24 23:56:17.784000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1173, 1147, 1119, 1097, 1079, 1062, 1053, 106...",,,,1
2017-05-24 23:56:18.040000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2849, 2809, 2762, 2707, 2643, 2570, 2490, 240...",,,,1
2017-05-24 23:56:18.296000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1397, 1382, 1368, 1350, 1330, 1309, 1287, 126...",,,,1
2017-05-24 23:56:18.552000-04:00,"{'Respiration Rate': None, 'etCO2': None}","[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2914, 2934, 2943, 2942, 2933, 2917, 2894, 286...",,,,1
2017-05-24 23:56:18.680000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,Not a number,"{'mean': 88, 'systolic': 139, 'diastolic': 73}",,Not a number,,,1
2017-05-24 23:56:18.712000-04:00,"{'Respiration Rate': None, 'etCO2': None}",,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,,"{'Alarm_T_0': {'source': 'NOM_RESP', 'state': ...",1


Drop numeric values that can be ignored for the current analysis, such as the airway values and the respiration rate.

In [6]:
# Remove the columns that are not used in the current analysis.
cleaned_df = merged_df.drop(
    columns=["Respiration Rate", "Airway"],
)
cleaned_df

Unnamed: 0_level_0,ECG,Heart Rate,Non-invasive Blood Pressure,Pleth,SpO2,alarms,qos
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-05-24 23:56:16.760000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2723, 2807, 2874, 2925, 2961, 2985, 2994, 299...",,,1
2017-05-24 23:56:17.016000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1718, 1695, 1673, 1652, 1611, 1560, 1524, 149...",,,1
2017-05-24 23:56:17.272000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1208, 1320, 1460, 1621, 1771, 1914, 2071, 222...",,,1
2017-05-24 23:56:17.528000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2191, 2102, 2015, 1933, 1859, 1794, 1737, 168...",,,1
2017-05-24 23:56:17.784000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1173, 1147, 1119, 1097, 1079, 1062, 1053, 106...",,,1
2017-05-24 23:56:18.040000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2849, 2809, 2762, 2707, 2643, 2570, 2490, 240...",,,1
2017-05-24 23:56:18.296000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[1397, 1382, 1368, 1350, 1330, 1309, 1287, 126...",,,1
2017-05-24 23:56:18.552000-04:00,"[-40.96, -40.96, -40.96, -40.96, -40.96, -40.9...",,"{'mean': None, 'systolic': None, 'diastolic': ...","[2914, 2934, 2943, 2942, 2933, 2917, 2894, 286...",,,1
2017-05-24 23:56:18.680000-04:00,,Not a number,"{'mean': 88, 'systolic': 139, 'diastolic': 73}",,,,1
2017-05-24 23:56:18.712000-04:00,,,"{'mean': None, 'systolic': None, 'diastolic': ...",,,"{'Alarm_T_0': {'source': 'NOM_RESP', 'state': ...",1


In [7]:
# Write the cleaned frame to disk as UTF-8 CSV, keeping the timestamp index as a column.
cleaned_df.to_csv(
    path_or_buf="../../clean_data/Model_Sherry/clean_5_25_5_26.csv",
    index=True,
    encoding='utf-8',
)