In [1]:
import numpy as np
import pandas as pd

from cyclops.query import mimic
from cyclops.query import process as qp
from cyclops.utils.file import load_dataframe, save_dataframe
from cyclops.processors.aggregate import Aggregator
from cyclops.processors.cleaning import (
    normalize_names,
    normalize_categories,
    normalize_values
)
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    CARE_UNIT,
    DIAGNOSIS_CODE,
    DIAGNOSIS_TRAJECTORY,
    ENCOUNTER_ID,
    EVENT_CATEGORY,
    EVENT_NAME,
    EVENT_TIMESTAMP,
    EVENT_VALUE,
    EVENT_VALUE_UNIT,
    HOSPITAL_ID,
    SEX,
    SUBJECT_ID,
    TIMESTEP,
    YEAR,
)
from cyclops.processors.constants import (
    BINARY,
    CATEGORICAL_INDICATOR,
    FEATURE_INDICATOR_ATTR,
    FEATURE_MAPPING_ATTR,
    FEATURE_TYPE_ATTR,
    FEATURE_TYPES,
    FEATURES,
    MEAN,
    MIN_MAX,
    MISSING_CATEGORY,
    NUMERIC,
    ORDINAL,
    STANDARD,
    STRING,
    TARGETS,
)
from cyclops.processors.feature.feature import TabularFeatures, TemporalFeatures
from cyclops.processors.feature.normalization import GroupbyNormalizer

2022-07-08 12:42:12,168 [1;37mINFO[0m cyclops.orm     - Database setup, ready to run queries!


In [2]:
# Calendar years passed to the encounters query below.
# NOTE(review): 2017 is not in this list — confirm the gap is intentional.
YEARS = [
    2015,
    2016,
    2018,
    2019,
    2020,
]
# Earliest year in YEARS.
MIN_YEAR = min(YEARS)

# Querying

In [52]:
# Build the encounters query for the selected years, asking for a "died"
# column, then drop four columns from the query before it is executed.
encounters_interface = mimic.patient_encounters(
    years=YEARS,
    died=True,
    died_binarize_col="died",
)
encounters_query = qp.Drop(
    ["insurance", "language", "marital_status", "hospital_expire_flag"]
)(encounters_interface.query)

# Wrap the modified query in a fresh interface and run it.
encounters_interface = mimic.get_interface(encounters_query)
encounters = encounters_interface.run()
encounters

2022-07-07 14:35:47,155 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-07-07 14:35:47,156 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 1.991126 s


Unnamed: 0,subject_id,encounter_id,admit_timestamp,discharge_timestamp,deathtime,admission_type,admission_location,discharge_location,ethnicity,edregtime,edouttime,sex,age,birth_year,dod,anchor_year_difference,died
0,19759112,20610510,2018-07-03 02:10:00,2018-07-05 12:15:00,NaT,ELECTIVE,,HOME,UNKNOWN,NaT,NaT,M,0,2018,NaT,-140,False
1,14523215,29575656,2018-12-30 21:07:00,2019-01-01 11:38:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,M,0,2018,NaT,-118,False
2,12188356,23159459,2018-02-12 00:38:00,2018-02-14 14:01:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,F,0,2018,NaT,-123,False
3,16487201,22587598,2018-06-10 21:58:00,2018-06-20 12:02:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,F,0,2018,NaT,-103,False
4,10730776,26401328,2015-08-27 00:09:00,2015-08-29 12:19:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,M,0,2015,NaT,-107,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224759,16204743,22297786,2018-02-26 23:21:00,2018-03-02 20:00:00,NaT,OBSERVATION ADMIT,EMERGENCY ROOM,HOME,BLACK/AFRICAN AMERICAN,2191-02-26 18:36:00,2191-02-27 00:55:00,M,31,1987,NaT,-173,False
224760,18679547,20411251,2016-04-04 15:28:00,2016-04-17 18:05:00,NaT,URGENT,TRANSFER FROM HOSPITAL,HOME HEALTH CARE,WHITE,2169-04-04 02:38:00,2169-04-04 15:02:00,M,59,1957,NaT,-153,False
224761,16601429,28017820,2015-03-06 07:10:00,2015-03-09 12:00:00,NaT,URGENT,PHYSICIAN REFERRAL,HOME,WHITE,NaT,NaT,F,32,1983,NaT,-146,False
224762,13352372,28752421,2015-05-24 19:31:00,2015-06-17 10:35:00,2015-06-17 10:35:00,URGENT,TRANSFER FROM HOSPITAL,DIED,WHITE,NaT,NaT,M,67,1948,2015-06-17,-132,True


In [53]:
# Persist the queried encounters to parquet; the Processing section reloads this file.
# NOTE(review): the return value is presumably the written path — confirm against save_dataframe.
encounters_path = save_dataframe(encounters, "encounters.parquet")

2022-07-07 14:35:47,176 [1;37mINFO[0m cyclops.utils.file - Saving dataframe to encounters.parquet


In [54]:
# Query events (capped at one million rows), dropping three unused columns,
# then attach each encounter's anchor_year_difference so the event timestamps
# can be shifted by that many years.
events_interface = mimic.events()
events_query = events_interface.query
events_query = qp.Drop(["warning", "itemid", "storetime"])(events_query)
events_interface = mimic.get_interface(events_query)
events = events_interface.run(limit=1000000)
events = pd.merge(encounters[[ENCOUNTER_ID, "anchor_year_difference"]], events, on=ENCOUNTER_ID)


def add_offset(row):
    """Shift one row's event timestamp by that row's ``anchor_year_difference`` years.

    Row-wise reference implementation, kept so ``df.apply(add_offset, axis=1)``
    still works. ``add_year_offsets`` below applies the same shift to a whole
    frame without a Python-level call per row.
    """
    row[EVENT_TIMESTAMP] += pd.DateOffset(years=row["anchor_year_difference"])
    return row


def add_year_offsets(frame, timestamp_col=EVENT_TIMESTAMP, offset_col="anchor_year_difference"):
    """Return a copy of ``frame`` whose ``timestamp_col`` is shifted by ``offset_col`` years.

    Rows are grouped by their distinct year offset and each group is shifted
    with one vectorized ``pd.DateOffset`` addition, instead of invoking a
    Python function once per row.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame holding both columns.
    timestamp_col : str
        Datetime column to shift.
    offset_col : str
        Column holding the whole number of years to add to each row.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``frame`` itself is not modified.

    Raises
    ------
    ValueError
        If ``offset_col`` contains missing values.
    """
    if frame[offset_col].isna().any():
        raise ValueError(
            f"'{offset_col}' contains missing values; cannot shift '{timestamp_col}'."
        )

    shifted = frame[timestamp_col].copy()
    # .indices maps each distinct offset to the *positions* of its rows, so
    # this also works when the frame's index labels are not unique.
    for years, positions in frame.groupby(offset_col).indices.items():
        group_shifted = shifted.iloc[positions] + pd.DateOffset(years=int(years))
        # Assign raw values so no index alignment is attempted.
        shifted.iloc[positions] = group_shifted.to_numpy()
    return frame.assign(**{timestamp_col: shifted})


events = add_year_offsets(events)
events = events.drop("anchor_year_difference", axis=1)
events

2022-07-07 14:35:52,205 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-07-07 14:35:52,207 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 4.729472 s


Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,25195599,18795283,38047987,2018-09-04 19:00:00,79,79.0,bpm,Routine Vital Signs,Heart Rate
1,25195599,18795283,38047987,2018-09-04 19:00:00,15,15.0,insp/min,Respiratory,Respiratory Rate
2,25195599,18795283,38047987,2018-09-04 19:40:00,98.6,98.6,°F,Routine Vital Signs,Temperature Fahrenheit
3,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,Access Lines - Invasive,Multi Lumen placed in outside facility
4,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,Access Lines - Peripheral,20 Gauge placed in outside facility
...,...,...,...,...,...,...,...,...,...
558198,25268968,18520455,35184667,2015-07-05 10:00:00,5.8,5.8,,Heartware,Flow (L/min) (HeartWare)
558199,25268968,18520455,35184667,2015-07-05 10:00:00,4.8,4.8,,Heartware,Power (Watts) (HeartWare)
558200,25268968,18520455,35184667,2015-07-05 10:00:00,27,27.0,,Heartware,HCT (HeartWare)
558201,25268968,18520455,35184667,2015-07-05 10:00:00,4,4.0,,Heartware,Low Flow Alarm (HeartWare)


In [56]:
# Persist the shifted events to parquet; the Temporal section reloads this file.
# NOTE(review): the return value is presumably the written path — confirm against save_dataframe.
events_path = save_dataframe(events, "events.parquet")

2022-07-07 14:36:50,894 [1;37mINFO[0m cyclops.utils.file - Saving dataframe to events.parquet


------------------------------------------------------------------------------------------------

# Processing

## Tabular

In [20]:
# Reload the encounters written by the Querying section so processing can start from disk.
encounters = load_dataframe("encounters.parquet")
encounters

2022-07-08 12:42:51,571 [1;37mINFO[0m cyclops.utils.file - Loading dataframe from encounters.parquet


Unnamed: 0,subject_id,encounter_id,admit_timestamp,discharge_timestamp,deathtime,admission_type,admission_location,discharge_location,ethnicity,edregtime,edouttime,sex,age,birth_year,dod,anchor_year_difference,died
0,19759112,20610510,2018-07-03 02:10:00,2018-07-05 12:15:00,NaT,ELECTIVE,,HOME,UNKNOWN,NaT,NaT,M,0,2018,NaT,-140,False
1,14523215,29575656,2018-12-30 21:07:00,2019-01-01 11:38:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,M,0,2018,NaT,-118,False
2,12188356,23159459,2018-02-12 00:38:00,2018-02-14 14:01:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,F,0,2018,NaT,-123,False
3,16487201,22587598,2018-06-10 21:58:00,2018-06-20 12:02:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,F,0,2018,NaT,-103,False
4,10730776,26401328,2015-08-27 00:09:00,2015-08-29 12:19:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,M,0,2015,NaT,-107,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224759,16204743,22297786,2018-02-26 23:21:00,2018-03-02 20:00:00,NaT,OBSERVATION ADMIT,EMERGENCY ROOM,HOME,BLACK/AFRICAN AMERICAN,2191-02-26 18:36:00,2191-02-27 00:55:00,M,31,1987,NaT,-173,False
224760,18679547,20411251,2016-04-04 15:28:00,2016-04-17 18:05:00,NaT,URGENT,TRANSFER FROM HOSPITAL,HOME HEALTH CARE,WHITE,2169-04-04 02:38:00,2169-04-04 15:02:00,M,59,1957,NaT,-153,False
224761,16601429,28017820,2015-03-06 07:10:00,2015-03-09 12:00:00,NaT,URGENT,PHYSICIAN REFERRAL,HOME,WHITE,NaT,NaT,F,32,1983,NaT,-146,False
224762,13352372,28752421,2015-05-24 19:31:00,2015-06-17 10:35:00,2015-06-17 10:35:00,URGENT,TRANSFER FROM HOSPITAL,DIED,WHITE,NaT,NaT,M,67,1948,2015-06-17,-132,True


In [21]:
# Turn every encounter flagged as died into a synthetic "death" event row:
# the deathtime becomes the event timestamp and the event value is a constant 1.
death_events = (
    encounters.loc[encounters["died"].eq(True), [ENCOUNTER_ID, "deathtime"]]
    .rename(columns={"deathtime": EVENT_TIMESTAMP})
    .assign(**{EVENT_NAME: "death", EVENT_VALUE: 1})
)
death_events

Unnamed: 0,encounter_id,event_timestamp,event_name,event_value
1612,28962934,2015-10-05 13:40:00,death,1
1652,22398596,2015-12-02 00:39:00,death,1
1667,26870252,2018-03-28 16:56:00,death,1
1672,22253579,2015-09-30 08:30:00,death,1
1705,26973813,2015-11-06 02:36:00,death,1
...,...,...,...,...
224668,23802854,2020-11-27 10:48:00,death,1
224708,29859744,2015-12-19 15:00:00,death,1
224724,23975043,2015-03-22 13:15:00,death,1
224733,29836978,2018-12-08 14:49:00,death,1


In [22]:
#df['rooms'].str.replace("°", " deg")

In [23]:
#Series.str.encode('utf-8')

In [24]:
# Scratch check that a b"..." literal is a bytes object.
# NOTE(review): unrelated to the pipeline — candidate for removal.
type(b"Test")

bytes

In [25]:
# Scratch check: a bytes literal equals the encoded form of the same text.
# NOTE(review): unrelated to the pipeline — candidate for removal.
a = b"Test"
b = "Test".encode()
a == b

True

In [26]:
# NOTE(review): the output saved with this cell already shows event names carrying the
# " - <category>" suffix, which is only added in the Temporal section further down, so
# this output was captured out of order. Consider moving or removing this cell.
events

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,25195599,18795283,38047987,2018-09-04 19:00:00,79,79.0,bpm,routine vital signs,heart rate - routine vital signs
1,25195599,18795283,38047987,2018-09-04 19:00:00,15,15.0,insp/min,respiratory,respiratory rate - respiratory
2,25195599,18795283,38047987,2018-09-04 19:40:00,98.6,98.6,°F,routine vital signs,temperature fahrenheit - routine vital signs
3,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,access lines - invasive,multi lumen placed in outside facility - acces...
4,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,access lines - peripheral,20 gauge placed in outside facility - access l...
...,...,...,...,...,...,...,...,...,...
558186,25268968,18520455,35184667,2015-07-05 09:00:00,95,95.0,%,respiratory,o2 saturation pulseoxymetry - respiratory
558193,25268968,18520455,35184667,2015-07-05 10:00:00,94,94.0,bpm,routine vital signs,heart rate - routine vital signs
558194,25268968,18520455,35184667,2015-07-05 10:00:00,86,86.0,mmHg,routine vital signs,non invasive blood pressure mean - routine vit...
558195,25268968,18520455,35184667,2015-07-05 10:00:00,13,13.0,insp/min,respiratory,respiratory rate - respiratory


In [27]:
# Encounter-level columns handed to TabularFeatures as the feature set.
features = [AGE, SEX, "admission_type", "admission_location", "discharge_location", "ethnicity"]

tab_features = TabularFeatures(encounters, features)

In [28]:
# Show the frame held by tab_features; in the saved output the selected
# categorical columns appear as numeric codes instead of their text labels.
tab_features.get_data()

Unnamed: 0,subject_id,encounter_id,admit_timestamp,discharge_timestamp,deathtime,admission_type,admission_location,discharge_location,ethnicity,edregtime,edouttime,sex,age,birth_year,dod,anchor_year_difference,died
0,19759112,20610510,2018-07-03 02:10:00,2018-07-05 12:15:00,NaT,3,,6.0,6,NaT,NaT,1,0,2018,NaT,-140,False
1,14523215,29575656,2018-12-30 21:07:00,2019-01-01 11:38:00,NaT,3,,6.0,7,NaT,NaT,1,0,2018,NaT,-118,False
2,12188356,23159459,2018-02-12 00:38:00,2018-02-14 14:01:00,NaT,3,,6.0,7,NaT,NaT,0,0,2018,NaT,-123,False
3,16487201,22587598,2018-06-10 21:58:00,2018-06-20 12:02:00,NaT,3,,6.0,7,NaT,NaT,0,0,2018,NaT,-103,False
4,10730776,26401328,2015-08-27 00:09:00,2015-08-29 12:19:00,NaT,3,,6.0,7,NaT,NaT,1,0,2015,NaT,-107,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224759,16204743,22297786,2018-02-26 23:21:00,2018-03-02 20:00:00,NaT,6,2.0,6.0,2,2191-02-26 18:36:00,2191-02-27 00:55:00,1,31,1987,NaT,-173,False
224760,18679547,20411251,2016-04-04 15:28:00,2016-04-17 18:05:00,NaT,8,8.0,7.0,7,2169-04-04 02:38:00,2169-04-04 15:02:00,1,59,1957,NaT,-153,False
224761,16601429,28017820,2015-03-06 07:10:00,2015-03-09 12:00:00,NaT,8,6.0,6.0,7,NaT,NaT,0,32,1983,NaT,-146,False
224762,13352372,28752421,2015-05-24 19:31:00,2015-06-17 10:35:00,2015-06-17 10:35:00,8,8.0,4.0,7,NaT,NaT,1,67,1948,2015-06-17,-132,True


In [29]:
# Feature type recorded for each selected column.
# NOTE(review): nominal columns such as ethnicity are reported as 'ordinal' in the
# saved output — confirm that is the intended type.
tab_features.types

{'age': 'numeric',
 'admission_location': 'ordinal',
 'admission_type': 'ordinal',
 'discharge_location': 'ordinal',
 'ethnicity': 'ordinal',
 'sex': 'binary'}

In [30]:
# Code -> label mapping for admission_location (the saved output includes a 'nan' label).
tab_features.meta["admission_location"].get_mapping()

{0: 'AMBULATORY SURGERY TRANSFER',
 1: 'CLINIC REFERRAL',
 2: 'EMERGENCY ROOM',
 3: 'INFORMATION NOT AVAILABLE',
 4: 'INTERNAL TRANSFER TO OR FROM PSYCH',
 5: 'PACU',
 6: 'PHYSICIAN REFERRAL',
 7: 'PROCEDURE SITE',
 8: 'TRANSFER FROM HOSPITAL',
 9: 'TRANSFER FROM SKILLED NURSING FACILITY',
 10: 'WALK-IN/SELF REFERRAL',
 11: 'nan'}

In [31]:
# Code -> label mapping for discharge_location (the saved output includes a 'nan' label).
tab_features.meta["discharge_location"].get_mapping()

{0: 'ACUTE HOSPITAL',
 1: 'AGAINST ADVICE',
 2: 'ASSISTED LIVING',
 3: 'CHRONIC/LONG TERM ACUTE CARE',
 4: 'DIED',
 5: 'HEALTHCARE FACILITY',
 6: 'HOME',
 7: 'HOME HEALTH CARE',
 8: 'HOSPICE',
 9: 'OTHER FACILITY',
 10: 'PSYCH FACILITY',
 11: 'REHAB',
 12: 'SKILLED NURSING FACILITY',
 13: 'nan'}

In [32]:
# Code -> label mapping for admission_type.
tab_features.meta["admission_type"].get_mapping()

{0: 'AMBULATORY OBSERVATION',
 1: 'DIRECT EMER.',
 2: 'DIRECT OBSERVATION',
 3: 'ELECTIVE',
 4: 'EU OBSERVATION',
 5: 'EW EMER.',
 6: 'OBSERVATION ADMIT',
 7: 'SURGICAL SAME DAY ADMISSION',
 8: 'URGENT'}

## Temporal

In [33]:
# Reload the saved events and normalize the event name and category text
# (in the saved output both come back lower-cased).
events = load_dataframe("events.parquet")
events = events.assign(
    **{
        EVENT_NAME: normalize_names(events[EVENT_NAME]),
        EVENT_CATEGORY: normalize_categories(events[EVENT_CATEGORY]),
    }
)
# NOTE(review): value normalization is currently disabled:
#events[EVENT_VALUE] = normalize_values(events[EVENT_VALUE])
events

2022-07-08 12:42:55,387 [1;37mINFO[0m cyclops.utils.file - Loading dataframe from events.parquet


Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,25195599,18795283,38047987,2018-09-04 19:00:00,79,79.0,bpm,routine vital signs,heart rate
1,25195599,18795283,38047987,2018-09-04 19:00:00,15,15.0,insp/min,respiratory,respiratory rate
2,25195599,18795283,38047987,2018-09-04 19:40:00,98.6,98.6,°F,routine vital signs,temperature fahrenheit
3,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,access lines - invasive,multi lumen placed in outside facility
4,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,access lines - peripheral,20 gauge placed in outside facility
...,...,...,...,...,...,...,...,...,...
558198,25268968,18520455,35184667,2015-07-05 10:00:00,5.8,5.8,,heartware,flow
558199,25268968,18520455,35184667,2015-07-05 10:00:00,4.8,4.8,,heartware,power
558200,25268968,18520455,35184667,2015-07-05 10:00:00,27,27.0,,heartware,hct
558201,25268968,18520455,35184667,2015-07-05 10:00:00,4,4.0,,heartware,low flow alarm


In [34]:
# The same event name can mean different things in different categories
# (e.g. 'flow' for categories heartware and ecmo), so qualify every name
# with its category to keep such events apart.
# NOTE(review): re-running this cell appends the category a second time.
category_suffix = " - " + events[EVENT_CATEGORY]
events[EVENT_NAME] = events[EVENT_NAME] + category_suffix
events

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,25195599,18795283,38047987,2018-09-04 19:00:00,79,79.0,bpm,routine vital signs,heart rate - routine vital signs
1,25195599,18795283,38047987,2018-09-04 19:00:00,15,15.0,insp/min,respiratory,respiratory rate - respiratory
2,25195599,18795283,38047987,2018-09-04 19:40:00,98.6,98.6,°F,routine vital signs,temperature fahrenheit - routine vital signs
3,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,access lines - invasive,multi lumen placed in outside facility - acces...
4,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,access lines - peripheral,20 gauge placed in outside facility - access l...
...,...,...,...,...,...,...,...,...,...
558198,25268968,18520455,35184667,2015-07-05 10:00:00,5.8,5.8,,heartware,flow - heartware
558199,25268968,18520455,35184667,2015-07-05 10:00:00,4.8,4.8,,heartware,power - heartware
558200,25268968,18520455,35184667,2015-07-05 10:00:00,27,27.0,,heartware,hct - heartware
558201,25268968,18520455,35184667,2015-07-05 10:00:00,4,4.0,,heartware,low flow alarm - heartware


In [35]:
# Keep only rows whose event name is among the 150 most frequent names.
event_name_counts = events[EVENT_NAME].value_counts()
top_events = event_name_counts.head(150).index
events = events.loc[events[EVENT_NAME].isin(top_events)]
events

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,25195599,18795283,38047987,2018-09-04 19:00:00,79,79.0,bpm,routine vital signs,heart rate - routine vital signs
1,25195599,18795283,38047987,2018-09-04 19:00:00,15,15.0,insp/min,respiratory,respiratory rate - respiratory
2,25195599,18795283,38047987,2018-09-04 19:40:00,98.6,98.6,°F,routine vital signs,temperature fahrenheit - routine vital signs
3,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,access lines - invasive,multi lumen placed in outside facility - acces...
4,25195599,18795283,38047987,2018-09-04 19:40:00,1,1.0,,access lines - peripheral,20 gauge placed in outside facility - access l...
...,...,...,...,...,...,...,...,...,...
558186,25268968,18520455,35184667,2015-07-05 09:00:00,95,95.0,%,respiratory,o2 saturation pulseoxymetry - respiratory
558193,25268968,18520455,35184667,2015-07-05 10:00:00,94,94.0,bpm,routine vital signs,heart rate - routine vital signs
558194,25268968,18520455,35184667,2015-07-05 10:00:00,86,86.0,mmHg,routine vital signs,non invasive blood pressure mean - routine vit...
558195,25268968,18520455,35184667,2015-07-05 10:00:00,13,13.0,insp/min,respiratory,respiratory rate - respiratory


In [36]:
# Append the synthetic death rows to the measured events. death_events carries
# only a subset of the columns, so the remaining ones are NaN for those rows.
events = pd.concat((events, death_events), axis=0)
events.head()

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,25195599,18795283.0,38047987.0,2018-09-04 19:00:00,79.0,79.0,bpm,routine vital signs,heart rate - routine vital signs
1,25195599,18795283.0,38047987.0,2018-09-04 19:00:00,15.0,15.0,insp/min,respiratory,respiratory rate - respiratory
2,25195599,18795283.0,38047987.0,2018-09-04 19:40:00,98.6,98.6,°F,routine vital signs,temperature fahrenheit - routine vital signs
3,25195599,18795283.0,38047987.0,2018-09-04 19:40:00,1.0,1.0,,access lines - invasive,multi lumen placed in outside facility - acces...
4,25195599,18795283.0,38047987.0,2018-09-04 19:40:00,1.0,1.0,,access lines - peripheral,20 gauge placed in outside facility - access l...


In [37]:
# Spot-check that the death rows made it into the combined frame.
events.loc[events[EVENT_NAME].str.contains("death")].head()

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
1612,28962934,,,2015-10-05 13:40:00,,1.0,,,death
1652,22398596,,,2015-12-02 00:39:00,,1.0,,,death
1667,26870252,,,2018-03-28 16:56:00,,1.0,,,death
1672,22253579,,,2015-09-30 08:30:00,,1.0,,,death
1705,26973813,,,2015-11-06 02:36:00,,1.0,,,death


In [38]:
# Normalizer for the event values, grouped by event name, using the STANDARD method.
feature_normalizer = GroupbyNormalizer({EVENT_VALUE: STANDARD}, by=EVENT_NAME)

# Aggregate the mean event value per (encounter, event name) into timesteps.
# NOTE(review): timestep_size / window_duration appear to be hours — the saved
# aggregation output shows timestep starts 8 hours apart and three timesteps.
aggregator = Aggregator(
    aggfuncs={EVENT_VALUE: MEAN},
    timestamp_col=EVENT_TIMESTAMP,
    time_by=ENCOUNTER_ID,
    agg_by=[ENCOUNTER_ID, EVENT_NAME],
    timestep_size=8,
    window_duration=24,  # Optional
    #agg_meta_for=EVENT_VALUE,  # Optional
)

# The earlier concat kept the original index labels of both frames; replace
# them with a clean 0..n-1 index. drop=True discards the old labels directly,
# which also works when the index is named or an "index" column already exists.
events = events.reset_index(drop=True)

tmp_features = TemporalFeatures(
    events,
    [EVENT_VALUE],
    [ENCOUNTER_ID, EVENT_NAME],
    EVENT_TIMESTAMP,
    aggregator=aggregator,
)
# NOTE(review): the normalizer is only registered here; the cell that would call
# tmp_features.normalize(FEATURES) is commented out, and the saved output below
# shows the raw event values.
tmp_features.add_normalizer(FEATURES, feature_normalizer)
tmp_features.get_data()

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,25195599,18795283.0,38047987.0,2018-09-04 19:00:00,79,79.0,bpm,routine vital signs,heart rate - routine vital signs
1,25195599,18795283.0,38047987.0,2018-09-04 19:00:00,15,15.0,insp/min,respiratory,respiratory rate - respiratory
2,25195599,18795283.0,38047987.0,2018-09-04 19:40:00,98.6,98.6,°F,routine vital signs,temperature fahrenheit - routine vital signs
3,25195599,18795283.0,38047987.0,2018-09-04 19:40:00,1,1.0,,access lines - invasive,multi lumen placed in outside facility - acces...
4,25195599,18795283.0,38047987.0,2018-09-04 19:40:00,1,1.0,,access lines - peripheral,20 gauge placed in outside facility - access l...
...,...,...,...,...,...,...,...,...,...
518700,23802854,,,2020-11-27 10:48:00,,1.0,,,death
518701,29859744,,,2015-12-19 15:00:00,,1.0,,,death
518702,23975043,,,2015-03-22 13:15:00,,1.0,,,death
518703,29836978,,,2018-12-08 14:49:00,,1.0,,,death


In [39]:
#tmp_features.normalize(FEATURES)
#tmp_features.get_data()

In [40]:
# Run the aggregation configured on tmp_features. In the saved run this logged that
# rows with a null event_timestamp were dropped first.
aggregated = tmp_features.aggregate()
aggregated

2022-07-08 12:42:57,610 [1;37mINFO[0m cyclops.processors.cleaning - Dropped nulls over columns: event_timestamp. Removed 90 rows.
2022-07-08 12:43:31,395 [1;37mINFO[0m cyclops.utils.profile - Finished executing function __call__ in 33.814692 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,event_value,timestep_start
encounter_id,event_name,timestep,Unnamed: 3_level_1,Unnamed: 4_level_1
20002810,death,0,1.00,2018-07-05 06:05:00
20004718,death,0,1.00,2015-01-10 14:04:00
20008807,death,0,1.00,2018-11-27 16:45:00
20018555,death,0,1.00,2018-06-28 05:07:00
20020562,death,0,1.00,2015-08-20 00:45:00
...,...,...,...,...
29991695,ptt - labs,2,24.85,2018-07-30 20:00:00
29991695,inr - labs,2,1.10,2018-07-30 20:00:00
29991695,ionized calcium - labs,2,1.11,2018-07-30 20:00:00
29997500,death,0,1.00,2016-05-27 16:19:00


In [41]:
# Inspect the last 100 aggregated rows (indexed by encounter_id, event_name, timestep).
aggregated.tail(100)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,event_value,timestep_start
encounter_id,event_name,timestep,Unnamed: 3_level_1,Unnamed: 4_level_1
29991695,alarms on - alarms,2,1.00,2018-07-30 20:00:00
29991695,spo2 desat limit - alarms,0,86.50,2018-07-30 04:00:00
29991695,spo2 desat limit - alarms,2,88.00,2018-07-30 20:00:00
29991695,st segment monitoring on - alarms,0,1.00,2018-07-30 04:00:00
29991695,st segment monitoring on - alarms,1,1.00,2018-07-30 12:00:00
29991695,...,...,...,...
29991695,ptt - labs,2,24.85,2018-07-30 20:00:00
29991695,inr - labs,2,1.10,2018-07-30 20:00:00
29991695,ionized calcium - labs,2,1.11,2018-07-30 20:00:00
29997500,death,0,1.00,2016-05-27 16:19:00


In [42]:
# Turn the aggregated frame into an array plus the lookup maps for its axes.
# The saved run produced shape (1, 4597, 151, 3).
events_vectorized, group_indices = aggregator.vectorize(aggregated)
events_vectorized.shape

(1, 4597, 151, 3)

In [43]:
# Remove only the leading length-1 axis. A bare np.squeeze would also collapse
# any other axis that happens to have length 1 (a single encounter, event name
# or timestep), silently changing the array's rank. The ndim guard makes the
# cell safe to re-run on an already-squeezed array.
# NOTE(review): axis 0 presumably indexes the aggregated columns (only
# EVENT_VALUE here); with more than one aggregated column this raises instead
# of silently keeping four dimensions.
if events_vectorized.ndim == 4:
    events_vectorized = np.squeeze(events_vectorized, axis=0)
events_vectorized.shape

(4597, 151, 3)

In [44]:
# Unpack the three lookup maps returned by vectorize; the encounter and event-name
# maps are used below to index into events_vectorized.
agg_col_map, encounter_id_map, event_name_map = group_indices

In [45]:
# Look up one encounter/event pair; the result holds one entry per timestep
# (NaN in the saved output where that timestep had no aggregated value).
events_vectorized[
    encounter_id_map[29991695],
    event_name_map["spo2 desat limit - alarms"]
]

array([86.5,  nan, 88. ])

In [1]:
import numpy as np

# Cache file for the vectorized events array.
VECTORIZED_PATH = "events_vectorized.npy"

if "events_vectorized" in globals():
    # Continuing in the kernel that built the array: write/refresh the cache so
    # a top-to-bottom run never depends on a file left over from an older run.
    np.save(VECTORIZED_PATH, events_vectorized)
else:
    # Fresh kernel started from this cell: reuse the array cached by an earlier run.
    events_vectorized = np.load(VECTORIZED_PATH)

In [2]:
from cyclops.processors.split import split_data

In [3]:
# Two fractions are given and three splits come back; in the saved run the
# 4597 encounters were divided into 3218 / 919 / 460.
# NOTE(review): no seed is passed here — confirm whether split_data is deterministic.
split_fractions = [0.7, 0.2]
train_data, val_data, test_data = split_data(events_vectorized, split_fractions)

(4597, 151, 3)
(3218, 1, 1)
(4597, 151, 3)
(919, 1, 1)
(4597, 151, 3)
(460, 1, 1)


In [5]:
# Training split shape; (3218, 151, 3) in the saved run.
train_data.shape

(3218, 151, 3)

In [6]:
# Validation split shape; (919, 151, 3) in the saved run.
val_data.shape

(919, 151, 3)

In [7]:
# Test split shape; (460, 151, 3) in the saved run.
test_data.shape

(460, 151, 3)