In [1]:
import numpy as np
import pandas as pd

from cyclops.processors.aggregate import Aggregator
from cyclops.processors.cleaning import (
    normalize_categories,
    normalize_names,
    normalize_values,
)
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    CARE_UNIT,
    DIAGNOSIS_CODE,
    DIAGNOSIS_TRAJECTORY,
    ENCOUNTER_ID,
    EVENT_CATEGORY,
    EVENT_NAME,
    EVENT_TIMESTAMP,
    EVENT_VALUE,
    EVENT_VALUE_UNIT,
    HOSPITAL_ID,
    SEX,
    SUBJECT_ID,
    TIMESTEP,
    YEAR,
)
from cyclops.processors.constants import (
    BINARY,
    CATEGORICAL_INDICATOR,
    FEATURE_INDICATOR_ATTR,
    FEATURE_MAPPING_ATTR,
    FEATURE_TYPE_ATTR,
    FEATURE_TYPES,
    FEATURES,
    MEAN,
    MIN_MAX,
    MISSING_CATEGORY,
    NUMERIC,
    ORDINAL,
    STANDARD,
    STRING,
    TARGETS,
)
from cyclops.processors.feature.feature import TabularFeatures, TemporalFeatures
from cyclops.processors.feature.normalization import GroupbyNormalizer
from cyclops.query import mimic
from cyclops.query import process as qp
from cyclops.utils.file import load_dataframe, save_dataframe

2022-07-09 10:31:47,907 [1;37mINFO[0m cyclops.orm     - Database setup, ready to run queries!


In [2]:
YEARS = [2015, 2016, 2018, 2019, 2020]
MIN_YEAR = min(YEARS)

# Querying

In [3]:
encounters_interface = mimic.patient_encounters(
    years=YEARS, died=True, died_binarize_col="died"
)
encounters_query = encounters_interface.query
encounters_query = qp.Drop(
    ["insurance", "language", "marital_status", "hospital_expire_flag"]
)(encounters_query)
encounters_interface = mimic.get_interface(encounters_query)
encounters = encounters_interface.run()
encounters

2022-07-09 08:31:30,230 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-07-09 08:31:30,231 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 2.336763 s


Unnamed: 0,subject_id,encounter_id,admit_timestamp,discharge_timestamp,deathtime,admission_type,admission_location,discharge_location,ethnicity,edregtime,edouttime,sex,age,birth_year,dod,anchor_year_difference,died
0,16115393,21765983,2015-02-22 18:52:00,2015-03-05 10:10:00,NaT,ELECTIVE,,HOME,UNKNOWN,NaT,NaT,M,0,2015,NaT,-157,False
1,17763996,26149939,2018-03-29 22:14:00,2018-04-01 12:35:00,NaT,ELECTIVE,,HOME,UNKNOWN,NaT,NaT,M,0,2018,NaT,-155,False
2,12901523,26546757,2018-08-14 17:32:00,2018-08-17 03:18:00,NaT,ELECTIVE,,HOME,BLACK/AFRICAN AMERICAN,NaT,NaT,F,0,2018,NaT,-126,False
3,17871631,20600400,2015-12-03 12:43:00,2015-12-05 12:05:00,NaT,ELECTIVE,,HOME,ASIAN,NaT,NaT,F,0,2015,NaT,-126,False
4,13048111,23204476,2018-11-01 14:35:00,2018-11-05 15:55:00,NaT,ELECTIVE,,HOME,BLACK/AFRICAN AMERICAN,NaT,NaT,M,0,2018,NaT,-156,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224759,17825003,21947272,2016-04-08 20:35:00,2016-04-10 14:22:00,NaT,OBSERVATION ADMIT,EMERGENCY ROOM,HOME HEALTH CARE,WHITE,2129-04-08 09:24:00,2129-04-08 21:48:00,M,83,1933,NaT,-113,False
224760,18679547,25304535,2016-09-24 01:23:00,2016-09-30 19:30:00,NaT,URGENT,TRANSFER FROM HOSPITAL,HOME,WHITE,2169-09-23 18:57:00,2169-09-24 02:20:00,M,59,1957,NaT,-153,False
224761,14731798,26121773,2016-02-17 00:48:00,2016-02-20 15:33:00,NaT,SURGICAL SAME DAY ADMISSION,PHYSICIAN REFERRAL,HOME HEALTH CARE,WHITE,NaT,NaT,F,38,1978,NaT,-121,False
224762,14822167,22790767,2015-03-14 21:12:00,2015-03-16 17:32:00,NaT,EW EMER.,EMERGENCY ROOM,HOME HEALTH CARE,WHITE,2148-03-14 16:46:00,2148-03-14 19:29:00,M,70,1945,NaT,-133,False


In [4]:
encounters_path = save_dataframe(encounters, "encounters.parquet")

2022-07-09 08:16:10,927 [1;37mINFO[0m cyclops.utils.file - Saving dataframe to encounters.parquet


In [15]:
events_interface = mimic.events()
events_query = events_interface.query
events_query = qp.Drop(["warning", "itemid", "storetime"])(events_query)
events_interface = mimic.get_interface(events_query)
events = events_interface.run(limit=1000000)
events = pd.merge(
    encounters[[ENCOUNTER_ID, "anchor_year_difference"]], events, on=ENCOUNTER_ID
)


def add_offset(row):
    row[EVENT_TIMESTAMP] += pd.DateOffset(years=row["anchor_year_difference"])
    return row


events = events.apply(add_offset, axis=1)
events = events.drop("anchor_year_difference", axis=1)
events

2022-07-09 08:46:43,936 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-07-09 08:46:43,937 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 0.075125 s


Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name


In [6]:
events_path = save_dataframe(events, "events.parquet")

2022-07-09 08:46:47,087 [1;37mINFO[0m cyclops.utils.file - Saving dataframe to events.parquet


------------------------------------------------------------------------------------------------

# Processing

In [3]:
encounters = load_dataframe("encounters.parquet")
encounters.head(5)

2022-07-09 10:31:48,834 [1;37mINFO[0m cyclops.utils.file - Loading dataframe from encounters.parquet


Unnamed: 0,subject_id,encounter_id,admit_timestamp,discharge_timestamp,deathtime,admission_type,admission_location,discharge_location,ethnicity,edregtime,edouttime,sex,age,birth_year,dod,anchor_year_difference,died
0,19692656,27332368,2018-08-07 12:36:00,2018-08-09 12:45:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,M,0,2018,NaT,-154,False
1,18373085,24514056,2015-10-20 11:22:00,2015-10-23 12:40:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,M,0,2015,NaT,-125,False
2,16591664,22764393,2015-03-02 04:41:00,2015-03-03 13:30:00,NaT,ELECTIVE,,HOME,HISPANIC/LATINO,NaT,NaT,F,0,2015,NaT,-150,False
3,14083263,26476486,2015-01-28 02:01:00,2015-01-29 14:45:00,NaT,ELECTIVE,,HOME,WHITE,NaT,NaT,M,0,2015,NaT,-101,False
4,12067589,23047540,2018-05-20 02:21:00,2018-05-22 11:57:00,NaT,ELECTIVE,,HOME,ASIAN,NaT,NaT,M,0,2018,NaT,-124,False


In [4]:
events = load_dataframe("events_small.parquet")
events[EVENT_NAME] = normalize_names(events[EVENT_NAME])
events[EVENT_CATEGORY] = normalize_categories(events[EVENT_CATEGORY])
# events[EVENT_VALUE] = normalize_values(events[EVENT_VALUE])
events.head(5)

2022-07-09 10:31:50,026 [1;37mINFO[0m cyclops.utils.file - Loading dataframe from events_small.parquet


Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,21609634,10358003,30336108,2018-05-02 04:50:00,No N/V,0.0,,toxicology,nausea and vomiting
1,21609634,10358003,30336108,2018-05-02 04:50:00,No Tremor,0.0,,toxicology,tremor
2,21609634,10358003,30336108,2018-05-02 04:50:00,Palms Moist...,2.0,,toxicology,paroxysmal sweats
3,21609634,10358003,30336108,2018-05-02 04:50:00,No Anxiety,0.0,,toxicology,anxiety
4,21609634,10358003,30336108,2018-05-02 04:50:00,Not Present,0.0,,toxicology,auditory disturbance


In [5]:
death_events = encounters[encounters["died"] == True]
death_events = death_events[[ENCOUNTER_ID, "deathtime"]]
death_events = death_events.rename({"deathtime": EVENT_TIMESTAMP}, axis=1)
death_events[EVENT_NAME] = "death"
death_events[EVENT_VALUE] = 1
death_events.head(5)

Unnamed: 0,encounter_id,event_timestamp,event_name,event_value
1617,24422389,2018-06-06 15:00:00,death,1
1619,21834123,2018-02-28 04:30:00,death,1
1627,21164559,2018-02-14 09:30:00,death,1
1650,29917369,2015-08-30 23:45:00,death,1
1668,20620061,2018-05-20 00:00:00,death,1


## Tabular

In [9]:
# df['rooms'].str.replace("°", " deg")

In [10]:
# Series.str.encode('utf-8')

In [11]:
type(b"Test")

bytes

In [12]:
a = b"Test"
b = str.encode("Test")
a == b

True

In [13]:
events

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,27009664,10219100,38629729,2015-11-04 17:27:00,Systolic,,,Alarms,NBP Alarm Source
1,27009664,10219100,38629729,2015-11-04 17:28:00,ST (Sinus Tachycardia),,,Routine Vital Signs,Heart Rhythm
2,27009664,10219100,38629729,2015-11-04 17:28:00,Oral,,,Routine Vital Signs,Temperature Site
3,27009664,10219100,38629729,2015-11-04 17:31:00,15 Degrees,,,Treatments,Head of Bed
4,27009664,10219100,38629729,2015-11-04 17:31:00,Turns by Self,,,Treatments,Turn
...,...,...,...,...,...,...,...,...,...
534199,24633100,18652786,33320137,2015-06-17 09:47:00,67,67.0,Inch,General,Height
534200,24633100,18652786,33320137,2015-06-17 09:47:00,170,170.0,cm,General,Height (cm)
534201,29905334,19319186,37854688,2018-02-02 04:58:00,65.4,65.4,kg,General,Admission Weight (Kg)
534202,20249842,18416284,37990729,2015-01-02 00:32:00,67.7,67.7,kg,General,Admission Weight (Kg)


In [14]:
features = [
    AGE,
    SEX,
    "admission_type",
    "admission_location",
    "discharge_location",
    "ethnicity",
]

tab_features = TabularFeatures(
    encounters,
    features,
)

In [15]:
tab_features.get_data()

Unnamed: 0,subject_id,encounter_id,admit_timestamp,discharge_timestamp,deathtime,admission_type,admission_location,discharge_location,ethnicity,edregtime,edouttime,sex,age,birth_year,dod,anchor_year_difference,died
0,19692656,27332368,2018-08-07 12:36:00,2018-08-09 12:45:00,NaT,3,,6.0,7,NaT,NaT,1,0,2018,NaT,-154,False
1,18373085,24514056,2015-10-20 11:22:00,2015-10-23 12:40:00,NaT,3,,6.0,7,NaT,NaT,1,0,2015,NaT,-125,False
2,16591664,22764393,2015-03-02 04:41:00,2015-03-03 13:30:00,NaT,3,,6.0,3,NaT,NaT,0,0,2015,NaT,-150,False
3,14083263,26476486,2015-01-28 02:01:00,2015-01-29 14:45:00,NaT,3,,6.0,7,NaT,NaT,1,0,2015,NaT,-101,False
4,12067589,23047540,2018-05-20 02:21:00,2018-05-22 11:57:00,NaT,3,,6.0,1,NaT,NaT,1,0,2018,NaT,-124,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224759,18300656,25714158,2018-08-14 11:14:00,2018-08-18 14:39:00,NaT,5,8.0,7.0,1,2162-08-14 05:31:00,2162-08-14 12:57:00,0,75,1943,NaT,-144,False
224760,16592631,28354206,2018-03-20 16:29:00,2018-04-07 19:00:00,NaT,5,8.0,11.0,7,2152-03-20 15:01:00,2152-03-20 17:54:00,1,73,1945,NaT,-134,False
224761,19635799,23124732,2018-03-01 05:33:00,2018-03-02 17:41:00,NaT,7,6.0,7.0,7,NaT,NaT,1,75,1943,2018-08-31,-167,False
224762,10610272,23342908,2019-01-20 00:00:00,2019-01-24 15:30:00,NaT,3,6.0,7.0,7,NaT,NaT,0,66,1953,NaT,-112,False


In [16]:
tab_features.types

{'sex': 'binary',
 'admission_type': 'ordinal',
 'age': 'numeric',
 'admission_location': 'ordinal',
 'discharge_location': 'ordinal',
 'ethnicity': 'ordinal'}

In [17]:
tab_features.meta["admission_location"].get_mapping()

{0: 'AMBULATORY SURGERY TRANSFER',
 1: 'CLINIC REFERRAL',
 2: 'EMERGENCY ROOM',
 3: 'INFORMATION NOT AVAILABLE',
 4: 'INTERNAL TRANSFER TO OR FROM PSYCH',
 5: 'PACU',
 6: 'PHYSICIAN REFERRAL',
 7: 'PROCEDURE SITE',
 8: 'TRANSFER FROM HOSPITAL',
 9: 'TRANSFER FROM SKILLED NURSING FACILITY',
 10: 'WALK-IN/SELF REFERRAL',
 11: 'nan'}

In [18]:
tab_features.meta["discharge_location"].get_mapping()

{0: 'ACUTE HOSPITAL',
 1: 'AGAINST ADVICE',
 2: 'ASSISTED LIVING',
 3: 'CHRONIC/LONG TERM ACUTE CARE',
 4: 'DIED',
 5: 'HEALTHCARE FACILITY',
 6: 'HOME',
 7: 'HOME HEALTH CARE',
 8: 'HOSPICE',
 9: 'OTHER FACILITY',
 10: 'PSYCH FACILITY',
 11: 'REHAB',
 12: 'SKILLED NURSING FACILITY',
 13: 'nan'}

In [19]:
tab_features.meta["admission_type"].get_mapping()

{0: 'AMBULATORY OBSERVATION',
 1: 'DIRECT EMER.',
 2: 'DIRECT OBSERVATION',
 3: 'ELECTIVE',
 4: 'EU OBSERVATION',
 5: 'EW EMER.',
 6: 'OBSERVATION ADMIT',
 7: 'SURGICAL SAME DAY ADMISSION',
 8: 'URGENT'}

## Temporal

In [6]:
# Same event names are the same, but mean different things for different categories
# E.g., 'flow' for categories heartware and ecmo
events[EVENT_NAME] = events[EVENT_NAME] + " - " + events[EVENT_CATEGORY]
events

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,21609634,10358003,30336108,2018-05-02 04:50:00,No N/V,0.0,,toxicology,nausea and vomiting - toxicology
1,21609634,10358003,30336108,2018-05-02 04:50:00,No Tremor,0.0,,toxicology,tremor - toxicology
2,21609634,10358003,30336108,2018-05-02 04:50:00,Palms Moist...,2.0,,toxicology,paroxysmal sweats - toxicology
3,21609634,10358003,30336108,2018-05-02 04:50:00,No Anxiety,0.0,,toxicology,anxiety - toxicology
4,21609634,10358003,30336108,2018-05-02 04:50:00,Not Present,0.0,,toxicology,auditory disturbance - toxicology
...,...,...,...,...,...,...,...,...,...
51092,26656908,10351597,34313017,2017-01-04 09:51:00,No,,,care plans,impaired mobility ncp - plan revised - care plans
51093,26656908,10351597,34313017,2017-01-04 09:51:00,Medium,2.0,,care plans,acuity workload question 1 - care plans
51094,26656908,10351597,34313017,2017-01-04 09:51:00,Medium,2.0,,care plans,acuity workload question 2 - care plans
51095,26656908,10351597,34313017,2017-01-04 10:00:00,AF (Atrial Fibrillation),,,routine vital signs,heart rhythm - routine vital signs


In [7]:
top_events = events[EVENT_NAME].value_counts()[:150].index
events = events[events[EVENT_NAME].isin(top_events)]
events

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
16,21609634,10358003,30336108,2018-05-02 04:50:00,Clear,,,pulmonary,rul lung sounds - pulmonary
17,21609634,10358003,30336108,2018-05-02 04:50:00,Exp Wheeze,,,pulmonary,rul lung sounds - pulmonary
18,21609634,10358003,30336108,2018-05-02 04:50:00,Clear,,,pulmonary,rll lung sounds - pulmonary
19,21609634,10358003,30336108,2018-05-02 04:50:00,Clear,,,pulmonary,lul lung sounds - pulmonary
20,21609634,10358003,30336108,2018-05-02 04:50:00,Exp Wheeze,,,pulmonary,lul lung sounds - pulmonary
...,...,...,...,...,...,...,...,...,...
51051,26656908,10351597,34313017,2017-01-04 09:51:00,"Encourage verbalization of feelings, perceptio...",,,care plans,coping/knowledge deficit ncp - interventions ...
51052,26656908,10351597,34313017,2017-01-04 09:51:00,Identify and address any barriers to learning,,,care plans,coping/knowledge deficit ncp - interventions ...
51053,26656908,10351597,34313017,2017-01-04 09:51:00,Provide patient-specific information and educa...,,,care plans,coping/knowledge deficit ncp - interventions ...
51095,26656908,10351597,34313017,2017-01-04 10:00:00,AF (Atrial Fibrillation),,,routine vital signs,heart rhythm - routine vital signs


In [8]:
events = pd.concat([events, death_events])
events.head(5)

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
16,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Clear,,,pulmonary,rul lung sounds - pulmonary
17,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Exp Wheeze,,,pulmonary,rul lung sounds - pulmonary
18,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Clear,,,pulmonary,rll lung sounds - pulmonary
19,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Clear,,,pulmonary,lul lung sounds - pulmonary
20,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Exp Wheeze,,,pulmonary,lul lung sounds - pulmonary


In [9]:
events[events[EVENT_NAME].str.contains("death")].head(5)

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
1617,24422389,,,2018-06-06 15:00:00,,1.0,,,death
1619,21834123,,,2018-02-28 04:30:00,,1.0,,,death
1627,21164559,,,2018-02-14 09:30:00,,1.0,,,death
1650,29917369,,,2015-08-30 23:45:00,,1.0,,,death
1668,20620061,,,2018-05-20 00:00:00,,1.0,,,death


In [10]:
feature_normalizer = GroupbyNormalizer({EVENT_VALUE: STANDARD}, by=EVENT_NAME)

aggregator = Aggregator(
    aggfuncs={EVENT_VALUE: MEAN},
    timestamp_col=EVENT_TIMESTAMP,
    time_by=ENCOUNTER_ID,
    agg_by=[ENCOUNTER_ID, EVENT_NAME],
    timestep_size=8,
    window_duration=24,  # Optional
    # agg_meta_for=EVENT_VALUE,  # Optional
)

events = events.reset_index().drop("index", axis=1)

tmp_features = TemporalFeatures(
    events,
    [EVENT_VALUE],
    [ENCOUNTER_ID, EVENT_NAME],
    EVENT_TIMESTAMP,
    aggregator=aggregator,
)
# tmp_features.add_normalizer(FEATURES, feature_normalizer)
tmp_features.get_data()

Unnamed: 0,encounter_id,subject_id,stay_id,event_timestamp,value,event_value,event_value_unit,event_category,event_name
0,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Clear,,,pulmonary,rul lung sounds - pulmonary
1,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Exp Wheeze,,,pulmonary,rul lung sounds - pulmonary
2,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Clear,,,pulmonary,rll lung sounds - pulmonary
3,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Clear,,,pulmonary,lul lung sounds - pulmonary
4,21609634,10358003.0,30336108.0,2018-05-02 04:50:00,Exp Wheeze,,,pulmonary,lul lung sounds - pulmonary
...,...,...,...,...,...,...,...,...,...
47479,28038618,,,2018-12-31 19:00:00,,1.0,,,death
47480,28971207,,,2018-11-06 21:00:00,,1.0,,,death
47481,24809610,,,2019-01-08 18:10:00,,1.0,,,death
47482,21924009,,,2020-06-16 11:05:00,,1.0,,,death


In [11]:
# tmp_features.normalize(FEATURES)
# tmp_features.get_data()

In [12]:
aggregated = tmp_features.aggregate()
aggregated.head(5)

2022-07-09 10:26:50,495 [1;37mINFO[0m cyclops.processors.cleaning - Dropped nulls over columns: event_timestamp. Removed 90 rows.
2022-07-09 10:27:01,390 [1;37mINFO[0m cyclops.utils.profile - Finished executing function __call__ in 10.918880 s


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,event_value,timestep_start
encounter_id,event_name,timestep,Unnamed: 3_level_1,Unnamed: 4_level_1
20002810,death,0,1.0,2018-07-05 06:05:00
20004718,death,0,1.0,2015-01-10 14:04:00
20008807,death,0,1.0,2018-11-27 16:45:00
20018555,death,0,1.0,2018-06-28 05:07:00
20020562,death,0,1.0,2015-08-20 00:45:00


In [21]:
aggregated.head(100)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,event_value,timestep_start
encounter_id,event_name,timestep,Unnamed: 3_level_1,Unnamed: 4_level_1
20002810,death,0,1.0,2018-07-05 06:05:00
20004718,death,0,1.0,2015-01-10 14:04:00
20008807,death,0,1.0,2018-11-27 16:45:00
20018555,death,0,1.0,2018-06-28 05:07:00
20020562,death,0,1.0,2015-08-20 00:45:00
...,...,...,...,...
20204398,death,0,1.0,2018-03-11 02:44:00
20210766,death,0,1.0,2015-01-23 16:38:00
20211604,death,0,1.0,2015-11-05 08:20:00
20215576,death,0,1.0,2018-04-09 17:47:00


In [14]:
save_dataframe(aggregated, "aggregated.parquet")

2022-07-09 10:27:01,417 [1;37mINFO[0m cyclops.utils.file - Saving dataframe to aggregated.parquet


'aggregated.parquet'

In [1]:
import numpy as np
import pandas as pd

from cyclops.processors.aggregate import Aggregator
from cyclops.processors.cleaning import (
    normalize_categories,
    normalize_names,
    normalize_values,
)
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    CARE_UNIT,
    DIAGNOSIS_CODE,
    DIAGNOSIS_TRAJECTORY,
    ENCOUNTER_ID,
    EVENT_CATEGORY,
    EVENT_NAME,
    EVENT_TIMESTAMP,
    EVENT_VALUE,
    EVENT_VALUE_UNIT,
    HOSPITAL_ID,
    SEX,
    SUBJECT_ID,
    TIMESTEP,
    YEAR,
)
from cyclops.processors.constants import (
    BINARY,
    CATEGORICAL_INDICATOR,
    FEATURE_INDICATOR_ATTR,
    FEATURE_MAPPING_ATTR,
    FEATURE_TYPE_ATTR,
    FEATURE_TYPES,
    FEATURES,
    MEAN,
    MIN_MAX,
    MISSING_CATEGORY,
    NUMERIC,
    ORDINAL,
    STANDARD,
    STRING,
    TARGETS,
)
from cyclops.processors.feature.feature import TabularFeatures, TemporalFeatures
from cyclops.processors.feature.normalization import GroupbyNormalizer
from cyclops.query import mimic
from cyclops.query import process as qp
from cyclops.utils.file import load_dataframe, save_dataframe

2022-07-09 11:04:59,685 [1;37mINFO[0m cyclops.orm     - Database setup, ready to run queries!


In [2]:
aggregator = Aggregator(
    aggfuncs={EVENT_VALUE: MEAN},
    timestamp_col=EVENT_TIMESTAMP,
    time_by=ENCOUNTER_ID,
    agg_by=[ENCOUNTER_ID, EVENT_NAME],
    timestep_size=8,
    window_duration=24,  # Optional
    # agg_meta_for=EVENT_VALUE,  # Optional
)

aggregated = load_dataframe("aggregated.parquet")

2022-07-09 11:04:59,707 [1;37mINFO[0m cyclops.utils.file - Loading dataframe from aggregated.parquet


In [3]:
events_vectorized, group_indices = aggregator.vectorize(aggregated)
events_vectorized.shape

                                                                   event_value
encounter_id event_name                                  timestep             
20002810     abdominal assessment - gi/gu                0                 NaN
                                                         1                 NaN
                                                         2                 NaN
             activity / mobility - treatments            0                 NaN
                                                         1                 NaN
...                                                                        ...
             position - treatments                       0                 NaN
                                                         1                 NaN
                                                         2                 NaN
             posttib. pulses l - cardiovascular (pulses) 0                 NaN
                                                    

2022-07-09 11:23:43,223 [1;37mINFO[0m cyclops.utils.profile - Finished executing function vectorize in 1123.481689 s


(1, 4255, 104, 3)

In [4]:
events_vectorized = np.squeeze(events_vectorized)
events_vectorized.shape

(4255, 104, 3)

In [19]:
agg_col_map, encounter_id_map, event_name_map = group_indices

In [20]:
events_vectorized[
    encounter_id_map[29991695], event_name_map["spo2 desat limit - alarms"]
]

KeyError: 29991695

In [None]:
import numpy as np

# np.save("events_vectorized.npy", events_vectorized)
events_vectorized = np.load("events_vectorized.npy")

In [None]:
from cyclops.processors.split import split_data

In [None]:
train_data, val_data, test_data = split_data(events_vectorized, [0.7, 0.2])

In [None]:
train_data.shape

In [None]:
val_data.shape

In [None]:
test_data.shape