### This notebook shows examples of how to use the cyclops.processor API on GEMINI.

## Get all patient encounters (including ER data) with diagnoses from St. Michael's Hospital from March 1, 2020 to March 15, 2020, along with vitals, labs, and interventions.

In [1]:
from cyclops.processors.column_names import AGE, DIAGNOSIS_CODE, ENCOUNTER_ID, SEX
from cyclops.processors.constants import SMH
from cyclops.processors.utils import gather_columns
from cyclops.query import gemini

# NOTE(review): the query / run / save steps below are all commented out, so
# running this cell only performs the imports above. The next code cell reads
# "diagnoses.gzip", "vitals.gzip", "lab.gzip" and "intervention.gzip" from the
# working directory — presumably the files these `.save(".", ...)` calls
# produce; confirm the naming and uncomment to regenerate them.
# patients = gemini.patients(
#     hospitals=[SMH], from_date="2020-03-01", to_date="2020-03-15", include_er_data=True
# )
# patients_diagnoses = gemini.diagnoses(patients=patients)
# patients_vitals = gemini.events(patients=patients, category="vitals")
# patients_labs = gemini.events(patients=patients, category="lab")
# patients_interventions = gemini.events(patients=patients, category="intervention")

# diagnoses_data = patients_diagnoses.run()
# vitals_data = patients_vitals.run()
# labs_data = patients_labs.run()
# interventions_data = patients_interventions.run()

# print(f"{len(diagnoses_data)} diagnoses rows extracted!")
# print(f"{len(vitals_data)} vitals rows extracted!")
# print(f"{len(labs_data)} labs rows extracted!")
# print(f"{len(interventions_data)} interventions rows extracted!")

# patients_diagnoses.save(".", "diagnoses")
# patients_vitals.save(".", "vitals")
# patients_labs.save(".", "lab")
# patients_interventions.save(".", "intervention")

2022-04-26 09:52:09,175 [1;37mINFO[0m cyclops.processors.utils - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:09,333 [1;37mINFO[0m cyclops.config  - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:09,423 [1;37mINFO[0m cyclops.query.utils - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:09,431 [1;37mINFO[0m cyclops.utils.profile - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:09,435 [1;37mINFO[0m cyclops.orm     - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:09,440 [1;37mINFO[0m cyclops.query.interface - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:10,085 [1;37mINFO[0m cyclops.orm     - Database setup, ready to run queries!


## Process queried data into features (static + temporal).

In [2]:
import numpy as np
import pandas as pd

from cyclops.processor import Aggregator, Imputer, featurize

# Load the four tables from parquet files in the current working directory.
# NOTE(review): these are presumably the files written by the commented-out
# `.save(".", ...)` calls in the first code cell — confirm they exist before
# running this cell on a fresh checkout.
diagnoses_data = pd.read_parquet("diagnoses.gzip")
vitals_data = pd.read_parquet("vitals.gzip")
labs_data = pd.read_parquet("lab.gzip")
# NOTE(review): interventions_data is loaded here but is not passed to
# featurize() below — confirm whether it should be part of temporal_data.
interventions_data = pd.read_parquet("intervention.gzip")

# Static input: the diagnoses table restricted to the encounter id, age, sex
# and diagnosis code columns (presumably what gather_columns does — verify).
static_diagnoses_data = gather_columns(
    diagnoses_data, [ENCOUNTER_ID, AGE, SEX, DIAGNOSIS_CODE]
)
# Build the feature handler from the static table plus the labs and vitals
# tables, with mean imputation configured for both static and temporal data.
# NOTE(review): the units of bucket_size=6 and window=4 are not visible in this
# notebook — confirm against the Aggregator documentation.
feature_handler = featurize(
    static_data=[static_diagnoses_data],
    temporal_data=[labs_data, vitals_data],
    aggregator=Aggregator(bucket_size=6, window=4),
    static_imputer=Imputer(strategy="mean"),
    temporal_imputer=Imputer(strategy="mean"),
)

2022-04-26 09:52:10,134 [1;37mINFO[0m cyclops.processors.aggregate - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:10,140 [1;37mINFO[0m cyclops.processors.diagnoses - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:10,145 [1;37mINFO[0m cyclops.processors.events - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:10,979 [1;37mINFO[0m cyclops.processors.impute - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:10,986 [1;37mINFO[0m cyclops.processors.feature_handler - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-26 09:52:11,265 [1;37mINFO[0m cyclops.processors.utils - Processing raw diagnosis codes...
2022-04-26 09:52:11,268 [1;37mINFO[0m cyclops.processors.utils - # samples: 1068, # encounters: 150
2022-04-26 09:52:11,310 [1;37mINFO[0m cyclops.processors.utils - Grouping ICD codes to trajectories...
2022-04-26 09:52:11,313 [1;37mI

F01_F99     0.320000
K00_K95     0.240000
R00_R99     0.460000
D50_D89     0.186667
S00_T88     0.100000
V00_Y99     0.213333
H60_H95     0.006667
M00_M99     0.133333
A00_B99     0.233333
G00_G99     0.120000
Z00_Z99     0.306667
C00_D49     0.173333
I00_I99     0.360000
J00_J99     0.260000
N00_N99     0.353333
L00_L99     0.073333
E00_E89     0.453333
H00_H59     0.006667
Q00_Q99     0.006667
age        64.240000
sex         0.606667
dtype: float64


## Inspect the feature handler and check its features.

In [4]:
# NOTE(review): both inspection calls are commented out, so this cell currently
# does nothing. `feature_handler` is the object returned by featurize() in the
# previous code cell; uncomment to print its feature names (presumably split
# into numerical and categorical — confirm against the feature handler API).
# print(feature_handler.get_numerical_feature_names())
# print(feature_handler.get_categorical_feature_names())