### This notebook shows examples of how to use the cyclops.processor API on GEMINI.

## Get all patient encounters (including ER data) from St. Michael's Hospital between March 1, 2020 and March 15, 2020, together with their diagnoses, vitals, labs, and interventions.

In [1]:
from cyclops.processors.column_names import AGE, DIAGNOSIS_CODE, ENCOUNTER_ID, SEX
from cyclops.processors.constants import SMH
from cyclops.processors.utils import gather_columns
from cyclops.query import gemini

# The query-and-save workflow below is intentionally left commented out.
# NOTE(review): presumably it was run once and the `.save(...)` calls at the end
# produced the parquet files that the next cell loads — confirm before relying on it.
# Uncomment the whole block to re-run the extraction against the database.

# Build the patient cohort query, then the per-category queries derived from it.
# patients = gemini.patients(
#     hospitals=[SMH], from_date="2020-03-01", to_date="2020-03-15", include_er_data=True
# )
# patients_diagnoses = gemini.diagnoses(patients=patients)
# patients_vitals = gemini.events(patients=patients, category="vitals")
# patients_labs = gemini.events(patients=patients, category="lab")
# patients_interventions = gemini.events(patients=patients, category="intervention")

# Execute each query.
# diagnoses_data = patients_diagnoses.run()
# vitals_data = patients_vitals.run()
# labs_data = patients_labs.run()
# interventions_data = patients_interventions.run()

# Report how many rows each query returned.
# print(f"{len(diagnoses_data)} diagnoses rows extracted!")
# print(f"{len(vitals_data)} vitals rows extracted!")
# print(f"{len(labs_data)} labs rows extracted!")
# print(f"{len(interventions_data)} interventions rows extracted!")

# Save the results to the current directory under the given names.
# patients_diagnoses.save(".", "diagnoses")
# patients_vitals.save(".", "vitals")
# patients_labs.save(".", "lab")
# patients_interventions.save(".", "intervention")

2022-04-21 07:52:05,218 [1;37mINFO[0m cyclops.processors.utils - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:05,384 [1;37mINFO[0m cyclops.config  - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:05,470 [1;37mINFO[0m cyclops.query.utils - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:05,498 [1;37mINFO[0m cyclops.utils.profile - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:05,503 [1;37mINFO[0m cyclops.orm     - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:05,508 [1;37mINFO[0m cyclops.query.interface - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:06,077 [1;37mINFO[0m cyclops.orm     - Database setup, ready to run queries!


## Process the queried data into features (static + temporal).

In [2]:
import pandas as pd

from cyclops.processor import Aggregator, Imputer, featurize

# Load the query results stored as parquet files in the working directory.
diagnoses_data = pd.read_parquet("diagnoses.gzip")
vitals_data = pd.read_parquet("vitals.gzip")
labs_data = pd.read_parquet("lab.gzip")
# NOTE(review): interventions_data is loaded here but is not passed to
# `featurize` below — confirm whether it should be part of the temporal inputs.
interventions_data = pd.read_parquet("intervention.gzip")

# Columns taken from the diagnoses table for the static inputs
# (presumably `gather_columns` selects exactly these — verify against its docs).
static_columns = [ENCOUNTER_ID, AGE, SEX, DIAGNOSIS_CODE]
diagnoses_data = gather_columns(diagnoses_data, static_columns)

# Labs and vitals are the temporal (event) inputs.
temporal_sources = [labs_data, vitals_data]

# Build the feature handler from the static and temporal inputs, with the
# imputation strategy set to "none" and a default-constructed aggregator.
feature_handler = featurize(
    static_data=[diagnoses_data],
    temporal_data=temporal_sources,
    imputer=Imputer(strategy="none"),
    aggregator=Aggregator(),
)

2022-04-21 07:52:06,109 [1;37mINFO[0m cyclops.processors.aggregate - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:06,116 [1;37mINFO[0m cyclops.processors.diagnoses - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:06,131 [1;37mINFO[0m cyclops.processors.events - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:06,879 [1;37mINFO[0m cyclops.processors.feature_handler - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-21 07:52:07,074 [1;37mINFO[0m cyclops.processors.utils - Processing raw diagnosis codes...
2022-04-21 07:52:07,078 [1;37mINFO[0m cyclops.processors.utils - # samples: 1068, # encounters: 150
2022-04-21 07:52:07,125 [1;37mINFO[0m cyclops.processors.utils - Grouping ICD codes to trajectories...
2022-04-21 07:52:07,127 [1;37mINFO[0m cyclops.processors.utils - # samples: 1068, # encounters: 150
2022-04-21 07:52:07,130 [1;37mINFO[0m cyclops.processors.

2022-04-21 07:52:19,406 [1;37mINFO[0m cyclops.processors.utils - # columns: 58, # encounters: 150
2022-04-21 07:52:19,423 [1;37mINFO[0m cyclops.processors.utils - Filtering events within window...
2022-04-21 07:52:19,426 [1;37mINFO[0m cyclops.processors.utils - # samples: 3688, # encounters: 148
2022-04-21 07:52:19,428 [1;37mINFO[0m cyclops.processors.utils - # columns: 58, # encounters: 148
2022-04-21 07:52:25,665 [1;37mINFO[0m cyclops.utils.profile - Finished executing function gather_event_features in 6.263329 s


## Inspect the feature handler and check the extracted features.

In [3]:
# Names of the numerical features known to the handler.
numerical_features = feature_handler.get_numerical_features()
print(numerical_features)

# Names of the categorical features known to the handler.
categorical_features = feature_handler.get_categorical_features()
print(categorical_features)

# Uncomment to inspect the features held by the handler.
# feature_handler.features

['age', 'index', 'sodium', 'urine sodium', 'urine specific gravity', 'bicarbonate', 'tsh', 'blood urea nitrogen', 'vitamin b12', 'white blood cell count', 'high sensitivity troponin', 'vitamin d', 'hba1c', 'lymphocyte', 'neutrophils', 'albumin', 'alp', 'alt', 'aptt', 'ast', 'bilirubin', 'urinalysis', 'calcium', 'calcium, ionized', 'creatinine', 'd-dimer', 'esr', 'serum alcohol', 'ferritin', 'fibrinogen', 'glucose point of care', 'glucose random', 'hematocrit', 'hemoglobin', 'crp', 'inr', 'ketone', 'lactate arterial', 'lactate venous', 'ldh', 'mean cell volume', 'serum osmolality', 'urine osmolality', 'arterial paco2', 'venous pco2', 'arterial ph', 'venous ph', 'platelet count', 'arterial pao2', 'potassium', 'pt', 'index', 'diastolic bp', 'oxygen saturation', 'oxygen flow rate', 'heart rate', 'respiratory rate', 'systolic bp', 'temperature', 'weight']
['F01_F99', 'K00_K95', 'R00_R99', 'D50_D89', 'S00_T88', 'V00_Y99', 'H60_H95', 'M00_M99', 'A00_B99', 'G00_G99', 'Z00_Z99', 'C00_D49', 'I00

## Look at the data quality report (static patient features + labs + vitals)

In [4]:
# Optional data-quality report; left commented out so the notebook runs without
# the `evidently` package. Uncomment the whole block to write the report to
# "data_processed.html".
# NOTE(review): `evidently.dashboard` / `DataQualityTab` looks like the older
# evidently dashboard API — confirm the installed version still provides it.

# from evidently import ColumnMapping
# from evidently.dashboard import Dashboard
# from evidently.dashboard.tabs import DataQualityTab

# Tell evidently which feature columns are numerical and which are categorical.
# dashboard = Dashboard(tabs=[DataQualityTab()])
# column_mapping = ColumnMapping(
#     numerical_features=feature_handler.get_numerical_features(),
#     categorical_features=feature_handler.get_categorical_features(),
# )
# Only a reference dataset is supplied (current_data=None).
# dashboard.calculate(
#     reference_data=feature_handler.features,
#     current_data=None,
#     column_mapping=column_mapping,
# )
# dashboard.save(f"data_processed.html")