### This notebook shows examples of how to use the cyclops.processor API on GEMINI.

## Get all patient encounters from St. Michael's Hospital from March 1, 2020 to March 15, 2020, with vitals and labs.

In [1]:
from cyclops.processors.constants import SMH
from cyclops.query import gemini

# HDF5 file holding the extracted query results. The processing cell further
# down reads the "query_demo_vitals" and "query_demo_labs" keys from this path.
QUERY_SAVE_PATH = "/mnt/nfs/project/delirium/_extract/first_models/extract.h5"

# NOTE(review): the extraction below is commented out, so running this cell
# neither runs the queries nor writes anything to QUERY_SAVE_PATH. Presumably
# the file was produced by an earlier run of this exact code -- confirm before
# relying on "Restart & Run All" where the file does not already exist.
# patients = gemini.patients(
#     hospitals=[SMH], from_date="2020-03-01", to_date="2020-03-15"
# )
# patients_vitals = gemini.events(patients=patients, category="vitals")
# patients_labs = gemini.events(patients=patients, category="labs")
# patients_vitals.run()
# patients_labs.run()

# print(f"{len(patients_vitals.data)} rows extracted!")
# print(f"{len(patients_labs.data)} rows extracted!")

# patients_vitals.data.to_hdf(
#     QUERY_SAVE_PATH,
#     key=f"query_demo_vitals",
# )
# patients_labs.data.to_hdf(
#     QUERY_SAVE_PATH,
#     key=f"query_demo_labs",
# )

2022-04-07 10:12:03,815 [1;37mINFO[0m cyclops.config  - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-07 10:12:04,336 [1;37mINFO[0m cyclops.query.utils - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-07 10:12:04,344 [1;37mINFO[0m cyclops.utils.profile - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-07 10:12:04,348 [1;37mINFO[0m cyclops.orm     - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-07 10:12:04,803 [1;37mINFO[0m cyclops.orm     - Database setup, ready to run queries!


## Process queried data into features (static aggregation, no imputation).

In [2]:
import pandas as pd

from cyclops.processor import Aggregator, Imputer
from cyclops.processor import featurize

# Load the previously saved query results; vitals and labs are stored under
# separate keys of the same HDF5 file.
vitals_data = pd.read_hdf(QUERY_SAVE_PATH, key="query_demo_vitals")
labs_data = pd.read_hdf(QUERY_SAVE_PATH, key="query_demo_labs")

# Turn the raw lab and vital events into features using the "static"
# aggregation strategy and the "none" imputation strategy.
# NOTE(review): range_=24 / window=24 are presumably hours -- confirm against
# the Aggregator documentation.
feature_handler = featurize(
    data=[labs_data, vitals_data],
    imputer=Imputer(strategy="none"),
    aggregator=Aggregator(strategy="static", range_=24, window=24),
)

2022-04-07 10:12:04,823 [1;37mINFO[0m cyclops.processors.base - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-07 10:12:04,827 [1;37mINFO[0m cyclops.processors.admin - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-07 10:12:04,833 [1;37mINFO[0m cyclops.processors.labs - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-07 10:12:04,837 [1;37mINFO[0m cyclops.processors.vitals - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-07 10:12:05,559 [1;37mINFO[0m cyclops.processors.feature_handler - Log file is /mnt/nfs/home/krishnanam/vector-delirium/log.log
2022-04-07 10:12:05,983 [1;37mINFO[0m cyclops.processors.base - Processing raw admin data...
2022-04-07 10:12:05,987 [1;37mINFO[0m cyclops.processors.base - # samples: 16243, # encounters: 156
2022-04-07 10:12:05,989 [1;37mINFO[0m cyclops.processors.admin - # admin features: 2, # encounters: 156
2022-04-07 10:12:06,088 [1;37mINFO[0m cyclops.

## Inspect feature handler, and check features.

In [3]:
# Print the numerical feature names first, then the categorical ones.
for list_feature_names in (
    feature_handler.get_numerical_features,
    feature_handler.get_categorical_features,
):
    print(list_feature_names())

# Bare last expression so the notebook renders the feature table richly.
feature_handler.features

['potassium', 'pt', 'sodium', 'urine sodium', 'urine specific gravity', 'bicarbonate', 'blood urea nitrogen', 'vitamin b12', 'white blood cell count', 'high sensitivity troponin', 'vitamin d', 'hba1c', 'lymphocyte', 'neutrophils', 'albumin', 'aptt', 'bilirubin', 'calcium', 'calcium, ionized', 'creatinine', 'esr', 'serum alcohol', 'ferritin', 'fibrinogen', 'glucose fasting', 'glucose point of care', 'glucose random', 'hematocrit', 'hemoglobin', 'crp', 'inr', 'ketone', 'lactate arterial', 'lactate venous', 'mean cell volume', 'arterial paco2', 'venous pco2', 'arterial ph', 'venous ph', 'platelet count', 'arterial pao2', 'CAM', 'Diastolic BP', 'Heart Rate', 'respiratory rate', 'Systolic BP', 'Temperature', 'weight']
['fio2']


Unnamed: 0,potassium,pt,sodium,urine sodium,urine specific gravity,bicarbonate,blood urea nitrogen,vitamin b12,white blood cell count,high sensitivity troponin,...,platelet count,arterial pao2,CAM,Diastolic BP,Heart Rate,respiratory rate,Systolic BP,Temperature,weight,fio2
11102331,3.800,,142.0,,1.010,26.0,,507.0,7.340000,9.0,...,158.000000,,,62.000000,88.000000,18.000000,111.000000,36.500000,,1.0
11112372,4.000,13.8,138.0,,,26.0,,,12.445000,,...,498.500000,,0.0,78.500000,61.750000,18.000000,118.750000,35.533333,,1.0
11105307,3.800,18.6,133.5,,,23.5,,,4.426667,,...,78.000000,,,66.000000,91.000000,18.285714,111.714286,36.466667,,1.0
11118152,4.500,29.8,135.0,,1.035,27.0,,,7.060000,4.0,...,59.000000,,,76.000000,98.000000,20.000000,136.000000,35.500000,,1.0
11156580,4.100,,137.0,,1.015,36.0,,,9.780000,,...,375.000000,,,79.500000,86.500000,18.000000,134.500000,35.600000,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11965564,4.175,,129.0,44.0,1.014,25.8,,,14.800000,4.0,...,389.000000,,0.0,101.666667,84.333333,22.000000,157.666667,36.466667,,1.0
11980294,3.600,16.9,134.5,,1.013,25.0,,,11.806667,,...,237.333333,,0.0,81.500000,84.500000,17.000000,113.500000,37.350000,,1.0
11799248,,,,,,,,,,,...,,,0.5,74.333333,75.333333,20.000000,156.333333,36.133333,,1.0
11281072,,,,,,,,,,,...,,,,71.500000,69.000000,18.000000,118.000000,35.700000,,1.0


## Look at data quality report (admin + labs + vitals)

In [4]:
from evidently import ColumnMapping
from evidently.dashboard import Dashboard
from evidently.dashboard.tabs import DataQualityTab

# Data-quality dashboard over the processed features.
dashboard = Dashboard(tabs=[DataQualityTab()])

# Tell evidently which feature columns are numerical and which are categorical.
column_mapping = ColumnMapping(
    numerical_features=feature_handler.get_numerical_features(),
    categorical_features=feature_handler.get_categorical_features(),
)

# Only reference data is supplied here (current_data=None).
dashboard.calculate(
    reference_data=feature_handler.features,
    current_data=None,
    column_mapping=column_mapping,
)

# Relative path: the HTML report is written relative to the current working directory.
dashboard.save("data_processed.html")