### This notebook shows examples of how to use the cyclops.processor API on GEMINI.

## Get all patient encounters with diagnoses from St. Michael's Hospital (SMH) between March 1, 2020 and March 15, 2020, along with their vitals and labs.

In [None]:
from cyclops.processors.constants import SMH
from cyclops.processors.utils import gather_columns
from cyclops.query import gemini
from cyclops.processors.column_names import (
    AGE,
    ENCOUNTER_ID,
    DIAGNOSIS_CODE,
    SEX,
)


# Define the cohort: SMH encounters selected with from_date="2020-03-01" and
# to_date="2020-03-15".
# NOTE(review): whether the date bounds are inclusive is not visible here — confirm.
patients = gemini.patients(
    hospitals=[SMH], from_date="2020-03-01", to_date="2020-03-15"
)
# Derive diagnoses, vitals and labs for that same cohort.
# NOTE(review): presumably these calls only build queries and .run() executes
# them — confirm against the cyclops.query API.
patients_diagnoses = gemini.diagnoses(patients=patients)
patients_vitals = gemini.events(patients=patients, category="vitals")
patients_labs = gemini.events(patients=patients, category="labs")
# Run each one and keep the returned data; in this cell it is only used for the
# row counts printed below.
diagnoses_data = patients_diagnoses.run()
vitals_data = patients_vitals.run()
labs_data = patients_labs.run()

print(f"{len(diagnoses_data)} diagnoses rows extracted!")
print(f"{len(vitals_data)} vitals rows extracted!")
print(f"{len(labs_data)} labs rows extracted!")

# Save each result under the current directory (".") using the given file name.
# NOTE(review): the files are named "*.gzip" but are later read back with
# pd.read_parquet — presumably save() writes parquet; confirm.
patients_diagnoses.save(".", "diagnoses.gzip")
patients_vitals.save(".", "vitals.gzip")
patients_labs.save(".", "labs.gzip")

## Process queried data into features (static aggregation, no imputation).

In [None]:
import pandas as pd

from cyclops.processor import Aggregator, Imputer, featurize

# Reload the three saved extracts with pandas' parquet reader, in the order
# diagnoses, vitals, labs.
diagnoses_data, vitals_data, labs_data = map(
    pd.read_parquet, ("diagnoses.gzip", "vitals.gzip", "labs.gzip")
)

# Reduce the diagnoses table via gather_columns to the encounter id, age, sex
# and diagnosis code columns before handing it over as static data.
diagnoses_data = gather_columns(
    diagnoses_data,
    [ENCOUNTER_ID, AGE, SEX, DIAGNOSIS_CODE],
)

# Featurize: diagnoses are passed as static data, labs and vitals as temporal
# data, with the "none" imputation strategy and the "static" aggregation
# strategy (range_=24, window=24).
# NOTE(review): the units of range_/window are not visible here — presumably
# hours; confirm against the Aggregator documentation.
feature_handler = featurize(
    static_data=[diagnoses_data],
    temporal_data=[labs_data, vitals_data],
    imputer=Imputer(strategy="none"),
    aggregator=Aggregator(strategy="static", range_=24, window=24),
)

## Inspect the feature handler and check its features.

In [None]:
# Print the numerical features first, then the categorical ones, each on its
# own line.
print(
    feature_handler.get_numerical_features(),
    feature_handler.get_categorical_features(),
    sep="\n",
)

# Keep the features as the cell's final expression so the notebook renders them.
feature_handler.features

## Look at the data quality report (static patient features + labs + vitals)

In [None]:
from evidently import ColumnMapping
from evidently.dashboard import Dashboard
from evidently.dashboard.tabs import DataQualityTab

# Build an evidently dashboard that contains only the data-quality tab.
dashboard = Dashboard(tabs=[DataQualityTab()])

# Tell evidently which columns are numerical and which are categorical, using
# the lists reported by the feature handler.
column_mapping = ColumnMapping(
    numerical_features=feature_handler.get_numerical_features(),
    categorical_features=feature_handler.get_categorical_features(),
)

# Only a reference dataset is supplied (current_data is None): the report
# describes the processed features on their own.
dashboard.calculate(
    reference_data=feature_handler.features,
    current_data=None,
    column_mapping=column_mapping,
)

# Plain string literal: the file name has no placeholders, so the f-prefix
# used previously was unnecessary.
dashboard.save("data_processed.html")