## Get all patients from January 2009, and join them with diagnoses, labs, and vitals data.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

from cyclops.feature_handler import FeatureHandler
from cyclops.processor import featurize
from cyclops.processors.aggregate import Aggregator
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    DIAGNOSIS_CODE,
    ENCOUNTER_ID,
    SEX,
    YEAR,
)
from cyclops.processors.impute import Imputer
from cyclops.processors.utils import gather_columns
from cyclops.query import mimic

# Build the patient cohort query (year 2009, month 1) and derive the
# diagnoses, labs, and vitals queries from that cohort.
patients = mimic.patients(years=[2009], months=[1])
patients_diagnoses = mimic.diagnoses(patients=patients)
patients_labs = mimic.events(patients=patients, category="labs")
patients_vitals = mimic.events(patients=patients, category="routine vital signs")

# Execute all three queries first, then report how many rows each returned
# (in the order: diagnoses, labs, vitals).
# NOTE(review): `filter_recognised=True` presumably restricts results to
# recognised events/codes -- confirm against the query interface.
extracts = (patients_diagnoses, patients_labs, patients_vitals)
for extract in extracts:
    extract.run(filter_recognised=True)
for extract in extracts:
    print(f"{len(extract.data)} rows extracted!")

# Persist each extract into the current directory; the feature-processing
# cell reads them back as "<name>.gzip" parquet files.
output_dir = "."
patients_diagnoses.save(output_dir, "diagnoses")
patients_labs.save(output_dir, "labs")
patients_vitals.save(output_dir, "vitals")

## Process queried data into features (static + temporal).

In [None]:
# Load the previously saved extracts (read order: diagnoses, vitals, labs).
diagnoses_data, vitals_data, labs_data = (
    pd.read_parquet(path)
    for path in ("diagnoses.gzip", "vitals.gzip", "labs.gzip")
)

# Static features come from a column subset of the diagnoses extract.
static_columns = [ENCOUNTER_ID, AGE, SEX, DIAGNOSIS_CODE, ADMIT_TIMESTAMP, YEAR]
static_diagnoses_data = gather_columns(diagnoses_data, static_columns)

# NOTE(review): bucket_size/window units are not visible here (presumably
# hours) -- confirm against the Aggregator documentation.
aggregator = Aggregator(bucket_size=6, window=120)

# Both imputers use the median strategy but with different missingness
# thresholds for static and temporal data.
# NOTE(review): the thresholds presumably control which encounters/features
# are dropped for being too sparse -- confirm against the Imputer interface.
static_imputer = Imputer(
    strategy="median",
    encounter_missingness_threshold=0.25,
    feature_missingness_threshold=0.5,
)
temporal_imputer = Imputer(
    strategy="median",
    encounter_missingness_threshold=0.95,
    feature_missingness_threshold=0.75,
)

# Combine static (diagnoses) and temporal (labs, vitals) data into one
# feature handler, with the admit timestamp as the reference column.
feature_handler = featurize(
    static_data=[static_diagnoses_data],
    temporal_data=[labs_data, vitals_data],
    aggregator=aggregator,
    static_imputer=static_imputer,
    temporal_imputer=temporal_imputer,
    reference_cols=[ADMIT_TIMESTAMP],
)

## Plot example temporal features.

In [None]:
# Temporal feature names to draw for the example encounter.
plot_features = [
    "hemoglobin",
    "sodium",
    "temperature fahrenheit",
    "non invasive blood pressure systolic",
    "non invasive blood pressure mean",
    "ph",
]

# Encounter used as the plotting example.
encounter_id = 20448616

# These two lookups are not consumed by the plot call below; they are kept
# available in the notebook namespace (presumably for interactive inspection).
temporal_features = feature_handler.get_numerical_feature_names()["temporal"]
temporal_frame = feature_handler.features["temporal"]
encounter_rows = temporal_frame.loc[encounter_id]
features_encounter = encounter_rows[plot_features]

# Plot the selected features for the chosen encounter.
feature_handler.plot_features(encounter_id, names=plot_features)

## Plot histogram of static features.

In [None]:
# Plot from the "static" aggregate type, requesting only the "age" feature.
feature_handler.plot_features(
    names="age",
    aggregate_type="static",
)

## Save features to file, then create a new FeatureHandler and load the features back from file.

In [None]:
# Save and load use the same directory and base file name, so bind them once.
features_dir, features_name = ".", "test_features"

# Write the current features out, then replace the handler with a fresh,
# empty one and load the saved features into it.
feature_handler.save(features_dir, features_name)
feature_handler = FeatureHandler()
feature_handler.load(features_dir, features_name)