## This notebook shows examples of how to use the cyclops.processor API on MIMIC-IV.

## Get all patients from the 1st Jan 2009 to 6th Jan 2009, join with diagnoses, labs, care-unit (transfers) information and vitals data.

In [None]:
import random

import matplotlib.pyplot as plt
import pandas as pd

from cyclops.feature_handler import FeatureHandler
from cyclops.processor import featurize
from cyclops.processors.aggregate import Aggregator
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    DIAGNOSIS_CODE,
    ENCOUNTER_ID,
    SEX,
    YEAR,
)
from cyclops.processors.impute import Imputer
from cyclops.processors.util import gather_columns
from cyclops.query import mimic

In [None]:
# Patient cohort for the requested date window; every query below is built
# on top of it.
patients = mimic.patients(from_date="2009-01-01", to_date="2009-01-06")

# Diagnoses, labs and vitals are all run with `filter_recognised=True`.
patients_diagnoses = mimic.diagnoses(patients=patients)
patients_labs = mimic.events(patients=patients, category="labs")
patients_vitals = mimic.events(patients=patients, category="routine vital signs")
for recognised_query in (patients_diagnoses, patients_labs, patients_vitals):
    recognised_query.run(filter_recognised=True)

# Care-unit (transfers) information is run without the extra filter argument.
patients_transfers = mimic.care_units(patients=patients)
patients_transfers.run()

# Output file name -> executed query, in the order they are reported and saved.
saved_queries = {
    "diagnoses": patients_diagnoses,
    "labs": patients_labs,
    "vitals": patients_vitals,
    "transfers": patients_transfers,
}

for query in saved_queries.values():
    print(f"{len(query.data)} rows extracted!")

# Persist each result in the working directory; the next cell reads them back.
for file_name, query in saved_queries.items():
    query.save(".", file_name)

## Process queried data into features (static + temporal).

In [None]:
# Reload the saved query results from the working directory.
# NOTE(review): `transfers_data` is loaded here but not passed to `featurize`
# below -- confirm whether it is meant to be used.
diagnoses_data, vitals_data, labs_data, transfers_data = map(
    pd.read_parquet,
    ("diagnoses.gzip", "vitals.gzip", "labs.gzip", "transfers.gzip"),
)

# Columns of the diagnoses table that feed the static features.
static_columns = [ENCOUNTER_ID, AGE, SEX, DIAGNOSIS_CODE, ADMIT_TIMESTAMP]
static_diagnoses_data = gather_columns(diagnoses_data, static_columns)

# Aggregation settings for the temporal data.
# NOTE(review): units of `bucket_size` / `window` are presumably hours -- confirm.
bucket_aggregator = Aggregator(bucket_size=6, window=120)

# Both imputers use the median strategy; only the missingness thresholds differ
# between static and temporal features.
static_imputer = Imputer(
    strategy="median",
    encounter_missingness_threshold=0.25,
    feature_missingness_threshold=0.5,
)
temporal_imputer = Imputer(
    strategy="median",
    encounter_missingness_threshold=0.95,
    feature_missingness_threshold=0.75,
)

feature_handler = featurize(
    static_data=[static_diagnoses_data],
    temporal_data=[labs_data, vitals_data],
    aggregator=bucket_aggregator,
    static_imputer=static_imputer,
    temporal_imputer=temporal_imputer,
    reference_cols=[ADMIT_TIMESTAMP],
)

## Plot example temporal features.

In [None]:
# Distinct encounter IDs taken from the first level of the temporal feature
# index. They are sorted so the candidate order is stable: set iteration order
# is not guaranteed to be the same from one kernel session to the next.
# NOTE(review): assumes encounter IDs are mutually comparable (e.g. all ints).
encounter_ids = sorted(
    set(feature_handler.features["temporal"].index.get_level_values(0))
)

# Draw the example encounter from a dedicated, seeded generator so that
# "Restart & Run All" always plots the same encounter, without altering the
# global `random` state. Change the seed to inspect a different encounter.
EXAMPLE_ENCOUNTER_SEED = 0
encounter_id = random.Random(EXAMPLE_ENCOUNTER_SEED).choice(encounter_ids)

# Plot every numerical temporal feature for the chosen encounter.
temporal_features = feature_handler.get_numerical_feature_names()["temporal"]
feature_handler.plot_features(encounter_id, names=temporal_features)

## Plot a histogram of a static feature (age).

In [None]:
# Plot the static "age" feature.
feature_handler.plot_features(
    aggregate_type="static",
    names="age",
)

## Save the features to file, then create a new FeatureHandler and load them back.

In [None]:
# Location and base name shared by the save and the subsequent load.
features_dir, features_name = ".", "test_features"

# Write the current features out, then rebind `feature_handler` to a fresh
# FeatureHandler and load the saved features into it.
feature_handler.save(features_dir, features_name)
feature_handler = FeatureHandler()
feature_handler.load(features_dir, features_name)