## This notebook shows examples of how to use the cyclops.processor API on MIMIC-IV.

## Get all patients from 1 Jan 2009 to 6 Jan 2009, and join them with diagnoses, labs, care-unit (transfers) and vitals data.

In [None]:
import random

import matplotlib.pyplot as plt
import pandas as pd

from cyclops.feature_handler import FeatureHandler
from cyclops.processor import run_data_pipeline
from cyclops.processors.aggregate import Aggregator
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    CARE_UNIT,
    DIAGNOSIS_CODE,
    ENCOUNTER_ID,
    SEX,
    YEAR,
)
from cyclops.processors.impute import Imputer
from cyclops.processors.util import gather_columns
from cyclops.query import mimic
from cyclops.query.postprocess.mimic import process_mimic_care_unit_changepoints
from cyclops.query.postprocess.util import plot_admit_discharge

In [None]:
# Query the care-unit records for the full patients table and materialise them.
patients = mimic.patients()
care_units = mimic.care_units(patients_table=patients.query).run()

# The ten encounter ids with the most care-unit rows (value_counts sorts
# by descending frequency, so the first entry is the most frequent).
encounter_counts = care_units["encounter_id"].value_counts()
greatest = list(encounter_counts.index[:10])

encounter_id = greatest[0]  # SET encounter_id

# Keep only the chosen encounter's rows, ordered by the "admit" column.
care_units_i = care_units.loc[
    care_units["encounter_id"] == encounter_id
].sort_values(by="admit")

In [None]:
# Plot the selected encounter's care-unit rows, labelled by care unit, on a
# wide, short canvas.
plot_admit_discharge(
    care_units_i,
    figsize=(40, 4),
    description=CARE_UNIT,
)

In [None]:
# Apply the MIMIC care-unit changepoint post-processing to the selected
# encounter's rows. Left as a bare expression so the notebook displays the
# returned value as the cell output.
process_mimic_care_unit_changepoints(care_units_i)

In [None]:
# Cohort for this cell: encounters restricted by the after/before date bounds.
encounters = mimic.patient_encounters(after_date="2009-01-01", before_date="2009-01-06")

# Build every query up front. Each query function is handed the underlying
# `.query` table through its `*_table` keyword.
patients_diagnoses = mimic.patient_diagnoses(patients_table=encounters.query)
patients_labs = mimic.events(
    patient_encounters_table=encounters.query, categories="labs"
)
patients_vitals = mimic.events(
    patient_encounters_table=encounters.query, categories="routine vital signs"
)
# Use the same call form as the care-unit exploration cell
# (`patients_table=<interface>.query`) instead of passing the interface object
# under a `patients` keyword.
# NOTE(review): `patients` comes from that earlier cell and is not limited to
# the date range above; if `care_units` accepts an encounters table, consider
# `patients_table=encounters.query` so transfers match the cohort. Confirm.
patients_transfers = mimic.care_units(patients_table=patients.query)

# Execute the queries.
diagnoses_data = patients_diagnoses.run()
labs_data = patients_labs.run()
vitals_data = patients_vitals.run()
transfers_data = patients_transfers.run()

# Report row counts in the order: diagnoses, labs, vitals, transfers.
for extracted in (diagnoses_data, labs_data, vitals_data, transfers_data):
    print(f"{len(extracted)} rows extracted!")

# Persist each query result to the current directory for the next cell to read.
patients_diagnoses.save(".", "diagnoses")
patients_labs.save(".", "labs")
patients_vitals.save(".", "vitals")
patients_transfers.save(".", "transfers")

## Process queried data into features (static + temporal).

In [None]:
# Reload the saved query results from the working directory.
# (transfers_data is loaded here but not passed to the pipeline below.)
labs_data = pd.read_parquet("query_labs.gzip")
vitals_data = pd.read_parquet("query_vitals.gzip")
diagnoses_data = pd.read_parquet("query_diagnoses.gzip")
transfers_data = pd.read_parquet("query_transfers.gzip")

# Static inputs: one selection of per-encounter columns from the diagnoses.
static_columns = [ENCOUNTER_ID, AGE, SEX, DIAGNOSIS_CODE, ADMIT_TIMESTAMP]
static_diagnoses_data = gather_columns(diagnoses_data, static_columns)

# Temporal inputs (labs + vitals) are aggregated with the settings below.
# NOTE(review): units of bucket_size/window are not visible here; presumably
# hours. Confirm against the Aggregator documentation.
aggregator = Aggregator(bucket_size=6, window=120)
feature_handler = run_data_pipeline(
    static_data=[static_diagnoses_data],
    temporal_data=[labs_data, vitals_data],
    aggregator=aggregator,
)

## Plot example temporal features.

In [None]:
# Names of the numerical temporal features to draw.
temporal_features = feature_handler.get_numerical_feature_names()["temporal"]

# Distinct values of the first index level of the temporal features; one is
# picked at random as the encounter to plot.
temporal_index = feature_handler.features["temporal"].index
encounter_ids = list(set(temporal_index.get_level_values(0)))
encounter_id = random.choice(encounter_ids)

feature_handler.plot_features(encounter_id, names=temporal_features)

## Plot histogram of static features.

In [None]:
# Plot the static "age" feature.
feature_handler.plot_features(
    names="age",
    aggregate_type="static",
)

## Create new FeatureHandler and load features from file.

In [None]:
# Round-trip the features: save them, then load them into a new handler.
features_dir, features_name = ".", "test_features"
feature_handler.save(features_dir, features_name)

# Rebind to an empty handler and populate it from the files just saved.
feature_handler = FeatureHandler()
feature_handler.load(features_dir, features_name)