## Get all patients from January 2009 and join them with diagnoses, labs, and vitals data.

In [1]:
import matplotlib.pyplot as plt
import pandas as pd

from cyclops.feature_handler import FeatureHandler
from cyclops.processor import featurize
from cyclops.processors.aggregate import Aggregator
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    DIAGNOSIS_CODE,
    ENCOUNTER_ID,
    SEX,
    YEAR,
)
from cyclops.processors.impute import Imputer
from cyclops.processors.utils import gather_columns
from cyclops.query import mimic

# Build the patient cohort query (year 2009, month 1) and derive the three
# dependent queries from it: diagnoses, lab events and routine vital-sign events.
patients = mimic.patients(years=[2009], months=[1])

patients_diagnoses = mimic.diagnoses(patients=patients)
patients_labs = mimic.events(patients=patients, category="labs")
patients_vitals = mimic.events(patients=patients, category="routine vital signs")

# One row per extraction: (query object, keyword arguments for run(), save name).
# The diagnoses query is run without arguments; both event queries are run with
# filter_recognised=True.
extraction_plan = (
    (patients_diagnoses, {}, "diagnoses"),
    (patients_labs, {"filter_recognised": True}, "labs"),
    (patients_vitals, {"filter_recognised": True}, "vitals"),
)

# Execute every query first, then report row counts, then save; the three
# phases are kept separate so the call order matches diagnoses -> labs -> vitals
# within each phase.
for extraction_query, run_options, extraction_name in extraction_plan:
    extraction_query.run(**run_options)

for extraction_query, run_options, extraction_name in extraction_plan:
    print(f"{len(extraction_query.data)} rows extracted!")

# Save each result into the current directory under its save name.
for extraction_query, run_options, extraction_name in extraction_plan:
    extraction_query.save(".", extraction_name)

2022-05-04 01:31:33,721 [1;37mINFO[0m cyclops.orm     - Database setup, ready to run queries!
2022-05-04 01:31:37,555 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-05-04 01:31:37,556 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 3.809986 s
2022-05-04 01:32:05,676 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-05-04 01:32:05,677 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 28.119584 s
2022-05-04 01:32:39,293 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-05-04 01:32:39,294 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 33.615553 s
2022-05-04 01:32:39,295 [1;37mINFO[0m cyclops.query.interface - Saving queried data to ./diagnoses.gzip


203509 rows extracted!
363634 rows extracted!
1340251 rows extracted!


2022-05-04 01:32:39,736 [1;37mINFO[0m cyclops.query.interface - Saving queried data to ./labs.gzip
2022-05-04 01:32:39,875 [1;37mINFO[0m cyclops.query.interface - Saving queried data to ./vitals.gzip


## Process queried data into features (static + temporal).

In [3]:
# Load the three parquet extracts from the working directory
# (read in the order diagnoses, vitals, labs).
diagnoses_data, vitals_data, labs_data = (
    pd.read_parquet(extract_path)
    for extract_path in ("diagnoses.gzip", "vitals.gzip", "labs.gzip")
)

# Columns pulled out of the diagnoses extract to serve as the static input.
static_columns = [ENCOUNTER_ID, AGE, SEX, DIAGNOSIS_CODE, ADMIT_TIMESTAMP, YEAR]
static_diagnoses_data = gather_columns(diagnoses_data, static_columns)

# Aggregation settings for the temporal data.
# NOTE(review): units of bucket_size/window are not visible here
# (presumably hours) — confirm against the Aggregator documentation.
temporal_aggregator = Aggregator(bucket_size=6, window=120)

# Median imputation for both feature families, with different missingness
# thresholds for static and temporal features.
static_feature_imputer = Imputer(
    strategy="median",
    encounter_missingness_threshold=0.25,
    feature_missingness_threshold=0.5,
)
temporal_feature_imputer = Imputer(
    strategy="median",
    encounter_missingness_threshold=0.95,
    feature_missingness_threshold=0.75,
)

# NOTE(review): the recorded run of this cell ends with
# "AttributeError: 'float' object has no attribute 'lower'" raised inside
# featurize. Presumably a missing value (float NaN) sits in a column that is
# processed as a string — verify which static column contains nulls before
# relying on this cell.
feature_handler = featurize(
    static_data=[static_diagnoses_data],
    temporal_data=[labs_data, vitals_data],
    aggregator=temporal_aggregator,
    static_imputer=static_feature_imputer,
    temporal_imputer=temporal_feature_imputer,
    reference_cols=[ADMIT_TIMESTAMP],
)

2022-05-04 01:33:27,842 [1;37mINFO[0m cyclops.processors.utils - Processing raw diagnosis codes...
2022-05-04 01:33:27,846 [1;37mINFO[0m cyclops.processors.utils - # samples: 203509, # encounters: 19421
2022-05-04 01:33:31,211 [1;37mINFO[0m cyclops.processors.utils - Grouping ICD codes to trajectories...
2022-05-04 01:33:31,214 [1;37mINFO[0m cyclops.processors.utils - # samples: 203509, # encounters: 19421
2022-05-04 01:33:31,225 [1;37mINFO[0m cyclops.processors.diagnoses - # diagnosis features: 22, # encounters: 19421
2022-05-04 01:33:39,619 [1;37mINFO[0m cyclops.utils.profile - Finished executing function group_diagnosis_codes_to_trajectories in 11.777074 s
2022-05-04 01:33:39,651 [1;37mINFO[0m cyclops.processors.utils - Gathering static features...
2022-05-04 01:33:39,654 [1;37mINFO[0m cyclops.processors.utils - # samples: 203509, # encounters: 19421
2022-05-04 01:33:39,655 [1;37mINFO[0m cyclops.processors.utils - # columns: 5, # encounters: 19421
2022-05-04 01:33:

AttributeError: 'float' object has no attribute 'lower'