## Get all patient encounters (including ER data) for a hospital between a start date and an end date, together with their diagnoses, vitals, labs and interventions.

In [1]:
import random

import matplotlib.pyplot as plt
import pandas as pd
import sys
import os

sys.path.append('../..')

from cyclops.feature_handler import FeatureHandler
from cyclops.processor import featurize
from cyclops.processors.aggregate import Aggregator
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    DIAGNOSIS_CODE,
    ENCOUNTER_ID,
    HOSPITAL_ID,
    SEX,
    DISCHARGE_DISPOSITION,
    DISCHARGE_TIMESTAMP,
    EVENT_NAME,
    EVENT_TIMESTAMP,
    EVENT_VALUE,
    EVENT_VALUE_UNIT,
    LENGTH_OF_STAY_IN_ER,
    DIAGNOSIS_TYPE
)

from cyclops.processors.constants import SMH
from cyclops.processors.impute import Imputer
from cyclops.processors.util import gather_columns
from cyclops.query import gemini

2022-05-31 19:27:42,009 [1;37mINFO[0m cyclops.orm     - Database setup, ready to run queries!


## Config

In [2]:
# Directory where queried data and processed features are written and read back.
path = "/mnt/nfs/project/delirium/drift_exp/MAY-27-2022/"

# Hospitals to query and the admission date range passed to the patient query.
hospitals = [SMH]
from_date, to_date = "2019-01-01", "2020-02-29"

In [3]:
# Base patient-encounter query (with ER data) for the configured hospitals and dates.
patients = gemini.patients(
    hospitals=hospitals,
    from_date=from_date,
    to_date=to_date,
    include_er_data=True,
)

# One query per data source, all built on top of the same patient query.
patients_diagnoses = gemini.diagnoses(patients=patients)
patients_vitals, patients_labs, patients_interventions = (
    gemini.events(patients=patients, category=event_category)
    for event_category in ("vitals", "lab", "intervention")
)

# (query, label used in the printed summary, file stem used when saving)
query_plan = [
    (patients_diagnoses, "diagnoses", "diagnoses"),
    (patients_vitals, "vitals", "vitals"),
    (patients_labs, "labs", "lab"),
    (patients_interventions, "interventions", "intervention"),
]

# Execute every query first, in order: diagnoses, vitals, labs, interventions.
diagnoses_data, vitals_data, labs_data, interventions_data = [
    query.run() for query, _, _ in query_plan
]

# Report how many rows each query returned.
for rows, (_, label, _) in zip(
    (diagnoses_data, vitals_data, labs_data, interventions_data), query_plan
):
    print(f"{len(rows)} {label} rows extracted!")

# Persist each query's result under `path`.
for query, _, file_stem in query_plan:
    query.save(path, file_stem)

2022-05-31 19:32:53,951 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-05-31 19:32:53,955 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 311.897845 s
2022-05-31 19:39:37,378 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-05-31 19:39:37,382 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 403.425079 s
2022-05-31 21:30:59,871 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-05-31 21:30:59,874 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 6682.489769 s
2022-05-31 21:32:24,746 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
2022-05-31 21:32:24,759 [1;37mINFO[0m cyclops.utils.profile - Finished executing function run_query in 84.882515 s
2022-05-31 21:32:24,762 [1;37mINFO[0m cyclops.query.interface - Saving queried data to /mnt/nfs/project/delirium/drift_exp/MAY-27-2022/diagnoses.gzip


36071 diagnoses rows extracted!
334717 vitals rows extracted!
582105 labs rows extracted!
12734 interventions rows extracted!


2022-05-31 21:32:25,306 [1;37mINFO[0m cyclops.query.interface - Saving queried data to /mnt/nfs/project/delirium/drift_exp/MAY-27-2022/vitals.gzip
2022-05-31 21:32:27,814 [1;37mINFO[0m cyclops.query.interface - Saving queried data to /mnt/nfs/project/delirium/drift_exp/MAY-27-2022/lab.gzip
2022-05-31 21:32:31,034 [1;37mINFO[0m cyclops.query.interface - Saving queried data to /mnt/nfs/project/delirium/drift_exp/MAY-27-2022/intervention.gzip


## Process queried data into features (static + temporal).

In [4]:
# Read the query results back from the parquet files stored under `path`.
diagnoses_data = pd.read_parquet(os.path.join(path, "diagnoses.gzip"))
vitals_data = pd.read_parquet(os.path.join(path, "vitals.gzip"))
labs_data = pd.read_parquet(os.path.join(path, "lab.gzip"))
# NOTE(review): interventions are loaded here but are not passed to featurize()
# below — confirm whether they should be part of `temporal_data`.
interventions_data = pd.read_parquet(os.path.join(path, "intervention.gzip"))

# Columns of the diagnoses table used as static (per-encounter) inputs.
# Each column is listed exactly once so the selection cannot produce a
# duplicated ADMIT_TIMESTAMP column.
static_diagnoses_data = gather_columns(
    diagnoses_data,
    [
        ENCOUNTER_ID,
        AGE,
        SEX,
        DIAGNOSIS_CODE,
        HOSPITAL_ID,
        ADMIT_TIMESTAMP,
        LENGTH_OF_STAY_IN_ER,
        DISCHARGE_DISPOSITION,
        DISCHARGE_TIMESTAMP,
    ],
)

# Build static and temporal features from the diagnoses, labs and vitals tables.
feature_handler = featurize(
    static_data=[static_diagnoses_data],
    temporal_data=[labs_data, vitals_data],
    # NOTE(review): bucket_size/window are presumably in hours — confirm.
    aggregator=Aggregator(bucket_size=6, window=48),
    static_imputer=Imputer(
        strategy="median",
        encounter_missingness_threshold=0.25,
        feature_missingness_threshold=0.5,
    ),
    # strategy=None: presumably only the missingness thresholds are applied to
    # temporal features, with no value imputation — TODO confirm.
    temporal_imputer=Imputer(
        strategy=None,
        encounter_missingness_threshold=0.75,
        feature_missingness_threshold=0.95,
    ),
    reference_cols=[HOSPITAL_ID, ADMIT_TIMESTAMP, DISCHARGE_TIMESTAMP],
)

2022-05-31 21:32:33,196 [1;37mINFO[0m cyclops.processors.util - Processing raw diagnosis codes...
2022-05-31 21:32:33,200 [1;37mINFO[0m cyclops.processors.util - # samples: 36071, # encounters: 5064
2022-05-31 21:32:34,501 [1;37mINFO[0m cyclops.processors.util - Grouping ICD codes to trajectories...
2022-05-31 21:32:34,504 [1;37mINFO[0m cyclops.processors.util - # samples: 36071, # encounters: 5064
2022-05-31 21:32:34,509 [1;37mINFO[0m cyclops.processors.diagnoses - # diagnosis features: 21, # encounters: 5064
2022-05-31 21:32:34,916 [1;37mINFO[0m cyclops.utils.profile - Finished executing function group_diagnosis_codes_to_trajectories in 1.720139 s
2022-05-31 21:32:34,948 [1;37mINFO[0m cyclops.processors.util - Gathering static features...
2022-05-31 21:32:34,952 [1;37mINFO[0m cyclops.processors.util - # samples: 36071, # encounters: 5064
2022-05-31 21:32:34,954 [1;37mINFO[0m cyclops.processors.util - # columns: 9, # encounters: 5064
2022-05-31 21:32:41,432 [1;37mIN

## Create new FeatureHandler and load features from file.

In [5]:
# Round-trip the features through disk: save them from the current handler,
# then load them into a freshly created FeatureHandler.
features_name = "test_features"

feature_handler.save(path, features_name)

feature_handler = FeatureHandler()
feature_handler.load(path, features_name)

2022-05-31 22:03:10,964 [1;37mINFO[0m cyclops.feature_handler - Saving static features to /mnt/nfs/project/delirium/drift_exp/MAY-27-2022/test_features_static.gzip
2022-05-31 22:03:10,983 [1;37mINFO[0m cyclops.feature_handler - Saving temporal features to /mnt/nfs/project/delirium/drift_exp/MAY-27-2022/test_features_temporal.gzip
2022-05-31 22:03:11,000 [1;37mINFO[0m cyclops.feature_handler - Loading features from file...
2022-05-31 22:03:11,002 [1;37mINFO[0m cyclops.feature_handler - Found file to load for static features...
2022-05-31 22:03:11,003 [1;37mINFO[0m cyclops.feature_handler - Successfully loaded static features from file...
2022-05-31 22:03:11,050 [1;37mINFO[0m cyclops.feature_handler - Found file to load for temporal features...
2022-05-31 22:03:11,144 [1;37mINFO[0m cyclops.feature_handler - Successfully loaded temporal features from file...
