## Get all patient encounters (including ER data) with diagnoses, vitals, labs, and interventions from the selected hospitals between a start date and an end date.

In [None]:
import os
import random
import sys

import matplotlib.pyplot as plt
import pandas as pd

# Extend the module search path before importing `cyclops`
# (presumably the package lives two directories up -- confirm).
sys.path.append('../..')

from cyclops.feature_handler import FeatureHandler
from cyclops.processor import featurize
from cyclops.processors.aggregate import Aggregator
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    DIAGNOSIS_CODE,
    DIAGNOSIS_TYPE,
    DISCHARGE_DISPOSITION,
    DISCHARGE_TIMESTAMP,
    ENCOUNTER_ID,
    EVENT_NAME,
    EVENT_TIMESTAMP,
    EVENT_VALUE,
    EVENT_VALUE_UNIT,
    HOSPITAL_ID,
    LENGTH_OF_STAY_IN_ER,
    SEX,
)
from cyclops.processors.constants import (
    MSH,
    PMH,
    SBK,
    SMH,
    THPC,
    THPM,
    UHNTG,
    UHNTW,
)
from cyclops.processors.impute import Imputer
from cyclops.processors.util import gather_columns
from cyclops.query import gemini

## Config

In [None]:
# Directory used below both to save the query results and to read them back.
path = "/mnt/nfs/project/delirium/drift_exp/"

# Hospital identifiers passed to the patient query; all of them must be
# imported from cyclops.processors.constants at the top of the notebook.
hospitals = [SMH, THPM, SBK, UHNTG, UHNTW, THPC, PMH, MSH]

# Bounds of the study period as ISO "YYYY-MM-DD" strings.
# NOTE(review): the visible query filters by `years` only; these bounds are
# not passed to it -- confirm whether the query should also receive them.
from_date = "2018-01-01"
to_date = "2020-12-31"

# Calendar years spanned by the study period. Derived from the bounds above
# so the two settings cannot drift apart (evaluates to [2018, 2019, 2020]).
years = list(range(pd.Timestamp(from_date).year, pd.Timestamp(to_date).year + 1))

In [None]:
# Cohort query: patients for the configured hospitals and years, ER data included.
patients = gemini.patients(include_er_data=True, years=years, hospitals=hospitals)

# One query object per data source, all derived from the same cohort query.
patients_diagnoses = gemini.diagnoses(patients=patients)
patients_vitals, patients_labs, patients_interventions = (
    gemini.events(patients=patients, category=event_category)
    for event_category in ("vitals", "lab", "intervention")
)

# Execute the queries in the same order they were built.
diagnoses_data, vitals_data, labs_data, interventions_data = (
    pending_query.run()
    for pending_query in (
        patients_diagnoses,
        patients_vitals,
        patients_labs,
        patients_interventions,
    )
)

# Report how many rows each query returned, one line per source.
print(
    f"{len(diagnoses_data)} diagnoses rows extracted!",
    f"{len(vitals_data)} vitals rows extracted!",
    f"{len(labs_data)} labs rows extracted!",
    f"{len(interventions_data)} interventions rows extracted!",
    sep="\n",
)

# Save each query under `path` with its own name.
# NOTE(review): the next cell reads "query_<name>.gzip" files -- presumably
# that is the naming scheme `save` uses; confirm against the query interface.
patients_diagnoses.save(path, "diagnoses")
patients_vitals.save(path, "vitals")
patients_labs.save(path, "lab")
patients_interventions.save(path, "intervention")

## Process queried data into features (static + temporal).

In [None]:
# Reload the query results from the parquet files under `path`, so this cell
# can run without re-executing the queries.
diagnoses_data = pd.read_parquet(os.path.join(path, "query_diagnoses.gzip"))
vitals_data = pd.read_parquet(os.path.join(path, "query_vitals.gzip"))
labs_data = pd.read_parquet(os.path.join(path, "query_lab.gzip"))
# NOTE(review): interventions_data is loaded but not passed to `featurize`
# below -- confirm whether it should be part of `temporal_data`.
interventions_data = pd.read_parquet(os.path.join(path, "query_intervention.gzip"))

# Columns taken from the diagnoses table as static data. Each column is
# listed exactly once: ADMIT_TIMESTAMP previously appeared twice, which would
# request a duplicated column.
static_diagnoses_data = gather_columns(
    diagnoses_data,
    [
        ENCOUNTER_ID,
        AGE,
        SEX,
        DIAGNOSIS_CODE,
        HOSPITAL_ID,
        ADMIT_TIMESTAMP,
        LENGTH_OF_STAY_IN_ER,
        DISCHARGE_DISPOSITION,
        DISCHARGE_TIMESTAMP,
    ],
)

# Build features from the static diagnoses columns plus the lab and vitals
# tables. Static data is imputed with the median strategy; temporal data has
# no imputation strategy (strategy=None) but still carries missingness
# thresholds.
feature_handler = featurize(
    static_data=[static_diagnoses_data],
    temporal_data=[labs_data, vitals_data],
    # presumably bucket_size and window are in hours -- TODO confirm
    aggregator=Aggregator(bucket_size=6, window=48),
    static_imputer=Imputer(
        strategy="median",
        encounter_missingness_threshold=0.25,
        feature_missingness_threshold=0.5,
    ),
    temporal_imputer=Imputer(
        strategy=None,
        encounter_missingness_threshold=0.75,
        feature_missingness_threshold=0.95,
    ),
    reference_cols=[HOSPITAL_ID, ADMIT_TIMESTAMP, DISCHARGE_TIMESTAMP],
)

## Save features to file, then create a new FeatureHandler and load the features back from file.

In [None]:
# Name shared by the save and the load below, so both refer to the same files.
features_name = "test_features"

# Save the computed features under `path`.
feature_handler.save(path, features_name)

# Replace the handler with a fresh instance and load the saved features into it.
feature_handler = FeatureHandler()
feature_handler.load(path, features_name)