In [12]:
import os
import random
import sys

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim

sys.path.append("..")
from baseline_models.temporal.pytorch.optimizer import Optimizer
from baseline_models.temporal.pytorch.utils import *
from alibi_detect.cd import MMDDrift

In [3]:
from cyclops.feature_handler import FeatureHandler
from cyclops.plotter import plot_timeline, set_bars_color, setup_plot
from cyclops.processor import run_data_pipeline
from cyclops.processors.aggregate import Aggregator
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    DIAGNOSIS_CODE,
    DISCHARGE_DISPOSITION,
    DISCHARGE_TIMESTAMP,
    ENCOUNTER_ID,
    EVENT_CATEGORY,
    EVENT_NAME,
    EVENT_TIMESTAMP,
    EVENT_VALUE,
    HOSPITAL_ID,
    LENGTH_OF_STAY_IN_ER,
    RESTRICT_TIMESTAMP,
    SEX,
    TIMESTEP,
    TRIAGE_LEVEL,
    WINDOW_START_TIMESTAMP,
)
from cyclops.processors.constants import SMH
from cyclops.processors.events import (
    combine_events,
    convert_to_events,
    normalize_events,
)
from cyclops.processors.impute import Imputer
from cyclops.processors.statics import compute_statics
from cyclops.processors.string_ops import replace_if_string_match, to_lower
from cyclops.processors.util import (
    create_indicator_variables,
    fill_missing_timesteps,
    gather_columns,
    pivot_aggregated_events_to_features,
)
from cyclops.query import gemini
from cyclops.utils.file import load_dataframe, save_dataframe

# Directory that holds the saved feature files used throughout this notebook.
BASE_DATA_PATH = "/mnt/nfs/project/delirium/drift_exp/risk_of_mortality"
# Load the feature set saved under the name "features" from BASE_DATA_PATH.
# The log output recorded below shows this reads a static and a temporal feature file.
feature_handler = FeatureHandler()
feature_handler.load(BASE_DATA_PATH, "features")

2022-06-24 21:32:50,724 [1;37mINFO[0m cyclops.feature_handler - Loading features from file...
2022-06-24 21:32:50,730 [1;37mINFO[0m cyclops.feature_handler - Found file to load for static features...
2022-06-24 21:32:50,731 [1;37mINFO[0m cyclops.feature_handler - Successfully loaded static features from file...
2022-06-24 21:32:52,288 [1;37mINFO[0m cyclops.feature_handler - Found file to load for temporal features...
2022-06-24 21:32:55,057 [1;37mINFO[0m cyclops.feature_handler - Successfully loaded temporal features from file...


In [11]:
# Hospitals covered by this experiment, and a label built from them
# (codes sorted case-insensitively and joined with "_").
hospital = ["SMH", "MSH", "PMH"]
hosp_label = "_".join(sorted(hospital, key=str.lower))

# `os`, `random` and BASE_DATA_PATH are already provided by the import/setup
# cells at the top of the notebook, so they are not repeated in this cell.

# Declare feature handler.
# The features are re-loaded from disk here so that re-running this cell starts
# from the saved data instead of from frames that were already imputed below.
feature_handler = FeatureHandler()
feature_handler.load(BASE_DATA_PATH, "features")

# Get static and temporal data
static = feature_handler.features["static"]
temporal = feature_handler.features["temporal"]

# Get types of columns
numerical_cols = feature_handler.get_numerical_feature_names()["temporal"]
cat_cols = feature_handler.get_categorical_feature_names()["temporal"]

# Impute numerical columns: forward-fill first, then back-fill whatever is
# still missing at the start of the frame.
# NOTE(review): the fill runs over the whole frame in row order; if rows of
# different encounters are adjacent, values can be carried from one encounter
# into the next — confirm this is intended.
temporal[numerical_cols] = temporal[numerical_cols].ffill().bfill()

# Check no more missingness!
assert not temporal.isna().any().any(), "temporal features still contain missing values"
assert not static.isna().any().any(), "static features contain missing values"

# Combine static and temporal
merged_static_temporal = temporal.combine_first(static)

# Build a new list rather than using `+=`, so the list object returned by the
# feature handler is not modified in place.
numerical_cols = [*numerical_cols, "age"]

2022-06-24 23:06:34,753 [1;37mINFO[0m cyclops.feature_handler - Loading features from file...
2022-06-24 23:06:34,757 [1;37mINFO[0m cyclops.feature_handler - Found file to load for static features...
2022-06-24 23:06:34,759 [1;37mINFO[0m cyclops.feature_handler - Successfully loaded static features from file...
2022-06-24 23:06:34,810 [1;37mINFO[0m cyclops.feature_handler - Found file to load for temporal features...
2022-06-24 23:06:37,381 [1;37mINFO[0m cyclops.feature_handler - Successfully loaded temporal features from file...


NameError: name 'fractions' is not defined

In [26]:
# --- Data location -----------------------------------------------------------
DIR = "/mnt/nfs/project/delirium/drift_exp/risk_of_mortality"
split_type = "random"

# --- Rolling-window settings (all counted in steps along axis 1 of `series`) --
# n_start_window: initial length of the reference window
# n_end_window:   length of the test window
# n_window:       gap between the end of the reference window and the start of
#                 the test window
# NOTE(review): n_window is set to 2 here, but the rolling_window call further
# down passes the literal 1 for this argument — confirm which value is intended.
n_start_window = 1
n_end_window = 1
n_window = 2

# p-value cut-off used by rolling_window to decide between its two branches.
threshold = 0.05

# Test split to scan for drift; rolling_window indexes it with three axes.
series = np.load(os.path.join(DIR, split_type, "X_test.npy"))

def rolling_window(n_start_window, n_end_window, n_window, series, threshold, verbose=True):
    """Slide a reference/test window pair along axis 1 of ``series`` and collect MMD distances.

    At position ``i`` the reference window is
    ``series[:, max(i - run_length, 0):i, :]`` and the test window is
    ``series[:, i + n_window : i + n_window + n_end_window, :]``. Both windows
    are flattened to 2-D (axis 0 and axis 1 merged, axis 2 kept) and compared
    with ``MMDDrift``.

    * If the returned p-value is ``>= threshold``, the distance is recorded,
      followed by ``n_end_window - 1`` zeros; the position advances by
      ``n_end_window`` and the reference window grows by ``n_start_window``.
    * Otherwise the distance is recorded, the position advances by one and the
      reference window length is reset to ``n_start_window``.

    The scan stops when the test window would run past ``series.shape[1]`` or
    when either flattened window has two rows or fewer.

    Parameters
    ----------
    n_start_window : int
        Initial (and reset) length of the reference window.
    n_end_window : int
        Length of the test window.
    n_window : int
        Gap between the end of the reference window and the start of the test
        window.
    series : numpy.ndarray
        3-D array; windows are taken along axis 1.
    threshold : float
        p-value cut-off selecting between the two branches above. It is also
        passed to ``MMDDrift`` as ``p_val`` (previously a hard-coded ``.05``),
        which does not affect the p-value or distance read from the result.
    verbose : bool, optional
        When True (default) print the window bounds and the distances
        collected so far at every step, as the original implementation did.

    Returns
    -------
    numpy.ndarray
        1-D float array of the recorded distances (empty if no window fits).
        Earlier versions returned this array wrapped in a 1-tuple because of a
        stray trailing comma, which broke ``dist_vals.shape`` at the call site.
    """
    # Accumulate in a list and convert once at the end; concatenating arrays
    # inside the loop copies the whole result on every step.
    distances = []

    run_length = int(n_start_window)
    i = n_start_window

    while i + n_end_window + n_window <= series.shape[1]:
        ref_start = max(int(i) - run_length, 0)
        ref_stop = int(i)
        test_start = max(int(i) + n_window, 0)
        test_stop = int(i) + n_end_window + n_window
        if verbose:
            print(ref_start, "-", ref_stop, "-->", test_start, "-", test_stop)

        # Merge the first two axes so each row is one (sample, step) pair.
        reference = series[:, ref_start:ref_stop, :]
        reference = reference.reshape(
            reference.shape[0] * reference.shape[1], reference.shape[2]
        )
        test = series[:, test_start:test_stop, :]
        test = test.reshape(test.shape[0] * test.shape[1], test.shape[2])
        if test.shape[0] <= 2 or reference.shape[0] <= 2:
            break

        ## run distribution shift check here
        cd = MMDDrift(reference, backend="pytorch", p_val=threshold)
        preds = cd.predict(test, return_p_val=True, return_distance=True)
        p_val = preds["data"]["p_val"]
        dist_val = preds["data"]["distance"]
        if verbose:
            print(np.asarray(distances, dtype=float))

        distances.append(dist_val)
        if p_val >= threshold:
            # Pad with zeros for the positions skipped by the jump below.
            distances.extend([0.0] * (n_end_window - 1))
            i += n_end_window
            run_length += n_start_window
        else:
            i += 1
            run_length = n_start_window

    return np.asarray(distances, dtype=float)

# Run the rolling-window drift scan over `series`.
# NOTE(review): the third argument (gap between the two windows) is the literal 1,
# not the `n_window` variable defined above (which is 2) — confirm which is intended.
dist_vals = rolling_window(n_start_window, n_end_window, 1, series, threshold)

0 - 1 --> 2 - 3
No GPU detected, fall back on CPU.
[]
1 - 2 --> 3 - 4
No GPU detected, fall back on CPU.
[0.05022074]
2 - 3 --> 4 - 5
No GPU detected, fall back on CPU.
[0.05022074 0.46022977]
3 - 4 --> 5 - 6
No GPU detected, fall back on CPU.
[0.05022074 0.46022977 0.37153142]


In [27]:
# Inspect the shape of the result of the rolling_window call above.
print(dist_vals.shape)

(1738,)


In [23]:
# NOTE(review): DIR is assigned here but not used in this cell — query_data()
# is called with BASE_DATA_PATH. Confirm which directory is intended.
DIR = "/mnt/nfs/project/delirium/drift_exp/_extract_v2"

# Risk window in days (converted to hours in the LOS filter below).
timeframe = 14
# When True, keep only as many non-mortality encounters as there are
# mortality encounters.
limit_tn = True

# NOTE(review): query_data, split_encounters_bymortality, create_mortality_events,
# create_events, get_static_features and LOS are neither defined nor explicitly
# imported in the visible cells (the recorded run failed with
# "NameError: name 'query_data' is not defined"). Add the explicit import
# before relying on this cell.

## query data
(
    encounters_data,
    labs_data,
    imaging_data,
    transfusions_data,
    interventions_data,
) = query_data(BASE_DATA_PATH)
# Fixed: this previously passed the undefined name `encounter_data`.
encounters_mortality, encounters_not_mortality = split_encounters_bymortality(
    encounters_data
)

if limit_tn:
    # Despite its name, this is the number of *mortality* encounters; it caps
    # how many non-mortality encounters are kept.
    num_encounters_not_mortality = len(encounters_mortality)
    encounters_not_mortality_subset = encounters_not_mortality[
        0:num_encounters_not_mortality
    ]
else:
    # Without the cap, use every non-mortality encounter. Previously this
    # name was left undefined and the concat below raised NameError.
    encounters_not_mortality_subset = encounters_not_mortality

encounters_train_val_test = pd.concat(
    [encounters_mortality, encounters_not_mortality_subset], ignore_index=True
)
# Mortality encounters whose LOS value is at most `timeframe` days.
encounters_mortality_within_risk_timeframe = encounters_mortality.loc[
    encounters_mortality[LOS] <= pd.to_timedelta(timeframe * 24, unit="h")
]

mortality_events = create_mortality_events(
    encounters_mortality_within_risk_timeframe, encounters_mortality
)
combined_events = create_events(
    encounters_train_val_test,
    labs_data,
    imaging_data,
    transfusions_data,
    interventions_data,
    mortality_events,
)
static_features = get_static_features(encounters_train_val_test)



NameError: name 'query_data' is not defined

In [None]:
# Run the shift experiment and unpack its four results.
# NOTE(review): run_shift_experiment and the upper-case settings passed below
# are not defined in the visible cells; they must be defined or imported
# before this cell can run.
(mean_p_vals, std_p_vals, mean_dist, std_dist) = run_shift_experiment(
    outcome=OUTCOME,
    hospital=HOSPITAL,
    path=PATH,
    dr_technique=DR_TECHNIQUE,
    md_test=MD_TEST,
    samples=SAMPLES,
    dataset=DATASET,
    sign_level=SIGN_LEVEL,
    na_cutoff=NA_CUTOFF,
    random_runs=RANDOM_RUNS,
    calc_acc=CALC_ACC,
    bucket_size=6,
    window=6,
)