In [None]:
import random
import sys

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

sys.append("..")
from baseline_models.temporal.pytorch.optimizer import Optimizer
from baseline_models.temporal.pytorch.temporal_model_utils import *

sys.path.append("../..")
from cyclops.feature_handler import FeatureHandler

%load_ext autoreload
%load_ext nb_black

In [None]:
# Directory handed to FeatureHandler.load together with the name "test_features".
# NOTE(review): absolute, machine-specific path — consider making it configurable.
DIR = "/mnt/nfs/project/delirium/drift_exp/MAY-31-2022/"

# Create the handler first, then fill it via its load method.
feature_handler = FeatureHandler()
feature_handler.load(DIR, "test_features")

In [None]:
# Pull the "static" entry out of the handler's `reference` mapping and show it.
ref_static = feature_handler.reference["static"]
ref_static

In [None]:
# Fetch the "static" feature table and report its two dimensions:
# rows are reported as encounters, columns as static features.
static = feature_handler.features["static"]
num_encounters, num_static_features = static.shape
print("Encounters:", num_encounters)
print("Static Features:", num_static_features)

In [None]:
# Display the static feature table as this cell's output.
static

In [None]:
# Binary outcome column: 1 where the discharge disposition code is one of
# 7, 66, 72 or 73, and 0 everywhere else.
has_mortality_code = static["discharge_disposition"].isin([7, 66, 72, 73])
static["mortality"] = np.where(has_mortality_code, 1, 0)

# Boolean masks over ER length of stay for the half-open ranges
# [0, 7), [7, 14) and [14, 30).
los_er = static["length_of_stay_in_er"]
m1 = los_er.ge(0) & los_er.lt(7)
m2 = los_er.ge(7) & los_er.lt(14)
m3 = los_er.ge(14) & los_er.lt(30)

# NOTE(review): m1-m3, vals and default are not consumed anywhere in this cell;
# presumably they were meant to feed something like np.select — confirm.
vals = [1, 2, 3]
default = 4

In [None]:
# Two-bin histogram of the binary mortality column, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(14, 4))
ax.hist(
    static["mortality"],
    bins=2,
    alpha=0.5,
    width=0.5,
    label="mortality",
)
fig.legend(loc="upper right")
plt.show()

In [None]:
# Pull the "temporal" entry out of the handler's `reference` mapping and show it.
ref_temporal = feature_handler.reference["temporal"]
ref_temporal

In [None]:
# Fetch the "temporal" feature table. Its index has (at least) two levels:
# level 0 is counted as encounters and level 1 as timesteps.
temporal = feature_handler.features["temporal"]
temporal_index = temporal.index

samples = temporal_index.unique(level=0).size
timesteps = temporal_index.unique(level=1).size
features = len(temporal.columns)  # reused below to size the model input

print("Encounters:", samples)
print("Timesteps:", timesteps)
print("Temporal Features:", features)

In [None]:
# Label the column axis "features"; the data-formatting calls further down
# refer to this axis by that name (level="features").
temporal = temporal.rename_axis(columns="features")
temporal

In [None]:
# --- Model / training hyperparameters ---------------------------------------
batch_size = 1
# NOTE(review): the factor 3 presumably matches the number of channels that
# format_dataset emits per feature under "simple" imputation — confirm.
input_dim = features * 3
output_dim = 1
hidden_dim = 128
layer_dim = 1
dropout = 0.2
n_epochs = 10
learning_rate = 1e-3
weight_decay = 1e-6

# Fixed seed so the train/validation/test partition is identical on every run.
split_seed = 42

device = get_device()

outcome = "mortality"


def _encounter_ids(frame):
    """Return the first index level of ``frame``, one entry per row."""
    return frame.index.get_level_values(0)


def _labels_in_input_order(X_split, y_split):
    """Return the ``outcome`` column of ``y_split`` as an ``(n, 1)`` array whose
    rows follow the order in which encounters first appear in ``X_split``.

    ``train_test_split`` shuffles the rows of ``y``, while the boolean masks
    used to build the ``X`` splits keep ``X`` in its original order. Converting
    the shuffled labels straight to NumPy would pair inputs with the labels of
    other encounters.

    Assumes format_dataset emits one sample per encounter in order of first
    appearance in its input — TODO confirm.

    Raises:
        KeyError: an encounter in ``X_split`` has no row in ``y_split``.
        pandas.errors.InvalidIndexError: ``y_split`` holds duplicate encounter
            ids, so a single label per encounter cannot be chosen.
    """
    ordered_ids = _encounter_ids(X_split).unique()
    positions = _encounter_ids(y_split).get_indexer(ordered_ids)
    if (positions < 0).any():
        raise KeyError("temporal split contains encounters with no static row")
    return y_split.iloc[positions][[outcome]].to_numpy()


# --- Restrict both tables to the encounters they have in common -------------
X = temporal[_encounter_ids(temporal).isin(_encounter_ids(static))]
y = static[_encounter_ids(static).isin(_encounter_ids(X))]

# --- Split on the static table: 50% train, 25% validation, 25% test ---------
y_train, y_holdout = train_test_split(y, test_size=1 / 2, random_state=split_seed)
y_val, y_test = train_test_split(
    y_holdout, test_size=1 / 2, random_state=split_seed
)

# --- Select the matching temporal rows and format them for the model --------
X_train = X[_encounter_ids(X).isin(_encounter_ids(y_train))]
X_train_inputs = format_dataset(
    X_train, level="features", imputation_method="simple"
)
X_val = X[_encounter_ids(X).isin(_encounter_ids(y_val))]
X_val_inputs = format_dataset(X_val, level="features", imputation_method="simple")
X_test = X[_encounter_ids(X).isin(_encounter_ids(y_test))]
X_test_inputs = format_dataset(X_test, level="features", imputation_method="simple")

# --- Labels, re-ordered to line up with the inputs --------------------------
y_train = _labels_in_input_order(X_train, y_train)
y_val = _labels_in_input_order(X_val, y_val)
y_test = _labels_in_input_order(X_test, y_test)

# --- Datasets / loaders ------------------------------------------------------
train_dataset = get_data(X_train_inputs, y_train)
train_loader = train_dataset.to_loader(batch_size)

val_dataset = get_data(X_val_inputs, y_val)
val_loader = val_dataset.to_loader(batch_size)

In [None]:
# Collect the constructor arguments for the temporal model in one mapping.
model_params = dict(
    device=device,
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    layer_dim=layer_dim,
    output_dim=output_dim,
    dropout_prob=dropout,
)

# Build the "lstmcell" variant and move it onto the selected device.
model = get_temporal_model("lstmcell", model_params).to(device)

In [None]:
# Loss, optimiser and output activation handed to the training wrapper.
# NOTE(review): BCEWithLogitsLoss already applies a sigmoid internally; confirm
# that Optimizer does not also apply `activation` before computing the loss.
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adagrad(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)
activation = nn.Sigmoid()

opt = Optimizer(
    model=model,
    loss_fn=loss_fn,
    optimizer=optimizer,
    activation=activation,
)

# Fit on the training loader while tracking the validation loader, then plot
# the recorded losses.
opt.train(
    train_loader,
    val_loader,
    batch_size=batch_size,
    n_epochs=n_epochs,
    n_features=input_dim,
)
opt.plot_losses()

In [None]:
# Batch size used for evaluation. It is shared by the loader and by
# opt.evaluate, so it is named once here to keep the two in agreement.
test_batch_size = 1

test_dataset = get_data(X_test_inputs, y_test)

# Unshuffled loader so predictions come back in dataset order.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=test_batch_size,
    shuffle=False,
    drop_last=True,
)
predictions, values, tags = opt.evaluate(
    test_loader,
    batch_size=test_batch_size,
    n_features=input_dim,
)

In [None]:
# Combine the evaluation outputs with the test-split temporal frame into a
# single result table.
df_result = format_predictions(
    predictions,
    values,
    tags,
    X_test,
)

In [None]:
# Preview the first 20 rows of the result table.
df_result.head(20)