In [None]:
from datetime import datetime, timedelta

import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error as mse

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping


from utils import *
from models import *

In [None]:
# An empty CUDA_VISIBLE_DEVICES exposes no GPU to CUDA, so the notebook runs on CPU.
# NOTE(review): this is set after `import tensorflow`; it only takes effect if
# TensorFlow has not initialised its devices yet - consider moving it above the import.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [None]:
# Load the hourly time-series table (one row per subject/episode and hour).
first_48_data = pd.read_csv(
    '../../../../data/datasets/mimiciv_timeseries/mimiciv_timeseries.csv'
)

# Preview the first rows as the cell output.
first_48_data.head()

In [None]:
# Pass the raw frame through the project helper `convert_to_nan` and rebind the result.
# NOTE(review): presumably replaces placeholder/sentinel values with NaN - the helper is
# defined outside this notebook; confirm. Re-running this cell feeds the already-converted
# frame back into the helper.
first_48_data = convert_to_nan(first_48_data)

In [None]:
# Load the per-episode label table; it is merged onto the time series further down
# using the 'subject', 'subject_id' and 'episode_num' columns.
label_data = pd.read_csv(
    'mimic_iv_label_data.csv'
)

label_data.head()

In [None]:
# Load the patient-cluster table; it is merged onto the time series further down
# using the 'subject_id' column.
patient_clusters = pd.read_csv(
    'mimic_iv_patient_clusters.csv'
)

patient_clusters.head()

In [None]:
# Derive numeric subject_id and episode_num from the composite 'subject' key.
# NOTE(review): the parsing implies keys shaped like
# '<subject_id>_<7-character prefix><episode number>...'; confirm against the CSV.

subject_w_ep = first_48_data['subject']

# Text before the first underscore -> integer subject id.
subject_ids = subject_w_ep.apply(lambda key: int(key.partition('_')[0]))

# Second underscore-separated field with its first 7 characters removed -> integer episode number.
episode_nums = subject_w_ep.apply(lambda key: int(key.split('_')[1][7:]))

In [None]:
# Attach the parsed identifiers as new columns (in-place column assignment on the frame).
# These columns are join keys for the label / cluster merges below.
first_48_data['subject_id'] = subject_ids
first_48_data['episode_num'] = episode_nums

# Preview the frame with the two new columns.
first_48_data.head()

In [None]:
# Rename the "time(hr)" column to "Hours".
# The frame is rebound instead of mutated with inplace=True, matching the rename of
# 'Unnamed: 0_x' later in this notebook and avoiding in-place mutation of a frame that
# earlier cells have already displayed.
# NOTE(review): downstream helpers presumably look the column up as "Hours" - confirm.
first_48_data = first_48_data.rename(columns={"time(hr)": "Hours"})

In [None]:
# Inner-join the time series with the labels (per subject/episode) and then with the
# patient clusters (per subject_id); only rows with a match in both tables survive.
# NOTE(review): neither join is validated for key uniqueness - duplicate keys in
# label_data or patient_clusters would multiply rows.
first_48_data = (
    first_48_data
    .merge(label_data, how='inner', on=['subject', 'subject_id', 'episode_num'])
    .merge(patient_clusters, how='inner', on='subject_id')
)

first_48_data.head()

In [None]:
# Discard the leftover 'Unnamed: 0_y' / 'Unnamed: 0' columns and keep 'Unnamed: 0_x'
# under the name 'original_idx' so the original row indexes are retained.
# NOTE(review): these look like saved-index columns suffixed by the merges above - confirm.
first_48_data = (
    first_48_data
    .drop(columns=['Unnamed: 0_y', 'Unnamed: 0'])
    .rename(columns={'Unnamed: 0_x': 'original_idx'})
)

In [None]:
# One group per distinct 'subject' key (i.e. per admission/episode string).
data = first_48_data.groupby(by='subject')

# Number of groups.
print(len(data))

In [None]:
# Build the model inputs X and targets y from the per-subject groups using the project
# helper `create_time_series_data`.
# NOTE(review): the layout of X and y is defined by the helper, which is outside this
# notebook; a later cell indexes the derived arrays as [sample][:, feature] - confirm.
X, y = create_time_series_data(data)

In [None]:
from sklearn.model_selection import train_test_split

random_seed = 33

# Seed every random number generator the notebook relies on, not only the split below.
# Previously random_seed was passed to train_test_split alone, so weight initialisation
# and any sampling inside the project helpers changed from run to run.
random.seed(random_seed)
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# 80/20 train/test split; the integer random_state makes the partition itself repeatable
# independently of the global seeds above.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_seed)


In [None]:
# Standardizing the data
# `create_std_data` is a project helper; it returns replacements for X, X_train and X_test
# plus the fitted `scalers`. The three input names are rebound to the returned values, so
# re-running this cell would pass already-processed arrays back into the helper.
# NOTE(review): which split the scalers are fitted on is not visible here - confirm it is
# the training split only.

X, X_train, X_test, scalers = create_std_data(X, X_train, X_test)

In [None]:
# Per-feature means for the full, training and test sets, computed in that order with the
# project helper `get_feature_means`.
all_feature_means, train_feature_means, test_feature_means = (
    get_feature_means(split) for split in (X, X_train, X_test)
)

In [None]:
# Mean-imputation baseline via the project helper `create_mean_imputed_data`.
# NOTE(review): `train_feature_means` is passed for both means arguments, so the test set
# is presumably imputed with training means (no leakage) - confirm this is intentional,
# since `test_feature_means` is computed above but not used here.
X_train_mean_imputed, X_test_mean_imputed = create_mean_imputed_data(X_train, X_test, train_feature_means, train_feature_means)

In [None]:
# Prepare VAE inputs with the project helper `vae_preprocessing`; it returns processed
# train/test arrays plus one mask per split.
# NOTE(review): the masks presumably flag observed vs. missing entries - confirm in the helper.
processed_X_train, processed_X_test, train_mask, test_mask = vae_preprocessing(X_train, X_test)

In [None]:
# Configure the CNN-VAE wrapper (project class `cnn_vae`) and obtain its model.
cnn_vae_instance = cnn_vae(
    n_filters=32,
    kernel_size=5,
    learning_rate=1e-4,
    sequence_length=48,
    n_features=48,
)

cnn_vae_model = cnn_vae_instance.get_model()

# Train/evaluate on the masked inputs; returns the trained model and the train/test
# reconstructions.
# NOTE(review): the meaning of the trailing positional 1 is defined by
# `train_eval_vae_model`, which is outside this notebook - confirm.
trained_cnn_vae_model, cnn_reconstruc_train, cnn_reconstruc_test = train_eval_vae_model(
    cnn_vae_model,
    processed_X_train,
    processed_X_test,
    train_mask,
    test_mask,
    1,
)



In [None]:
# Overlay the reconstruction and the processed input for one test sample (index a)
# and one column of that sample (index b).
a, b = 0, 0

plt.figure(figsize=(9, 5))
plt.plot(cnn_reconstruc_test[a][:, b], color='red', label='reconstructed')
plt.plot(processed_X_test[a][:, b], color='blue', alpha=0.6, label='original')
plt.legend()

In [None]:
# Build the CNN-VAE imputed train/test sets from the standardized data and the reconstructions
# using the project helper `imputed_vae_data`.
# NOTE(review): presumably missing entries are filled from the reconstruction while observed
# values are kept - confirm in the helper.
cnn_X_train_imputed, cnn_X_test_imputed = imputed_vae_data(X_train, X_test, cnn_reconstruc_train, cnn_reconstruc_test)


In [None]:
# Task-specific datasets for both imputation methods (mean baseline and CNN-VAE).
# Each project helper receives imputed train/test inputs plus y_train/y_test and returns
# the (X_train, X_test, y_train, y_test) tuple for its task.

# Readmission data for each method
(readm_mean_X_train, readm_mean_X_test,
 readm_mean_y_train, readm_mean_y_test) = readm_preprocessing(
    X_train_mean_imputed, X_test_mean_imputed, y_train, y_test)
(readm_cnn_X_train, readm_cnn_X_test,
 readm_cnn_y_train, readm_cnn_y_test) = readm_preprocessing(
    cnn_X_train_imputed, cnn_X_test_imputed, y_train, y_test)

# Mortality data for each method
(mortality_mean_X_train, mortality_mean_X_test,
 mortality_mean_y_train, mortality_mean_y_test) = mortality_preprocessing(
    X_train_mean_imputed, X_test_mean_imputed, y_train, y_test)
(mortality_cnn_X_train, mortality_cnn_X_test,
 mortality_cnn_y_train, mortality_cnn_y_test) = mortality_preprocessing(
    cnn_X_train_imputed, cnn_X_test_imputed, y_train, y_test)

# Length of stay data for each method
(los_mean_X_train, los_mean_X_test,
 los_mean_y_train, los_mean_y_test) = los_preprocessing(
    X_train_mean_imputed, X_test_mean_imputed, y_train, y_test)
(los_cnn_X_train, los_cnn_X_test,
 los_cnn_y_train, los_cnn_y_test) = los_preprocessing(
    cnn_X_train_imputed, cnn_X_test_imputed, y_train, y_test)



In [None]:
# Instantiate the LSTM classification model and the LSTM regression model
# (both built by project factories, in that order).
class_model, reg_model = create_class_model(), create_reg_model()

In [None]:
# Train, evaluate and predict mortality with the classification model on CNN-VAE imputed data.
# NOTE(review): the meaning of the positional 2 and 200 is defined by
# `train_eval_pred_model`, which is outside this notebook - confirm.
cnn_mort_model, cnn_mort_preds = train_eval_pred_model(
    class_model, 2, 200,
    mortality_cnn_X_train, mortality_cnn_X_test,
    mortality_cnn_y_train, mortality_cnn_y_test,
)

In [None]:
# Train, evaluate and predict length of stay with the regression model on CNN-VAE imputed data.
# NOTE(review): the meaning of the positional 1 and 200 is defined by
# `train_eval_pred_model`, which is outside this notebook - confirm.
cnn_los_model, cnn_los_preds = train_eval_pred_model(
    reg_model, 1, 200,
    los_cnn_X_train, los_cnn_X_test,
    los_cnn_y_train, los_cnn_y_test,
)

In [None]:
# LR cnn ihm
# Collapse axes 1 and 2 of each sample into one flat feature vector (the model helper is
# given 2-D input), then fit/evaluate via the project helper `train_test_lr_model`.
mortality_cnn_X_train_2d = mortality_cnn_X_train.reshape(
    mortality_cnn_X_train.shape[0],
    mortality_cnn_X_train.shape[1] * mortality_cnn_X_train.shape[2],
)

mortality_cnn_X_test_2d = mortality_cnn_X_test.reshape(
    mortality_cnn_X_test.shape[0],
    mortality_cnn_X_test.shape[1] * mortality_cnn_X_test.shape[2],
)

train_test_lr_model(
    mortality_cnn_X_train_2d, mortality_cnn_X_test_2d,
    mortality_cnn_y_train, mortality_cnn_y_test,
)



In [None]:
# XGB cnn ihm
# The flattened arrays are rebuilt here so this cell does not depend on the LR cell
# having been run; then fit/evaluate via the project helper `train_test_XGBoost_class`.
mortality_cnn_X_train_2d = mortality_cnn_X_train.reshape(
    mortality_cnn_X_train.shape[0],
    mortality_cnn_X_train.shape[1] * mortality_cnn_X_train.shape[2],
)

mortality_cnn_X_test_2d = mortality_cnn_X_test.reshape(
    mortality_cnn_X_test.shape[0],
    mortality_cnn_X_test.shape[1] * mortality_cnn_X_test.shape[2],
)

train_test_XGBoost_class(
    mortality_cnn_X_train_2d, mortality_cnn_X_test_2d,
    mortality_cnn_y_train, mortality_cnn_y_test,
)



In [None]:
# XGB cnn los
# Collapse axes 1 and 2 of each sample into one flat feature vector, then fit/evaluate
# the length-of-stay regressor via the project helper `train_test_XGBoost_reg`.
los_cnn_X_train_2d = los_cnn_X_train.reshape(
    los_cnn_X_train.shape[0],
    los_cnn_X_train.shape[1] * los_cnn_X_train.shape[2],
)

los_cnn_X_test_2d = los_cnn_X_test.reshape(
    los_cnn_X_test.shape[0],
    los_cnn_X_test.shape[1] * los_cnn_X_test.shape[2],
)

train_test_XGBoost_reg(
    los_cnn_X_train_2d, los_cnn_X_test_2d,
    los_cnn_y_train, los_cnn_y_test,
)



