In [None]:
from datetime import datetime, timedelta

import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error as mse

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import EarlyStopping


from utils import *
from models import *

In [None]:
# Hide every CUDA device from this process by exporting an empty device list.
# NOTE(review): this runs after `import tensorflow`; it is normally still honoured
# as long as no GPU has been initialised yet, but setting it before the import is safer.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

In [None]:
# Load the MIMIC-IV time-series table from CSV (path is relative to this notebook).

# The original author describes this file as hourly data.
first_48_data = pd.read_csv('../../../../data/datasets/mimiciv_timeseries/mimiciv_timeseries.csv')


first_48_data.head()

In [None]:
# convert_to_nan is not defined in this notebook (it arrives via the star-imports from
# utils/models). Presumably it replaces missing-value placeholders with NaN — confirm.
first_48_data = convert_to_nan(first_48_data)

In [None]:
# Load the label table from the notebook's working directory and preview it.

label_data = pd.read_csv('mimic_iv_label_data.csv')
label_data.head()

In [None]:
# Load the patient-cluster table from the notebook's working directory and preview it.

patient_clusters = pd.read_csv('mimic_iv_patient_clusters.csv')
patient_clusters.head()

In [None]:
# Derive a numeric subject id and episode number from the 'subject' string key.

subject_w_ep = first_48_data['subject']

# The text before the first '_' is parsed as the integer subject id.
subject_ids = subject_w_ep.apply(lambda x: int(x.split('_')[0]))
# The second '_'-separated piece has its first 7 characters dropped before int();
# presumably that strips an "episode" prefix — confirm against the raw key format.
episode_nums = subject_w_ep.apply(lambda x: int(x.split('_')[1][7:]))

In [None]:
# Attach the parsed ids as new columns; they are used as merge keys further down.
first_48_data['subject_id'] = subject_ids
first_48_data['episode_num'] = episode_nums

first_48_data.head()

In [None]:
# Give the time column the name "Hours" (rebinding instead of mutating in place).
first_48_data = first_48_data.rename(columns={"time(hr)": "Hours"})

In [None]:
# Join the time series with its labels (keyed per episode) and then with the
# per-patient cluster table. Both merges use pandas' default join type, so rows
# without a match on either side are dropped.
first_48_data = (
    first_48_data
    .merge(label_data, on=['subject', 'subject_id', 'episode_num'])
    .merge(patient_clusters, on='subject_id')
)

first_48_data.head()

In [None]:
# Discard two of the leftover 'Unnamed: 0*' index columns produced by the CSV reads
# and merges, and keep 'Unnamed: 0_x' under the name 'original_idx' so the original
# row indexes are retained.
# Re-running this cell raises KeyError because the dropped columns are already gone.
first_48_data = (
    first_48_data
    .drop(columns=['Unnamed: 0_y', 'Unnamed: 0'])
    .rename(columns={'Unnamed: 0_x': 'original_idx'})
)

In [None]:
# Preview the merged frame after the index-column cleanup.
first_48_data.head()

In [None]:
# Group rows by the 'subject' key (the original author treats one key as one admission).

data = first_48_data.groupby('subject')
# Number of distinct groups.
print(len(data))

In [None]:
# Build model inputs X and labels y from the grouped frame. create_time_series_data is a
# project helper not visible here, so the shape and type of X and y should be confirmed there.
X, y = create_time_series_data(data)

In [None]:
# Overwrites X with the output of the project helper extract_obs_seq, so re-running this
# cell feeds already-processed data back in. What the helper extracts is not visible here.
X = extract_obs_seq(X)

In [None]:
# NOTE(review): `idxs` is not assigned in any visible cell. Unless it is exported by the
# star-imports from utils/models, this cell raises NameError on a fresh Restart & Run All.
# Presumably it should hold the index labels of the samples that survive extract_obs_seq —
# confirm and define it explicitly before this line.
y = y.loc[idxs]
y.head()

In [None]:
from sklearn.model_selection import train_test_split

random_seed = 33

# Seed every RNG the notebook imports, not only the split below, so that model
# initialisation and any step that draws from these generators repeat on a fresh
# Restart & Run All.
random.seed(random_seed)
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# Two splits with the same seed yield the same partition. The second set of variables is
# kept separately so it can be min-max scaled while the first is standardized.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_seed)
X_train_minmax, X_test_minmax, y_train_minmax, y_test_minmax = train_test_split(
    X, y, test_size=0.2, random_state=random_seed)


In [None]:
# Standardize the data. This rebinds X, X_train and X_test to the helper's outputs, so
# re-running the cell passes already-transformed data back in. `scalers` is kept for evaluation.

X, X_train, X_test, scalers = create_std_data(X, X_train, X_test)

# Min-max scale the second copy of the split; `train_minmax_scalers` is kept for evaluation.

X_train_minmax, X_test_minmax, train_minmax_scalers = create_minmax_data(X_train_minmax, X_test_minmax)
    
    

In [None]:
# Per-feature means for each data set (get_feature_means is a project helper).

# These three are computed from the standardized arrays.
all_feature_means = get_feature_means(X)
train_feature_means = get_feature_means(X_train)
test_feature_means = get_feature_means(X_test)


# These two are computed from the min-max-scaled arrays.
train_feature_means_minmax = get_feature_means(X_train_minmax)
test_feature_means_minmax  = get_feature_means(X_test_minmax)

In [None]:
# Mean-imputed copies of the min-max-scaled train and test sets.
# The training means are passed for both of the last two arguments, and
# test_feature_means_minmax is not used. Presumably this is deliberate, so that the test
# set is imputed with training statistics only — confirm against create_mean_imputed_data.
X_train_mean_imputed, X_test_mean_imputed = create_mean_imputed_data(
    X_train_minmax,
    X_test_minmax,
    train_feature_means_minmax,
    train_feature_means_minmax,
)

In [None]:
# Obtain the MissForest imputer from the min-max-scaled training set only
# (the helper presumably fits it — confirm in utils).
miss_forest_imputer = get_miss_forest_imputer(X_train_minmax)

In [None]:
# Produce MissForest-imputed train and test sets using the imputer obtained above.
X_train_mf_imputed, X_test_mf_imputed = create_mf_imputed_data(X_train_minmax, X_test_minmax, miss_forest_imputer)

In [None]:
# Prepare VAE inputs and the accompanying masks from the min-max-scaled data.
# Presumably the masks flag observed versus missing entries — confirm in vae_preprocessing.
processed_X_train, processed_X_test, train_mask, test_mask = vae_preprocessing(X_train_minmax, X_test_minmax)

In [None]:
# Build the CNN VAE for sequences of length 48 with 48 features, then train and
# evaluate it on the preprocessed, masked data.
cnn_vae_instance = cnn_vae(
    n_filters=32,
    kernel_size=5,
    learning_rate=1e-4,
    sequence_length=48,
    n_features=48,
)
cnn_vae_model = cnn_vae_instance.get_model()

trained_cnn_vae_model, cnn_reconstruc_train, cnn_reconstruc_test = train_eval_vae_model(
    cnn_vae_model,
    processed_X_train, processed_X_test,
    train_mask, test_mask,
    1,  # presumably the batch size — confirm against train_eval_vae_model
)



In [None]:
# Build the LSTM VAE with the same hyper-parameters as the CNN VAE, then train and
# evaluate it on the same preprocessed, masked data.
lstm_vae_instance = lstm_vae(
    n_filters=32,
    kernel_size=5,
    learning_rate=1e-4,
    sequence_length=48,
    n_features=48,
)
lstm_vae_model = lstm_vae_instance.get_model()

trained_lstm_vae_model, lstm_reconstruc_train, lstm_reconstruc_test = train_eval_vae_model(
    lstm_vae_model,
    processed_X_train, processed_X_test,
    train_mask, test_mask,
    1,  # presumably the batch size — confirm against train_eval_vae_model
)



In [None]:
# Noisy copy of the min-max-scaled training set. The 0.2 is presumably the noise or
# extra-missingness level — confirm against create_X_train_w_noise.
X_train_w_noise = create_X_train_w_noise(X_train_minmax, 0.2)

In [None]:
# KNN-impute the noisy training set; these imputers are the ones reused for evaluation
# and for the test set later on.
X_train_noise, knn_imputers = knn_impute_data(X_train_w_noise)

# KNN-impute the clean min-max training set; this becomes the autoencoder's target.
X_train_knn, minmax_knn_imputers =  knn_impute_data(X_train_minmax)


In [None]:
# Zero-filled copies of the min-max data: every NaN becomes 0, other values are unchanged.
processed_X_train_minmax = np.where(np.isnan(X_train_minmax), 0, X_train_minmax)
processed_X_test_minmax = np.where(np.isnan(X_test_minmax), 0, X_test_minmax)

In [None]:
# Train the LSTM autoencoder to map the KNN-imputed noisy training set onto the
# KNN-imputed clean training set.
# NOTE(review): fit() receives a single input array here, whereas the later predict()
# cells pass a three-element list [data, zero-filled data, mask]. Confirm which form
# the model built by lstm_ae.get_lstm_ae() actually expects.
lstm_ae_instance = lstm_ae(learning_rate=1e-3, sequence_length=48, n_features=48)
lstm_ae_model = lstm_ae_instance.get_lstm_ae()

# Stop once val_loss has not improved by at least 0.001 for 10 epochs and roll back
# to the best weights seen.
es = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    min_delta=0.001,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)
lstm_ae_model.fit(
    x=X_train_noise,
    y=X_train_knn,
    batch_size=1,
    epochs=100,
    validation_split=0.2,
    shuffle=False,
    callbacks=[es],
)



In [None]:
# Evaluate every imputer on the min-max-scaled test set at level 0.1
# (presumably the rate of artificially removed values — confirm in all_eval).
all_eval(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    0.1,
)

In [None]:
# Evaluate every imputer on the min-max-scaled test set at level 0.2
# (presumably the rate of artificially removed values — confirm in all_eval).
# The feature means must be on the same scale as the data: train_feature_means is
# computed from the standardized X_train, so the min-max means are passed instead,
# matching the 0.1 evaluation.
all_eval(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    0.2,
)

In [None]:
# Evaluate every imputer on the min-max-scaled test set at level 0.3
# (presumably the rate of artificially removed values — confirm in all_eval).
# The feature means must be on the same scale as the data: train_feature_means is
# computed from the standardized X_train, so the min-max means are passed instead,
# matching the 0.1 evaluation.
all_eval(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    0.3,
)

In [None]:
# Evaluate every imputer on the min-max-scaled test set at level 0.4
# (presumably the rate of artificially removed values — confirm in all_eval).
# The feature means must be on the same scale as the data: train_feature_means is
# computed from the standardized X_train, so the min-max means are passed instead,
# matching the 0.1 evaluation.
all_eval(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    0.4,
)

In [None]:
# Evaluate every imputer on the min-max-scaled test set at level 0.5
# (presumably the rate of artificially removed values — confirm in all_eval).
# The feature means must be on the same scale as the data: train_feature_means is
# computed from the standardized X_train, so the min-max means are passed instead,
# matching the 0.1 evaluation.
all_eval(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    0.5,
)

In [None]:
# Evaluate every imputer on the min-max-scaled test set at level 0.6
# (presumably the rate of artificially removed values — confirm in all_eval).
# The feature means must be on the same scale as the data: train_feature_means is
# computed from the standardized X_train, so the min-max means are passed instead,
# matching the 0.1 evaluation.
all_eval(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    0.6,
)

In [None]:
# Evaluate every imputer on the min-max-scaled test set at level 0.7
# (presumably the rate of artificially removed values — confirm in all_eval).
# The feature means must be on the same scale as the data: train_feature_means is
# computed from the standardized X_train, so the min-max means are passed instead,
# matching the 0.1 evaluation.
all_eval(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    0.7,
)

In [None]:
# Evaluate every imputer on the min-max-scaled test set at level 0.8
# (presumably the rate of artificially removed values — confirm in all_eval).
# The feature means must be on the same scale as the data: train_feature_means is
# computed from the standardized X_train, so the min-max means are passed instead,
# matching the 0.1 evaluation.
all_eval(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    0.8,
)

In [None]:
# Second evaluation variant at level sqrt(0.1) (what distinguishes all_eval_v2 from
# all_eval is defined in the project helpers — confirm there).
# The data passed in is min-max scaled, while train_feature_means is computed from the
# standardized X_train, so the min-max training means are used to keep both on one scale.
all_eval_v2(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    np.sqrt(0.1),
)

In [None]:
# Second evaluation variant at level sqrt(0.2) (what distinguishes all_eval_v2 from
# all_eval is defined in the project helpers — confirm there).
# The data passed in is min-max scaled, while train_feature_means is computed from the
# standardized X_train, so the min-max training means are used to keep both on one scale.
all_eval_v2(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    np.sqrt(0.2),
)

In [None]:
# Second evaluation variant at level sqrt(0.3) (what distinguishes all_eval_v2 from
# all_eval is defined in the project helpers — confirm there).
# The data passed in is min-max scaled, while train_feature_means is computed from the
# standardized X_train, so the min-max training means are used to keep both on one scale.
all_eval_v2(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    np.sqrt(0.3),
)

In [None]:
# Second evaluation variant at level sqrt(0.4) (what distinguishes all_eval_v2 from
# all_eval is defined in the project helpers — confirm there).
# The data passed in is min-max scaled, while train_feature_means is computed from the
# standardized X_train, so the min-max training means are used to keep both on one scale.
all_eval_v2(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    np.sqrt(0.4),
)

In [None]:
# Second evaluation variant at level sqrt(0.5) (what distinguishes all_eval_v2 from
# all_eval is defined in the project helpers — confirm there).
# The data passed in is min-max scaled, while train_feature_means is computed from the
# standardized X_train, so the min-max training means are used to keep both on one scale.
all_eval_v2(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    np.sqrt(0.5),
)

In [None]:
# Second evaluation variant at level sqrt(0.6) (what distinguishes all_eval_v2 from
# all_eval is defined in the project helpers — confirm there).
# The data passed in is min-max scaled, while train_feature_means is computed from the
# standardized X_train, so the min-max training means are used to keep both on one scale.
all_eval_v2(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    np.sqrt(0.6),
)

In [None]:
# Second evaluation variant at level sqrt(0.7) (what distinguishes all_eval_v2 from
# all_eval is defined in the project helpers — confirm there).
# The data passed in is min-max scaled, while train_feature_means is computed from the
# standardized X_train, so the min-max training means are used to keep both on one scale.
all_eval_v2(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    np.sqrt(0.7),
)

In [None]:
# Second evaluation variant at level sqrt(0.8) (what distinguishes all_eval_v2 from
# all_eval is defined in the project helpers — confirm there).
# The data passed in is min-max scaled, while train_feature_means is computed from the
# standardized X_train, so the min-max training means are used to keep both on one scale.
all_eval_v2(
    X_test_minmax, X_test_minmax,
    scalers, train_minmax_scalers,
    trained_cnn_vae_model, trained_lstm_vae_model,
    1,
    miss_forest_imputer,
    train_feature_means_minmax,
    knn_imputers,
    lstm_ae_model,
    np.sqrt(0.8),
)

In [None]:

# Impute the min-max test set with the KNN imputers returned for the noisy training set,
# so no imputer is derived from test data in this step.
X_test_knn = knn_impute_data_w_trained(X_test_minmax, knn_imputers)

In [None]:
# Reconstruct the training set with the LSTM autoencoder, one sample per batch.
# NOTE(review): predict() is given three inputs [KNN-imputed noisy data, zero-filled data,
# mask], but the fit() call earlier in the notebook passes only X_train_noise as x.
# Confirm the model's expected inputs; one of the two calls looks inconsistent.
lstm_ae_reconstruc_train = lstm_ae_model.predict([X_train_noise, processed_X_train_minmax, train_mask], batch_size=1)

In [None]:
# Compare the three models' reconstructions with the zero-filled original for one
# training sample (a) and one feature column (b).
a = 1
b = 0

plt.figure(figsize=(9,5))
for _recon, _name, _colour in (
    (cnn_reconstruc_train, 'cnn reconstruction', 'red'),
    (lstm_reconstruc_train, 'lstm reconstruction', 'green'),
    (lstm_ae_reconstruc_train, 'dynimp reconstruction', 'purple'),
):
    plt.plot(_recon[a][:,b], label=_name, c=_colour)
# The original series is drawn last and semi-transparent so the reconstructions stay visible.
plt.plot(processed_X_train_minmax[a][:,b], c='blue', label='original', alpha=0.6)
plt.legend()

In [None]:
# Reconstruct the test set with the LSTM autoencoder, one sample per batch.
# NOTE(review): as with the training-set prediction, three inputs are passed here while
# fit() earlier received a single array — confirm the model's expected inputs.
lstm_ae_reconstruc_test = lstm_ae_model.predict([X_test_knn, processed_X_test_minmax, test_mask], batch_size=1)

In [None]:
# Compare the three models' reconstructions with the zero-filled original for one
# test sample (a) and one feature column (b).
a = 1
b = 0

plt.figure(figsize=(9,5))
for _recon, _name, _colour in (
    (cnn_reconstruc_test, 'cnn reconstruction', 'red'),
    (lstm_reconstruc_test, 'lstm reconstruction', 'green'),
    (lstm_ae_reconstruc_test, 'dynimp reconstruction', 'purple'),
):
    plt.plot(_recon[a][:,b], label=_name, c=_colour)
# The original series is drawn last and semi-transparent so the reconstructions stay visible.
plt.plot(processed_X_test_minmax[a][:,b], c='blue', label='original', alpha=0.6)
plt.legend()