# MIMIC-III

In [27]:
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
import os
import seaborn as sns
import numpy as np
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
import json
import pandas as pd
from models.cvae import VariationalAutoencoder, vae_loss_fn

from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

seed = 804

In [28]:
class MIMICDATASET(Dataset):
    def __init__(self, x_t,x_s, y, train=None, transform=None):
        # Transform
        self.transform = transform
        self.train = train
        self.xt = x_t
        self.xs = x_s
        self.y = y

    def return_data(self):
        return self.xt, self.xs, self.label

    def __len__(self):
        return len(self.xt)

    def __getitem__(self, idx):
        sample = self.xt[idx]
        stat = self.xs[idx]
        sample_y = self.y[idx]
        return sample, stat, sample_y

In [42]:
def LR_evaluation(test_S, test_X, test_y):
    
    N,D = test_X.shape
    _,d = test_S.shape
    
    X_all = np.hstack([test_S, test_X.reshape((N,D))])
    y_all = test_y
    
    scaler = MinMaxScaler()
    X_all = scaler.fit_transform(X_all)
    
    print(X_all.shape, y_all.shape)
    Xtr, Xte, ytr, yte = train_test_split(X_all, y_all, stratify=y_all, random_state=1)
    
    clf = LogisticRegression()
    clf.fit(Xtr, ytr)
    
    score = metrics.roc_auc_score(yte, clf.decision_function(Xte))
    print('Test AUROC score:', score)


In [30]:
def MIMIC_syn_data(model_name, folder = "Synthetic_MIMIC"):
    
    model_list = ["vae", "medDiff", "flexgen"]
    if model_name not in model_list:
        raise ValueError(f"model_name must be in {model_list}")

    static_npy_path = folder + "/" + model_name + "_static.npy"
    temporal_npy_path =  folder + "/" + model_name + "_temporal.npy"
    
    test_S = np.load(static_npy_path)
    test_X = np.load(temporal_npy_path)
    
    df_pop = pd.read_csv('FIDDLE_mimic3/population/mortality_48h.csv')
    test_y = torch.tensor(df_pop["mortality_LABEL"].values).to(torch.float32)
    
    return test_S, test_X, test_y

## Original

In [31]:
# test_S = np.load('FIDDLE_mimic3/features/mortality_48h/s.npz')
# test_X = np.load('FIDDLE_mimic3/features/mortality_48h/X.npz')
# test_S = torch.sparse_coo_tensor(torch.tensor(test_S['coords']), torch.tensor(test_S['data'])).to_dense().to(torch.float32)
# test_X = torch.sparse_coo_tensor(torch.tensor(test_X['coords']), torch.tensor(test_X['data'])).to_dense().to(torch.float32)
# 
# df_pop = pd.read_csv('FIDDLE_mimic3/population/mortality_48h.csv')
# test_y = torch.tensor(df_pop["mortality_LABEL"].values).to(torch.float32)
# 
# test_X = test_X.sum(dim=1)
# LR_evaluation(test_S, test_X, test_y)

(8577, 7403) torch.Size([8577])
Test AUROC score: 0.8700061210321128


## VAE

In [62]:
test_S, test_X, test_y = MIMIC_syn_data("vae")
LR_evaluation(test_S, test_X, test_y)

(8577, 7403) torch.Size([8577])
Test AUROC score: 1.0


## MedDiff

In [61]:
test_S, test_X, test_y = MIMIC_syn_data("medDiff")
y = np.concatenate((np.zeros(2000), np.ones(2000)))
LR_evaluation(test_S, test_X, y)

(4000, 7403) (4000,)
Test AUROC score: 0.50344


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## FlexGen

In [57]:
test_S, test_X, test_y = MIMIC_syn_data("flexgen")
y = np.concatenate((np.zeros(2000), np.ones(2000)))
LR_evaluation(test_S, test_X, y)

(4000, 7403) (4000,)
Test AUROC score: 0.50344


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
