In [1]:
# %load_ext autoreload
# %autoreload 2
import os
import sys
from misc.utils import read_json
from trainer.pl_trainer import Trainer
from dataloader.eicu_reader import eICUDataloader
from dataloader.mimic_reader import MIMICReader
import pytorch_lightning as pl
import numpy as np
from misc.utils import los_metrics,mort_metrics

## Load configuration file

* The `read_json` function reads the hyperparameter file for a specific model and dataset.

* Choose the model you want by assigning the variable model_type to either 'tpc', 'lstm' or 'transformer'.

* For this demo we will only use the MIMIC dataset.


In [11]:
model_type = 'tpc' # you can choose 'lstm' or 'transformer'

# Hyperparameter file for each supported architecture (MIMIC variants only).
CONFIG_PATHS = {
    'tpc': 'config/tpc_config_mimic.json',
    'lstm': 'config/lstm_config_mimic.json',
    'transformer': 'config/transformer_config_mimic.json',
}

# Fail fast on an unsupported name. Without this check `cfg` would be left
# undefined (or silently keep the value from a previous run of this cell).
if model_type not in CONFIG_PATHS:
    raise ValueError(
        f"Unknown model_type {model_type!r}; expected one of {sorted(CONFIG_PATHS)}"
    )

cfg = read_json(CONFIG_PATHS[model_type])

**datapath** is the directory that points to train/val/test folders

In [12]:
# Root directory holding the train/val/test folders, relative to the notebook.
datapath = "./dataset"

Since we are only using the MIMIC dataset, use the MIMIC dataloader.

In [13]:
# One MIMICReader per split, each pointed at its sub-folder of `datapath`.
traindataset, valdataset, testdataset = (
    MIMICReader(f"{datapath}/{split}") for split in ("train", "val", "test")
)

Assign the task you want to do: Multitask, Length of Stay or Mortality predictions

In [14]:
# Prediction task: "multi" for multitask; "los" and "mort" are the single-task alternatives.
cfg.task = "multi"

Once you have your dataloaders and configuration loaded, you can start training the model using the Trainer class.

In [15]:
model = Trainer(cfg)
demo = True  # demo mode: train for a single epoch instead of cfg.epochs

epochs = 1 if demo else cfg.epochs

# Not an ideal way of training: each batch generator opens its CSV files once
# and is consumed after a single pass, so new generators and a new one-epoch
# pl.Trainer are created on every iteration while the same `model` keeps
# accumulating training.
for i in range(epochs):
    print(f"Epoch: {i}")

    # `gpus=0` was deprecated in PyTorch Lightning 1.7 and removed in 2.0;
    # accelerator="cpu" requests the same CPU-only run on old and new releases.
    trainer = pl.Trainer(
        accelerator="cpu",
        max_epochs=1,
        logger=False,
        enable_model_summary=False,
    )

    # NOTE(review): training uses a fixed batch size of 32 while validation
    # uses cfg.batch_size -- confirm this difference is intentional.
    train_batch = traindataset.batch_gen(batch_size=32)
    val_batch = valdataset.batch_gen(batch_size=cfg.batch_size)
    trainer.fit(model, train_batch, val_batch)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Epoch: 0
Epoch 0: : 6it [00:14,  2.50s/it, loss=70.7, train_loss=67.90, val_loss_step=76.20, val_loss_epoch=76.20]


After the model is done training, run the cell below to obtain the predictions of the trained model. Don't worry if it doesn't show any outputs; the predictions are stored in the `model` object.

In [16]:
# Fresh generator over the test split; an already-consumed one cannot be reused.
test_batch = testdataset.batch_gen(batch_size=32)

# Reuses the `trainer` left over from the last training epoch. The call's
# return value is shown as the cell output.
trainer.test(model, test_batch)

Testing DataLoader 0: : 1it [00:01,  1.39s/it]


[{}]

After testing, run the cell below to generate the LoS and Mortality metrics of the trained model.

In [17]:

def _flatten(batches):
    """Flatten one level of nesting (a list of per-batch sequences) into a NumPy array."""
    return np.array([item for batch in batches for item in batch])


print(f'{cfg.model_name} metrics')

# Length-of-stay metrics; skipped when the model collected no LoS targets.
if model.test_y_list:
    y = _flatten(model.test_y_list)
    preds = _flatten(model.test_pred_list)
    los_metrics(y, preds)

# Mortality metrics; skipped when the model collected no mortality targets.
if model.test_y_mort_list:
    y_mort = _flatten(model.test_y_mort_list)
    pred_mort = _flatten(model.test_pred_mort_list)
    mort_metrics(y_mort, pred_mort)

tpc metrics
********** Lenght of Stay Metrics **********
Mean Square Error: 23.941396713256836
Mean Square Log Error: 2.102731466293335
Mean Absolute Error: 3.0441558361053467
Mean Absolute Percentage Error: 101.20042562484741
Rsquare: -0.42291305828983594
Cohen Kappa Score: 0.06299047642206013
********** Mortality Metrics **********
              precision    recall  f1-score   support

        dead       0.00      0.00      0.00      1969
       alive       0.16      1.00      0.28       387

    accuracy                           0.16      2356
   macro avg       0.08      0.50      0.14      2356
weighted avg       0.03      0.16      0.05      2356

Accuracy:0.16426146010186757
AUROC: 0.6638937117045471
AUPRC:0.46487051538066515


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


If you want to compare your trained model against a naive mean/median baseline, run the cells below.

In [18]:
from model.mean_median import mean_median_model

In [19]:
# New generators for both splits: the ones used for training/testing above
# have already been consumed.
train_batch = traindataset.batch_gen(batch_size=32)
test_batch = testdataset.batch_gen(batch_size=cfg.batch_size)

# Naive baseline for comparison with the trained model's metrics.
mean_median_model(train_batch, test_batch)


Total predictions:
Using mean value of 6.287318774569041...
********** Lenght of Stay Metrics **********
Mean Square Error: 23.0802166257416
Mean Square Log Error: 3.222914584607761
Mean Absolute Error: 4.247131589535073
Mean Absolute Percentage Error: 592.3793526827769
Rsquare: -0.37173048902106864
Cohen Kappa Score: 0.0
Using median value of 2.288090229034424...
********** Lenght of Stay Metrics **********
Mean Square Error: 19.07055181318631
Mean Square Log Error: 1.8496397219316225
Mean Absolute Error: 2.8485169178539023
Mean Absolute Percentage Error: 201.25766695910863
Rsquare: -0.1334233897712882
Cohen Kappa Score: 0.0
