In [1]:
from data_processing import *
from MBTCN_Module import *
from training_evaluation import *

import sys
import numpy as np
import pandas as pd
import sklearn
import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("Python Version:", sys.version)
print('Pytorch Version:', torch.__version__)
print('Numpy Version:', np.__version__)
print('Pandas Version:', pd.__version__)
print('Sklearn Version:', sklearn.__version__)
print('===========================================')
print("Num GPUs Available: ",  torch.cuda.device_count())
print('===========================================')
print('Device Information:',torch.cuda.get_device_name(0))

Python Version: 3.9.12 (main, Apr  4 2022, 05:22:27) [MSC v.1916 64 bit (AMD64)]
Pytorch Version: 1.13.1
Numpy Version: 1.23.4
Pandas Version: 2.0.3
Sklearn Version: 1.2.2
Num GPUs Available:  1
Device Information: NVIDIA RTX A4500


In [2]:
# read data from psv file, two data files from https://physionet.org/content/challenge-2019/1.0.0/
list_A, list_B = load_data_list('training/training/','training_setB/training_setB/')

#create training and test missingness mask
mask_A, mask_B = missing_mask_matrix(list_A, list_B)

#fill nan
list_A, list_B = fill_nan(list_A, list_B)

#split predictors and response from lists
A_predictors, B_predictors, A_label, B_label = split_predictor_response(list_A, list_B) 

#get training sample indice
sepsis_index_A, nonsepsis_index_A = get_index(A_label)
sepsis_index_B, nonsepsis_index_B = get_index(B_label)

#pads sequences to the same length (3D tensor): SHAPE (N, L, C)
A_padded = create_tensor(A_predictors, padding_value=0)
B_padded = create_tensor(B_predictors, padding_value=0)
A_mask_padded = create_tensor(mask_A, padding_value=0)
B_mask_padded = create_tensor(mask_B, padding_value=0)

# reshape (N, C, L)
A_padded = A_padded.reshape(-1, 40, 336)
B_padded = B_padded.reshape(-1, 40, 336)
A_mask_padded = A_mask_padded.reshape(-1, 40, 336)
B_mask_padded = B_mask_padded.reshape(-1, 40, 336)

In [3]:
print("Summary Information:")
print('----------------------'*2)
total = len(sepsis_index_A) + len(nonsepsis_index_A)
print('Training set:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, len(sepsis_index_A), 100 * len(sepsis_index_A) / total))
print('----------------------'*2)
total2 = len(sepsis_index_B) + len(nonsepsis_index_B)
print('Testing set:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total2, len(sepsis_index_B), 100 * len(sepsis_index_B) / total2))

Summary Information:
--------------------------------------------
Training set:
    Total: 20336
    Positive: 1790 (8.80% of total)

--------------------------------------------
Testing set:
    Total: 20000
    Positive: 1142 (5.71% of total)



In [4]:
# initialize MB-TCN model

num_inputs = 40
num_channels = [16,16]
n_outputs = 1
n_branches = 10              
kernel_size = 10
dropout = 0.4
batch_size = 128

model = MBTCN(num_inputs = num_inputs, 
              num_channels = num_channels, 
              n_outputs = n_outputs, 
              n_branches = n_branches,                 
              kernel_size = kernel_size, 
              dropout = dropout).to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

#create training set
subset_list = create_subsets(A_padded, A_label, A_mask_padded, n_subsets = 10, minority_class = 1)

#create validation set (in this demo, I select training_B from physionet.org as validation set)
val_loader = val_create(B_padded, B_label, B_mask_padded)

In [290]:
# Training the model
num_epochs = 60
train_model(sublist = subset_list, 
            val_loader = val_loader, 
            model = model, 
            criterion = criterion, 
            optimizer = optimizer, 
            num_epochs = num_epochs, 
            MB_NUM = n_branches, 
            batch_size = batch_size)

Loss: 0.4243 - Accuracy: 0.9349 - Recall: 0.0053 - Precision: 0.0349 - F1: 0.0091 - AUC: 0.6183 - PR Curve Score: 0.0969
Epoch 1/60 - Train Loss: 3.2477 - Val Loss: 0.4243
Loss: 0.9714 - Accuracy: 0.9032 - Recall: 0.5911 - Precision: 0.3148 - F1: 0.4108 - AUC: 0.7910 - PR Curve Score: 0.3762
Epoch 2/60 - Train Loss: 2.8337 - Val Loss: 0.9714
Loss: 1.2413 - Accuracy: 0.8406 - Recall: 0.6839 - Precision: 0.2165 - F1: 0.3288 - AUC: 0.8159 - PR Curve Score: 0.3858
Epoch 3/60 - Train Loss: 2.5160 - Val Loss: 1.2413
Loss: 1.2404 - Accuracy: 0.8639 - Recall: 0.6699 - Precision: 0.2460 - F1: 0.3598 - AUC: 0.8213 - PR Curve Score: 0.3983
Epoch 4/60 - Train Loss: 2.4933 - Val Loss: 1.2404
Loss: 1.5224 - Accuracy: 0.6137 - Recall: 0.7933 - Precision: 0.1079 - F1: 0.1900 - AUC: 0.8143 - PR Curve Score: 0.3908
Epoch 5/60 - Train Loss: 2.4199 - Val Loss: 1.5224
Loss: 1.2884 - Accuracy: 0.7100 - Recall: 0.7461 - Precision: 0.1339 - F1: 0.2271 - AUC: 0.8142 - PR Curve Score: 0.3892
Epoch 6/60 - Train 

Loss: 1.2500 - Accuracy: 0.8800 - Recall: 0.7224 - Precision: 0.2836 - F1: 0.4073 - AUC: 0.8815 - PR Curve Score: 0.4662
Epoch 49/60 - Train Loss: 1.8901 - Val Loss: 1.2500
Loss: 1.2552 - Accuracy: 0.8607 - Recall: 0.7399 - Precision: 0.2534 - F1: 0.3775 - AUC: 0.8791 - PR Curve Score: 0.4682
Epoch 50/60 - Train Loss: 1.8897 - Val Loss: 1.2552
Loss: 1.2053 - Accuracy: 0.8674 - Recall: 0.7250 - Precision: 0.2615 - F1: 0.3844 - AUC: 0.8793 - PR Curve Score: 0.4706
Epoch 51/60 - Train Loss: 1.8819 - Val Loss: 1.2053
Loss: 1.1918 - Accuracy: 0.8836 - Recall: 0.7119 - Precision: 0.2891 - F1: 0.4112 - AUC: 0.8793 - PR Curve Score: 0.4686
Epoch 52/60 - Train Loss: 1.8757 - Val Loss: 1.1918
Loss: 1.2645 - Accuracy: 0.8690 - Recall: 0.7382 - Precision: 0.2664 - F1: 0.3915 - AUC: 0.8815 - PR Curve Score: 0.4709
Epoch 53/60 - Train Loss: 1.8561 - Val Loss: 1.2645
Loss: 1.2703 - Accuracy: 0.8643 - Recall: 0.7338 - Precision: 0.2581 - F1: 0.3819 - AUC: 0.8821 - PR Curve Score: 0.4707
Epoch 54/60 - 