# Notes Network

Import Libraries

In [1]:
from __future__ import print_function
import numpy as np
import string
import datetime
import pandas as pd
import os
import torch
import torch.nn as nn
import time
import pickle
import random
from torch.utils.data import Dataset
from torch.utils.data.dataset import random_split
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score

## Initialization

In [2]:
# Seed every random number generator used in this notebook (Python, NumPy,
# PyTorch) with the same value so that repeated runs draw the same numbers.
seed = 230729
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts,
# so setting it here presumably only affects child processes - confirm.
os.environ["PYTHONHASHSEED"] = str(seed)
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {device}')
print()

# Extra diagnostics that are only queried for a CUDA device (device index 0).
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    # Byte counts are converted to GiB and rounded to one decimal place.
    print(f'Allocated: {round(torch.cuda.memory_allocated(0)/1024**3,1)} GB')
    print(f'Cached:    {round(torch.cuda.memory_reserved(0)/1024**3,1)} GB')

Using device: cuda

NVIDIA GeForce GTX 1060 6GB
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


Not all patients have the same number of visit dates, so we need to fix the maximum number of visit dates allowed for any given patient:

In [4]:
# Maximum number of visit dates per patient; create_dataset uses it as the
# size of the second dimension of the sparse note tensors it builds.
patients_max_visits = 505

## Dataset Definition

First we will need a function to load the pre-processed train and test datasets:

In [5]:
def load_notes_dataset_object(prefix = ''):
    """
    Load the five pre-processed ``.npy`` arrays that describe one cohort.

    Arguments:
        prefix: string prepended to every file name (e.g. ``'orig_'``); it may
            also carry a directory part.

    Outputs:
        (patient_subject_id, patients_notes_fetures, index_0, index_1,
        patient_mortality), where ``patient_subject_id`` is converted to a
        Python list and the other four are returned as loaded by NumPy.

    NOTE: the files are read with ``allow_pickle=True``, which lets NumPy
    unpickle object arrays - only point this at trusted files.
    """

    def _read(stem):
        # Every file shares the same naming scheme: <prefix><stem>.npy
        return np.load(prefix + stem + '.npy', allow_pickle=True)

    subject_ids = _read('subject_id').tolist()
    notes_features = _read('patients_notes_fetures')
    first_index = _read('index_0')
    second_index = _read('index_1')
    mortality = _read('patient_mortality')
    return subject_ids, notes_features, first_index, second_index, mortality

In [25]:
class NotesDataset(Dataset):
    """
    Dataset yielding (note features, notes mask, mortality label) per patient.

    The three tensors are moved to the module-level ``device`` once, when the
    dataset is constructed; features and mask are densified item by item.
    """

    def __init__(self, patient_id, patients_notes, notes_mask, mortality):
        """
        Arguments:
            patient_id: patient identifiers, stored unchanged
            patients_notes: tensor of note features, indexed by patient on dim 0
                (create_dataset passes a sparse COO tensor)
            notes_mask: tensor indexed the same way as ``patients_notes``
            mortality: tensor of labels, indexed by patient
        """
        self.patient_id = patient_id
        # Identical transfer settings for all three tensors.
        self.x, self.notes_mask, self.y = (
            tensor.to(device, non_blocking=True)
            for tensor in (patients_notes, notes_mask, mortality)
        )

    def __len__(self):
        """Number of entries along the first dimension of the feature tensor."""
        return len(self.x)

    def __getitem__(self, index):
        """
        Outputs:
            (features, mask, label) for ``index``; features and mask are
            converted with ``to_dense()`` before being returned.
        """
        features = self.x[index].to_dense()
        mask = self.notes_mask[index].to_dense()
        label = self.y[index]
        return (features, mask, label)

In [23]:
def create_dataset (cohort_type = 'original'):
    """
    Build a NotesDataset from the pre-processed files of one cohort.

    cohort_type = 'original' -> Unbalanced cohort will be created
    cohort_type = 'balanced_train' -> Balanced cohort for training will be created
    cohort_type = 'balanced_test' -> Balanced cohort for testing will be created

    Any other value is handled like 'balanced_test' (files prefixed "test_").
    """
    if cohort_type == 'original':
        notes_prefix = "orig_"
    elif cohort_type == 'balanced_train':
        notes_prefix = "train_"
    else:
        notes_prefix = "test_"

    subject_id, note_values, index_0, index_1, patient_mortality = load_notes_dataset_object(prefix = notes_prefix)

    # Two index rows address the first two dimensions; each stored value is a
    # 200-wide row filling the third dimension.
    coords = [index_0, index_1]
    dense_shape = (len(subject_id), patients_max_visits, 200)
    patients_notes_fetures = torch.sparse_coo_tensor(coords, note_values, dense_shape, dtype = torch.float)

    # The mask stores 1.0 at exactly the positions where note features are stored.
    mask_values = np.ones((len(index_0), 200))
    notes_mask = torch.sparse_coo_tensor(coords, mask_values, dense_shape, dtype = torch.float)

    labels = torch.from_numpy(patient_mortality).float()

    dataset = NotesDataset(subject_id, patients_notes_fetures, notes_mask, labels)
    assert len(labels) == len(dataset), 'Wrong dataset length!'
    print ("Number of Patients:", len(labels))

    return dataset

## Dataloader Definition

In [8]:
# Mini-batch size passed to every DataLoader created by the
# get_*_dataloaders functions.
batch_size = 50

In [27]:
def get_unbalanced_dataloaders (max_size = 0):
    """
    Build train/validation loaders from the original (unbalanced) cohort.

    Arguments:
        max_size: when > 0, keep only the first ``max_size`` samples before
            splitting; values larger than the dataset are clamped to its
            length. 0 (the default) keeps everything.

    Outputs:
        (train_loader, val_loader): a random 80/20 split of the (possibly
        sliced) dataset; only the training loader shuffles.
    """
    dataset = create_dataset('original')
    if (max_size > 0):
        print ("***** Slicing to " + str(max_size))
        # Clamp to the dataset length: indices past the end would be accepted
        # by Subset and only fail later, in the middle of iteration.
        dataset = Subset(dataset, np.arange(min(max_size, len(dataset))))

    # 80% of the samples for training, the remainder for validation.
    split = int(len(dataset)*0.8)
    lengths = [split, len(dataset) - split]

    train_dataset, val_dataset = random_split(dataset, lengths)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

In [28]:
def get_balanced_dataloaders (max_size = 0):
    """
    Build train/validation loaders from the pre-balanced train and test cohorts.

    Arguments:
        max_size: when > 0, keep only the first ``max_size`` samples of each
            cohort; the limit is clamped separately to each cohort's length,
            so a value that fits the training cohort cannot overrun a smaller
            test cohort. 0 (the default) keeps everything.

    Outputs:
        (train_loader, val_loader): the training loader shuffles, the
        validation loader (built from the test cohort) does not.
    """
    print ("* Train dataset *")
    balanced_train_dataset = create_dataset('balanced_train')
    print ("* Test dataset *")
    balanced_test_dataset = create_dataset('balanced_test')

    if (max_size > 0):
        print ("***** Slicing to " + str(max_size))
        # Clamp per dataset: out-of-range indices would be accepted by Subset
        # and only fail later, in the middle of iteration.
        train_limit = min(max_size, len(balanced_train_dataset))
        test_limit = min(max_size, len(balanced_test_dataset))
        balanced_train_dataset = Subset(balanced_train_dataset, np.arange(train_limit))
        balanced_test_dataset = Subset(balanced_test_dataset, np.arange(test_limit))

    train_loader = DataLoader(balanced_train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(balanced_test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader

# Notes Network Definition

## Alpha Attention

In [11]:
class NotesAlphaAttention(torch.nn.Module):
    """Alpha attention: one weight per sequence position, normalised over the sequence."""

    def __init__(self, hidden_dim):
        """
        Define the linear layer `self.a_att` that maps each hidden vector to a
        single score.

        Arguments:
            hidden_dim: the hidden dimension
        """
        super().__init__()
        self.a_att = nn.Linear(hidden_dim, 1)

    def forward(self, g):
        """
        Arguments:
            g: the output tensor from RNN-alpha of shape (batch_size, seq_length, hidden_dim)

        Outputs:
            alpha: the corresponding attention weights of shape (batch_size, seq_length, 1)
        """
        scores = self.a_att(g)
        # Normalise across the sequence dimension so the weights of each
        # sample sum to 1.
        return scores.softmax(dim=1)

## Beta Attention

In [12]:
class NotesBetaAttention(torch.nn.Module):
    """Beta attention: one weight in [-1, 1] per hidden feature at every sequence position."""

    def __init__(self, hidden_dim):
        """
        Define the linear layer `self.b_att`, which keeps the hidden dimension
        unchanged.

        Arguments:
            hidden_dim: the hidden dimension
        """
        super().__init__()
        self.b_att = nn.Linear(hidden_dim, hidden_dim)

    def forward(self, h):
        """
        Arguments:
            h: the output tensor from RNN-beta of shape (batch_size, seq_length, hidden_dim)

        Outputs:
            beta: the corresponding attention weights of shape (batch_size, seq_length, hidden_dim)
        """
        projected = self.b_att(h)
        return projected.tanh()

## Notes Network

In [13]:
class NotesRNN(nn.Module):
    """
    Two-GRU attention network that maps a sequence of note embeddings to a
    single probability per patient.

    One GRU feeds the alpha (per-position) attention and the other feeds the
    beta (per-feature) attention; the attended, masked sum of the inputs goes
    through a linear layer and a sigmoid.
    """

    def attention_sum(self, alpha, beta, x, masks):
        """
        Arguments:
            alpha: the alpha attention weights of shape (batch_size, seq_length, 1)
            beta: the beta attention weights of shape (batch_size, seq_length, notes_emb_size)
            x: the note embeddings of shape (batch_size, seq_length, notes_emb_size)
            masks: the padding masks, multiplied element-wise with ``x``
                (same shape as ``x``)

        Outputs:
            c: the context vector of shape (batch_size, notes_emb_size)
        """

        # Weight every embedding, zero out masked entries, then collapse the
        # sequence dimension.
        return torch.sum( x * alpha * beta * masks , dim=1 )

    def __init__(self, hidden_dim=128, notes_emb_size=200):
        """
        Arguments:
            hidden_dim: accepted for backward compatibility but not used;
                every layer is sized by ``notes_emb_size`` because
                ``attention_sum`` multiplies beta with ``x`` element-wise, so
                both must share their last dimension.
            notes_emb_size: size of each note embedding; also used as the GRU
                hidden size.
        """
        super().__init__()

        self.rnn_a = nn.GRU(notes_emb_size, notes_emb_size, batch_first=True)
        self.rnn_b = nn.GRU(notes_emb_size, notes_emb_size, batch_first=True)
        self.att_a = NotesAlphaAttention(notes_emb_size)
        self.att_b = NotesBetaAttention(notes_emb_size)
        self.fc = nn.Linear(notes_emb_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x, masks):
        """
        Arguments:
            x: the note embeddings of shape (batch_size, seq_length, notes_emb_size)
            masks: the padding masks with the same shape as ``x``

        Outputs:
            probs: the predicted probabilities of shape (batch_size,)
        """
        g, _ = self.rnn_a(x)
        h, _ = self.rnn_b(x)
        alpha = self.att_a(g)
        beta = self.att_b(h)
        c = self.attention_sum(alpha, beta, x, masks)
        logits = self.fc(c)
        probs = self.sigmoid(logits)

        # Drop only the trailing size-1 dimension. A bare squeeze() would also
        # drop the batch dimension for a batch of one, giving a 0-dim tensor
        # that no longer matches labels of shape (1,) in the loss.
        return probs.squeeze(-1)

In [14]:
def create_model_and_optimizer():
    """
    Create a fresh NotesRNN and an Adam optimizer over its parameters.

    The learning rate is read from the module-level ``learning_rate``. The
    model is moved to CUDA whenever at least one CUDA device is reported.

    Outputs:
        (model, optimizer)
    """
    model = NotesRNN(hidden_dim=128)
    gpu_count = torch.cuda.device_count()
    if gpu_count > 0:
        model.cuda()
    # Alternative optimizer left disabled:
    #optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum = 0.9, nesterov = True)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer

# Model Training and Evaluation

In [15]:
def train(model, train_loader, n_epochs):
    """
    Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Uses the module-level ``optimizer`` and ``criterion``. Prints a progress
    marker every 10th batch and the mean batch loss after each epoch.

    Arguments:
        model: the network to train; called as ``model(x, masks)``
        train_loader: iterable yielding (x, masks, labels) batches
        n_epochs: number of passes over ``train_loader``

    Outputs:
        model: the same model object, after training
    """
    model.train() # switch the model to training mode

    for epoch in range(n_epochs):
        batch_losses = []
        print('Batch :', end = ' ')
        for step, (x, masks, labels) in enumerate(train_loader):
            # Progress marker on every 10th batch, skipping the very first.
            if step > 0 and step % 10 == 0:
                print(f'{step},', end=' ')

            # Step 1. clear gradients
            optimizer.zero_grad()
            # Step 2. evaluate model output
            probs = model(x, masks)
            # Step 3. calculate loss
            loss = criterion(probs, labels)
            # Step 4. backward propagation
            loss.backward()
            # Step 5. optimization
            optimizer.step()
            # Step 6. record loss (kept as a NumPy scalar array on the CPU)
            batch_losses.append(loss.detach().cpu().numpy())

        print(f"Epoch {epoch}: curr_epoch_loss={np.mean(batch_losses)}")
    return model

In [16]:
def eval_model(model, val_loader):
    """
    Score ``model`` on every batch of ``val_loader``.

    Arguments:
        model: the network to evaluate; called as ``model(x, masks)``
        val_loader: iterable yielding (x, masks, labels) batches

    Outputs:
        (precision, recall, f1, roc_auc): precision/recall/F1 are computed on
        predictions thresholded at 0.5 with ``average='binary'``; ROC AUC is
        computed on the raw probabilities.
    """
    model.eval()
    all_labels, all_probs = [], []

    # No gradients are needed while collecting predictions.
    with torch.no_grad():
        for x, masks, labels in val_loader:
            probs = model(x, masks)
            all_labels.extend(labels.detach().cpu().numpy().tolist())
            # reshape(-1) flattens the batch output before it is appended.
            all_probs.extend(probs.detach().cpu().numpy().reshape(-1).tolist())

    predictions = np.array(all_probs) > 0.5
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, predictions, average='binary')
    roc_auc = roc_auc_score(all_labels, all_probs)

    return precision, recall, f1, roc_auc

In [17]:
def train_and_eval(model, train_loader, val_loader, n_epochs=10, filename='model.pt'):
    """
    Train ``model``, time the training, then evaluate it and print a report.

    Arguments:
        model: the network to train and evaluate
        train_loader: batches used for training
        val_loader: batches used for evaluation
        n_epochs: number of training epochs
        filename: NOTE(review): not used anywhere in this function, so nothing
            is written to disk - confirm whether saving the model was intended.

    The report also prints the module-level ``learning_rate``. Nothing is
    returned.
    """
    started = time.time()
    train(model, train_loader, n_epochs)
    processing_time = time.time() - started
    timing_line = 'Model Training time: ' + str(processing_time)
    print(timing_line)

    p, r, f, roc_auc = eval_model(model, val_loader)
    print ("Learning rate: " + str(learning_rate))
    print(timing_line)
    report = (
        ("Precision = ", p),
        ("Recall    = ", r),
        ("F1        = ", f),
        ("ROC AUC   = ", roc_auc),
    )
    for label, value in report:
        print(label, value)
    # Same four metrics on a single tab-separated line.
    print(p,"\t",r,"\t",f,"\t",roc_auc)

## Main Program

In [36]:
# Hyper-parameters for the unbalanced run. train() and
# create_model_and_optimizer() read learning_rate, criterion and optimizer
# as module-level names, so they are defined here before use.
learning_rate = 0.0001
n_epochs = 20
criterion = nn.BCELoss()
print(f'Learning Rate: {learning_rate}')
print(f"Number of Epochs: {n_epochs}")

# Blank line followed by the section banner.
print('\n--------------\nOriginal model\n--------------')
# The model is created before the loaders, as in the original cell; both
# steps draw from the seeded random generators.
model, optimizer = create_model_and_optimizer()
train_loader, val_loader = get_unbalanced_dataloaders()   # You can pass a number to limit the number of samples
train_and_eval(model, train_loader, val_loader, n_epochs, 'unbalanced_model.pt')



Learning Rate: 0.0001
Number of Epochs: 20

--------------
Original model
--------------
Number of Patients: 9822
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, Epoch 0: curr_epoch_loss=0.6112962365150452
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, Epoch 1: curr_epoch_loss=0.4883677363395691
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, Epoch 2: curr_epoch_loss=0.42417651414871216
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, Epoch 3: curr_epoch_loss=0.37340787053108215
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, Epoch 4: curr_epoch_loss=0.3304714262485504
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, Epoch 5: curr_epoch_loss=0.30555108189582825
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, Epoch 6: curr_epoch_loss=0.29249006509780884
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 12

In [30]:
# Hyper-parameters for the balanced run. train() and
# create_model_and_optimizer() read learning_rate, criterion and optimizer
# as module-level names, so they are redefined here before use.
learning_rate = 0.0001
n_epochs = 10
criterion = nn.BCELoss()
print(f'Learning Rate: {learning_rate}')
print(f"Number of Epochs: {n_epochs}")
# Two blank lines followed by the section banner.
print('\n\n--------------\nBalanced model\n--------------')
# The model is created before the loaders, as in the original cell.
model, optimizer = create_model_and_optimizer()
train_loader, val_loader = get_balanced_dataloaders()       # You can pass a number to limit the number of samples
train_and_eval(model, train_loader, val_loader, n_epochs, 'balanced_model.pt')
#load_and_eval(model, 'balanced_model.pt', val_loader)

Learning Rate: 0.0001
Number of Epochs: 10


--------------
Balanced model
--------------
* Train dataset *
Number of Patients: 13790
* Test dataset *
Number of Patients: 1965
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, Epoch 0: curr_epoch_loss=0.6445119976997375
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, Epoch 1: curr_epoch_loss=0.49357670545578003
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, Epoch 2: curr_epoch_loss=0.3861011266708374
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, Epoch 3: curr_epoch_loss=0.30750030279159546
Batch : 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220, 2