# Deep Learning For Healthcare Course Project: INPREM

https://www.kdd.org/kdd2020/accepted-papers/view/inprem-an-interpretable-and-trustworthy-predictive-model-for-healthcare

## Setup

In [6]:
!pip3 install -U sparsemax

[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0mLooking in indexes: https://pypi.python.org/simple, https://pypi.apple.com/simple
Collecting sparsemax
  Downloading https://pypi.apple.com/packages/packages/1c/f8/e56723d8279ff156dea120c67afde88be80448958bb88d5307426390794f/sparsemax-0.1.9-py2.py3-none-any.whl (5.5 kB)
Installing collected packages: sparsemax
[33m  DEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0m[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near fu

In [3]:
import os
import pickle
import json
import random
import numpy as np
import pandas as pd


# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
from sparsemax import Sparsemax

In [4]:
# set seed
seed = 24
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)

# define data path
DATA_PATH = "data/"

In [5]:
!ls {DATA_PATH}

DIAGNOSES_ICD.csv   D_ICD_DIAGNOSES.csv ICUSTAYS.csv


## Import Raw Data

For example, SUBJECT_ID refers to a unique patient, HADM_ID refers to a unique admission to the hospital, and ICUSTAY_ID refers to a unique admission to an intensive care unit.

In [None]:
def load_dataset(filepath):
    return pd.read_csv(filepath)


diag_icd = load_dataset(os.path.join(DATA_PATH, 'DIAGNOSES_ICD.csv'))
icd_descriptions = load_dataset(os.path.join(DATA_PATH, 'D_ICD_DIAGNOSES.csv'))
icustays = load_dataset(os.path.join(DATA_PATH, 'ICUSTAYS.csv'))

## Build Custom Dataset

In [None]:
from torch.utils.data import Dataset


class CustomDataset(Dataset):
    
    def __init__(self):
        self.x = None
        self.y = None
    
    def __len__(self):
        
        return len(self.y)
    
    def __getitem__(self, index):
        
        return (self.x[index], self.y[index])
        

dataset = CustomDataset()

## Load the Data (DataLoader)

For each task, we randomly split each dataset into training, validation, and testing sets five times in a 75:10:15 ratio

In [None]:
from torch.utils.data import DataLoader

def load_data(train_dataset, val_dataset, collate_fn):
    
    batch_size = 32
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, collate_fn=collate_fn, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, collate_fn=collate_fn)
    test_loader = DataLoader(val_dataset, batch_size=batch_size, collate_fn=collate_fn)
    
    return train_loader, val_loader


train_loader, val_loader = load_data(train_dataset, val_dataset, collate_fn)

## Build Model

We treat the medical events taking place in EHR as medical codes, which are denoted as $c_{1}, c_{2},... c_{|C|}$ ∈ 𝐶, where |𝐶| is the total number of unique medical codes.

One specific patient consist of a sequence of visits $v_{1}, v_{2},... v_{T}$ where we denote the number of visits in total as T.

Each visit contains a subset of medical codes, and we denote each visit as a binary vector  $v_{t} ∈ \{0, 1\}_{|C|}$, where the 𝑖-th element is set to 1 if the 𝑡-th visit contains the medical code $c_{i}$, otherwise 0. The visits  $v_{1}, v_{2},... v_{T}$ are stacked to form an input matrix $X ∈ \{0, 1\}^{|C|xT}$ , which we use as the input for the network

$E_{v} = {W}_{v}X$

$E_{o} = {W}_{o}O$

$E_{r} = \alpha(\beta(E_{v}+E_{o}))^{T}$

In [None]:
class AlphaAttention(torch.nn.Module):

    def __init__(self, hidden_dim=256):
        super().__init__()
        
        self.a_att = nn.Linear(hidden_dim, 1)
        
        self.sparsemax = Sparsemax(dim=-1)
        self.softmax = torch.nn.Softmax(dim=-1)

    def forward(self, g):
        
        y = self.a_att(g)
        sparse_max = self.sparsemax(y)
        soft_max = self.softmax(y)
        
        out = (sparse_max + soft_max) / 2
        
        return out
    
class BetaAttention(torch.nn.Module):

    def __init__(self, hidden_dim=256):
        
        self.b_att = nn.Linear(hidden_dim, hidden_dim)


    def forward(self, h):
        
        y = self.b_att(h)
        out = torch.tanh(y)
        
        return out

In [None]:
class INPREM(nn.Module):
    
    def __init__(self, num_codes, embedding_dim=256):
        super().__init__()
        
        self.embedding_v = nn.Embedding(num_codes, embedding_dim)
        self.embedding_o = nn.Embedding(num_codes, embedding_dim)
        
        
        self.att_a = AlphaAttention(embedding_dim)
        
        self.att_b = BetaAttention(embedding_dim)
        
        self.do = nn.Dropout(.5)
    
    def forward(self, X):
    
        # Pass through embedding
        ev = self.embedding_v(X)
        eo = self.embedding_o(o)
        
        er = 
        
        # Softmax
        out = F.softmax(x)
    

# load the model here
model = INPREM(num_codes = len(types))
model

## Evaluation

In [None]:
def eval_model(model, dataloader, device=None):
    model.eval()
    y_pred = torch.LongTensor()
    y_score = torch.Tensor()
    y_true = torch.LongTensor()
    
    for DATA in dataloader:
        y_logit = model(DATA)

        y_hat = (y_logit > 0.5).int()

        y_score = torch.cat((y_score,  y_logit.detach().to('cpu')), dim=0)
        y_pred = torch.cat((y_pred,  y_hat.detach().to('cpu')), dim=0)
        y_true = torch.cat((y_true, y.detach().to('cpu')), dim=0)
    
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    roc_auc = roc_auc_score(y_true, y_score)
    
    return p, r, f, roc_auc

## Train the Model

In [None]:
def train(model, train_loader, val_loader, n_epochs):

    for epoch in range(n_epochs):
        model.train()
        
        train_loss = 0
        for DATA, y in train_loader:
            optimizer.zero_grad()
            y_hat = model(x, masks, rev_x, rev_masks)

            loss = criterion(y_hat, y)
            
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
        train_loss = train_loss / len(train_loader)
        
        print('Epoch: {} \t Training Loss: {:.6f}'.format(epoch+1, train_loss))
        
        p, r, f, roc_auc = eval(model, val_loader)
        
        print('Epoch: {} \t Validation p: {:.2f}, r:{:.2f}, f: {:.2f}, roc_auc: {:.2f}'.format(epoch+1, p, r, f, roc_auc))
        
    return round(roc_auc, 2)

## Run

For training all approaches, we use Adam with the batch size of 32 and the learning rate of 0.0005. The weight decay is set to 𝜆 = 0.0001 and the dropout rate is set to 0.5 for all approaches

In [7]:
# load the model
model = IMPREM(num_codes = len(types))

# load the loss function
criterion = nn.BCELoss()
# load the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=5e-4, weight_decay=1e-4)

n_epochs = 5
train(model, train_loader, val_loader, n_epochs)

NameError: name 'IMPREM' is not defined

## Abblations