In [1]:
import sys

In [2]:
sys.path.insert(1, '../')

In [3]:
from utils.dataset import EHRDatasetPredictionTask
from model.tokenizer import EHRTokenizer
import pytorch_pretrained_bert as Bert
from torch.utils.data import DataLoader
from model.model import *
from utils.config import BertConfig
from model.trainer import PatientTrajectoryPredictor
import pytorch_lightning as pl

In [4]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [5]:
path = '../processing/readmission_data'

In [6]:
data = pd.read_parquet(path)

In [7]:
data.head()

Unnamed: 0,subject_id,label,icd_code,age,alcohol_abuse,tobacco_abuse,ndc,hadm_id,gender
0,10031358,"[0, 0, 0]","[[25080, 73007, 7854, 2761, 70715, 5849, 25060...","[62.0, 63.0, 64.0]","[1, 1, 0]","[0, 0, 0]","[[63323026201, 71015892, 51079045420, 74706811...","[27421511, 28279098, 24522342]",M
1,10040737,[0],"[[5602, 4019, 53081, 2713, 30000, 2449]]",[51.0],[0],[0],"[[78112001103, 60793011601, 63323026201, 90456...",[26871570],F
2,10047088,[0],"[[78659, 30500, 41400, 4139, 4019, 25000, 3000...",[48.0],[1],[0],"[[55390000401, 182050789, 904578561, 904404073...",[29486018],M
3,10051825,[0],"[[1963, 1725, 78702, 30390]]",[50.0],[0],[0],"[[904198861, 338011704, 33332001201, 510790759...",[29436824],M
4,10082965,[0],"[[7211, 9982, E8708, 4019]]",[81.0],[0],[0],"[[61553020648, 574705050, 121043130, 536338101...",[22046658],F


In [8]:
global_params = {
    'max_seq_len': 32,
    'max_age': 110,
    'month': 1,
    'age_symbol': None,
    'min_visit': 5,
    'gradient_accumulation_steps': 1
}

optim_param = {
    'lr': 3e-5,
    'warmup_proportion': 0.1,
    'weight_decay': 0.01
}

train_params = {
    'batch_size': 32,
    'use_cuda': True,
    'max_len_seq': global_params['max_seq_len'],
    'device': 'cuda' #change this to run on cuda #'cuda:0'
}

In [9]:
tokenizer = EHRTokenizer()

In [10]:
trainset = EHRDatasetPredictionTask(data, max_len=train_params['max_len_seq'], tokenizer=tokenizer, prediction_task='readmission')

In [11]:
trainload = DataLoader(dataset=trainset, batch_size=train_params['batch_size'], shuffle=True)

In [12]:
model_config = {
    'vocab_size': len(tokenizer.getVoc('code').keys()), # number of disease + symbols for word embedding
    'hidden_size': 200, # word embedding and seg embedding hidden size
    'seg_vocab_size': 2, # number of vocab for seg embedding
    'age_vocab_size': len(tokenizer.getVoc('age').keys()), # number of vocab for age embedding,
    'gender_vocab_size': 3,
    'max_position_embeddings': train_params['max_len_seq'], # maximum number of tokens
    'hidden_dropout_prob': 0.1, # dropout rate
    'num_hidden_layers': 2, # number of multi-head attention layers required
    'num_attention_heads': 4, # number of attention heads
    'attention_probs_dropout_prob': 0.1, # multi-head attention dropout rate
    'intermediate_size': 300, # the size of the "intermediate" layer in the transformer encoder
    'hidden_act': 'gelu', # The non-linear activation function in the encoder and the pooler "gelu", 'relu', 'swish' are supported
    'initializer_range': 0.02, # parameter weight initializer range
}

In [13]:
conf = BertConfig(**model_config)

In [14]:
model = BertSinglePrediction(conf, num_labels=1)

In [15]:
def load_model(path, model):
    # load pretrained model and update weights
    pretrained_dict = torch.load(path)
    model_dict = model.state_dict()
    # 1. filter out unnecessary keys
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    # 2. overwrite entries in the existing state dict
    model_dict.update(pretrained_dict)
    # 3. load the new state dict
    model.load_state_dict(model_dict)
    return model

PATH = "checkpoint2"
mode = load_model(PATH, model)

In [16]:
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
params = list(model.named_parameters())
optimizer_grouped_parameters = [
    {'params': [p for n, p in params if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in params if any(nd in n for nd in no_decay)], 'weight_decay': 0}
]

optim = Bert.optimization.BertAdam(optimizer_grouped_parameters,
                                   lr=optim_param['lr'],
                                   warmup=optim_param['warmup_proportion'])

t_total value of -1 results in schedule not being applied


In [17]:
model = model.to('cuda') 

In [18]:
patienttrajectory = PatientTrajectoryPredictor(model, optim, optim_param, train_objective='readmission', metrics=True)

In [19]:
tensorboarddir = '../logs/'

In [20]:
trainer = pl.Trainer(
            max_epochs=20, 
            gpus=1,
            logger=pl.loggers.TensorBoardLogger(save_dir=tensorboarddir),
            callbacks=[pl.callbacks.progress.TQDMProgressBar()], 
            progress_bar_refresh_rate=1,
            weights_summary=None, # Can be None, top or full
            num_sanity_val_steps=10,
        )

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [21]:
trainer.fit(
    patienttrajectory, 
    train_dataloader=trainload,
);

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 2:  83%|███████████████████████████████████████        | 3051/3672 [01:33<00:19, 32.57it/s, loss=0.232, v_num=131]