In [1]:
def load_model(path, model):
    # load pretrained model and update weights
    pretrained_dict = torch.load(path)
    model_dict = model.state_dict()
    # 1. filter out unnecessary keys
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    # 2. overwrite entries in the existing state dict
    model_dict.update(pretrained_dict)
    # 3. load the new state dict
    model.load_state_dict(model_dict)
    return model

In [2]:
def adam(params, config=None):
    if config is None:
        config = {
            'lr': 3e-5,
            'warmup_proportion': 0.1,
            'weight_decay': 0.01
        }
        
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']

    optimizer_grouped_parameters = [
        {'params': [p for n, p in params if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in params if any(nd in n for nd in no_decay)], 'weight_decay': 0}
    ]

    optim = Bert.optimization.BertAdam(optimizer_grouped_parameters,
                                       lr=config['lr'],
                                       warmup=config['warmup_proportion'])
    return optim

In [3]:
import sys

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
sys.path.insert(1, '../')

In [6]:
from utils.dataset import EHRDatasetCodePrediction
from model.tokenizer import EHRTokenizer
import pytorch_pretrained_bert as Bert
from torch.utils.data import DataLoader
from model.model import *
from utils.config import BertConfig
from model.model2 import BertMultiLabelPrediction
from model.trainer import PatientTrajectoryPredictor
import pytorch_lightning as pl

In [7]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [8]:
path = '../processing/readmission_data_ccsr_'

In [9]:
data = pd.read_parquet(path)

In [10]:
#data.head()

In [11]:
data['visit_len'] = data['hadm_id'].apply(lambda x: len(x))

In [12]:
datamorethan1visit = data[data['visit_len'] > 1]

In [13]:
datamorethan1visit['ccsr_traincodes'] = datamorethan1visit['ccsr'].apply(lambda x: x[:-1])
datamorethan1visit['ccsr_labels'] = datamorethan1visit['ccsr'].apply(lambda x: x[-1])

In [14]:
#alllabels = []

In [15]:
#ccsr = []

In [16]:
#datamorethan1visit

In [17]:
def getcssrvoc(array):
    for a in array:
        for b in a:
            ccsr.append(b)
            

In [18]:
def getvoc(array):
    for a in array:
        alllabels.append(a)

In [19]:
#datamorethan1visit['ccsr_traincodes'].apply(lambda x: getcssrvoc(x))

In [20]:
#datamorethan1visit['ccsr_labels'].apply(lambda x: getvoc(x))

In [21]:
#len(set(alllabels))

In [22]:
#np.save('../processing/ccsr_voc.npy', np.array(ccsr))

In [23]:
global_params = {
    'max_seq_len': 32,
    'gradient_accumulation_steps': 1
}

optim_param = {
    'lr': 3e-5,
    'warmup_proportion': 0.1,
    'weight_decay': 0.01
}

train_params = {
    'batch_size': 64,
    'use_cuda': True,
    'max_len_seq': global_params['max_seq_len'],
    'device': 'cuda' #change this to run on cuda #'cuda:0'
}

In [24]:
tokenizer = EHRTokenizer(task='ccsr')

In [45]:
# Shallow

model_config = {
    'vocab_size': len(tokenizer.getVoc('code').keys()), # number of disease + symbols for word embedding
    'hidden_size': 300, # word embedding and seg embedding hidden size
    'seg_vocab_size': 2, # number of vocab for seg embedding
    'age_vocab_size': len(tokenizer.getVoc('age').keys()), # number of vocab for age embedding,
    'gender_vocab_size': 3,
    'max_position_embeddings': train_params['max_len_seq'], # maximum number of tokens
    'hidden_dropout_prob': 0.1, # dropout rate
    'num_hidden_layers': 2, # number of multi-head attention layers required
    'num_attention_heads': 4, # number of attention heads
    'attention_probs_dropout_prob': 0.1, # multi-head attention dropout rate
    'intermediate_size': 300, # the size of the "intermediate" layer in the transformer encoder
    'hidden_act': 'gelu', # The non-linear activation function in the encoder and the pooler "gelu", 'relu', 'swish' are supported
    'initializer_range': 0.02, # parameter weight initializer range
}

#Deep
'''
model_config = {
    'vocab_size': len(tokenizer.getVoc('code').keys()), # number of disease + symbols for word embedding
    'hidden_size': 300, # word embedding and seg embedding hidden size
    'seg_vocab_size': 2, # number of vocab for seg embedding
    'age_vocab_size': len(tokenizer.getVoc('age').keys()), # number of vocab for age embedding,
    'gender_vocab_size': 3,
    'max_position_embeddings': train_params['max_len_seq'], # maximum number of tokens
    'hidden_dropout_prob': 0.1, # dropout rate
    'num_hidden_layers': 6, # number of multi-head attention layers required
    'num_attention_heads': 12, # number of attention heads
    'attention_probs_dropout_prob': 0.1, # multi-head attention dropout rate
    'intermediate_size': 300, # the size of the "intermediate" layer in the transformer encoder
    'hidden_act': 'gelu', # The non-linear activation function in the encoder and the pooler "gelu", 'relu', 'swish' are supported
    'initializer_range': 0.02, # parameter weight initializer range
}
'''

'\nmodel_config = {\n    \'vocab_size\': len(tokenizer.getVoc(\'code\').keys()), # number of disease + symbols for word embedding\n    \'hidden_size\': 300, # word embedding and seg embedding hidden size\n    \'seg_vocab_size\': 2, # number of vocab for seg embedding\n    \'age_vocab_size\': len(tokenizer.getVoc(\'age\').keys()), # number of vocab for age embedding,\n    \'gender_vocab_size\': 3,\n    \'max_position_embeddings\': train_params[\'max_len_seq\'], # maximum number of tokens\n    \'hidden_dropout_prob\': 0.1, # dropout rate\n    \'num_hidden_layers\': 6, # number of multi-head attention layers required\n    \'num_attention_heads\': 12, # number of attention heads\n    \'attention_probs_dropout_prob\': 0.1, # multi-head attention dropout rate\n    \'intermediate_size\': 300, # the size of the "intermediate" layer in the transformer encoder\n    \'hidden_act\': \'gelu\', # The non-linear activation function in the encoder and the pooler "gelu", \'relu\', \'swish\' are support

In [46]:
class BertConfig(Bert.modeling.BertConfig):
    def __init__(self, config):
        super(BertConfig, self).__init__(
            vocab_size_or_config_json_file=config.get('vocab_size'),
            hidden_size=config['hidden_size'],
            num_hidden_layers=config.get('num_hidden_layers'),
            num_attention_heads=config.get('num_attention_heads'),
            intermediate_size=config.get('intermediate_size'),
            hidden_act=config.get('hidden_act'),
            hidden_dropout_prob=config.get('hidden_dropout_prob'),
            attention_probs_dropout_prob=config.get('attention_probs_dropout_prob'),
            max_position_embeddings = config.get('max_position_embeddings'),
            initializer_range=config.get('initializer_range'),
        )
        self.seg_vocab_size = config.get('seg_vocab_size')
        self.age_vocab_size = config.get('age_vocab_size')
        self.gender_vocab_size = config.get('gender_vocab_size')

In [47]:
conf = BertConfig(model_config)
tensorboarddir = '../logs/'

In [28]:
voc = {}
def get(x):
    for a in x:
        for b in a:
            if b not in voc:
                voc[b] = 1
            else:
                voc[b] += 1
                
s = datamorethan1visit['ccsr_traincodes'].apply(lambda x: get(x))

In [48]:
trainset, testset = train_test_split(datamorethan1visit, test_size=0.2)

In [30]:
labelvoc = tokenizer.getVoc('label')

In [49]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer(classes=list(labelvoc.values()))
mlb.fit([[each] for each in list(labelvoc.values())])

MultiLabelBinarizer(classes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                             15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
                             28, 29, ...])

In [50]:
traindata = EHRDatasetCodePrediction(trainset, max_len=train_params['max_len_seq'], tokenizer=tokenizer) 
testdata = EHRDatasetCodePrediction(testset, max_len=train_params['max_len_seq'], tokenizer=tokenizer) 

In [51]:
trainer = pl.Trainer(
        max_epochs=5, 
        gpus=-1,
        logger=pl.loggers.TensorBoardLogger(save_dir=tensorboarddir),
        callbacks=[pl.callbacks.progress.TQDMProgressBar()], 
        progress_bar_refresh_rate=1,
        weights_summary=None, # Can be None, top or full
        num_sanity_val_steps=10,
    )

trainloader = torch.utils.data.DataLoader(traindata, batch_size=train_params['batch_size'], shuffle=True, num_workers=4)
testloader = torch.utils.data.DataLoader(testdata, batch_size=train_params['batch_size'], shuffle=True, num_workers=4)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [52]:
model = BertMultiLabelPrediction(conf, num_labels=len(labelvoc.keys())) 
PATH = "../saved_models/MLM/shallow_notsuffled"
model = load_model(PATH, model)
params = list(model.named_parameters())
optim = adam(params, optim_param)

t_total value of -1 results in schedule not being applied


In [53]:
patienttrajectory = PatientTrajectoryPredictor(model, optim, optim_param, train_objective='visit', metrics=True, binarizer=mlb)

trainer.fit(
    patienttrajectory, 
    train_dataloader=trainloader,
);

predictions = trainer.predict(patienttrajectory, dataloaders=testloader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 4: 100%|████████████████████████████████████████████████| 913/913 [01:41<00:00,  8.98it/s, loss=0.0776, v_num=188]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Predicting: 913it [00:28, ?it/s]


In [56]:
PATH = '../saved_models/NextxVisit/shallow_notsuffled'
torch.save(model.state_dict(), PATH)

In [54]:
aucpr = 0
for pred in predictions:
    aucpr+=pred['AUCPR']
aucpr / len(predictions)

0.304247055475446

In [55]:
auc = 0
for pred in predictions:
    auc+=pred['AUC']
auc / len(predictions)

0.8880110274767896