# Transformer Hawkes Process
* The objective is to implement THP paper 
    * [Zuo, Simiao, Haoming Jiang, Zichong Li, Tuo Zhao, and Hongyuan Zha. “Transformer Hawkes Process.” In Proceedings of the 37th International Conference on Machine Learning, 11692–702. PMLR, 2020. https://proceedings.mlr.press/v119/zuo20a.html.]
    * github: https://github.com/SimiaoZuo/Transformer-Hawkes-Process
* with the example data in the paper
    * https://drive.google.com/drive/folders/0BwqmV0EcoUc8UklIR1BKV25YR1U?resourcekey=0-OrlU87jyc1m-dVMmY5aC4w
    



# Libraries

In [1]:
# activate line execution
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# general
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import os
import shutil

# plotly
import plotly.express as px  # (version 4.7.0 or higher)
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# import custom libraries
import sys
sys.path.append("C:\\DATA\\Tasks\\lib\\hk")
import hk_psql
import hk_utils

# folder paths
ADD_DATA = "C:\\DATA\\data\\raw\\mimic4\\lookup\\"
ADD_DATA_proc = "C:/DATA/data/processed/"


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# libraries for THP

import argparse
import numpy as np
import pickle
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

import transformer.Constants as Constants
import Utils

from preprocess.Dataset_mhp import get_dataloader, prepare_dataloader

# from transformer.Models import Transformer
from transformer.hk_transformer import Transformer
from tqdm import tqdm

from torchinfo import summary

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

torch.cuda.empty_cache()
torch.cuda.memory_allocated()
torch.cuda.memory_reserved()

from sklearn import metrics
from hk_pytorch import save_checkpoint,load_checkpoint
import hk_pytorch


from customNEW import myparser

from tsnecuda import TSNE
from MulticoreTSNE import MulticoreTSNE as TSNE





0

0

## THP
* we import THP libraries from main.py

In [3]:
# setting hyper parameters:

params_set = [
    None,
    {
        'n_head': 3,
        'n_layers': 3,
        'd_model': 64,
        'd_inner_hid':   256,
        'd_k': 16,
        'd_v': 16,
        'dropout': 0.1,
    },
    {
        'n_head': 6,
        'n_layers': 6,
        'd_model': 128,
        'd_inner_hid':   2048,
        'd_k': 64,
        'd_v': 64,
        'dropout': 0.1,
    },
    {
        'n_head': 4,
        'n_layers': 4,
        'd_model': 512,
        'd_inner_hid':   1024,
        'd_k': 512,
        'd_v': 512,
        'dropout': 0.1,
    },
    {
        'n_head': 2,
        'n_layers': 2,
        'd_model': 64,
        'd_inner_hid':   128,
        'd_k': 32,
        'd_v': 32,
        'dropout': 0.1,
    },
    {
        'n_head': 3,
        'n_layers': 3,
        'd_model': 64,
        'd_inner_hid': 256,
        'd_k': 64,
        'd_v': 64,
        'dropout': 0.1,
    },

    {
        'n_head': 4,
        'n_layers': 4,
        'd_model': 32,
        'd_inner_hid': 64,
        'd_k': 32,
        'd_v': 32,
        'dropout': 0.1,
    },
    
    
]


data_meme = {
    'add_data': "data/data_meme/",
    'C':5000,
    'batch': 128,
    'lr': 1e-3,
    'solver': 'MC',
    'set': params_set[1],
    'freq': np.ones(5000)
}

data_mimic = {
    'add_data': "data/data_mimic/fold1/",
    'C':75,
    'batch': 1,
    'lr': 1e-4,
    'solver': 'NU',
    'set': params_set[1],
    'freq': np.ones(75)
}




data_retweet = {
    'add_data': "data/data_retweet/",
    'C':3,
    'batch': 16,
    'lr': 5e-3,
    'solver': 'MC',
    'set': params_set[1],
    'freq':np.array([4870, 4415,  526])
}

# data=ADD_DATA_proc+"THP_mimic/"

data_mimic_hojjat = {
    'add_data': ADD_DATA_proc+"THP_mimic/",
    'C':40,
    'batch': 4,
    'lr': 0.0020313,
    'solver': 'NU',
    'set': params_set[1],
    'freq': np.ones(40),
}


data_hawkes = {
    'add_data': "data/data_hawkes/",
    'C':5, 
    'batch': 16,
    'lr': 1e-4,
    'solver': 'NU',
    'set': params_set[5],
    'freq': np.array([3698, 1411, 1182, 1119, 2410]),

}




data_bookorder = {

    'add_data': "data/data_bookorder/fold1/",
    'C':2, 
    'batch': 2,
    'lr': 1e-4,
    'solver': 'NU',
    'set': params_set[4],
    'freq': np.array([ 50, 50]),

}
data_conttime = {

    'add_data': "data/data_conttime/",
    'C':5, 
    'batch': 4,
    'lr': 1e-4,
    'solver': 'NU',
    'set': params_set[4],
    'freq': np.array([1296, 1302, 1778, 3388, 2020]),

}

data_so = {
    'add_data': "data/data_so/fold3/",
    'C':22, 
    'batch': 4,
    'lr': 3e-4,
    'solver': 'NU',
    'set': params_set[3],
    'freq': np.array([ 21310,   6725,   2203, 147994,  19908,  20713,   6703,   6508,
        78603,   4712,    630,   9165,   2956,   6452,   1081,    334,
          970,   1852,   1206,    250,     47,     17]),

}

data_mhp = {
    'add_data': "C:/DATA/data/processed/MHP/",
    'C':10, 
    'batch': 4,
    'lr': 3e-4,
    'solver': 'NU',
    'set': params_set[6],
    'freq': np.ones(10),
    # 'dataloader': preprocess.Dataset_mhp

}

DATASET_NAMES = {
    'SO': data_so,
    'hawkes': data_hawkes,
    'ReTweet': data_retweet,
    'MIMIC-II': data_mimic,
    'MemeTracker': data_meme,
    'MHP': data_mhp,
    # 'SO': data_so,
}

# Settings

In [None]:
!python starter.py dfsd -vae

In [None]:
# from numpy import float64


data = data_mhp
data['model_name'] = 'model' + '_' + 'THP_p100_con1_new_detach1_vae0_DivT1' + '/'
data['load_model'] = False
data['save_model'] = True   
data['num_workers'] = 0
data['max_epochs'] = 100
data['add_model'] = data['add_data'] + data['model_name']

data['temp_enc'] = 1
data['temp_enc_mode'] = 'time' # 'lang' or 'time'
data['temp_concate'] = 1 # adds previous interevent time to the embedded events
data['gap_trans'] = 0

data['w_norm_mode'] = 'none' # none, 1/freq, 1/sqrt(freq), 1-freq, CB
# data['w_norm_mode'] = '1/sqrt(freq)' # none, 1/freq, 1/sqrt(freq), 1-freq, CB

data['n_sub'] = 100

data['write2tsne'] = 2

data['vae'] = 0
data['detach'] = True


# Loss settings
data['time_loss'] = Utils.time_loss
data['log_likelihood'] = Utils.log_likelihood # lambda x:x

data['alpha_pred_loss'] = 1
data['alpha_se'] = 1e2
data['smooth']=0

# data['loss_type'] = 'FocalLoss'
# data['focal_gamma'] = 2
data['loss_type'] = 'CEL'
# data['loss_type'] = 'F1'

data['w_CB_beta'] = 0.99




data['horizon'] = 1

print(f"tensorboard --logdir={data['add_data']} --port 1374")
print(f"tensorboard --logdir={data['add_model']} --port 1374")

# RUN!

In [None]:

print(f"tensorboard --logdir={data['add_model']} --port 1374")
print(f"tensorboard --logdir={data['add_data']} --port 1374")

from pipeline import main


if __name__ == '__main__':
    
    main(data)

# Optimization


In [None]:
np.random.randint(40,60)

In [None]:
data = data_so
data['load_model'] = False
data['save_model'] = False
data['num_workers'] = 0                                                                                                                                                                                                                                                                     
data['Optuna_scenario'] = 'Optuna_full_p6_noConcat/'

print(f"tensorboard --logdir={data['add_data'] + data['Optuna_scenario'] } --port 1374")

In [None]:
def build_train_and_evaluate(data, hparams, trial):

    
    data['model_name'] = f"Trial {trial.number}"
    data['add_model'] = data['add_data'] + data['model_name']

    torch.cuda.empty_cache()
    # torch.cuda.empty_cache()
    print("###########################################################################",torch.cuda.memory_reserved())
    opt = myparser(data, hparams = hparams)
    writer = SummaryWriter(log_dir=opt.optuna_dir, comment = f"Trial {trial.number}")


    opt.device = torch.device('cuda')
    
    # setup the log file




    print('[Info] parameters: {}'.format(opt))

    """ prepare dataloader """
    trainloader, testloader, num_types = prepare_dataloader(opt)

    """ prepare model """
    model = Transformer(
        num_types=num_types,
        d_model=opt.d_model,
        d_rnn=opt.d_rnn,
        d_inner=opt.d_inner_hid,
        n_layers=opt.n_layers,
        n_head=opt.n_head,
        d_k=opt.d_k,
        d_v=opt.d_v,
        dropout=opt.dropout,

        temp_enc = opt.temp_enc,
        temp_enc_mode = opt.temp_enc_mode,
        temp_concate = opt.temp_concate,
        vae = opt.vae

    )
    model.to(opt.device)

    """ optimizer and scheduler """
    optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()),
                           opt.lr, betas=(0.9, 0.999), eps=1e-05)
    scheduler = optim.lr_scheduler.StepLR(optimizer, 10, gamma=0.5)

    """ prediction loss function, either cross entropy or label smoothing """
    if opt.smooth > 0:
        pred_loss_func = Utils.LabelSmoothingLoss(opt.smooth, num_types, ignore_index=-1)
    else:
        # pred_loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='none')
        pred_loss_func = opt.pred_fun
    pred_loss_func = opt.pred_fun

    """ number of parameters """
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('[Info] Number of parameters: {}'.format(num_params))

    print(f'[Info] w_norm_mode: {opt.w_norm_mode}')
    print(f'[Info] loss_type: {opt.loss_type}')
    print(f'[Info] temp_enc: {opt.temp_enc}')
    print(f'[Info] temp_enc_mode: {opt.temp_enc_mode}')
    print(f'[Info] temp_concate: {opt.temp_concate}')


    """ load model if necessary """
    # if opt.load_model:
    #     model, optimizer = load_checkpoint(model, optimizer, folder=opt.data+'model/')

    """ train the model """

    opt.writer = SummaryWriter(log_dir=opt.optuna_dir + f"Trial {trial.number}")
    valid_event, valid_type, valid_time, inter_Obj_val = train(model, trainloader, testloader, optimizer, scheduler, pred_loss_func, opt, trial=trial)
    opt.writer.close()

    # objective_value = valid_event + valid_type + valid_time/100
    # objective_value = valid_event + valid_type - valid_time/100
    objective_value = inter_Obj_val

    writer.add_hparams(hparam_dict=hparams,metric_dict= {'hparam/valid_event': valid_event, 'hparam/valid_type': valid_type,
                                                             'hparam/valid_time, ': valid_time,'hparam/objective_value': objective_value, },run_name=f"Trial {trial.number}")
    writer.close()
    
    del model, trainloader, testloader, optimizer, scheduler, pred_loss_func, opt
    print(f"obj_val{objective_value}")
    
    return objective_value

In [None]:
def objective(trial):

    print(trial)
    hparams = {
        # 'batch_size':     trial.suggest_categorical('batch_size', [4,8]),
        # 'n_head':         trial.suggest_categorical('n_head', [2,4,8]),
        # 'n_layers':       trial.suggest_categorical('n_layers', [2,4,8]),
        # 'd_model':        trial.suggest_categorical('d_model', [64,128,256,512]),
        # 'd_rnn':          trial.suggest_categorical('d_rnn', [64]),
        # 'd_inner_hid' :   trial.suggest_categorical('d_inner_hid', [64,128,256,512]),
        # 'd_k':            trial.suggest_categorical('d_k', [128,256,512,]),
        # 'd_v':            trial.suggest_categorical('d_v', [128,256,512,]),
        # 'dropout':        trial.suggest_loguniform('dropout', 0.1, 0.9),
        # 'lr':             trial.suggest_loguniform('lr', 1e-4, 1e-2),
        # 'smooth':         trial.suggest_uniform('smooth', 0, 1),
        # 'w_norm_mode':    trial.suggest_categorical('w_norm_mode', ['none','1/freq','1-freq','CB']),
        
        # 'w_CB':           trial.suggest_categorical('w_CB', [0,1]),
        # 'w_CB_beta':        trial.suggest_loguniform('w_CB_beta', 0.9 ,0.9999),
        
        # 'loss_type':      trial.suggest_categorical('loss_type', ['CEL','FocalLoss']),
        # 'focal_gamma':             trial.suggest_categorical('focal_gamma', [Utils.type_loss_CE,Utils.type_loss_focal]),

        # 'smooth':         trial.suggest_categorical('smooth', [0]),
        # 'horizon':        trial.suggest_categorical('horizon', [10]),

        # 'temp_enc':        trial.suggest_categorical('temp_enc', [0,1]),
        'temp_concate':        trial.suggest_categorical('temp_concate', [0]),
        'w_norm_mode':        trial.suggest_categorical('w_norm_mode', ['1/sqrt(freq)','none']),

        # 'alpha_pred_loss':        trial.suggest_loguniform('alpha_pred_loss', 1,100),
        # 'alpha_se':        trial.suggest_loguniform('alpha_se', 1,100),
    }


    if 'w_norm_mode' in hparams:
        if hparams['w_norm_mode']=='CB':
            hparams['w_CB_beta'] = trial.suggest_categorical('w_CB_beta',[0.9, 0.99, 0.999, 0.9999])
    if 'loss_type' in hparams:
        if hparams['loss_type']=='FocalLoss':
            hparams['focal_gamma'] = trial.suggest_loguniform('focal_gamma', 1 ,5)
    # batch_size=4
    # n_head=4
    # n_layers=4
    # d_model=512
    # d_rnn=64
    # d_inner=1024
    # d_inner_hid = 128
    # d_k=512
    # d_v=512
    # dropout=0.1
    # lr=1e-4
    # smooth=0.0


    
    # model = build_model(hparams)
    # objective_value = 1
    objective_value = build_train_and_evaluate(data,hparams,trial)

    return objective_value

In [None]:

# torch.cuda.empty_cache()
torch.cuda.memory_allocated()
torch.cuda.memory_reserved()
torch.cuda.memory_reserved()


In [None]:
import optuna
from optuna.integration import PyTorchLightningPruningCallback
# from optuna.integration.tensorboard import TensorBoardCallback

# tensorboard_callback = TensorBoardCallback("logs/optuna/", metric_name="accuracy")

if os.path.exists(data['add_data'] + data['Optuna_scenario']) and os.path.isdir(data['add_data'] + data['Optuna_scenario']):
    shutil.rmtree(data['add_data'] + data['Optuna_scenario'])
    
print(f"######################### TENSORBOARD #######################\ntensorboard --logdir={data['add_data'] + data['Optuna_scenario']} --port 1374")
print(f"######################### TENSORBOARD #######################\ntensorboard --logdir={data['add_data']} --port 1374")

pruner=optuna.pruners.MedianPruner()
study = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(), pruner=None)
study.optimize(objective, n_trials=100)

# Visualization

In [11]:
model_folders = [

    # 'data/data_so/fold3/model_SET1_SO_p10_con0_gap0_sqrt0_detach0_DivT0',
    # 'data/data_hawkes/model_SET1_hawkes_p10_con0_gap0_sqrt0_detach0_DivT0',
    # 'data/data_hawkes/model_SET1_hawkes_p10_con1_gap0_sqrt0_detach0_DivT0',
    # 'C:\DATA\data\processed\MHP_sp0.9\MHP-detach-concat-vae'
    # 'data/data_so/fold3/model_SET1_SO_p10_con1_gap0_sqrt0_detach0_DivT0',

    # 'C:\\DATA\\data\\processed\\MHP_sp0.9\\MHP\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.9\\MHP-concat\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.9\\MHP-concat-vae\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.9\\MHP-concat-detach\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.9\\MHP-concat-detach-vae\\',

    # 'C:\\DATA\\data\\processed\\MHP_sp0.8\\MHP\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.8\\MHP-concat\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.8\\MHP-concat-vae\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.8\\MHP-concat-detach\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.8\\MHP-concat-detach-vae\\',

    # 'C:\\DATA\\data\\processed\\MHP_sp0.6\\MHP\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.6\\MHP-concat\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.6\\MHP-concat-vae\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.6\\MHP-concat-detach\\',
    # 'C:\\DATA\\data\\processed\\MHP_sp0.6\\MHP-concat-detach-vae\\',


    # 'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP\\',
    # 'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP-concat\\',
    # 'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP-concat-vae\\',
    # 'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP-concat-detach\\',
    # 'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP-concat-detach-vae\\',

    # 'C:\\DATA\\data\\processed\\v3_MHP_sp0.9\\MHP\\',
    # 'C:\\DATA\\data\\processed\\v3_MHP_sp0.9\\MHP-concat\\',
    # 'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP-concat-vae\\',
    # 'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP-concat-detach\\',
    # 'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP-concat-detach-vae\\',
    'C:\\DATA\\data\\processed\\THP_mimic_icd10\\MIMIC_ICD10-concat - Copy\\'
]




data_vis = {}

In [12]:
import glob

search_dir = 'data/data_so/fold3/model_SET1_SO_p10_con0_gap0_sqrt0_detach0_DivT0/'
files = list(filter(os.path.isfile, glob.glob(search_dir + "*.tar")))

# files = os.listdir(path)
files.sort(key=os.path.getctime)
files

['data/data_so/fold3/model_SET1_SO_p10_con0_gap0_sqrt0_detach0_DivT0\\my_checkpoint.pth.tar']

In [13]:
def import_model(model_folder):    
    
    tar_file = [x for x in os.listdir(model_folder) if x.endswith('.tar') ]
    spec_file = [x for x in os.listdir(model_folder) if x.endswith('spec.pkl') ]
    with open(model_folder+'/spec.pkl', 'rb') as file:
        print(file)
        data = pickle.load(file)
        

    opt = myparser(data,hparams=None)
    opt.device = torch.device('cuda')

    """ prepare model """
    model = Transformer(
        num_types=opt.C,
        d_model=opt.d_model,
        # d_rnn=opt.d_rnn,
        d_inner=opt.d_inner_hid,
        n_layers=opt.n_layers,
        n_head=opt.n_head,
        d_k=opt.d_k,
        d_v=opt.d_v,
        dropout=opt.dropout,
        

        temp_enc = opt.temp_enc,
        temp_enc_mode = opt.temp_enc_mode,
        temp_concate = opt.temp_concate,
        vae = opt.vae,

        horizon = opt.horizon,
        detach = opt.detach,

    )
    _ = model.to(opt.device)

    return model, opt




In [14]:
from preprocess import Dataset_mhp
f_dataloader = Dataset_mhp.prepare_dataloader



def read_selfAttn (model, opt):
    
    trainloader, testloader, num_types = f_dataloader(opt)

    list_enc_self_attn=[]
    list_event_type=[]

    for rep in range(1):    
        with torch.no_grad():
            
            for batch in tqdm(testloader, mininterval=2,                desc='  - (Evaluation) ', leave=False):

                event_time, time_gap, event_type = map(lambda x: x.to(opt.device), batch)
                # print(event_type[1])


                if opt.gap_trans:
                    time_gap_trans = torch.tensor( opt.pt.transform(time_gap.cpu().numpy().reshape(-1,1)).reshape(time_gap.size()) ).to(opt.device)

                    enc_out, prediction = model(event_type, event_time, time_gap_trans)
                else:
                    enc_out, prediction = model(event_type, event_time, time_gap)
                
                # print(event_type[1])
                # print(event_time[1])
                # print(time_gap[1])

                enc_self_attn = prediction[2]
                # print(enc_self_attn[1,-1,:,:].mean())

                list_enc_self_attn.extend([enc_self_attn[i,-1,:,:].cpu().detach() for i in range(enc_self_attn.shape[0])])  # last attention layer
                list_event_type.extend(   [   event_type[i].cpu().detach() for i in range(event_type.shape[0]) ] )


    
    return list_enc_self_attn, list_event_type


In [15]:
def cal_exc_mat(list_enc_self_attn, list_event_type, C):


    list_exc_mat = []

    def fun(mat, types, C):
        ex_mat = np.zeros((C, C))


        for i in range(C):
            for j in range(C):
                # y
                q1 = np.where(types==i+1)[0]
                q2 = np.where(types==j+1)[0]
                # mat.shape
                # mat[np.where(y==i),np.where(y==j)]
                # mat[q1][:,q2]
                if i==9:
                    a=1
                ex_mat[i,j] = mat[q1][:,q2].mean()
        ex_mat[np.isnan(ex_mat)] = 0
        # if np.isnan(ex_mat).sum():
        #     a=1
        return ex_mat

    for c, enc_self_attn in enumerate(list_enc_self_attn):
        
        # adjustment
        a = torch.arange(len(enc_self_attn))+1
        b = a[:,None].expand(-1, len(enc_self_attn))
        enc_self_attn = enc_self_attn*b
        
        event_type = list_event_type[c]
        # print(enc_self_attn.min())
        ex_mat = fun(enc_self_attn, event_type, C)

        list_exc_mat.append(ex_mat)

    total_exc_mat = np.mean( np.stack(list_exc_mat), axis=0 )
    return total_exc_mat

## Compute

In [16]:
# reading the model

for model_folder in model_folders:
    auc=[]   
    for rep in range(10):
        print(model_folder)

        # reading spec and model
        if model_folder not in data_vis:
            data_vis[model_folder] = {}

        model, opt = import_model(model_folder)
        data_vis[model_folder]['model'], data_vis[model_folder]['opt'] = model, opt




        # evaluate model on test loader --> save [list_enc_self_attn, list_event_type]
        list_enc_self_attn, list_event_type = read_selfAttn (
            data_vis[model_folder]['model'],
            data_vis[model_folder]['opt']
        )
        data_vis[model_folder]['list_enc_self_attn'], data_vis[model_folder]['list_event_type'] = list_enc_self_attn, list_event_type

        
        # calculate EXCITATION matrix
        total_exc_mat = cal_exc_mat(
            data_vis[model_folder]['list_enc_self_attn'],
            data_vis[model_folder]['list_event_type'],
            data_vis[model_folder]['opt'].C
        )
        data_vis[model_folder]['total_exc_mat'] = total_exc_mat

        # load DGP

        with open(data_vis[model_folder]['opt'].add_data+'dgp.pkl', 'rb') as file:
            dgp = pickle.load(file)
        data_vis[model_folder]['dgp'] = dgp


        # compute fpr, tpr, auc
        y_true = ( data_vis[model_folder]['dgp']['adjacency']>0).astype(int).flatten()
        y_score = data_vis[model_folder]['total_exc_mat'].flatten()

        
        auc.append(metrics.roc_auc_score(y_true, y_score))

        fpr, tpr, ths = metrics.roc_curve(y_true, y_score)
        pr, rec, ths2 = metrics.precision_recall_curve(y_true, y_score)


        data_vis[model_folder]['metrics'] = {
            'auc_mean':np.mean(auc), 'auc_std':np.std(auc), 'fpr':fpr, 'tpr':tpr,
            'pr':pr, 'rec':rec

        }
    

C:\DATA\data\processed\THP_mimic_icd10\MIMIC_ICD10-concat - Copy\
<_io.BufferedReader name='C:\\DATA\\data\\processed\\THP_mimic_icd10\\MIMIC_ICD10-concat - Copy\\/spec.pkl'>
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


AttributeError: 'myparser' object has no attribute 'd_model'

## Compare

In [241]:
# plot auc curves for estimatioin of adjacency matrix

fig = make_subplots(rows=3, cols=1).update_layout(width=750, height=1500)


_ = fig.add_trace(go.Scatter(x=[0,1], y=[0,1], name='baseline'))


for i, model_folder in enumerate(model_folders):

    model_folder
    fpr = data_vis[model_folder]['metrics']['fpr']
    tpr = data_vis[model_folder]['metrics']['tpr']
    auc_mean = data_vis[model_folder]['metrics']['auc_mean']
    auc_std = data_vis[model_folder]['metrics']['auc_std']

    pr = data_vis[model_folder]['metrics']['pr']
    rec = data_vis[model_folder]['metrics']['rec']

    _ = fig.add_trace(go.Scatter(x=fpr, y=tpr, name = f'model{i} - auc[sd] {auc_mean :.2f}[{auc_std :.2f}]'), row=1, col=1)
    # _ = fig.add_trace(go.Scatter(x=rec, y=pr, name = f'model{i} - auc[sd] {auc_mean :.2f}[{auc_std :.2f}]'), row=2, col=1)


    # plot GT and predicted excitation matrix
    # _ = fig.add_trace(go.Heatmap(z = data_vis[model_folder]['dgp']['adjacency'] , name = f'model{i} - mat_DGP'), row=2, col=1)
    # _ = fig.add_trace(go.Heatmap(z = data_vis[model_folder]['total_exc_mat'] , name = f'model{i} - mat_PRED'), row=3, col=1)
    # fig.update_layout(width=750, height=1500)

fig.show()
    

'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP\\'

'C:\\DATA\\data\\processed\\v2_MHP_sp0.9\\MHP-concat\\'

'C:\\DATA\\data\\processed\\v3_MHP_sp0.9\\MHP\\'

'C:\\DATA\\data\\processed\\v3_MHP_sp0.9\\MHP-concat\\'

In [103]:
# reading the model

for model_folder in model_folders:
    print(model_folder)
    if model_folder not in data_vis:
        data_vis[model_folder] = {}
    tar_file = [x for x in os.listdir(model_folder) if x.endswith('.tar') ]
    spec_file = [x for x in os.listdir(model_folder) if x.endswith('spec.pkl') ]
    with open(model_folder+'/spec.pkl', 'rb') as file:
        print(file)
        data = pickle.load(file)
        



    opt = myparser(data,hparams=None)
    opt.device = torch.device('cuda')

    """ prepare model """
    model = Transformer(
        num_types=opt.C,
        d_model=opt.d_model,
        # d_rnn=opt.d_rnn,
        d_inner=opt.d_inner_hid,
        n_layers=opt.n_layers,
        n_head=opt.n_head,
        d_k=opt.d_k,
        d_v=opt.d_v,
        dropout=opt.dropout,
        

        temp_enc = opt.temp_enc,
        temp_enc_mode = opt.temp_enc_mode,
        temp_concate = opt.temp_concate,
        vae = opt.vae,

        horizon = opt.horizon,
        detach = opt.detach,

    )
    _ = model.to(opt.device)

    optimizer = optim.Adam(filter(lambda x: x.requires_grad, model.parameters()),
                           opt.lr, betas=(0.9, 0.999), eps=1e-05)
    

    model, optimizer = load_checkpoint(model, optimizer, folder=opt.add_model)



    data_vis[model_folder]['model'] = model
    data_vis[model_folder]['opt'] = opt

    tar_file
    spec_file

    # search_dir = 'data/data_so/fold2/model_THP_p666_n1000_cat_sqrt_trans_NLL_tsne3/'
    # tsne_files = list(filter(os.path.isfile, glob.glob(model_folder + "/tsne*.pkl")))


C:\DATA\data\processed\MHP_sp0.9\MHP\
<_io.BufferedReader name='C:\\DATA\\data\\processed\\MHP_sp0.9\\MHP\\/spec.pkl'>
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


' prepare model '

=> ######################## loading checkpoint from `C:\DATA\data\processed\MHP_sp0.9\MHP\`


['my_checkpoint.pth.tar']

['spec.pkl']

In [104]:
# plot example CIFs


""" prepare dataloader """
opt = data_vis[model_folders[0]]['opt']
trainloader, testloader, num_types = prepare_dataloader(opt)

for model_folder in model_folders:
    model = data_vis[model_folders[0]]['model']
    opt = data_vis[model_folders[0]]['opt']
    
    with torch.no_grad():
        for batch in tqdm(testloader, mininterval=2,
                            desc='  - (Evaluation) ', leave=False):

            event_time, time_gap, event_type = map(lambda x: x.to(opt.device), batch)
            
            # enc_out, prediction = model(event_type,event_time, time_gap)
            torch.cuda.empty_cache()

            
            if opt.gap_trans:
                time_gap_trans = torch.tensor( opt.pt.transform(time_gap.cpu().numpy().reshape(-1,1)).reshape(time_gap.size()) ).to(opt.device)

                enc_out, prediction = model(event_type, event_time, time_gap_trans)
            else:
                enc_out, prediction = model(event_type, event_time, time_gap)
            
            enc_self_attn = prediction[2]
            data_vis[model_folder]['enc_self_attn'] = enc_self_attn
            
            continue

            # with hk_utils.Timer('compute loss'):
                
            #     with hk_utils.Timer('event loss'):

            event_ll, non_event_ll, (all_lambda, type_mask) = opt.event_loss(model, enc_out, event_time, event_type)
            event_loss = -torch.sum(event_ll - non_event_ll)

            all_lambda.shape
            type_mask.shape
            event_type.shape
            event_time.shape
            x=event_time[0].cpu().numpy()
            y=event_type[0].cpu().numpy()
            lamda = (all_lambda * 1)[0].cpu().numpy() # [L,C]
            fig = go.Figure(data=go.Scatter(x=x[x>0], y=y[x>0]*1, mode='markers'))

            for i in range(lamda.shape[-1]):
                _ = fig.add_trace(go.Scatter(x = x[x>0], y= lamda[x>0,i] +i*1, mode='markers+lines' ))

            fig.show()


            mat = enc_self_attn[3,0].cpu().numpy()
            fig = go.Figure(data=go.Heatmap(z=mat))
            fig.show()

            term


' prepare dataloader '

[Info] Loading train data...
[Info] Loading dev data...
[Info] Loading test data...
 
        train size: 7
        test size: 7
        dev size: 7

        percentage: 100
    


                                                        

In [15]:
mat = np.flip(enc_self_attn[0].sum(axis=0).cpu().numpy(), axis=0)
x=[str(i) for i in np.arange(1,len(mat)+1)]
y=[str(i) for i in np.arange(len(mat),0,-1)]

fig = go.Figure(data=go.Heatmap(z=mat, x=x, y=y))
fig.show()

mat = np.flip(enc_self_attn[1].sum(axis=0).cpu().numpy(), axis=0)
fig = go.Figure(data=go.Heatmap(z=mat, x=x, y=y))
fig.show()

mat = np.flip(enc_self_attn[2].sum(axis=0).cpu().numpy(), axis=0)
fig = go.Figure(data=go.Heatmap(z=mat, x=x, y=y))
fig.show()



mat = np.flip(enc_self_attn[3].sum(axis=0).cpu().numpy(), axis=0)
fig = go.Figure(data=go.Heatmap(z=mat, x=x, y=y    ))
fig.show()

In [None]:
np.arange(len(mat),0,-1)
np.arange(1,len(mat)+1)

In [None]:
all_lambda
all_lambda * type_mask


In [16]:
from numpy import squeeze


df = pd.DataFrame()
score_list = []
with torch.no_grad():
    # h=1
    for i_sub, batch in enumerate(tqdm(trainloader, mininterval=2,
                            desc='  - (Validation) ', leave=False) ):
        labtimes, labtimes_gap,labtimes_gap_trans, labevents, stateevents_converted = map(lambda x: x.to(opt.device), batch)
        
        # i_sub
        if labtimes.shape[1]<opt.horizon:
            continue



        if opt.gap_trans:
            enc_out, prediction = model(labevents, labtimes, stateevents_converted, labtimes_gap_trans)
        else:
            enc_out, prediction = model(labevents, labtimes, stateevents_converted, labtimes_gap)

        # prediction[0].shape
        _, pred_num, y_true, y_pred, y_score = opt.type_loss(prediction[0], labevents,  opt.pred_fun, h = opt.horizon, params = opt.loss_params)
        
        if opt.gap_trans:
            se,ae, nae, pred_masked, true_masked = opt.time_loss(prediction[1], labtimes_gap_trans, h = opt.horizon) 
            
            # # time_gap_all.append(labtimes_gap_trans[:,h:][~masks_times].detach().cpu().numpy() )
            # time_gap_all.append(true_masked.detach().cpu().numpy() )
            # # time_gap_next.append( prediction[1].squeeze(-1)[:,:-h][~masks_times].detach().cpu().numpy() )
            # time_gap_next.append( pred_masked.detach().cpu().numpy() )
        
        else:
            se,ae, nae, pred_masked, true_masked = opt.time_loss(prediction[1], labtimes_gap, h = opt.horizon) 
            
            # # time_gap_all.append(labtimes_gap[:,h:][~masks_times].detach().cpu().numpy() )
            # time_gap_all.append(true_masked.detach().cpu().numpy() )
            # # time_gap_next.append( prediction[1].squeeze(-1)[:,:-h][~masks_times].detach().cpu().numpy() )
            # time_gap_next.append( pred_masked.detach().cpu().numpy() )
            
        score_list.append(y_score)


        df_temp = pd.DataFrame()
        
        df_temp['time_gap_2next'] = labtimes_gap.squeeze()[:].cpu().numpy()


        df_temp['time'] = labtimes.squeeze()[:].cpu().numpy() #
        # df_temp['time'] = df_temp['time'] - df_temp.iloc[0]['time']
       
        # df_temp['type'] = truth.squeeze().cpu().numpy()
        df_temp['color'] = 'GT'

        # df_temp['time_next'] = np.cumsum(prediction[1].squeeze().cpu().numpy())[:-h]
        df_temp.loc[1:,'time_gap_2prev_pred'] = pred_masked.cpu().numpy()
        df_temp.loc[1:,'time_pred'] =  df_temp['time'] + df_temp.loc[1,'time_gap_2prev_pred']
        
        
        df_temp['type'] = labevents[0,:,:].tolist()
        df_temp['type_pred'] = [labevents[0,0,:].tolist()] + y_pred.int().tolist()
        # df_temp['color'] = 'Pred'


        df_temp.insert(0,'i', i_sub)

        df = pd.concat([df, df_temp])

        # if len(df)>500000000:
        #     break

    
    
# create time diff and accumalted time
df['time_diff'] = df.groupby('i')['time'].diff()
df['time_diff_pred'] = df.groupby('i')['time_pred'].diff()

df.loc[df['time_diff'].isnull(),'time_diff'] = 0
df['time_acm'] = df['time_diff'].cumsum()

df.loc[df['time_diff_pred'].isnull(),'time_diff_pred'] = 0
df['time_acm_pred'] = df['time_diff_pred'].cumsum()

# create splits
df = df.reset_index(drop=True).reset_index()
df = df.sort_values(by=['color','index'])
df['i_diff'] = df['i'].diff()
q = df['i_diff']==1
list_split = df.loc[q,'time_acm'].tolist()


# extract labels if available

import pickle
opt.data

# if os.path.exists(opt.data + 'mappings.pkl'):
#     with open(opt.data + 'mappings.pkl', 'rb') as f:
#         mappings = pickle.load(f, encoding='latin-1')
#         inv_mappings = {v+1: k for k, v in mappings.items()}
# else:
#     inv_mappings = {i:i for i in range(df['type'].max()+1)}

# inv_mappings
# df['label'] = df['type'].replace(inv_map)


# create labels

# df['label'] = df['type'].replace(inv_mappings)


df

                                                         

ValueError: not enough values to unpack (expected 5, got 3)

# Model summary

In [None]:

# default device is CUDA
opt = myparser(data,hparams=None)
opt.device = torch.device('cuda')
""" prepare dataloader """
trainloader, testloader, num_types = prepare_dataloader(opt)


""" prepare model """
model = Transformer(
    num_types=num_types,
    d_model=opt.d_model,
    d_rnn=opt.d_rnn,
    d_inner=opt.d_inner_hid,
    n_layers=opt.n_layers,
    n_head=opt.n_head,
    d_k=opt.d_k,
    d_v=opt.d_v,
    dropout=opt.dropout,
    horizon = opt.horizon,

)
model.to(opt.device)


""" visulaize model """
print(model)
temp = summary(model, input_data=[torch.randint(num_types, (10, 41)).to(opt.device), torch.randn(10, 41).to(opt.device)])
print(temp)