In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import random
import pickle
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler

from eval.evaluate import predict

from nets import deepEM
from loader.prepData import prepdata
from loader.prepNN import prep4nn
from utils import utils
from torch.profiler import profile, record_function, ProfilerActivity


2022-11-18 14:15:39.478 | INFO     | bert.modeling:<module>:231 - Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex .


In [3]:

def read_test_data(test_data, params):
    """Turn prepared test data into network inputs plus an index dataloader.

    Args:
        test_data: Prepared input data, passed straight to
            ``prep4nn.data2network`` in ``'predict'`` mode.
        params: Parameter dict. ``params['batchsize']`` is read here for the
            dataloader; the dict is also forwarded to both ``prep4nn`` calls.

    Returns:
        A ``(nn_test_data, dataloader)`` tuple. ``nn_test_data`` is whatever
        ``prep4nn.torch_data_2_network`` returns (it must expose
        ``['nn_data']['ids']``). ``dataloader`` yields batches of sample
        indices ``0 .. len(ids) - 1`` in sequential order.

    Raises:
        ValueError: If ``prep4nn.data2network`` produces an empty result.
    """
    network_input = prep4nn.data2network(test_data, 'predict', params)
    if not len(network_input):
        raise ValueError("Test set empty.")

    nn_test_data = prep4nn.torch_data_2_network(
        cdata2network=network_input,
        params=params,
        do_get_nn_data=True,
    )

    # The loader only hands out sample indices; callers use them to look up
    # the actual tensors inside nn_test_data.
    n_samples = len(nn_test_data['nn_data']['ids'])
    index_dataset = TensorDataset(torch.arange(n_samples))
    index_loader = DataLoader(
        index_dataset,
        sampler=SequentialSampler(index_dataset),
        batch_size=params['batchsize'],
    )
    return nn_test_data, index_loader


In [4]:
# --- Prediction set-up -------------------------------------------------------
# Loads the prediction YAML config, fixes the random seeds, restores the
# pickled training parameters, prepares the test data/dataloader and builds
# the DeepEM model from its checkpoint.
#
# The config path defaults to the original pubmed100 experiment file. Set the
# DEEPEM_PREDICT_CONFIG environment variable (e.g. to 'configs/debug.yaml') to
# use another config without editing this cell.
# inp_args = utils._parsing_jupyter()
config_path = os.environ.get(
    'DEEPEM_PREDICT_CONFIG',
    '/home/julio/repos/event_finder/DeepEventMine_fork/experiments/pubmed100/configs/predict-pubmed-100.yaml')

with open(config_path, 'r') as stream:
    pred_params = utils._ordered_load(stream)

# Fix seed for reproducibility
# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here only
# affects child processes, not str hashing in the already-running kernel.
os.environ["PYTHONHASHSEED"] = str(pred_params['seed'])
random.seed(pred_params['seed'])
np.random.seed(pred_params['seed'])
torch.manual_seed(pred_params['seed'])

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Load pre-trained parameters
# SECURITY: pickle.load executes arbitrary code embedded in the file; only
# point 'saved_params' at files from a trusted source.
with open(pred_params['saved_params'], "rb") as f:
    parameters = pickle.load(f)

parameters['predict'] = True

# Set predict settings value for params
parameters['gpu'] = pred_params['gpu']
parameters['batchsize'] = pred_params['batchsize']
print('GPU available:', torch.cuda.is_available())
# Use the requested CUDA device only when one was asked for (gpu >= 0) and CUDA
# is actually usable; otherwise fall back to the CPU.
if parameters['gpu'] >= 0 and torch.cuda.is_available():
    device = torch.device("cuda:" + str(parameters['gpu']))
else:
    device = torch.device("cpu")
parameters['device'] = device

# Set evaluation settings
parameters['test_data'] = pred_params['test_data']

parameters['bert_model'] = pred_params['bert_model']

# exist_ok=True avoids the check-then-create race of `exists()` + `makedirs()`
# and makes the cell safe to re-run.
result_dir = pred_params['result_dir']
os.makedirs(result_dir, exist_ok=True)

parameters['result_dir'] = result_dir

# raw text
parameters['raw_text'] = pred_params['raw_text']
# NOTE(review): 'ner_predict_all' mirrors the 'raw_text' flag — presumably
# intentional (no config key of its own is read here); confirm.
parameters['ner_predict_all'] = pred_params['raw_text']
parameters['a2_entities'] = pred_params['a2_entities']
parameters['json_file'] = pred_params['json_file']

# process data
test_data = prepdata.prep_input_data(
    pred_params['test_data'], parameters, json_file=parameters['json_file'])
nntest_data, test_dataloader = read_test_data(test_data, parameters)

# model
deepee_model = deepEM.DeepEM(parameters)

model_path = pred_params['model_path']

# Load all models
utils.handle_checkpoints(
    model=deepee_model,
    checkpoint_dir=model_path,
    params={'device': device},
    resume=True,
)

# Last expression of the cell: moves the model to `device` and displays it.
deepee_model.to(device)

# with profile(activities=[
#         ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True,with_stack=True) as prof:
#     with record_function("model_inference"):


2022-11-18 14:15:41.783 | INFO     | bert.tokenization:from_pretrained:171 - loading vocabulary file data/bert/scibert_scivocab_cased/vocab.txt


GPU available: True
	Words found in train: 10876
	Words found in pre-trained only: 0
	Words not found anywhere: 2083


2022-11-18 14:15:42.477 | INFO     | bert.tokenization:from_pretrained:171 - loading vocabulary file data/bert/scibert_scivocab_cased/vocab.txt
2022-11-18 14:15:52.648 | INFO     | bert.modeling:from_pretrained:577 - loading archive file data/bert/scibert_scivocab_cased
2022-11-18 14:15:52.650 | INFO     | bert.modeling:from_pretrained:595 - Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 31116
}

2022-11-18 14:15:54.355 | INFO     | bert.modeling:from_pretrained:645 - Weights of NestedNERModel not initialized from pretrained model: ['label_ids', 'entity_classifier.weight', 'entity_classifier.bias', 'trigger_classifier.weight', 'trigger_classifier.bias']
2022-11-18 14:15:54.356 | INFO     | bert.modeling:from_pretrained:

data/models/cg/model/20190911030703702499_deepee_base_92_59.49.pt
Loading model from checkpoint data/models/cg/model/


DeepEM(
  (NER_layer): NestedNERModel(
    (bert): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(31116, 768, padding_idx=0)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): BertLayerNorm()
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0): BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): BertSelfOutput(
                (dense): Linear(in_features=768, out_features=768, bias=True)
                (LayerNorm): BertLayerNorm()
        

In [5]:
%load_ext line_profiler


In [20]:
# Line-by-line profile of `predict` (imported from eval.evaluate) on the test dataloader; needs the line_profiler extension.
%lprun -f predict predict(model=deepee_model,result_dir=result_dir, eval_dataloader=test_dataloader,eval_data=nntest_data,g_entity_ids_=test_data['g_entity_ids_'],params=parameters)


Iteration:   0%|          | 0/5 [00:00<?, ?it/s]

[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

Iteration:  20%|██        | 1/5 [00:06<00:27,  6.99s/it]

PREDICT LOOP: --- 6.993008375167847 seconds ---
[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

                                                        

*** KeyboardInterrupt exception caught in code being profiled.



Timer unit: 1e-09 s

Total time: 15.6906 s
File: /home/julio/repos/event_finder/DeepEventMine_fork/eval/evaluate.py
Function: predict at line 9

Line #      Hits         Time  Per Hit   % Time  Line Contents
     9                                           def predict(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params):
    10         1       7129.0   7129.0      0.0      mapping_id_tag = params['mappings']['nn_mapping']['id_tag_mapping']
    11                                           
    12                                               # store predicted entities
    13         1        720.0    720.0      0.0      ent_preds = []
    14                                           
    15                                               # store predicted events
    16         1        411.0    411.0      0.0      ev_preds = []
    17                                           
    18         1       1051.0   1051.0      0.0      fidss, wordss, offsetss, sub_to_wordss, spa

In [10]:
# Line-by-line profile of `predict` (imported from eval.evaluate) on the test dataloader; needs the line_profiler extension.
%lprun -f predict predict(model=deepee_model, result_dir=result_dir, eval_dataloader=test_dataloader, eval_data=nntest_data, g_entity_ids_=test_data['g_entity_ids_'], params=parameters)


Iteration:   0%|          | 0/5 [00:00<?, ?it/s]

data_ids[0] [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127]
NER LOOP: --- 4.019585609436035 seconds ---
NER LAYER: --- 4.100007772445679 seconds ---
REL LAYER: --- 0.015290260314941406 seconds ---
EV LAYER: --- 0.044412851333618164 seconds ---
ALL FOWARD LAYER: --- 4.33444881439209 seconds ---


Iteration:  20%|██        | 1/5 [00:06<00:27,  6.97s/it]

PREDICT LOOP: --- 6.975859642028809 seconds ---
data_ids[0] [128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255]
NER LOOP: --- 6.958784580230713 seconds ---
NER LAYER: --- 7.066285610198975 seconds ---
REL LAYER: --- 0.02063274383544922 seconds ---
EV LAYER: --- 0.13971471786499023 seconds ---
ALL FOWARD LAYER: --- 7.517888069152832 seconds ---


Iteration:  40%|████      | 2/5 [00:18<00:29,  9.74s/it]

PREDICT LOOP: --- 18.6567223072052 seconds ---
data_ids[0] [256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383]
NER LOOP: --- 6.491260528564453 seconds ---
NER LAYER: --- 6.588133096694946 seconds ---
REL LAYER: --- 0.018155574798583984 seconds ---
EV LAYER: --- 0.116729736328125 seconds ---
ALL FOWARD LAYER: --- 7.005605697631836 seconds ---


Iteration:  60%|██████    | 3/5 [00:29<00:20, 10.31s/it]

PREDICT LOOP: --- 29.65032649040222 seconds ---
data_ids[0] [384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511]
NER LOOP: --- 5.789400577545166 seconds ---
NER LAYER: --- 5.879717111587524 seconds ---
REL LAYER: --- 0.017420291900634766 seconds ---
EV LAYER: --- 0.0989067554473877 seconds ---
ALL FOWARD LAYER: --- 6.28899884223938 seconds ---


Iteration:  80%|████████  | 4/5 [00:39<00:10, 10.13s/it]

PREDICT LOOP: --- 39.50711679458618 seconds ---
data_ids[0] [512, 513, 514, 515]


                                                        

NER LOOP: --- 0.23136258125305176 seconds ---
NER LAYER: --- 0.24485397338867188 seconds ---
REL LAYER: --- 0.0026149749755859375 seconds ---
EV LAYER: --- 0.006009101867675781 seconds ---
ALL FOWARD LAYER: --- 0.26873016357421875 seconds ---
PREDICT LOOP: --- 39.9071102142334 seconds ---




(FILE writing): --- 7.7569825649261475 seconds ---


Timer unit: 1e-09 s

Total time: 47.2513 s
File: /home/julio/repos/event_finder/DeepEventMine_fork/eval/evaluate.py
Function: predict at line 9

Line #      Hits         Time  Per Hit   % Time  Line Contents
     9                                           def predict(model, result_dir, eval_dataloader, eval_data, g_entity_ids_, params):
    10         1       3971.0   3971.0      0.0      mapping_id_tag = params['mappings']['nn_mapping']['id_tag_mapping']
    11                                           
    12                                               # store predicted entities
    13         1        540.0    540.0      0.0      ent_preds = []
    14                                           
    15                                               # store predicted events
    16         1        409.0    409.0      0.0      ev_preds = []
    17                                           
    18         1       1071.0   1071.0      0.0      fidss, wordss, offsetss, sub_to_wordss, spa