In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import absolute_import
import argparse
import numpy as np
import torch
import os
import sys
import logging
import pdb

sys.path.insert(0,'/home/ytaille/AttentionSegmentation')

from allennlp.data import Vocabulary
from allennlp.data.iterators import DataIterator
# import allennlp.data.dataset_readers as Readers
import AttentionSegmentation.reader as Readers

# import model as Models
import AttentionSegmentation.model.classifiers as Models

from AttentionSegmentation.commons.utils import \
    setup_output_dir, read_from_config_file
from AttentionSegmentation.commons.model_utils import \
    construct_vocab, load_model_from_existing
# from AttentionSegmentation.visualization.visualize_attns import \
#     html_visualizer
import AttentionSegmentation.model.attn2labels as SegmentationModels

"""The main entry point

This is the main entry point for training HAN SOLO models.

Usage::

    ${PYTHONPATH} -m AttentionSegmentation/main
        --config_file ${CONFIG_FILE}

"""
# Stand-in for the command-line arguments the script version of this entry
# point would parse; a Namespace gives plain attribute access.
args = argparse.Namespace(
    content={},
    config_file='Configs/config_ncbi.json',
    log='INFO',
    loglevel='INFO',
    seed=1,
)

# Setup Experiment Directory
config = read_from_config_file(args.config_file)
if args.seed > 0:
    # Seed every RNG in use so that runs are repeatable.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    _trainer_cfg = config.get('trainer', None)
    # -1 is the "no GPU" default used below, so device 0 is a valid CUDA
    # device and must be seeded too (hence >= 0, not > 0).
    if _trainer_cfg is not None and \
       _trainer_cfg.get('cuda_device', -1) >= 0:
        torch.cuda.manual_seed(args.seed)
serial_dir, config = setup_output_dir(config, args.loglevel)
logger = logging.getLogger(__name__)
logger = logging.getLogger(__name__)

# Load Training Data
TRAIN_PATH = config.pop("train_data_path")
logger.info("Loading Training Data from {0}".format(TRAIN_PATH))

# The dataset reader class is looked up by name in the project's reader
# module and built from the remaining "dataset_reader" parameters.
dataset_reader_params = config.pop("dataset_reader")
reader_type = dataset_reader_params.pop("type", None)
assert reader_type is not None and hasattr(Readers, reader_type),\
    f"Cannot find reader {reader_type}"
reader = getattr(Readers, reader_type).from_params(dataset_reader_params)

instances_train = reader.read(file_path=TRAIN_PATH)
logger.info(
    "Length of {0}: {1}".format("Training Data", len(instances_train))
)

# Load Validation Data (same reader as the training split)
VAL_PATH = config.pop("validation_data_path")
logger.info("Loading Validation Data from {0}".format(VAL_PATH))
instances_val = reader.read(VAL_PATH)
logger.info(
    "Length of {0}: {1}".format("Validation Data", len(instances_val))
)

# Load Test Data -- optional: instances_test stays None when the config has
# no "test_data_path" entry.
TEST_PATH = config.pop("test_data_path", None)
if TEST_PATH is None:
    instances_test = None
else:
    logger.info("Loading Test Data from {0}".format(TEST_PATH))
    instances_test = reader.read(TEST_PATH)
    logger.info(
        "Length of {0}: {1}".format("Testing Data", len(instances_test))
    )

# # Load Pretrained Existing Model
# load_config = config.pop("load_from", None)

# # Construct Vocabulary
# A configured size of -1 (also the default) means "no limit", which is
# passed to Vocabulary.from_instances as None.
vocab_size = config.pop("max_vocab_size", -1)
logger.info("Constructing Vocab of size: {0}".format(vocab_size))
if vocab_size == -1:
    vocab_size = None
vocab = Vocabulary.from_instances(
    instances_train, max_vocab_size=vocab_size
)

# The vocabulary is written to <serial_dir>/vocab, which must already exist.
vocab_dir = os.path.join(serial_dir, "vocab")
assert os.path.exists(vocab_dir), "Couldn't find the vocab directory"
vocab.save_to_files(vocab_dir)

# if load_config is not None:
#     # modify the vocab from the source model vocab
#     src_vocab_path = load_config.pop("vocab_path", None)
#     if src_vocab_path is not None:
#         vocab = construct_vocab(src_vocab_path, vocab_dir)
#         # Delete the old vocab
#         for file in os.listdir(vocab_dir):
#             os.remove(os.path.join(vocab_dir, file))
#         # save the new vocab
#         vocab.save_to_files(vocab_dir)
logger.info("Saving vocab to {0}".format(vocab_dir))
logger.info("Vocab Construction Done")

# # Construct the data iterators
logger.info("Constructing Data Iterators")
iterator_params = config.pop("iterator")
data_iterator = DataIterator.from_params(iterator_params)
# The iterator needs the vocabulary to index instances.
data_iterator.index_with(vocab)

logger.info("Data Iterators Done")

# Create the model
# The model class is looked up by name in the project's classifiers module.
logger.info("Constructing The model")
model_params = config.pop("model")
model_type = model_params.pop("type")
assert model_type is not None and hasattr(Models, model_type),\
    f"Cannot find model {model_type}"
model = getattr(Models, model_type).from_params(
    vocab=vocab,
    params=model_params,
    label_indexer=reader.get_label_indexer()
)
logger.info("Model Construction done")

# visualize = config.pop("visualize", False)
# visualizer = None
# if visualize:
#     visualizer = html_visualizer(vocab, reader)

# Build the segmenter: its class is looked up by name in the attn2labels
# module and constructed from the "segmentation" section of the config.
segmenter_params = config.pop("segmentation")
segment_class = segmenter_params.pop("type")
segmenter = getattr(SegmentationModels, segment_class).from_params(
    vocab=vocab,
    reader=reader,
    params=segmenter_params
)

# logger.info("Segmenter Done")

# print("##################################\nAYYYYYYYYYYYYYYYYYYYYYYYY\n\n\n\n\n\n\n\n###########################")

# exit()


# if load_config is not None:
#     # Load the weights, as specified by the load_config
#     model_path = load_config.pop("model_path", None)
#     layers = load_config.pop("layers", None)
#     load_config.assert_empty("Load Config")
#     assert model_path is not None,\
#         "You need to specify model path to load from"
#     model = load_model_from_existing(model_path, model, layers)
#     logger.info("Pretrained weights loaded")

# logger.info("Starting the training process")





2020-12-14 11:32:46,262: INFO: train_data_path = /home/ytaille/data/resources/medic/ncbi_conll_ner_train.conll
2020-12-14 11:32:46,265: INFO: Loading Training Data from /home/ytaille/data/resources/medic/ncbi_conll_ner_train.conll
2020-12-14 11:32:46,268: INFO: dataset_reader.type = WeakConll2003DatasetReader
2020-12-14 11:32:46,271: INFO: dataset_reader.token_indexers.bert.type = bert-pretrained
2020-12-14 11:32:46,272: INFO: dataset_reader.token_indexers.bert.pretrained_model = ./Data/embeddings/bert-base-multilingual-cased-vocab.txt
2020-12-14 11:32:46,274: INFO: dataset_reader.token_indexers.bert.use_starting_offsets = True
2020-12-14 11:32:46,275: INFO: dataset_reader.token_indexers.bert.do_lowercase = False
2020-12-14 11:32:46,276: INFO: dataset_reader.token_indexers.bert.never_lowercase = None
2020-12-14 11:32:46,279: INFO: dataset_reader.token_indexers.bert.max_pieces = 512
2020-12-14 11:32:46,280: INFO: loading vocabulary file ./Data/embeddings/bert-base-multilingual-cased-voc

0it [00:00, ?it/s]

2020-12-14 11:32:46,429: INFO: Reading instances from lines in file at: /home/ytaille/data/resources/medic/ncbi_conll_ner_train.conll


1803it [00:01, 1699.26it/s]

2020-12-14 11:32:47,489: INFO: Length of Training Data: 1803
2020-12-14 11:32:47,490: INFO: validation_data_path = /home/ytaille/data/resources/medic/ncbi_conll_ner_dev.conll
2020-12-14 11:32:47,491: INFO: Loading Validation Data from /home/ytaille/data/resources/medic/ncbi_conll_ner_dev.conll



0it [00:00, ?it/s]

2020-12-14 11:32:47,495: INFO: Reading instances from lines in file at: /home/ytaille/data/resources/medic/ncbi_conll_ner_dev.conll


319it [00:00, 1946.46it/s]

2020-12-14 11:32:47,658: INFO: Length of Validation Data: 319
2020-12-14 11:32:47,659: INFO: test_data_path = /home/ytaille/data/resources/medic/ncbi_conll_ner_test.conll
2020-12-14 11:32:47,660: INFO: Loading Test Data from /home/ytaille/data/resources/medic/ncbi_conll_ner_test.conll



0it [00:00, ?it/s]

2020-12-14 11:32:47,665: INFO: Reading instances from lines in file at: /home/ytaille/data/resources/medic/ncbi_conll_ner_test.conll


316it [00:00, 1173.91it/s]

2020-12-14 11:32:47,932: INFO: Length of Testing Data: 316
2020-12-14 11:32:47,934: INFO: max_vocab_size = -1
2020-12-14 11:32:47,935: INFO: Constructing Vocab of size: -1
2020-12-14 11:32:47,936: INFO: Fitting token dictionary from dataset.



100%|██████████| 1803/1803 [00:00<00:00, 36978.16it/s]

2020-12-14 11:32:48,000: INFO: Saving vocab to ./trained_models/NCBI-BERT-realFT-PS/run-13/vocab
2020-12-14 11:32:48,001: INFO: Vocab Construction Done
2020-12-14 11:32:48,002: INFO: Constructing Data Iterators
2020-12-14 11:32:48,003: INFO: iterator.type = bucket
2020-12-14 11:32:48,004: INFO: iterator.sorting_keys = [['tokens', 'bert']]
2020-12-14 11:32:48,005: INFO: iterator.padding_noise = 0.1
2020-12-14 11:32:48,006: INFO: iterator.biggest_batch_first = False
2020-12-14 11:32:48,010: INFO: iterator.batch_size = 32
2020-12-14 11:32:48,011: INFO: iterator.instances_per_epoch = None
2020-12-14 11:32:48,012: INFO: iterator.max_instances_in_memory = None
2020-12-14 11:32:48,013: INFO: Data Iterators Done
2020-12-14 11:32:48,013: INFO: Constructing The model
2020-12-14 11:32:48,015: INFO: model.type = MultiClassifier
2020-12-14 11:32:48,017: INFO: model.method = binary
2020-12-14 11:32:48,018: INFO: model.text_field_embedder.type = basic
2020-12-14 11:32:48,019: INFO: model.text_field_e




2020-12-14 11:32:53,131: INFO: Model config {
  "attention_probs_dropout_prob": 0.1,
  "directionality": "bidi",
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pooler_fc_size": 768,
  "pooler_num_attention_heads": 12,
  "pooler_num_fc_layers": 3,
  "pooler_size_per_head": 128,
  "pooler_type": "first_token_transform",
  "type_vocab_size": 2,
  "vocab_size": 119547
}

2020-12-14 11:32:56,647: INFO: Layer encoder.layer.10.attention.self.query.weight is finetuned
2020-12-14 11:32:56,650: INFO: Layer encoder.layer.10.attention.self.query.bias is finetuned
2020-12-14 11:32:56,651: INFO: Layer encoder.layer.10.attention.self.key.weight is finetuned
2020-12-14 11:32:56,652: INFO: Layer encoder.layer.10.attention.self.key.bias is finetuned
2020-12-14 11:32:56,653: INFO: Layer encoder.layer.10.attention.self.value.wei

  "num_layers={}".format(dropout, num_layers))


In [3]:
# Register the "@@UNKNOWN@@" token in the "chunk_tags" namespace of the
# vocabulary held by the data iterator. Being the last expression of the
# cell, the call's return value is what the notebook displays.
data_iterator.vocab.add_token_to_namespace("@@UNKNOWN@@", "chunk_tags")

1141

In [4]:
from AttentionSegmentation.trainer import Trainer

# Build the trainer from the "trainer" section of the config. Only the
# training and validation instances are handed over; the test instances
# loaded earlier are not passed here.
trainer = Trainer.from_params(
    model=model,
    base_dir=serial_dir,
    iterator=data_iterator,
    train_data=instances_train,
    validation_data=instances_val,
    segmenter=segmenter,
    params=config.pop("trainer")
)


2020-12-14 11:32:57,526: INFO: PyTorch version 1.5.1 available.
2020-12-14 11:33:00,409: INFO: TensorFlow version 2.3.1 available.
2020-12-14 11:33:00,929: INFO: Loading faiss with AVX2 support.
2020-12-14 11:33:00,932: INFO: Loading faiss.
2020-12-14 11:33:01,422: INFO: trainer.patience = 10
2020-12-14 11:33:01,423: INFO: trainer.validation_metric = +accuracy
2020-12-14 11:33:01,424: INFO: trainer.num_epochs = 50
2020-12-14 11:33:01,424: INFO: trainer.cuda_device = 0
2020-12-14 11:33:01,425: INFO: trainer.grad_norm = None
2020-12-14 11:33:01,426: INFO: trainer.grad_clipping = None
2020-12-14 11:33:01,427: INFO: trainer.num_serialized_models_to_keep = 1
2020-12-14 11:33:04,148: INFO: trainer.optimizer.type = adam
2020-12-14 11:33:04,150: INFO: trainer.optimizer.parameter_groups = [[['.*bert.*'], ConfigTree([('lr', 2e-07)])], [['.*encoder_word.*', '.*attn.*', '.*logit.*'], ConfigTree([('lr', 0.001)])]]
2020-12-14 11:33:04,151: INFO: Converting Params object to dict; logging of default v

In [5]:
from __future__ import absolute_import
import logging
import os
import shutil
import json
from collections import deque
import time
import re
import datetime
import traceback
import numpy as np
from typing import Dict, Optional, List, Tuple, Union, Iterable, Any, Set
import pdb

import torch
import torch.optim.lr_scheduler
from torch.nn.parallel import replicate, parallel_apply
from torch.nn.parallel.scatter_gather import scatter_kwargs, gather
from tensorboardX import SummaryWriter

from itertools import tee

from allennlp.common import Params
from allennlp.common.checks import ConfigurationError
from allennlp.common.util import peak_memory_mb, gpu_memory_mb
from allennlp.common.tqdm import Tqdm
from allennlp.data.instance import Instance
from allennlp.data.iterators.data_iterator import DataIterator
from allennlp.models.model import Model
from allennlp.nn import util
from allennlp.training.learning_rate_schedulers import LearningRateScheduler
from allennlp.training.optimizers import Optimizer

from AttentionSegmentation.commons.trainer_utils import is_sparse,\
    sparse_clip_norm, move_optimizer_to_cuda, TensorboardWriter
# from AttentionSegmentation.visualization.visualize_attns \
#     import html_visualizer
from AttentionSegmentation.model.attn2labels import BasePredictionClass
logger = logging.getLogger(__name__)

TQDM_COLUMNS = 200

import sys
sys.path.insert(0,'/home/ytaille/deep_multilingual_normalization')
from deep_multilingual_normalization.create_classifiers import create_classifiers
from nlstruct.dataloaders import load_from_brat

logger2 = logging.getLogger("nlstruct")
logger2.setLevel(logging.ERROR)

from notebook_utils import *

def _train_epoch(self, epoch: int) -> Dict[str, float]:
    """
    Trains one epoch and returns metrics.

    In addition to the supervised loss returned by ``self._batch_loss``,
    every batch contributes a REINFORCE-style (Monte Carlo policy gradient)
    term:

    1. the model's attention scores are used as the ``probs`` of a
       ``Binomial`` distribution (default ``total_count``, i.e. 0/1 draws);
    2. ``n_trajectories`` samples are drawn; a token is kept when at least
       one entry along the last attention axis was sampled as 1;
    3. the kept tokens and their ``chunk_tags`` labels are written with
       ``save_to_ann``, reloaded with ``load_from_brat`` and scored by
       ``self.classifier2``;
    4. ``scores['loss'] * log_prob(sample)`` is accumulated and its sum is
       added to the batch loss before back-propagation.

    Parameters
    ----------
    epoch : int
        Index of the epoch being run. Not read by this implementation; kept
        for signature compatibility with the method it replaces.

    Returns
    -------
    Dict[str, float]
        The value of ``self._get_metrics(train_loss, batches_this_epoch,
        reset=True)`` for the finished epoch.
    """
    # Function-scope imports keep this notebook cell self-contained.
    from allennlp.data.fields.array_field import ArrayField
    from torch.distributions import Binomial

    # Policy-gradient settings.
    # Design notes / TODO:
    # - The policy is an "attention mask": attention should be high on the
    #   words needed to predict the CUI; only masked-in words are given to
    #   the normalisation classifier and its loss acts as (negative) reward.
    # - Expected return is sum(prob(T_i | W) * reward(T_i)) with W the weak
    #   labelling weights; minimise the negative return.
    # - No baseline is used yet (e.g. subtract the mean trajectory reward and
    #   divide by its std), nor a horizon (proportion of attention set to 1
    #   per batch), an attention threshold, or a discount factor gamma.
    # - A softmax over attentions does not account for the "O" label; the
    #   raw attention values are used as probabilities instead.
    # - Predicting attention token by token would mimic real RL trajectories
    #   but is very heavy; sampling is used like a density estimate.
    n_trajectories = 10
    bert_name = "bert-base-multilingual-uncased"

    logger.info(f"Peak CPU memory usage MB: {peak_memory_mb()}")
    if torch.cuda.is_available():
        for gpu, memory in gpu_memory_mb().items():
            logger.info(f"GPU {gpu} memory usage MB: {memory}")

    train_loss = 0.0

    # Tag every instance with its position in the training set so that the
    # instances behind a (shuffled) batch can be recovered from the batch.
    for i, td in enumerate(self._train_data):
        td.fields['sample_id'] = ArrayField(np.array([i]))

    # Batches are consumed lazily; the instances of each batch are looked up
    # through 'sample_id', so the epoch never has to be buffered in memory.
    train_generator = self._iterator(self._train_data,
                                     num_epochs=1,
                                     cuda_device=self._iterator_device,
                                     shuffle=True,
                                     )

    num_training_batches = self._iterator.get_num_batches(self._train_data)
    train_generator_tqdm = Tqdm.tqdm(train_generator,
                                     total=num_training_batches
                                     )
    self._last_log = time.time()

    batches_this_epoch = 0
    if self._batch_num_total is None:
        self._batch_num_total = 0

    # Set the model to "train" mode.
    self._model.train()

    for batch in train_generator_tqdm:
        batches_this_epoch += 1
        self._batch_num_total += 1
        batch_num_total = self._batch_num_total

        # In the batch, 'labels' is sentence level and 'tags' is word level.
        # Recover the original instances in the order they appear in it.
        batch_instances = [self._train_data[int(sid.item())]
                           for sid in batch['sample_id']]

        # NOTE(review): self._batch_loss below presumably runs the model on
        # this batch a second time -- confirm, and reuse this output if so.
        output_dict = self._model(**batch)
        attns = output_dict['attentions']

        # assumes attention values lie in [0, 1] so that they are valid
        # probabilities -- TODO confirm
        m = Binomial(probs=attns)

        # Loop-invariant: raw tokens and normalisation labels per sentence.
        real_tokens = [np.array(inst.fields['tokens'].tokens)
                       for inst in batch_instances]
        gold_norm_labels = [np.array(inst.fields['chunk_tags'].labels)
                            for inst in batch_instances]

        trajectory_scores = []

        for _ in range(n_trajectories):
            attn_sample = m.sample()
            # Keep a token if anything was sampled for it. Converting to a
            # NumPy boolean array once makes the indexing below a true
            # boolean mask for every length, including single-token
            # sentences (a one-element tensor is otherwise not treated as a
            # mask by NumPy).
            attn_mask = (attn_sample.sum(-1) > 0).cpu().numpy()

            masked_tokens = [rt[attn_mask[w_id, :len(rt)]]
                             for w_id, rt in enumerate(real_tokens)]
            masked_gold_norm = [gl[attn_mask[w_id, :len(gl)]]
                                for w_id, gl in enumerate(gold_norm_labels)]

            save_to_ann(masked_tokens, masked_gold_norm,
                        '/home/ytaille/data/tmp/ws_inputs/')

            # NLSTRUCT PART
            dataset = load_from_brat("/home/ytaille/data/tmp/ws_inputs")
            dataset['mentions']['mention_id'] = (
                dataset['mentions']['doc_id'] + '.'
                + dataset['mentions']['mention_id'].astype(str)
            )

            try:
                batcher, _, _ = preprocess_train(
                    dataset,
                    vocabularies=self.vocabularies1,
                    bert_name=bert_name,
                )

                tg.set_device('cuda:0')

                pred_batcher = predict(batcher, self.classifier2)

                scores = compute_scores(pred_batcher, batcher)
                trajectory_scores.append(
                    scores['loss'] * m.log_prob(attn_sample))
            except Exception:
                # Best effort: scoring a sampled mask can fail; stop sampling
                # for this batch and train on what was collected, but leave
                # a trace instead of hiding the error.
                logger.warning(
                    "Trajectory scoring failed; skipping the remaining "
                    "trajectories of this batch", exc_info=True)
                break

        if trajectory_scores:
            policy_loss = torch.cat(trajectory_scores).sum()
            print("POLICY loss", policy_loss)
        else:
            policy_loss = 0

        self._optimizer.zero_grad()
        loss = self._batch_loss(batch, for_training=True) + policy_loss
        loss.backward()

        # Make sure Variable is on the cpu before converting to numpy.
        # .cpu() is a no-op if you aren't using GPUs.
        train_loss += loss.data.cpu().numpy()

        self._rescale_gradients()

        # This does nothing if batch_num_total is None or you are using an
        # LRScheduler which doesn't update per batch.
        if self._learning_rate_scheduler:
            self._learning_rate_scheduler.step_batch(batch_num_total)
        self._optimizer.step()

        # Update the description with the latest metrics
        metrics = self._get_metrics(train_loss, batches_this_epoch)
        description = self._description_from_metrics(metrics)

        train_generator_tqdm.set_description(description, refresh=False)
        if hasattr(self, "_tf_params") and self._tf_params is not None:
            # We have TF logging
            if self._batch_num_total % self._tf_params["log_every"] == 0:
                self._tf_log(metrics, self._batch_num_total)

    return self._get_metrics(train_loss, batches_this_epoch, reset=True)
    
import functools
    
# Override the epoch routine on this trainer instance only: the partial
# pre-binds `trainer` as the first argument (`self`) of the notebook-level
# _train_epoch, so trainer._train_epoch(epoch) runs the function above.
trainer._train_epoch = functools.partial(_train_epoch, trainer)

In [6]:
# Start training with the trainer configured in the cells above.
trainer.train()

2020-12-14 11:34:15,109: INFO: Beginning training.
2020-12-14 11:34:15,112: INFO: Starting Training Epoch 1/50
2020-12-14 11:34:15,113: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 11:34:15,208: INFO: GPU 0 memory usage MB: 7358


  return_array[slices] = self.array
  0%|          | 0/57 [00:00<?, ?it/s]

POLICY loss tensor(-112292.7109, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0000, DiseaseClass: 0.2031, Modifier: 0.3281, SpecificDisease: 0.6094, accuracy: 0.2852, loss: -95642.8984 ||:   4%|▎         | 2/57 [00:26<12:13, 13.34s/it] 

POLICY loss tensor(-78994.3594, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-119193.4609, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0208, DiseaseClass: 0.2396, Modifier: 0.4271, SpecificDisease: 0.6667, accuracy: 0.3385, loss: -103492.8698 ||:   5%|▌         | 3/57 [00:40<12:03, 13.40s/it]

POLICY loss tensor(-111444.2656, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0312, DiseaseClass: 0.2625, Modifier: 0.4437, SpecificDisease: 0.6562, accuracy: 0.3484, loss: -104415.1734 ||:   9%|▉         | 5/57 [01:06<11:26, 13.20s/it]

POLICY loss tensor(-100154.2578, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-129726.5469, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0312, DiseaseClass: 0.2098, Modifier: 0.3973, SpecificDisease: 0.5670, accuracy: 0.3013, loss: -103905.7522 ||:  12%|█▏        | 7/57 [01:32<11:02, 13.24s/it]

POLICY loss tensor(-75539.0703, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0312, DiseaseClass: 0.1875, Modifier: 0.3984, SpecificDisease: 0.5391, accuracy: 0.2891, loss: -99679.0967 ||:  14%|█▍        | 8/57 [01:46<10:54, 13.36s/it] 

POLICY loss tensor(-70093.1094, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-110578.7812, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0312, DiseaseClass: 0.1944, Modifier: 0.4201, SpecificDisease: 0.5625, accuracy: 0.3021, loss: -100890.1059 ||:  16%|█▌        | 9/57 [02:00<10:42, 13.40s/it]

POLICY loss tensor(-121927.8828, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0483, DiseaseClass: 0.2102, Modifier: 0.4403, SpecificDisease: 0.5625, accuracy: 0.3153, loss: -100695.4134 ||:  19%|█▉        | 11/57 [02:25<10:05, 13.15s/it]

POLICY loss tensor(-77711.9766, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0469, DiseaseClass: 0.2057, Modifier: 0.4401, SpecificDisease: 0.5651, accuracy: 0.3145, loss: -100727.6953 ||:  21%|██        | 12/57 [02:38<09:45, 13.01s/it]

POLICY loss tensor(-101083.3750, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0457, DiseaseClass: 0.1923, Modifier: 0.4351, SpecificDisease: 0.5529, accuracy: 0.3065, loss: -99099.9543 ||:  23%|██▎       | 13/57 [02:51<09:29, 12.94s/it] 

POLICY loss tensor(-79567.6172, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0480, DiseaseClass: 0.1808, Modifier: 0.4196, SpecificDisease: 0.5424, accuracy: 0.2977, loss: -97732.9989 ||:  25%|██▍       | 14/57 [03:04<09:14, 12.89s/it]

POLICY loss tensor(-79963.1250, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-118277.8516, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.0835, DiseaseClass: 0.1914, Modifier: 0.4114, SpecificDisease: 0.5519, accuracy: 0.3096, loss: -92908.6647 ||:  28%|██▊       | 16/57 [03:19<06:33,  9.59s/it]

POLICY loss tensor(-114030.2891, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.1310, DiseaseClass: 0.2122, Modifier: 0.4130, SpecificDisease: 0.5736, accuracy: 0.3325, loss: -94151.0741 ||:  30%|██▉       | 17/57 [03:32<07:07, 10.68s/it]

POLICY loss tensor(-120067.7344, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.1775, DiseaseClass: 0.2180, Modifier: 0.4144, SpecificDisease: 0.5910, accuracy: 0.3502, loss: -95590.8547 ||:  32%|███▏      | 18/57 [03:45<07:23, 11.36s/it]

POLICY loss tensor(-116553.3750, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.2155, DiseaseClass: 0.2351, Modifier: 0.4191, SpecificDisease: 0.6048, accuracy: 0.3686, loss: -96694.1091 ||:  33%|███▎      | 19/57 [03:58<07:29, 11.84s/it]

POLICY loss tensor(-121016.3125, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.2528, DiseaseClass: 0.2456, Modifier: 0.4330, SpecificDisease: 0.6155, accuracy: 0.3867, loss: -97910.1868 ||:  35%|███▌      | 20/57 [04:11<07:31, 12.20s/it]

POLICY loss tensor(-123211.8906, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.2849, DiseaseClass: 0.2504, Modifier: 0.4409, SpecificDisease: 0.6298, accuracy: 0.4015, loss: -99114.9997 ||:  37%|███▋      | 21/57 [04:24<07:29, 12.47s/it]

POLICY loss tensor(-115978.0781, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.3141, DiseaseClass: 0.2650, Modifier: 0.4480, SpecificDisease: 0.6428, accuracy: 0.4175, loss: -99881.4728 ||:  39%|███▊      | 22/57 [04:37<07:23, 12.66s/it]

POLICY loss tensor(-116304.3750, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.3648, DiseaseClass: 0.2637, Modifier: 0.4458, SpecificDisease: 0.6519, accuracy: 0.4316, loss: -99821.2381 ||:  42%|████▏     | 24/57 [05:03<07:04, 12.88s/it] 

POLICY loss tensor(-82014.1406, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.3909, DiseaseClass: 0.2580, Modifier: 0.4403, SpecificDisease: 0.6483, accuracy: 0.4344, loss: -99228.2654 ||:  44%|████▍     | 25/57 [05:16<06:51, 12.84s/it]

POLICY loss tensor(-84997.4688, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-110261.8750, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.4137, DiseaseClass: 0.2565, Modifier: 0.4427, SpecificDisease: 0.6486, accuracy: 0.4404, loss: -99652.6128 ||:  46%|████▌     | 26/57 [05:29<06:37, 12.83s/it]

POLICY loss tensor(-120895.9375, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.4497, DiseaseClass: 0.2583, Modifier: 0.4480, SpecificDisease: 0.6503, accuracy: 0.4516, loss: -99893.8882 ||:  49%|████▉     | 28/57 [05:55<06:13, 12.88s/it] 

POLICY loss tensor(-85166.2188, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.4664, DiseaseClass: 0.2536, Modifier: 0.4377, SpecificDisease: 0.6384, accuracy: 0.4490, loss: -98575.1373 ||:  51%|█████     | 29/57 [06:07<05:59, 12.84s/it]

POLICY loss tensor(-61650.6797, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.4824, DiseaseClass: 0.2460, Modifier: 0.4292, SpecificDisease: 0.6305, accuracy: 0.4470, loss: -97450.1522 ||:  53%|█████▎    | 30/57 [06:20<05:47, 12.86s/it]

POLICY loss tensor(-64826.1328, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.4974, DiseaseClass: 0.2389, Modifier: 0.4253, SpecificDisease: 0.6272, accuracy: 0.4472, loss: -96613.3763 ||:  54%|█████▍    | 31/57 [06:33<05:34, 12.87s/it]

POLICY loss tensor(-71510.6562, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-118060.1797, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.5246, DiseaseClass: 0.2406, Modifier: 0.4251, SpecificDisease: 0.6319, accuracy: 0.4556, loss: -97267.2969 ||:  58%|█████▊    | 33/57 [06:59<05:08, 12.86s/it]

POLICY loss tensor(-96747.1250, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-109450.7734, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.5496, DiseaseClass: 0.2420, Modifier: 0.4249, SpecificDisease: 0.6369, accuracy: 0.4634, loss: -97254.1570 ||:  61%|██████▏   | 35/57 [07:24<04:41, 12.81s/it]

POLICY loss tensor(-84625.0781, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.5606, DiseaseClass: 0.2370, Modifier: 0.4182, SpecificDisease: 0.6295, accuracy: 0.4613, loss: -96613.3278 ||:  63%|██████▎   | 36/57 [07:37<04:27, 12.75s/it]

POLICY loss tensor(-74184.8750, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-113053.0469, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.5718, DiseaseClass: 0.2433, Modifier: 0.4205, SpecificDisease: 0.6363, accuracy: 0.4680, loss: -97057.6284 ||:  65%|██████▍   | 37/57 [07:50<04:16, 12.82s/it]

POLICY loss tensor(-116102.7031, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.5816, DiseaseClass: 0.2510, Modifier: 0.4226, SpecificDisease: 0.6435, accuracy: 0.4747, loss: -97558.7984 ||:  67%|██████▋   | 38/57 [08:03<04:04, 12.87s/it]

POLICY loss tensor(-109766.4844, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.5997, DiseaseClass: 0.2550, Modifier: 0.4202, SpecificDisease: 0.6465, accuracy: 0.4803, loss: -97832.9364 ||:  70%|███████   | 40/57 [08:29<03:37, 12.82s/it]

POLICY loss tensor(-96317.8281, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6034, DiseaseClass: 0.2486, Modifier: 0.4105, SpecificDisease: 0.6328, accuracy: 0.4739, loss: -96329.0316 ||:  72%|███████▏  | 41/57 [08:41<03:23, 12.72s/it]

POLICY loss tensor(-36173.3828, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-19917.8438, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6199, DiseaseClass: 0.2465, Modifier: 0.4030, SpecificDisease: 0.6369, accuracy: 0.4766, loss: -94091.6030 ||:  75%|███████▌  | 43/57 [08:58<02:32, 10.92s/it]

POLICY loss tensor(-76531.9062, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6287, DiseaseClass: 0.2430, Modifier: 0.4001, SpecificDisease: 0.6323, accuracy: 0.4760, loss: -93426.4935 ||:  77%|███████▋  | 44/57 [09:11<02:29, 11.49s/it]

POLICY loss tensor(-64827.3359, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6371, DiseaseClass: 0.2396, Modifier: 0.3975, SpecificDisease: 0.6272, accuracy: 0.4753, loss: -92829.3410 ||:  79%|███████▉  | 45/57 [09:23<02:21, 11.83s/it]

POLICY loss tensor(-66555.1797, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-97746.9766, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6416, DiseaseClass: 0.2460, Modifier: 0.4011, SpecificDisease: 0.6313, accuracy: 0.4800, loss: -92936.2317 ||:  81%|████████  | 46/57 [09:36<02:13, 12.17s/it]

POLICY loss tensor(-101461.0781, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6561, DiseaseClass: 0.2488, Modifier: 0.4026, SpecificDisease: 0.6310, accuracy: 0.4847, loss: -92630.2629 ||:  84%|████████▍ | 48/57 [10:02<01:52, 12.49s/it]

POLICY loss tensor(-69726.0469, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-100383.8438, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6632, DiseaseClass: 0.2469, Modifier: 0.4021, SpecificDisease: 0.6315, accuracy: 0.4859, loss: -92788.4878 ||:  86%|████████▌ | 49/57 [10:15<01:41, 12.63s/it]

POLICY loss tensor(-91240.3516, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6688, DiseaseClass: 0.2521, Modifier: 0.4072, SpecificDisease: 0.6371, accuracy: 0.4913, loss: -92757.5127 ||:  88%|████████▊ | 50/57 [10:28<01:29, 12.75s/it]

POLICY loss tensor(-38798.4609, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6735, DiseaseClass: 0.2514, Modifier: 0.4115, SpecificDisease: 0.6381, accuracy: 0.4936, loss: -91699.4798 ||:  89%|████████▉ | 51/57 [10:35<01:05, 10.92s/it]

POLICY loss tensor(-91303.9844, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6833, DiseaseClass: 0.2513, Modifier: 0.4161, SpecificDisease: 0.6418, accuracy: 0.4981, loss: -91418.9719 ||:  93%|█████████▎| 53/57 [11:00<00:47, 11.88s/it]

POLICY loss tensor(-77229.2500, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6886, DiseaseClass: 0.2484, Modifier: 0.4112, SpecificDisease: 0.6385, accuracy: 0.4967, loss: -90683.8627 ||:  95%|█████████▍| 54/57 [11:13<00:36, 12.12s/it]

POLICY loss tensor(-51723.6172, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6938, DiseaseClass: 0.2461, Modifier: 0.4100, SpecificDisease: 0.6366, accuracy: 0.4966, loss: -90214.0183 ||:  96%|█████████▋| 55/57 [11:26<00:24, 12.28s/it]

POLICY loss tensor(-64842.9844, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.6988, DiseaseClass: 0.2445, Modifier: 0.4071, SpecificDisease: 0.6364, accuracy: 0.4967, loss: -89753.3730 ||:  98%|█████████▊| 56/57 [11:38<00:12, 12.45s/it]

POLICY loss tensor(-64418.4375, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.7041, DiseaseClass: 0.2413, Modifier: 0.4038, SpecificDisease: 0.6306, accuracy: 0.4949, loss: -89128.3783 ||: 100%|██████████| 57/57 [11:52<00:00, 12.50s/it]

POLICY loss tensor(-54129.2148, device='cuda:0', grad_fn=<SumBackward0>)
2020-12-14 11:46:09,429: INFO: Starting with Validation



CompositeMention: 0.9122, DiseaseClass: 0.2414, Modifier: 0.3981, SpecificDisease: 0.5611, accuracy: 0.5282, loss: 0.5991 ||: 100%|██████████| 10/10 [00:01<00:00,  6.80it/s]

2020-12-14 11:46:10,905: INFO: Validation done. (954.0 / 1276) zero predicted





2020-12-14 11:46:19,167: INFO: Best validation performance so far. Copying weights to './trained_models/NCBI-BERT-realFT-PS/run-13/models/best.th'.
2020-12-14 11:46:26,048: INFO: Metrics:
                        Training DiseaseClass      : 0.241  Validation DiseaseClass      : 0.241
                        Training SpecificDisease   : 0.631  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.704  Validation CompositeMention  : 0.912
                        Training accuracy          : 0.495  Validation accuracy          : 0.528
                        Training loss              : -89128.378  Validation loss              : 0.599
                        Training Modifier          : 0.404  Validation Modifier          : 0.398

2020-12-14 11:46:26,050: INFO: Writing validation visualization at ./trained_models/NCBI-BERT-realFT-PS/run-13/visualization/validation.html


100%|██████████| 10/10 [00:02<00:00,  3.65it/s]


2020-12-14 11:46:29,331: INFO: Tag: CompositeMention, Acc: 91.22
2020-12-14 11:46:29,333: INFO: Tag: DiseaseClass, Acc: 24.14
2020-12-14 11:46:29,335: INFO: Tag: Modifier, Acc: 39.81
2020-12-14 11:46:29,337: INFO: Tag: SpecificDisease, Acc: 56.11
2020-12-14 11:46:29,339: INFO: Average ACC: 52.82
2020-12-14 11:46:29,390: INFO: processed 22501 tokens with 273 phrases; 
2020-12-14 11:46:29,391: INFO: found: 756 phrases; correct: 2.

2020-12-14 11:46:29,392: INFO: accuracy:  92.20%; 
2020-12-14 11:46:29,393: INFO: precision:   0.26%; recall:   0.73%; FB1:   0.39
2020-12-14 11:46:29,394: INFO:               TAG  precision   recall      FB1
2020-12-14 11:46:29,396: INFO:  CompositeMention      0.00%    0.00%    0.00%
2020-12-14 11:46:29,397: INFO:      DiseaseClass      1.59%    1.79%    1.68%
2020-12-14 11:46:29,399: INFO:          Modifier      0.00%    0.00%    0.00%
2020-12-14 11:46:29,400: INFO:   SpecificDisease      0.00%    0.00%    0.00%
2020-12-14 11:46:29,403: INFO: Writing predic

  return_array[slices] = self.array
CompositeMention: 0.9164, DiseaseClass: 0.2167, Modifier: 0.3577, SpecificDisease: 0.5561, accuracy: 0.5117, loss: -65600.5781 ||:   2%|▏         | 1/57 [00:12<11:53, 12.73s/it]

POLICY loss tensor(-65601.1406, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-39217.4336, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9060, DiseaseClass: 0.2438, Modifier: 0.3781, SpecificDisease: 0.6018, accuracy: 0.5324, loss: -52408.6934 ||:   4%|▎         | 2/57 [00:19<10:04, 10.99s/it]

POLICY loss tensor(-91747.0234, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9139, DiseaseClass: 0.2603, Modifier: 0.4051, SpecificDisease: 0.6399, accuracy: 0.5548, loss: -65521.2773 ||:   5%|▌         | 3/57 [00:33<10:33, 11.73s/it]

POLICY loss tensor(-84909.1484, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9139, DiseaseClass: 0.2441, Modifier: 0.4116, SpecificDisease: 0.6401, accuracy: 0.5524, loss: -68327.9922 ||:   9%|▉         | 5/57 [00:58<10:40, 12.32s/it]

POLICY loss tensor(-60168.1133, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9218, DiseaseClass: 0.2361, Modifier: 0.3940, SpecificDisease: 0.6216, accuracy: 0.5434, loss: -66705.0312 ||:  11%|█         | 6/57 [01:11<10:31, 12.39s/it]

POLICY loss tensor(-58590.7656, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-28468.4023, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9230, DiseaseClass: 0.2467, Modifier: 0.4007, SpecificDisease: 0.6294, accuracy: 0.5499, loss: -62157.7456 ||:  14%|█▍        | 8/57 [01:29<09:01, 11.06s/it]

POLICY loss tensor(-68564.5625, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-87283.1719, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9270, DiseaseClass: 0.2586, Modifier: 0.3973, SpecificDisease: 0.6330, accuracy: 0.5540, loss: -63675.8477 ||:  18%|█▊        | 10/57 [01:55<09:20, 11.93s/it]

POLICY loss tensor(-52214.4883, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-67915.0625, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9319, DiseaseClass: 0.2649, Modifier: 0.3891, SpecificDisease: 0.6431, accuracy: 0.5573, loss: -63102.2962 ||:  21%|██        | 12/57 [02:19<09:03, 12.07s/it]

POLICY loss tensor(-52555.1328, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9342, DiseaseClass: 0.2626, Modifier: 0.3909, SpecificDisease: 0.6494, accuracy: 0.5593, loss: -58169.6399 ||:  25%|██▍       | 14/57 [02:33<07:11, 10.04s/it]

POLICY loss tensor(-57148.5430, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-78104.6484, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9359, DiseaseClass: 0.2651, Modifier: 0.3808, SpecificDisease: 0.6466, accuracy: 0.5571, loss: -59498.6014 ||:  26%|██▋       | 15/57 [02:46<07:35, 10.84s/it]

POLICY loss tensor(-27767.0898, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9352, DiseaseClass: 0.2733, Modifier: 0.3864, SpecificDisease: 0.6500, accuracy: 0.5612, loss: -57515.3436 ||:  28%|██▊       | 16/57 [02:51<06:17,  9.20s/it]

POLICY loss tensor(-16965.1387, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9353, DiseaseClass: 0.2722, Modifier: 0.3845, SpecificDisease: 0.6517, accuracy: 0.5609, loss: -55130.0026 ||:  30%|██▉       | 17/57 [02:56<05:09,  7.73s/it]

POLICY loss tensor(-80354.9688, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9381, DiseaseClass: 0.2753, Modifier: 0.4004, SpecificDisease: 0.6601, accuracy: 0.5685, loss: -56531.3575 ||:  32%|███▏      | 18/57 [03:08<06:01,  9.26s/it]

POLICY loss tensor(-30294.6211, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9368, DiseaseClass: 0.2873, Modifier: 0.4176, SpecificDisease: 0.6691, accuracy: 0.5777, loss: -55150.4440 ||:  33%|███▎      | 19/57 [03:15<05:22,  8.48s/it]

POLICY loss tensor(-7908.8032, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9393, DiseaseClass: 0.2868, Modifier: 0.4143, SpecificDisease: 0.6741, accuracy: 0.5786, loss: -52633.2026 ||:  37%|███▋      | 21/57 [03:31<05:08,  8.56s/it]

POLICY loss tensor(-49531.1328, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9404, DiseaseClass: 0.2785, Modifier: 0.4082, SpecificDisease: 0.6618, accuracy: 0.5722, loss: -52239.6079 ||:  39%|███▊      | 22/57 [03:43<05:41,  9.76s/it]

POLICY loss tensor(-43974.6875, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9403, DiseaseClass: 0.2708, Modifier: 0.3992, SpecificDisease: 0.6527, accuracy: 0.5657, loss: -51803.8823 ||:  40%|████      | 23/57 [03:56<06:00, 10.61s/it]

POLICY loss tensor(-42218.4609, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-38810.3984, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9391, DiseaseClass: 0.2776, Modifier: 0.4038, SpecificDisease: 0.6604, accuracy: 0.5702, loss: -51262.4622 ||:  42%|████▏     | 24/57 [04:04<05:23,  9.80s/it]

POLICY loss tensor(-24921.6484, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9390, DiseaseClass: 0.2698, Modifier: 0.3928, SpecificDisease: 0.6480, accuracy: 0.5624, loss: -49890.4594 ||:  46%|████▌     | 26/57 [04:22<05:02,  9.75s/it]

POLICY loss tensor(-41932.3945, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9409, DiseaseClass: 0.2643, Modifier: 0.3903, SpecificDisease: 0.6385, accuracy: 0.5585, loss: -49615.3574 ||:  47%|████▋     | 27/57 [04:35<05:19, 10.65s/it]

POLICY loss tensor(-42463.2578, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-77809.3594, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9427, DiseaseClass: 0.2648, Modifier: 0.3899, SpecificDisease: 0.6409, accuracy: 0.5596, loss: -50622.2660 ||:  49%|████▉     | 28/57 [04:47<05:25, 11.23s/it]

POLICY loss tensor(-7981.8486, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9407, DiseaseClass: 0.2653, Modifier: 0.3959, SpecificDisease: 0.6469, accuracy: 0.5622, loss: -49151.8857 ||:  51%|█████     | 29/57 [04:50<04:04,  8.72s/it]

POLICY loss tensor(-15484.2891, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9388, DiseaseClass: 0.2666, Modifier: 0.4024, SpecificDisease: 0.6507, accuracy: 0.5646, loss: -48029.6117 ||:  53%|█████▎    | 30/57 [04:54<03:19,  7.37s/it]

POLICY loss tensor(-13689.6523, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9404, DiseaseClass: 0.2607, Modifier: 0.3975, SpecificDisease: 0.6443, accuracy: 0.5607, loss: -46837.1370 ||:  56%|█████▌    | 32/57 [05:11<03:25,  8.23s/it]

POLICY loss tensor(-44211.4844, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-83189.7344, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9384, DiseaseClass: 0.2677, Modifier: 0.4018, SpecificDisease: 0.6404, accuracy: 0.5621, loss: -45612.9649 ||:  63%|██████▎   | 36/57 [05:39<02:38,  7.55s/it]

POLICY loss tensor(-60091.0312, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9406, DiseaseClass: 0.2625, Modifier: 0.3990, SpecificDisease: 0.6309, accuracy: 0.5582, loss: -43689.8090 ||:  67%|██████▋   | 38/57 [05:45<01:44,  5.50s/it]

POLICY loss tensor(-18147.0820, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9411, DiseaseClass: 0.2668, Modifier: 0.3895, SpecificDisease: 0.6179, accuracy: 0.5538, loss: -42852.8122 ||:  70%|███████   | 40/57 [06:00<01:55,  6.79s/it]

POLICY loss tensor(-53900.7344, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9424, DiseaseClass: 0.2723, Modifier: 0.3968, SpecificDisease: 0.6236, accuracy: 0.5588, loss: -41807.6058 ||:  72%|███████▏  | 41/57 [06:01<01:23,  5.22s/it]

POLICY loss tensor(-80754.9531, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9396, DiseaseClass: 0.2800, Modifier: 0.4005, SpecificDisease: 0.6269, accuracy: 0.5617, loss: -43065.2414 ||:  75%|███████▌  | 43/57 [06:27<02:07,  9.07s/it]

POLICY loss tensor(-56939.8711, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-52269.3203, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9408, DiseaseClass: 0.2819, Modifier: 0.4045, SpecificDisease: 0.6314, accuracy: 0.5647, loss: -43274.4114 ||:  77%|███████▋  | 44/57 [06:36<01:57,  9.07s/it]

POLICY loss tensor(-24504.4707, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9395, DiseaseClass: 0.2828, Modifier: 0.4070, SpecificDisease: 0.6318, accuracy: 0.5653, loss: -43376.5562 ||:  81%|████████  | 46/57 [06:54<01:42,  9.35s/it]

POLICY loss tensor(-66744.2266, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9406, DiseaseClass: 0.2861, Modifier: 0.4027, SpecificDisease: 0.6256, accuracy: 0.5638, loss: -43175.6895 ||:  82%|████████▏ | 47/57 [07:06<01:43, 10.37s/it]

POLICY loss tensor(-33936.3281, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9418, DiseaseClass: 0.2884, Modifier: 0.3992, SpecificDisease: 0.6214, accuracy: 0.5627, loss: -43263.7230 ||:  84%|████████▍ | 48/57 [07:19<01:39, 11.01s/it]

POLICY loss tensor(-47401.8242, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9429, DiseaseClass: 0.2880, Modifier: 0.3982, SpecificDisease: 0.6185, accuracy: 0.5619, loss: -43423.1472 ||:  86%|████████▌ | 49/57 [07:31<01:31, 11.42s/it]

POLICY loss tensor(-51076.0391, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-16676.7539, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9428, DiseaseClass: 0.2908, Modifier: 0.3995, SpecificDisease: 0.6238, accuracy: 0.5642, loss: -42888.2070 ||:  88%|████████▊ | 50/57 [07:36<01:04,  9.25s/it]

POLICY loss tensor(-25995.6602, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9437, DiseaseClass: 0.2885, Modifier: 0.4008, SpecificDisease: 0.6222, accuracy: 0.5638, loss: -42439.7506 ||:  91%|█████████ | 52/57 [07:53<00:46,  9.40s/it]

POLICY loss tensor(-36462.1328, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-7114.2505, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9429, DiseaseClass: 0.2915, Modifier: 0.4021, SpecificDisease: 0.6234, accuracy: 0.5650, loss: -41599.3440 ||:  95%|█████████▍| 54/57 [08:09<00:27,  9.03s/it]

POLICY loss tensor(-32384.4746, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-13555.3730, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9407, DiseaseClass: 0.2955, Modifier: 0.4043, SpecificDisease: 0.6224, accuracy: 0.5657, loss: -40948.2972 ||:  98%|█████████▊| 56/57 [08:26<00:09,  9.20s/it]

POLICY loss tensor(-33185.9297, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9414, DiseaseClass: 0.2943, Modifier: 0.4033, SpecificDisease: 0.6250, accuracy: 0.5660, loss: -40520.1209 ||: 100%|██████████| 57/57 [08:31<00:00,  8.98s/it]

POLICY loss tensor(-16542.7930, device='cuda:0', grad_fn=<SumBackward0>)
2020-12-14 11:55:02,354: INFO: Starting with Validation



CompositeMention: 0.8966, DiseaseClass: 0.2633, Modifier: 0.3981, SpecificDisease: 0.5611, accuracy: 0.5298, loss: 0.5939 ||: 100%|██████████| 10/10 [00:01<00:00,  7.73it/s]

2020-12-14 11:55:03,653: INFO: Validation done. (902.0 / 1276) zero predicted





2020-12-14 11:55:11,915: INFO: Best validation performance so far. Copying weights to './trained_models/NCBI-BERT-realFT-PS/run-13/models/best.th'.
2020-12-14 11:55:18,996: INFO: Metrics:
                        Training DiseaseClass      : 0.294  Validation DiseaseClass      : 0.263
                        Training SpecificDisease   : 0.625  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.941  Validation CompositeMention  : 0.897
                        Training accuracy          : 0.566  Validation accuracy          : 0.530
                        Training loss              : -40520.121  Validation loss              : 0.594
                        Training Modifier          : 0.403  Validation Modifier          : 0.398

2020-12-14 11:55:18,998: INFO: Writing validation visualization at ./trained_models/NCBI-BERT-realFT-PS/run-13/visualization/validation.html


100%|██████████| 10/10 [00:02<00:00,  3.62it/s]


2020-12-14 11:55:22,310: INFO: Tag: CompositeMention, Acc: 89.66
2020-12-14 11:55:22,312: INFO: Tag: DiseaseClass, Acc: 26.33
2020-12-14 11:55:22,313: INFO: Tag: Modifier, Acc: 39.81
2020-12-14 11:55:22,315: INFO: Tag: SpecificDisease, Acc: 56.11
2020-12-14 11:55:22,317: INFO: Average ACC: 52.98
2020-12-14 11:55:22,367: INFO: processed 22501 tokens with 158 phrases; 
2020-12-14 11:55:22,368: INFO: found: 756 phrases; correct: 0.

2020-12-14 11:55:22,369: INFO: accuracy:  92.63%; 
2020-12-14 11:55:22,370: INFO: precision:   0.00%; recall:   0.00%; FB1:   0.00
2020-12-14 11:55:22,372: INFO:               TAG  precision   recall      FB1
2020-12-14 11:55:22,373: INFO:  CompositeMention      0.00%    0.00%    0.00%
2020-12-14 11:55:22,375: INFO:      DiseaseClass      0.00%    0.00%    0.00%
2020-12-14 11:55:22,376: INFO:          Modifier      0.00%    0.00%    0.00%
2020-12-14 11:55:22,377: INFO:   SpecificDisease      0.00%    0.00%    0.00%
2020-12-14 11:55:22,380: INFO: Writing predic

  return_array[slices] = self.array
CompositeMention: 0.9138, DiseaseClass: 0.2402, Modifier: 0.3943, SpecificDisease: 0.5718, accuracy: 0.5300, loss: -33993.1602 ||:   2%|▏         | 1/57 [00:12<11:39, 12.50s/it]

POLICY loss tensor(-33993.6953, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9262, DiseaseClass: 0.2148, Modifier: 0.3870, SpecificDisease: 0.5436, accuracy: 0.5179, loss: -32117.0586 ||:   4%|▎         | 2/57 [00:25<11:29, 12.54s/it]

POLICY loss tensor(-30241.4805, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9354, DiseaseClass: 0.2094, Modifier: 0.3542, SpecificDisease: 0.5460, accuracy: 0.5113, loss: -26819.8044 ||:   5%|▌         | 3/57 [00:32<09:55, 11.03s/it]

POLICY loss tensor(-16225.8105, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9426, DiseaseClass: 0.2070, Modifier: 0.3461, SpecificDisease: 0.5304, accuracy: 0.5065, loss: -23730.9233 ||:   7%|▋         | 4/57 [00:40<08:48,  9.97s/it]

POLICY loss tensor(-14464.8213, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-69066.0547, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9311, DiseaseClass: 0.2207, Modifier: 0.3709, SpecificDisease: 0.5618, accuracy: 0.5211, loss: -32797.8293 ||:   9%|▉         | 5/57 [00:52<09:22, 10.81s/it]

POLICY loss tensor(-7017.1191, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9348, DiseaseClass: 0.2451, Modifier: 0.3690, SpecificDisease: 0.5698, accuracy: 0.5297, loss: -24760.8497 ||:  12%|█▏        | 7/57 [00:58<05:32,  6.65s/it]

POLICY loss tensor(-2320.8005, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-31565.5996, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9318, DiseaseClass: 0.2536, Modifier: 0.3899, SpecificDisease: 0.5955, accuracy: 0.5427, loss: -24093.4685 ||:  16%|█▌        | 9/57 [01:11<05:09,  6.44s/it]

POLICY loss tensor(-11950.8545, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-12429.6768, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9296, DiseaseClass: 0.2639, Modifier: 0.3861, SpecificDisease: 0.6031, accuracy: 0.5457, loss: -22356.8486 ||:  19%|█▉        | 11/57 [01:27<05:57,  7.77s/it]

POLICY loss tensor(-16655.5977, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9279, DiseaseClass: 0.2711, Modifier: 0.3970, SpecificDisease: 0.6195, accuracy: 0.5539, loss: -19541.2847 ||:  23%|██▎       | 13/57 [01:33<03:54,  5.33s/it]

POLICY loss tensor(-8112.5615, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-5110.3662, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9211, DiseaseClass: 0.2725, Modifier: 0.3820, SpecificDisease: 0.6083, accuracy: 0.5460, loss: -16835.1413 ||:  28%|██▊       | 16/57 [01:46<03:35,  5.25s/it]

POLICY loss tensor(-10216.9609, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9247, DiseaseClass: 0.2786, Modifier: 0.3760, SpecificDisease: 0.5991, accuracy: 0.5446, loss: -15844.8060 ||:  30%|██▉       | 17/57 [01:48<02:42,  4.06s/it]

POLICY loss tensor(-5444.5049, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9250, DiseaseClass: 0.3014, Modifier: 0.3796, SpecificDisease: 0.6085, accuracy: 0.5536, loss: -13926.5550 ||:  35%|███▌      | 20/57 [01:58<02:28,  4.01s/it]

POLICY loss tensor(-3726.6733, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-5206.7705, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9247, DiseaseClass: 0.3040, Modifier: 0.3781, SpecificDisease: 0.6190, accuracy: 0.5565, loss: -13026.2274 ||:  39%|███▊      | 22/57 [02:05<02:10,  3.72s/it]

POLICY loss tensor(-2840.2261, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9244, DiseaseClass: 0.3080, Modifier: 0.3799, SpecificDisease: 0.6060, accuracy: 0.5546, loss: -11463.0109 ||:  44%|████▍     | 25/57 [02:09<01:11,  2.24s/it]

POLICY loss tensor(-6849.1191, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9190, DiseaseClass: 0.3131, Modifier: 0.3974, SpecificDisease: 0.6229, accuracy: 0.5631, loss: -10479.3766 ||:  49%|████▉     | 28/57 [02:17<01:02,  2.17s/it]

POLICY loss tensor(-5246.8232, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9075, DiseaseClass: 0.3218, Modifier: 0.4147, SpecificDisease: 0.6309, accuracy: 0.5687, loss: -10185.5922 ||:  54%|█████▍    | 31/57 [02:34<02:17,  5.30s/it]

POLICY loss tensor(-17085.7734, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9083, DiseaseClass: 0.3185, Modifier: 0.4111, SpecificDisease: 0.6274, accuracy: 0.5663, loss: -9944.2215 ||:  56%|█████▌    | 32/57 [02:36<01:51,  4.47s/it] 

POLICY loss tensor(-2462.3049, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9099, DiseaseClass: 0.3151, Modifier: 0.4085, SpecificDisease: 0.6240, accuracy: 0.5644, loss: -9734.9770 ||:  58%|█████▊    | 33/57 [02:39<01:33,  3.92s/it]

POLICY loss tensor(-3039.7197, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9047, DiseaseClass: 0.3220, Modifier: 0.4193, SpecificDisease: 0.6350, accuracy: 0.5702, loss: -9178.6576 ||:  61%|██████▏   | 35/57 [02:42<00:59,  2.72s/it]

POLICY loss tensor(-4877.9692, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9008, DiseaseClass: 0.3250, Modifier: 0.4220, SpecificDisease: 0.6379, accuracy: 0.5714, loss: -8582.3468 ||:  67%|██████▋   | 38/57 [02:48<00:39,  2.10s/it]

POLICY loss tensor(-4130.5186, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9005, DiseaseClass: 0.3268, Modifier: 0.4252, SpecificDisease: 0.6433, accuracy: 0.5740, loss: -8468.1823 ||:  68%|██████▊   | 39/57 [02:51<00:41,  2.33s/it]

POLICY loss tensor(-3348.3022, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9000, DiseaseClass: 0.3204, Modifier: 0.4269, SpecificDisease: 0.6470, accuracy: 0.5736, loss: -8193.6529 ||:  75%|███████▌  | 43/57 [03:09<01:11,  5.14s/it]

POLICY loss tensor(-18722.0039, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-3684.1846, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9001, DiseaseClass: 0.3215, Modifier: 0.4294, SpecificDisease: 0.6549, accuracy: 0.5765, loss: -7783.2055 ||:  81%|████████  | 46/57 [03:16<00:36,  3.31s/it]

POLICY loss tensor(-2017.9292, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-41166.5273, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9027, DiseaseClass: 0.3161, Modifier: 0.4238, SpecificDisease: 0.6526, accuracy: 0.5738, loss: -8386.2634 ||:  84%|████████▍ | 48/57 [03:33<00:48,  5.41s/it]

POLICY loss tensor(-3347.7808, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9045, DiseaseClass: 0.3120, Modifier: 0.4135, SpecificDisease: 0.6391, accuracy: 0.5673, loss: -8120.9580 ||:  88%|████████▊ | 50/57 [03:39<00:30,  4.41s/it]

POLICY loss tensor(-3508.2839, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9081, DiseaseClass: 0.3123, Modifier: 0.4064, SpecificDisease: 0.6309, accuracy: 0.5644, loss: -7580.4423 ||:  95%|█████████▍| 54/57 [03:47<00:08,  2.85s/it]

POLICY loss tensor(-3298.3398, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9106, DiseaseClass: 0.3230, Modifier: 0.4048, SpecificDisease: 0.6270, accuracy: 0.5664, loss: -7432.2389 ||:  98%|█████████▊| 56/57 [03:56<00:03,  3.99s/it]

POLICY loss tensor(-6862.6113, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9116, DiseaseClass: 0.3320, Modifier: 0.4033, SpecificDisease: 0.6250, accuracy: 0.5680, loss: -7301.8390 ||: 100%|██████████| 57/57 [03:58<00:00,  4.18s/it]

2020-12-14 11:59:21,359: INFO: Starting with Validation



CompositeMention: 0.9122, DiseaseClass: 0.6113, Modifier: 0.4013, SpecificDisease: 0.5611, accuracy: 0.6215, loss: 0.6071 ||: 100%|██████████| 10/10 [00:01<00:00,  7.66it/s]

2020-12-14 11:59:22,671: INFO: Validation done. (1079.0 / 1276) zero predicted





2020-12-14 11:59:30,735: INFO: Best validation performance so far. Copying weights to './trained_models/NCBI-BERT-realFT-PS/run-13/models/best.th'.
2020-12-14 11:59:37,815: INFO: Metrics:
                        Training DiseaseClass      : 0.332  Validation DiseaseClass      : 0.611
                        Training SpecificDisease   : 0.625  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.912  Validation CompositeMention  : 0.912
                        Training accuracy          : 0.568  Validation accuracy          : 0.621
                        Training loss              : -7301.839  Validation loss              : 0.607
                        Training Modifier          : 0.403  Validation Modifier          : 0.401

2020-12-14 11:59:37,817: INFO: Writing validation visualization at ./trained_models/NCBI-BERT-realFT-PS/run-13/visualization/validation.html


100%|██████████| 10/10 [00:02<00:00,  3.36it/s]


2020-12-14 11:59:41,335: INFO: Tag: CompositeMention, Acc: 91.22
2020-12-14 11:59:41,337: INFO: Tag: DiseaseClass, Acc: 61.13
2020-12-14 11:59:41,340: INFO: Tag: Modifier, Acc: 40.13
2020-12-14 11:59:41,341: INFO: Tag: SpecificDisease, Acc: 56.11
2020-12-14 11:59:41,343: INFO: Average ACC: 62.15
2020-12-14 11:59:41,394: INFO: processed 22501 tokens with 64 phrases; 
2020-12-14 11:59:41,395: INFO: found: 756 phrases; correct: 0.

2020-12-14 11:59:41,396: INFO: accuracy:  93.05%; 
2020-12-14 11:59:41,398: INFO: precision:   0.00%; recall:   0.00%; FB1:   0.00
2020-12-14 11:59:41,399: INFO:               TAG  precision   recall      FB1
2020-12-14 11:59:41,400: INFO:  CompositeMention      0.00%    0.00%    0.00%
2020-12-14 11:59:41,401: INFO:      DiseaseClass      0.00%    0.00%    0.00%
2020-12-14 11:59:41,402: INFO:          Modifier      0.00%    0.00%    0.00%
2020-12-14 11:59:41,404: INFO:   SpecificDisease      0.00%    0.00%    0.00%
2020-12-14 11:59:41,407: INFO: Writing predict

  return_array[slices] = self.array
CompositeMention: 0.9269, DiseaseClass: 0.6527, Modifier: 0.3629, SpecificDisease: 0.5196, accuracy: 0.6155, loss: -11830.6641 ||:   2%|▏         | 1/57 [00:12<11:26, 12.26s/it]

POLICY loss tensor(-11831.1611, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9284, DiseaseClass: 0.6173, Modifier: 0.3432, SpecificDisease: 0.4914, accuracy: 0.5951, loss: -5915.0746 ||:   4%|▎         | 2/57 [00:13<08:17,  9.05s/it] 

POLICY loss tensor(-21759.6172, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9381, DiseaseClass: 0.6266, Modifier: 0.3508, SpecificDisease: 0.5272, accuracy: 0.6107, loss: -11065.1435 ||:   7%|▋         | 4/57 [00:29<07:19,  8.30s/it]

POLICY loss tensor(-10671.9863, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9462, DiseaseClass: 0.6179, Modifier: 0.3738, SpecificDisease: 0.5807, accuracy: 0.6297, loss: -8741.6837 ||:  12%|█▏        | 7/57 [00:43<05:36,  6.74s/it] 

POLICY loss tensor(-16933.1289, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9506, DiseaseClass: 0.6198, Modifier: 0.3815, SpecificDisease: 0.5741, accuracy: 0.6315, loss: -10063.9056 ||:  14%|█▍        | 8/57 [00:53<06:16,  7.69s/it]

POLICY loss tensor(-19320.0195, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9472, DiseaseClass: 0.6014, Modifier: 0.3880, SpecificDisease: 0.5920, accuracy: 0.6322, loss: -8945.6207 ||:  16%|█▌        | 9/57 [00:54<04:41,  5.87s/it] 

POLICY loss tensor(-3616.7292, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9491, DiseaseClass: 0.5133, Modifier: 0.3953, SpecificDisease: 0.6152, accuracy: 0.6182, loss: -6572.5608 ||:  26%|██▋       | 15/57 [01:13<03:11,  4.56s/it]

POLICY loss tensor(-14464.6562, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-14983.3750, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9489, DiseaseClass: 0.4850, Modifier: 0.4220, SpecificDisease: 0.6277, accuracy: 0.6209, loss: -6880.6068 ||:  32%|███▏      | 18/57 [01:26<02:57,  4.55s/it]

POLICY loss tensor(-10280.8848, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9512, DiseaseClass: 0.4554, Modifier: 0.4078, SpecificDisease: 0.6224, accuracy: 0.6092, loss: -6335.3123 ||:  35%|███▌      | 20/57 [01:31<02:12,  3.57s/it]

POLICY loss tensor(-2856.4282, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-4708.3711, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9501, DiseaseClass: 0.4433, Modifier: 0.4101, SpecificDisease: 0.6214, accuracy: 0.6062, loss: -6113.0685 ||:  39%|███▊      | 22/57 [01:37<01:49,  3.13s/it]

POLICY loss tensor(-3074.0930, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9485, DiseaseClass: 0.4362, Modifier: 0.4214, SpecificDisease: 0.6249, accuracy: 0.6078, loss: -5847.2565 ||:  40%|████      | 23/57 [01:38<01:30,  2.66s/it]

POLICY loss tensor(-12318.2012, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9499, DiseaseClass: 0.4172, Modifier: 0.4172, SpecificDisease: 0.6153, accuracy: 0.5999, loss: -6690.0943 ||:  44%|████▍     | 25/57 [01:56<03:15,  6.11s/it]

POLICY loss tensor(-20448.4297, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9505, DiseaseClass: 0.4065, Modifier: 0.4116, SpecificDisease: 0.6136, accuracy: 0.5956, loss: -7327.1008 ||:  46%|████▌     | 26/57 [02:08<04:09,  8.04s/it]

POLICY loss tensor(-23252.8066, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9494, DiseaseClass: 0.3999, Modifier: 0.4224, SpecificDisease: 0.6221, accuracy: 0.5985, loss: -7151.2794 ||:  51%|█████     | 29/57 [02:22<02:57,  6.33s/it]

POLICY loss tensor(-16884.2148, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9508, DiseaseClass: 0.3946, Modifier: 0.4165, SpecificDisease: 0.6213, accuracy: 0.5958, loss: -7103.7478 ||:  53%|█████▎    | 30/57 [02:26<02:30,  5.58s/it]

POLICY loss tensor(-5725.8701, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9464, DiseaseClass: 0.3913, Modifier: 0.4128, SpecificDisease: 0.6265, accuracy: 0.5943, loss: -6751.5766 ||:  61%|██████▏   | 35/57 [02:45<02:01,  5.51s/it]

POLICY loss tensor(-23195.7402, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9417, DiseaseClass: 0.3942, Modifier: 0.4131, SpecificDisease: 0.6286, accuracy: 0.5944, loss: -6218.5097 ||:  67%|██████▋   | 38/57 [02:49<00:54,  2.89s/it]

POLICY loss tensor(-4068.9309, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9432, DiseaseClass: 0.3942, Modifier: 0.3985, SpecificDisease: 0.6157, accuracy: 0.5879, loss: -6132.1161 ||:  75%|███████▌  | 43/57 [03:10<01:13,  5.23s/it]

POLICY loss tensor(-23311.3027, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9392, DiseaseClass: 0.3928, Modifier: 0.3999, SpecificDisease: 0.6178, accuracy: 0.5875, loss: -5992.7352 ||:  77%|███████▋  | 44/57 [03:12<00:53,  4.14s/it]

POLICY loss tensor(-8362.7207, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9332, DiseaseClass: 0.3859, Modifier: 0.4052, SpecificDisease: 0.6261, accuracy: 0.5876, loss: -5551.8408 ||:  86%|████████▌ | 49/57 [03:22<00:16,  2.07s/it]

POLICY loss tensor(-5599.2407, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9283, DiseaseClass: 0.3860, Modifier: 0.4092, SpecificDisease: 0.6300, accuracy: 0.5884, loss: -5443.8861 ||:  89%|████████▉ | 51/57 [03:26<00:12,  2.07s/it]

POLICY loss tensor(-5505.9980, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9286, DiseaseClass: 0.3884, Modifier: 0.4096, SpecificDisease: 0.6299, accuracy: 0.5891, loss: -5690.2987 ||:  93%|█████████▎| 53/57 [03:42<00:21,  5.35s/it]

POLICY loss tensor(-18442.8047, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9293, DiseaseClass: 0.3841, Modifier: 0.4085, SpecificDisease: 0.6282, accuracy: 0.5875, loss: -5662.9864 ||:  95%|█████████▍| 54/57 [03:44<00:13,  4.54s/it]

POLICY loss tensor(-4215.9893, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9312, DiseaseClass: 0.3839, Modifier: 0.4069, SpecificDisease: 0.6250, accuracy: 0.5868, loss: -5418.5738 ||: 100%|██████████| 57/57 [03:50<00:00,  4.04s/it]

POLICY loss tensor(-3059.1284, device='cuda:0', grad_fn=<SumBackward0>)
2020-12-14 12:03:32,534: INFO: Starting with Validation



CompositeMention: 0.9122, DiseaseClass: 0.3542, Modifier: 0.3981, SpecificDisease: 0.5611, accuracy: 0.5564, loss: 0.5908 ||: 100%|██████████| 10/10 [00:01<00:00,  7.58it/s]

2020-12-14 12:03:33,858: INFO: Validation done. (924.0 / 1276) zero predicted





2020-12-14 12:03:41,738: INFO: Metrics:
                        Training DiseaseClass      : 0.384  Validation DiseaseClass      : 0.354
                        Training SpecificDisease   : 0.625  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.931  Validation CompositeMention  : 0.912
                        Training accuracy          : 0.587  Validation accuracy          : 0.556
                        Training loss              : -5418.574  Validation loss              : 0.591
                        Training Modifier          : 0.407  Validation Modifier          : 0.398

2020-12-14 12:03:41,740: INFO: Reducing LR: 2.00e-07 -> 2.00e-07
2020-12-14 12:03:41,741: INFO: Epoch duration: 00:04:00
2020-12-14 12:03:41,743: INFO: Estimated training time remaining: 05:38:36
2020-12-14 12:03:41,747: INFO: Starting Training Epoch 5/50
2020-12-14 12:03:41,749: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 12:03:41,845: INFO: GPU 0 memory usage M

  return_array[slices] = self.array
  0%|          | 0/57 [00:00<?, ?it/s]

POLICY loss tensor(-3777.0327, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9688, DiseaseClass: 0.3125, Modifier: 0.4219, SpecificDisease: 0.7031, accuracy: 0.6016, loss: -1887.9365 ||:   4%|▎         | 2/57 [00:04<02:19,  2.53s/it]

POLICY loss tensor(-4884.2207, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9375, DiseaseClass: 0.3750, Modifier: 0.3750, SpecificDisease: 0.6719, accuracy: 0.5898, loss: -6753.3543 ||:   7%|▋         | 4/57 [00:20<05:00,  5.68s/it]

POLICY loss tensor(-18354.4805, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9129, DiseaseClass: 0.3638, Modifier: 0.4286, SpecificDisease: 0.7411, accuracy: 0.6116, loss: -3858.8065 ||:  12%|█▏        | 7/57 [00:24<02:24,  2.90s/it]

POLICY loss tensor(-4447.7334, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9149, DiseaseClass: 0.3385, Modifier: 0.4583, SpecificDisease: 0.7396, accuracy: 0.6128, loss: -4150.7699 ||:  16%|█▌        | 9/57 [00:31<02:33,  3.21s/it]

POLICY loss tensor(-5898.7183, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9062, DiseaseClass: 0.3242, Modifier: 0.4531, SpecificDisease: 0.7083, accuracy: 0.5980, loss: -4151.6089 ||:  21%|██        | 12/57 [00:40<02:39,  3.55s/it]

POLICY loss tensor(-12464.1680, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8180, DiseaseClass: 0.3520, Modifier: 0.4835, SpecificDisease: 0.7279, accuracy: 0.5954, loss: -3175.8548 ||:  30%|██▉       | 17/57 [00:50<01:42,  2.56s/it]

POLICY loss tensor(-4173.0942, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8264, DiseaseClass: 0.3429, Modifier: 0.4722, SpecificDisease: 0.7083, accuracy: 0.5875, loss: -3904.0120 ||:  32%|███▏      | 18/57 [01:03<03:33,  5.48s/it]

POLICY loss tensor(-16283.2285, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8257, DiseaseClass: 0.3462, Modifier: 0.4737, SpecificDisease: 0.7122, accuracy: 0.5894, loss: -3698.5059 ||:  33%|███▎      | 19/57 [01:04<02:44,  4.33s/it]

POLICY loss tensor(-6567.5054, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8281, DiseaseClass: 0.3539, Modifier: 0.4813, SpecificDisease: 0.7188, accuracy: 0.5955, loss: -3841.9256 ||:  35%|███▌      | 20/57 [01:08<02:38,  4.28s/it]

POLICY loss tensor(-4680.3589, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8304, DiseaseClass: 0.3534, Modifier: 0.4747, SpecificDisease: 0.7232, accuracy: 0.5954, loss: -3881.8216 ||:  37%|███▋      | 21/57 [01:11<02:20,  3.90s/it]

POLICY loss tensor(-2685.5181, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8438, DiseaseClass: 0.3398, Modifier: 0.4609, SpecificDisease: 0.7083, accuracy: 0.5882, loss: -4480.5108 ||:  42%|████▏     | 24/57 [01:29<03:13,  5.88s/it]

POLICY loss tensor(-23330.2695, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8755, DiseaseClass: 0.3304, Modifier: 0.4283, SpecificDisease: 0.6673, accuracy: 0.5754, loss: -3268.0102 ||:  60%|█████▉    | 34/57 [01:45<00:45,  1.98s/it]

POLICY loss tensor(-3585.6875, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8806, DiseaseClass: 0.3242, Modifier: 0.4227, SpecificDisease: 0.6623, accuracy: 0.5725, loss: -3360.2384 ||:  63%|██████▎   | 36/57 [01:52<01:05,  3.13s/it]

POLICY loss tensor(-9857.3154, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-3879.3362, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8867, DiseaseClass: 0.3328, Modifier: 0.4273, SpecificDisease: 0.6578, accuracy: 0.5762, loss: -3392.1190 ||:  70%|███████   | 40/57 [02:04<00:58,  3.45s/it]

POLICY loss tensor(-10839.1543, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8887, DiseaseClass: 0.3388, Modifier: 0.4238, SpecificDisease: 0.6517, accuracy: 0.5757, loss: -3844.9181 ||:  72%|███████▏  | 41/57 [02:17<01:38,  6.19s/it]

POLICY loss tensor(-21957.3945, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-3674.6113, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8944, DiseaseClass: 0.3472, Modifier: 0.4271, SpecificDisease: 0.6493, accuracy: 0.5795, loss: -3619.8523 ||:  79%|███████▉  | 45/57 [02:25<00:36,  3.06s/it]

POLICY loss tensor(-1579.3473, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8972, DiseaseClass: 0.3723, Modifier: 0.4124, SpecificDisease: 0.6283, accuracy: 0.5776, loss: -3729.6633 ||:  86%|████████▌ | 49/57 [02:42<00:41,  5.13s/it]

POLICY loss tensor(-19862.3477, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8976, DiseaseClass: 0.3818, Modifier: 0.4122, SpecificDisease: 0.6313, accuracy: 0.5807, loss: -3924.0156 ||:  89%|████████▉ | 51/57 [02:56<00:40,  6.67s/it]

POLICY loss tensor(-17372.4609, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-13299.6689, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8982, DiseaseClass: 0.3910, Modifier: 0.4113, SpecificDisease: 0.6334, accuracy: 0.5835, loss: -4108.8506 ||:  93%|█████████▎| 53/57 [03:05<00:22,  5.53s/it]

POLICY loss tensor(-4345.7568, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8995, DiseaseClass: 0.4001, Modifier: 0.4101, SpecificDisease: 0.6321, accuracy: 0.5855, loss: -4431.6715 ||:  95%|█████████▍| 54/57 [03:18<00:22,  7.57s/it]

POLICY loss tensor(-21541.7305, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9014, DiseaseClass: 0.4071, Modifier: 0.4054, SpecificDisease: 0.6314, accuracy: 0.5863, loss: -4740.9212 ||:  96%|█████████▋| 55/57 [03:30<00:18,  9.02s/it]

POLICY loss tensor(-21440.9297, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9038, DiseaseClass: 0.4154, Modifier: 0.4049, SpecificDisease: 0.6306, accuracy: 0.5887, loss: -4917.3183 ||: 100%|██████████| 57/57 [03:44<00:00,  3.94s/it]

POLICY loss tensor(-19537.6113, device='cuda:0', grad_fn=<SumBackward0>)
2020-12-14 12:07:27,279: INFO: Starting with Validation



CompositeMention: 0.8746, DiseaseClass: 0.5016, Modifier: 0.3981, SpecificDisease: 0.5611, accuracy: 0.5839, loss: 0.5914 ||: 100%|██████████| 10/10 [00:01<00:00,  7.68it/s]

2020-12-14 12:07:28,585: INFO: Validation done. (1003.0 / 1276) zero predicted





2020-12-14 12:07:36,942: INFO: Metrics:
                        Training DiseaseClass      : 0.415  Validation DiseaseClass      : 0.502
                        Training SpecificDisease   : 0.631  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.904  Validation CompositeMention  : 0.875
                        Training accuracy          : 0.589  Validation accuracy          : 0.584
                        Training loss              : -4917.318  Validation loss              : 0.591
                        Training Modifier          : 0.405  Validation Modifier          : 0.398

2020-12-14 12:07:36,946: INFO: Reducing LR: 2.00e-07 -> 2.00e-07
2020-12-14 12:07:36,947: INFO: Epoch duration: 00:03:55
2020-12-14 12:07:36,948: INFO: Estimated training time remaining: 05:00:16
2020-12-14 12:07:36,951: INFO: Starting Training Epoch 6/50
2020-12-14 12:07:36,952: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 12:07:37,051: INFO: GPU 0 memory usage M

  return_array[slices] = self.array
CompositeMention: 1.0000, DiseaseClass: 0.5625, Modifier: 0.3750, SpecificDisease: 0.6250, accuracy: 0.6406, loss: -22382.2012 ||:   2%|▏         | 1/57 [00:12<11:41, 12.53s/it]

POLICY loss tensor(-22382.7383, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9688, DiseaseClass: 0.5547, Modifier: 0.3750, SpecificDisease: 0.5312, accuracy: 0.6074, loss: -22086.2051 ||:   4%|▎         | 2/57 [00:23<11:07, 12.15s/it]

POLICY loss tensor(-21790.7832, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-6827.6138, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9583, DiseaseClass: 0.4896, Modifier: 0.3854, SpecificDisease: 0.5625, accuracy: 0.5990, loss: -16999.8127 ||:   5%|▌         | 3/57 [00:27<08:43,  9.70s/it]

POLICY loss tensor(-4354.9053, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9375, DiseaseClass: 0.4531, Modifier: 0.4453, SpecificDisease: 0.6562, accuracy: 0.6230, loss: -13838.4437 ||:   7%|▋         | 4/57 [00:30<06:46,  7.66s/it]

POLICY loss tensor(-4116.4390, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9245, DiseaseClass: 0.5104, Modifier: 0.4219, SpecificDisease: 0.6771, accuracy: 0.6335, loss: -10232.6301 ||:  11%|█         | 6/57 [00:36<04:21,  5.12s/it]

POLICY loss tensor(-1926.7592, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9353, DiseaseClass: 0.5491, Modifier: 0.4196, SpecificDisease: 0.6205, accuracy: 0.6311, loss: -10132.8739 ||:  12%|█▏        | 7/57 [00:44<05:09,  6.19s/it]

POLICY loss tensor(-9534.8760, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9180, DiseaseClass: 0.5469, Modifier: 0.4375, SpecificDisease: 0.6562, accuracy: 0.6396, loss: -8866.1873 ||:  14%|█▍        | 8/57 [00:46<03:55,  4.80s/it] 

POLICY loss tensor(-15840.3408, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9266, DiseaseClass: 0.5172, Modifier: 0.4219, SpecificDisease: 0.6531, accuracy: 0.6297, loss: -8843.9466 ||:  18%|█▊        | 10/57 [00:55<03:29,  4.47s/it]

POLICY loss tensor(-1670.7227, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9286, DiseaseClass: 0.4810, Modifier: 0.4665, SpecificDisease: 0.6853, accuracy: 0.6403, loss: -7476.2175 ||:  25%|██▍       | 14/57 [01:12<03:57,  5.52s/it]

POLICY loss tensor(-16229.8779, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-18569.6270, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9238, DiseaseClass: 0.4590, Modifier: 0.4668, SpecificDisease: 0.6855, accuracy: 0.6338, loss: -7919.3200 ||:  28%|██▊       | 16/57 [01:20<03:10,  4.65s/it]

POLICY loss tensor(-3473.6724, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9265, DiseaseClass: 0.4614, Modifier: 0.4540, SpecificDisease: 0.6654, accuracy: 0.6268, loss: -8262.8578 ||:  30%|██▉       | 17/57 [01:33<04:40,  7.00s/it]

POLICY loss tensor(-13759.9883, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9306, DiseaseClass: 0.4531, Modifier: 0.4410, SpecificDisease: 0.6562, accuracy: 0.6202, loss: -8063.4828 ||:  32%|███▏      | 18/57 [01:36<03:55,  6.04s/it]

POLICY loss tensor(-4674.6396, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9281, DiseaseClass: 0.4461, Modifier: 0.4375, SpecificDisease: 0.6484, accuracy: 0.6150, loss: -7257.0781 ||:  35%|███▌      | 20/57 [01:39<02:15,  3.66s/it]

POLICY loss tensor(-54412.9805, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9256, DiseaseClass: 0.4442, Modifier: 0.4494, SpecificDisease: 0.6592, accuracy: 0.6196, loss: -9502.5701 ||:  37%|███▋      | 21/57 [01:52<03:51,  6.43s/it]

POLICY loss tensor(-9510.6426, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9176, DiseaseClass: 0.4453, Modifier: 0.4503, SpecificDisease: 0.6676, accuracy: 0.6202, loss: -9502.9096 ||:  39%|███▊      | 22/57 [01:56<03:22,  5.78s/it]

POLICY loss tensor(-54319.6250, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8926, DiseaseClass: 0.4473, Modifier: 0.4583, SpecificDisease: 0.6784, accuracy: 0.6191, loss: -10974.2660 ||:  42%|████▏     | 24/57 [02:11<03:20,  6.07s/it]

POLICY loss tensor(-8561.6309, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8852, DiseaseClass: 0.4369, Modifier: 0.4507, SpecificDisease: 0.6719, accuracy: 0.6112, loss: -11276.4099 ||:  46%|████▌     | 26/57 [02:28<03:56,  7.63s/it]

POLICY loss tensor(-21243.7383, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-10318.3066, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8862, DiseaseClass: 0.4314, Modifier: 0.4487, SpecificDisease: 0.6708, accuracy: 0.6092, loss: -11327.3041 ||:  49%|████▉     | 28/57 [02:41<03:29,  7.24s/it]

POLICY loss tensor(-13660.6895, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8890, DiseaseClass: 0.4305, Modifier: 0.4386, SpecificDisease: 0.6659, accuracy: 0.6060, loss: -11669.2810 ||:  51%|█████     | 29/57 [02:53<04:06,  8.80s/it]

POLICY loss tensor(-21245.1445, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8917, DiseaseClass: 0.4302, Modifier: 0.4333, SpecificDisease: 0.6687, accuracy: 0.6060, loss: -11460.6212 ||:  53%|█████▎    | 30/57 [02:57<03:16,  7.29s/it]

POLICY loss tensor(-5410.0239, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8942, DiseaseClass: 0.4199, Modifier: 0.4264, SpecificDisease: 0.6673, accuracy: 0.6019, loss: -11674.0913 ||:  54%|█████▍    | 31/57 [03:06<03:20,  7.73s/it]

POLICY loss tensor(-18078.7227, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8950, DiseaseClass: 0.4126, Modifier: 0.4258, SpecificDisease: 0.6680, accuracy: 0.6003, loss: -11454.0587 ||:  56%|█████▌    | 32/57 [03:08<02:35,  6.22s/it]

POLICY loss tensor(-4633.5928, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8973, DiseaseClass: 0.4134, Modifier: 0.4195, SpecificDisease: 0.6648, accuracy: 0.5987, loss: -11686.1236 ||:  58%|█████▊    | 33/57 [03:19<03:04,  7.68s/it]

POLICY loss tensor(-19112.7402, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-10685.2012, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8982, DiseaseClass: 0.4232, Modifier: 0.4196, SpecificDisease: 0.6643, accuracy: 0.6013, loss: -11979.1028 ||:  61%|██████▏   | 35/57 [03:36<03:03,  8.35s/it]

POLICY loss tensor(-22942.4785, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-13586.7617, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9020, DiseaseClass: 0.4223, Modifier: 0.4139, SpecificDisease: 0.6571, accuracy: 0.5988, loss: -12266.2230 ||:  65%|██████▍   | 37/57 [03:54<03:00,  9.05s/it]

POLICY loss tensor(-20995.9961, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9038, DiseaseClass: 0.4285, Modifier: 0.4112, SpecificDisease: 0.6480, accuracy: 0.5979, loss: -12455.8249 ||:  67%|██████▋   | 38/57 [04:07<03:11, 10.06s/it]

POLICY loss tensor(-19471.6426, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-3710.2913, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9055, DiseaseClass: 0.4348, Modifier: 0.4141, SpecificDisease: 0.6469, accuracy: 0.6003, loss: -12331.8937 ||:  70%|███████   | 40/57 [04:22<02:39,  9.37s/it]

POLICY loss tensor(-16245.2354, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9062, DiseaseClass: 0.4348, Modifier: 0.4207, SpecificDisease: 0.6532, accuracy: 0.6038, loss: -12031.1016 ||:  72%|███████▏  | 41/57 [04:24<01:52,  7.02s/it]

POLICY loss tensor(-8316.9922, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9073, DiseaseClass: 0.4339, Modifier: 0.4237, SpecificDisease: 0.6504, accuracy: 0.6038, loss: -12254.0125 ||:  75%|███████▌  | 43/57 [04:40<01:52,  8.04s/it]

POLICY loss tensor(-25331.5977, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9087, DiseaseClass: 0.4347, Modifier: 0.4205, SpecificDisease: 0.6484, accuracy: 0.6031, loss: -12096.4921 ||:  77%|███████▋  | 44/57 [04:44<01:27,  6.75s/it]

POLICY loss tensor(-5323.6636, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-22400.8789, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9066, DiseaseClass: 0.4347, Modifier: 0.4243, SpecificDisease: 0.6535, accuracy: 0.6048, loss: -12325.4648 ||:  79%|███████▉  | 45/57 [04:52<01:24,  7.08s/it]

POLICY loss tensor(-7621.4746, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9092, DiseaseClass: 0.4359, Modifier: 0.4219, SpecificDisease: 0.6562, accuracy: 0.6058, loss: -12067.9053 ||:  84%|████████▍ | 48/57 [05:10<01:04,  7.15s/it]

POLICY loss tensor(-16993.7480, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9110, DiseaseClass: 0.4337, Modifier: 0.4177, SpecificDisease: 0.6511, accuracy: 0.6034, loss: -11864.6091 ||:  86%|████████▌ | 49/57 [05:13<00:46,  5.77s/it]

POLICY loss tensor(-2106.9417, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9128, DiseaseClass: 0.4400, Modifier: 0.4119, SpecificDisease: 0.6456, accuracy: 0.6026, loss: -11992.6612 ||:  88%|████████▊ | 50/57 [05:25<00:54,  7.77s/it]

POLICY loss tensor(-18267.7070, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9122, DiseaseClass: 0.4314, Modifier: 0.4015, SpecificDisease: 0.6302, accuracy: 0.5938, loss: -10707.6706 ||:  98%|█████████▊| 56/57 [05:34<00:02,  2.21s/it]

POLICY loss tensor(-23954.8242, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9138, DiseaseClass: 0.4304, Modifier: 0.4038, SpecificDisease: 0.6306, accuracy: 0.5946, loss: -10940.0662 ||: 100%|██████████| 57/57 [05:45<00:00,  6.05s/it]

2020-12-14 12:13:23,139: INFO: Starting with Validation



CompositeMention: 0.9028, DiseaseClass: 0.4263, Modifier: 0.4075, SpecificDisease: 0.5611, accuracy: 0.5745, loss: 0.5960 ||: 100%|██████████| 10/10 [00:01<00:00,  7.81it/s]

2020-12-14 12:13:24,423: INFO: Validation done. (1095.0 / 1276) zero predicted





2020-12-14 12:13:32,782: INFO: Metrics:
                        Training DiseaseClass      : 0.430  Validation DiseaseClass      : 0.426
                        Training SpecificDisease   : 0.631  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.914  Validation CompositeMention  : 0.903
                        Training accuracy          : 0.595  Validation accuracy          : 0.574
                        Training loss              : -10940.066  Validation loss              : 0.596
                        Training Modifier          : 0.404  Validation Modifier          : 0.408

2020-12-14 12:13:32,784: INFO: Reducing LR: 2.00e-07 -> 2.00e-07
2020-12-14 12:13:32,784: INFO: Epoch duration: 00:05:55
2020-12-14 12:13:32,785: INFO: Estimated training time remaining: 04:48:09
2020-12-14 12:13:32,787: INFO: Starting Training Epoch 7/50
2020-12-14 12:13:32,788: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 12:13:32,883: INFO: GPU 0 memory usage 

  return_array[slices] = self.array
CompositeMention: 1.0000, DiseaseClass: 0.2500, Modifier: 0.2812, SpecificDisease: 0.5312, accuracy: 0.5156, loss: -2522.7505 ||:   2%|▏         | 1/57 [00:02<02:25,  2.60s/it]

POLICY loss tensor(-2523.2815, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9688, DiseaseClass: 0.5391, Modifier: 0.2891, SpecificDisease: 0.4844, accuracy: 0.5703, loss: -3167.1351 ||:   4%|▎         | 2/57 [00:06<02:41,  2.94s/it]

POLICY loss tensor(-3812.0254, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9688, DiseaseClass: 0.5885, Modifier: 0.2917, SpecificDisease: 0.4688, accuracy: 0.5794, loss: -9326.6370 ||:   5%|▌         | 3/57 [00:18<05:12,  5.79s/it]

POLICY loss tensor(-21646.2344, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-5811.0547, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9766, DiseaseClass: 0.5625, Modifier: 0.3281, SpecificDisease: 0.5859, accuracy: 0.6133, loss: -8447.5844 ||:   7%|▋         | 4/57 [00:22<04:38,  5.26s/it]

POLICY loss tensor(-32716.4688, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9766, DiseaseClass: 0.5365, Modifier: 0.3594, SpecificDisease: 0.5625, accuracy: 0.6087, loss: -14243.2949 ||:  11%|█         | 6/57 [00:48<07:44,  9.11s/it]

POLICY loss tensor(-18954.0430, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9688, DiseaseClass: 0.5449, Modifier: 0.3594, SpecificDisease: 0.5938, accuracy: 0.6167, loss: -12936.1441 ||:  14%|█▍        | 8/57 [01:02<06:58,  8.55s/it]

POLICY loss tensor(-18030.5703, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9688, DiseaseClass: 0.5156, Modifier: 0.3542, SpecificDisease: 0.6076, accuracy: 0.6115, loss: -11498.7298 ||:  16%|█▌        | 9/57 [01:04<05:08,  6.42s/it]

POLICY loss tensor(-3595.8960, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9560, DiseaseClass: 0.4915, Modifier: 0.3835, SpecificDisease: 0.6250, accuracy: 0.6140, loss: -9734.8396 ||:  19%|█▉        | 11/57 [01:08<03:12,  4.18s/it] 

POLICY loss tensor(-11646.4170, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9466, DiseaseClass: 0.4714, Modifier: 0.4010, SpecificDisease: 0.6354, accuracy: 0.6136, loss: -9894.0878 ||:  21%|██        | 12/57 [01:13<03:23,  4.52s/it]

POLICY loss tensor(-21582.3398, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9313, DiseaseClass: 0.4552, Modifier: 0.3771, SpecificDisease: 0.5917, accuracy: 0.5888, loss: -10141.3012 ||:  26%|██▋       | 15/57 [01:36<04:49,  6.88s/it]

POLICY loss tensor(-11809.6855, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9248, DiseaseClass: 0.4648, Modifier: 0.3711, SpecificDisease: 0.5840, accuracy: 0.5862, loss: -10177.0289 ||:  28%|██▊       | 16/57 [01:49<05:50,  8.54s/it]

POLICY loss tensor(-10713.4795, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8705, DiseaseClass: 0.4370, Modifier: 0.3816, SpecificDisease: 0.5673, accuracy: 0.5641, loss: -9119.5337 ||:  33%|███▎      | 19/57 [02:04<04:34,  7.23s/it] 

POLICY loss tensor(-10440.5537, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8449, DiseaseClass: 0.4241, Modifier: 0.3748, SpecificDisease: 0.5525, accuracy: 0.5491, loss: -8663.5248 ||:  35%|███▌      | 20/57 [02:06<03:27,  5.61s/it]

POLICY loss tensor(-22698.9961, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8382, DiseaseClass: 0.4261, Modifier: 0.3895, SpecificDisease: 0.5520, accuracy: 0.5514, loss: -9458.3051 ||:  39%|███▊      | 22/57 [02:26<04:45,  8.15s/it]

POLICY loss tensor(-12114.3555, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8507, DiseaseClass: 0.4317, Modifier: 0.3855, SpecificDisease: 0.5408, accuracy: 0.5522, loss: -8704.0894 ||:  42%|████▏     | 24/57 [02:30<02:45,  5.01s/it]

POLICY loss tensor(-816.4762, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8624, DiseaseClass: 0.4502, Modifier: 0.3843, SpecificDisease: 0.5457, accuracy: 0.5606, loss: -8055.7973 ||:  47%|████▋     | 27/57 [02:46<03:01,  6.04s/it]

POLICY loss tensor(-8610.0518, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8663, DiseaseClass: 0.4629, Modifier: 0.3783, SpecificDisease: 0.5429, accuracy: 0.5626, loss: -8346.1728 ||:  49%|████▉     | 28/57 [02:58<03:51,  7.97s/it]

POLICY loss tensor(-16186.8467, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8710, DiseaseClass: 0.4735, Modifier: 0.3760, SpecificDisease: 0.5347, accuracy: 0.5638, loss: -8456.8875 ||:  51%|█████     | 29/57 [03:11<04:21,  9.35s/it]

POLICY loss tensor(-11557.4199, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8804, DiseaseClass: 0.4776, Modifier: 0.3709, SpecificDisease: 0.5424, accuracy: 0.5678, loss: -7714.2010 ||:  56%|█████▌    | 32/57 [03:16<01:52,  4.50s/it]

POLICY loss tensor(-1606.3933, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8831, DiseaseClass: 0.4850, Modifier: 0.3710, SpecificDisease: 0.5420, accuracy: 0.5703, loss: -7868.5947 ||:  58%|█████▊    | 33/57 [03:29<02:45,  6.91s/it]

POLICY loss tensor(-12809.7490, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8866, DiseaseClass: 0.4948, Modifier: 0.3636, SpecificDisease: 0.5370, accuracy: 0.5705, loss: -7926.1806 ||:  60%|█████▉    | 34/57 [03:42<03:19,  8.68s/it]

POLICY loss tensor(-9827.0166, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8899, DiseaseClass: 0.5073, Modifier: 0.3612, SpecificDisease: 0.5378, accuracy: 0.5740, loss: -7766.2276 ||:  61%|██████▏   | 35/57 [03:45<02:38,  7.20s/it]

POLICY loss tensor(-2328.3394, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-36238.1992, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8946, DiseaseClass: 0.4983, Modifier: 0.3791, SpecificDisease: 0.5632, accuracy: 0.5838, loss: -8106.6939 ||:  67%|██████▋   | 38/57 [04:00<01:35,  5.03s/it]

POLICY loss tensor(-5229.5181, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8978, DiseaseClass: 0.4942, Modifier: 0.3811, SpecificDisease: 0.5662, accuracy: 0.5848, loss: -7641.0290 ||:  72%|███████▏  | 41/57 [04:06<00:45,  2.81s/it]

POLICY loss tensor(-51827.9102, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8967, DiseaseClass: 0.4897, Modifier: 0.3786, SpecificDisease: 0.5720, accuracy: 0.5842, loss: -9194.9169 ||:  75%|███████▌  | 43/57 [04:31<01:49,  7.84s/it]

POLICY loss tensor(-30272.5527, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-10061.6699, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8958, DiseaseClass: 0.4877, Modifier: 0.3792, SpecificDisease: 0.5789, accuracy: 0.5854, loss: -9214.6024 ||:  77%|███████▋  | 44/57 [04:35<01:27,  6.75s/it]

POLICY loss tensor(-8615.3320, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8939, DiseaseClass: 0.4797, Modifier: 0.3839, SpecificDisease: 0.5872, accuracy: 0.5861, loss: -9477.6962 ||:  81%|████████  | 46/57 [04:49<01:16,  6.93s/it]

POLICY loss tensor(-21917.3672, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8924, DiseaseClass: 0.4761, Modifier: 0.3803, SpecificDisease: 0.5880, accuracy: 0.5842, loss: -9716.3454 ||:  82%|████████▏ | 47/57 [05:01<01:25,  8.56s/it]

POLICY loss tensor(-20694.7500, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8840, DiseaseClass: 0.4790, Modifier: 0.3878, SpecificDisease: 0.6012, accuracy: 0.5880, loss: -9319.7359 ||:  86%|████████▌ | 49/57 [05:04<00:40,  5.03s/it]

POLICY loss tensor(-51034.1758, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8779, DiseaseClass: 0.4693, Modifier: 0.3958, SpecificDisease: 0.6125, accuracy: 0.5889, loss: -9806.7834 ||:  93%|█████████▎| 53/57 [05:33<00:27,  6.85s/it] 

POLICY loss tensor(-12060.6152, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-33199.1172, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8781, DiseaseClass: 0.4669, Modifier: 0.3972, SpecificDisease: 0.6163, accuracy: 0.5896, loss: -10239.9642 ||:  95%|█████████▍| 54/57 [05:43<00:23,  7.88s/it]

POLICY loss tensor(-5274.7915, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8767, DiseaseClass: 0.4710, Modifier: 0.4014, SpecificDisease: 0.6210, accuracy: 0.5925, loss: -10149.6769 ||:  96%|█████████▋| 55/57 [05:46<00:12,  6.39s/it]

POLICY loss tensor(-4195.0586, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8788, DiseaseClass: 0.4681, Modifier: 0.4082, SpecificDisease: 0.6306, accuracy: 0.5964, loss: -9867.1249 ||: 100%|██████████| 57/57 [05:51<00:00,  6.16s/it] 

2020-12-14 12:19:25,055: INFO: Starting with Validation



CompositeMention: 0.9122, DiseaseClass: 0.2978, Modifier: 0.3981, SpecificDisease: 0.5611, accuracy: 0.5423, loss: 0.6056 ||: 100%|██████████| 10/10 [00:01<00:00,  7.82it/s]

2020-12-14 12:19:26,339: INFO: Validation done. (906.0 / 1276) zero predicted





2020-12-14 12:19:34,233: INFO: Metrics:
                        Training DiseaseClass      : 0.468  Validation DiseaseClass      : 0.298
                        Training SpecificDisease   : 0.631  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.879  Validation CompositeMention  : 0.912
                        Training accuracy          : 0.596  Validation accuracy          : 0.542
                        Training loss              : -9867.125  Validation loss              : 0.606
                        Training Modifier          : 0.408  Validation Modifier          : 0.398

2020-12-14 12:19:34,234: INFO: Reducing LR: 2.00e-07 -> 1.00e-07
2020-12-14 12:19:34,235: INFO: Epoch duration: 00:06:01
2020-12-14 12:19:34,235: INFO: Estimated training time remaining: 04:38:23
2020-12-14 12:19:34,237: INFO: Starting Training Epoch 8/50
2020-12-14 12:19:34,237: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 12:19:34,335: INFO: GPU 0 memory usage M

  return_array[slices] = self.array
CompositeMention: 0.9688, DiseaseClass: 0.1719, Modifier: 0.1875, SpecificDisease: 0.4375, accuracy: 0.4414, loss: -967.3205 ||:   2%|▏         | 1/57 [00:02<02:22,  2.54s/it]

POLICY loss tensor(-967.8302, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9688, DiseaseClass: 0.1562, Modifier: 0.2344, SpecificDisease: 0.4844, accuracy: 0.4609, loss: -11609.1573 ||:   4%|▎         | 2/57 [00:15<05:09,  5.63s/it]

POLICY loss tensor(-22251.5664, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9688, DiseaseClass: 0.1979, Modifier: 0.2812, SpecificDisease: 0.5312, accuracy: 0.4948, loss: -13148.8425 ||:   5%|▌         | 3/57 [00:27<06:52,  7.65s/it]

POLICY loss tensor(-16228.7764, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9750, DiseaseClass: 0.2313, Modifier: 0.2250, SpecificDisease: 0.4875, accuracy: 0.4797, loss: -8437.8111 ||:   9%|▉         | 5/57 [00:35<05:04,  5.86s/it] 

POLICY loss tensor(-2743.5615, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9792, DiseaseClass: 0.2161, Modifier: 0.2188, SpecificDisease: 0.4948, accuracy: 0.4772, loss: -8755.8582 ||:  11%|█         | 6/57 [00:46<06:20,  7.45s/it]

POLICY loss tensor(-10346.6270, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9777, DiseaseClass: 0.2433, Modifier: 0.2009, SpecificDisease: 0.4911, accuracy: 0.4782, loss: -8243.5728 ||:  12%|█▏        | 7/57 [00:58<07:24,  8.88s/it]

POLICY loss tensor(-5170.3730, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-13037.7715, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9653, DiseaseClass: 0.3108, Modifier: 0.2396, SpecificDisease: 0.5382, accuracy: 0.5135, loss: -8607.5460 ||:  16%|█▌        | 9/57 [01:16<07:22,  9.22s/it]

POLICY loss tensor(-6726.2598, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9688, DiseaseClass: 0.3000, Modifier: 0.2406, SpecificDisease: 0.5281, accuracy: 0.5094, loss: -7869.9493 ||:  18%|█▊        | 10/57 [01:18<05:39,  7.23s/it]

POLICY loss tensor(-1232.1230, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-7330.1484, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9659, DiseaseClass: 0.3295, Modifier: 0.2812, SpecificDisease: 0.5625, accuracy: 0.5348, loss: -7820.8218 ||:  19%|█▉        | 11/57 [01:23<04:49,  6.30s/it]

POLICY loss tensor(-3389.7944, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9615, DiseaseClass: 0.3305, Modifier: 0.3101, SpecificDisease: 0.5745, accuracy: 0.5442, loss: -7812.2667 ||:  23%|██▎       | 13/57 [01:39<05:37,  7.68s/it]

POLICY loss tensor(-12141.7637, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9540, DiseaseClass: 0.3410, Modifier: 0.3493, SpecificDisease: 0.6029, accuracy: 0.5618, loss: -6716.0307 ||:  30%|██▉       | 17/57 [01:56<04:12,  6.30s/it]

POLICY loss tensor(-12615.3770, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9564, DiseaseClass: 0.3512, Modifier: 0.3372, SpecificDisease: 0.5987, accuracy: 0.5609, loss: -6171.0675 ||:  33%|███▎      | 19/57 [02:01<02:50,  4.50s/it]

POLICY loss tensor(-3078.8330, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-3686.4570, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9555, DiseaseClass: 0.3461, Modifier: 0.3484, SpecificDisease: 0.6047, accuracy: 0.5637, loss: -6046.8074 ||:  35%|███▌      | 20/57 [02:04<02:28,  4.00s/it]

POLICY loss tensor(-37104.3984, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9581, DiseaseClass: 0.3580, Modifier: 0.3494, SpecificDisease: 0.6023, accuracy: 0.5669, loss: -7433.9487 ||:  39%|███▊      | 22/57 [02:24<03:59,  6.85s/it]

POLICY loss tensor(-5507.4121, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-12475.6426, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9603, DiseaseClass: 0.3574, Modifier: 0.3646, SpecificDisease: 0.6094, accuracy: 0.5729, loss: -7457.4609 ||:  42%|████▏     | 24/57 [02:33<02:59,  5.44s/it]

POLICY loss tensor(-2957.6951, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-3392.6938, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9510, DiseaseClass: 0.3771, Modifier: 0.3594, SpecificDisease: 0.5969, accuracy: 0.5711, loss: -6433.3095 ||:  53%|█████▎    | 30/57 [02:54<02:23,  5.33s/it]

POLICY loss tensor(-10631.0439, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-13583.2656, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9446, DiseaseClass: 0.3810, Modifier: 0.3679, SpecificDisease: 0.6028, accuracy: 0.5741, loss: -6663.9314 ||:  54%|█████▍    | 31/57 [02:59<02:19,  5.36s/it]

POLICY loss tensor(-22972.2402, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9451, DiseaseClass: 0.3759, Modifier: 0.3788, SpecificDisease: 0.6146, accuracy: 0.5786, loss: -6956.1504 ||:  58%|█████▊    | 33/57 [03:09<01:55,  4.80s/it]

POLICY loss tensor(-4330.9443, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9423, DiseaseClass: 0.3785, Modifier: 0.3863, SpecificDisease: 0.6172, accuracy: 0.5811, loss: -6620.2921 ||:  63%|██████▎   | 36/57 [03:19<01:23,  3.97s/it]

POLICY loss tensor(-4448.3540, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9393, DiseaseClass: 0.3732, Modifier: 0.3866, SpecificDisease: 0.6142, accuracy: 0.5783, loss: -6271.8229 ||:  67%|██████▋   | 38/57 [03:22<00:52,  2.74s/it]

POLICY loss tensor(-8169.8145, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9369, DiseaseClass: 0.3741, Modifier: 0.3908, SpecificDisease: 0.6148, accuracy: 0.5791, loss: -6486.4044 ||:  70%|███████   | 40/57 [03:39<01:40,  5.90s/it]

POLICY loss tensor(-12958.2666, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9384, DiseaseClass: 0.3788, Modifier: 0.3873, SpecificDisease: 0.6119, accuracy: 0.5791, loss: -6597.9933 ||:  72%|███████▏  | 41/57 [04:01<02:52, 10.78s/it]

POLICY loss tensor(-11062.0586, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9373, DiseaseClass: 0.3874, Modifier: 0.3787, SpecificDisease: 0.5986, accuracy: 0.5755, loss: -6440.8881 ||:  74%|███████▎  | 42/57 [04:02<01:59,  7.95s/it]

POLICY loss tensor(-46688.1875, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9369, DiseaseClass: 0.3923, Modifier: 0.3830, SpecificDisease: 0.6052, accuracy: 0.5793, loss: -7376.8580 ||:  75%|███████▌  | 43/57 [04:15<02:12,  9.43s/it]

POLICY loss tensor(-26559.8809, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9344, DiseaseClass: 0.3911, Modifier: 0.3879, SpecificDisease: 0.6078, accuracy: 0.5803, loss: -7812.8211 ||:  77%|███████▋  | 44/57 [04:24<02:01,  9.35s/it]

POLICY loss tensor(-4375.7671, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9366, DiseaseClass: 0.3832, Modifier: 0.3914, SpecificDisease: 0.6119, accuracy: 0.5808, loss: -7645.6884 ||:  84%|████████▍ | 48/57 [04:40<00:54,  6.05s/it]

POLICY loss tensor(-18855.4512, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9373, DiseaseClass: 0.3875, Modifier: 0.3911, SpecificDisease: 0.6076, accuracy: 0.5809, loss: -7736.0653 ||:  86%|████████▌ | 49/57 [04:52<01:03,  7.95s/it]

POLICY loss tensor(-12074.7109, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-25358.6035, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9367, DiseaseClass: 0.3892, Modifier: 0.3933, SpecificDisease: 0.6130, accuracy: 0.5830, loss: -8088.5041 ||:  88%|████████▊ | 50/57 [05:01<00:58,  8.35s/it]

POLICY loss tensor(-11877.7969, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9373, DiseaseClass: 0.3926, Modifier: 0.3979, SpecificDisease: 0.6176, accuracy: 0.5864, loss: -8162.7920 ||:  89%|████████▉ | 51/57 [05:07<00:44,  7.46s/it]

POLICY loss tensor(-28388.3477, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9408, DiseaseClass: 0.3936, Modifier: 0.4014, SpecificDisease: 0.6228, accuracy: 0.5896, loss: -8085.2448 ||:  96%|█████████▋| 55/57 [05:22<00:07,  3.85s/it]

POLICY loss tensor(-21169.4082, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9423, DiseaseClass: 0.3899, Modifier: 0.4038, SpecificDisease: 0.6306, accuracy: 0.5917, loss: -8172.9257 ||: 100%|██████████| 57/57 [05:31<00:00,  5.82s/it]

2020-12-14 12:25:07,124: INFO: Starting with Validation



CompositeMention: 0.9122, DiseaseClass: 0.3605, Modifier: 0.3981, SpecificDisease: 0.5611, accuracy: 0.5580, loss: 0.5907 ||: 100%|██████████| 10/10 [00:01<00:00,  7.82it/s]

2020-12-14 12:25:08,408: INFO: Validation done. (942.0 / 1276) zero predicted





2020-12-14 12:25:16,798: INFO: Metrics:
                        Training DiseaseClass      : 0.390  Validation DiseaseClass      : 0.361
                        Training SpecificDisease   : 0.631  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.942  Validation CompositeMention  : 0.912
                        Training accuracy          : 0.592  Validation accuracy          : 0.558
                        Training loss              : -8172.926  Validation loss              : 0.591
                        Training Modifier          : 0.404  Validation Modifier          : 0.398

2020-12-14 12:25:16,800: INFO: Reducing LR: 1.00e-07 -> 1.00e-07
2020-12-14 12:25:16,802: INFO: Epoch duration: 00:05:42
2020-12-14 12:25:16,803: INFO: Estimated training time remaining: 04:27:53
2020-12-14 12:25:16,805: INFO: Starting Training Epoch 9/50
2020-12-14 12:25:16,806: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 12:25:16,904: INFO: GPU 0 memory usage M

  return_array[slices] = self.array
CompositeMention: 0.9688, DiseaseClass: 0.4844, Modifier: 0.3125, SpecificDisease: 0.3750, accuracy: 0.5352, loss: 0.5334 ||:   2%|▏         | 1/57 [00:01<01:11,  1.28s/it]

POLICY loss tensor(-3548.6016, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9714, DiseaseClass: 0.3516, Modifier: 0.3281, SpecificDisease: 0.5781, accuracy: 0.5573, loss: -590.8795 ||:  11%|█         | 6/57 [00:09<01:16,  1.49s/it] 

POLICY loss tensor(-3270.7578, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9576, DiseaseClass: 0.3683, Modifier: 0.3571, SpecificDisease: 0.6116, accuracy: 0.5737, loss: -973.6292 ||:  12%|█▏        | 7/57 [00:12<01:36,  1.93s/it]

POLICY loss tensor(-3363.3997, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9601, DiseaseClass: 0.3542, Modifier: 0.3542, SpecificDisease: 0.5660, accuracy: 0.5586, loss: -1215.8162 ||:  16%|█▌        | 9/57 [00:18<01:50,  2.31s/it]

POLICY loss tensor(-764.5470, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9578, DiseaseClass: 0.3281, Modifier: 0.3563, SpecificDisease: 0.5656, accuracy: 0.5520, loss: -1570.4539 ||:  18%|█▊        | 10/57 [00:21<02:10,  2.78s/it]

POLICY loss tensor(-4762.7651, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9632, DiseaseClass: 0.3080, Modifier: 0.3460, SpecificDisease: 0.5692, accuracy: 0.5466, loss: -2067.5015 ||:  25%|██▍       | 14/57 [00:38<03:36,  5.04s/it]

POLICY loss tensor(-13242.6621, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9594, DiseaseClass: 0.3083, Modifier: 0.3667, SpecificDisease: 0.5875, accuracy: 0.5555, loss: -1929.6284 ||:  26%|██▋       | 15/57 [00:40<02:48,  4.01s/it]

POLICY loss tensor(-34507.6992, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9531, DiseaseClass: 0.3244, Modifier: 0.3621, SpecificDisease: 0.5938, accuracy: 0.5584, loss: -3732.4101 ||:  30%|██▉       | 17/57 [00:54<03:26,  5.15s/it]

POLICY loss tensor(-11144.6211, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9477, DiseaseClass: 0.3625, Modifier: 0.3859, SpecificDisease: 0.6062, accuracy: 0.5756, loss: -4107.9238 ||:  35%|███▌      | 20/57 [01:14<04:06,  6.66s/it]

POLICY loss tensor(-7564.6670, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-4077.8184, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9490, DiseaseClass: 0.3696, Modifier: 0.3872, SpecificDisease: 0.5992, accuracy: 0.5763, loss: -4271.9376 ||:  40%|████      | 23/57 [01:31<03:48,  6.72s/it]

POLICY loss tensor(-12019.9707, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9416, DiseaseClass: 0.3831, Modifier: 0.3993, SpecificDisease: 0.6192, accuracy: 0.5858, loss: -3958.7484 ||:  47%|████▋     | 27/57 [01:48<03:03,  6.10s/it]

POLICY loss tensor(-8633.9355, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9390, DiseaseClass: 0.3735, Modifier: 0.4121, SpecificDisease: 0.6328, accuracy: 0.5894, loss: -3760.8821 ||:  56%|█████▌    | 32/57 [02:06<02:16,  5.48s/it]

POLICY loss tensor(-13464.8711, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-19188.1914, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9407, DiseaseClass: 0.3782, Modifier: 0.4108, SpecificDisease: 0.6379, accuracy: 0.5919, loss: -4414.4503 ||:  60%|█████▉    | 34/57 [02:27<03:05,  8.06s/it]

POLICY loss tensor(-10555.9941, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9397, DiseaseClass: 0.3763, Modifier: 0.4161, SpecificDisease: 0.6411, accuracy: 0.5933, loss: -4288.3061 ||:  61%|██████▏   | 35/57 [02:28<02:14,  6.12s/it]

POLICY loss tensor(-11599.1123, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9434, DiseaseClass: 0.3824, Modifier: 0.4094, SpecificDisease: 0.6383, accuracy: 0.5934, loss: -4068.5954 ||:  70%|███████   | 40/57 [02:40<00:48,  2.84s/it]

POLICY loss tensor(-1056.7695, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9464, DiseaseClass: 0.3746, Modifier: 0.4041, SpecificDisease: 0.6250, accuracy: 0.5875, loss: -3917.8158 ||:  77%|███████▋  | 44/57 [02:55<00:56,  4.32s/it]

POLICY loss tensor(-9642.3848, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9469, DiseaseClass: 0.3799, Modifier: 0.4000, SpecificDisease: 0.6194, accuracy: 0.5865, loss: -3964.0145 ||:  79%|███████▉  | 45/57 [03:07<01:20,  6.71s/it]

POLICY loss tensor(-5997.2642, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9453, DiseaseClass: 0.3852, Modifier: 0.4035, SpecificDisease: 0.6257, accuracy: 0.5899, loss: -3877.8267 ||:  81%|████████  | 46/57 [03:09<00:57,  5.19s/it]

POLICY loss tensor(-4298.6377, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9465, DiseaseClass: 0.3830, Modifier: 0.4016, SpecificDisease: 0.6263, accuracy: 0.5893, loss: -3886.7680 ||:  82%|████████▏ | 47/57 [03:12<00:44,  4.47s/it]

POLICY loss tensor(-42097.3164, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9443, DiseaseClass: 0.3841, Modifier: 0.4069, SpecificDisease: 0.6302, accuracy: 0.5914, loss: -4682.8077 ||:  84%|████████▍ | 48/57 [03:24<01:02,  6.98s/it]

POLICY loss tensor(-16451.1914, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9420, DiseaseClass: 0.3858, Modifier: 0.4036, SpecificDisease: 0.6292, accuracy: 0.5901, loss: -4492.4516 ||:  95%|█████████▍| 54/57 [03:39<00:08,  2.69s/it]

POLICY loss tensor(-1369.9564, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9418, DiseaseClass: 0.3865, Modifier: 0.4026, SpecificDisease: 0.6318, accuracy: 0.5907, loss: -4554.4129 ||:  98%|█████████▊| 56/57 [03:53<00:05,  5.40s/it]

POLICY loss tensor(-12455.8623, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9423, DiseaseClass: 0.3819, Modifier: 0.4038, SpecificDisease: 0.6306, accuracy: 0.5896, loss: -4474.5013 ||: 100%|██████████| 57/57 [03:55<00:00,  4.13s/it]

2020-12-14 12:29:13,061: INFO: Starting with Validation



CompositeMention: 0.8934, DiseaseClass: 0.3793, Modifier: 0.4013, SpecificDisease: 0.5611, accuracy: 0.5588, loss: 0.5860 ||: 100%|██████████| 10/10 [00:01<00:00,  7.51it/s]

2020-12-14 12:29:14,396: INFO: Validation done. (1013.0 / 1276) zero predicted





2020-12-14 12:29:22,792: INFO: Metrics:
                        Training DiseaseClass      : 0.382  Validation DiseaseClass      : 0.379
                        Training SpecificDisease   : 0.631  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.942  Validation CompositeMention  : 0.893
                        Training accuracy          : 0.590  Validation accuracy          : 0.559
                        Training loss              : -4474.501  Validation loss              : 0.586
                        Training Modifier          : 0.404  Validation Modifier          : 0.401

2020-12-14 12:29:22,795: INFO: Reducing LR: 1.00e-07 -> 1.00e-07
2020-12-14 12:29:22,796: INFO: Epoch duration: 00:04:05
2020-12-14 12:29:22,796: INFO: Estimated training time remaining: 04:11:08
2020-12-14 12:29:22,798: INFO: Starting Training Epoch 10/50
2020-12-14 12:29:22,799: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 12:29:22,898: INFO: GPU 0 memory usage 

  return_array[slices] = self.array
CompositeMention: 0.9323, DiseaseClass: 0.4792, Modifier: 0.5104, SpecificDisease: 0.6875, accuracy: 0.6523, loss: -2944.6351 ||:   5%|▌         | 3/57 [00:15<04:21,  4.83s/it]

POLICY loss tensor(-8835.6602, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8867, DiseaseClass: 0.4902, Modifier: 0.4297, SpecificDisease: 0.6562, accuracy: 0.6157, loss: -1987.6206 ||:  14%|█▍        | 8/57 [00:34<04:23,  5.37s/it]

POLICY loss tensor(-7069.8662, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8920, DiseaseClass: 0.4688, Modifier: 0.4205, SpecificDisease: 0.6392, accuracy: 0.6051, loss: -1606.9430 ||:  19%|█▉        | 11/57 [00:42<02:57,  3.86s/it]

POLICY loss tensor(-1777.1127, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8932, DiseaseClass: 0.4479, Modifier: 0.4062, SpecificDisease: 0.6302, accuracy: 0.5944, loss: -2903.7713 ||:  21%|██        | 12/57 [00:54<04:51,  6.48s/it]

POLICY loss tensor(-17169.4414, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8917, DiseaseClass: 0.4406, Modifier: 0.4021, SpecificDisease: 0.6229, accuracy: 0.5893, loss: -2771.9017 ||:  26%|██▋       | 15/57 [01:06<03:45,  5.37s/it]

POLICY loss tensor(-6734.9829, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-4514.1045, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8888, DiseaseClass: 0.4485, Modifier: 0.4062, SpecificDisease: 0.6342, accuracy: 0.5944, loss: -3151.0037 ||:  30%|██▉       | 17/57 [01:20<04:22,  6.56s/it]

POLICY loss tensor(-7475.5527, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8941, DiseaseClass: 0.4566, Modifier: 0.4045, SpecificDisease: 0.6181, accuracy: 0.5933, loss: -3022.8848 ||:  32%|███▏      | 18/57 [01:23<03:29,  5.37s/it]

POLICY loss tensor(-845.3999, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9027, DiseaseClass: 0.4425, Modifier: 0.3991, SpecificDisease: 0.6094, accuracy: 0.5884, loss: -2788.2323 ||:  39%|███▊      | 22/57 [01:39<03:19,  5.69s/it]

POLICY loss tensor(-6931.4160, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9069, DiseaseClass: 0.4375, Modifier: 0.3954, SpecificDisease: 0.6005, accuracy: 0.5851, loss: -2666.9814 ||:  40%|████      | 23/57 [01:41<02:28,  4.36s/it]

POLICY loss tensor(-4042.1077, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9089, DiseaseClass: 0.4427, Modifier: 0.4076, SpecificDisease: 0.6094, accuracy: 0.5921, loss: -2724.2538 ||:  42%|████▏     | 24/57 [01:44<02:09,  3.92s/it]

POLICY loss tensor(-20596.8789, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9199, DiseaseClass: 0.4209, Modifier: 0.4072, SpecificDisease: 0.6250, accuracy: 0.5933, loss: -2922.6995 ||:  56%|█████▌    | 32/57 [02:16<02:19,  5.58s/it]

POLICY loss tensor(-7551.9336, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9237, DiseaseClass: 0.4159, Modifier: 0.4099, SpecificDisease: 0.6305, accuracy: 0.5950, loss: -2750.7434 ||:  60%|█████▉    | 34/57 [02:19<01:20,  3.49s/it]

POLICY loss tensor(-5793.2437, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9253, DiseaseClass: 0.4089, Modifier: 0.4123, SpecificDisease: 0.6389, accuracy: 0.5964, loss: -2758.8155 ||:  63%|██████▎   | 36/57 [02:25<01:03,  3.01s/it]

POLICY loss tensor(-32254.4102, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9265, DiseaseClass: 0.4054, Modifier: 0.4071, SpecificDisease: 0.6394, accuracy: 0.5946, loss: -3555.9779 ||:  65%|██████▍   | 37/57 [02:37<01:59,  6.00s/it]

POLICY loss tensor(-19565.6973, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9297, DiseaseClass: 0.4156, Modifier: 0.3961, SpecificDisease: 0.6391, accuracy: 0.5951, loss: -4005.7566 ||:  70%|███████   | 40/57 [02:59<02:03,  7.27s/it]

POLICY loss tensor(-9095.0039, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9308, DiseaseClass: 0.4222, Modifier: 0.3958, SpecificDisease: 0.6362, accuracy: 0.5963, loss: -3837.8285 ||:  74%|███████▎  | 42/57 [03:03<01:09,  4.64s/it]

POLICY loss tensor(-959.6177, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9324, DiseaseClass: 0.4251, Modifier: 0.3932, SpecificDisease: 0.6323, accuracy: 0.5957, loss: -3983.9247 ||:  75%|███████▌  | 43/57 [03:15<01:32,  6.60s/it]

POLICY loss tensor(-10120.4961, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9345, DiseaseClass: 0.4180, Modifier: 0.3859, SpecificDisease: 0.6182, accuracy: 0.5892, loss: -4014.0305 ||:  81%|████████  | 46/57 [03:30<01:12,  6.60s/it]

POLICY loss tensor(-13338.3242, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9360, DiseaseClass: 0.4152, Modifier: 0.3875, SpecificDisease: 0.6218, accuracy: 0.5901, loss: -3846.7556 ||:  84%|████████▍ | 48/57 [03:33<00:35,  3.99s/it]

POLICY loss tensor(-21066.7695, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9341, DiseaseClass: 0.4193, Modifier: 0.3876, SpecificDisease: 0.6194, accuracy: 0.5901, loss: -4142.5892 ||:  88%|████████▊ | 50/57 [03:46<00:34,  4.96s/it]

POLICY loss tensor(-1419.5493, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-8400.8945, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9326, DiseaseClass: 0.4200, Modifier: 0.3960, SpecificDisease: 0.6286, accuracy: 0.5943, loss: -4198.8801 ||:  95%|█████████▍| 54/57 [04:00<00:12,  4.06s/it]

POLICY loss tensor(-11211.5879, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9328, DiseaseClass: 0.4193, Modifier: 0.3998, SpecificDisease: 0.6268, accuracy: 0.5946, loss: -4048.8993 ||:  98%|█████████▊| 56/57 [04:03<00:02,  2.70s/it]

POLICY loss tensor(-8575.9023, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9307, DiseaseClass: 0.4224, Modifier: 0.4038, SpecificDisease: 0.6306, accuracy: 0.5969, loss: -4128.3092 ||: 100%|██████████| 57/57 [04:07<00:00,  4.34s/it]

2020-12-14 12:33:31,410: INFO: Starting with Validation



CompositeMention: 0.8558, DiseaseClass: 0.3730, Modifier: 0.3981, SpecificDisease: 0.5611, accuracy: 0.5470, loss: 0.5830 ||: 100%|██████████| 10/10 [00:01<00:00,  7.78it/s]

2020-12-14 12:33:32,700: INFO: Validation done. (1004.0 / 1276) zero predicted





2020-12-14 12:33:41,095: INFO: Metrics:
                        Training DiseaseClass      : 0.422  Validation DiseaseClass      : 0.373
                        Training SpecificDisease   : 0.631  Validation SpecificDisease   : 0.561
                        Training CompositeMention  : 0.931  Validation CompositeMention  : 0.856
                        Training accuracy          : 0.597  Validation accuracy          : 0.547
                        Training loss              : -4128.309  Validation loss              : 0.583
                        Training Modifier          : 0.404  Validation Modifier          : 0.398

2020-12-14 12:33:41,098: INFO: Reducing LR: 1.00e-07 -> 1.00e-07
2020-12-14 12:33:41,099: INFO: Epoch duration: 00:04:18
2020-12-14 12:33:41,100: INFO: Estimated training time remaining: 03:57:43
2020-12-14 12:33:41,103: INFO: Starting Training Epoch 11/50
2020-12-14 12:33:41,104: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 12:33:41,200: INFO: GPU 0 memory usage 

  return_array[slices] = self.array
CompositeMention: 0.8073, DiseaseClass: 0.3698, Modifier: 0.4583, SpecificDisease: 0.7500, accuracy: 0.5964, loss: 0.5849 ||:   5%|▌         | 3/57 [00:05<01:31,  1.69s/it]

POLICY loss tensor(-4660.3779, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.7625, DiseaseClass: 0.3688, Modifier: 0.4688, SpecificDisease: 0.7375, accuracy: 0.5844, loss: -931.4838 ||:   9%|▉         | 5/57 [00:10<01:45,  2.04s/it] 

POLICY loss tensor(-37156.5312, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.7879, DiseaseClass: 0.3795, Modifier: 0.4732, SpecificDisease: 0.6830, accuracy: 0.5809, loss: -7042.5291 ||:  12%|█▏        | 7/57 [00:36<06:27,  7.75s/it]

POLICY loss tensor(-7484.8584, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.7865, DiseaseClass: 0.3611, Modifier: 0.4201, SpecificDisease: 0.6181, accuracy: 0.5464, loss: -5477.3901 ||:  16%|█▌        | 9/57 [00:40<03:41,  4.62s/it]

POLICY loss tensor(-18211.1152, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8185, DiseaseClass: 0.3702, Modifier: 0.3750, SpecificDisease: 0.5817, accuracy: 0.5364, loss: -5609.9420 ||:  23%|██▎       | 13/57 [00:59<03:47,  5.18s/it]

POLICY loss tensor(-5423.7852, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-8959.1201, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8350, DiseaseClass: 0.3672, Modifier: 0.3965, SpecificDisease: 0.6172, accuracy: 0.5540, loss: -6020.7903 ||:  28%|██▊       | 16/57 [01:12<03:23,  4.96s/it]

POLICY loss tensor(-14446.0225, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8419, DiseaseClass: 0.3631, Modifier: 0.3805, SpecificDisease: 0.6121, accuracy: 0.5494, loss: -6438.9927 ||:  30%|██▉       | 17/57 [01:26<05:05,  7.64s/it]

POLICY loss tensor(-13130.7412, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8472, DiseaseClass: 0.3611, Modifier: 0.3750, SpecificDisease: 0.6007, accuracy: 0.5460, loss: -6081.2405 ||:  32%|███▏      | 18/57 [01:27<03:43,  5.74s/it]

POLICY loss tensor(-17414.6367, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8562, DiseaseClass: 0.3563, Modifier: 0.3984, SpecificDisease: 0.6250, accuracy: 0.5590, loss: -6343.7906 ||:  35%|███▌      | 20/57 [01:36<03:02,  4.94s/it]

POLICY loss tensor(-5094.2974, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8690, DiseaseClass: 0.3702, Modifier: 0.4231, SpecificDisease: 0.6490, accuracy: 0.5778, loss: -5075.6389 ||:  46%|████▌     | 26/57 [01:47<01:02,  2.01s/it]

POLICY loss tensor(-45547.0312, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8728, DiseaseClass: 0.3766, Modifier: 0.4353, SpecificDisease: 0.6606, accuracy: 0.5863, loss: -7088.3436 ||:  51%|█████     | 29/57 [02:18<03:22,  7.22s/it]

POLICY loss tensor(-28050.1250, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8760, DiseaseClass: 0.3792, Modifier: 0.4333, SpecificDisease: 0.6562, accuracy: 0.5862, loss: -7217.6896 ||:  53%|█████▎    | 30/57 [02:31<04:08,  9.20s/it]

POLICY loss tensor(-10969.2754, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8780, DiseaseClass: 0.3800, Modifier: 0.4244, SpecificDisease: 0.6482, accuracy: 0.5827, loss: -7296.2399 ||:  54%|█████▍    | 31/57 [02:43<04:15,  9.81s/it]

POLICY loss tensor(-9653.3008, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8813, DiseaseClass: 0.3784, Modifier: 0.4189, SpecificDisease: 0.6504, accuracy: 0.5823, loss: -7121.6959 ||:  56%|█████▌    | 32/57 [02:45<03:10,  7.64s/it]

POLICY loss tensor(-1711.3357, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8837, DiseaseClass: 0.3810, Modifier: 0.4173, SpecificDisease: 0.6553, accuracy: 0.5843, loss: -6744.6935 ||:  60%|█████▉    | 34/57 [02:50<01:54,  4.96s/it]

POLICY loss tensor(-1426.4258, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8862, DiseaseClass: 0.3781, Modifier: 0.4143, SpecificDisease: 0.6464, accuracy: 0.5813, loss: -6913.4409 ||:  61%|██████▏   | 35/57 [03:04<02:52,  7.82s/it]

POLICY loss tensor(-12651.3809, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8931, DiseaseClass: 0.3717, Modifier: 0.4104, SpecificDisease: 0.6365, accuracy: 0.5779, loss: -6661.5352 ||:  67%|██████▋   | 38/57 [03:21<02:17,  7.25s/it]

POLICY loss tensor(-11169.5312, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8950, DiseaseClass: 0.3670, Modifier: 0.4103, SpecificDisease: 0.6362, accuracy: 0.5771, loss: -6490.7119 ||:  68%|██████▊   | 39/57 [03:22<01:38,  5.49s/it]

POLICY loss tensor(-4135.5425, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8948, DiseaseClass: 0.3750, Modifier: 0.4093, SpecificDisease: 0.6311, accuracy: 0.5776, loss: -6274.9349 ||:  72%|███████▏  | 41/57 [03:27<00:59,  3.74s/it]

POLICY loss tensor(-8902.6445, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.8953, DiseaseClass: 0.3772, Modifier: 0.4113, SpecificDisease: 0.6410, accuracy: 0.5812, loss: -6190.0881 ||:  75%|███████▌  | 43/57 [03:32<00:44,  3.19s/it]

POLICY loss tensor(-4186.6865, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9072, DiseaseClass: 0.3817, Modifier: 0.4099, SpecificDisease: 0.6391, accuracy: 0.5845, loss: -5652.8925 ||:  89%|████████▉ | 51/57 [03:58<00:30,  5.03s/it]

POLICY loss tensor(-17941.4492, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9053, DiseaseClass: 0.3828, Modifier: 0.4123, SpecificDisease: 0.6424, accuracy: 0.5857, loss: -5544.1709 ||:  91%|█████████ | 52/57 [03:59<00:19,  3.98s/it]

POLICY loss tensor(-4719.7344, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9113, DiseaseClass: 0.3808, Modifier: 0.4038, SpecificDisease: 0.6312, accuracy: 0.5817, loss: -5178.4525 ||: 100%|██████████| 57/57 [04:09<00:00,  4.38s/it]

POLICY loss tensor(-2158.0063, device='cuda:0', grad_fn=<SumBackward0>)
2020-12-14 12:37:51,520: INFO: Starting with Validation



CompositeMention: 0.9091, DiseaseClass: 0.3730, Modifier: 0.4013, SpecificDisease: 0.5705, accuracy: 0.5635, loss: 0.5844 ||: 100%|██████████| 10/10 [00:01<00:00,  7.51it/s]

2020-12-14 12:37:52,854: INFO: Validation done. (1002.0 / 1276) zero predicted





2020-12-14 12:38:01,249: INFO: Metrics:
                        Training DiseaseClass      : 0.381  Validation DiseaseClass      : 0.373
                        Training SpecificDisease   : 0.631  Validation SpecificDisease   : 0.571
                        Training CompositeMention  : 0.911  Validation CompositeMention  : 0.909
                        Training accuracy          : 0.582  Validation accuracy          : 0.563
                        Training loss              : -5178.452  Validation loss              : 0.584
                        Training Modifier          : 0.404  Validation Modifier          : 0.401

2020-12-14 12:38:01,251: INFO: Reducing LR: 1.00e-07 -> 1.00e-07
2020-12-14 12:38:01,252: INFO: Epoch duration: 00:04:20
2020-12-14 12:38:01,253: INFO: Estimated training time remaining: 03:46:05
2020-12-14 12:38:01,256: INFO: Starting Training Epoch 12/50
2020-12-14 12:38:01,257: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 12:38:01,355: INFO: GPU 0 memory usage 

  return_array[slices] = self.array
CompositeMention: 0.9297, DiseaseClass: 0.6719, Modifier: 0.2500, SpecificDisease: 0.5547, accuracy: 0.6016, loss: -504.2511 ||:   4%|▎         | 2/57 [00:04<01:40,  1.83s/it]

POLICY loss tensor(-1009.4948, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9258, DiseaseClass: 0.6055, Modifier: 0.2695, SpecificDisease: 0.4492, accuracy: 0.5625, loss: -2316.3716 ||:   7%|▋         | 4/57 [00:17<04:20,  4.91s/it]

POLICY loss tensor(-8258.0859, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-8244.2285, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9219, DiseaseClass: 0.5969, Modifier: 0.3469, SpecificDisease: 0.5344, accuracy: 0.6000, loss: -3501.8180 ||:   9%|▉         | 5/57 [00:21<04:03,  4.69s/it]

POLICY loss tensor(-7423.8120, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9427, DiseaseClass: 0.5625, Modifier: 0.3351, SpecificDisease: 0.5330, accuracy: 0.5933, loss: -3157.4836 ||:  16%|█▌        | 9/57 [00:35<03:13,  4.02s/it]

POLICY loss tensor(-3486.6973, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9453, DiseaseClass: 0.5813, Modifier: 0.3391, SpecificDisease: 0.5172, accuracy: 0.5957, loss: -3800.8337 ||:  18%|█▊        | 10/57 [00:48<05:08,  6.56s/it]

POLICY loss tensor(-9591.5137, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-8099.0430, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9453, DiseaseClass: 0.5558, Modifier: 0.3672, SpecificDisease: 0.5882, accuracy: 0.6141, loss: -3582.7371 ||:  25%|██▍       | 14/57 [01:01<03:12,  4.47s/it]

POLICY loss tensor(-4053.1914, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9469, DiseaseClass: 0.5437, Modifier: 0.3781, SpecificDisease: 0.5698, accuracy: 0.6096, loss: -3476.7090 ||:  26%|██▋       | 15/57 [01:05<02:55,  4.19s/it]

POLICY loss tensor(-1992.8816, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-8047.5889, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9477, DiseaseClass: 0.4883, Modifier: 0.3992, SpecificDisease: 0.6008, accuracy: 0.6090, loss: -3091.7195 ||:  35%|███▌      | 20/57 [01:16<01:29,  2.43s/it]

POLICY loss tensor(-1639.0398, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9488, DiseaseClass: 0.4824, Modifier: 0.3843, SpecificDisease: 0.5908, accuracy: 0.6016, loss: -3168.8945 ||:  39%|███▊      | 22/57 [01:25<02:11,  3.75s/it]

POLICY loss tensor(-7882.3887, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-39471.8672, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9519, DiseaseClass: 0.4679, Modifier: 0.3858, SpecificDisease: 0.5976, accuracy: 0.6008, loss: -4367.4356 ||:  44%|████▍     | 25/57 [01:41<02:04,  3.88s/it]

POLICY loss tensor(-3972.5630, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9508, DiseaseClass: 0.4638, Modifier: 0.3980, SpecificDisease: 0.5996, accuracy: 0.6031, loss: -4959.3812 ||:  47%|████▋     | 27/57 [01:56<03:09,  6.31s/it]

POLICY loss tensor(-20746.0449, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9514, DiseaseClass: 0.4686, Modifier: 0.3937, SpecificDisease: 0.5949, accuracy: 0.6021, loss: -4841.9398 ||:  49%|████▉     | 28/57 [01:59<02:30,  5.18s/it]

POLICY loss tensor(-1671.5647, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9542, DiseaseClass: 0.4562, Modifier: 0.3867, SpecificDisease: 0.5927, accuracy: 0.5975, loss: -5317.5476 ||:  54%|█████▍    | 31/57 [02:14<02:37,  6.04s/it]

POLICY loss tensor(-29271.2773, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9574, DiseaseClass: 0.4424, Modifier: 0.3885, SpecificDisease: 0.5881, accuracy: 0.5941, loss: -4848.3030 ||:  60%|█████▉    | 34/57 [02:18<01:08,  2.98s/it]

POLICY loss tensor(-21972.1094, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9536, DiseaseClass: 0.4481, Modifier: 0.3937, SpecificDisease: 0.6021, accuracy: 0.5994, loss: -5021.0497 ||:  67%|██████▋   | 38/57 [02:33<01:01,  3.21s/it]

POLICY loss tensor(-3987.7715, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9515, DiseaseClass: 0.4454, Modifier: 0.3949, SpecificDisease: 0.6100, accuracy: 0.6004, loss: -4892.2895 ||:  68%|██████▊   | 39/57 [02:35<00:49,  2.76s/it]

POLICY loss tensor(-5871.9121, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9485, DiseaseClass: 0.4485, Modifier: 0.4009, SpecificDisease: 0.6061, accuracy: 0.6010, loss: -5270.3058 ||:  72%|███████▏  | 41/57 [02:51<01:32,  5.78s/it]

POLICY loss tensor(-19412.5117, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9456, DiseaseClass: 0.4583, Modifier: 0.3918, SpecificDisease: 0.6123, accuracy: 0.6020, loss: -5003.2729 ||:  81%|████████  | 46/57 [03:03<00:40,  3.64s/it]

POLICY loss tensor(-14070.8320, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-3929.3379, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9413, DiseaseClass: 0.4585, Modifier: 0.3975, SpecificDisease: 0.6153, accuracy: 0.6032, loss: -4980.4102 ||:  82%|████████▏ | 47/57 [03:06<00:34,  3.42s/it]

POLICY loss tensor(-9725.2158, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9363, DiseaseClass: 0.4574, Modifier: 0.4023, SpecificDisease: 0.6208, accuracy: 0.6042, loss: -5079.2475 ||:  84%|████████▍ | 48/57 [03:10<00:32,  3.63s/it]

POLICY loss tensor(-7079.9561, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9273, DiseaseClass: 0.4497, Modifier: 0.4034, SpecificDisease: 0.6294, accuracy: 0.6024, loss: -4621.0657 ||:  96%|█████████▋| 55/57 [03:24<00:05,  2.55s/it]

POLICY loss tensor(-3278.6641, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9251, DiseaseClass: 0.4479, Modifier: 0.4040, SpecificDisease: 0.6320, accuracy: 0.6023, loss: -4804.9243 ||: 100%|██████████| 57/57 [03:39<00:00,  3.84s/it]

POLICY loss tensor(-19723.2246, device='cuda:0', grad_fn=<SumBackward0>)
2020-12-14 12:41:41,376: INFO: Starting with Validation



CompositeMention: 0.8809, DiseaseClass: 0.3354, Modifier: 0.3981, SpecificDisease: 0.5705, accuracy: 0.5462, loss: 0.5899 ||: 100%|██████████| 10/10 [00:01<00:00,  7.59it/s]

2020-12-14 12:41:42,698: INFO: Validation done. (934.0 / 1276) zero predicted





2020-12-14 12:41:50,592: INFO: Metrics:
                        Training DiseaseClass      : 0.448  Validation DiseaseClass      : 0.335
                        Training SpecificDisease   : 0.632  Validation SpecificDisease   : 0.571
                        Training CompositeMention  : 0.925  Validation CompositeMention  : 0.881
                        Training accuracy          : 0.602  Validation accuracy          : 0.546
                        Training loss              : -4804.924  Validation loss              : 0.590
                        Training Modifier          : 0.404  Validation Modifier          : 0.398

2020-12-14 12:41:50,594: INFO: Reducing LR: 1.00e-07 -> 1.00e-07
2020-12-14 12:41:50,595: INFO: Epoch duration: 00:03:49
2020-12-14 12:41:50,596: INFO: Estimated training time remaining: 03:34:02
2020-12-14 12:41:50,599: INFO: Starting Training Epoch 13/50
2020-12-14 12:41:50,600: INFO: Peak CPU memory usage MB: 6982.376
2020-12-14 12:41:50,697: INFO: GPU 0 memory usage 

  return_array[slices] = self.array
CompositeMention: 0.9375, DiseaseClass: 0.5625, Modifier: 0.2500, SpecificDisease: 0.3438, accuracy: 0.5234, loss: -3823.3318 ||:   2%|▏         | 1/57 [00:04<04:39,  4.99s/it]

POLICY loss tensor(-3823.8362, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9609, DiseaseClass: 0.5234, Modifier: 0.1875, SpecificDisease: 0.3906, accuracy: 0.5156, loss: -8789.6581 ||:   4%|▎         | 2/57 [00:17<06:41,  7.29s/it]

POLICY loss tensor(-13756.4902, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-4687.9082, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9594, DiseaseClass: 0.4281, Modifier: 0.3937, SpecificDisease: 0.5813, accuracy: 0.5906, loss: -8508.4129 ||:   9%|▉         | 5/57 [00:37<06:36,  7.62s/it]

POLICY loss tensor(-20276.5391, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9609, DiseaseClass: 0.4193, Modifier: 0.3854, SpecificDisease: 0.5365, accuracy: 0.5755, loss: -7402.7956 ||:  11%|█         | 6/57 [00:39<05:11,  6.11s/it]

POLICY loss tensor(-1875.2550, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9512, DiseaseClass: 0.3926, Modifier: 0.4141, SpecificDisease: 0.5898, accuracy: 0.5869, loss: -8189.4826 ||:  14%|█▍        | 8/57 [00:50<04:55,  6.02s/it]

POLICY loss tensor(-21100.2598, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9566, DiseaseClass: 0.3646, Modifier: 0.4132, SpecificDisease: 0.5590, accuracy: 0.5734, loss: -7279.4775 ||:  16%|█▌        | 9/57 [00:51<03:41,  4.61s/it]

POLICY loss tensor(-38255.4219, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9531, DiseaseClass: 0.3494, Modifier: 0.3920, SpecificDisease: 0.5568, accuracy: 0.5629, loss: -9433.5889 ||:  19%|█▉        | 11/57 [01:09<04:45,  6.22s/it] 

POLICY loss tensor(-21395.1836, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9531, DiseaseClass: 0.3822, Modifier: 0.3846, SpecificDisease: 0.5577, accuracy: 0.5694, loss: -10530.0425 ||:  23%|██▎       | 13/57 [01:30<06:14,  8.51s/it]

POLICY loss tensor(-11727.0176, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9565, DiseaseClass: 0.3806, Modifier: 0.3750, SpecificDisease: 0.5536, accuracy: 0.5664, loss: -9919.2512 ||:  25%|██▍       | 14/57 [01:33<04:50,  6.75s/it] 

POLICY loss tensor(-1979.4960, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-26734.6172, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9510, DiseaseClass: 0.3823, Modifier: 0.3875, SpecificDisease: 0.5667, accuracy: 0.5719, loss: -11040.2334 ||:  26%|██▋       | 15/57 [01:43<05:31,  7.89s/it]

POLICY loss tensor(-4952.7451, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9458, DiseaseClass: 0.3943, Modifier: 0.3952, SpecificDisease: 0.5772, accuracy: 0.5781, loss: -10985.3697 ||:  30%|██▉       | 17/57 [01:59<05:32,  8.32s/it]

POLICY loss tensor(-16196.1797, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9433, DiseaseClass: 0.3857, Modifier: 0.4211, SpecificDisease: 0.6086, accuracy: 0.5896, loss: -9828.9542 ||:  33%|███▎      | 19/57 [02:02<03:05,  4.89s/it] 

POLICY loss tensor(-4879.6562, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9445, DiseaseClass: 0.3883, Modifier: 0.4234, SpecificDisease: 0.6188, accuracy: 0.5938, loss: -9581.4603 ||:  35%|███▌      | 20/57 [02:05<02:39,  4.30s/it]

POLICY loss tensor(-3853.4707, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9460, DiseaseClass: 0.3842, Modifier: 0.4256, SpecificDisease: 0.6119, accuracy: 0.5919, loss: -7242.6456 ||:  51%|█████     | 29/57 [02:31<02:16,  4.87s/it]

POLICY loss tensor(-14559.1602, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9457, DiseaseClass: 0.3951, Modifier: 0.4217, SpecificDisease: 0.6006, accuracy: 0.5908, loss: -7107.0200 ||:  53%|█████▎    | 30/57 [02:35<02:02,  4.52s/it]

POLICY loss tensor(-3174.4026, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9464, DiseaseClass: 0.4027, Modifier: 0.4181, SpecificDisease: 0.5973, accuracy: 0.5911, loss: -7355.9772 ||:  54%|█████▍    | 31/57 [02:47<03:01,  6.99s/it]

POLICY loss tensor(-14825.2607, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9500, DiseaseClass: 0.4063, Modifier: 0.4004, SpecificDisease: 0.5914, accuracy: 0.5870, loss: -6749.2001 ||:  61%|██████▏   | 35/57 [02:59<01:39,  4.50s/it]

POLICY loss tensor(-8188.8706, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9491, DiseaseClass: 0.4332, Modifier: 0.3985, SpecificDisease: 0.5949, accuracy: 0.5939, loss: -6056.9199 ||:  68%|██████▊   | 39/57 [03:04<00:38,  2.15s/it]

POLICY loss tensor(-4442.1670, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9472, DiseaseClass: 0.4181, Modifier: 0.4030, SpecificDisease: 0.6066, accuracy: 0.5937, loss: -5681.5022 ||:  75%|███████▌  | 43/57 [03:13<00:29,  2.14s/it]

POLICY loss tensor(-3644.8389, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9463, DiseaseClass: 0.4200, Modifier: 0.4030, SpecificDisease: 0.6136, accuracy: 0.5957, loss: -5552.3631 ||:  77%|███████▋  | 44/57 [03:15<00:25,  1.99s/it]

POLICY loss tensor(-5379.6396, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9411, DiseaseClass: 0.4228, Modifier: 0.4094, SpecificDisease: 0.6251, accuracy: 0.5996, loss: -5427.8775 ||:  81%|████████  | 46/57 [03:19<00:23,  2.09s/it]

POLICY loss tensor(-8702.9824, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9429, DiseaseClass: 0.4251, Modifier: 0.4079, SpecificDisease: 0.6290, accuracy: 0.6012, loss: -5424.9133 ||:  84%|████████▍ | 48/57 [03:26<00:23,  2.65s/it]

POLICY loss tensor(-2011.6106, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9441, DiseaseClass: 0.4308, Modifier: 0.4053, SpecificDisease: 0.6257, accuracy: 0.6015, loss: -5553.9718 ||:  86%|████████▌ | 49/57 [03:35<00:35,  4.46s/it]

POLICY loss tensor(-11749.2930, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9426, DiseaseClass: 0.4271, Modifier: 0.4047, SpecificDisease: 0.6307, accuracy: 0.6013, loss: -5912.5236 ||:  89%|████████▉ | 51/57 [03:49<00:38,  6.39s/it]

POLICY loss tensor(-29395.2461, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9437, DiseaseClass: 0.4251, Modifier: 0.4041, SpecificDisease: 0.6245, accuracy: 0.5994, loss: -6104.8005 ||:  91%|█████████ | 52/57 [04:03<00:42,  8.59s/it]

POLICY loss tensor(-15911.4551, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9441, DiseaseClass: 0.4200, Modifier: 0.3995, SpecificDisease: 0.6227, accuracy: 0.5966, loss: -5914.8616 ||:  95%|█████████▍| 54/57 [04:08<00:16,  5.45s/it]

POLICY loss tensor(-1953.9679, device='cuda:0', grad_fn=<SumBackward0>)
POLICY loss tensor(-18968.4141, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9405, DiseaseClass: 0.4204, Modifier: 0.4008, SpecificDisease: 0.6245, accuracy: 0.5965, loss: -6152.1876 ||:  96%|█████████▋| 55/57 [04:15<00:12,  6.02s/it]

POLICY loss tensor(-8482.6709, device='cuda:0', grad_fn=<SumBackward0>)


CompositeMention: 0.9395, DiseaseClass: 0.4248, Modifier: 0.4038, SpecificDisease: 0.6328, accuracy: 0.6002, loss: -6085.1196 ||: 100%|██████████| 57/57 [04:22<00:00,  4.60s/it]

2020-12-14 12:46:14,040: INFO: Starting with Validation



CompositeMention: 0.8433, DiseaseClass: 0.3605, Modifier: 0.4013, SpecificDisease: 0.5705, accuracy: 0.5439, loss: 0.5876 ||: 100%|██████████| 10/10 [00:01<00:00,  7.57it/s]

2020-12-14 12:46:15,365: INFO: Validation done. (959.0 / 1276) zero predicted
2020-12-14 12:46:15,366: INFO: Ran out of patience. Stopping training.





In [None]:
# Final cell: announce the end of training and, if `instances_test` is set,
# hand it to `trainer.test` before logging completion.
logger.info("Training Done.")
if instances_test is not None:
    # `instances_test` may be None (presumably when no test data was
    # configured -- verify against the earlier loading cell); skip the
    # test pass in that case.
    logger.info("Computing final Test Accuracy")
    trainer.test(instances_test)
logger.info("Done.")