In [None]:
!pip show pytorch-pretrained-bert

In [None]:
!pip install seqeval

In [None]:
!pip install pytorch-pretrained-bert==0.4.0

In [None]:
!pip install git+https://github.com/IINemo/libact.git@seq

In [None]:
#!pip install -e ./packages/libact/
!pip uninstall -y actleto
!pip install git+https://github.com/IINemo/active_learning_toolbox.git@seq

# Initialization

In [1]:
# Enable the autoreload extension in mode 2 so edits to imported project modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os
# Restrict this process to GPU 0. This is set here, before `torch` is imported in the
# next cell, so the restriction is in place when CUDA devices are enumerated.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import sys

In [2]:
import torch

# Use the GPU when one is available and fall back to the CPU otherwise, so `device`
# never names a backend that cannot be used on the current machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Number of CUDA devices visible to this process (0 on a CPU-only machine).
n_gpu = torch.cuda.device_count()

# List the visible GPUs so the run log records the hardware used.
for i in range(n_gpu):
    print(torch.cuda.get_device_name(i))

Tesla V100-DGXS-16GB


In [3]:
from logger import initialize_logger
# Project logger named 'biomed_ie'; the experiment loop below logs through it.
logger = initialize_logger('biomed_ie')

In [4]:
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
import json

# Global parameters

In [5]:
# Settings of an earlier run, kept for reference. Values not listed here were the same
# as the active ones below:
#   BATCH_SIZE = 40, EARLY_STOPPING = 4, MAX_RETRAIN_EPOCHS = 17
# (N_SAMPLES_PER_AL_ITER, VALIDATION_RATIO, MAX_TO_ANNEAL and OUTPUT_FILE_PATH were not
# part of that earlier configuration.)


# Passed as `cache_dir` to the tokenizer loader and to `create_model_optimizer`.
CACHE_DIR = '../workdir/models/0.4.0'

# Passed as `batch_size` to the libact adaptor. Commented-out alternative: 100.
BATCH_SIZE = 32
# Passed as `max_len` to every experiment — presumably the maximum sequence length
# (TODO confirm against run_experiment_al). Commented-out alternative: 150.
MAX_LEN = 100
# Passed as `bs_pred` to the libact adaptor. Commented-out alternatives: 1200, 1600.
BATCH_SIZE_PRED = 1300
# Not referenced by any cell visible in this notebook.
random_state = 2019

# Passed as `early_stopping` to the libact adaptor.
EARLY_STOPPING = 1
# Passed as `retrain_epochs` to the libact adaptor.
MAX_RETRAIN_EPOCHS = 20

# Passed as `max_samples_number` to every experiment.
N_SAMPLES_PER_AL_ITER = 30
# Passed as `n_passes` to every experiment.
N_AL_PASSES = 10
# Passed as `n_al_iterations` to every experiment.
N_AL_ITERATIONS = 25

# Passed as `base_lr` to `create_model_optimizer`.
BASE_LR = 5e-5

# Not referenced by any cell visible in this notebook.
VALIDATION_RATIO = 0.25

# Not referenced by any cell visible in this notebook.
MAX_TO_ANNEAL = 1

# Directory for experiment outputs; must stay equal to the 'output_file_path' value
# used by the experiment definitions.
OUTPUT_FILE_PATH = '../workdir/experiments/14_biobert/'

# Load dataset

In [6]:
from pytorch_pretrained_bert import BertTokenizer
# Tokenizer for the 'bert-base-cased' vocabulary, with lower-casing disabled.
# NOTE(review): the model itself is loaded from '../workdir/bio_bert/torch2/' in
# create_libact_adaptor — confirm that checkpoint uses this same vocabulary.
bpe_tokenizer = BertTokenizer.from_pretrained('bert-base-cased', cache_dir=CACHE_DIR, do_lower_case=False)

In [7]:
from flair.data_fetcher import NLPTaskDataFetcher

data_folder = '../workdir/genia/conll/'
# Column files: column 0 is mapped to 'text' and column 1 to 'ner'.
# NOTE(review): `dev_file` and `test_file` point to the same file, so the dev split of
# this corpus is identical to the test split — confirm nothing downstream tunes on it.
corpus = NLPTaskDataFetcher.load_column_corpus(data_folder, {0 : 'text', 1 : 'ner'},
                                               train_file='Genia4ERtask1.iob2',
                                               test_file='Genia4EReval1.iob2',
                                               dev_file='Genia4EReval1.iob2')

2019-08-20 10:05:04,116 Reading data from ../workdir/genia/conll
2019-08-20 10:05:04,117 Train: ../workdir/genia/conll/Genia4ERtask1.iob2
2019-08-20 10:05:04,118 Dev: ../workdir/genia/conll/Genia4EReval1.iob2
2019-08-20 10:05:04,119 Test: ../workdir/genia/conll/Genia4EReval1.iob2


# Run experiments

In [8]:
from bert_active_learning_exp import run_experiment_al
from active_learning_seq import LibActNN, LibActNNPositiveLessCertain, RandomSamplingWithRetraining
from libact.query_strategies import UncertaintySampling, RandomSampling
from bert_utils import create_model_optimizer


def create_libact_adaptor(tag2index, adaptor_type, *args,
                          bert_model='../workdir/bio_bert/torch2/'):
    """Instantiate a libact adaptor configured from the notebook's global parameters.

    Args:
        tag2index: forwarded as the first argument of ``create_model_optimizer``.
            The callers in this notebook iterate its ``keys()`` as tag strings.
        adaptor_type: class (or other callable) to instantiate, e.g. ``LibActNN``.
        *args: extra positional arguments placed before the keyword settings in the
            ``adaptor_type`` call.
        bert_model: keyword-only; forwarded to ``create_model_optimizer`` as
            ``bert_model``. Defaults to the checkpoint directory that used to be
            hard-coded here, so existing calls behave as before.

    Returns:
        The object returned by ``adaptor_type(...)``.
    """
    def model_ctor():
        # The adaptor receives this factory rather than a ready model; CACHE_DIR and
        # BASE_LR are looked up each time the factory is called.
        return create_model_optimizer(tag2index,
                                      cache_dir=CACHE_DIR,
                                      base_lr=BASE_LR,
                                      bert_model=bert_model)

    return adaptor_type(*args,
                        model_ctor=model_ctor,
                        batch_size=BATCH_SIZE,
                        bs_pred=BATCH_SIZE_PRED,
                        train_from_scratch=True,
                        retrain_epochs=MAX_RETRAIN_EPOCHS,
                        early_stopping=EARLY_STOPPING)

In [9]:
def create_i2b2_experiment_pack(corpus, attr):
    """Build the list of active-learning experiment configurations for one corpus.

    Each entry is a dict whose items are passed as keyword arguments to
    ``run_experiment_al``. Two experiments are defined:

    * ``'<attr>_MNLP-mod'`` — ``UncertaintySampling`` with ``method='lc'`` on a
      ``LibActNNPositiveLessCertain`` adaptor that is given the tags starting with 'B'.
    * ``'<attr>_i.i.d.'`` — ``RandomSamplingWithRetraining`` on a ``LibActNN`` adaptor.

    A plain ``'<attr>_MNLP'`` baseline (``UncertaintySampling`` with ``method='lc'`` on a
    ``LibActNN`` adaptor, written to '../workdir/experiments/11_biobert/') existed
    earlier and is not part of this pack.

    Args:
        corpus: corpus object stored under the 'corpus' key of every experiment.
        attr: prefix for the experiment names (e.g. ``'genia'``).

    Returns:
        list[dict]: one configuration dict per experiment, in the order listed above.
    """
    def make_experiment(suffix, ranking_strategy, libact_adaptor_ctor):
        # Settings shared by all experiments live here once; the key order matches the
        # original per-experiment literals so logged configurations look the same.
        return {
            'corpus' : corpus,
            'bpe_tokenizer' : bpe_tokenizer,
            # Single source of truth for the output location (same value as the
            # literal that used to be repeated in every experiment).
            'output_file_path' : OUTPUT_FILE_PATH,
            'ranking_strategy' : ranking_strategy,
            'libact_adaptor_ctor' : libact_adaptor_ctor,
            'name' : f'{attr}_{suffix}',
            'n_passes' : N_AL_PASSES,
            'n_al_iterations' : N_AL_ITERATIONS,
            'max_len' : MAX_LEN,
            'max_samples_number' : N_SAMPLES_PER_AL_ITER
        }

    return [
        make_experiment(
            'MNLP-mod',
            lambda trn_ds, _libact_model: UncertaintySampling(trn_ds, model=_libact_model, method='lc'),
            # The adaptor additionally receives the list of tags that start with 'B'.
            lambda tag2index: create_libact_adaptor(tag2index,
                                                    LibActNNPositiveLessCertain,
                                                    [e for e in tag2index.keys()
                                                     if e.startswith('B')])),
        make_experiment(
            'i.i.d.',
            lambda trn_ds, _libact_model: RandomSamplingWithRetraining(trn_ds, model=_libact_model, method='lc'),
            lambda tag2index: create_libact_adaptor(tag2index, LibActNN)),
    ]

In [10]:
# Build the experiment configurations for the loaded corpus; 'genia' becomes the prefix
# of every experiment name.
experiments = create_i2b2_experiment_pack(corpus, 'genia')

In [None]:
# Run every configured experiment in list order, bracketing each one with log banners.
for exp in experiments:
    logger.info('#################### Experiment ######################')
    # Log the full configuration dict (object reprs included) next to the results.
    logger.info(exp)
    
    # Each dict entry is passed as a keyword argument.
    run_experiment_al(**exp)
    
    logger.info('################### Experiment finished ##############')

2019-08-20 10:06:22,674 - biomed_ie - INFO - #################### Experiment ######################
2019-08-20 10:06:22,675 - biomed_ie - INFO - {'corpus': <flair.data.TaggedCorpus object at 0x7f6a63e55da0>, 'bpe_tokenizer': <pytorch_pretrained_bert.tokenization.BertTokenizer object at 0x7f6b030846a0>, 'output_file_path': '../workdir/experiments/14_biobert/', 'ranking_strategy': <function create_i2b2_experiment_pack.<locals>.<lambda> at 0x7f6a64165b70>, 'libact_adaptor_ctor': <function create_i2b2_experiment_pack.<locals>.<lambda> at 0x7f69d31e0c80>, 'name': 'genia_MNLP-mod', 'n_passes': 10, 'n_al_iterations': 25, 'max_len': 100, 'max_samples_number': 30}
2019-08-20 10:06:22,677 - biomed_ie - INFO - Preparing data...
2019-08-20 10:06:45,250 - biomed_ie - INFO - Done.
2019-08-20 10:06:45,251 - biomed_ie - INFO - Active learning...
2019-08-20 10:06:45,322 - biomed_ie - INFO - Number of seeding examples: 149
2019-08-20 10:06:45,328 - biomed_ie - INFO - Start emulating active learning.
201

Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

2019-08-20 10:06:58,886 - biomed_ie - INFO - Train loss: 0.002055953576908828
2019-08-20 10:06:58,962 - biomed_ie - INFO - Validation loss: 0.0010234704877334373


Epoch:   5%|▌         | 1/20 [00:02<00:41,  2.17s/it]

2019-08-20 10:06:59,646 - biomed_ie - INFO - Train loss: 0.0014401814347436185
2019-08-20 10:06:59,720 - biomed_ie - INFO - Validation loss: 0.0007254080169011337


Epoch:  10%|█         | 2/20 [00:02<00:31,  1.75s/it]

2019-08-20 10:07:00,471 - biomed_ie - INFO - Train loss: 0.0012060436290252505
2019-08-20 10:07:00,546 - biomed_ie - INFO - Validation loss: 0.0005981455591464496


Epoch:  15%|█▌        | 3/20 [00:03<00:25,  1.47s/it]

2019-08-20 10:07:01,275 - biomed_ie - INFO - Train loss: 0.0009365853780498877
2019-08-20 10:07:01,349 - biomed_ie - INFO - Validation loss: 0.0004774854338048954


Epoch:  20%|██        | 4/20 [00:04<00:20,  1.28s/it]

2019-08-20 10:07:02,053 - biomed_ie - INFO - Train loss: 0.0007638947769439852
2019-08-20 10:07:02,127 - biomed_ie - INFO - Validation loss: 0.00039364813022891193


Epoch:  25%|██▌       | 5/20 [00:05<00:16,  1.13s/it]

2019-08-20 10:07:02,864 - biomed_ie - INFO - Train loss: 0.000602521648675092
2019-08-20 10:07:02,939 - biomed_ie - INFO - Validation loss: 0.00030574052357860537


Epoch:  30%|███       | 6/20 [00:06<00:14,  1.03s/it]

2019-08-20 10:07:03,701 - biomed_ie - INFO - Train loss: 0.00046072579231362746
2019-08-20 10:07:03,776 - biomed_ie - INFO - Validation loss: 0.0002566716338592307


Epoch:  35%|███▌      | 7/20 [00:07<00:12,  1.02it/s]

2019-08-20 10:07:04,552 - biomed_ie - INFO - Train loss: 0.00035977345913183864
2019-08-20 10:07:04,626 - biomed_ie - INFO - Validation loss: 0.00023559508438473453


Epoch:  40%|████      | 8/20 [00:07<00:11,  1.07it/s]

2019-08-20 10:07:05,391 - biomed_ie - INFO - Train loss: 0.0002792237644375693
2019-08-20 10:07:05,465 - biomed_ie - INFO - Validation loss: 0.0002085971004618509


Epoch:  45%|████▌     | 9/20 [00:08<00:09,  1.10it/s]

2019-08-20 10:07:06,190 - biomed_ie - INFO - Train loss: 0.00021266929073802932
2019-08-20 10:07:06,265 - biomed_ie - INFO - Validation loss: 0.0002025041608233735


Epoch:  50%|█████     | 10/20 [00:09<00:08,  1.14it/s]

2019-08-20 10:07:07,025 - biomed_ie - INFO - Train loss: 0.00015700858044619093
2019-08-20 10:07:07,100 - biomed_ie - INFO - Validation loss: 0.00018268308006769333


Epoch:  55%|█████▌    | 11/20 [00:10<00:07,  1.16it/s]

2019-08-20 10:07:07,838 - biomed_ie - INFO - Train loss: 0.00011040656357944705
2019-08-20 10:07:07,913 - biomed_ie - INFO - Validation loss: 0.00019224230012327667
2019-08-20 10:07:07,914 - biomed_ie - INFO - Bad epoch #1


Epoch:  60%|██████    | 12/20 [00:11<00:06,  1.18it/s]

2019-08-20 10:07:08,668 - biomed_ie - INFO - Train loss: 8.421534516511319e-05
2019-08-20 10:07:08,742 - biomed_ie - INFO - Validation loss: 0.00019902074323656312
2019-08-20 10:07:08,743 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:07:08,744 - biomed_ie - INFO - Restoring best model...


Epoch:  65%|██████▌   | 13/20 [00:11<00:05,  1.18it/s]

2019-08-20 10:07:09,491 - biomed_ie - INFO - Train loss: 0.00011613131446574459
2019-08-20 10:07:09,566 - biomed_ie - INFO - Validation loss: 0.00018853851889175636
2019-08-20 10:07:09,567 - biomed_ie - INFO - Bad epoch #1


Epoch:  70%|███████   | 14/20 [00:12<00:04,  1.20it/s]

2019-08-20 10:07:10,303 - biomed_ie - INFO - Train loss: 9.154062656787577e-05
2019-08-20 10:07:10,377 - biomed_ie - INFO - Validation loss: 0.00019723629163868084
2019-08-20 10:07:10,378 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:07:10,380 - biomed_ie - INFO - Restoring best model...


Epoch:  75%|███████▌  | 15/20 [00:13<00:04,  1.20it/s]

2019-08-20 10:07:11,127 - biomed_ie - INFO - Train loss: 0.00011672990062381837
2019-08-20 10:07:11,202 - biomed_ie - INFO - Validation loss: 0.00018500353961615654
2019-08-20 10:07:11,203 - biomed_ie - INFO - Bad epoch #1


Epoch:  80%|████████  | 16/20 [00:14<00:03,  1.21it/s]

2019-08-20 10:07:11,926 - biomed_ie - INFO - Train loss: 0.00010117294269683674
2019-08-20 10:07:12,001 - biomed_ie - INFO - Validation loss: 0.0001909368048856934
2019-08-20 10:07:12,002 - biomed_ie - INFO - Bad epoch #2





2019-08-20 10:07:19,355 - biomed_ie - INFO - Evaluation: {'_f1_score': (0.4292682926829268, 0.8789187141033823)}
2019-08-20 10:07:19,356 - biomed_ie - INFO - Active learning iteration: #0
2019-08-20 10:07:23,313 - biomed_ie - INFO - Full finetuning: True
2019-08-20 10:07:23,317 - biomed_ie - INFO - N parameters: 108321807


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

2019-08-20 10:07:24,111 - biomed_ie - INFO - Train loss: 0.0022213482167436124
2019-08-20 10:07:24,185 - biomed_ie - INFO - Validation loss: 0.0010761267184156232


Epoch:   5%|▌         | 1/20 [00:00<00:16,  1.16it/s]

2019-08-20 10:07:24,939 - biomed_ie - INFO - Train loss: 0.0015214860794481604
2019-08-20 10:07:25,014 - biomed_ie - INFO - Validation loss: 0.0007031098403761872


Epoch:  10%|█         | 2/20 [00:01<00:15,  1.18it/s]

2019-08-20 10:07:25,755 - biomed_ie - INFO - Train loss: 0.0011449452136899957
2019-08-20 10:07:25,829 - biomed_ie - INFO - Validation loss: 0.000602313177248018


Epoch:  15%|█▌        | 3/20 [00:02<00:14,  1.19it/s]

2019-08-20 10:07:26,594 - biomed_ie - INFO - Train loss: 0.0009342439125716042
2019-08-20 10:07:26,669 - biomed_ie - INFO - Validation loss: 0.0004973065075093665


Epoch:  20%|██        | 4/20 [00:03<00:13,  1.18it/s]

2019-08-20 10:07:27,418 - biomed_ie - INFO - Train loss: 0.0007540273155973489
2019-08-20 10:07:27,493 - biomed_ie - INFO - Validation loss: 0.00040625474226158275


Epoch:  25%|██▌       | 5/20 [00:04<00:12,  1.19it/s]

2019-08-20 10:07:28,261 - biomed_ie - INFO - Train loss: 0.0005918579684585642
2019-08-20 10:07:28,336 - biomed_ie - INFO - Validation loss: 0.0003254425802589518


Epoch:  30%|███       | 6/20 [00:05<00:11,  1.19it/s]

2019-08-20 10:07:29,049 - biomed_ie - INFO - Train loss: 0.0004455546976530218
2019-08-20 10:07:29,123 - biomed_ie - INFO - Validation loss: 0.0002762087810356005


Epoch:  35%|███▌      | 7/20 [00:05<00:10,  1.22it/s]

2019-08-20 10:07:29,890 - biomed_ie - INFO - Train loss: 0.0003611413657967814
2019-08-20 10:07:29,965 - biomed_ie - INFO - Validation loss: 0.00023576762296457205


Epoch:  40%|████      | 8/20 [00:06<00:09,  1.21it/s]

2019-08-20 10:07:30,715 - biomed_ie - INFO - Train loss: 0.00027013599175381835
2019-08-20 10:07:30,790 - biomed_ie - INFO - Validation loss: 0.0002070677392514406


Epoch:  45%|████▌     | 9/20 [00:07<00:09,  1.21it/s]

2019-08-20 10:07:31,493 - biomed_ie - INFO - Train loss: 0.00021036433294839717
2019-08-20 10:07:31,567 - biomed_ie - INFO - Validation loss: 0.00020709670091097334
2019-08-20 10:07:31,568 - biomed_ie - INFO - Bad epoch #1


Epoch:  50%|█████     | 10/20 [00:08<00:08,  1.24it/s]

2019-08-20 10:07:32,262 - biomed_ie - INFO - Train loss: 0.000150653096033074
2019-08-20 10:07:32,336 - biomed_ie - INFO - Validation loss: 0.00019468845123738314


Epoch:  55%|█████▌    | 11/20 [00:09<00:07,  1.25it/s]

2019-08-20 10:07:33,079 - biomed_ie - INFO - Train loss: 0.00011467035342673018
2019-08-20 10:07:33,154 - biomed_ie - INFO - Validation loss: 0.00019626466112326733
2019-08-20 10:07:33,155 - biomed_ie - INFO - Bad epoch #1


Epoch:  60%|██████    | 12/20 [00:09<00:06,  1.25it/s]

2019-08-20 10:07:33,904 - biomed_ie - INFO - Train loss: 8.70710711978019e-05
2019-08-20 10:07:33,978 - biomed_ie - INFO - Validation loss: 0.00019770006822273793
2019-08-20 10:07:33,979 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:07:33,980 - biomed_ie - INFO - Restoring best model...


Epoch:  65%|██████▌   | 13/20 [00:10<00:05,  1.23it/s]

2019-08-20 10:07:34,715 - biomed_ie - INFO - Train loss: 0.00012056632458676937
2019-08-20 10:07:34,789 - biomed_ie - INFO - Validation loss: 0.00019717968143193068
2019-08-20 10:07:34,790 - biomed_ie - INFO - Bad epoch #1


Epoch:  70%|███████   | 14/20 [00:11<00:04,  1.24it/s]

2019-08-20 10:07:35,430 - biomed_ie - INFO - Train loss: 8.805929673899354e-05
2019-08-20 10:07:35,504 - biomed_ie - INFO - Validation loss: 0.00020150086982587798
2019-08-20 10:07:35,505 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:07:35,506 - biomed_ie - INFO - Restoring best model...


Epoch:  75%|███████▌  | 15/20 [00:12<00:03,  1.27it/s]

2019-08-20 10:07:36,194 - biomed_ie - INFO - Train loss: 0.00012038452614487678
2019-08-20 10:07:36,269 - biomed_ie - INFO - Validation loss: 0.0001926268327288923


Epoch:  80%|████████  | 16/20 [00:12<00:03,  1.29it/s]

2019-08-20 10:07:37,025 - biomed_ie - INFO - Train loss: 0.00010520045501037139
2019-08-20 10:07:37,100 - biomed_ie - INFO - Validation loss: 0.0001943907906523848
2019-08-20 10:07:37,101 - biomed_ie - INFO - Bad epoch #1


Epoch:  85%|████████▌ | 17/20 [00:13<00:02,  1.27it/s]

2019-08-20 10:07:37,837 - biomed_ie - INFO - Train loss: 9.142952107667555e-05
2019-08-20 10:07:37,912 - biomed_ie - INFO - Validation loss: 0.00019549481175114622
2019-08-20 10:07:37,913 - biomed_ie - INFO - Bad epoch #2





2019-08-20 10:08:17,855 - biomed_ie - INFO - Performance: {'_f1_score': (0.4694433284228383, 0.8845325462667517)}
2019-08-20 10:08:17,857 - biomed_ie - INFO - Active learning iteration: #1
2019-08-20 10:08:21,662 - biomed_ie - INFO - Full finetuning: True
2019-08-20 10:08:21,666 - biomed_ie - INFO - N parameters: 108321807


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

2019-08-20 10:08:22,775 - biomed_ie - INFO - Train loss: 0.00164505868703446
2019-08-20 10:08:22,861 - biomed_ie - INFO - Validation loss: 0.0007126549002544798


Epoch:   5%|▌         | 1/20 [00:01<00:22,  1.19s/it]

2019-08-20 10:08:23,933 - biomed_ie - INFO - Train loss: 0.0009896299142994089
2019-08-20 10:08:24,019 - biomed_ie - INFO - Validation loss: 0.0005855653299265443


Epoch:  10%|█         | 2/20 [00:02<00:21,  1.18s/it]

2019-08-20 10:08:25,081 - biomed_ie - INFO - Train loss: 0.0008164086229540807
2019-08-20 10:08:25,167 - biomed_ie - INFO - Validation loss: 0.0004738288226170546


Epoch:  15%|█▌        | 3/20 [00:03<00:19,  1.17s/it]

2019-08-20 10:08:26,183 - biomed_ie - INFO - Train loss: 0.000620058134230097
2019-08-20 10:08:26,269 - biomed_ie - INFO - Validation loss: 0.00040073635953207464


Epoch:  20%|██        | 4/20 [00:04<00:18,  1.15s/it]

2019-08-20 10:08:27,302 - biomed_ie - INFO - Train loss: 0.0004778176439735908
2019-08-20 10:08:27,390 - biomed_ie - INFO - Validation loss: 0.000321127624405437


Epoch:  25%|██▌       | 5/20 [00:05<00:17,  1.14s/it]

2019-08-20 10:08:28,537 - biomed_ie - INFO - Train loss: 0.00033515946597903224
2019-08-20 10:08:28,624 - biomed_ie - INFO - Validation loss: 0.00026274836221785693


Epoch:  30%|███       | 6/20 [00:06<00:16,  1.17s/it]

2019-08-20 10:08:29,729 - biomed_ie - INFO - Train loss: 0.0002640222371432885
2019-08-20 10:08:29,816 - biomed_ie - INFO - Validation loss: 0.00023320798857550328


Epoch:  35%|███▌      | 7/20 [00:08<00:15,  1.18s/it]

2019-08-20 10:08:30,913 - biomed_ie - INFO - Train loss: 0.00020381929532363533
2019-08-20 10:08:30,999 - biomed_ie - INFO - Validation loss: 0.0002056143338006809


Epoch:  40%|████      | 8/20 [00:09<00:14,  1.18s/it]

2019-08-20 10:08:32,006 - biomed_ie - INFO - Train loss: 0.00015441955284592427
2019-08-20 10:08:32,092 - biomed_ie - INFO - Validation loss: 0.00017958082357701752


Epoch:  45%|████▌     | 9/20 [00:10<00:12,  1.15s/it]

2019-08-20 10:08:33,101 - biomed_ie - INFO - Train loss: 0.00011928076300767386
2019-08-20 10:08:33,187 - biomed_ie - INFO - Validation loss: 0.00017530217837020149


Epoch:  50%|█████     | 10/20 [00:11<00:11,  1.14s/it]

2019-08-20 10:08:34,260 - biomed_ie - INFO - Train loss: 8.961841474678753e-05
2019-08-20 10:08:34,345 - biomed_ie - INFO - Validation loss: 0.00015959215174403968


Epoch:  55%|█████▌    | 11/20 [00:12<00:10,  1.14s/it]

2019-08-20 10:08:35,426 - biomed_ie - INFO - Train loss: 6.798355388777499e-05
2019-08-20 10:08:35,512 - biomed_ie - INFO - Validation loss: 0.00016783320285504436
2019-08-20 10:08:35,513 - biomed_ie - INFO - Bad epoch #1


Epoch:  60%|██████    | 12/20 [00:13<00:09,  1.15s/it]

2019-08-20 10:08:36,594 - biomed_ie - INFO - Train loss: 5.076400653793504e-05
2019-08-20 10:08:36,681 - biomed_ie - INFO - Validation loss: 0.00016068468214460978
2019-08-20 10:08:36,682 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:08:36,682 - biomed_ie - INFO - Restoring best model...


Epoch:  65%|██████▌   | 13/20 [00:15<00:08,  1.16s/it]

2019-08-20 10:08:37,790 - biomed_ie - INFO - Train loss: 7.466955601020339e-05
2019-08-20 10:08:37,876 - biomed_ie - INFO - Validation loss: 0.00016430888981252997
2019-08-20 10:08:37,877 - biomed_ie - INFO - Bad epoch #1


Epoch:  70%|███████   | 14/20 [00:16<00:06,  1.16s/it]

2019-08-20 10:08:38,965 - biomed_ie - INFO - Train loss: 5.393288098940417e-05
2019-08-20 10:08:39,051 - biomed_ie - INFO - Validation loss: 0.0001674745274979926
2019-08-20 10:08:39,051 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:08:39,052 - biomed_ie - INFO - Restoring best model...


Epoch:  75%|███████▌  | 15/20 [00:17<00:05,  1.17s/it]

2019-08-20 10:08:40,157 - biomed_ie - INFO - Train loss: 6.568131597445404e-05
2019-08-20 10:08:40,243 - biomed_ie - INFO - Validation loss: 0.0001615808212445419
2019-08-20 10:08:40,244 - biomed_ie - INFO - Bad epoch #1


Epoch:  80%|████████  | 16/20 [00:18<00:04,  1.17s/it]

2019-08-20 10:08:41,332 - biomed_ie - INFO - Train loss: 5.8733048096969246e-05
2019-08-20 10:08:41,418 - biomed_ie - INFO - Validation loss: 0.0001704963143680211
2019-08-20 10:08:41,419 - biomed_ie - INFO - Bad epoch #2





2019-08-20 10:09:21,369 - biomed_ie - INFO - Performance: {'_f1_score': (0.47184287989190754, 0.8916620134014039)}
2019-08-20 10:09:21,370 - biomed_ie - INFO - Active learning iteration: #2
2019-08-20 10:09:25,175 - biomed_ie - INFO - Full finetuning: True
2019-08-20 10:09:25,178 - biomed_ie - INFO - N parameters: 108321807


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

2019-08-20 10:09:26,309 - biomed_ie - INFO - Train loss: 0.0013406577208542813
2019-08-20 10:09:26,408 - biomed_ie - INFO - Validation loss: 0.0005825040786246943


Epoch:   5%|▌         | 1/20 [00:01<00:23,  1.22s/it]

2019-08-20 10:09:27,491 - biomed_ie - INFO - Train loss: 0.0008155165900791073
2019-08-20 10:09:27,589 - biomed_ie - INFO - Validation loss: 0.000430693955925422


Epoch:  10%|█         | 2/20 [00:02<00:21,  1.21s/it]

2019-08-20 10:09:28,674 - biomed_ie - INFO - Train loss: 0.0006289129414510329
2019-08-20 10:09:28,772 - biomed_ie - INFO - Validation loss: 0.0003357777899693816


Epoch:  15%|█▌        | 3/20 [00:03<00:20,  1.20s/it]

2019-08-20 10:09:29,936 - biomed_ie - INFO - Train loss: 0.0004784416624560057
2019-08-20 10:09:30,035 - biomed_ie - INFO - Validation loss: 0.0002634463979722262


Epoch:  20%|██        | 4/20 [00:04<00:19,  1.22s/it]

2019-08-20 10:09:31,244 - biomed_ie - INFO - Train loss: 0.00035998389836938065
2019-08-20 10:09:31,343 - biomed_ie - INFO - Validation loss: 0.0002066347843561417


Epoch:  25%|██▌       | 5/20 [00:06<00:18,  1.25s/it]

2019-08-20 10:09:32,497 - biomed_ie - INFO - Train loss: 0.0002608056695040043
2019-08-20 10:09:32,595 - biomed_ie - INFO - Validation loss: 0.000163755038889443


Epoch:  30%|███       | 6/20 [00:07<00:17,  1.25s/it]

2019-08-20 10:09:33,745 - biomed_ie - INFO - Train loss: 0.00019566009228608945
2019-08-20 10:09:33,844 - biomed_ie - INFO - Validation loss: 0.00013316413847784314


Epoch:  35%|███▌      | 7/20 [00:08<00:16,  1.25s/it]

2019-08-20 10:09:34,983 - biomed_ie - INFO - Train loss: 0.00014698694850832995
2019-08-20 10:09:35,082 - biomed_ie - INFO - Validation loss: 0.00011100360495810064


Epoch:  40%|████      | 8/20 [00:09<00:14,  1.25s/it]

2019-08-20 10:09:36,235 - biomed_ie - INFO - Train loss: 0.00011219792084336438
2019-08-20 10:09:36,333 - biomed_ie - INFO - Validation loss: 0.000103568251689126


Epoch:  45%|████▌     | 9/20 [00:11<00:13,  1.25s/it]

2019-08-20 10:09:37,471 - biomed_ie - INFO - Train loss: 8.603913814861594e-05
2019-08-20 10:09:37,570 - biomed_ie - INFO - Validation loss: 9.937441818709669e-05


Epoch:  50%|█████     | 10/20 [00:12<00:12,  1.24s/it]

2019-08-20 10:09:38,760 - biomed_ie - INFO - Train loss: 6.721483960793448e-05
2019-08-20 10:09:38,860 - biomed_ie - INFO - Validation loss: 9.057441117094038e-05


Epoch:  55%|█████▌    | 11/20 [00:13<00:11,  1.26s/it]

2019-08-20 10:09:39,999 - biomed_ie - INFO - Train loss: 5.0051938064628294e-05
2019-08-20 10:09:40,098 - biomed_ie - INFO - Validation loss: 9.223089097364758e-05
2019-08-20 10:09:40,099 - biomed_ie - INFO - Bad epoch #1


Epoch:  60%|██████    | 12/20 [00:14<00:09,  1.25s/it]

2019-08-20 10:09:41,280 - biomed_ie - INFO - Train loss: 3.60758766100784e-05
2019-08-20 10:09:41,380 - biomed_ie - INFO - Validation loss: 9.77438733009639e-05
2019-08-20 10:09:41,381 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:09:41,381 - biomed_ie - INFO - Restoring best model...


Epoch:  65%|██████▌   | 13/20 [00:16<00:08,  1.27s/it]

2019-08-20 10:09:42,567 - biomed_ie - INFO - Train loss: 4.9739347665469824e-05
2019-08-20 10:09:42,667 - biomed_ie - INFO - Validation loss: 8.806330923589786e-05


Epoch:  70%|███████   | 14/20 [00:17<00:07,  1.27s/it]

2019-08-20 10:09:43,863 - biomed_ie - INFO - Train loss: 4.123565069236864e-05
2019-08-20 10:09:43,963 - biomed_ie - INFO - Validation loss: 9.034779982838204e-05
2019-08-20 10:09:43,964 - biomed_ie - INFO - Bad epoch #1


Epoch:  75%|███████▌  | 15/20 [00:18<00:06,  1.27s/it]

2019-08-20 10:09:45,152 - biomed_ie - INFO - Train loss: 3.357902895994011e-05
2019-08-20 10:09:45,251 - biomed_ie - INFO - Validation loss: 8.969334157427227e-05
2019-08-20 10:09:45,252 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:09:45,252 - biomed_ie - INFO - Restoring best model...


Epoch:  80%|████████  | 16/20 [00:20<00:05,  1.28s/it]

2019-08-20 10:09:46,446 - biomed_ie - INFO - Train loss: 4.036386866009795e-05
2019-08-20 10:09:46,545 - biomed_ie - INFO - Validation loss: 9.040308975293682e-05
2019-08-20 10:09:46,546 - biomed_ie - INFO - Bad epoch #1


Epoch:  85%|████████▌ | 17/20 [00:21<00:03,  1.28s/it]

2019-08-20 10:09:47,730 - biomed_ie - INFO - Train loss: 3.6690354343962636e-05
2019-08-20 10:09:47,829 - biomed_ie - INFO - Validation loss: 9.275429542546275e-05
2019-08-20 10:09:47,830 - biomed_ie - INFO - Bad epoch #2





2019-08-20 10:10:27,794 - biomed_ie - INFO - Performance: {'_f1_score': (0.5342007259699241, 0.9038070357370772)}
2019-08-20 10:10:27,795 - biomed_ie - INFO - Active learning iteration: #3
2019-08-20 10:10:31,591 - biomed_ie - INFO - Full finetuning: True
2019-08-20 10:10:31,595 - biomed_ie - INFO - N parameters: 108321807


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

2019-08-20 10:10:33,007 - biomed_ie - INFO - Train loss: 0.001137238894236453
2019-08-20 10:10:33,119 - biomed_ie - INFO - Validation loss: 0.0003908948500849529


Epoch:   5%|▌         | 1/20 [00:01<00:28,  1.51s/it]

2019-08-20 10:10:34,399 - biomed_ie - INFO - Train loss: 0.0007224090377081582
2019-08-20 10:10:34,510 - biomed_ie - INFO - Validation loss: 0.00029543686083776814


Epoch:  10%|█         | 2/20 [00:02<00:26,  1.48s/it]

2019-08-20 10:10:35,850 - biomed_ie - INFO - Train loss: 0.0005454190808616774
2019-08-20 10:10:35,962 - biomed_ie - INFO - Validation loss: 0.0002253564597090563


Epoch:  15%|█▌        | 3/20 [00:04<00:24,  1.47s/it]

2019-08-20 10:10:37,301 - biomed_ie - INFO - Train loss: 0.00039689780621164015
2019-08-20 10:10:37,413 - biomed_ie - INFO - Validation loss: 0.00016113026798670628


Epoch:  20%|██        | 4/20 [00:05<00:23,  1.47s/it]

2019-08-20 10:10:38,670 - biomed_ie - INFO - Train loss: 0.0002900210766218385
2019-08-20 10:10:38,782 - biomed_ie - INFO - Validation loss: 0.0001281977492322461


Epoch:  25%|██▌       | 5/20 [00:07<00:21,  1.44s/it]

2019-08-20 10:10:40,159 - biomed_ie - INFO - Train loss: 0.00021151824313965728
2019-08-20 10:10:40,272 - biomed_ie - INFO - Validation loss: 0.00010201662618831673


Epoch:  30%|███       | 6/20 [00:08<00:20,  1.45s/it]

2019-08-20 10:10:41,656 - biomed_ie - INFO - Train loss: 0.0001527582015703208
2019-08-20 10:10:41,768 - biomed_ie - INFO - Validation loss: 8.581942592670593e-05


Epoch:  35%|███▌      | 7/20 [00:10<00:19,  1.47s/it]

2019-08-20 10:10:43,134 - biomed_ie - INFO - Train loss: 0.00010698728676100148
2019-08-20 10:10:43,247 - biomed_ie - INFO - Validation loss: 7.665774548520094e-05


Epoch:  40%|████      | 8/20 [00:11<00:17,  1.47s/it]

2019-08-20 10:10:44,562 - biomed_ie - INFO - Train loss: 7.608918483426203e-05
2019-08-20 10:10:44,674 - biomed_ie - INFO - Validation loss: 7.490652566430392e-05


Epoch:  45%|████▌     | 9/20 [00:13<00:16,  1.46s/it]

2019-08-20 10:10:45,998 - biomed_ie - INFO - Train loss: 5.7060838912538225e-05
2019-08-20 10:10:46,111 - biomed_ie - INFO - Validation loss: 7.540540023399905e-05
2019-08-20 10:10:46,112 - biomed_ie - INFO - Bad epoch #1


Epoch:  50%|█████     | 10/20 [00:14<00:14,  1.45s/it]

2019-08-20 10:10:47,427 - biomed_ie - INFO - Train loss: 4.233555288920127e-05
2019-08-20 10:10:47,540 - biomed_ie - INFO - Validation loss: 8.251259498235302e-05
2019-08-20 10:10:47,541 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:10:47,542 - biomed_ie - INFO - Restoring best model...


Epoch:  55%|█████▌    | 11/20 [00:15<00:13,  1.45s/it]

2019-08-20 10:10:48,902 - biomed_ie - INFO - Train loss: 5.65834274755392e-05
2019-08-20 10:10:49,015 - biomed_ie - INFO - Validation loss: 7.499949123145551e-05
2019-08-20 10:10:49,016 - biomed_ie - INFO - Bad epoch #1


Epoch:  60%|██████    | 12/20 [00:17<00:11,  1.45s/it]

2019-08-20 10:10:50,400 - biomed_ie - INFO - Train loss: 4.4384471056344336e-05
2019-08-20 10:10:50,512 - biomed_ie - INFO - Validation loss: 7.685024517023237e-05
2019-08-20 10:10:50,513 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:10:50,513 - biomed_ie - INFO - Restoring best model...


Epoch:  65%|██████▌   | 13/20 [00:18<00:10,  1.47s/it]

2019-08-20 10:10:51,915 - biomed_ie - INFO - Train loss: 5.576185101778762e-05
2019-08-20 10:10:52,028 - biomed_ie - INFO - Validation loss: 7.381240476034268e-05


Epoch:  70%|███████   | 14/20 [00:20<00:08,  1.48s/it]

2019-08-20 10:10:53,422 - biomed_ie - INFO - Train loss: 4.870079430942738e-05
2019-08-20 10:10:53,534 - biomed_ie - INFO - Validation loss: 7.596848962618487e-05
2019-08-20 10:10:53,535 - biomed_ie - INFO - Bad epoch #1


Epoch:  75%|███████▌  | 15/20 [00:21<00:07,  1.48s/it]

2019-08-20 10:10:54,835 - biomed_ie - INFO - Train loss: 4.556880693661299e-05
2019-08-20 10:10:54,947 - biomed_ie - INFO - Validation loss: 7.903386156018267e-05
2019-08-20 10:10:54,948 - biomed_ie - INFO - Bad epoch #2





2019-08-20 10:11:35,875 - biomed_ie - INFO - Performance: {'_f1_score': (0.5478390461997018, 0.9069380185067007)}
2019-08-20 10:11:35,876 - biomed_ie - INFO - Active learning iteration: #4
2019-08-20 10:11:39,668 - biomed_ie - INFO - Full finetuning: True
2019-08-20 10:11:39,672 - biomed_ie - INFO - N parameters: 108321807


Epoch:   0%|          | 0/20 [00:00<?, ?it/s]

2019-08-20 10:11:41,271 - biomed_ie - INFO - Train loss: 0.0011351545330895098
2019-08-20 10:11:41,397 - biomed_ie - INFO - Validation loss: 0.0002930829167816252


Epoch:   5%|▌         | 1/20 [00:01<00:32,  1.72s/it]

2019-08-20 10:11:42,993 - biomed_ie - INFO - Train loss: 0.0006649458052467546
2019-08-20 10:11:43,120 - biomed_ie - INFO - Validation loss: 0.00021213090474796656


Epoch:  10%|█         | 2/20 [00:03<00:30,  1.72s/it]

2019-08-20 10:11:44,715 - biomed_ie - INFO - Train loss: 0.0004794550291723088
2019-08-20 10:11:44,841 - biomed_ie - INFO - Validation loss: 0.00015524456193854316


Epoch:  15%|█▌        | 3/20 [00:05<00:29,  1.72s/it]

2019-08-20 10:11:46,376 - biomed_ie - INFO - Train loss: 0.0003281994407835232
2019-08-20 10:11:46,502 - biomed_ie - INFO - Validation loss: 0.00010914424613299511


Epoch:  20%|██        | 4/20 [00:06<00:27,  1.71s/it]

2019-08-20 10:11:48,096 - biomed_ie - INFO - Train loss: 0.00022192563743218287
2019-08-20 10:11:48,222 - biomed_ie - INFO - Validation loss: 8.35567575215393e-05


Epoch:  25%|██▌       | 5/20 [00:08<00:25,  1.71s/it]

2019-08-20 10:11:49,804 - biomed_ie - INFO - Train loss: 0.00016613072691621476
2019-08-20 10:11:49,929 - biomed_ie - INFO - Validation loss: 6.984346837393136e-05


Epoch:  30%|███       | 6/20 [00:10<00:23,  1.71s/it]

2019-08-20 10:11:51,522 - biomed_ie - INFO - Train loss: 0.00011653758718469548
2019-08-20 10:11:51,649 - biomed_ie - INFO - Validation loss: 5.9982176293258254e-05


Epoch:  35%|███▌      | 7/20 [00:11<00:22,  1.71s/it]

2019-08-20 10:11:53,246 - biomed_ie - INFO - Train loss: 8.560597777357808e-05
2019-08-20 10:11:53,372 - biomed_ie - INFO - Validation loss: 5.5249635031680526e-05


Epoch:  40%|████      | 8/20 [00:13<00:20,  1.72s/it]

2019-08-20 10:11:54,962 - biomed_ie - INFO - Train loss: 6.182739488088923e-05
2019-08-20 10:11:55,088 - biomed_ie - INFO - Validation loss: 5.774582027638604e-05
2019-08-20 10:11:55,089 - biomed_ie - INFO - Bad epoch #1


Epoch:  45%|████▌     | 9/20 [00:15<00:18,  1.71s/it]

2019-08-20 10:11:56,550 - biomed_ie - INFO - Train loss: 4.5517094820214605e-05
2019-08-20 10:11:56,676 - biomed_ie - INFO - Validation loss: 5.6519879296303194e-05
2019-08-20 10:11:56,677 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:11:56,678 - biomed_ie - INFO - Restoring best model...


Epoch:  50%|█████     | 10/20 [00:17<00:16,  1.68s/it]

2019-08-20 10:11:58,218 - biomed_ie - INFO - Train loss: 6.278711454949032e-05
2019-08-20 10:11:58,344 - biomed_ie - INFO - Validation loss: 5.638954572401475e-05
2019-08-20 10:11:58,345 - biomed_ie - INFO - Bad epoch #1


Epoch:  55%|█████▌    | 11/20 [00:18<00:15,  1.67s/it]

2019-08-20 10:11:59,862 - biomed_ie - INFO - Train loss: 4.9675480265362684e-05
2019-08-20 10:11:59,989 - biomed_ie - INFO - Validation loss: 5.7270440092819154e-05
2019-08-20 10:11:59,989 - biomed_ie - INFO - Bad epoch #2
2019-08-20 10:11:59,990 - biomed_ie - INFO - Restoring best model...


Epoch:  60%|██████    | 12/20 [00:20<00:13,  1.67s/it]

2019-08-20 10:12:01,505 - biomed_ie - INFO - Train loss: 6.284219773665774e-05
2019-08-20 10:12:01,631 - biomed_ie - INFO - Validation loss: 5.330069696407983e-05


Epoch:  65%|██████▌   | 13/20 [00:21<00:11,  1.66s/it]

2019-08-20 10:12:03,193 - biomed_ie - INFO - Train loss: 5.342699027130344e-05
2019-08-20 10:12:03,320 - biomed_ie - INFO - Validation loss: 5.6756303133306214e-05
2019-08-20 10:12:03,321 - biomed_ie - INFO - Bad epoch #1


Epoch:  70%|███████   | 14/20 [00:23<00:09,  1.66s/it]

2019-08-20 10:12:04,875 - biomed_ie - INFO - Train loss: 4.6008487627836355e-05
2019-08-20 10:12:05,002 - biomed_ie - INFO - Validation loss: 5.572660975830424e-05
2019-08-20 10:12:05,003 - biomed_ie - INFO - Bad epoch #2





# Analyze results

In [17]:
from exp_utils import print_al_stats

In [18]:
# NOTE(review): this result file is named after a 'diabetes' experiment, while the
# experiments defined above are named 'genia_*' — confirm it is the intended input.
data_path = '../workdir/experiments_draw/diabetes_MNLP-mod.npy'
# Reference value that print_al_stats reports as "Entire data score" and uses for the
# "Score ratio"; presumably the score obtained with the full training set — TODO
# record where this number comes from.
entire_dat_score = 0.718
print_al_stats(data_path, entire_dat_score)

(10,)
Al score:  0.6513872668796481
Entire data score: 0.718
Score ratio: 0.9072246056819612


In [19]:
# NOTE(review): this result file is named after a 'cad' experiment, while the
# experiments defined above are named 'genia_*' — confirm it is the intended input.
data_path = '../workdir/experiments_draw/cad_MNLP-mod.npy'
# Reference value that print_al_stats reports as "Entire data score" and uses for the
# "Score ratio"; presumably the score obtained with the full training set — TODO
# record where this number comes from.
entire_dat_score = 0.401
print_al_stats(data_path, entire_dat_score)

(10,)
Al score:  0.2841254596866301
Entire data score: 0.401
Score ratio: 0.708542293482868


In [20]:
# NOTE(review): this result file is named after a 'hypertension' experiment, while the
# experiments defined above are named 'genia_*' — confirm it is the intended input.
data_path = '../workdir/experiments_draw/hypertension_MNLP-mod.npy'
# Reference value that print_al_stats reports as "Entire data score" and uses for the
# "Score ratio"; presumably the score obtained with the full training set — TODO
# record where this number comes from.
entire_dat_score = 0.739
print_al_stats(data_path, entire_dat_score)

(10,)
Al score:  0.6053099221998488
Entire data score: 0.739
Score ratio: 0.8190932641405261


In [None]:
# numpy is already imported in the imports cell near the top; repeating it is harmless.
import numpy as np

# Load the saved results array of one experiment directly.
data_path = '../workdir/experiments_draw/diabetes_MNLP-mod.npy'
stats = np.load(data_path)

# Mean over all values in the loaded array, shown as the cell output.
stats.mean()