<a href="https://colab.research.google.com/github/emielsteegh/mdwnlp/blob/main/Experiments/with-task-adaptive-pretraining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook contains the code to run most experiments.
It starts with the task adaptation of a language model
and ends with a quick wrapper to run multiple experiments.

**IMPORTANT**: the experiments use a *modified* version of the [Contrastive Active Learning library](https://github.com/mourga/contrastive) by Katerina Margatina (github/mourga).

In [None]:
# pray for an A100 GPU
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU, please use one')
else:
  print(gpu_info)


Sun Jul 17 21:45:24 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Continued Pretraining

In [None]:
!pip install transformers==4.20 datasets nlp

In [None]:
# Root folder used to build the paths below.
BASE = '/Experiments'
# Directory the task-adapted model is saved to and loaded from (note the trailing slash).
TAPT_BERTJE = BASE+'/models/taptBERTje/'

In [None]:
# imports

from transformers import AutoTokenizer, AutoModel, AutoModelForPreTraining
from datasets import load_dataset
from transformers import DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments
import torch

import os
import re
import itertools
import random
import sys

In [None]:
# Second spelling of the checkpoint directory, built with os.path.join.
# NOTE(review): neither `path` nor `tapt_model_path` is referenced again in the
# visible cells (TAPT_BERTJE is used instead) — confirm and consolidate.
path = '/Experiments/models'
tapt_model_path = os.path.join(path, "taptBERTje")


In [None]:
# Load the tokenizer and weights of the GroNLP/bert-base-dutch-cased checkpoint.
# AutoModelForPreTraining is used because the training loop below passes both
# MLM labels and next-sentence labels to the model.

tokenizer = AutoTokenizer.from_pretrained("GroNLP/bert-base-dutch-cased")
model = AutoModelForPreTraining.from_pretrained("GroNLP/bert-base-dutch-cased")  # PyTorch

Some weights of BertForPreTraining were not initialized from the model checkpoint at GroNLP/bert-base-dutch-cased and are newly initialized: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'bert.pooler.dense.weight', 'bert.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
tokenizer.tokenize("Ondernemingsrecht")

# Data loading

In [None]:
# Load the "Rodekool/ornl" dataset; the 'text' column of its 'train' split is
# what the sampling cell below draws documents from.

dataset = load_dataset("Rodekool/ornl") 


Using custom data configuration default
Reusing dataset ornl (/root/.cache/huggingface/datasets/Rodekool___ornl/default/0.0.0/469bdfd028c8f296ef2a644744dd4dd0a573f621b782b3e25e5cc4a3613dca63)


  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# function to split dataset into sentences 

def text_breaker(t, min_words=5):
  """Split a raw document into lines and keep only the sufficiently long ones.

  Steps:
    1. Collapse every run of spaces and/or non-breaking spaces (in any mix)
       into a single space.
    2. Split on newlines; a newline with a space on both sides is consumed
       together with those spaces.
    3. Drop lines with fewer than `min_words` whitespace-separated words.

  Args:
    t: the document text.
    min_words: minimum number of words a line needs to be kept (default 5,
      the threshold the notebook has always used).

  Returns:
    A list of the retained lines, in document order.
  """
  # One character class handles mixed runs such as " \xa0 "; two separate
  # alternatives would turn that run into three spaces instead of one.
  t = re.sub(r'[ \xa0]+', ' ', t)
  lines = re.split(r' \n |\n', t)
  # str.split() without an argument ignores leading/trailing/double spaces, so
  # empty tokens are not counted as words.
  return [line for line in lines if len(line.split()) >= min_words]


In [None]:
# Seed the RNGs first so the sampled subset -- and the NSP pairs and MLM mask
# that are later drawn from `random` / `torch` -- are reproducible after a
# kernel restart.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

# Number of documents drawn (without replacement) from the training split.
sample_count = 50_000
train_data = random.sample(dataset['train']['text'], sample_count)


In [None]:
# Break every sampled document into lines, pool the lines of all documents,
# then cut each line into sentences on '. ' (empty pieces are dropped).
train_data_pars_of_lines = list(map(text_breaker, train_data))
train_data_lines = list(itertools.chain.from_iterable(train_data_pars_of_lines))
train_data_sentences = [
    piece
    for line in train_data_lines
    for piece in line.split('. ')
    if piece != ''
]

# Sizes of the three levels: documents, lines, sentences (one per output line).
print(
    len(train_data_pars_of_lines),
    len(train_data_lines),
    len(train_data_sentences),
    sep='\n',
)


50000
1015179
1961856


# fixing the tokenizer

In [None]:
# Bind a second name to the original tokenizer. This is an alias, not a copy:
# both names refer to the same object until `tapt_tokenizer` is reassigned.
tapt_tokenizer = tokenizer

# Display the vocabulary size of the original tokenizer.
tapt_tokenizer.vocab_size

30073

In [None]:
batch_size = 1_000

def batch_iterator():
    """Yield `train_data_sentences` in consecutive slices of `batch_size` items.

    The last slice is shorter when the corpus size is not a multiple of
    `batch_size`. Both names are read from the notebook's global scope.
    """
    offsets = range(0, len(train_data_sentences), batch_size)
    yield from (train_data_sentences[offset : offset + batch_size] for offset in offsets)

In [None]:
# Train a new tokenizer on the task corpus, always starting from the original
# `tokenizer`. Starting from `tapt_tokenizer` would make a re-run of this cell
# retrain an already retrained tokenizer. The vocabulary size is taken from
# the original tokenizer rather than hard-coded.
# NOTE(review): in the visible cells the retrained tokenizer is only used for
# the vocabulary comparison; the model inputs are still built with `tokenizer`.
tapt_tokenizer = tokenizer.train_new_from_iterator(batch_iterator(), vocab_size=tokenizer.vocab_size)

In [None]:
def get_vocab_sim(vocab_a, vocab_b):
  """Compare the key sets of two vocabularies.

  Args:
    vocab_a, vocab_b: mappings whose keys are the tokens to compare.

  Returns:
    A tuple (overlap, jaccard):
      overlap -- shared tokens divided by the size of the larger vocabulary
      jaccard -- shared tokens divided by the size of the union
  """
  tokens_a = set(vocab_a.keys())
  tokens_b = set(vocab_b.keys())
  shared = len(tokens_a & tokens_b)
  overlap = shared / max(len(tokens_a), len(tokens_b))
  jaccard = shared / len(tokens_a | tokens_b)
  return overlap, jaccard

In [None]:
set([1,2,3]).union(set([2,3,4]))

{1, 2, 3, 4}

In [None]:
get_vocab_sim(tokenizer.vocab, tapt_tokenizer.vocab)

(0.37964286901872113, 0.23429579921607258)

# NSP

In [None]:
# Build next-sentence-prediction pairs: at most one (sentence A, sentence B,
# label) triple per line, and only for lines holding at least two sentences.
sentence_a, sentence_b, label = [], [], []

for paragraph in train_data_lines:
    parts = [piece for piece in paragraph.split('. ') if piece != '']
    if len(parts) < 2:
        # a lone sentence has no successor, so it cannot form a pair
        continue

    # Sentence A is any sentence except the last one of the line.
    first = random.randint(0, len(parts) - 2)

    # 50/50 coin flip: label 0 keeps the true successor (IsNextSentence),
    # label 1 swaps in a random sentence from the whole corpus (NotNextSentence).
    keep_successor = random.random() >= 0.5
    if keep_successor:
        second = parts[first + 1]
    else:
        second = train_data_sentences[random.randint(0, len(train_data_sentences) - 1)]

    sentence_a.append(parts[first])
    sentence_b.append(second)
    label.append(0 if keep_successor else 1)


In [None]:
# Preview the first three generated pairs: the label (0 = IsNextSentence,
# 1 = NotNextSentence), then sentence A, a separator and sentence B.
for i in range(3):
    print(label[i])
    print(sentence_a[i] + '\n---')
    print(sentence_b[i] + '\n')
    

1
Vernietiging besluit
---
De rechtbank Amsterdam heeft in haar uitspraak van 21 juli 2008 het beroep tegen het besluit van 10 april 2007 ongegrond verklaard

0
M.C.P
---
van den Berg te Someren (belanghebbende)

1
J.T.J.A
---
Voor het procesverloop en de beslissing in eerste aanleg verwijst het hof naar de bestreden beschikking.



In [None]:
# Encode every (sentence A, sentence B) pair as one two-segment input.
# Each pair is truncated or padded to exactly 512 tokens and the result is
# returned as PyTorch tensors, so all pairs are materialised at full length.
inputs = tokenizer(sentence_a, sentence_b, return_tensors='pt',
                   max_length=512, truncation=True, padding='max_length')

In [None]:
# Attach the NSP targets as a column: [label] is a single row holding all
# labels, and .T transposes it so there is one label per input row.
inputs['next_sentence_label'] = torch.LongTensor([label]).T

# MLM

In [None]:
# Sanity check: encode a short padded sentence containing [MASK] to see which
# ids the tokenizer assigns to the special tokens used by the masking step.
test = tokenizer('ik ben een [MASK]', return_tensors='pt',
    max_length=10,
    truncation=True,
    padding='max_length')

test.input_ids


tensor([[    1, 13604,  9092, 11130,     4,     2,     3,     3,     3,     3]])

In [None]:
# Keep an untouched copy of the token ids as the MLM targets, taken before any
# position is overwritten with the mask id.
# NOTE(review): every position (special tokens and padding included) keeps a
# real token id as its label; nothing is set to the -100 ignore index, so the
# MLM loss presumably covers all 512 positions — confirm this is intended.

inputs['labels'] = inputs.input_ids.detach().clone()


In [None]:
# Create the MLM mask: select ~15% of all positions at random, never a
# [CLS] / [SEP] / [PAD] position, and overwrite the selected input ids with the
# mask token id. The ids come from the tokenizer instead of the literals
# 1, 2, 3 and 4, so the cell stays correct if the vocabulary layout changes.

token_ids = inputs.input_ids
rand = torch.rand(token_ids.shape)
mask_arr = (
    (rand < 0.15)
    & (token_ids != tokenizer.cls_token_id)
    & (token_ids != tokenizer.sep_token_id)
    & (token_ids != tokenizer.pad_token_id)
)

# A single boolean-indexed assignment replaces the per-row Python loop.
token_ids[mask_arr] = tokenizer.mask_token_id

In [None]:
class RechtspraakDataset(torch.utils.data.Dataset):
  """Map-style dataset over a tokenizer encoding.

  `encodings` is a mapping (a tokenizer output or a plain dict) from field name
  to a per-example sequence or tensor; it must contain the key 'input_ids'.
  Item `idx` is a dict with the same keys holding example `idx` of every field
  as a tensor.
  """
  def __init__(self, encodings):
    self.encodings = encodings
  def __getitem__(self, idx):
    item = {}
    for key, val in self.encodings.items():
      row = val[idx]
      # Rows that are already tensors are copied with clone().detach();
      # wrapping them in torch.tensor() again triggers PyTorch's
      # copy-construct warning on every access. Lists are converted as before.
      item[key] = row.clone().detach() if torch.is_tensor(row) else torch.tensor(row)
    return item
  def __len__(self):
    # Key access works for tokenizer outputs and for plain dicts alike.
    return len(self.encodings['input_ids'])
    

In [None]:
train_dataset = RechtspraakDataset(inputs)


In [None]:
# Serve the dataset in shuffled mini-batches of 16 examples.
dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size = 16,
    shuffle = True)


In [None]:
# torch.cuda.is_available has to be *called*: the bare function object is
# always truthy, which selected 'cuda' even on a machine without a GPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model.to(device)

BertForPreTraining(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30073, 768, padding_idx=3)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine

In [None]:
# NOTE(review): the AdamW shipped with transformers is deprecated upstream in
# favour of torch.optim.AdamW; their defaults differ, so verify before switching.
from transformers import AdamW
from tqdm import tqdm

# Switch the model to training mode and optimise all parameters with a
# learning rate of 5e-5.
model.train()
optim = AdamW(model.parameters(), lr=5e-5)



# training

In [None]:
epochs = 1
losses = []  # one loss value per optimisation step

for epoch in range(epochs):
  loop = tqdm(dataloader, leave=True)
  for batch in loop:
    # clear the gradients left over from the previous step
    optim.zero_grad()

    # move the five tensors the model consumes onto the training device
    on_device = {
        field: batch[field].to(device)
        for field in ('input_ids', 'token_type_ids', 'attention_mask',
                      'next_sentence_label', 'labels')
    }

    # forward pass; the model returns its combined training loss as `.loss`
    outputs = model(on_device['input_ids'],
                    attention_mask=on_device['attention_mask'],
                    token_type_ids=on_device['token_type_ids'],
                    next_sentence_label=on_device['next_sentence_label'],
                    labels=on_device['labels'])

    # backward pass, bookkeeping, parameter update
    outputs.loss.backward()
    step_loss = outputs.loss.item()
    losses.append(step_loss)
    optim.step()

    # show the epoch and the latest loss on the progress bar
    loop.set_description(f'Epoch {epoch}')
    loop.set_postfix(loss=step_loss)


  """
Epoch 0: 100%|██████████| 26721/26721 [1:13:16<00:00,  6.08it/s, loss=0.267]


In [None]:
# `tapt_path` is not defined anywhere in the notebook (NameError on a fresh
# kernel); show the directory the next cell actually saves to instead.
print(TAPT_BERTJE)

/content/drive/MyDrive/UvA/thesis/models/taptBERTje


In [None]:
# Save the model together with the tokenizer its inputs were built with, so
# the checkpoint directory is self-contained and the later
# AutoTokenizer.from_pretrained(TAPT_BERTJE) call finds tokenizer files there.
model.save_pretrained(TAPT_BERTJE)
tokenizer.save_pretrained(TAPT_BERTJE)

In [None]:
# Persist the per-step training losses as a pickle file inside the model
# checkpoint directory.
import pickle

with open(TAPT_BERTJE+'/losses.pkl', 'wb') as f:
  pickle.dump(losses, f)

In [None]:
# Read the losses back from disk.
# pickle.load can execute arbitrary code: only use it on files you wrote yourself.
with open(TAPT_BERTJE+'/losses.pkl', 'rb') as f:
  losses_load = pickle.load(f)

In [None]:
# Reload the untouched base model and the task-adapted checkpoint. The two
# cells below inspect `original_model` and `tapt_model`, so both must be
# defined here for the notebook to run on a fresh kernel.
original_model = AutoModel.from_pretrained("GroNLP/bert-base-dutch-cased")  # PyTorch

tapt_model = AutoModel.from_pretrained(TAPT_BERTJE)
# NOTE(review): this needs tokenizer files inside TAPT_BERTJE — confirm they
# are written there when the checkpoint is saved.
tapt_token = AutoTokenizer.from_pretrained(TAPT_BERTJE)

In [None]:
original_model.state_dict()

In [None]:
tapt_model.state_dict()

# AL LOOP WITH NEW MODEL

In [None]:
# Paths for the active-learning runs. BASE and TAPT_BERTJE repeat the values
# set at the top of the notebook.
BASE = '/Experiments'
CAL = '/contrastive-active-learning' # folder of the contrastive-active-learning checkout, relative to BASE
REQS = BASE+CAL+'/requirements.txt'
TAPT_BERTJE = BASE+'/models/taptBERTje/'
AL_SCRIPT = BASE+CAL+'/run_al.py'

In [None]:
!pip install -r $REQS

In [None]:
# The five seeds were drawn once with the snippet below and then frozen so
# that every run uses the same ones:
# import random
# seeds = random.sample(range(0,9999), 5)
# seeds.sort()
seeds = [672, 2451, 5262, 7763, 9105]

## Hyper parameters that stay put
## (acquisition function, seed and dataset name are filled in per run by make_flags)

params = {
    
    'dataset_name'              : 'ornl',
    'budget'                    : '8%',         # default 15%
    'per_gpu_train_batch_size'  : 32,
    'max_seq_length'            : 256,
    # NOTE(review): make_flags renders this as the literal text `--resume False`;
    # confirm run_al.py parses that string as a false value.
    'resume'                    : False,
    'cap_training_pool'         : 50000,
    'init'                      : 'random',
    'init_train_data'           : '1%',         # default 1%
    'acquisition_size'          : '1%',         # default 2%
    # NOTE(review): this is the base checkpoint, not TAPT_BERTJE, although the
    # section is titled "AL LOOP WITH NEW MODEL" — confirm which model is intended.
    'model_name_or_path'        : 'wietsedv/bert-base-dutch-cased' # alternative: bert-base-cased; wietsedv is an older version of the model, which is acceptable here

    # settings noted by the author but not passed as flags:
    # l rate 2e-5
    # adamw opt with epsilon 1e-8
    # eval 5x p epoch
}

def make_flags(params_dict: dict, acq: str, seed: int, dataset: str = 'ornl') -> str:
  """Render the command-line flags for one run_al.py invocation.

  Args:
    params_dict: fixed hyper parameters, flag name -> value. It is not modified.
    acq: acquisition function, e.g. 'random', 'cal', 'entropy' or 'zip'.
    seed: random seed of the run.
    dataset: dataset name; overrides any 'dataset_name' entry in params_dict.

  Returns:
    The flags as one string of space-separated `--name value` pairs. The same
    string is also printed, surrounded by blank lines.
  """
  # Merge into a new dict: writing the per-run values into `params_dict`
  # itself would leak one run's acquisition/seed/dataset into the shared
  # hyper-parameter dict. Key order (and so the output) is unchanged.
  run_params = {**params_dict, 'acquisition': acq, 'seed': seed, 'dataset_name': dataset}
  flags_string = ' '.join(f'--{flag} {value}' for flag, value in run_params.items())
  print("\n", flags_string, "\n")
  return flags_string

_ = make_flags(params, acq = 'entropy', seed = seeds[0])


 --dataset_name ornl --budget 8% --per_gpu_train_batch_size 32 --max_seq_length 256 --resume False --cap_training_pool 50000 --init random --init_train_data 1% --acquisition_size 1% --model_name_or_path wietsedv/bert-base-dutch-cased --acquisition entropy --seed 672 



# ORNL20

In [None]:
# Run the active-learning script once for each of the first three seeds, using
# the 'cal' acquisition function on the 'ornl20' dataset.
for current_seed in seeds[:3]:
  print(current_seed)
  flags = make_flags(params, acq = 'cal', seed = current_seed, dataset = 'ornl20')
  # %run executes the script inside this kernel with `flags` as its arguments
  %run $AL_SCRIPT $flags

672

 --dataset_name ornl20 --budget 8% --per_gpu_train_batch_size 32 --max_seq_length 256 --resume False --cap_training_pool 50000 --init random --init_train_data 1% --acquisition_size 1% --model_name_or_path wietsedv/bert-base-dutch-cased --acquisition cal --seed 672 

torch: 1.12.0+cu113
cuda: 11.3
Cuda available: True




device: cuda:0
output_dir=/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls
Created /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls



 /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20 





07/17/2022 21:48:52 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_ornl20_original
07/17/2022 21:48:59 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_dev_ornl20_original
07/17/2022 21:49:01 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_ornl20_original



train set stats: class 21: 17% class 20: 17% class 17: 7% class 25: 17% class 3: 17% class 2: 3% class 23: 5% class 18: 1% class 4: 1% class 1: 5% class 9: 0% class 11: 3% class 14: 0% class 5: 1% class 15: 1% class 13: 0% class 6: 2% class 22: 0% class 10: 0% class 12: 1% class 19: 0% class 0: 0% class 16: 0% class 8: 0% class 7: 0% 
validation set stats: class 25: 17% class 20: 17% class 3: 17% class 21: 17% class 17: 7% class 0: 0% class 6: 2% class 23: 5% class 2: 3% class 1: 5% class 11: 3% class 12: 1% class 4: 1% class 18: 1% class 15: 1% class 14: 0% class 5: 1% class 10: 0% class 9: 0% class 19: 0% class 8: 0% class 22: 1% class 16: 0% class 13: 0% class 7: 0% 
test set stats: class 3: 17% class 5: 1% class 25: 18% class 1: 5% class 15: 1% class 20: 17% class 21: 17% class 2: 3% class 17: 7% class 6: 2% class 4: 1% class 23: 5% class 10: 0% class 0: 0% class 11: 2% class 8: 0% class 12: 1% class 22: 0% class 9: 0% class 16: 0% class 18: 1% class 14: 0% class 13: 0% class 19: 

07/17/2022 21:50:13 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0, acc_best_iteration=0, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-1', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, evaluate_during_training=True, fp16=False, fp16_opt_level='O1', gpu='0', gradient_accumulation_steps=1, indicator=None, init='random', init_train_data=500, knn_lab=False, learning_rate=2e-05, local_rank=-1, 

warmup steps: 4
total steps: 46
logging steps: 3
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.348:   0%|          | 0/16 [00:03<?, ?it/s][A
loss=3.348:   6%|▋         | 1/16 [00:04<01:03,  4.26s/it][A
loss=3.216:   6%|▋         | 1/16 [00:04<01:03,  4.26s/it][A
loss=3.216:  12%|█▎        | 2/16 [00:04<00:44,  3.16s/it][A
loss=3.284:  12%|█▎        | 2/16 [00:05<00:44,  3.16s/it][A07/17/2022 21:51:04 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:51:04 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:51:04 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:28,  1.04s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.04s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.03s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.03s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  

{"eval_acc": 0.10568754499640029, "eval_f1": 0.012447828198807041, "eval_acc_and_f1": 0.05906768659760367, "eval_loss": 3.1765956027167186, "learning_rate": 1.5000000000000002e-05, "train_loss": 3.282801310221354, "step": 3}



loss=3.188:  25%|██▌       | 4/16 [00:39<01:47,  8.97s/it][A
loss=3.137:  25%|██▌       | 4/16 [00:40<01:47,  8.97s/it][A
loss=3.137:  31%|███▏      | 5/16 [00:40<01:11,  6.46s/it][A
loss=3.021:  31%|███▏      | 5/16 [00:40<01:11,  6.46s/it][A07/17/2022 21:51:40 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:51:40 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:51:40 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.01s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.01s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.17278617710583152, "eval_f1": 0.012103994311797074, "eval_acc_and_f1": 0.0924450857088143, "eval_loss": 2.9441400596073697, "learning_rate": 1.9090909090909094e-05, "train_loss": 3.1150102615356445, "step": 6}



loss=2.938:  44%|████▍     | 7/16 [01:16<01:37, 10.85s/it][A
loss=2.783:  44%|████▍     | 7/16 [01:17<01:37, 10.85s/it][A
loss=2.783:  50%|█████     | 8/16 [01:17<01:02,  7.78s/it][A
loss=2.815:  50%|█████     | 8/16 [01:17<01:02,  7.78s/it][A07/17/2022 21:52:17 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:52:17 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:52:17 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.18934485241180707, "eval_f1": 0.01928002317623043, "eval_acc_and_f1": 0.10431243779401875, "eval_loss": 2.7240647843905856, "learning_rate": 1.772727272727273e-05, "train_loss": 2.8452625274658203, "step": 9}



loss=2.660:  62%|██████▎   | 10/16 [01:51<01:05, 10.93s/it][A
loss=2.551:  62%|██████▎   | 10/16 [01:51<01:05, 10.93s/it][A
loss=2.551:  69%|██████▉   | 11/16 [01:51<00:39,  7.83s/it][A
loss=2.429:  69%|██████▉   | 11/16 [01:51<00:39,  7.83s/it][A07/17/2022 21:52:52 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:52:52 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:52:52 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.01s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.23801295896328295, "eval_f1": 0.03719259247106839, "eval_acc_and_f1": 0.13760277571717566, "eval_loss": 2.575991988182068, "learning_rate": 1.6363636363636366e-05, "train_loss": 2.546760400136312, "step": 12}



loss=2.824:  81%|████████▏ | 13/16 [02:27<00:34, 11.43s/it][A
loss=2.550:  81%|████████▏ | 13/16 [02:27<00:34, 11.43s/it][A
loss=2.550:  88%|████████▊ | 14/16 [02:28<00:16,  8.18s/it][A
loss=2.679:  88%|████████▊ | 14/16 [02:28<00:16,  8.18s/it][A07/17/2022 21:53:28 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:53:28 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:53:28 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.40201583873290136, "eval_f1": 0.07555423125649234, "eval_acc_and_f1": 0.23878503499469686, "eval_loss": 2.4864542313984463, "learning_rate": 1.5000000000000002e-05, "train_loss": 2.6840295791625977, "step": 15}



loss=2.590:  94%|█████████▍| 15/16 [03:01<00:15, 15.69s/it][A
loss=2.590: 100%|██████████| 16/16 [03:01<00:00, 11.37s/it]
Epoch:  33%|███▎      | 1/3 [03:01<06:03, 181.87s/it]
Iteration:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.398:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.398:   6%|▋         | 1/16 [00:00<00:08,  1.67it/s][A
loss=2.378:   6%|▋         | 1/16 [00:00<00:08,  1.67it/s][A07/17/2022 21:54:02 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:54:02 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:54:02 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.01s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:0

{"eval_acc": 0.3560835133189345, "eval_f1": 0.059890858633000504, "eval_acc_and_f1": 0.2079871859759675, "eval_loss": 2.4165382214954922, "learning_rate": 1.3636363636363637e-05, "train_loss": 2.4549495379130044, "step": 18}



loss=2.354:  19%|█▉        | 3/16 [00:35<01:40,  7.76s/it][A
loss=2.384:  19%|█▉        | 3/16 [00:36<01:40,  7.76s/it][A
loss=2.384:  25%|██▌       | 4/16 [00:36<01:07,  5.62s/it][A
loss=2.295:  25%|██▌       | 4/16 [00:36<01:07,  5.62s/it][A07/17/2022 21:54:38 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:54:38 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:54:38 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.36616270698344133, "eval_f1": 0.06194608814059982, "eval_acc_and_f1": 0.21405439756202058, "eval_loss": 2.363262253148215, "learning_rate": 1.2272727272727274e-05, "train_loss": 2.3443633715311685, "step": 21}



loss=2.379:  38%|███▊      | 6/16 [01:10<01:38,  9.90s/it][A
loss=2.468:  38%|███▊      | 6/16 [01:10<01:38,  9.90s/it][A
loss=2.468:  44%|████▍     | 7/16 [01:10<01:03,  7.11s/it][A
loss=2.460:  44%|████▍     | 7/16 [01:11<01:03,  7.11s/it][A07/17/2022 21:55:12 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:55:12 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:55:12 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.5028077753779697, "eval_f1": 0.09764462052718936, "eval_acc_and_f1": 0.30022619795257954, "eval_loss": 2.315339522702353, "learning_rate": 1.0909090909090909e-05, "train_loss": 2.435572942097982, "step": 24}



loss=2.233:  56%|█████▋    | 9/16 [01:47<01:18, 11.22s/it][A
loss=2.264:  56%|█████▋    | 9/16 [01:47<01:18, 11.22s/it][A
loss=2.264:  62%|██████▎   | 10/16 [01:48<00:48,  8.05s/it][A
loss=2.320:  62%|██████▎   | 10/16 [01:48<00:48,  8.05s/it][A07/17/2022 21:55:49 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:55:49 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:55:49 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A


{"eval_acc": 0.5591072714182865, "eval_f1": 0.10746368289886753, "eval_acc_and_f1": 0.33328547715857704, "eval_loss": 2.2704900928906033, "learning_rate": 9.545454545454547e-06, "train_loss": 2.2721540927886963, "step": 27}



loss=2.297:  75%|███████▌  | 12/16 [02:21<00:44, 11.04s/it][A
loss=2.445:  75%|███████▌  | 12/16 [02:21<00:44, 11.04s/it][A
loss=2.445:  81%|████████▏ | 13/16 [02:22<00:23,  7.91s/it][A
loss=2.229:  81%|████████▏ | 13/16 [02:22<00:23,  7.91s/it][A07/17/2022 21:56:24 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:56:24 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:56:24 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.01s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.5568034557235421, "eval_f1": 0.10725395359644722, "eval_acc_and_f1": 0.3320287046599946, "eval_loss": 2.2385264209338596, "learning_rate": 8.181818181818183e-06, "train_loss": 2.3236567179361978, "step": 30}



loss=2.486:  94%|█████████▍| 15/16 [02:56<00:11, 11.13s/it][A
loss=2.285:  94%|█████████▍| 15/16 [02:56<00:11, 11.13s/it][A
loss=2.285: 100%|██████████| 16/16 [02:56<00:00, 11.06s/it]
Epoch:  67%|██████▋   | 2/3 [05:58<03:00, 180.39s/it]
Iteration:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.468:   0%|          | 0/16 [00:00<?, ?it/s][A07/17/2022 21:56:58 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:56:58 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:56:58 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:33,  1.23s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:30,  1.16s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:28,  1.12s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:26,  1.09s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:24,  1.07s/it][A[A

Evaluating:  21%|██▏       | 6/28 

{"eval_acc": 0.5707703383729301, "eval_f1": 0.10950029703417899, "eval_acc_and_f1": 0.34013531770355454, "eval_loss": 2.210686572960445, "learning_rate": 6.818181818181818e-06, "train_loss": 2.413191080093384, "step": 33}



loss=2.182:  12%|█▎        | 2/16 [00:36<05:49, 24.98s/it][A
loss=2.049:  12%|█▎        | 2/16 [00:36<05:49, 24.98s/it][A
loss=2.049:  19%|█▉        | 3/16 [00:36<03:49, 17.68s/it][A
loss=2.194:  19%|█▉        | 3/16 [00:36<03:49, 17.68s/it][A07/17/2022 21:57:35 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:57:35 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:57:35 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.577825773938085, "eval_f1": 0.11077901412571187, "eval_acc_and_f1": 0.34430239403189844, "eval_loss": 2.1829929777554105, "learning_rate": 5.4545454545454545e-06, "train_loss": 2.1417009830474854, "step": 36}



loss=2.076:  31%|███▏      | 5/16 [01:10<02:53, 15.76s/it][A
loss=2.329:  31%|███▏      | 5/16 [01:10<02:53, 15.76s/it][A
loss=2.329:  38%|███▊      | 6/16 [01:10<01:52, 11.22s/it][A
loss=2.206:  38%|███▊      | 6/16 [01:11<01:52, 11.22s/it][A07/17/2022 21:58:09 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:58:09 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:58:09 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.5821454283657307, "eval_f1": 0.11154056616427155, "eval_acc_and_f1": 0.3468429972650011, "eval_loss": 2.1616510237966264, "learning_rate": 4.0909090909090915e-06, "train_loss": 2.203479210535685, "step": 39}



loss=2.016:  50%|█████     | 8/16 [01:46<01:45, 13.13s/it][A
loss=2.085:  50%|█████     | 8/16 [01:47<01:45, 13.13s/it][A
loss=2.085:  56%|█████▋    | 9/16 [01:47<01:05,  9.37s/it][A
loss=2.058:  56%|█████▋    | 9/16 [01:47<01:05,  9.37s/it][A07/17/2022 21:58:46 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:58:46 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:58:46 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.5894888408927286, "eval_f1": 0.11294020421427663, "eval_acc_and_f1": 0.3512145225535026, "eval_loss": 2.143925300666264, "learning_rate": 2.7272727272727272e-06, "train_loss": 2.052790403366089, "step": 42}



loss=2.173:  69%|██████▉   | 11/16 [02:21<00:58, 11.73s/it][A
loss=2.111:  69%|██████▉   | 11/16 [02:21<00:58, 11.73s/it][A
loss=2.111:  75%|███████▌  | 12/16 [02:21<00:33,  8.39s/it][A
loss=2.154:  75%|███████▌  | 12/16 [02:21<00:33,  8.39s/it][A07/17/2022 21:59:20 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:59:20 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:59:20 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.5916486681065515, "eval_f1": 0.11345212151310102, "eval_acc_and_f1": 0.35255039480982625, "eval_loss": 2.1334928274154663, "learning_rate": 1.3636363636363636e-06, "train_loss": 2.1460208892822266, "step": 45}



loss=2.082:  88%|████████▊ | 14/16 [02:58<00:23, 11.91s/it][A
loss=2.205:  88%|████████▊ | 14/16 [02:58<00:23, 11.91s/it][A
loss=2.205:  94%|█████████▍| 15/16 [02:59<00:08,  8.53s/it][A
loss=2.361:  94%|█████████▍| 15/16 [02:59<00:08,  8.53s/it][A07/17/2022 21:59:58 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 21:59:58 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 21:59:58 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.5922246220302376, "eval_f1": 0.11356888783347106, "eval_acc_and_f1": 0.35289675493185435, "eval_loss": 2.1288683840206692, "learning_rate": 0.0, "train_loss": 2.2161529858907065, "step": 48}


07/17/2022 22:00:36 - INFO - utilities.trainers -   ***** Running evaluation iter-1_trial1 *****
07/17/2022 22:00:36 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:00:36 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/17/2022 22:01:04 - INFO - utilities.trainers -   ***** Eval results iter-1_trial1 *****
07/17/2022 22:01:04 - INFO - utilities.trainers -     acc = 0.5922246220302376
07/17/2022 22:01:04 - INFO - utilities.trainers -     acc_and_f1 = 0.35289675493185435
07/17/2022 22:01:04 - INFO - utilities.trainers -     f1 = 0.11356888783347106
07/17/2022 22:01:07 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/17/2022 22:01:13 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:01:13 - INFO - utilities.trainers -     Num examples = 34722
07/17/2022 22:01:13 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/17/2022 22:03:32 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 22:03:32 - INFO - utilities.trainers -     acc = 0.5956166119463164
07/17/2022 22:03:32 - INFO - utilities.trainers -     acc_and_f1 = 0.35270748092679044
07/17/2022 22:03:32 - INFO - utilities.trainers -     f1 = 0.1097983499072644



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/17/2022 22:03:33 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 22:03:51 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 22:03:59 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:03:59 - INFO - utilities.trainers -     Num examples = 49500
07/17/2022 22:03:59 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/194 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 194/194 [06:32<00:00,  2.02s/it]
07/17/2022 22:10:32 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 22:10:32 - INFO - utilities.trainers -     acc = 0.5944646464646465
07/17/2022 22:10:32 - INFO - utilities.trainers -     acc_and_f1 = 0.35419530362920404
07/17/2022 22:10:32 - INFO - utilities.trainers -     f1 = 0.11392596079376156
07/17/2022 22:10:32 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 22:10:51 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 22:11:01 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:11:01 - INFO - utilities.trainers -     Num examples = 500
07/17/2022 22:11:01 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 2/2 [00:03<00:00,  1.99s/it]
07/17/2022 22:11:05 - INFO - util



************
End of iteration 1:
Train loss 2.4674, Val loss 2.1288683840206692, Test loss 2.126779524719014
Annotated 500 samples
Current labeled (training) data: 1000 samples
Remaining budget: 3000 (in samples)
************

Saving json with the results....

 Start Training model of iteration 2!



07/17/2022 22:12:48 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.5922246220302376, acc_best_iteration=1, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-1', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-2', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, fp

warmup steps: 9
total steps: 93
logging steps: 6
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.161:   3%|▎         | 1/32 [00:00<00:15,  2.04it/s][A
loss=3.128:   3%|▎         | 1/32 [00:00<00:15,  2.04it/s][A
loss=3.128:   6%|▋         | 2/32 [00:01<00:15,  1.98it/s][A
loss=3.065:   6%|▋         | 2/32 [00:01<00:15,  1.98it/s][A
loss=3.065:   9%|▉         | 3/32 [00:01<00:15,  1.93it/s][A
loss=3.102:   9%|▉         | 3/32 [00:01<00:15,  1.93it/s][A
loss=3.102:  12%|█▎        | 4/32 [00:02<00:14,  1.90it/s][A
loss=3.050:  12%|█▎        | 4/32 [00:02<00:14,  1.90it/s][A
loss=3.050:  16%|█▌        | 5/32 [00:02<00:14,  1.88it/s][A
loss=2.960:  16%|█▌        | 5/32 [00:02<00:14,  1.88it/s][A07/17/2022 22:13:23 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:13:23 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:13:23 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋       

{"eval_acc": 0.08236141108711303, "eval_f1": 0.018589635126785947, "eval_acc_and_f1": 0.05047552310694949, "eval_loss": 2.993161984852382, "learning_rate": 1.3333333333333333e-05, "train_loss": 3.077468514442444, "step": 6}



loss=2.982:  22%|██▏       | 7/32 [00:35<02:57,  7.12s/it][A
loss=2.927:  22%|██▏       | 7/32 [00:35<02:57,  7.12s/it][A
loss=2.927:  25%|██▌       | 8/32 [00:35<02:03,  5.15s/it][A
loss=2.836:  25%|██▌       | 8/32 [00:35<02:03,  5.15s/it][A
loss=2.836:  28%|██▊       | 9/32 [00:36<01:26,  3.77s/it][A
loss=2.942:  28%|██▊       | 9/32 [00:36<01:26,  3.77s/it][A
loss=2.942:  31%|███▏      | 10/32 [00:36<01:01,  2.80s/it][A
loss=2.704:  31%|███▏      | 10/32 [00:36<01:01,  2.80s/it][A
loss=2.704:  34%|███▍      | 11/32 [00:37<00:44,  2.13s/it][A
loss=2.585:  34%|███▍      | 11/32 [00:37<00:44,  2.13s/it][A07/17/2022 22:13:57 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:13:57 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:13:57 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋   

{"eval_acc": 0.20475161987041038, "eval_f1": 0.023005768608722162, "eval_acc_and_f1": 0.11387869423956627, "eval_loss": 2.7154045019830977, "learning_rate": 1.931034482758621e-05, "train_loss": 2.82947313785553, "step": 12}



loss=2.469:  41%|████      | 13/32 [01:09<02:31,  7.96s/it][A
loss=2.549:  41%|████      | 13/32 [01:10<02:31,  7.96s/it][A
loss=2.549:  44%|████▍     | 14/32 [01:10<01:43,  5.74s/it][A
loss=2.553:  44%|████▍     | 14/32 [01:10<01:43,  5.74s/it][A
loss=2.553:  47%|████▋     | 15/32 [01:11<01:11,  4.18s/it][A
loss=2.403:  47%|████▋     | 15/32 [01:11<01:11,  4.18s/it][A
loss=2.403:  50%|█████     | 16/32 [01:11<00:49,  3.09s/it][A
loss=2.540:  50%|█████     | 16/32 [01:11<00:49,  3.09s/it][A
loss=2.540:  53%|█████▎    | 17/32 [01:12<00:34,  2.33s/it][A
loss=2.377:  53%|█████▎    | 17/32 [01:12<00:34,  2.33s/it][A07/17/2022 22:14:32 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:14:32 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:14:32 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.18934485241180707, "eval_f1": 0.018986448775644114, "eval_acc_and_f1": 0.1041656505937256, "eval_loss": 2.518277551446642, "learning_rate": 1.7931034482758623e-05, "train_loss": 2.481805443763733, "step": 18}



loss=2.536:  59%|█████▉    | 19/32 [01:44<01:44,  8.07s/it][A
loss=2.203:  59%|█████▉    | 19/32 [01:45<01:44,  8.07s/it][A
loss=2.203:  62%|██████▎   | 20/32 [01:45<01:09,  5.82s/it][A
loss=2.439:  62%|██████▎   | 20/32 [01:45<01:09,  5.82s/it][A
loss=2.439:  66%|██████▌   | 21/32 [01:46<00:46,  4.24s/it][A
loss=2.188:  66%|██████▌   | 21/32 [01:46<00:46,  4.24s/it][A
loss=2.188:  69%|██████▉   | 22/32 [01:46<00:31,  3.13s/it][A
loss=2.244:  69%|██████▉   | 22/32 [01:46<00:31,  3.13s/it][A
loss=2.244:  72%|███████▏  | 23/32 [01:47<00:21,  2.36s/it][A
loss=1.847:  72%|███████▏  | 23/32 [01:47<00:21,  2.36s/it][A07/17/2022 22:15:07 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:15:07 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:15:07 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.21339092872570195, "eval_f1": 0.026963227939150635, "eval_acc_and_f1": 0.12017707833242629, "eval_loss": 2.3795449307986667, "learning_rate": 1.6551724137931037e-05, "train_loss": 2.2427764733632407, "step": 24}



loss=2.242:  78%|███████▊  | 25/32 [02:19<00:56,  8.08s/it][A
loss=2.487:  78%|███████▊  | 25/32 [02:20<00:56,  8.08s/it][A
loss=2.487:  81%|████████▏ | 26/32 [02:20<00:34,  5.82s/it][A
loss=2.033:  81%|████████▏ | 26/32 [02:20<00:34,  5.82s/it][A
loss=2.033:  84%|████████▍ | 27/32 [02:21<00:21,  4.24s/it][A
loss=1.780:  84%|████████▍ | 27/32 [02:21<00:21,  4.24s/it][A
loss=1.780:  88%|████████▊ | 28/32 [02:21<00:12,  3.13s/it][A
loss=2.245:  88%|████████▊ | 28/32 [02:21<00:12,  3.13s/it][A
loss=2.245:  91%|█████████ | 29/32 [02:22<00:07,  2.36s/it][A
loss=2.147:  91%|█████████ | 29/32 [02:22<00:07,  2.36s/it][A07/17/2022 22:15:42 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:15:42 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:15:42 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7

{"eval_acc": 0.328869690424766, "eval_f1": 0.04759598976580433, "eval_acc_and_f1": 0.18823284009528515, "eval_loss": 2.2860682691846574, "learning_rate": 1.5172413793103448e-05, "train_loss": 2.155705908934275, "step": 30}



loss=2.049:  97%|█████████▋| 31/32 [02:56<00:08,  8.53s/it][A
loss=1.687:  97%|█████████▋| 31/32 [02:57<00:08,  8.53s/it][A
loss=1.687: 100%|██████████| 32/32 [02:57<00:00,  5.54s/it]
Epoch:  33%|███▎      | 1/3 [02:57<05:54, 177.16s/it]
Iteration:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.728:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.728:   3%|▎         | 1/32 [00:00<00:17,  1.82it/s][A
loss=1.893:   3%|▎         | 1/32 [00:00<00:17,  1.82it/s][A
loss=1.893:   6%|▋         | 2/32 [00:01<00:16,  1.82it/s][A
loss=2.045:   6%|▋         | 2/32 [00:01<00:16,  1.82it/s][A
loss=2.045:   9%|▉         | 3/32 [00:01<00:15,  1.81it/s][A
loss=1.678:   9%|▉         | 3/32 [00:01<00:15,  1.81it/s][A07/17/2022 22:16:19 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:16:19 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:16:19 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

{"eval_acc": 0.39150467962562996, "eval_f1": 0.06303085302509842, "eval_acc_and_f1": 0.2272677663253642, "eval_loss": 2.1935945664133345, "learning_rate": 1.3793103448275863e-05, "train_loss": 1.846727232138316, "step": 36}



loss=1.834:  16%|█▌        | 5/32 [00:35<03:21,  7.47s/it][A
loss=1.866:  16%|█▌        | 5/32 [00:35<03:21,  7.47s/it][A
loss=1.866:  19%|█▉        | 6/32 [00:36<02:20,  5.40s/it][A
loss=1.956:  19%|█▉        | 6/32 [00:36<02:20,  5.40s/it][A
loss=1.956:  22%|██▏       | 7/32 [00:36<01:38,  3.95s/it][A
loss=1.585:  22%|██▏       | 7/32 [00:36<01:38,  3.95s/it][A
loss=1.585:  25%|██▌       | 8/32 [00:37<01:10,  2.93s/it][A
loss=1.717:  25%|██▌       | 8/32 [00:37<01:10,  2.93s/it][A
loss=1.717:  28%|██▊       | 9/32 [00:37<00:50,  2.22s/it][A
loss=1.749:  28%|██▊       | 9/32 [00:38<00:50,  2.22s/it][A07/17/2022 22:16:56 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:16:56 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:16:56 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋       

{"eval_acc": 0.34226061915046796, "eval_f1": 0.06146919798324839, "eval_acc_and_f1": 0.20186490856685818, "eval_loss": 2.104537674358913, "learning_rate": 1.2413793103448277e-05, "train_loss": 1.7845996220906575, "step": 42}



loss=1.810:  34%|███▍      | 11/32 [01:14<03:03,  8.74s/it][A
loss=1.756:  34%|███▍      | 11/32 [01:14<03:03,  8.74s/it][A
loss=1.756:  38%|███▊      | 12/32 [01:14<02:05,  6.29s/it][A
loss=1.597:  38%|███▊      | 12/32 [01:14<02:05,  6.29s/it][A
loss=1.597:  41%|████      | 13/32 [01:15<01:26,  4.57s/it][A
loss=1.727:  41%|████      | 13/32 [01:15<01:26,  4.57s/it][A
loss=1.727:  44%|████▍     | 14/32 [01:15<01:00,  3.36s/it][A
loss=1.770:  44%|████▍     | 14/32 [01:15<01:00,  3.36s/it][A
loss=1.770:  47%|████▋     | 15/32 [01:16<00:42,  2.52s/it][A
loss=1.550:  47%|████▋     | 15/32 [01:16<00:42,  2.52s/it][A07/17/2022 22:17:34 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:17:34 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:17:34 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.44665226781857453, "eval_f1": 0.0739371581477691, "eval_acc_and_f1": 0.2602947129831718, "eval_loss": 1.9805378402982439, "learning_rate": 1.103448275862069e-05, "train_loss": 1.701532781124115, "step": 48}



loss=1.816:  53%|█████▎    | 17/32 [01:50<02:08,  8.53s/it][A
loss=1.670:  53%|█████▎    | 17/32 [01:51<02:08,  8.53s/it][A
loss=1.670:  56%|█████▋    | 18/32 [01:51<01:25,  6.14s/it][A
loss=1.581:  56%|█████▋    | 18/32 [01:51<01:25,  6.14s/it][A
loss=1.581:  59%|█████▉    | 19/32 [01:52<00:58,  4.46s/it][A
loss=1.696:  59%|█████▉    | 19/32 [01:52<00:58,  4.46s/it][A
loss=1.696:  62%|██████▎   | 20/32 [01:52<00:39,  3.29s/it][A
loss=1.571:  62%|██████▎   | 20/32 [01:52<00:39,  3.29s/it][A
loss=1.571:  66%|██████▌   | 21/32 [01:53<00:27,  2.47s/it][A
loss=1.380:  66%|██████▌   | 21/32 [01:53<00:27,  2.47s/it][A07/17/2022 22:18:10 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:18:10 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:18:10 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7

{"eval_acc": 0.4920086393088553, "eval_f1": 0.0880080428532443, "eval_acc_and_f1": 0.2900083410810498, "eval_loss": 1.873503872326442, "learning_rate": 9.655172413793105e-06, "train_loss": 1.6191195646921794, "step": 54}



loss=1.844:  72%|███████▏  | 23/32 [02:27<01:16,  8.55s/it][A
loss=1.681:  72%|███████▏  | 23/32 [02:28<01:16,  8.55s/it][A
loss=1.681:  75%|███████▌  | 24/32 [02:28<00:49,  6.15s/it][A
loss=1.475:  75%|███████▌  | 24/32 [02:28<00:49,  6.15s/it][A
loss=1.475:  78%|███████▊  | 25/32 [02:28<00:31,  4.48s/it][A
loss=1.858:  78%|███████▊  | 25/32 [02:29<00:31,  4.48s/it][A
loss=1.858:  81%|████████▏ | 26/32 [02:29<00:19,  3.30s/it][A
loss=1.358:  81%|████████▏ | 26/32 [02:29<00:19,  3.30s/it][A
loss=1.358:  84%|████████▍ | 27/32 [02:30<00:12,  2.48s/it][A
loss=1.505:  84%|████████▍ | 27/32 [02:30<00:12,  2.48s/it][A07/17/2022 22:18:47 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:18:47 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:18:47 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7

{"eval_acc": 0.5955363570914327, "eval_f1": 0.11316497064921446, "eval_acc_and_f1": 0.3543506638703236, "eval_loss": 1.7943123238427299, "learning_rate": 8.275862068965518e-06, "train_loss": 1.6203103264172871, "step": 60}



loss=1.215:  91%|█████████ | 29/32 [03:02<00:24,  8.12s/it][A
loss=1.378:  91%|█████████ | 29/32 [03:02<00:24,  8.12s/it][A
loss=1.378:  94%|█████████▍| 30/32 [03:03<00:11,  5.85s/it][A
loss=1.926:  94%|█████████▍| 30/32 [03:03<00:11,  5.85s/it][A
loss=1.926:  97%|█████████▋| 31/32 [03:03<00:04,  4.26s/it][A
loss=2.105:  97%|█████████▋| 31/32 [03:03<00:04,  4.26s/it][A
loss=2.105: 100%|██████████| 32/32 [03:03<00:00,  5.75s/it]
Epoch:  67%|██████▋   | 2/3 [06:01<02:59, 179.21s/it]
Iteration:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.507:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.507:   3%|▎         | 1/32 [00:00<00:17,  1.81it/s][A
loss=1.425:   3%|▎         | 1/32 [00:00<00:17,  1.81it/s][A07/17/2022 22:19:22 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:19:22 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:19:22 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?i

{"eval_acc": 0.6023038156947444, "eval_f1": 0.11397386739225167, "eval_acc_and_f1": 0.35813884154349807, "eval_loss": 1.730607658624649, "learning_rate": 6.896551724137932e-06, "train_loss": 1.5926907658576965, "step": 66}



loss=1.422:   9%|▉         | 3/32 [00:37<03:51,  7.99s/it][A
loss=1.232:   9%|▉         | 3/32 [00:37<03:51,  7.99s/it][A
loss=1.232:  12%|█▎        | 4/32 [00:37<02:41,  5.76s/it][A
loss=1.348:  12%|█▎        | 4/32 [00:37<02:41,  5.76s/it][A
loss=1.348:  16%|█▌        | 5/32 [00:38<01:53,  4.20s/it][A
loss=1.374:  16%|█▌        | 5/32 [00:38<01:53,  4.20s/it][A
loss=1.374:  19%|█▉        | 6/32 [00:38<01:20,  3.10s/it][A
loss=1.324:  19%|█▉        | 6/32 [00:38<01:20,  3.10s/it][A
loss=1.324:  22%|██▏       | 7/32 [00:39<00:58,  2.34s/it][A
loss=1.179:  22%|██▏       | 7/32 [00:39<00:58,  2.34s/it][A07/17/2022 22:20:00 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:20:00 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:20:00 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋       

{"eval_acc": 0.6161267098632109, "eval_f1": 0.11716710943122974, "eval_acc_and_f1": 0.36664690964722035, "eval_loss": 1.6854806074074336, "learning_rate": 5.517241379310345e-06, "train_loss": 1.3131461540857952, "step": 72}



loss=1.513:  28%|██▊       | 9/32 [01:12<03:06,  8.13s/it][A
loss=1.450:  28%|██▊       | 9/32 [01:12<03:06,  8.13s/it][A
loss=1.450:  31%|███▏      | 10/32 [01:12<02:08,  5.86s/it][A
loss=1.260:  31%|███▏      | 10/32 [01:13<02:08,  5.86s/it][A
loss=1.260:  34%|███▍      | 11/32 [01:13<01:29,  4.27s/it][A
loss=1.829:  34%|███▍      | 11/32 [01:13<01:29,  4.27s/it][A
loss=1.829:  38%|███▊      | 12/32 [01:13<01:03,  3.15s/it][A
loss=1.431:  38%|███▊      | 12/32 [01:14<01:03,  3.15s/it][A
loss=1.431:  41%|████      | 13/32 [01:14<00:45,  2.37s/it][A
loss=1.318:  41%|████      | 13/32 [01:14<00:45,  2.37s/it][A07/17/2022 22:20:36 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:20:36 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:20:36 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|

{"eval_acc": 0.6217422606191505, "eval_f1": 0.11833698859549197, "eval_acc_and_f1": 0.3700396246073212, "eval_loss": 1.6518100372382574, "learning_rate": 4.137931034482759e-06, "train_loss": 1.4671261111895244, "step": 78}



loss=1.189:  47%|████▋     | 15/32 [01:48<02:20,  8.27s/it][A
loss=1.180:  47%|████▋     | 15/32 [01:48<02:20,  8.27s/it][A
loss=1.180:  50%|█████     | 16/32 [01:48<01:35,  5.96s/it][A
loss=1.452:  50%|█████     | 16/32 [01:48<01:35,  5.96s/it][A
loss=1.452:  53%|█████▎    | 17/32 [01:49<01:05,  4.34s/it][A
loss=1.037:  53%|█████▎    | 17/32 [01:49<01:05,  4.34s/it][A
loss=1.037:  56%|█████▋    | 18/32 [01:49<00:44,  3.20s/it][A
loss=1.720:  56%|█████▋    | 18/32 [01:49<00:44,  3.20s/it][A
loss=1.720:  59%|█████▉    | 19/32 [01:50<00:31,  2.41s/it][A
loss=1.005:  59%|█████▉    | 19/32 [01:50<00:31,  2.41s/it][A07/17/2022 22:21:11 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:21:11 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:21:11 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.6100791936645068, "eval_f1": 0.11556784095758388, "eval_acc_and_f1": 0.36282351731104534, "eval_loss": 1.6291294012750899, "learning_rate": 2.7586206896551725e-06, "train_loss": 1.26368248462677, "step": 84}



loss=1.417:  66%|██████▌   | 21/32 [02:26<01:36,  8.73s/it][A
loss=1.412:  66%|██████▌   | 21/32 [02:26<01:36,  8.73s/it][A
loss=1.412:  69%|██████▉   | 22/32 [02:26<01:02,  6.28s/it][A
loss=1.104:  69%|██████▉   | 22/32 [02:26<01:02,  6.28s/it][A
loss=1.104:  72%|███████▏  | 23/32 [02:27<00:41,  4.56s/it][A
loss=1.203:  72%|███████▏  | 23/32 [02:27<00:41,  4.56s/it][A
loss=1.203:  75%|███████▌  | 24/32 [02:27<00:26,  3.36s/it][A
loss=1.574:  75%|███████▌  | 24/32 [02:27<00:26,  3.36s/it][A
loss=1.574:  78%|███████▊  | 25/32 [02:28<00:17,  2.52s/it][A
loss=1.338:  78%|███████▊  | 25/32 [02:28<00:17,  2.52s/it][A07/17/2022 22:21:49 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:21:49 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:21:49 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.6171346292296617, "eval_f1": 0.11702984494777079, "eval_acc_and_f1": 0.36708223708871623, "eval_loss": 1.6089912397520882, "learning_rate": 1.3793103448275862e-06, "train_loss": 1.3416048487027485, "step": 90}



loss=1.290:  84%|████████▍ | 27/32 [03:01<00:41,  8.21s/it][A
loss=1.410:  84%|████████▍ | 27/32 [03:01<00:41,  8.21s/it][A
loss=1.410:  88%|████████▊ | 28/32 [03:01<00:23,  5.91s/it][A
loss=1.408:  88%|████████▊ | 28/32 [03:01<00:23,  5.91s/it][A
loss=1.408:  91%|█████████ | 29/32 [03:02<00:12,  4.30s/it][A
loss=1.627:  91%|█████████ | 29/32 [03:02<00:12,  4.30s/it][A
loss=1.627:  94%|█████████▍| 30/32 [03:02<00:06,  3.18s/it][A
loss=1.189:  94%|█████████▍| 30/32 [03:03<00:06,  3.18s/it][A
loss=1.189:  97%|█████████▋| 31/32 [03:03<00:02,  2.39s/it][A
loss=1.600:  97%|█████████▋| 31/32 [03:03<00:02,  2.39s/it][A07/17/2022 22:22:24 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:22:24 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:22:24 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.6195824334053276, "eval_f1": 0.11758414074305053, "eval_acc_and_f1": 0.3685832870741891, "eval_loss": 1.600908019712993, "learning_rate": 0.0, "train_loss": 1.4205716649691265, "step": 96}


07/17/2022 22:22:59 - INFO - utilities.trainers -   ***** Running evaluation iter-2_trial1 *****
07/17/2022 22:22:59 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:22:59 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:28<00:00,  1.00s/it]
  'precision', 'predicted', average, warn_for)
07/17/2022 22:23:27 - INFO - utilities.trainers -   ***** Eval results iter-2_trial1 *****
07/17/2022 22:23:27 - INFO - utilities.trainers -     acc = 0.6195824334053276
07/17/2022 22:23:27 - INFO - utilities.trainers -     acc_and_f1 = 0.3685832870741891
07/17/2022 22:23:27 - INFO - utilities.trainers -     f1 = 0.11758414074305053
07/17/2022 22:23:30 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/17/2022 22:23:35 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:23:35 - INFO - utilities.trainers -     Num examples = 34722
07/17/2022 22:23:36 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/17/2022 22:25:55 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 22:25:55 - INFO - utilities.trainers -     acc = 0.6234951903692184
07/17/2022 22:25:55 - INFO - utilities.trainers -     acc_and_f1 = 0.3686713435241349
07/17/2022 22:25:55 - INFO - utilities.trainers -     f1 = 0.11384749667905134



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/17/2022 22:25:56 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 22:26:14 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 22:26:23 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:26:23 - INFO - utilities.trainers -     Num examples = 49000
07/17/2022 22:26:23 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/192 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 192/192 [06:30<00:00,  2.03s/it]
07/17/2022 22:32:54 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 22:32:54 - INFO - utilities.trainers -     acc = 0.6203061224489796
07/17/2022 22:32:54 - INFO - utilities.trainers -     acc_and_f1 = 0.36914957924949116
07/17/2022 22:32:54 - INFO - utilities.trainers -     f1 = 0.11799303605000264
07/17/2022 22:32:54 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 22:33:13 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 22:33:13 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:33:19 - INFO - utilities.trainers -     Num examples = 1000
07/17/2022 22:33:19 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 4/4 [00:07<00:00,  1.99s/it]
07/17/2022 22:33:27 - INFO - uti



************
End of iteration 2:
Train loss 1.8599, Val loss 1.600908019712993, Test loss 1.5949259391602348
Annotated 500 samples
Current labeled (training) data: 1500 samples
Remaining budget: 2500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 3!



07/17/2022 22:35:24 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.6195824334053276, acc_best_iteration=2, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-2', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-3', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, fp

warmup steps: 14
total steps: 140
logging steps: 9
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.278:   2%|▏         | 1/47 [00:00<00:22,  2.09it/s][A
loss=3.321:   2%|▏         | 1/47 [00:00<00:22,  2.09it/s][A
loss=3.321:   4%|▍         | 2/47 [00:01<00:22,  2.03it/s][A
loss=3.279:   4%|▍         | 2/47 [00:01<00:22,  2.03it/s][A
loss=3.279:   6%|▋         | 3/47 [00:01<00:22,  1.98it/s][A
loss=3.282:   6%|▋         | 3/47 [00:01<00:22,  1.98it/s][A
loss=3.282:   9%|▊         | 4/47 [00:02<00:22,  1.95it/s][A
loss=3.219:   9%|▊         | 4/47 [00:02<00:22,  1.95it/s][A
loss=3.219:  11%|█         | 5/47 [00:02<00:21,  1.94it/s][A
loss=3.206:  11%|█         | 5/47 [00:02<00:21,  1.94it/s][A
loss=3.206:  13%|█▎        | 6/47 [00:03<00:21,  1.93it/s][A
loss=3.201:  13%|█▎        | 6/47 [00:03<00:21,  1.93it/s][A
loss=3.201:  15%|█▍        | 7/47 [00:03<00:20,  1.92it/s][A
loss=3.083:  15%|█▍        | 7/47 [00:03<00:20,  1.92it/s][A
loss=3.083:  17%|█▋        | 8/47 [00:04<00:20,  1.91it/s][A
loss=3.124:  17%|█▋        | 8/47 [00:04<00:20,  1.91it/s][A07/17/20

{"eval_acc": 0.17782577393808496, "eval_f1": 0.016006732074731467, "eval_acc_and_f1": 0.09691625300640822, "eval_loss": 3.024706108229501, "learning_rate": 1.2857142857142859e-05, "train_loss": 3.221605751249525, "step": 9}



loss=2.893:  21%|██▏       | 10/47 [00:36<04:23,  7.12s/it][A
loss=2.863:  21%|██▏       | 10/47 [00:36<04:23,  7.12s/it][A
loss=2.863:  23%|██▎       | 11/47 [00:37<03:05,  5.15s/it][A
loss=2.775:  23%|██▎       | 11/47 [00:37<03:05,  5.15s/it][A
loss=2.775:  26%|██▌       | 12/47 [00:37<02:11,  3.76s/it][A
loss=2.621:  26%|██▌       | 12/47 [00:37<02:11,  3.76s/it][A
loss=2.621:  28%|██▊       | 13/47 [00:38<01:35,  2.79s/it][A
loss=2.609:  28%|██▊       | 13/47 [00:38<01:35,  2.79s/it][A
loss=2.609:  30%|██▉       | 14/47 [00:38<01:09,  2.12s/it][A
loss=2.482:  30%|██▉       | 14/47 [00:38<01:09,  2.12s/it][A
loss=2.482:  32%|███▏      | 15/47 [00:39<00:52,  1.64s/it][A
loss=2.251:  32%|███▏      | 15/47 [00:39<00:52,  1.64s/it][A
loss=2.251:  34%|███▍      | 16/47 [00:39<00:40,  1.31s/it][A
loss=2.267:  34%|███▍      | 16/47 [00:39<00:40,  1.31s/it][A
loss=2.267:  36%|███▌      | 17/47 [00:40<00:32,  1.08s/it][A
loss=2.295:  36%|███▌      | 17/47 [00:40<00:32,  1.08

{"eval_acc": 0.1720662347012239, "eval_f1": 0.011744471744471746, "eval_acc_and_f1": 0.09190535322284782, "eval_loss": 2.801488757133484, "learning_rate": 1.937007874015748e-05, "train_loss": 2.561634964413113, "step": 18}



loss=2.437:  40%|████      | 19/47 [01:15<03:42,  7.94s/it][A
loss=2.354:  40%|████      | 19/47 [01:15<03:42,  7.94s/it][A
loss=2.354:  43%|████▎     | 20/47 [01:15<02:34,  5.72s/it][A
loss=2.247:  43%|████▎     | 20/47 [01:16<02:34,  5.72s/it][A
loss=2.247:  45%|████▍     | 21/47 [01:16<01:48,  4.17s/it][A
loss=2.410:  45%|████▍     | 21/47 [01:16<01:48,  4.17s/it][A
loss=2.410:  47%|████▋     | 22/47 [01:17<01:16,  3.08s/it][A
loss=2.468:  47%|████▋     | 22/47 [01:17<01:16,  3.08s/it][A
loss=2.468:  49%|████▉     | 23/47 [01:17<00:55,  2.32s/it][A
loss=2.190:  49%|████▉     | 23/47 [01:17<00:55,  2.32s/it][A
loss=2.190:  51%|█████     | 24/47 [01:18<00:41,  1.79s/it][A
loss=2.052:  51%|█████     | 24/47 [01:18<00:41,  1.79s/it][A
loss=2.052:  53%|█████▎    | 25/47 [01:18<00:31,  1.41s/it][A
loss=2.051:  53%|█████▎    | 25/47 [01:18<00:31,  1.41s/it][A
loss=2.051:  55%|█████▌    | 26/47 [01:19<00:24,  1.15s/it][A
loss=2.048:  55%|█████▌    | 26/47 [01:19<00:24,  1.15

{"eval_acc": 0.1720662347012239, "eval_f1": 0.011744471744471746, "eval_acc_and_f1": 0.09190535322284782, "eval_loss": 2.484351396560669, "learning_rate": 1.7952755905511813e-05, "train_loss": 2.250861644744873, "step": 27}



loss=1.735:  60%|█████▉    | 28/47 [01:52<02:22,  7.52s/it][A
loss=1.876:  60%|█████▉    | 28/47 [01:52<02:22,  7.52s/it][A
loss=1.876:  62%|██████▏   | 29/47 [01:52<01:37,  5.43s/it][A
loss=2.118:  62%|██████▏   | 29/47 [01:52<01:37,  5.43s/it][A
loss=2.118:  64%|██████▍   | 30/47 [01:53<01:07,  3.96s/it][A
loss=2.072:  64%|██████▍   | 30/47 [01:53<01:07,  3.96s/it][A
loss=2.072:  66%|██████▌   | 31/47 [01:53<00:46,  2.93s/it][A
loss=2.069:  66%|██████▌   | 31/47 [01:53<00:46,  2.93s/it][A
loss=2.069:  68%|██████▊   | 32/47 [01:54<00:33,  2.21s/it][A
loss=1.888:  68%|██████▊   | 32/47 [01:54<00:33,  2.21s/it][A
loss=1.888:  70%|███████   | 33/47 [01:54<00:23,  1.71s/it][A
loss=1.910:  70%|███████   | 33/47 [01:54<00:23,  1.71s/it][A
loss=1.910:  72%|███████▏  | 34/47 [01:55<00:17,  1.36s/it][A
loss=1.786:  72%|███████▏  | 34/47 [01:55<00:17,  1.36s/it][A
loss=1.786:  74%|███████▍  | 35/47 [01:55<00:13,  1.11s/it][A
loss=1.921:  74%|███████▍  | 35/47 [01:56<00:13,  1.11

{"eval_acc": 0.21137508999280058, "eval_f1": 0.02678814379251637, "eval_acc_and_f1": 0.11908161689265848, "eval_loss": 2.371484407356807, "learning_rate": 1.6535433070866142e-05, "train_loss": 1.930446492301093, "step": 36}



loss=1.783:  79%|███████▊  | 37/47 [02:30<01:18,  7.90s/it][A
loss=1.886:  79%|███████▊  | 37/47 [02:30<01:18,  7.90s/it][A
loss=1.886:  81%|████████  | 38/47 [02:31<00:51,  5.69s/it][A
loss=1.816:  81%|████████  | 38/47 [02:31<00:51,  5.69s/it][A
loss=1.816:  83%|████████▎ | 39/47 [02:31<00:33,  4.14s/it][A
loss=1.831:  83%|████████▎ | 39/47 [02:31<00:33,  4.14s/it][A
loss=1.831:  85%|████████▌ | 40/47 [02:32<00:21,  3.06s/it][A
loss=1.546:  85%|████████▌ | 40/47 [02:32<00:21,  3.06s/it][A
loss=1.546:  87%|████████▋ | 41/47 [02:32<00:13,  2.30s/it][A
loss=1.800:  87%|████████▋ | 41/47 [02:32<00:13,  2.30s/it][A
loss=1.800:  89%|████████▉ | 42/47 [02:33<00:08,  1.77s/it][A
loss=2.065:  89%|████████▉ | 42/47 [02:33<00:08,  1.77s/it][A
loss=2.065:  91%|█████████▏| 43/47 [02:33<00:05,  1.40s/it][A
loss=1.625:  91%|█████████▏| 43/47 [02:34<00:05,  1.40s/it][A
loss=1.625:  94%|█████████▎| 44/47 [02:34<00:03,  1.14s/it][A
loss=1.819:  94%|█████████▎| 44/47 [02:34<00:03,  1.14

{"eval_acc": 0.37609791216702665, "eval_f1": 0.06297429139161274, "eval_acc_and_f1": 0.2195361017793197, "eval_loss": 2.1659832426479886, "learning_rate": 1.5118110236220473e-05, "train_loss": 1.7967371940612793, "step": 45}



loss=1.688:  98%|█████████▊| 46/47 [03:08<00:07,  7.83s/it][A
loss=1.925:  98%|█████████▊| 46/47 [03:08<00:07,  7.83s/it][A
loss=1.925: 100%|██████████| 47/47 [03:09<00:00,  4.03s/it]
Epoch:  33%|███▎      | 1/3 [03:09<06:18, 189.31s/it]
Iteration:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.540:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.540:   2%|▏         | 1/47 [00:00<00:24,  1.87it/s][A
loss=1.781:   2%|▏         | 1/47 [00:00<00:24,  1.87it/s][A
loss=1.781:   4%|▍         | 2/47 [00:01<00:24,  1.87it/s][A
loss=1.786:   4%|▍         | 2/47 [00:01<00:24,  1.87it/s][A
loss=1.786:   6%|▋         | 3/47 [00:01<00:23,  1.87it/s][A
loss=1.462:   6%|▋         | 3/47 [00:01<00:23,  1.87it/s][A
loss=1.462:   9%|▊         | 4/47 [00:02<00:22,  1.88it/s][A
loss=1.361:   9%|▊         | 4/47 [00:02<00:22,  1.88it/s][A
loss=1.361:  11%|█         | 5/47 [00:02<00:22,  1.87it/s][A
loss=1.504:  11%|█         | 5/47 [00:02<00:22,  1.87it/s][A
loss=1.504:  13%|█▎        | 6/4

{"eval_acc": 0.39467242620590354, "eval_f1": 0.06828324239739136, "eval_acc_and_f1": 0.23147783430164745, "eval_loss": 2.100032035793577, "learning_rate": 1.3700787401574804e-05, "train_loss": 1.6144961648517184, "step": 54}



loss=1.749:  17%|█▋        | 8/47 [00:36<04:41,  7.21s/it][A
loss=1.470:  17%|█▋        | 8/47 [00:36<04:41,  7.21s/it][A
loss=1.470:  19%|█▉        | 9/47 [00:36<03:17,  5.21s/it][A
loss=1.457:  19%|█▉        | 9/47 [00:36<03:17,  5.21s/it][A
loss=1.457:  21%|██▏       | 10/47 [00:37<02:20,  3.81s/it][A
loss=1.327:  21%|██▏       | 10/47 [00:37<02:20,  3.81s/it][A
loss=1.327:  23%|██▎       | 11/47 [00:37<01:41,  2.83s/it][A
loss=1.468:  23%|██▎       | 11/47 [00:37<01:41,  2.83s/it][A
loss=1.468:  26%|██▌       | 12/47 [00:38<01:14,  2.14s/it][A
loss=1.417:  26%|██▌       | 12/47 [00:38<01:14,  2.14s/it][A
loss=1.417:  28%|██▊       | 13/47 [00:38<00:56,  1.65s/it][A
loss=1.735:  28%|██▊       | 13/47 [00:38<00:56,  1.65s/it][A
loss=1.735:  30%|██▉       | 14/47 [00:39<00:43,  1.32s/it][A
loss=1.502:  30%|██▉       | 14/47 [00:39<00:43,  1.32s/it][A
loss=1.502:  32%|███▏      | 15/47 [00:39<00:34,  1.08s/it][A
loss=1.462:  32%|███▏      | 15/47 [00:39<00:34,  1.08s/it

{"eval_acc": 0.5330453563714903, "eval_f1": 0.1031243681837855, "eval_acc_and_f1": 0.3180848622776379, "eval_loss": 1.835459930556161, "learning_rate": 1.2283464566929135e-05, "train_loss": 1.509655819998847, "step": 63}



loss=1.368:  36%|███▌      | 17/47 [01:12<03:44,  7.48s/it][A
loss=1.370:  36%|███▌      | 17/47 [01:12<03:44,  7.48s/it][A
loss=1.370:  38%|███▊      | 18/47 [01:13<02:36,  5.39s/it][A
loss=1.497:  38%|███▊      | 18/47 [01:13<02:36,  5.39s/it][A
loss=1.497:  40%|████      | 19/47 [01:13<01:50,  3.93s/it][A
loss=1.584:  40%|████      | 19/47 [01:13<01:50,  3.93s/it][A
loss=1.584:  43%|████▎     | 20/47 [01:14<01:18,  2.92s/it][A
loss=1.278:  43%|████▎     | 20/47 [01:14<01:18,  2.92s/it][A
loss=1.278:  45%|████▍     | 21/47 [01:14<00:57,  2.20s/it][A
loss=1.043:  45%|████▍     | 21/47 [01:14<00:57,  2.20s/it][A
loss=1.043:  47%|████▋     | 22/47 [01:15<00:42,  1.70s/it][A
loss=1.224:  47%|████▋     | 22/47 [01:15<00:42,  1.70s/it][A
loss=1.224:  49%|████▉     | 23/47 [01:15<00:32,  1.35s/it][A
loss=1.534:  49%|████▉     | 23/47 [01:16<00:32,  1.35s/it][A
loss=1.534:  51%|█████     | 24/47 [01:16<00:25,  1.11s/it][A
loss=1.230:  51%|█████     | 24/47 [01:16<00:25,  1.11

{"eval_acc": 0.5982721382289417, "eval_f1": 0.12060237737240335, "eval_acc_and_f1": 0.3594372578006725, "eval_loss": 1.706041817154203, "learning_rate": 1.0866141732283466e-05, "train_loss": 1.3473813401328192, "step": 72}



loss=1.144:  55%|█████▌    | 26/47 [01:50<02:44,  7.82s/it][A
loss=1.244:  55%|█████▌    | 26/47 [01:50<02:44,  7.82s/it][A
loss=1.244:  57%|█████▋    | 27/47 [01:51<01:52,  5.63s/it][A
loss=1.506:  57%|█████▋    | 27/47 [01:51<01:52,  5.63s/it][A
loss=1.506:  60%|█████▉    | 28/47 [01:51<01:17,  4.10s/it][A
loss=1.523:  60%|█████▉    | 28/47 [01:52<01:17,  4.10s/it][A
loss=1.523:  62%|██████▏   | 29/47 [01:52<00:54,  3.03s/it][A
loss=0.929:  62%|██████▏   | 29/47 [01:52<00:54,  3.03s/it][A
loss=0.929:  64%|██████▍   | 30/47 [01:52<00:38,  2.28s/it][A
loss=0.817:  64%|██████▍   | 30/47 [01:53<00:38,  2.28s/it][A
loss=0.817:  66%|██████▌   | 31/47 [01:53<00:28,  1.76s/it][A
loss=1.215:  66%|██████▌   | 31/47 [01:53<00:28,  1.76s/it][A
loss=1.215:  68%|██████▊   | 32/47 [01:54<00:20,  1.39s/it][A
loss=1.082:  68%|██████▊   | 32/47 [01:54<00:20,  1.39s/it][A
loss=1.082:  70%|███████   | 33/47 [01:54<00:15,  1.14s/it][A
loss=1.394:  70%|███████   | 33/47 [01:54<00:15,  1.14

{"eval_acc": 0.6138228941684665, "eval_f1": 0.12411627947065489, "eval_acc_and_f1": 0.36896958681956066, "eval_loss": 1.6003248521259852, "learning_rate": 9.448818897637797e-06, "train_loss": 1.2060292561848958, "step": 81}



loss=1.398:  74%|███████▍  | 35/47 [02:27<01:29,  7.49s/it][A
loss=0.974:  74%|███████▍  | 35/47 [02:27<01:29,  7.49s/it][A
loss=0.974:  77%|███████▋  | 36/47 [02:27<00:59,  5.40s/it][A
loss=1.038:  77%|███████▋  | 36/47 [02:28<00:59,  5.40s/it][A
loss=1.038:  79%|███████▊  | 37/47 [02:28<00:39,  3.94s/it][A
loss=1.411:  79%|███████▊  | 37/47 [02:28<00:39,  3.94s/it][A
loss=1.411:  81%|████████  | 38/47 [02:28<00:26,  2.92s/it][A
loss=1.040:  81%|████████  | 38/47 [02:29<00:26,  2.92s/it][A
loss=1.040:  83%|████████▎ | 39/47 [02:29<00:17,  2.21s/it][A
loss=1.051:  83%|████████▎ | 39/47 [02:29<00:17,  2.21s/it][A
loss=1.051:  85%|████████▌ | 40/47 [02:30<00:11,  1.71s/it][A
loss=1.081:  85%|████████▌ | 40/47 [02:30<00:11,  1.71s/it][A
loss=1.081:  87%|████████▋ | 41/47 [02:30<00:08,  1.36s/it][A
loss=1.423:  87%|████████▋ | 41/47 [02:30<00:08,  1.36s/it][A
loss=1.423:  89%|████████▉ | 42/47 [02:31<00:05,  1.11s/it][A
loss=1.153:  89%|████████▉ | 42/47 [02:31<00:05,  1.11

{"eval_acc": 0.6466522678185745, "eval_f1": 0.13774364232339542, "eval_acc_and_f1": 0.392197955070985, "eval_loss": 1.4918845891952515, "learning_rate": 8.031496062992128e-06, "train_loss": 1.1744361122449238, "step": 90}



loss=0.849:  94%|█████████▎| 44/47 [03:05<00:23,  7.82s/it][A
loss=0.810:  94%|█████████▎| 44/47 [03:05<00:23,  7.82s/it][A
loss=0.810:  96%|█████████▌| 45/47 [03:06<00:11,  5.63s/it][A
loss=1.582:  96%|█████████▌| 45/47 [03:06<00:11,  5.63s/it][A
loss=1.582:  98%|█████████▊| 46/47 [03:06<00:04,  4.10s/it][A
loss=0.941:  98%|█████████▊| 46/47 [03:06<00:04,  4.10s/it][A
loss=0.941: 100%|██████████| 47/47 [03:07<00:00,  3.98s/it]
Epoch:  67%|██████▋   | 2/3 [06:16<03:08, 188.64s/it]
Iteration:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=0.834:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=0.834:   2%|▏         | 1/47 [00:00<00:24,  1.87it/s][A
loss=1.079:   2%|▏         | 1/47 [00:00<00:24,  1.87it/s][A
loss=1.079:   4%|▍         | 2/47 [00:01<00:24,  1.86it/s][A
loss=0.983:   4%|▍         | 2/47 [00:01<00:24,  1.86it/s][A
loss=0.983:   6%|▋         | 3/47 [00:01<00:23,  1.86it/s][A
loss=0.878:   6%|▋         | 3/47 [00:01<00:23,  1.86it/s][A
loss=0.878:   9%|▊         |

{"eval_acc": 0.6616270698344132, "eval_f1": 0.14337653117702118, "eval_acc_and_f1": 0.4025018005057172, "eval_loss": 1.4200897770268577, "learning_rate": 6.614173228346458e-06, "train_loss": 1.034185356563992, "step": 99}



loss=1.335:  11%|█         | 5/47 [00:37<07:34, 10.82s/it][A
loss=1.335:  13%|█▎        | 6/47 [00:37<05:17,  7.73s/it][A
loss=0.954:  13%|█▎        | 6/47 [00:37<05:17,  7.73s/it][A
loss=0.954:  15%|█▍        | 7/47 [00:38<03:42,  5.57s/it][A
loss=0.958:  15%|█▍        | 7/47 [00:38<03:42,  5.57s/it][A
loss=0.958:  17%|█▋        | 8/47 [00:38<02:38,  4.06s/it][A
loss=0.792:  17%|█▋        | 8/47 [00:38<02:38,  4.06s/it][A
loss=0.792:  19%|█▉        | 9/47 [00:39<01:54,  3.00s/it][A
loss=0.770:  19%|█▉        | 9/47 [00:39<01:54,  3.00s/it][A
loss=0.770:  21%|██▏       | 10/47 [00:39<01:23,  2.26s/it][A
loss=0.967:  21%|██▏       | 10/47 [00:39<01:23,  2.26s/it][A
loss=0.967:  23%|██▎       | 11/47 [00:40<01:02,  1.74s/it][A
loss=1.350:  23%|██▎       | 11/47 [00:40<01:02,  1.74s/it][A
loss=1.350:  26%|██▌       | 12/47 [00:40<00:48,  1.38s/it][A
loss=1.089:  26%|██▌       | 12/47 [00:40<00:48,  1.38s/it][A
loss=1.089:  28%|██▊       | 13/47 [00:41<00:38,  1.13s/it][A


{"eval_acc": 0.6803455723542117, "eval_f1": 0.1550910774414045, "eval_acc_and_f1": 0.4177183248978081, "eval_loss": 1.3748115641730172, "learning_rate": 5.196850393700788e-06, "train_loss": 1.0010734862751431, "step": 108}



loss=1.028:  32%|███▏      | 15/47 [01:18<04:27,  8.35s/it][A
loss=0.875:  32%|███▏      | 15/47 [01:18<04:27,  8.35s/it][A
loss=0.875:  34%|███▍      | 16/47 [01:18<03:06,  6.01s/it][A
loss=1.044:  34%|███▍      | 16/47 [01:18<03:06,  6.01s/it][A
loss=1.044:  36%|███▌      | 17/47 [01:19<02:10,  4.36s/it][A
loss=0.779:  36%|███▌      | 17/47 [01:19<02:10,  4.36s/it][A
loss=0.779:  38%|███▊      | 18/47 [01:19<01:33,  3.21s/it][A
loss=0.929:  38%|███▊      | 18/47 [01:19<01:33,  3.21s/it][A
loss=0.929:  40%|████      | 19/47 [01:20<01:07,  2.41s/it][A
loss=0.664:  40%|████      | 19/47 [01:20<01:07,  2.41s/it][A
loss=0.664:  43%|████▎     | 20/47 [01:20<00:49,  1.85s/it][A
loss=0.838:  43%|████▎     | 20/47 [01:20<00:49,  1.85s/it][A
loss=0.838:  45%|████▍     | 21/47 [01:21<00:37,  1.45s/it][A
loss=1.046:  45%|████▍     | 21/47 [01:21<00:37,  1.45s/it][A
loss=1.046:  47%|████▋     | 22/47 [01:21<00:29,  1.18s/it][A
loss=0.888:  47%|████▋     | 22/47 [01:22<00:29,  1.18

{"eval_acc": 0.6999280057595393, "eval_f1": 0.1646108538274833, "eval_acc_and_f1": 0.4322694297935113, "eval_loss": 1.3237173344407762, "learning_rate": 3.7795275590551182e-06, "train_loss": 0.8989937835269504, "step": 117}



loss=0.810:  51%|█████     | 24/47 [01:54<02:53,  7.52s/it][A
loss=1.132:  51%|█████     | 24/47 [01:54<02:53,  7.52s/it][A
loss=1.132:  53%|█████▎    | 25/47 [01:55<01:59,  5.42s/it][A
loss=0.825:  53%|█████▎    | 25/47 [01:55<01:59,  5.42s/it][A
loss=0.825:  55%|█████▌    | 26/47 [01:55<01:23,  3.95s/it][A
loss=0.857:  55%|█████▌    | 26/47 [01:55<01:23,  3.95s/it][A
loss=0.857:  57%|█████▋    | 27/47 [01:56<00:58,  2.93s/it][A
loss=0.978:  57%|█████▋    | 27/47 [01:56<00:58,  2.93s/it][A
loss=0.978:  60%|█████▉    | 28/47 [01:56<00:42,  2.21s/it][A
loss=0.782:  60%|█████▉    | 28/47 [01:57<00:42,  2.21s/it][A
loss=0.782:  62%|██████▏   | 29/47 [01:57<00:30,  1.71s/it][A
loss=1.050:  62%|██████▏   | 29/47 [01:57<00:30,  1.71s/it][A
loss=1.050:  64%|██████▍   | 30/47 [01:57<00:23,  1.36s/it][A
loss=0.663:  64%|██████▍   | 30/47 [01:58<00:23,  1.36s/it][A
loss=0.663:  66%|██████▌   | 31/47 [01:58<00:17,  1.11s/it][A
loss=0.857:  66%|██████▌   | 31/47 [01:58<00:17,  1.11

{"eval_acc": 0.7041036717062635, "eval_f1": 0.16673793414170482, "eval_acc_and_f1": 0.43542080292398416, "eval_loss": 1.3011681224618639, "learning_rate": 2.362204724409449e-06, "train_loss": 0.8837455047501458, "step": 126}



loss=1.025:  70%|███████   | 33/47 [02:31<01:44,  7.45s/it][A
loss=0.817:  70%|███████   | 33/47 [02:31<01:44,  7.45s/it][A
loss=0.817:  72%|███████▏  | 34/47 [02:31<01:09,  5.38s/it][A
loss=1.282:  72%|███████▏  | 34/47 [02:31<01:09,  5.38s/it][A
loss=1.282:  74%|███████▍  | 35/47 [02:32<00:47,  3.92s/it][A
loss=1.110:  74%|███████▍  | 35/47 [02:32<00:47,  3.92s/it][A
loss=1.110:  77%|███████▋  | 36/47 [02:32<00:31,  2.91s/it][A
loss=0.925:  77%|███████▋  | 36/47 [02:32<00:31,  2.91s/it][A
loss=0.925:  79%|███████▊  | 37/47 [02:33<00:21,  2.19s/it][A
loss=0.816:  79%|███████▊  | 37/47 [02:33<00:21,  2.19s/it][A
loss=0.816:  81%|████████  | 38/47 [02:33<00:15,  1.69s/it][A
loss=1.487:  81%|████████  | 38/47 [02:33<00:15,  1.69s/it][A
loss=1.487:  83%|████████▎ | 39/47 [02:34<00:10,  1.35s/it][A
loss=0.740:  83%|████████▎ | 39/47 [02:34<00:10,  1.35s/it][A
loss=0.740:  85%|████████▌ | 40/47 [02:34<00:07,  1.10s/it][A
loss=0.915:  85%|████████▌ | 40/47 [02:35<00:07,  1.10

{"eval_acc": 0.7061195104391649, "eval_f1": 0.16770378356034818, "eval_acc_and_f1": 0.43691164699975654, "eval_loss": 1.2901644834450312, "learning_rate": 9.448818897637796e-07, "train_loss": 1.0130523376994662, "step": 135}



loss=0.759:  89%|████████▉ | 42/47 [03:07<00:37,  7.49s/it][A
loss=0.765:  89%|████████▉ | 42/47 [03:07<00:37,  7.49s/it][A
loss=0.765:  91%|█████████▏| 43/47 [03:08<00:21,  5.41s/it][A
loss=0.551:  91%|█████████▏| 43/47 [03:08<00:21,  5.41s/it][A
loss=0.551:  94%|█████████▎| 44/47 [03:08<00:11,  3.94s/it][A
loss=0.644:  94%|█████████▎| 44/47 [03:08<00:11,  3.94s/it][A
loss=0.644:  96%|█████████▌| 45/47 [03:09<00:05,  2.92s/it][A
loss=1.027:  96%|█████████▌| 45/47 [03:09<00:05,  2.92s/it][A
loss=1.027:  98%|█████████▊| 46/47 [03:09<00:02,  2.20s/it][A
loss=0.651:  98%|█████████▊| 46/47 [03:09<00:02,  2.20s/it][A07/17/2022 22:45:19 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:45:19 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:45:19 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.03s/it][A[A

Evaluating:   7

{"eval_acc": 0.7064074874010079, "eval_f1": 0.1680478487041892, "eval_acc_and_f1": 0.43722766805259855, "eval_loss": 1.285630234650203, "learning_rate": 0.0, "train_loss": 0.4886035919189453, "step": 141}


07/17/2022 22:45:54 - INFO - utilities.trainers -   ***** Running evaluation iter-3_trial1 *****
07/17/2022 22:45:54 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 22:45:54 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/17/2022 22:46:21 - INFO - utilities.trainers -   ***** Eval results iter-3_trial1 *****
07/17/2022 22:46:21 - INFO - utilities.trainers -     acc = 0.7064074874010079
07/17/2022 22:46:21 - INFO - utilities.trainers -     acc_and_f1 = 0.43722766805259855
07/17/2022 22:46:21 - INFO - utilities.trainers -     f1 = 0.1680478487041892
07/17/2022 22:46:24 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/17/2022 22:46:30 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:46:31 - INFO - utilities.trainers -     Num examples = 34722
07/17/2022 22:46:31 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/17/2022 22:48:50 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 22:48:50 - INFO - utilities.trainers -     acc = 0.7059213178964345
07/17/2022 22:48:50 - INFO - utilities.trainers -     acc_and_f1 = 0.43329723004934423
07/17/2022 22:48:50 - INFO - utilities.trainers -     f1 = 0.16067314220225393



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/17/2022 22:48:51 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 22:49:09 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 22:49:19 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:49:19 - INFO - utilities.trainers -     Num examples = 48500
07/17/2022 22:49:19 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/190 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 190/190 [06:26<00:00,  2.03s/it]
07/17/2022 22:55:46 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 22:55:46 - INFO - utilities.trainers -     acc = 0.7025773195876288
07/17/2022 22:55:46 - INFO - utilities.trainers -     acc_and_f1 = 0.43412802831727915
07/17/2022 22:55:46 - INFO - utilities.trainers -     f1 = 0.16567873704692954
07/17/2022 22:55:46 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 22:56:04 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 22:56:12 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 22:56:12 - INFO - utilities.trainers -     Num examples = 1500
07/17/2022 22:56:12 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 6/6 [00:11<00:00,  1.99s/it]
07/17/2022 22:56:24 - INFO - uti



************
End of iteration 3:
Train loss 1.5276, Val loss 1.285630234650203, Test loss 1.283561787184547
Annotated 500 samples
Current labeled (training) data: 2000 samples
Remaining budget: 2000 (in samples)
************

Saving json with the results....

 Start Training model of iteration 4!



07/17/2022 22:58:38 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.7064074874010079, acc_best_iteration=3, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-3', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-4', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, fp

warmup steps: 18
total steps: 187
logging steps: 12
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.316:   2%|▏         | 1/63 [00:00<00:29,  2.11it/s][A
loss=3.317:   2%|▏         | 1/63 [00:00<00:29,  2.11it/s][A
loss=3.317:   3%|▎         | 2/63 [00:01<00:30,  2.01it/s][A
loss=3.262:   3%|▎         | 2/63 [00:01<00:30,  2.01it/s][A
loss=3.262:   5%|▍         | 3/63 [00:01<00:30,  1.96it/s][A
loss=3.318:   5%|▍         | 3/63 [00:01<00:30,  1.96it/s][A
loss=3.318:   6%|▋         | 4/63 [00:02<00:30,  1.92it/s][A
loss=3.222:   6%|▋         | 4/63 [00:02<00:30,  1.92it/s][A
loss=3.222:   8%|▊         | 5/63 [00:02<00:30,  1.89it/s][A
loss=3.224:   8%|▊         | 5/63 [00:02<00:30,  1.89it/s][A
loss=3.224:  10%|▉         | 6/63 [00:03<00:30,  1.87it/s][A
loss=3.100:  10%|▉         | 6/63 [00:03<00:30,  1.87it/s][A
loss=3.100:  11%|█         | 7/63 [00:03<00:30,  1.86it/s][A
loss=3.085:  11%|█         | 7/63 [00:03<00:30,  1.86it/s][A
loss=3.085:  13%|█▎        | 8/63 [00:04<00:29,  1.85it/s][A
loss=2.999:  13%|█▎        | 8/63 [00:04<00:29,  1.85it/s][A
loss=2.

{"eval_acc": 0.219150467962563, "eval_f1": 0.04094126120916595, "eval_acc_and_f1": 0.13004586458586448, "eval_loss": 2.9847333346094405, "learning_rate": 1.3333333333333333e-05, "train_loss": 3.139969845612844, "step": 12}



loss=2.832:  21%|██        | 13/63 [00:38<05:56,  7.13s/it][A
loss=2.785:  21%|██        | 13/63 [00:38<05:56,  7.13s/it][A
loss=2.785:  22%|██▏       | 14/63 [00:38<04:12,  5.16s/it][A
loss=2.740:  22%|██▏       | 14/63 [00:39<04:12,  5.16s/it][A
loss=2.740:  24%|██▍       | 15/63 [00:39<03:01,  3.78s/it][A
loss=2.570:  24%|██▍       | 15/63 [00:39<03:01,  3.78s/it][A
loss=2.570:  25%|██▌       | 16/63 [00:40<02:12,  2.81s/it][A
loss=2.631:  25%|██▌       | 16/63 [00:40<02:12,  2.81s/it][A
loss=2.631:  27%|██▋       | 17/63 [00:40<01:38,  2.13s/it][A
loss=2.399:  27%|██▋       | 17/63 [00:40<01:38,  2.13s/it][A
loss=2.399:  29%|██▊       | 18/63 [00:41<01:14,  1.66s/it][A
loss=2.418:  29%|██▊       | 18/63 [00:41<01:14,  1.66s/it][A
loss=2.418:  30%|███       | 19/63 [00:41<00:58,  1.32s/it][A
loss=2.329:  30%|███       | 19/63 [00:41<00:58,  1.32s/it][A
loss=2.329:  32%|███▏      | 20/63 [00:42<00:46,  1.09s/it][A
loss=2.371:  32%|███▏      | 20/63 [00:42<00:46,  1.09

{"eval_acc": 0.18070554355651547, "eval_f1": 0.01832817571502346, "eval_acc_and_f1": 0.09951685963576946, "eval_loss": 2.679982057639531, "learning_rate": 1.929824561403509e-05, "train_loss": 2.481994887193044, "step": 24}



loss=2.209:  40%|███▉      | 25/63 [01:16<04:36,  7.29s/it][A
loss=2.186:  40%|███▉      | 25/63 [01:16<04:36,  7.29s/it][A
loss=2.186:  41%|████▏     | 26/63 [01:17<03:14,  5.27s/it][A
loss=2.187:  41%|████▏     | 26/63 [01:17<03:14,  5.27s/it][A
loss=2.187:  43%|████▎     | 27/63 [01:17<02:18,  3.85s/it][A
loss=2.197:  43%|████▎     | 27/63 [01:17<02:18,  3.85s/it][A
loss=2.197:  44%|████▍     | 28/63 [01:18<01:40,  2.86s/it][A
loss=2.123:  44%|████▍     | 28/63 [01:18<01:40,  2.86s/it][A
loss=2.123:  46%|████▌     | 29/63 [01:18<01:13,  2.17s/it][A
loss=1.997:  46%|████▌     | 29/63 [01:18<01:13,  2.17s/it][A
loss=1.997:  48%|████▊     | 30/63 [01:19<00:55,  1.68s/it][A
loss=2.029:  48%|████▊     | 30/63 [01:19<00:55,  1.68s/it][A
loss=2.029:  49%|████▉     | 31/63 [01:19<00:42,  1.34s/it][A
loss=1.924:  49%|████▉     | 31/63 [01:20<00:42,  1.34s/it][A
loss=1.924:  51%|█████     | 32/63 [01:20<00:34,  1.10s/it][A
loss=1.880:  51%|█████     | 32/63 [01:20<00:34,  1.10

{"eval_acc": 0.18185745140388768, "eval_f1": 0.03240328004138597, "eval_acc_and_f1": 0.10713036572263682, "eval_loss": 2.422629722527095, "learning_rate": 1.7894736842105264e-05, "train_loss": 2.0182448426882424, "step": 36}



loss=1.890:  59%|█████▊    | 37/63 [01:55<03:10,  7.32s/it][A
loss=1.951:  59%|█████▊    | 37/63 [01:55<03:10,  7.32s/it][A
loss=1.951:  60%|██████    | 38/63 [01:55<02:12,  5.29s/it][A
loss=1.471:  60%|██████    | 38/63 [01:55<02:12,  5.29s/it][A
loss=1.471:  62%|██████▏   | 39/63 [01:56<01:32,  3.87s/it][A
loss=1.976:  62%|██████▏   | 39/63 [01:56<01:32,  3.87s/it][A
loss=1.976:  63%|██████▎   | 40/63 [01:56<01:06,  2.87s/it][A
loss=1.572:  63%|██████▎   | 40/63 [01:56<01:06,  2.87s/it][A
loss=1.572:  65%|██████▌   | 41/63 [01:57<00:47,  2.18s/it][A
loss=1.878:  65%|██████▌   | 41/63 [01:57<00:47,  2.18s/it][A
loss=1.878:  67%|██████▋   | 42/63 [01:57<00:35,  1.69s/it][A
loss=1.666:  67%|██████▋   | 42/63 [01:57<00:35,  1.69s/it][A
loss=1.666:  68%|██████▊   | 43/63 [01:58<00:26,  1.35s/it][A
loss=2.119:  68%|██████▊   | 43/63 [01:58<00:26,  1.35s/it][A
loss=2.119:  70%|██████▉   | 44/63 [01:58<00:21,  1.12s/it][A
loss=1.372:  70%|██████▉   | 44/63 [01:59<00:21,  1.12

{"eval_acc": 0.30064794816414686, "eval_f1": 0.05432747403665405, "eval_acc_and_f1": 0.17748771110040046, "eval_loss": 2.219632795878819, "learning_rate": 1.649122807017544e-05, "train_loss": 1.754076212644577, "step": 48}



loss=1.362:  78%|███████▊  | 49/63 [02:33<01:42,  7.33s/it][A
loss=1.443:  78%|███████▊  | 49/63 [02:33<01:42,  7.33s/it][A
loss=1.443:  79%|███████▉  | 50/63 [02:34<01:08,  5.30s/it][A
loss=1.536:  79%|███████▉  | 50/63 [02:34<01:08,  5.30s/it][A
loss=1.536:  81%|████████  | 51/63 [02:34<00:46,  3.88s/it][A
loss=1.583:  81%|████████  | 51/63 [02:34<00:46,  3.88s/it][A
loss=1.583:  83%|████████▎ | 52/63 [02:35<00:31,  2.88s/it][A
loss=1.716:  83%|████████▎ | 52/63 [02:35<00:31,  2.88s/it][A
loss=1.716:  84%|████████▍ | 53/63 [02:35<00:21,  2.18s/it][A
loss=1.410:  84%|████████▍ | 53/63 [02:35<00:21,  2.18s/it][A
loss=1.410:  86%|████████▌ | 54/63 [02:36<00:15,  1.69s/it][A
loss=1.306:  86%|████████▌ | 54/63 [02:36<00:15,  1.69s/it][A
loss=1.306:  87%|████████▋ | 55/63 [02:36<00:10,  1.35s/it][A
loss=1.298:  87%|████████▋ | 55/63 [02:36<00:10,  1.35s/it][A
loss=1.298:  89%|████████▉ | 56/63 [02:37<00:07,  1.11s/it][A
loss=1.222:  89%|████████▉ | 56/63 [02:37<00:07,  1.11

{"eval_acc": 0.4453563714902808, "eval_f1": 0.08223011403748767, "eval_acc_and_f1": 0.2637932427638842, "eval_loss": 2.0162728003093173, "learning_rate": 1.5087719298245615e-05, "train_loss": 1.4297593633333843, "step": 60}



loss=1.293:  97%|█████████▋| 61/63 [03:12<00:14,  7.36s/it][A
loss=1.345:  97%|█████████▋| 61/63 [03:12<00:14,  7.36s/it][A
loss=1.345:  98%|█████████▊| 62/63 [03:12<00:05,  5.32s/it][A
loss=1.495:  98%|█████████▊| 62/63 [03:12<00:05,  5.32s/it][A
loss=1.495: 100%|██████████| 63/63 [03:12<00:00,  3.06s/it]
Epoch:  33%|███▎      | 1/3 [03:12<06:25, 192.95s/it]
Iteration:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=1.301:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=1.301:   2%|▏         | 1/63 [00:00<00:33,  1.82it/s][A
loss=1.609:   2%|▏         | 1/63 [00:00<00:33,  1.82it/s][A
loss=1.609:   3%|▎         | 2/63 [00:01<00:33,  1.82it/s][A
loss=1.338:   3%|▎         | 2/63 [00:01<00:33,  1.82it/s][A
loss=1.338:   5%|▍         | 3/63 [00:01<00:33,  1.82it/s][A
loss=1.397:   5%|▍         | 3/63 [00:01<00:33,  1.82it/s][A
loss=1.397:   6%|▋         | 4/63 [00:02<00:32,  1.81it/s][A
loss=1.229:   6%|▋         | 4/63 [00:02<00:32,  1.81it/s][A
loss=1.229:   8%|▊         | 5

{"eval_acc": 0.5192224622030237, "eval_f1": 0.10436767857129343, "eval_acc_and_f1": 0.3117950703871586, "eval_loss": 1.8838884745325362, "learning_rate": 1.3684210526315791e-05, "train_loss": 1.400148073832194, "step": 72}



loss=1.589:  16%|█▌        | 10/63 [00:37<06:23,  7.24s/it][A
loss=1.358:  16%|█▌        | 10/63 [00:37<06:23,  7.24s/it][A
loss=1.358:  17%|█▋        | 11/63 [00:37<04:32,  5.24s/it][A
loss=1.145:  17%|█▋        | 11/63 [00:38<04:32,  5.24s/it][A
loss=1.145:  19%|█▉        | 12/63 [00:38<03:15,  3.83s/it][A
loss=1.335:  19%|█▉        | 12/63 [00:38<03:15,  3.83s/it][A
loss=1.335:  21%|██        | 13/63 [00:39<02:22,  2.85s/it][A
loss=1.065:  21%|██        | 13/63 [00:39<02:22,  2.85s/it][A
loss=1.065:  22%|██▏       | 14/63 [00:39<01:45,  2.16s/it][A
loss=1.269:  22%|██▏       | 14/63 [00:39<01:45,  2.16s/it][A
loss=1.269:  24%|██▍       | 15/63 [00:40<01:20,  1.68s/it][A
loss=1.277:  24%|██▍       | 15/63 [00:40<01:20,  1.68s/it][A
loss=1.277:  25%|██▌       | 16/63 [00:40<01:02,  1.34s/it][A
loss=1.354:  25%|██▌       | 16/63 [00:40<01:02,  1.34s/it][A
loss=1.354:  27%|██▋       | 17/63 [00:41<00:50,  1.10s/it][A
loss=1.427:  27%|██▋       | 17/63 [00:41<00:50,  1.10

{"eval_acc": 0.6056155507559395, "eval_f1": 0.1432226913437658, "eval_acc_and_f1": 0.37441912104985264, "eval_loss": 1.6895274392196111, "learning_rate": 1.2280701754385966e-05, "train_loss": 1.2843943039576213, "step": 84}



loss=1.294:  35%|███▍      | 22/63 [01:17<05:12,  7.63s/it][A
loss=1.449:  35%|███▍      | 22/63 [01:17<05:12,  7.63s/it][A
loss=1.449:  37%|███▋      | 23/63 [01:17<03:40,  5.51s/it][A
loss=1.270:  37%|███▋      | 23/63 [01:18<03:40,  5.51s/it][A
loss=1.270:  38%|███▊      | 24/63 [01:18<02:36,  4.02s/it][A
loss=1.200:  38%|███▊      | 24/63 [01:18<02:36,  4.02s/it][A
loss=1.200:  40%|███▉      | 25/63 [01:18<01:53,  2.98s/it][A
loss=0.900:  40%|███▉      | 25/63 [01:19<01:53,  2.98s/it][A
loss=0.900:  41%|████▏     | 26/63 [01:19<01:23,  2.25s/it][A
loss=0.858:  41%|████▏     | 26/63 [01:19<01:23,  2.25s/it][A
loss=0.858:  43%|████▎     | 27/63 [01:20<01:02,  1.75s/it][A
loss=0.727:  43%|████▎     | 27/63 [01:20<01:02,  1.75s/it][A
loss=0.727:  44%|████▍     | 28/63 [01:20<00:48,  1.39s/it][A
loss=1.017:  44%|████▍     | 28/63 [01:20<00:48,  1.39s/it][A
loss=1.017:  46%|████▌     | 29/63 [01:21<00:38,  1.14s/it][A
loss=1.169:  46%|████▌     | 29/63 [01:21<00:38,  1.14

{"eval_acc": 0.6394528437724982, "eval_f1": 0.1462693475551393, "eval_acc_and_f1": 0.39286109566381877, "eval_loss": 1.534148016146251, "learning_rate": 1.0877192982456142e-05, "train_loss": 1.0399394532044728, "step": 96}



loss=1.424:  54%|█████▍    | 34/63 [01:57<03:40,  7.61s/it][A
loss=0.830:  54%|█████▍    | 34/63 [01:57<03:40,  7.61s/it][A
loss=0.830:  56%|█████▌    | 35/63 [01:57<02:33,  5.50s/it][A
loss=1.017:  56%|█████▌    | 35/63 [01:57<02:33,  5.50s/it][A
loss=1.017:  57%|█████▋    | 36/63 [01:58<01:48,  4.01s/it][A
loss=1.197:  57%|█████▋    | 36/63 [01:58<01:48,  4.01s/it][A
loss=1.197:  59%|█████▊    | 37/63 [01:58<01:17,  2.97s/it][A
loss=0.934:  59%|█████▊    | 37/63 [01:58<01:17,  2.97s/it][A
loss=0.934:  60%|██████    | 38/63 [01:59<00:56,  2.25s/it][A
loss=1.037:  60%|██████    | 38/63 [01:59<00:56,  2.25s/it][A
loss=1.037:  62%|██████▏   | 39/63 [01:59<00:41,  1.74s/it][A
loss=0.812:  62%|██████▏   | 39/63 [02:00<00:41,  1.74s/it][A
loss=0.812:  63%|██████▎   | 40/63 [02:00<00:31,  1.39s/it][A
loss=1.022:  63%|██████▎   | 40/63 [02:00<00:31,  1.39s/it][A
loss=1.022:  65%|██████▌   | 41/63 [02:01<00:25,  1.14s/it][A
loss=0.953:  65%|██████▌   | 41/63 [02:01<00:25,  1.14

{"eval_acc": 0.6483801295896329, "eval_f1": 0.15398200578114954, "eval_acc_and_f1": 0.4011810676853912, "eval_loss": 1.445560382945197, "learning_rate": 9.473684210526315e-06, "train_loss": 1.0145096331834793, "step": 108}



loss=0.921:  73%|███████▎  | 46/63 [02:37<02:10,  7.65s/it][A
loss=0.980:  73%|███████▎  | 46/63 [02:37<02:10,  7.65s/it][A
loss=0.980:  75%|███████▍  | 47/63 [02:37<01:28,  5.52s/it][A
loss=0.650:  75%|███████▍  | 47/63 [02:37<01:28,  5.52s/it][A
loss=0.650:  76%|███████▌  | 48/63 [02:38<01:00,  4.03s/it][A
loss=0.717:  76%|███████▌  | 48/63 [02:38<01:00,  4.03s/it][A
loss=0.717:  78%|███████▊  | 49/63 [02:38<00:41,  2.99s/it][A
loss=0.893:  78%|███████▊  | 49/63 [02:38<00:41,  2.99s/it][A
loss=0.893:  79%|███████▉  | 50/63 [02:39<00:29,  2.26s/it][A
loss=1.108:  79%|███████▉  | 50/63 [02:39<00:29,  2.26s/it][A
loss=1.108:  81%|████████  | 51/63 [02:39<00:20,  1.75s/it][A
loss=0.997:  81%|████████  | 51/63 [02:40<00:20,  1.75s/it][A
loss=0.997:  83%|████████▎ | 52/63 [02:40<00:15,  1.39s/it][A
loss=0.937:  83%|████████▎ | 52/63 [02:40<00:15,  1.39s/it][A
loss=0.937:  84%|████████▍ | 53/63 [02:41<00:11,  1.14s/it][A
loss=0.729:  84%|████████▍ | 53/63 [02:41<00:11,  1.14

{"eval_acc": 0.6688264938804895, "eval_f1": 0.1595445649925971, "eval_acc_and_f1": 0.4141855294365433, "eval_loss": 1.3405988429273878, "learning_rate": 8.070175438596492e-06, "train_loss": 0.8893062969048818, "step": 120}



loss=0.892:  90%|█████████ | 57/63 [03:17<01:05, 10.90s/it][A
loss=0.892:  92%|█████████▏| 58/63 [03:17<00:38,  7.80s/it][A
loss=0.780:  92%|█████████▏| 58/63 [03:17<00:38,  7.80s/it][A
loss=0.780:  94%|█████████▎| 59/63 [03:18<00:22,  5.62s/it][A
loss=0.805:  94%|█████████▎| 59/63 [03:18<00:22,  5.62s/it][A
loss=0.805:  95%|█████████▌| 60/63 [03:18<00:12,  4.10s/it][A
loss=0.803:  95%|█████████▌| 60/63 [03:19<00:12,  4.10s/it][A
loss=0.803:  97%|█████████▋| 61/63 [03:19<00:06,  3.04s/it][A
loss=0.853:  97%|█████████▋| 61/63 [03:19<00:06,  3.04s/it][A
loss=0.853:  98%|█████████▊| 62/63 [03:20<00:02,  2.29s/it][A
loss=0.889:  98%|█████████▊| 62/63 [03:20<00:02,  2.29s/it][A
loss=0.889: 100%|██████████| 63/63 [03:20<00:00,  3.18s/it]
Epoch:  67%|██████▋   | 2/3 [06:33<03:15, 195.16s/it]
Iteration:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=1.020:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=1.020:   2%|▏         | 1/63 [00:00<00:33,  1.82it/s][A
loss=1.210:   2%|▏     

{"eval_acc": 0.6971922246220302, "eval_f1": 0.18892387584518955, "eval_acc_and_f1": 0.4430580502336099, "eval_loss": 1.2646299515451704, "learning_rate": 6.666666666666667e-06, "train_loss": 0.9279800007740656, "step": 132}



loss=0.874:  11%|█         | 7/63 [00:39<07:26,  7.97s/it][A
loss=0.619:  11%|█         | 7/63 [00:39<07:26,  7.97s/it][A
loss=0.619:  13%|█▎        | 8/63 [00:39<05:16,  5.75s/it][A
loss=0.645:  13%|█▎        | 8/63 [00:39<05:16,  5.75s/it][A
loss=0.645:  14%|█▍        | 9/63 [00:40<03:46,  4.19s/it][A
loss=0.870:  14%|█▍        | 9/63 [00:40<03:46,  4.19s/it][A
loss=0.870:  16%|█▌        | 10/63 [00:40<02:44,  3.10s/it][A
loss=0.846:  16%|█▌        | 10/63 [00:41<02:44,  3.10s/it][A
loss=0.846:  17%|█▋        | 11/63 [00:41<02:01,  2.34s/it][A
loss=0.528:  17%|█▋        | 11/63 [00:41<02:01,  2.34s/it][A
loss=0.528:  19%|█▉        | 12/63 [00:41<01:31,  1.80s/it][A
loss=0.773:  19%|█▉        | 12/63 [00:42<01:31,  1.80s/it][A
loss=0.773:  21%|██        | 13/63 [00:42<01:11,  1.43s/it][A
loss=1.077:  21%|██        | 13/63 [00:42<01:11,  1.43s/it][A
loss=1.077:  22%|██▏       | 14/63 [00:43<00:57,  1.16s/it][A
loss=1.095:  22%|██▏       | 14/63 [00:43<00:57,  1.16s/it]

{"eval_acc": 0.7068394528437725, "eval_f1": 0.19576887478092822, "eval_acc_and_f1": 0.45130416381235033, "eval_loss": 1.2110217724527632, "learning_rate": 5.263157894736842e-06, "train_loss": 0.7644244780143102, "step": 144}



loss=1.048:  30%|███       | 19/63 [01:17<05:23,  7.36s/it][A
loss=0.894:  30%|███       | 19/63 [01:17<05:23,  7.36s/it][A
loss=0.894:  32%|███▏      | 20/63 [01:18<03:48,  5.31s/it][A
loss=0.887:  32%|███▏      | 20/63 [01:18<03:48,  5.31s/it][A
loss=0.887:  33%|███▎      | 21/63 [01:18<02:43,  3.89s/it][A
loss=0.708:  33%|███▎      | 21/63 [01:19<02:43,  3.89s/it][A
loss=0.708:  35%|███▍      | 22/63 [01:19<01:58,  2.89s/it][A
loss=0.684:  35%|███▍      | 22/63 [01:19<01:58,  2.89s/it][A
loss=0.684:  37%|███▋      | 23/63 [01:19<01:27,  2.19s/it][A
loss=0.759:  37%|███▋      | 23/63 [01:20<01:27,  2.19s/it][A
loss=0.759:  38%|███▊      | 24/63 [01:20<01:06,  1.70s/it][A
loss=0.699:  38%|███▊      | 24/63 [01:20<01:06,  1.70s/it][A
loss=0.699:  40%|███▉      | 25/63 [01:21<00:51,  1.35s/it][A
loss=1.036:  40%|███▉      | 25/63 [01:21<00:51,  1.35s/it][A
loss=1.036:  41%|████▏     | 26/63 [01:21<00:41,  1.12s/it][A
loss=0.635:  41%|████▏     | 26/63 [01:21<00:41,  1.12

{"eval_acc": 0.7074154067674586, "eval_f1": 0.19387687316229066, "eval_acc_and_f1": 0.4506461399648746, "eval_loss": 1.173922598361969, "learning_rate": 3.859649122807018e-06, "train_loss": 0.8162530759970347, "step": 156}



loss=0.952:  49%|████▉     | 31/63 [01:57<04:02,  7.59s/it][A
loss=0.774:  49%|████▉     | 31/63 [01:57<04:02,  7.59s/it][A
loss=0.774:  51%|█████     | 32/63 [01:58<02:49,  5.48s/it][A
loss=0.831:  51%|█████     | 32/63 [01:58<02:49,  5.48s/it][A
loss=0.831:  52%|█████▏    | 33/63 [01:58<02:00,  4.00s/it][A
loss=0.685:  52%|█████▏    | 33/63 [01:58<02:00,  4.00s/it][A
loss=0.685:  54%|█████▍    | 34/63 [01:59<01:25,  2.97s/it][A
loss=0.790:  54%|█████▍    | 34/63 [01:59<01:25,  2.97s/it][A
loss=0.790:  56%|█████▌    | 35/63 [01:59<01:02,  2.24s/it][A
loss=0.777:  56%|█████▌    | 35/63 [01:59<01:02,  2.24s/it][A
loss=0.777:  57%|█████▋    | 36/63 [02:00<00:46,  1.74s/it][A
loss=0.499:  57%|█████▋    | 36/63 [02:00<00:46,  1.74s/it][A
loss=0.499:  59%|█████▊    | 37/63 [02:00<00:35,  1.38s/it][A
loss=0.632:  59%|█████▊    | 37/63 [02:00<00:35,  1.38s/it][A
loss=0.632:  60%|██████    | 38/63 [02:01<00:28,  1.13s/it][A
loss=0.675:  60%|██████    | 38/63 [02:01<00:28,  1.13

{"eval_acc": 0.7202303815694744, "eval_f1": 0.20240848614411358, "eval_acc_and_f1": 0.46131943385679397, "eval_loss": 1.1429437420197897, "learning_rate": 2.456140350877193e-06, "train_loss": 0.6986302261551222, "step": 168}



loss=0.973:  68%|██████▊   | 43/63 [02:35<02:26,  7.33s/it][A
loss=0.773:  68%|██████▊   | 43/63 [02:36<02:26,  7.33s/it][A
loss=0.773:  70%|██████▉   | 44/63 [02:36<01:40,  5.30s/it][A
loss=0.521:  70%|██████▉   | 44/63 [02:36<01:40,  5.30s/it][A
loss=0.521:  71%|███████▏  | 45/63 [02:37<01:09,  3.87s/it][A
loss=0.407:  71%|███████▏  | 45/63 [02:37<01:09,  3.87s/it][A
loss=0.407:  73%|███████▎  | 46/63 [02:37<00:48,  2.88s/it][A
loss=0.491:  73%|███████▎  | 46/63 [02:37<00:48,  2.88s/it][A
loss=0.491:  75%|███████▍  | 47/63 [02:38<00:34,  2.18s/it][A
loss=0.512:  75%|███████▍  | 47/63 [02:38<00:34,  2.18s/it][A
loss=0.512:  76%|███████▌  | 48/63 [02:38<00:25,  1.69s/it][A
loss=0.515:  76%|███████▌  | 48/63 [02:38<00:25,  1.69s/it][A
loss=0.515:  78%|███████▊  | 49/63 [02:39<00:18,  1.35s/it][A
loss=0.918:  78%|███████▊  | 49/63 [02:39<00:18,  1.35s/it][A
loss=0.918:  79%|███████▉  | 50/63 [02:39<00:14,  1.11s/it][A
loss=0.639:  79%|███████▉  | 50/63 [02:39<00:14,  1.11

{"eval_acc": 0.723110151187905, "eval_f1": 0.2024775351580246, "eval_acc_and_f1": 0.4627938431729648, "eval_loss": 1.1285720872027534, "learning_rate": 1.0526315789473685e-06, "train_loss": 0.7024731462200483, "step": 180}



loss=0.588:  87%|████████▋ | 55/63 [03:14<00:58,  7.36s/it][A
loss=0.816:  87%|████████▋ | 55/63 [03:14<00:58,  7.36s/it][A
loss=0.816:  89%|████████▉ | 56/63 [03:15<00:37,  5.32s/it][A
loss=1.046:  89%|████████▉ | 56/63 [03:15<00:37,  5.32s/it][A
loss=1.046:  90%|█████████ | 57/63 [03:15<00:23,  3.89s/it][A
loss=0.572:  90%|█████████ | 57/63 [03:15<00:23,  3.89s/it][A
loss=0.572:  92%|█████████▏| 58/63 [03:16<00:14,  2.89s/it][A
loss=0.359:  92%|█████████▏| 58/63 [03:16<00:14,  2.89s/it][A
loss=0.359:  94%|█████████▎| 59/63 [03:16<00:08,  2.19s/it][A
loss=0.502:  94%|█████████▎| 59/63 [03:16<00:08,  2.19s/it][A
loss=0.502:  95%|█████████▌| 60/63 [03:17<00:05,  1.70s/it][A
loss=0.704:  95%|█████████▌| 60/63 [03:17<00:05,  1.70s/it][A
loss=0.704:  97%|█████████▋| 61/63 [03:17<00:02,  1.35s/it][A
loss=0.534:  97%|█████████▋| 61/63 [03:17<00:02,  1.35s/it][A
loss=0.534:  98%|█████████▊| 62/63 [03:18<00:01,  1.11s/it][A
loss=0.538:  98%|█████████▊| 62/63 [03:18<00:01,  1.11

{"eval_acc": 0.7190784737221022, "eval_f1": 0.20099981407262818, "eval_acc_and_f1": 0.4600391438973652, "eval_loss": 1.1274298472063882, "learning_rate": 0.0, "train_loss": 0.47161322583754856, "step": 189}


07/17/2022 23:09:37 - INFO - utilities.trainers -   ***** Running evaluation iter-4_trial1 *****
07/17/2022 23:09:37 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 23:09:37 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/17/2022 23:10:05 - INFO - utilities.trainers -   ***** Eval results iter-4_trial1 *****
07/17/2022 23:10:05 - INFO - utilities.trainers -     acc = 0.7190784737221022
07/17/2022 23:10:05 - INFO - utilities.trainers -     acc_and_f1 = 0.4600391438973652
07/17/2022 23:10:05 - INFO - utilities.trainers -     f1 = 0.20099981407262818
07/17/2022 23:10:07 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/17/2022 23:10:13 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 23:10:13 - INFO - utilities.trainers -     Num examples = 34722
07/17/2022 23:10:13 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/17/2022 23:12:32 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 23:12:32 - INFO - utilities.trainers -     acc = 0.7179021945740452
07/17/2022 23:12:32 - INFO - utilities.trainers -     acc_and_f1 = 0.4529019770481735
07/17/2022 23:12:32 - INFO - utilities.trainers -     f1 = 0.1879017595223017



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/17/2022 23:12:32 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 23:12:51 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 23:12:55 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 23:12:55 - INFO - utilities.trainers -     Num examples = 48000
07/17/2022 23:12:55 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/188 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 188/188 [06:20<00:00,  2.02s/it]
07/17/2022 23:19:16 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 23:19:16 - INFO - utilities.trainers -     acc = 0.7126666666666667
07/17/2022 23:19:16 - INFO - utilities.trainers -     acc_and_f1 = 0.43951785916900843
07/17/2022 23:19:16 - INFO - utilities.trainers -     f1 = 0.1663690516713502
07/17/2022 23:19:16 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 23:19:35 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 23:19:35 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 23:19:38 - INFO - utilities.trainers -     Num examples = 2000
07/17/2022 23:19:38 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 8/8 [00:15<00:00,  1.98s/it]
07/17/2022 23:19:54 - INFO - util



************
End of iteration 4:
Train loss 1.3228, Val loss 1.1274298472063882, Test loss 1.1258293429718298
Annotated 500 samples
Current labeled (training) data: 2500 samples
Remaining budget: 1500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 5!



07/17/2022 23:22:32 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.7190784737221022, acc_best_iteration=4, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-4', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-5', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, fp

warmup steps: 23
total steps: 234
logging steps: 15
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.246:   1%|▏         | 1/79 [00:00<00:38,  2.04it/s][A
loss=3.253:   1%|▏         | 1/79 [00:00<00:38,  2.04it/s][A
loss=3.253:   3%|▎         | 2/79 [00:01<00:40,  1.92it/s][A
loss=3.221:   3%|▎         | 2/79 [00:01<00:40,  1.92it/s][A
loss=3.221:   4%|▍         | 3/79 [00:01<00:40,  1.86it/s][A
loss=3.256:   4%|▍         | 3/79 [00:01<00:40,  1.86it/s][A
loss=3.256:   5%|▌         | 4/79 [00:02<00:41,  1.82it/s][A
loss=3.127:   5%|▌         | 4/79 [00:02<00:41,  1.82it/s][A
loss=3.127:   6%|▋         | 5/79 [00:02<00:41,  1.79it/s][A
loss=3.076:   6%|▋         | 5/79 [00:02<00:41,  1.79it/s][A
loss=3.076:   8%|▊         | 6/79 [00:03<00:41,  1.77it/s][A
loss=3.196:   8%|▊         | 6/79 [00:03<00:41,  1.77it/s][A
loss=3.196:   9%|▉         | 7/79 [00:03<00:40,  1.76it/s][A
loss=3.141:   9%|▉         | 7/79 [00:04<00:40,  1.76it/s][A
loss=3.141:  10%|█         | 8/79 [00:04<00:40,  1.75it/s][A
loss=2.980:  10%|█         | 8/79 [00:04<00:40,  1.75it/s][A
loss=2.

{"eval_acc": 0.17249820014398848, "eval_f1": 0.011948592928480297, "eval_acc_and_f1": 0.09222339653623439, "eval_loss": 3.115387567452022, "learning_rate": 1.3043478260869566e-05, "train_loss": 3.083747148513794, "step": 15}



loss=2.709:  19%|█▉        | 15/79 [00:40<10:38,  9.97s/it][A
loss=2.709:  20%|██        | 16/79 [00:40<07:31,  7.16s/it][A
loss=2.749:  20%|██        | 16/79 [00:40<07:31,  7.16s/it][A
loss=2.749:  22%|██▏       | 17/79 [00:41<05:21,  5.19s/it][A
loss=2.895:  22%|██▏       | 17/79 [00:41<05:21,  5.19s/it][A
loss=2.895:  23%|██▎       | 18/79 [00:41<03:52,  3.80s/it][A
loss=2.860:  23%|██▎       | 18/79 [00:41<03:52,  3.80s/it][A
loss=2.860:  24%|██▍       | 19/79 [00:42<02:50,  2.83s/it][A
loss=2.784:  24%|██▍       | 19/79 [00:42<02:50,  2.83s/it][A
loss=2.784:  25%|██▌       | 20/79 [00:42<02:07,  2.16s/it][A
loss=2.806:  25%|██▌       | 20/79 [00:42<02:07,  2.16s/it][A
loss=2.806:  27%|██▋       | 21/79 [00:43<01:37,  1.68s/it][A
loss=2.697:  27%|██▋       | 21/79 [00:43<01:37,  1.68s/it][A
loss=2.697:  28%|██▊       | 22/79 [00:44<01:17,  1.36s/it][A
loss=2.542:  28%|██▊       | 22/79 [00:44<01:17,  1.36s/it][A
loss=2.542:  29%|██▉       | 23/79 [00:44<01:03,  1.13

{"eval_acc": 0.18948884089272858, "eval_f1": 0.05068841161487429, "eval_acc_and_f1": 0.12008862625380143, "eval_loss": 2.5101551754134044, "learning_rate": 1.9345794392523366e-05, "train_loss": 2.6147706508636475, "step": 30}



loss=2.322:  39%|███▉      | 31/79 [01:21<05:49,  7.29s/it][A
loss=2.249:  39%|███▉      | 31/79 [01:21<05:49,  7.29s/it][A
loss=2.249:  41%|████      | 32/79 [01:21<04:08,  5.28s/it][A
loss=2.459:  41%|████      | 32/79 [01:21<04:08,  5.28s/it][A
loss=2.459:  42%|████▏     | 33/79 [01:22<02:58,  3.87s/it][A
loss=2.216:  42%|████▏     | 33/79 [01:22<02:58,  3.87s/it][A
loss=2.216:  43%|████▎     | 34/79 [01:22<02:10,  2.89s/it][A
loss=2.112:  43%|████▎     | 34/79 [01:22<02:10,  2.89s/it][A
loss=2.112:  44%|████▍     | 35/79 [01:23<01:36,  2.19s/it][A
loss=2.153:  44%|████▍     | 35/79 [01:23<01:36,  2.19s/it][A
loss=2.153:  46%|████▌     | 36/79 [01:23<01:13,  1.71s/it][A
loss=2.139:  46%|████▌     | 36/79 [01:24<01:13,  1.71s/it][A
loss=2.139:  47%|████▋     | 37/79 [01:24<00:57,  1.37s/it][A
loss=2.268:  47%|████▋     | 37/79 [01:24<00:57,  1.37s/it][A
loss=2.268:  48%|████▊     | 38/79 [01:25<00:46,  1.14s/it][A
loss=1.897:  48%|████▊     | 38/79 [01:25<00:46,  1.14

{"eval_acc": 0.3303095752339813, "eval_f1": 0.07915492062174004, "eval_acc_and_f1": 0.20473224792786066, "eval_loss": 2.2707497222082957, "learning_rate": 1.794392523364486e-05, "train_loss": 2.09562398592631, "step": 45}



loss=1.930:  58%|█████▊    | 46/79 [02:01<04:02,  7.36s/it][A
loss=2.006:  58%|█████▊    | 46/79 [02:02<04:02,  7.36s/it][A
loss=2.006:  59%|█████▉    | 47/79 [02:02<02:50,  5.32s/it][A
loss=1.673:  59%|█████▉    | 47/79 [02:02<02:50,  5.32s/it][A
loss=1.673:  61%|██████    | 48/79 [02:03<02:01,  3.90s/it][A
loss=1.684:  61%|██████    | 48/79 [02:03<02:01,  3.90s/it][A
loss=1.684:  62%|██████▏   | 49/79 [02:03<01:27,  2.91s/it][A
loss=1.894:  62%|██████▏   | 49/79 [02:03<01:27,  2.91s/it][A
loss=1.894:  63%|██████▎   | 50/79 [02:04<01:04,  2.21s/it][A
loss=2.127:  63%|██████▎   | 50/79 [02:04<01:04,  2.21s/it][A
loss=2.127:  65%|██████▍   | 51/79 [02:04<00:48,  1.72s/it][A
loss=1.928:  65%|██████▍   | 51/79 [02:05<00:48,  1.72s/it][A
loss=1.928:  66%|██████▌   | 52/79 [02:05<00:37,  1.39s/it][A
loss=2.062:  66%|██████▌   | 52/79 [02:05<00:37,  1.39s/it][A
loss=2.062:  67%|██████▋   | 53/79 [02:06<00:29,  1.15s/it][A
loss=1.676:  67%|██████▋   | 53/79 [02:06<00:29,  1.15

{"eval_acc": 0.4421886249100072, "eval_f1": 0.10618138234321854, "eval_acc_and_f1": 0.27418500362661286, "eval_loss": 2.1069330147334506, "learning_rate": 1.6542056074766357e-05, "train_loss": 1.8192510286966959, "step": 60}



loss=1.680:  77%|███████▋  | 61/79 [02:45<02:23,  8.00s/it][A
loss=2.200:  77%|███████▋  | 61/79 [02:46<02:23,  8.00s/it][A
loss=2.200:  78%|███████▊  | 62/79 [02:46<01:38,  5.77s/it][A
loss=1.671:  78%|███████▊  | 62/79 [02:46<01:38,  5.77s/it][A
loss=1.671:  80%|███████▉  | 63/79 [02:47<01:07,  4.22s/it][A
loss=2.374:  80%|███████▉  | 63/79 [02:47<01:07,  4.22s/it][A
loss=2.374:  81%|████████  | 64/79 [02:47<00:46,  3.13s/it][A
loss=1.763:  81%|████████  | 64/79 [02:47<00:46,  3.13s/it][A
loss=1.763:  82%|████████▏ | 65/79 [02:48<00:33,  2.36s/it][A
loss=2.046:  82%|████████▏ | 65/79 [02:48<00:33,  2.36s/it][A
loss=2.046:  84%|████████▎ | 66/79 [02:48<00:23,  1.83s/it][A
loss=1.425:  84%|████████▎ | 66/79 [02:48<00:23,  1.83s/it][A
loss=1.425:  85%|████████▍ | 67/79 [02:49<00:17,  1.46s/it][A
loss=1.864:  85%|████████▍ | 67/79 [02:49<00:17,  1.46s/it][A
loss=1.864:  86%|████████▌ | 68/79 [02:49<00:13,  1.19s/it][A
loss=1.527:  86%|████████▌ | 68/79 [02:50<00:13,  1.19

{"eval_acc": 0.5030957523398129, "eval_f1": 0.1266806504897866, "eval_acc_and_f1": 0.31488820141479973, "eval_loss": 1.9023305773735046, "learning_rate": 1.5140186915887852e-05, "train_loss": 1.7686414082845052, "step": 75}



loss=1.995:  96%|█████████▌| 76/79 [03:28<00:23,  7.71s/it][A
loss=1.351:  96%|█████████▌| 76/79 [03:28<00:23,  7.71s/it][A
loss=1.351:  97%|█████████▋| 77/79 [03:28<00:11,  5.58s/it][A
loss=1.316:  97%|█████████▋| 77/79 [03:29<00:11,  5.58s/it][A
loss=1.316:  99%|█████████▊| 78/79 [03:29<00:04,  4.08s/it][A
loss=1.756:  99%|█████████▊| 78/79 [03:29<00:04,  4.08s/it][A
loss=1.756: 100%|██████████| 79/79 [03:29<00:00,  2.65s/it]
Epoch:  33%|███▎      | 1/3 [03:29<06:59, 209.68s/it]
Iteration:   0%|          | 0/79 [00:00<?, ?it/s][A
loss=1.672:   0%|          | 0/79 [00:00<?, ?it/s][A
loss=1.672:   1%|▏         | 1/79 [00:00<00:45,  1.72it/s][A
loss=1.465:   1%|▏         | 1/79 [00:00<00:45,  1.72it/s][A
loss=1.465:   3%|▎         | 2/79 [00:01<00:44,  1.71it/s][A
loss=0.942:   3%|▎         | 2/79 [00:01<00:44,  1.71it/s][A
loss=0.942:   4%|▍         | 3/79 [00:01<00:44,  1.71it/s][A
loss=1.571:   4%|▍         | 3/79 [00:01<00:44,  1.71it/s][A
loss=1.571:   5%|▌         |

{"eval_acc": 0.6431965442764579, "eval_f1": 0.18185127596733988, "eval_acc_and_f1": 0.4125239101218989, "eval_loss": 1.6758939240659987, "learning_rate": 1.3738317757009347e-05, "train_loss": 1.4584136644999186, "step": 90}



loss=1.439:  15%|█▌        | 12/79 [00:42<09:02,  8.10s/it][A
loss=1.513:  15%|█▌        | 12/79 [00:42<09:02,  8.10s/it][A
loss=1.513:  16%|█▋        | 13/79 [00:43<06:25,  5.85s/it][A
loss=1.423:  16%|█▋        | 13/79 [00:43<06:25,  5.85s/it][A
loss=1.423:  18%|█▊        | 14/79 [00:43<04:37,  4.27s/it][A
loss=1.572:  18%|█▊        | 14/79 [00:44<04:37,  4.27s/it][A
loss=1.572:  19%|█▉        | 15/79 [00:44<03:22,  3.16s/it][A
loss=1.319:  19%|█▉        | 15/79 [00:44<03:22,  3.16s/it][A
loss=1.319:  20%|██        | 16/79 [00:45<02:30,  2.39s/it][A
loss=1.232:  20%|██        | 16/79 [00:45<02:30,  2.39s/it][A
loss=1.232:  22%|██▏       | 17/79 [00:45<01:54,  1.85s/it][A
loss=1.510:  22%|██▏       | 17/79 [00:45<01:54,  1.85s/it][A
loss=1.510:  23%|██▎       | 18/79 [00:46<01:29,  1.47s/it][A
loss=0.753:  23%|██▎       | 18/79 [00:46<01:29,  1.47s/it][A
loss=0.753:  24%|██▍       | 19/79 [00:46<01:12,  1.21s/it][A
loss=1.053:  24%|██▍       | 19/79 [00:47<01:12,  1.21

{"eval_acc": 0.7056875449964003, "eval_f1": 0.20060584404436818, "eval_acc_and_f1": 0.45314669452038425, "eval_loss": 1.4425802443708693, "learning_rate": 1.233644859813084e-05, "train_loss": 1.2694793581962585, "step": 105}



loss=1.183:  34%|███▍      | 27/79 [01:23<06:20,  7.32s/it][A
loss=1.200:  34%|███▍      | 27/79 [01:23<06:20,  7.32s/it][A
loss=1.200:  35%|███▌      | 28/79 [01:24<04:30,  5.30s/it][A
loss=1.478:  35%|███▌      | 28/79 [01:24<04:30,  5.30s/it][A
loss=1.478:  37%|███▋      | 29/79 [01:24<03:14,  3.88s/it][A
loss=1.170:  37%|███▋      | 29/79 [01:24<03:14,  3.88s/it][A
loss=1.170:  38%|███▊      | 30/79 [01:25<02:21,  2.89s/it][A
loss=1.121:  38%|███▊      | 30/79 [01:25<02:21,  2.89s/it][A
loss=1.121:  39%|███▉      | 31/79 [01:25<01:45,  2.20s/it][A
loss=1.238:  39%|███▉      | 31/79 [01:25<01:45,  2.20s/it][A
loss=1.238:  41%|████      | 32/79 [01:26<01:20,  1.72s/it][A
loss=1.244:  41%|████      | 32/79 [01:26<01:20,  1.72s/it][A
loss=1.244:  42%|████▏     | 33/79 [01:27<01:03,  1.38s/it][A
loss=1.382:  42%|████▏     | 33/79 [01:27<01:03,  1.38s/it][A
loss=1.382:  43%|████▎     | 34/79 [01:27<00:51,  1.14s/it][A
loss=1.082:  43%|████▎     | 34/79 [01:27<00:51,  1.14

{"eval_acc": 0.7352051835853132, "eval_f1": 0.21031157204881665, "eval_acc_and_f1": 0.4727583778170649, "eval_loss": 1.2870411659990038, "learning_rate": 1.0934579439252338e-05, "train_loss": 1.2337527712186178, "step": 120}



loss=1.229:  53%|█████▎    | 42/79 [02:05<04:43,  7.65s/it][A
loss=1.118:  53%|█████▎    | 42/79 [02:05<04:43,  7.65s/it][A
loss=1.118:  54%|█████▍    | 43/79 [02:06<03:19,  5.53s/it][A
loss=1.182:  54%|█████▍    | 43/79 [02:06<03:19,  5.53s/it][A
loss=1.182:  56%|█████▌    | 44/79 [02:06<02:21,  4.05s/it][A
loss=1.300:  56%|█████▌    | 44/79 [02:07<02:21,  4.05s/it][A
loss=1.300:  57%|█████▋    | 45/79 [02:07<01:42,  3.01s/it][A
loss=0.748:  57%|█████▋    | 45/79 [02:07<01:42,  3.01s/it][A
loss=0.748:  58%|█████▊    | 46/79 [02:08<01:15,  2.28s/it][A
loss=1.085:  58%|█████▊    | 46/79 [02:08<01:15,  2.28s/it][A
loss=1.085:  59%|█████▉    | 47/79 [02:08<00:56,  1.77s/it][A
loss=0.925:  59%|█████▉    | 47/79 [02:08<00:56,  1.77s/it][A
loss=0.925:  61%|██████    | 48/79 [02:09<00:44,  1.42s/it][A
loss=0.946:  61%|██████    | 48/79 [02:09<00:44,  1.42s/it][A
loss=0.946:  62%|██████▏   | 49/79 [02:09<00:35,  1.17s/it][A
loss=0.882:  62%|██████▏   | 49/79 [02:10<00:35,  1.17

{"eval_acc": 0.7471562275017999, "eval_f1": 0.21525484381900495, "eval_acc_and_f1": 0.4812055356604024, "eval_loss": 1.1606675258704595, "learning_rate": 9.532710280373833e-06, "train_loss": 1.0896191795667012, "step": 135}



loss=0.987:  72%|███████▏  | 57/79 [02:46<02:41,  7.34s/it][A
loss=0.862:  72%|███████▏  | 57/79 [02:46<02:41,  7.34s/it][A
loss=0.862:  73%|███████▎  | 58/79 [02:47<01:51,  5.31s/it][A
loss=0.953:  73%|███████▎  | 58/79 [02:47<01:51,  5.31s/it][A
loss=0.953:  75%|███████▍  | 59/79 [02:47<01:17,  3.89s/it][A
loss=0.863:  75%|███████▍  | 59/79 [02:47<01:17,  3.89s/it][A
loss=0.863:  76%|███████▌  | 60/79 [02:48<00:55,  2.90s/it][A
loss=0.844:  76%|███████▌  | 60/79 [02:48<00:55,  2.90s/it][A
loss=0.844:  77%|███████▋  | 61/79 [02:48<00:39,  2.21s/it][A
loss=0.701:  77%|███████▋  | 61/79 [02:49<00:39,  2.21s/it][A
loss=0.701:  78%|███████▊  | 62/79 [02:49<00:29,  1.72s/it][A
loss=1.330:  78%|███████▊  | 62/79 [02:49<00:29,  1.72s/it][A
loss=1.330:  80%|███████▉  | 63/79 [02:50<00:22,  1.38s/it][A
loss=1.375:  80%|███████▉  | 63/79 [02:50<00:22,  1.38s/it][A
loss=1.375:  81%|████████  | 64/79 [02:50<00:17,  1.14s/it][A
loss=0.960:  81%|████████  | 64/79 [02:50<00:17,  1.14

{"eval_acc": 0.7591072714182865, "eval_f1": 0.21930296326514842, "eval_acc_and_f1": 0.48920511734171745, "eval_loss": 1.057755702308246, "learning_rate": 8.130841121495327e-06, "train_loss": 0.9042288343111674, "step": 150}



loss=0.809:  91%|█████████ | 72/79 [03:28<00:52,  7.55s/it][A
loss=0.718:  91%|█████████ | 72/79 [03:28<00:52,  7.55s/it][A
loss=0.718:  92%|█████████▏| 73/79 [03:28<00:32,  5.46s/it][A
loss=0.873:  92%|█████████▏| 73/79 [03:29<00:32,  5.46s/it][A
loss=0.873:  94%|█████████▎| 74/79 [03:29<00:19,  4.00s/it][A
loss=0.877:  94%|█████████▎| 74/79 [03:29<00:19,  4.00s/it][A
loss=0.877:  95%|█████████▍| 75/79 [03:30<00:11,  2.98s/it][A
loss=0.862:  95%|█████████▍| 75/79 [03:30<00:11,  2.98s/it][A
loss=0.862:  96%|█████████▌| 76/79 [03:30<00:06,  2.26s/it][A
loss=0.846:  96%|█████████▌| 76/79 [03:30<00:06,  2.26s/it][A
loss=0.846:  97%|█████████▋| 77/79 [03:31<00:03,  1.76s/it][A
loss=0.785:  97%|█████████▋| 77/79 [03:31<00:03,  1.76s/it][A
loss=0.785:  99%|█████████▊| 78/79 [03:31<00:01,  1.41s/it][A
loss=0.495:  99%|█████████▊| 78/79 [03:31<00:01,  1.41s/it][A
loss=0.495: 100%|██████████| 79/79 [03:31<00:00,  2.68s/it]
Epoch:  67%|██████▋   | 2/3 [07:01<03:30, 210.37s/it]
Ite

{"eval_acc": 0.7635709143268539, "eval_f1": 0.22444385014556226, "eval_acc_and_f1": 0.49400738223620805, "eval_loss": 0.9801471212080547, "learning_rate": 6.728971962616823e-06, "train_loss": 0.7725532015164693, "step": 165}



loss=1.024:   9%|▉         | 7/79 [00:36<12:15, 10.22s/it][A
loss=1.024:  10%|█         | 8/79 [00:36<08:40,  7.33s/it][A
loss=0.712:  10%|█         | 8/79 [00:36<08:40,  7.33s/it][A
loss=0.712:  11%|█▏        | 9/79 [00:37<06:11,  5.30s/it][A
loss=0.760:  11%|█▏        | 9/79 [00:37<06:11,  5.30s/it][A
loss=0.760:  13%|█▎        | 10/79 [00:37<04:28,  3.89s/it][A
loss=0.726:  13%|█▎        | 10/79 [00:38<04:28,  3.89s/it][A
loss=0.726:  14%|█▍        | 11/79 [00:38<03:16,  2.90s/it][A
loss=0.836:  14%|█▍        | 11/79 [00:38<03:16,  2.90s/it][A
loss=0.836:  15%|█▌        | 12/79 [00:39<02:27,  2.20s/it][A
loss=0.926:  15%|█▌        | 12/79 [00:39<02:27,  2.20s/it][A
loss=0.926:  16%|█▋        | 13/79 [00:39<01:53,  1.71s/it][A
loss=0.833:  16%|█▋        | 13/79 [00:39<01:53,  1.71s/it][A
loss=0.833:  18%|█▊        | 14/79 [00:40<01:29,  1.38s/it][A
loss=1.168:  18%|█▊        | 14/79 [00:40<01:29,  1.38s/it][A
loss=1.168:  19%|█▉        | 15/79 [00:40<01:12,  1.14s/it]

{"eval_acc": 0.7686105111591073, "eval_f1": 0.23523401816522518, "eval_acc_and_f1": 0.5019222646621663, "eval_loss": 0.9330519650663648, "learning_rate": 5.3271028037383174e-06, "train_loss": 0.8253240863482157, "step": 180}



loss=0.759:  29%|██▉       | 23/79 [01:20<07:24,  7.94s/it][A
loss=0.558:  29%|██▉       | 23/79 [01:20<07:24,  7.94s/it][A
loss=0.558:  30%|███       | 24/79 [01:21<05:15,  5.73s/it][A
loss=0.474:  30%|███       | 24/79 [01:21<05:15,  5.73s/it][A
loss=0.474:  32%|███▏      | 25/79 [01:21<03:46,  4.19s/it][A
loss=0.823:  32%|███▏      | 25/79 [01:21<03:46,  4.19s/it][A
loss=0.823:  33%|███▎      | 26/79 [01:22<02:44,  3.11s/it][A
loss=0.981:  33%|███▎      | 26/79 [01:22<02:44,  3.11s/it][A
loss=0.981:  34%|███▍      | 27/79 [01:22<02:02,  2.35s/it][A
loss=0.841:  34%|███▍      | 27/79 [01:22<02:02,  2.35s/it][A
loss=0.841:  35%|███▌      | 28/79 [01:23<01:32,  1.82s/it][A
loss=0.978:  35%|███▌      | 28/79 [01:23<01:32,  1.82s/it][A
loss=0.978:  37%|███▋      | 29/79 [01:23<01:12,  1.45s/it][A
loss=0.445:  37%|███▋      | 29/79 [01:24<01:12,  1.45s/it][A
loss=0.445:  38%|███▊      | 30/79 [01:24<00:58,  1.19s/it][A
loss=0.502:  38%|███▊      | 30/79 [01:24<00:58,  1.19

{"eval_acc": 0.7782577393808495, "eval_f1": 0.25323827466057425, "eval_acc_and_f1": 0.5157480070207119, "eval_loss": 0.8967332584517342, "learning_rate": 3.925233644859814e-06, "train_loss": 0.7192180434862773, "step": 195}



loss=0.829:  48%|████▊     | 38/79 [02:02<05:12,  7.63s/it][A
loss=0.543:  48%|████▊     | 38/79 [02:02<05:12,  7.63s/it][A
loss=0.543:  49%|████▉     | 39/79 [02:03<03:40,  5.51s/it][A
loss=0.615:  49%|████▉     | 39/79 [02:03<03:40,  5.51s/it][A
loss=0.615:  51%|█████     | 40/79 [02:03<02:37,  4.03s/it][A
loss=0.615:  51%|█████     | 40/79 [02:03<02:37,  4.03s/it][A
loss=0.615:  52%|█████▏    | 41/79 [02:04<01:53,  3.00s/it][A
loss=0.822:  52%|█████▏    | 41/79 [02:04<01:53,  3.00s/it][A
loss=0.822:  53%|█████▎    | 42/79 [02:04<01:24,  2.27s/it][A
loss=0.784:  53%|█████▎    | 42/79 [02:05<01:24,  2.27s/it][A
loss=0.784:  54%|█████▍    | 43/79 [02:05<01:03,  1.77s/it][A
loss=0.664:  54%|█████▍    | 43/79 [02:05<01:03,  1.77s/it][A
loss=0.664:  56%|█████▌    | 44/79 [02:06<00:49,  1.41s/it][A
loss=0.690:  56%|█████▌    | 44/79 [02:06<00:49,  1.41s/it][A
loss=0.690:  57%|█████▋    | 45/79 [02:06<00:39,  1.17s/it][A
loss=0.759:  57%|█████▋    | 45/79 [02:06<00:39,  1.17

{"eval_acc": 0.7837293016558675, "eval_f1": 0.26360091789896056, "eval_acc_and_f1": 0.5236651097774141, "eval_loss": 0.8745561731713158, "learning_rate": 2.5233644859813085e-06, "train_loss": 0.7408166289329529, "step": 210}



loss=0.816:  67%|██████▋   | 53/79 [02:43<03:10,  7.34s/it][A
loss=0.619:  67%|██████▋   | 53/79 [02:43<03:10,  7.34s/it][A
loss=0.619:  68%|██████▊   | 54/79 [02:43<02:12,  5.31s/it][A
loss=0.668:  68%|██████▊   | 54/79 [02:44<02:12,  5.31s/it][A
loss=0.668:  70%|██████▉   | 55/79 [02:44<01:33,  3.90s/it][A
loss=0.421:  70%|██████▉   | 55/79 [02:44<01:33,  3.90s/it][A
loss=0.421:  71%|███████   | 56/79 [02:45<01:06,  2.90s/it][A
loss=0.523:  71%|███████   | 56/79 [02:45<01:06,  2.90s/it][A
loss=0.523:  72%|███████▏  | 57/79 [02:45<00:48,  2.21s/it][A
loss=0.678:  72%|███████▏  | 57/79 [02:45<00:48,  2.21s/it][A
loss=0.678:  73%|███████▎  | 58/79 [02:46<00:36,  1.72s/it][A
loss=0.560:  73%|███████▎  | 58/79 [02:46<00:36,  1.72s/it][A
loss=0.560:  75%|███████▍  | 59/79 [02:46<00:27,  1.38s/it][A
loss=0.741:  75%|███████▍  | 59/79 [02:47<00:27,  1.38s/it][A
loss=0.741:  76%|███████▌  | 60/79 [02:47<00:21,  1.14s/it][A
loss=0.891:  76%|███████▌  | 60/79 [02:47<00:21,  1.14

{"eval_acc": 0.7827213822894169, "eval_f1": 0.26342309205366715, "eval_acc_and_f1": 0.523072237171542, "eval_loss": 0.8574294937508447, "learning_rate": 1.1214953271028038e-06, "train_loss": 0.6818112293879192, "step": 225}



loss=1.128:  86%|████████▌ | 68/79 [03:25<01:24,  7.64s/it][A
loss=0.901:  86%|████████▌ | 68/79 [03:25<01:24,  7.64s/it][A
loss=0.901:  87%|████████▋ | 69/79 [03:26<00:55,  5.52s/it][A
loss=0.513:  87%|████████▋ | 69/79 [03:26<00:55,  5.52s/it][A
loss=0.513:  89%|████████▊ | 70/79 [03:26<00:36,  4.04s/it][A
loss=0.540:  89%|████████▊ | 70/79 [03:26<00:36,  4.04s/it][A
loss=0.540:  90%|████████▉ | 71/79 [03:27<00:24,  3.00s/it][A
loss=0.827:  90%|████████▉ | 71/79 [03:27<00:24,  3.00s/it][A
loss=0.827:  91%|█████████ | 72/79 [03:27<00:15,  2.28s/it][A
loss=0.498:  91%|█████████ | 72/79 [03:28<00:15,  2.28s/it][A
loss=0.498:  92%|█████████▏| 73/79 [03:28<00:10,  1.77s/it][A
loss=0.846:  92%|█████████▏| 73/79 [03:28<00:10,  1.77s/it][A
loss=0.846:  94%|█████████▎| 74/79 [03:29<00:07,  1.41s/it][A
loss=0.679:  94%|█████████▎| 74/79 [03:29<00:07,  1.41s/it][A
loss=0.679:  95%|█████████▍| 75/79 [03:29<00:04,  1.16s/it][A
loss=0.601:  95%|█████████▍| 75/79 [03:29<00:04,  1.16

{"eval_acc": 0.7844492440604751, "eval_f1": 0.263956242303954, "eval_acc_and_f1": 0.5242027431822145, "eval_loss": 0.8516431940453393, "learning_rate": 0.0, "train_loss": 0.5197933226823807, "step": 237}


07/17/2022 23:34:07 - INFO - utilities.trainers -   ***** Running evaluation iter-5_trial1 *****
07/17/2022 23:34:07 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 23:34:07 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/17/2022 23:34:35 - INFO - utilities.trainers -   ***** Eval results iter-5_trial1 *****
07/17/2022 23:34:35 - INFO - utilities.trainers -     acc = 0.7844492440604751
07/17/2022 23:34:35 - INFO - utilities.trainers -     acc_and_f1 = 0.5242027431822145
07/17/2022 23:34:35 - INFO - utilities.trainers -     f1 = 0.263956242303954
07/17/2022 23:34:38 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/17/2022 23:34:43 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 23:34:43 - INFO - utilities.trainers -     Num examples = 34722
07/17/2022 23:34:43 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/17/2022 23:37:02 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 23:37:02 - INFO - utilities.trainers -     acc = 0.7831346120615172
07/17/2022 23:37:02 - INFO - utilities.trainers -     acc_and_f1 = 0.5184599000141392
07/17/2022 23:37:02 - INFO - utilities.trainers -     f1 = 0.2537851879667611



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/17/2022 23:37:03 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 23:37:21 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 23:37:30 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 23:37:30 - INFO - utilities.trainers -     Num examples = 47500
07/17/2022 23:37:30 - INFO - utilities.trainers -     Batch size = 256


MC samples N=None


Evaluating: 100%|██████████| 186/186 [06:16<00:00,  2.02s/it]
07/17/2022 23:43:47 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 23:43:47 - INFO - utilities.trainers -     acc = 0.7814105263157894
07/17/2022 23:43:47 - INFO - utilities.trainers -     acc_and_f1 = 0.5089196015003077
07/17/2022 23:43:47 - INFO - utilities.trainers -     f1 = 0.23642867668482587
07/17/2022 23:43:47 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/17/2022 23:44:06 - INFO - utilities.data_loader -   Selecting subsample...
07/17/2022 23:44:14 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 23:44:14 - INFO - utilities.trainers -     Num examples = 2500
07/17/2022 23:44:14 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 10/10 [00:19<00:00,  1.98s/it]
07/17/2022 23:44:34 - INFO - ut



************
End of iteration 5:
Train loss 1.3669, Val loss 0.8516431940453393, Test loss 0.8534096295342726
Annotated 500 samples
Current labeled (training) data: 3000 samples
Remaining budget: 1000 (in samples)
************

Saving json with the results....

 Start Training model of iteration 6!



07/17/2022 23:47:29 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.7844492440604751, acc_best_iteration=5, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-5', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-6', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, fp

warmup steps: 28
total steps: 281
logging steps: 18
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.554:   1%|          | 1/94 [00:00<00:46,  2.01it/s][A
loss=3.610:   1%|          | 1/94 [00:00<00:46,  2.01it/s][A
loss=3.610:   2%|▏         | 2/94 [00:01<00:48,  1.89it/s][A
loss=3.372:   2%|▏         | 2/94 [00:01<00:48,  1.89it/s][A
loss=3.372:   3%|▎         | 3/94 [00:01<00:49,  1.86it/s][A
loss=3.366:   3%|▎         | 3/94 [00:01<00:49,  1.86it/s][A
loss=3.366:   4%|▍         | 4/94 [00:02<00:49,  1.83it/s][A
loss=3.359:   4%|▍         | 4/94 [00:02<00:49,  1.83it/s][A
loss=3.359:   5%|▌         | 5/94 [00:02<00:49,  1.81it/s][A
loss=3.414:   5%|▌         | 5/94 [00:02<00:49,  1.81it/s][A
loss=3.414:   6%|▋         | 6/94 [00:03<00:48,  1.81it/s][A
loss=3.322:   6%|▋         | 6/94 [00:03<00:48,  1.81it/s][A
loss=3.322:   7%|▋         | 7/94 [00:03<00:48,  1.80it/s][A
loss=3.307:   7%|▋         | 7/94 [00:04<00:48,  1.80it/s][A
loss=3.307:   9%|▊         | 8/94 [00:04<00:47,  1.80it/s][A
loss=3.245:   9%|▊         | 8/94 [00:04<00:47,  1.80it/s][A
loss=3.

{"eval_acc": 0.2290856731461483, "eval_f1": 0.052641721746329494, "eval_acc_and_f1": 0.1408636974462389, "eval_loss": 2.8609505891799927, "learning_rate": 1.2857142857142859e-05, "train_loss": 3.2243981626298694, "step": 18}



loss=2.825:  20%|██        | 19/94 [00:42<08:59,  7.20s/it][A
loss=2.747:  20%|██        | 19/94 [00:42<08:59,  7.20s/it][A
loss=2.747:  21%|██▏       | 20/94 [00:42<06:25,  5.21s/it][A
loss=2.684:  21%|██▏       | 20/94 [00:42<06:25,  5.21s/it][A
loss=2.684:  22%|██▏       | 21/94 [00:43<04:38,  3.81s/it][A
loss=2.551:  22%|██▏       | 21/94 [00:43<04:38,  3.81s/it][A
loss=2.551:  23%|██▎       | 22/94 [00:43<03:24,  2.84s/it][A
loss=2.472:  23%|██▎       | 22/94 [00:44<03:24,  2.84s/it][A
loss=2.472:  24%|██▍       | 23/94 [00:44<02:33,  2.16s/it][A
loss=2.573:  24%|██▍       | 23/94 [00:44<02:33,  2.16s/it][A
loss=2.573:  26%|██▌       | 24/94 [00:45<01:57,  1.68s/it][A
loss=2.570:  26%|██▌       | 24/94 [00:45<01:57,  1.68s/it][A
loss=2.570:  27%|██▋       | 25/94 [00:45<01:32,  1.34s/it][A
loss=2.548:  27%|██▋       | 25/94 [00:45<01:32,  1.34s/it][A
loss=2.548:  28%|██▊       | 26/94 [00:46<01:15,  1.11s/it][A
loss=2.509:  28%|██▊       | 26/94 [00:46<01:15,  1.11

{"eval_acc": 0.21540676745860332, "eval_f1": 0.03148240992713073, "eval_acc_and_f1": 0.12344458869286702, "eval_loss": 2.4404562371117726, "learning_rate": 1.937007874015748e-05, "train_loss": 2.506657693121168, "step": 36}



loss=2.138:  39%|███▉      | 37/94 [01:24<06:53,  7.26s/it][A
loss=1.970:  39%|███▉      | 37/94 [01:24<06:53,  7.26s/it][A
loss=1.970:  40%|████      | 38/94 [01:24<04:53,  5.25s/it][A
loss=2.529:  40%|████      | 38/94 [01:24<04:53,  5.25s/it][A
loss=2.529:  41%|████▏     | 39/94 [01:25<03:31,  3.84s/it][A
loss=2.343:  41%|████▏     | 39/94 [01:25<03:31,  3.84s/it][A
loss=2.343:  43%|████▎     | 40/94 [01:25<02:34,  2.85s/it][A
loss=1.955:  43%|████▎     | 40/94 [01:26<02:34,  2.85s/it][A
loss=1.955:  44%|████▎     | 41/94 [01:26<01:54,  2.17s/it][A
loss=2.173:  44%|████▎     | 41/94 [01:26<01:54,  2.17s/it][A
loss=2.173:  45%|████▍     | 42/94 [01:27<01:27,  1.69s/it][A
loss=1.917:  45%|████▍     | 42/94 [01:27<01:27,  1.69s/it][A
loss=1.917:  46%|████▌     | 43/94 [01:27<01:08,  1.35s/it][A
loss=2.076:  46%|████▌     | 43/94 [01:27<01:08,  1.35s/it][A
loss=2.076:  47%|████▋     | 44/94 [01:28<00:55,  1.11s/it][A
loss=1.976:  47%|████▋     | 44/94 [01:28<00:55,  1.11

{"eval_acc": 0.46277897768178544, "eval_f1": 0.10101858939450771, "eval_acc_and_f1": 0.28189878353814657, "eval_loss": 2.1612379806382314, "learning_rate": 1.7952755905511813e-05, "train_loss": 2.090800152884589, "step": 54}



loss=2.056:  59%|█████▊    | 55/94 [02:07<04:56,  7.59s/it][A
loss=2.023:  59%|█████▊    | 55/94 [02:07<04:56,  7.59s/it][A
loss=2.023:  60%|█████▉    | 56/94 [02:08<03:28,  5.49s/it][A
loss=2.008:  60%|█████▉    | 56/94 [02:08<03:28,  5.49s/it][A
loss=2.008:  61%|██████    | 57/94 [02:08<02:28,  4.01s/it][A
loss=1.874:  61%|██████    | 57/94 [02:09<02:28,  4.01s/it][A
loss=1.874:  62%|██████▏   | 58/94 [02:09<01:47,  2.98s/it][A
loss=2.049:  62%|██████▏   | 58/94 [02:09<01:47,  2.98s/it][A
loss=2.049:  63%|██████▎   | 59/94 [02:10<01:18,  2.25s/it][A
loss=2.297:  63%|██████▎   | 59/94 [02:10<01:18,  2.25s/it][A
loss=2.297:  64%|██████▍   | 60/94 [02:10<00:59,  1.75s/it][A
loss=1.720:  64%|██████▍   | 60/94 [02:10<00:59,  1.75s/it][A
loss=1.720:  65%|██████▍   | 61/94 [02:11<00:45,  1.39s/it][A
loss=1.939:  65%|██████▍   | 61/94 [02:11<00:45,  1.39s/it][A
loss=1.939:  66%|██████▌   | 62/94 [02:11<00:36,  1.15s/it][A
loss=2.547:  66%|██████▌   | 62/94 [02:11<00:36,  1.15

{"eval_acc": 0.4526997840172786, "eval_f1": 0.1120973286742854, "eval_acc_and_f1": 0.28239855634578204, "eval_loss": 2.0653943674904958, "learning_rate": 1.6535433070866142e-05, "train_loss": 1.9579704536332025, "step": 72}



loss=2.080:  78%|███████▊  | 73/94 [02:51<02:38,  7.56s/it][A
loss=1.694:  78%|███████▊  | 73/94 [02:51<02:38,  7.56s/it][A
loss=1.694:  79%|███████▊  | 74/94 [02:51<01:49,  5.46s/it][A
loss=1.874:  79%|███████▊  | 74/94 [02:51<01:49,  5.46s/it][A
loss=1.874:  80%|███████▉  | 75/94 [02:52<01:15,  3.99s/it][A
loss=1.566:  80%|███████▉  | 75/94 [02:52<01:15,  3.99s/it][A
loss=1.566:  81%|████████  | 76/94 [02:52<00:53,  2.96s/it][A
loss=1.802:  81%|████████  | 76/94 [02:53<00:53,  2.96s/it][A
loss=1.802:  82%|████████▏ | 77/94 [02:53<00:38,  2.24s/it][A
loss=1.895:  82%|████████▏ | 77/94 [02:53<00:38,  2.24s/it][A
loss=1.895:  83%|████████▎ | 78/94 [02:54<00:27,  1.74s/it][A
loss=1.907:  83%|████████▎ | 78/94 [02:54<00:27,  1.74s/it][A
loss=1.907:  84%|████████▍ | 79/94 [02:54<00:20,  1.39s/it][A
loss=1.745:  84%|████████▍ | 79/94 [02:54<00:20,  1.39s/it][A
loss=1.745:  85%|████████▌ | 80/94 [02:55<00:15,  1.14s/it][A
loss=1.712:  85%|████████▌ | 80/94 [02:55<00:15,  1.14

{"eval_acc": 0.6470842332613391, "eval_f1": 0.15469502904120552, "eval_acc_and_f1": 0.4008896311512723, "eval_loss": 1.7599937277180808, "learning_rate": 1.5118110236220473e-05, "train_loss": 1.7830261521869235, "step": 90}



loss=1.771:  97%|█████████▋| 91/94 [03:37<00:24,  8.07s/it][A
loss=1.563:  97%|█████████▋| 91/94 [03:37<00:24,  8.07s/it][A
loss=1.563:  98%|█████████▊| 92/94 [03:37<00:11,  5.82s/it][A
loss=1.722:  98%|█████████▊| 92/94 [03:37<00:11,  5.82s/it][A
loss=1.722:  99%|█████████▉| 93/94 [03:38<00:04,  4.25s/it][A
loss=1.492:  99%|█████████▉| 93/94 [03:38<00:04,  4.25s/it][A
loss=1.492: 100%|██████████| 94/94 [03:38<00:00,  2.33s/it]
Epoch:  33%|███▎      | 1/3 [03:38<07:17, 218.69s/it]
Iteration:   0%|          | 0/94 [00:00<?, ?it/s][A
loss=1.358:   0%|          | 0/94 [00:00<?, ?it/s][A
loss=1.358:   1%|          | 1/94 [00:00<00:53,  1.75it/s][A
loss=1.592:   1%|          | 1/94 [00:00<00:53,  1.75it/s][A
loss=1.592:   2%|▏         | 2/94 [00:01<00:52,  1.75it/s][A
loss=1.692:   2%|▏         | 2/94 [00:01<00:52,  1.75it/s][A
loss=1.692:   3%|▎         | 3/94 [00:01<00:52,  1.75it/s][A
loss=1.422:   3%|▎         | 3/94 [00:01<00:52,  1.75it/s][A
loss=1.422:   4%|▍         |

{"eval_acc": 0.6793376529877609, "eval_f1": 0.16799706274228918, "eval_acc_and_f1": 0.4236673578650251, "eval_loss": 1.5739458501338959, "learning_rate": 1.3700787401574804e-05, "train_loss": 1.527753425969018, "step": 108}



loss=1.322:  15%|█▍        | 14/94 [00:42<14:23, 10.79s/it][A
loss=1.322:  16%|█▌        | 15/94 [00:42<10:10,  7.72s/it][A
loss=1.569:  16%|█▌        | 15/94 [00:42<10:10,  7.72s/it][A
loss=1.569:  17%|█▋        | 16/94 [00:43<07:15,  5.58s/it][A
loss=1.558:  17%|█▋        | 16/94 [00:43<07:15,  5.58s/it][A
loss=1.558:  18%|█▊        | 17/94 [00:43<05:13,  4.08s/it][A
loss=1.440:  18%|█▊        | 17/94 [00:43<05:13,  4.08s/it][A
loss=1.440:  19%|█▉        | 18/94 [00:44<03:49,  3.02s/it][A
loss=1.211:  19%|█▉        | 18/94 [00:44<03:49,  3.02s/it][A
loss=1.211:  20%|██        | 19/94 [00:44<02:51,  2.29s/it][A
loss=1.620:  20%|██        | 19/94 [00:45<02:51,  2.29s/it][A
loss=1.620:  21%|██▏       | 20/94 [00:45<02:11,  1.77s/it][A
loss=1.583:  21%|██▏       | 20/94 [00:45<02:11,  1.77s/it][A
loss=1.583:  22%|██▏       | 21/94 [00:46<01:42,  1.41s/it][A
loss=1.034:  22%|██▏       | 21/94 [00:46<01:42,  1.41s/it][A
loss=1.034:  23%|██▎       | 22/94 [00:46<01:23,  1.16

{"eval_acc": 0.6917206623470122, "eval_f1": 0.19497379551927757, "eval_acc_and_f1": 0.44334722893314493, "eval_loss": 1.3935229309967585, "learning_rate": 1.2283464566929135e-05, "train_loss": 1.4805436664157443, "step": 126}



loss=1.517:  35%|███▌      | 33/94 [01:24<07:23,  7.28s/it][A
loss=1.374:  35%|███▌      | 33/94 [01:24<07:23,  7.28s/it][A
loss=1.374:  36%|███▌      | 34/94 [01:25<05:15,  5.26s/it][A
loss=1.487:  36%|███▌      | 34/94 [01:25<05:15,  5.26s/it][A
loss=1.487:  37%|███▋      | 35/94 [01:25<03:47,  3.86s/it][A
loss=1.328:  37%|███▋      | 35/94 [01:26<03:47,  3.86s/it][A
loss=1.328:  38%|███▊      | 36/94 [01:26<02:46,  2.87s/it][A
loss=1.553:  38%|███▊      | 36/94 [01:26<02:46,  2.87s/it][A
loss=1.553:  39%|███▉      | 37/94 [01:26<02:04,  2.18s/it][A
loss=1.362:  39%|███▉      | 37/94 [01:27<02:04,  2.18s/it][A
loss=1.362:  40%|████      | 38/94 [01:27<01:34,  1.69s/it][A
loss=1.277:  40%|████      | 38/94 [01:27<01:34,  1.69s/it][A
loss=1.277:  41%|████▏     | 39/94 [01:28<01:14,  1.35s/it][A
loss=1.154:  41%|████▏     | 39/94 [01:28<01:14,  1.35s/it][A
loss=1.154:  43%|████▎     | 40/94 [01:28<01:00,  1.12s/it][A
loss=1.111:  43%|████▎     | 40/94 [01:28<01:00,  1.12

{"eval_acc": 0.7435565154787617, "eval_f1": 0.22568073226018096, "eval_acc_and_f1": 0.4846186238694713, "eval_loss": 1.215368505035128, "learning_rate": 1.0866141732283466e-05, "train_loss": 1.2598378360271454, "step": 144}



loss=1.423:  54%|█████▍    | 51/94 [02:06<05:11,  7.24s/it][A
loss=1.547:  54%|█████▍    | 51/94 [02:06<05:11,  7.24s/it][A
loss=1.547:  55%|█████▌    | 52/94 [02:07<03:39,  5.24s/it][A
loss=1.197:  55%|█████▌    | 52/94 [02:07<03:39,  5.24s/it][A
loss=1.197:  56%|█████▋    | 53/94 [02:07<02:37,  3.83s/it][A
loss=1.134:  56%|█████▋    | 53/94 [02:07<02:37,  3.83s/it][A
loss=1.134:  57%|█████▋    | 54/94 [02:08<01:54,  2.85s/it][A
loss=1.036:  57%|█████▋    | 54/94 [02:08<01:54,  2.85s/it][A
loss=1.036:  59%|█████▊    | 55/94 [02:08<01:24,  2.17s/it][A
loss=1.087:  59%|█████▊    | 55/94 [02:09<01:24,  2.17s/it][A
loss=1.087:  60%|█████▉    | 56/94 [02:09<01:04,  1.69s/it][A
loss=1.404:  60%|█████▉    | 56/94 [02:09<01:04,  1.69s/it][A
loss=1.404:  61%|██████    | 57/94 [02:10<00:50,  1.36s/it][A
loss=1.236:  61%|██████    | 57/94 [02:10<00:50,  1.36s/it][A
loss=1.236:  62%|██████▏   | 58/94 [02:10<00:40,  1.12s/it][A
loss=0.960:  62%|██████▏   | 58/94 [02:10<00:40,  1.12

{"eval_acc": 0.7742260619150468, "eval_f1": 0.24322606290096857, "eval_acc_and_f1": 0.5087260624080077, "eval_loss": 1.0705830859286445, "learning_rate": 9.448818897637797e-06, "train_loss": 1.1749906341234844, "step": 162}



loss=1.043:  73%|███████▎  | 69/94 [02:48<03:02,  7.28s/it][A
loss=0.966:  73%|███████▎  | 69/94 [02:48<03:02,  7.28s/it][A
loss=0.966:  74%|███████▍  | 70/94 [02:49<02:06,  5.27s/it][A
loss=1.089:  74%|███████▍  | 70/94 [02:49<02:06,  5.27s/it][A
loss=1.089:  76%|███████▌  | 71/94 [02:49<01:28,  3.86s/it][A
loss=1.409:  76%|███████▌  | 71/94 [02:50<01:28,  3.86s/it][A
loss=1.409:  77%|███████▋  | 72/94 [02:50<01:03,  2.87s/it][A
loss=0.994:  77%|███████▋  | 72/94 [02:50<01:03,  2.87s/it][A
loss=0.994:  78%|███████▊  | 73/94 [02:51<00:45,  2.18s/it][A
loss=0.875:  78%|███████▊  | 73/94 [02:51<00:45,  2.18s/it][A
loss=0.875:  79%|███████▊  | 74/94 [02:51<00:33,  1.70s/it][A
loss=1.214:  79%|███████▊  | 74/94 [02:51<00:33,  1.70s/it][A
loss=1.214:  80%|███████▉  | 75/94 [02:52<00:25,  1.36s/it][A
loss=0.917:  80%|███████▉  | 75/94 [02:52<00:25,  1.36s/it][A
loss=0.917:  81%|████████  | 76/94 [02:52<00:20,  1.12s/it][A
loss=0.925:  81%|████████  | 76/94 [02:52<00:20,  1.12

{"eval_acc": 0.7887688984881209, "eval_f1": 0.2563647269279676, "eval_acc_and_f1": 0.5225668127080443, "eval_loss": 0.9693671975816999, "learning_rate": 8.031496062992128e-06, "train_loss": 1.083131002055274, "step": 180}



loss=0.977:  93%|█████████▎| 87/94 [03:34<00:55,  7.96s/it][A
loss=0.734:  93%|█████████▎| 87/94 [03:34<00:55,  7.96s/it][A
loss=0.734:  94%|█████████▎| 88/94 [03:34<00:34,  5.74s/it][A
loss=0.989:  94%|█████████▎| 88/94 [03:34<00:34,  5.74s/it][A
loss=0.989:  95%|█████████▍| 89/94 [03:35<00:20,  4.19s/it][A
loss=0.826:  95%|█████████▍| 89/94 [03:35<00:20,  4.19s/it][A
loss=0.826:  96%|█████████▌| 90/94 [03:35<00:12,  3.10s/it][A
loss=1.049:  96%|█████████▌| 90/94 [03:36<00:12,  3.10s/it][A
loss=1.049:  97%|█████████▋| 91/94 [03:36<00:07,  2.34s/it][A
loss=0.827:  97%|█████████▋| 91/94 [03:36<00:07,  2.34s/it][A
loss=0.827:  98%|█████████▊| 92/94 [03:37<00:03,  1.81s/it][A
loss=0.945:  98%|█████████▊| 92/94 [03:37<00:03,  1.81s/it][A
loss=0.945:  99%|█████████▉| 93/94 [03:37<00:01,  1.44s/it][A
loss=0.761:  99%|█████████▉| 93/94 [03:37<00:01,  1.44s/it][A
loss=0.761: 100%|██████████| 94/94 [03:38<00:00,  2.32s/it]
Epoch:  67%|██████▋   | 2/3 [07:16<03:38, 218.49s/it]
Ite

{"eval_acc": 0.7844492440604751, "eval_f1": 0.2510826208560969, "eval_acc_and_f1": 0.517765932458286, "eval_loss": 0.9182502648660115, "learning_rate": 6.614173228346458e-06, "train_loss": 0.9593182802200317, "step": 198}



loss=0.752:  12%|█▏        | 11/94 [00:40<10:51,  7.85s/it][A
loss=0.874:  12%|█▏        | 11/94 [00:41<10:51,  7.85s/it][A
loss=0.874:  13%|█▎        | 12/94 [00:41<07:44,  5.66s/it][A
loss=0.624:  13%|█▎        | 12/94 [00:41<07:44,  5.66s/it][A
loss=0.624:  14%|█▍        | 13/94 [00:42<05:34,  4.13s/it][A
loss=1.106:  14%|█▍        | 13/94 [00:42<05:34,  4.13s/it][A
loss=1.106:  15%|█▍        | 14/94 [00:42<04:04,  3.06s/it][A
loss=0.806:  15%|█▍        | 14/94 [00:42<04:04,  3.06s/it][A
loss=0.806:  16%|█▌        | 15/94 [00:43<03:02,  2.31s/it][A
loss=0.780:  16%|█▌        | 15/94 [00:43<03:02,  2.31s/it][A
loss=0.780:  17%|█▋        | 16/94 [00:43<02:19,  1.79s/it][A
loss=0.927:  17%|█▋        | 16/94 [00:43<02:19,  1.79s/it][A
loss=0.927:  18%|█▊        | 17/94 [00:44<01:49,  1.43s/it][A
loss=0.826:  18%|█▊        | 17/94 [00:44<01:49,  1.43s/it][A
loss=0.826:  19%|█▉        | 18/94 [00:44<01:28,  1.17s/it][A
loss=0.798:  19%|█▉        | 18/94 [00:45<01:28,  1.17

{"eval_acc": 0.7982721382289417, "eval_f1": 0.26409483118972304, "eval_acc_and_f1": 0.5311834847093324, "eval_loss": 0.8568499428885323, "learning_rate": 5.196850393700788e-06, "train_loss": 0.8515894545449151, "step": 216}



loss=0.666:  31%|███       | 29/94 [01:24<08:12,  7.58s/it][A
loss=1.063:  31%|███       | 29/94 [01:24<08:12,  7.58s/it][A
loss=1.063:  32%|███▏      | 30/94 [01:25<05:50,  5.47s/it][A
loss=0.855:  32%|███▏      | 30/94 [01:25<05:50,  5.47s/it][A
loss=0.855:  33%|███▎      | 31/94 [01:25<04:12,  4.00s/it][A
loss=1.042:  33%|███▎      | 31/94 [01:25<04:12,  4.00s/it][A
loss=1.042:  34%|███▍      | 32/94 [01:26<03:04,  2.97s/it][A
loss=1.285:  34%|███▍      | 32/94 [01:26<03:04,  2.97s/it][A
loss=1.285:  35%|███▌      | 33/94 [01:26<02:17,  2.25s/it][A
loss=1.318:  35%|███▌      | 33/94 [01:26<02:17,  2.25s/it][A
loss=1.318:  36%|███▌      | 34/94 [01:27<01:44,  1.75s/it][A
loss=0.734:  36%|███▌      | 34/94 [01:27<01:44,  1.75s/it][A
loss=0.734:  37%|███▋      | 35/94 [01:27<01:22,  1.39s/it][A
loss=0.966:  37%|███▋      | 35/94 [01:27<01:22,  1.39s/it][A
loss=0.966:  38%|███▊      | 36/94 [01:28<01:06,  1.14s/it][A
loss=0.909:  38%|███▊      | 36/94 [01:28<01:06,  1.14

{"eval_acc": 0.8066234701223902, "eval_f1": 0.27362880175965526, "eval_acc_and_f1": 0.5401261359410228, "eval_loss": 0.8216815612145832, "learning_rate": 3.7795275590551182e-06, "train_loss": 0.8991196817821927, "step": 234}



loss=0.732:  50%|█████     | 47/94 [02:08<06:01,  7.68s/it][A
loss=0.588:  50%|█████     | 47/94 [02:08<06:01,  7.68s/it][A
loss=0.588:  51%|█████     | 48/94 [02:09<04:15,  5.55s/it][A
loss=0.765:  51%|█████     | 48/94 [02:09<04:15,  5.55s/it][A
loss=0.765:  52%|█████▏    | 49/94 [02:09<03:02,  4.06s/it][A
loss=0.684:  52%|█████▏    | 49/94 [02:09<03:02,  4.06s/it][A
loss=0.684:  53%|█████▎    | 50/94 [02:10<02:12,  3.01s/it][A
loss=1.032:  53%|█████▎    | 50/94 [02:10<02:12,  3.01s/it][A
loss=1.032:  54%|█████▍    | 51/94 [02:10<01:37,  2.28s/it][A
loss=1.202:  54%|█████▍    | 51/94 [02:10<01:37,  2.28s/it][A
loss=1.202:  55%|█████▌    | 52/94 [02:11<01:14,  1.77s/it][A
loss=0.839:  55%|█████▌    | 52/94 [02:11<01:14,  1.77s/it][A
loss=0.839:  56%|█████▋    | 53/94 [02:11<00:57,  1.41s/it][A
loss=0.761:  56%|█████▋    | 53/94 [02:12<00:57,  1.41s/it][A
loss=0.761:  57%|█████▋    | 54/94 [02:12<00:46,  1.15s/it][A
loss=0.839:  57%|█████▋    | 54/94 [02:12<00:46,  1.15

{"eval_acc": 0.8051835853131749, "eval_f1": 0.27195819240710445, "eval_acc_and_f1": 0.5385708888601397, "eval_loss": 0.7970584481954575, "learning_rate": 2.362204724409449e-06, "train_loss": 0.7896270602941513, "step": 252}



loss=0.636:  69%|██████▉   | 65/94 [02:54<03:51,  7.99s/it][A
loss=0.829:  69%|██████▉   | 65/94 [02:54<03:51,  7.99s/it][A
loss=0.829:  70%|███████   | 66/94 [02:54<02:41,  5.77s/it][A
loss=0.809:  70%|███████   | 66/94 [02:54<02:41,  5.77s/it][A
loss=0.809:  71%|███████▏  | 67/94 [02:55<01:53,  4.21s/it][A
loss=0.814:  71%|███████▏  | 67/94 [02:55<01:53,  4.21s/it][A
loss=0.814:  72%|███████▏  | 68/94 [02:55<01:20,  3.11s/it][A
loss=1.043:  72%|███████▏  | 68/94 [02:55<01:20,  3.11s/it][A
loss=1.043:  73%|███████▎  | 69/94 [02:56<00:58,  2.35s/it][A
loss=0.931:  73%|███████▎  | 69/94 [02:56<00:58,  2.35s/it][A
loss=0.931:  74%|███████▍  | 70/94 [02:56<00:43,  1.81s/it][A
loss=0.833:  74%|███████▍  | 70/94 [02:57<00:43,  1.81s/it][A
loss=0.833:  76%|███████▌  | 71/94 [02:57<00:33,  1.44s/it][A
loss=0.749:  76%|███████▌  | 71/94 [02:57<00:33,  1.44s/it][A
loss=0.749:  77%|███████▋  | 72/94 [02:57<00:25,  1.18s/it][A
loss=0.664:  77%|███████▋  | 72/94 [02:58<00:25,  1.18

{"eval_acc": 0.8035997120230381, "eval_f1": 0.27052324312956105, "eval_acc_and_f1": 0.5370614775762996, "eval_loss": 0.788720714194434, "learning_rate": 9.448818897637796e-07, "train_loss": 0.7817913426293267, "step": 270}



loss=0.771:  88%|████████▊ | 83/94 [03:35<01:19,  7.25s/it][A
loss=1.050:  88%|████████▊ | 83/94 [03:36<01:19,  7.25s/it][A
loss=1.050:  89%|████████▉ | 84/94 [03:36<00:52,  5.24s/it][A
loss=0.717:  89%|████████▉ | 84/94 [03:36<00:52,  5.24s/it][A
loss=0.717:  90%|█████████ | 85/94 [03:37<00:34,  3.84s/it][A
loss=0.861:  90%|█████████ | 85/94 [03:37<00:34,  3.84s/it][A
loss=0.861:  91%|█████████▏| 86/94 [03:37<00:22,  2.86s/it][A
loss=1.136:  91%|█████████▏| 86/94 [03:37<00:22,  2.86s/it][A
loss=1.136:  93%|█████████▎| 87/94 [03:38<00:15,  2.17s/it][A
loss=0.936:  93%|█████████▎| 87/94 [03:38<00:15,  2.17s/it][A
loss=0.936:  94%|█████████▎| 88/94 [03:38<00:10,  1.69s/it][A
loss=0.610:  94%|█████████▎| 88/94 [03:38<00:10,  1.69s/it][A
loss=0.610:  95%|█████████▍| 89/94 [03:39<00:06,  1.35s/it][A
loss=0.583:  95%|█████████▍| 89/94 [03:39<00:06,  1.35s/it][A
loss=0.583:  96%|█████████▌| 90/94 [03:39<00:04,  1.12s/it][A
loss=0.767:  96%|█████████▌| 90/94 [03:40<00:04,  1.12

{"eval_acc": 0.8048956083513319, "eval_f1": 0.2722696830016188, "eval_acc_and_f1": 0.5385826456764753, "eval_loss": 0.7842266751187188, "learning_rate": 0.0, "train_loss": 0.5485623776912689, "step": 282}


07/17/2022 23:59:33 - INFO - utilities.trainers -   ***** Running evaluation iter-6_trial1 *****
07/17/2022 23:59:33 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 23:59:33 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 00:00:01 - INFO - utilities.trainers -   ***** Eval results iter-6_trial1 *****
07/18/2022 00:00:01 - INFO - utilities.trainers -     acc = 0.8048956083513319
07/18/2022 00:00:01 - INFO - utilities.trainers -     acc_and_f1 = 0.5385826456764753
07/18/2022 00:00:01 - INFO - utilities.trainers -     f1 = 0.2722696830016188
07/18/2022 00:00:04 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 00:00:09 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:00:09 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 00:00:09 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/18/2022 00:02:28 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 00:02:28 - INFO - utilities.trainers -     acc = 0.8061459593341397
07/18/2022 00:02:28 - INFO - utilities.trainers -     acc_and_f1 = 0.5320243539877315
07/18/2022 00:02:28 - INFO - utilities.trainers -     f1 = 0.2579027486413233



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 00:02:29 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 00:02:47 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 00:02:51 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:02:51 - INFO - utilities.trainers -     Num examples = 47000
07/18/2022 00:02:51 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/184 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 184/184 [06:14<00:00,  2.04s/it]
07/18/2022 00:09:06 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 00:09:06 - INFO - utilities.trainers -     acc = 0.8064893617021277
07/18/2022 00:09:06 - INFO - utilities.trainers -     acc_and_f1 = 0.5225359249721773
07/18/2022 00:09:06 - INFO - utilities.trainers -     f1 = 0.23858248824222691
07/18/2022 00:09:06 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 00:09:25 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 00:09:28 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:09:28 - INFO - utilities.trainers -     Num examples = 3000
07/18/2022 00:09:28 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 12/12 [00:23<00:00,  1.99s/it]
07/18/2022 00:09:52 - INFO - ut



************
End of iteration 6:
Train loss 1.4629, Val loss 0.7842266751187188, Test loss 0.7910688019850675
Annotated 500 samples
Current labeled (training) data: 3500 samples
Remaining budget: 500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 7!



07/18/2022 00:13:03 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.8048956083513319, acc_best_iteration=6, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-6', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_672/ornl20_bert-cls/iter-7', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, fp

warmup steps: 32
total steps: 328
logging steps: 21
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.336:   1%|          | 1/110 [00:00<00:52,  2.07it/s][A
loss=3.356:   1%|          | 1/110 [00:00<00:52,  2.07it/s][A
loss=3.356:   2%|▏         | 2/110 [00:01<00:53,  2.00it/s][A
loss=3.244:   2%|▏         | 2/110 [00:01<00:53,  2.00it/s][A
loss=3.244:   3%|▎         | 3/110 [00:01<00:54,  1.96it/s][A
loss=3.387:   3%|▎         | 3/110 [00:01<00:54,  1.96it/s][A
loss=3.387:   4%|▎         | 4/110 [00:02<00:54,  1.93it/s][A
loss=3.241:   4%|▎         | 4/110 [00:02<00:54,  1.93it/s][A
loss=3.241:   5%|▍         | 5/110 [00:02<00:54,  1.92it/s][A
loss=3.245:   5%|▍         | 5/110 [00:02<00:54,  1.92it/s][A
loss=3.245:   5%|▌         | 6/110 [00:03<00:54,  1.90it/s][A
loss=3.186:   5%|▌         | 6/110 [00:03<00:54,  1.90it/s][A
loss=3.186:   6%|▋         | 7/110 [00:03<00:54,  1.88it/s][A
loss=3.223:   6%|▋         | 7/110 [00:03<00:54,  1.88it/s][A
loss=3.223:   7%|▋         | 8/110 [00:04<00:54,  1.87it/s][A
loss=3.205:   7%|▋         | 8/110 [00:04<00:54,  1.87

{"eval_acc": 0.18185745140388768, "eval_f1": 0.032862590060453105, "eval_acc_and_f1": 0.1073600207321704, "eval_loss": 2.8633106436048235, "learning_rate": 1.3125e-05, "train_loss": 3.1419734841301326, "step": 21}



loss=2.788:  20%|██        | 22/110 [00:43<10:26,  7.12s/it][A
loss=2.742:  20%|██        | 22/110 [00:43<10:26,  7.12s/it][A
loss=2.742:  21%|██        | 23/110 [00:43<07:27,  5.15s/it][A
loss=2.848:  21%|██        | 23/110 [00:43<07:27,  5.15s/it][A
loss=2.848:  22%|██▏       | 24/110 [00:44<05:23,  3.76s/it][A
loss=2.645:  22%|██▏       | 24/110 [00:44<05:23,  3.76s/it][A
loss=2.645:  23%|██▎       | 25/110 [00:44<03:57,  2.80s/it][A
loss=2.825:  23%|██▎       | 25/110 [00:44<03:57,  2.80s/it][A
loss=2.825:  24%|██▎       | 26/110 [00:45<02:57,  2.12s/it][A
loss=2.871:  24%|██▎       | 26/110 [00:45<02:57,  2.12s/it][A
loss=2.871:  25%|██▍       | 27/110 [00:45<02:16,  1.64s/it][A
loss=2.914:  25%|██▍       | 27/110 [00:45<02:16,  1.64s/it][A
loss=2.914:  25%|██▌       | 28/110 [00:46<01:47,  1.31s/it][A
loss=2.929:  25%|██▌       | 28/110 [00:46<01:47,  1.31s/it][A
loss=2.929:  26%|██▋       | 29/110 [00:46<01:27,  1.08s/it][A
loss=2.687:  26%|██▋       | 29/110 [00

{"eval_acc": 0.33203743700503957, "eval_f1": 0.08571295886969306, "eval_acc_and_f1": 0.2088751979373663, "eval_loss": 2.379174862589155, "learning_rate": 1.9328859060402687e-05, "train_loss": 2.633713029679798, "step": 42}



loss=2.285:  39%|███▉      | 43/110 [01:27<08:24,  7.53s/it][A
loss=2.300:  39%|███▉      | 43/110 [01:27<08:24,  7.53s/it][A
loss=2.300:  40%|████      | 44/110 [01:28<05:58,  5.43s/it][A
loss=2.114:  40%|████      | 44/110 [01:28<05:58,  5.43s/it][A
loss=2.114:  41%|████      | 45/110 [01:28<04:17,  3.97s/it][A
loss=2.513:  41%|████      | 45/110 [01:29<04:17,  3.97s/it][A
loss=2.513:  42%|████▏     | 46/110 [01:29<03:08,  2.94s/it][A
loss=2.235:  42%|████▏     | 46/110 [01:29<03:08,  2.94s/it][A
loss=2.235:  43%|████▎     | 47/110 [01:29<02:20,  2.22s/it][A
loss=2.565:  43%|████▎     | 47/110 [01:30<02:20,  2.22s/it][A
loss=2.565:  44%|████▎     | 48/110 [01:30<01:46,  1.72s/it][A
loss=2.211:  44%|████▎     | 48/110 [01:30<01:46,  1.72s/it][A
loss=2.211:  45%|████▍     | 49/110 [01:31<01:23,  1.36s/it][A
loss=2.329:  45%|████▍     | 49/110 [01:31<01:23,  1.36s/it][A
loss=2.329:  45%|████▌     | 50/110 [01:31<01:07,  1.12s/it][A
loss=2.131:  45%|████▌     | 50/110 [01

{"eval_acc": 0.35550755939524836, "eval_f1": 0.08010967300222849, "eval_acc_and_f1": 0.21780861619873843, "eval_loss": 2.1340579986572266, "learning_rate": 1.7919463087248323e-05, "train_loss": 2.174420646258763, "step": 63}



loss=2.089:  58%|█████▊    | 64/110 [02:10<05:30,  7.19s/it][A
loss=2.171:  58%|█████▊    | 64/110 [02:10<05:30,  7.19s/it][A
loss=2.171:  59%|█████▉    | 65/110 [02:11<03:53,  5.20s/it][A
loss=2.219:  59%|█████▉    | 65/110 [02:11<03:53,  5.20s/it][A
loss=2.219:  60%|██████    | 66/110 [02:11<02:47,  3.80s/it][A
loss=2.046:  60%|██████    | 66/110 [02:12<02:47,  3.80s/it][A
loss=2.046:  61%|██████    | 67/110 [02:12<02:01,  2.82s/it][A
loss=2.149:  61%|██████    | 67/110 [02:12<02:01,  2.82s/it][A
loss=2.149:  62%|██████▏   | 68/110 [02:12<01:29,  2.13s/it][A
loss=2.103:  62%|██████▏   | 68/110 [02:13<01:29,  2.13s/it][A
loss=2.103:  63%|██████▎   | 69/110 [02:13<01:08,  1.66s/it][A
loss=1.962:  63%|██████▎   | 69/110 [02:13<01:08,  1.66s/it][A
loss=1.962:  64%|██████▎   | 70/110 [02:14<00:53,  1.33s/it][A
loss=2.103:  64%|██████▎   | 70/110 [02:14<00:53,  1.33s/it][A
loss=2.103:  65%|██████▍   | 71/110 [02:14<00:42,  1.09s/it][A
loss=1.794:  65%|██████▍   | 71/110 [02

{"eval_acc": 0.5441324694024477, "eval_f1": 0.13209684364417326, "eval_acc_and_f1": 0.3381146565233105, "eval_loss": 1.8540198930672236, "learning_rate": 1.6510067114093962e-05, "train_loss": 1.9808378162838163, "step": 84}



loss=1.747:  77%|███████▋  | 85/110 [02:54<03:00,  7.24s/it][A
loss=2.034:  77%|███████▋  | 85/110 [02:54<03:00,  7.24s/it][A
loss=2.034:  78%|███████▊  | 86/110 [02:54<02:05,  5.23s/it][A
loss=2.053:  78%|███████▊  | 86/110 [02:54<02:05,  5.23s/it][A
loss=2.053:  79%|███████▉  | 87/110 [02:55<01:27,  3.82s/it][A
loss=1.483:  79%|███████▉  | 87/110 [02:55<01:27,  3.82s/it][A
loss=1.483:  80%|████████  | 88/110 [02:55<01:02,  2.84s/it][A
loss=1.968:  80%|████████  | 88/110 [02:55<01:02,  2.84s/it][A
loss=1.968:  81%|████████  | 89/110 [02:56<00:45,  2.15s/it][A
loss=1.590:  81%|████████  | 89/110 [02:56<00:45,  2.15s/it][A
loss=1.590:  82%|████████▏ | 90/110 [02:56<00:33,  1.67s/it][A
loss=1.923:  82%|████████▏ | 90/110 [02:56<00:33,  1.67s/it][A
loss=1.923:  83%|████████▎ | 91/110 [02:57<00:25,  1.33s/it][A
loss=1.442:  83%|████████▎ | 91/110 [02:57<00:25,  1.33s/it][A
loss=1.442:  84%|████████▎ | 92/110 [02:57<00:19,  1.09s/it][A
loss=1.693:  84%|████████▎ | 92/110 [02

{"eval_acc": 0.68207343412527, "eval_f1": 0.18612008133577998, "eval_acc_and_f1": 0.434096757730525, "eval_loss": 1.5801727729184287, "learning_rate": 1.5100671140939598e-05, "train_loss": 1.7292144355319796, "step": 105}



loss=1.598:  96%|█████████▋| 106/110 [03:37<00:28,  7.23s/it][A
loss=1.242:  96%|█████████▋| 106/110 [03:37<00:28,  7.23s/it][A
loss=1.242:  97%|█████████▋| 107/110 [03:37<00:15,  5.22s/it][A
loss=1.668:  97%|█████████▋| 107/110 [03:37<00:15,  5.22s/it][A
loss=1.668:  98%|█████████▊| 108/110 [03:38<00:07,  3.82s/it][A
loss=2.006:  98%|█████████▊| 108/110 [03:38<00:07,  3.82s/it][A
loss=2.006:  99%|█████████▉| 109/110 [03:38<00:02,  2.84s/it][A
loss=1.048:  99%|█████████▉| 109/110 [03:38<00:02,  2.84s/it][A
loss=1.048: 100%|██████████| 110/110 [03:39<00:00,  1.99s/it]
Epoch:  33%|███▎      | 1/3 [03:39<07:18, 219.09s/it]
Iteration:   0%|          | 0/110 [00:00<?, ?it/s][A
loss=1.558:   0%|          | 0/110 [00:00<?, ?it/s][A
loss=1.558:   1%|          | 1/110 [00:00<00:58,  1.87it/s][A
loss=1.352:   1%|          | 1/110 [00:00<00:58,  1.87it/s][A
loss=1.352:   2%|▏         | 2/110 [00:01<00:58,  1.86it/s][A
loss=1.988:   2%|▏         | 2/110 [00:01<00:58,  1.86it/s][A
lo

{"eval_acc": 0.740100791936645, "eval_f1": 0.21996049260494138, "eval_acc_and_f1": 0.4800306422707932, "eval_loss": 1.33487953884261, "learning_rate": 1.3691275167785237e-05, "train_loss": 1.503535929180327, "step": 126}



loss=1.382:  15%|█▌        | 17/110 [00:41<11:13,  7.24s/it][A
loss=1.664:  15%|█▌        | 17/110 [00:41<11:13,  7.24s/it][A
loss=1.664:  16%|█▋        | 18/110 [00:41<08:01,  5.23s/it][A
loss=1.473:  16%|█▋        | 18/110 [00:41<08:01,  5.23s/it][A
loss=1.473:  17%|█▋        | 19/110 [00:42<05:47,  3.82s/it][A
loss=1.414:  17%|█▋        | 19/110 [00:42<05:47,  3.82s/it][A
loss=1.414:  18%|█▊        | 20/110 [00:42<04:15,  2.84s/it][A
loss=1.418:  18%|█▊        | 20/110 [00:42<04:15,  2.84s/it][A
loss=1.418:  19%|█▉        | 21/110 [00:43<03:11,  2.15s/it][A
loss=1.321:  19%|█▉        | 21/110 [00:43<03:11,  2.15s/it][A
loss=1.321:  20%|██        | 22/110 [00:43<02:26,  1.67s/it][A
loss=1.216:  20%|██        | 22/110 [00:43<02:26,  1.67s/it][A
loss=1.216:  21%|██        | 23/110 [00:44<01:55,  1.33s/it][A
loss=1.408:  21%|██        | 23/110 [00:44<01:55,  1.33s/it][A
loss=1.408:  22%|██▏       | 24/110 [00:44<01:33,  1.09s/it][A
loss=1.447:  22%|██▏       | 24/110 [00

{"eval_acc": 0.7758099352051836, "eval_f1": 0.23814386687439407, "eval_acc_and_f1": 0.5069769010397889, "eval_loss": 1.1255254447460175, "learning_rate": 1.2281879194630872e-05, "train_loss": 1.3823568650654383, "step": 147}



loss=1.346:  35%|███▍      | 38/110 [01:24<08:47,  7.32s/it][A
loss=1.229:  35%|███▍      | 38/110 [01:24<08:47,  7.32s/it][A
loss=1.229:  35%|███▌      | 39/110 [01:25<06:15,  5.28s/it][A
loss=1.529:  35%|███▌      | 39/110 [01:25<06:15,  5.28s/it][A
loss=1.529:  36%|███▋      | 40/110 [01:25<04:30,  3.86s/it][A
loss=1.374:  36%|███▋      | 40/110 [01:25<04:30,  3.86s/it][A
loss=1.374:  37%|███▋      | 41/110 [01:26<03:17,  2.87s/it][A
loss=1.064:  37%|███▋      | 41/110 [01:26<03:17,  2.87s/it][A
loss=1.064:  38%|███▊      | 42/110 [01:26<02:27,  2.17s/it][A
loss=1.394:  38%|███▊      | 42/110 [01:27<02:27,  2.17s/it][A
loss=1.394:  39%|███▉      | 43/110 [01:27<01:52,  1.68s/it][A
loss=1.375:  39%|███▉      | 43/110 [01:27<01:52,  1.68s/it][A
loss=1.375:  40%|████      | 44/110 [01:27<01:28,  1.34s/it][A
loss=1.402:  40%|████      | 44/110 [01:28<01:28,  1.34s/it][A
loss=1.402:  41%|████      | 45/110 [01:28<01:11,  1.10s/it][A
loss=1.356:  41%|████      | 45/110 [01

{"eval_acc": 0.7992800575953923, "eval_f1": 0.25322005597619757, "eval_acc_and_f1": 0.526250056785795, "eval_loss": 0.97551229596138, "learning_rate": 1.0872483221476512e-05, "train_loss": 1.2832425492150443, "step": 168}



loss=1.103:  54%|█████▎    | 59/110 [02:07<06:08,  7.23s/it][A
loss=1.183:  54%|█████▎    | 59/110 [02:08<06:08,  7.23s/it][A
loss=1.183:  55%|█████▍    | 60/110 [02:08<04:21,  5.22s/it][A
loss=1.221:  55%|█████▍    | 60/110 [02:08<04:21,  5.22s/it][A
loss=1.221:  55%|█████▌    | 61/110 [02:09<03:07,  3.82s/it][A
loss=0.567:  55%|█████▌    | 61/110 [02:09<03:07,  3.82s/it][A
loss=0.567:  56%|█████▋    | 62/110 [02:09<02:16,  2.84s/it][A
loss=1.283:  56%|█████▋    | 62/110 [02:09<02:16,  2.84s/it][A
loss=1.283:  57%|█████▋    | 63/110 [02:10<01:40,  2.15s/it][A
loss=1.003:  57%|█████▋    | 63/110 [02:10<01:40,  2.15s/it][A
loss=1.003:  58%|█████▊    | 64/110 [02:10<01:16,  1.67s/it][A
loss=1.190:  58%|█████▊    | 64/110 [02:10<01:16,  1.67s/it][A
loss=1.190:  59%|█████▉    | 65/110 [02:11<00:59,  1.33s/it][A
loss=1.404:  59%|█████▉    | 65/110 [02:11<00:59,  1.33s/it][A
loss=1.404:  60%|██████    | 66/110 [02:11<00:48,  1.09s/it][A
loss=0.850:  60%|██████    | 66/110 [02

{"eval_acc": 0.803023758099352, "eval_f1": 0.25483397362586274, "eval_acc_and_f1": 0.5289288658626073, "eval_loss": 0.8412341198750904, "learning_rate": 9.463087248322147e-06, "train_loss": 1.0708899327686854, "step": 189}



loss=0.879:  73%|███████▎  | 80/110 [02:52<03:46,  7.56s/it][A
loss=1.203:  73%|███████▎  | 80/110 [02:52<03:46,  7.56s/it][A
loss=1.203:  74%|███████▎  | 81/110 [02:53<02:38,  5.45s/it][A
loss=0.954:  74%|███████▎  | 81/110 [02:53<02:38,  5.45s/it][A
loss=0.954:  75%|███████▍  | 82/110 [02:53<01:51,  3.98s/it][A
loss=0.953:  75%|███████▍  | 82/110 [02:53<01:51,  3.98s/it][A
loss=0.953:  75%|███████▌  | 83/110 [02:54<01:19,  2.95s/it][A
loss=1.340:  75%|███████▌  | 83/110 [02:54<01:19,  2.95s/it][A
loss=1.340:  76%|███████▋  | 84/110 [02:54<00:57,  2.23s/it][A
loss=0.771:  76%|███████▋  | 84/110 [02:54<00:57,  2.23s/it][A
loss=0.771:  77%|███████▋  | 85/110 [02:55<00:43,  1.72s/it][A
loss=1.100:  77%|███████▋  | 85/110 [02:55<00:43,  1.72s/it][A
loss=1.100:  78%|███████▊  | 86/110 [02:55<00:32,  1.37s/it][A
loss=0.938:  78%|███████▊  | 86/110 [02:56<00:32,  1.37s/it][A
loss=0.938:  79%|███████▉  | 87/110 [02:56<00:25,  1.12s/it][A
loss=0.830:  79%|███████▉  | 87/110 [02

{"eval_acc": 0.8152627789776817, "eval_f1": 0.284132632362808, "eval_acc_and_f1": 0.5496977056702449, "eval_loss": 0.7633647152355739, "learning_rate": 8.053691275167785e-06, "train_loss": 1.0000960344359988, "step": 210}



loss=0.951:  92%|█████████▏| 101/110 [03:36<01:05,  7.30s/it][A
loss=0.861:  92%|█████████▏| 101/110 [03:36<01:05,  7.30s/it][A
loss=0.861:  93%|█████████▎| 102/110 [03:36<00:42,  5.27s/it][A
loss=1.154:  93%|█████████▎| 102/110 [03:36<00:42,  5.27s/it][A
loss=1.154:  94%|█████████▎| 103/110 [03:37<00:26,  3.85s/it][A
loss=1.059:  94%|█████████▎| 103/110 [03:37<00:26,  3.85s/it][A
loss=1.059:  95%|█████████▍| 104/110 [03:37<00:17,  2.86s/it][A
loss=1.093:  95%|█████████▍| 104/110 [03:38<00:17,  2.86s/it][A
loss=1.093:  95%|█████████▌| 105/110 [03:38<00:10,  2.16s/it][A
loss=0.930:  95%|█████████▌| 105/110 [03:38<00:10,  2.16s/it][A
loss=0.930:  96%|█████████▋| 106/110 [03:38<00:06,  1.68s/it][A
loss=0.613:  96%|█████████▋| 106/110 [03:39<00:06,  1.68s/it][A
loss=0.613:  97%|█████████▋| 107/110 [03:39<00:04,  1.34s/it][A
loss=0.989:  97%|█████████▋| 107/110 [03:39<00:04,  1.34s/it][A
loss=0.989:  98%|█████████▊| 108/110 [03:40<00:02,  1.10s/it][A
loss=0.982:  98%|███████

{"eval_acc": 0.8299496040316775, "eval_f1": 0.3006755509325804, "eval_acc_and_f1": 0.5653125774821289, "eval_loss": 0.6996024272271565, "learning_rate": 6.644295302013424e-06, "train_loss": 0.9352451676414126, "step": 231}



loss=0.652:  11%|█         | 12/110 [00:40<12:24,  7.60s/it][A
loss=0.953:  11%|█         | 12/110 [00:40<12:24,  7.60s/it][A
loss=0.953:  12%|█▏        | 13/110 [00:40<08:51,  5.48s/it][A
loss=0.755:  12%|█▏        | 13/110 [00:40<08:51,  5.48s/it][A
loss=0.755:  13%|█▎        | 14/110 [00:41<06:23,  4.00s/it][A
loss=0.596:  13%|█▎        | 14/110 [00:41<06:23,  4.00s/it][A
loss=0.596:  14%|█▎        | 15/110 [00:41<04:41,  2.96s/it][A
loss=0.855:  14%|█▎        | 15/110 [00:41<04:41,  2.96s/it][A
loss=0.855:  15%|█▍        | 16/110 [00:42<03:30,  2.23s/it][A
loss=1.053:  15%|█▍        | 16/110 [00:42<03:30,  2.23s/it][A
loss=1.053:  15%|█▌        | 17/110 [00:42<02:40,  1.73s/it][A
loss=0.900:  15%|█▌        | 17/110 [00:42<02:40,  1.73s/it][A
loss=0.900:  16%|█▋        | 18/110 [00:43<02:06,  1.37s/it][A
loss=0.692:  16%|█▋        | 18/110 [00:43<02:06,  1.37s/it][A
loss=0.692:  17%|█▋        | 19/110 [00:43<01:42,  1.12s/it][A
loss=0.845:  17%|█▋        | 19/110 [00

{"eval_acc": 0.8417566594672427, "eval_f1": 0.3151350706299417, "eval_acc_and_f1": 0.5784458650485922, "eval_loss": 0.6521062701940536, "learning_rate": 5.234899328859061e-06, "train_loss": 0.800063798824946, "step": 252}



loss=0.784:  30%|███       | 33/110 [01:23<09:16,  7.22s/it][A
loss=0.997:  30%|███       | 33/110 [01:23<09:16,  7.22s/it][A
loss=0.997:  31%|███       | 34/110 [01:23<06:36,  5.22s/it][A
loss=0.843:  31%|███       | 34/110 [01:23<06:36,  5.22s/it][A
loss=0.843:  32%|███▏      | 35/110 [01:24<04:46,  3.81s/it][A
loss=0.984:  32%|███▏      | 35/110 [01:24<04:46,  3.81s/it][A
loss=0.984:  33%|███▎      | 36/110 [01:24<03:29,  2.83s/it][A
loss=0.735:  33%|███▎      | 36/110 [01:25<03:29,  2.83s/it][A
loss=0.735:  34%|███▎      | 37/110 [01:25<02:36,  2.14s/it][A
loss=0.834:  34%|███▎      | 37/110 [01:25<02:36,  2.14s/it][A
loss=0.834:  35%|███▍      | 38/110 [01:25<01:59,  1.66s/it][A
loss=0.511:  35%|███▍      | 38/110 [01:26<01:59,  1.66s/it][A
loss=0.511:  35%|███▌      | 39/110 [01:26<01:34,  1.33s/it][A
loss=0.791:  35%|███▌      | 39/110 [01:26<01:34,  1.33s/it][A
loss=0.791:  36%|███▋      | 40/110 [01:27<01:16,  1.09s/it][A
loss=0.775:  36%|███▋      | 40/110 [01

{"eval_acc": 0.8570194384449245, "eval_f1": 0.33998244106768655, "eval_acc_and_f1": 0.5985009397563055, "eval_loss": 0.6196726177419934, "learning_rate": 3.825503355704698e-06, "train_loss": 0.7821218115942818, "step": 273}



loss=0.787:  49%|████▉     | 54/110 [02:08<07:05,  7.59s/it][A
loss=0.632:  49%|████▉     | 54/110 [02:08<07:05,  7.59s/it][A
loss=0.632:  50%|█████     | 55/110 [02:08<05:01,  5.47s/it][A
loss=0.667:  50%|█████     | 55/110 [02:08<05:01,  5.47s/it][A
loss=0.667:  51%|█████     | 56/110 [02:09<03:35,  3.99s/it][A
loss=0.861:  51%|█████     | 56/110 [02:09<03:35,  3.99s/it][A
loss=0.861:  52%|█████▏    | 57/110 [02:09<02:36,  2.96s/it][A
loss=0.896:  52%|█████▏    | 57/110 [02:09<02:36,  2.96s/it][A
loss=0.896:  53%|█████▎    | 58/110 [02:10<01:56,  2.23s/it][A
loss=0.754:  53%|█████▎    | 58/110 [02:10<01:56,  2.23s/it][A
loss=0.754:  54%|█████▎    | 59/110 [02:10<01:28,  1.73s/it][A
loss=0.931:  54%|█████▎    | 59/110 [02:11<01:28,  1.73s/it][A
loss=0.931:  55%|█████▍    | 60/110 [02:11<01:08,  1.37s/it][A
loss=0.794:  55%|█████▍    | 60/110 [02:11<01:08,  1.37s/it][A
loss=0.794:  55%|█████▌    | 61/110 [02:11<00:55,  1.13s/it][A
loss=0.861:  55%|█████▌    | 61/110 [02

{"eval_acc": 0.8647948164146868, "eval_f1": 0.35230987643506206, "eval_acc_and_f1": 0.6085523464248744, "eval_loss": 0.6046780113662992, "learning_rate": 2.416107382550336e-06, "train_loss": 0.7675775999114627, "step": 294}



loss=0.849:  68%|██████▊   | 75/110 [02:51<04:14,  7.28s/it][A
loss=0.528:  68%|██████▊   | 75/110 [02:51<04:14,  7.28s/it][A
loss=0.528:  69%|██████▉   | 76/110 [02:52<02:58,  5.26s/it][A
loss=0.532:  69%|██████▉   | 76/110 [02:52<02:58,  5.26s/it][A
loss=0.532:  70%|███████   | 77/110 [02:52<02:06,  3.84s/it][A
loss=0.637:  70%|███████   | 77/110 [02:52<02:06,  3.84s/it][A
loss=0.637:  71%|███████   | 78/110 [02:53<01:31,  2.85s/it][A
loss=0.804:  71%|███████   | 78/110 [02:53<01:31,  2.85s/it][A
loss=0.804:  72%|███████▏  | 79/110 [02:53<01:06,  2.16s/it][A
loss=0.806:  72%|███████▏  | 79/110 [02:53<01:06,  2.16s/it][A
loss=0.806:  73%|███████▎  | 80/110 [02:54<00:50,  1.67s/it][A
loss=0.959:  73%|███████▎  | 80/110 [02:54<00:50,  1.67s/it][A
loss=0.959:  74%|███████▎  | 81/110 [02:54<00:38,  1.33s/it][A
loss=0.908:  74%|███████▎  | 81/110 [02:55<00:38,  1.33s/it][A
loss=0.908:  75%|███████▍  | 82/110 [02:55<00:30,  1.09s/it][A
loss=0.916:  75%|███████▍  | 82/110 [02

{"eval_acc": 0.8624910007199424, "eval_f1": 0.3503173864253511, "eval_acc_and_f1": 0.6064041935726467, "eval_loss": 0.5940765706556184, "learning_rate": 1.006711409395973e-06, "train_loss": 0.8234543402989706, "step": 315}



loss=0.431:  87%|████████▋ | 96/110 [03:35<01:42,  7.29s/it][A
loss=0.746:  87%|████████▋ | 96/110 [03:35<01:42,  7.29s/it][A
loss=0.746:  88%|████████▊ | 97/110 [03:35<01:08,  5.27s/it][A
loss=0.835:  88%|████████▊ | 97/110 [03:35<01:08,  5.27s/it][A
loss=0.835:  89%|████████▉ | 98/110 [03:36<00:46,  3.85s/it][A
loss=0.578:  89%|████████▉ | 98/110 [03:36<00:46,  3.85s/it][A
loss=0.578:  90%|█████████ | 99/110 [03:36<00:31,  2.86s/it][A
loss=0.547:  90%|█████████ | 99/110 [03:36<00:31,  2.86s/it][A
loss=0.547:  91%|█████████ | 100/110 [03:37<00:21,  2.16s/it][A
loss=0.645:  91%|█████████ | 100/110 [03:37<00:21,  2.16s/it][A
loss=0.645:  92%|█████████▏| 101/110 [03:37<00:15,  1.67s/it][A
loss=0.717:  92%|█████████▏| 101/110 [03:37<00:15,  1.67s/it][A
loss=0.717:  93%|█████████▎| 102/110 [03:38<00:10,  1.33s/it][A
loss=0.538:  93%|█████████▎| 102/110 [03:38<00:10,  1.33s/it][A
loss=0.538:  94%|█████████▎| 103/110 [03:38<00:07,  1.10s/it][A
loss=0.516:  94%|█████████▎| 103

{"eval_acc": 0.8640748740100792, "eval_f1": 0.3525843476273101, "eval_acc_and_f1": 0.6083296108186946, "eval_loss": 0.5894734167626926, "learning_rate": 0.0, "train_loss": 0.45788813063076567, "step": 330}


07/18/2022 00:25:06 - INFO - utilities.trainers -   ***** Running evaluation iter-7_trial1 *****
07/18/2022 00:25:06 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:25:06 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 00:25:34 - INFO - utilities.trainers -   ***** Eval results iter-7_trial1 *****
07/18/2022 00:25:34 - INFO - utilities.trainers -     acc = 0.8640748740100792
07/18/2022 00:25:34 - INFO - utilities.trainers -     acc_and_f1 = 0.6083296108186946
07/18/2022 00:25:34 - INFO - utilities.trainers -     f1 = 0.3525843476273101
07/18/2022 00:25:37 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 00:25:42 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:25:42 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 00:25:42 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/18/2022 00:28:01 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 00:28:01 - INFO - utilities.trainers -     acc = 0.8623639191290824
07/18/2022 00:28:01 - INFO - utilities.trainers -     acc_and_f1 = 0.5978714736582981
07/18/2022 00:28:01 - INFO - utilities.trainers -     f1 = 0.3333790281875138



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 00:28:02 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 00:28:21 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 00:28:22 - INFO - utilities.trainers -   ***** Running evaluation  *****


MC samples N=None


07/18/2022 00:28:25 - INFO - utilities.trainers -     Num examples = 46500
07/18/2022 00:28:25 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 182/182 [06:10<00:00,  2.04s/it]
07/18/2022 00:34:36 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 00:34:36 - INFO - utilities.trainers -     acc = 0.866
07/18/2022 00:34:36 - INFO - utilities.trainers -     acc_and_f1 = 0.5905264513632861
07/18/2022 00:34:36 - INFO - utilities.trainers -     f1 = 0.31505290272657216
07/18/2022 00:34:36 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 00:34:55 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 00:34:58 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:34:58 - INFO - utilities.trainers -     Num examples = 3500
07/18/2022 00:34:58 - INFO - 



************
End of iteration 7:
Train loss 1.4297, Val loss 0.5894734167626926, Test loss 0.5980753265321255
Annotated 500 samples
Current labeled (training) data: 4000 samples
Remaining budget: 0 (in samples)
************

Saving json with the results....




The end!....
2451

 --dataset_name ornl20 --budget 8% --per_gpu_train_batch_size 32 --max_seq_length 256 --resume False --cap_training_pool 50000 --init random --init_train_data 1% --acquisition_size 1% --model_name_or_path wietsedv/bert-base-dutch-cased --acquisition cal --seed 2451 

device: cuda:0
output_dir=/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls
Created /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls



 /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20 





07/18/2022 00:38:53 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_ornl20_original
07/18/2022 00:38:57 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_dev_ornl20_original
07/18/2022 00:38:58 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_ornl20_original



train set stats: class 21: 17% class 20: 17% class 17: 7% class 25: 17% class 3: 17% class 2: 3% class 23: 5% class 18: 1% class 4: 1% class 1: 5% class 9: 0% class 11: 3% class 14: 0% class 5: 1% class 15: 1% class 13: 0% class 6: 2% class 22: 0% class 10: 0% class 12: 1% class 19: 0% class 0: 0% class 16: 0% class 8: 0% class 7: 0% 
validation set stats: class 25: 17% class 20: 17% class 3: 17% class 21: 17% class 17: 7% class 0: 0% class 6: 2% class 23: 5% class 2: 3% class 1: 5% class 11: 3% class 12: 1% class 4: 1% class 18: 1% class 15: 1% class 14: 0% class 5: 1% class 10: 0% class 9: 0% class 19: 0% class 8: 0% class 22: 1% class 16: 0% class 13: 0% class 7: 0% 
test set stats: class 3: 17% class 5: 1% class 25: 18% class 1: 5% class 15: 1% class 20: 17% class 21: 17% class 2: 3% class 17: 7% class 6: 2% class 4: 1% class 23: 5% class 10: 0% class 0: 0% class 11: 2% class 8: 0% class 12: 1% class 22: 0% class 9: 0% class 16: 0% class 18: 1% class 14: 0% class 13: 0% class 19: 

07/18/2022 00:40:17 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0, acc_best_iteration=0, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-1', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, evaluate_during_training=True, fp16=False, fp16_opt_level='O1', gpu='0', gradient_accumulation_steps=1, indicator=None, init='random', init_train_data=500, knn_lab=False, learning_rate=2e-05, local_rank=-1,

warmup steps: 4
total steps: 46
logging steps: 3
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.329:   6%|▋         | 1/16 [00:00<00:07,  1.93it/s][A
loss=3.294:   6%|▋         | 1/16 [00:00<00:07,  1.93it/s][A
loss=3.294:  12%|█▎        | 2/16 [00:01<00:07,  1.89it/s][A
loss=3.287:  12%|█▎        | 2/16 [00:01<00:07,  1.89it/s][A07/18/2022 00:40:44 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:40:44 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:40:44 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.01s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.01s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.01s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.04146868250539957, "eval_f1": 0.011046877500284389, "eval_acc_and_f1": 0.026257780002841978, "eval_loss": 3.2190202048846652, "learning_rate": 1.5000000000000002e-05, "train_loss": 3.3034022649129233, "step": 3}



loss=3.228:  25%|██▌       | 4/16 [00:33<01:26,  7.22s/it][A
loss=3.153:  25%|██▌       | 4/16 [00:34<01:26,  7.22s/it][A
loss=3.153:  31%|███▏      | 5/16 [00:34<00:57,  5.22s/it][A
loss=3.157:  31%|███▏      | 5/16 [00:34<00:57,  5.22s/it][A07/18/2022 00:41:18 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:41:18 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:41:18 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.18660907127429804, "eval_f1": 0.03251352096382196, "eval_acc_and_f1": 0.10956129611906, "eval_loss": 2.972302258014679, "learning_rate": 1.9090909090909094e-05, "train_loss": 3.1793177922566733, "step": 6}



loss=2.971:  44%|████▍     | 7/16 [01:09<01:28,  9.86s/it][A
loss=2.936:  44%|████▍     | 7/16 [01:09<01:28,  9.86s/it][A
loss=2.936:  50%|█████     | 8/16 [01:09<00:56,  7.07s/it][A
loss=2.875:  50%|█████     | 8/16 [01:09<00:56,  7.07s/it][A07/18/2022 00:41:53 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:41:53 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:41:53 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.25601151907847375, "eval_f1": 0.035343570406373474, "eval_acc_and_f1": 0.14567754474242361, "eval_loss": 2.7457016621317183, "learning_rate": 1.772727272727273e-05, "train_loss": 2.9273483753204346, "step": 9}



loss=2.731:  62%|██████▎   | 10/16 [01:43<01:03, 10.54s/it][A
loss=2.711:  62%|██████▎   | 10/16 [01:43<01:03, 10.54s/it][A
loss=2.711:  69%|██████▉   | 11/16 [01:43<00:37,  7.54s/it][A
loss=2.612:  69%|██████▉   | 11/16 [01:43<00:37,  7.54s/it][A07/18/2022 00:42:27 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:42:27 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:42:27 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.03s/it][A[

{"eval_acc": 0.31922246220302375, "eval_f1": 0.05090403603540146, "eval_acc_and_f1": 0.1850632491192126, "eval_loss": 2.585863394396646, "learning_rate": 1.6363636363636366e-05, "train_loss": 2.684751113255819, "step": 12}



loss=2.597:  81%|████████▏ | 13/16 [02:16<00:32, 10.70s/it][A
loss=2.544:  81%|████████▏ | 13/16 [02:16<00:32, 10.70s/it][A
loss=2.544:  88%|████████▊ | 14/16 [02:17<00:15,  7.66s/it][A
loss=2.405:  88%|████████▊ | 14/16 [02:17<00:15,  7.66s/it][A07/18/2022 00:43:01 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:43:01 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:43:01 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.4064794816414687, "eval_f1": 0.07022080610693117, "eval_acc_and_f1": 0.23835014387419992, "eval_loss": 2.463851204940251, "learning_rate": 1.5000000000000002e-05, "train_loss": 2.5157176653544107, "step": 15}



loss=2.490: 100%|██████████| 16/16 [02:53<00:00, 10.85s/it]
Epoch:  33%|███▎      | 1/3 [02:53<05:47, 173.68s/it]
Iteration:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.342:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.342:   6%|▋         | 1/16 [00:00<00:08,  1.79it/s][A
loss=2.293:   6%|▋         | 1/16 [00:00<00:08,  1.79it/s][A07/18/2022 00:43:38 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:43:38 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:43:38 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [0

{"eval_acc": 0.4433405327573794, "eval_f1": 0.08077359272643912, "eval_acc_and_f1": 0.26205706274190926, "eval_loss": 2.3729300584111894, "learning_rate": 1.3636363636363637e-05, "train_loss": 2.3749639987945557, "step": 18}



loss=2.066:  19%|█▉        | 3/16 [00:33<01:34,  7.28s/it][A
loss=2.324:  19%|█▉        | 3/16 [00:33<01:34,  7.28s/it][A
loss=2.324:  25%|██▌       | 4/16 [00:34<01:03,  5.26s/it][A
loss=2.467:  25%|██▌       | 4/16 [00:34<01:03,  5.26s/it][A07/18/2022 00:44:11 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:44:11 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:44:11 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:23,  1.09s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:22,  1.07s/it][A[A

E

{"eval_acc": 0.5105831533477322, "eval_f1": 0.09780375951763746, "eval_acc_and_f1": 0.3041934564326848, "eval_loss": 2.2982575041907176, "learning_rate": 1.2272727272727274e-05, "train_loss": 2.285445769627889, "step": 21}



loss=2.011:  31%|███▏      | 5/16 [01:08<02:33, 13.96s/it][A
loss=2.011:  38%|███▊      | 6/16 [01:09<01:39,  9.94s/it][A
loss=2.284:  38%|███▊      | 6/16 [01:09<01:39,  9.94s/it][A
loss=2.284:  44%|████▍     | 7/16 [01:09<01:04,  7.13s/it][A
loss=2.111:  44%|████▍     | 7/16 [01:09<01:04,  7.13s/it][A07/18/2022 00:44:47 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:44:47 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:44:47 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evalu

{"eval_acc": 0.5409647228221742, "eval_f1": 0.10429626109852867, "eval_acc_and_f1": 0.32263049196035143, "eval_loss": 2.2414396916116988, "learning_rate": 1.0909090909090909e-05, "train_loss": 2.135270595550537, "step": 24}



loss=2.384:  56%|█████▋    | 9/16 [01:42<01:13, 10.50s/it][A
loss=2.359:  56%|█████▋    | 9/16 [01:42<01:13, 10.50s/it][A
loss=2.359:  62%|██████▎   | 10/16 [01:43<00:45,  7.52s/it][A
loss=2.393:  62%|██████▎   | 10/16 [01:43<00:45,  7.52s/it][A07/18/2022 00:45:20 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:45:20 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:45:20 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.03s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.03s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A


{"eval_acc": 0.5566594672426206, "eval_f1": 0.10755227714424065, "eval_acc_and_f1": 0.33210587219343063, "eval_loss": 2.1922589370182584, "learning_rate": 9.545454545454547e-06, "train_loss": 2.3784584999084473, "step": 27}



loss=2.339:  75%|███████▌  | 12/16 [02:16<00:42, 10.69s/it][A
loss=2.212:  75%|███████▌  | 12/16 [02:16<00:42, 10.69s/it][A
loss=2.212:  81%|████████▏ | 13/16 [02:16<00:22,  7.65s/it][A
loss=2.508:  81%|████████▏ | 13/16 [02:17<00:22,  7.65s/it][A07/18/2022 00:45:54 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:45:54 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:45:54 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.5690424766018719, "eval_f1": 0.1085087359560982, "eval_acc_and_f1": 0.33877560627898506, "eval_loss": 2.1559222510882785, "learning_rate": 8.181818181818183e-06, "train_loss": 2.3527638912200928, "step": 30}



loss=2.251:  94%|█████████▍| 15/16 [02:53<00:11, 11.56s/it][A
loss=1.917:  94%|█████████▍| 15/16 [02:54<00:11, 11.56s/it][A
loss=1.917: 100%|██████████| 16/16 [02:54<00:00, 10.89s/it]
Epoch:  67%|██████▋   | 2/3 [05:47<02:53, 173.87s/it]
Iteration:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=1.860:   0%|          | 0/16 [00:00<?, ?it/s][A07/18/2022 00:46:31 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:46:31 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:46:31 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 

{"eval_acc": 0.566882649388049, "eval_f1": 0.10737535105345827, "eval_acc_and_f1": 0.33712900022075365, "eval_loss": 2.1285039441926137, "learning_rate": 6.818181818181818e-06, "train_loss": 2.0092347462972007, "step": 33}



loss=2.111:  12%|█▎        | 2/16 [00:36<05:51, 25.09s/it][A
loss=1.914:  12%|█▎        | 2/16 [00:36<05:51, 25.09s/it][A
loss=1.914:  19%|█▉        | 3/16 [00:36<03:50, 17.73s/it][A
loss=2.265:  19%|█▉        | 3/16 [00:36<03:50, 17.73s/it][A07/18/2022 00:47:08 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:47:08 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:47:08 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.5651547876169907, "eval_f1": 0.10739374190965441, "eval_acc_and_f1": 0.33627426476332256, "eval_loss": 2.102148941584996, "learning_rate": 5.4545454545454545e-06, "train_loss": 2.0968695084253945, "step": 36}



loss=2.143:  31%|███▏      | 5/16 [01:11<02:56, 16.02s/it][A
loss=1.875:  31%|███▏      | 5/16 [01:11<02:56, 16.02s/it][A
loss=1.875:  38%|███▊      | 6/16 [01:11<01:53, 11.38s/it][A
loss=2.153:  38%|███▊      | 6/16 [01:12<01:53, 11.38s/it][A07/18/2022 00:47:43 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:47:43 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:47:43 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.5663066954643629, "eval_f1": 0.10788168687315056, "eval_acc_and_f1": 0.3370941911687567, "eval_loss": 2.0727139030184065, "learning_rate": 4.0909090909090915e-06, "train_loss": 2.056956688563029, "step": 39}



loss=2.063:  50%|█████     | 8/16 [01:45<01:40, 12.58s/it][A
loss=2.235:  50%|█████     | 8/16 [01:45<01:40, 12.58s/it][A
loss=2.235:  56%|█████▋    | 9/16 [01:45<01:02,  8.97s/it][A
loss=1.821:  56%|█████▋    | 9/16 [01:45<01:02,  8.97s/it][A07/18/2022 00:48:17 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:48:17 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:48:17 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.566882649388049, "eval_f1": 0.10814637258799803, "eval_acc_and_f1": 0.33751451098802354, "eval_loss": 2.0468188651970456, "learning_rate": 2.7272727272727272e-06, "train_loss": 2.039588292439779, "step": 42}



loss=1.948:  69%|██████▉   | 11/16 [02:18<00:57, 11.41s/it][A
loss=1.837:  69%|██████▉   | 11/16 [02:18<00:57, 11.41s/it][A
loss=1.837:  75%|███████▌  | 12/16 [02:19<00:32,  8.16s/it][A
loss=2.182:  75%|███████▌  | 12/16 [02:19<00:32,  8.16s/it][A07/18/2022 00:48:51 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:48:51 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:48:51 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.5704823614110871, "eval_f1": 0.10892590656146343, "eval_acc_and_f1": 0.33970413398627525, "eval_loss": 2.0326700125421797, "learning_rate": 1.3636363636363636e-06, "train_loss": 1.9889432986577351, "step": 45}



loss=2.280:  88%|████████▊ | 14/16 [02:55<00:23, 11.70s/it][A
loss=1.983:  88%|████████▊ | 14/16 [02:55<00:23, 11.70s/it][A
loss=1.983:  94%|█████████▍| 15/16 [02:56<00:08,  8.36s/it][A
loss=2.101:  94%|█████████▍| 15/16 [02:56<00:08,  8.36s/it][A07/18/2022 00:49:27 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:49:27 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:49:27 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.5722102231821454, "eval_f1": 0.10938939102606407, "eval_acc_and_f1": 0.34079980710410473, "eval_loss": 2.027183907372611, "learning_rate": 0.0, "train_loss": 2.121570070584615, "step": 48}


07/18/2022 00:50:03 - INFO - utilities.trainers -   ***** Running evaluation iter-1_trial1 *****
07/18/2022 00:50:03 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 00:50:03 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 00:50:30 - INFO - utilities.trainers -   ***** Eval results iter-1_trial1 *****
07/18/2022 00:50:30 - INFO - utilities.trainers -     acc = 0.5722102231821454
07/18/2022 00:50:30 - INFO - utilities.trainers -     acc_and_f1 = 0.34079980710410473
07/18/2022 00:50:30 - INFO - utilities.trainers -     f1 = 0.10938939102606407
07/18/2022 00:50:33 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 00:50:39 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:50:39 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 00:50:39 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/18/2022 00:52:58 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 00:52:58 - INFO - utilities.trainers -     acc = 0.5787397039341052
07/18/2022 00:52:58 - INFO - utilities.trainers -     acc_and_f1 = 0.34259086104452163
07/18/2022 00:52:58 - INFO - utilities.trainers -     f1 = 0.10644201815493809



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 00:52:59 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 00:53:17 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 00:53:22 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 00:53:22 - INFO - utilities.trainers -     Num examples = 49500
07/18/2022 00:53:22 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/194 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 194/194 [06:34<00:00,  2.03s/it]
07/18/2022 00:59:58 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 00:59:58 - INFO - utilities.trainers -     acc = 0.5787474747474748
07/18/2022 00:59:58 - INFO - utilities.trainers -     acc_and_f1 = 0.3447313308258191
07/18/2022 00:59:58 - INFO - utilities.trainers -     f1 = 0.11071518690416332
07/18/2022 00:59:58 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 01:00:16 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 01:00:21 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:00:21 - INFO - utilities.trainers -     Num examples = 500
07/18/2022 01:00:21 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 2/2 [00:04<00:00,  2.01s/it]
07/18/2022 01:00:25 - INFO - utili



************
End of iteration 1:
Train loss 2.4032, Val loss 2.027183907372611, Test loss 2.0171720096293617
Annotated 500 samples
Current labeled (training) data: 1000 samples
Remaining budget: 3000 (in samples)
************

Saving json with the results....

 Start Training model of iteration 2!



07/18/2022 01:02:03 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.5722102231821454, acc_best_iteration=1, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-1', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-2', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 9
total steps: 93
logging steps: 6
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.446:   3%|▎         | 1/32 [00:00<00:15,  1.97it/s][A
loss=3.400:   3%|▎         | 1/32 [00:00<00:15,  1.97it/s][A
loss=3.400:   6%|▋         | 2/32 [00:01<00:15,  1.96it/s][A
loss=3.382:   6%|▋         | 2/32 [00:01<00:15,  1.96it/s][A
loss=3.382:   9%|▉         | 3/32 [00:01<00:14,  1.96it/s][A
loss=3.363:   9%|▉         | 3/32 [00:01<00:14,  1.96it/s][A
loss=3.363:  12%|█▎        | 4/32 [00:02<00:14,  1.96it/s][A
loss=3.297:  12%|█▎        | 4/32 [00:02<00:14,  1.96it/s][A
loss=3.297:  16%|█▌        | 5/32 [00:02<00:13,  1.95it/s][A
loss=3.210:  16%|█▌        | 5/32 [00:02<00:13,  1.95it/s][A07/18/2022 01:02:33 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:02:33 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:02:33 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋       

{"eval_acc": 0.1216702663786897, "eval_f1": 0.022388604034042338, "eval_acc_and_f1": 0.07202943520636602, "eval_loss": 3.1553479177611217, "learning_rate": 1.3333333333333333e-05, "train_loss": 3.3496557076772056, "step": 6}



loss=3.242:  22%|██▏       | 7/32 [00:35<02:58,  7.15s/it][A
loss=3.101:  22%|██▏       | 7/32 [00:35<02:58,  7.15s/it][A
loss=3.101:  25%|██▌       | 8/32 [00:35<02:03,  5.16s/it][A
loss=2.997:  25%|██▌       | 8/32 [00:35<02:03,  5.16s/it][A
loss=2.997:  28%|██▊       | 9/32 [00:36<01:26,  3.77s/it][A
loss=2.934:  28%|██▊       | 9/32 [00:36<01:26,  3.77s/it][A
loss=2.934:  31%|███▏      | 10/32 [00:36<01:01,  2.79s/it][A
loss=2.874:  31%|███▏      | 10/32 [00:36<01:01,  2.79s/it][A
loss=2.874:  34%|███▍      | 11/32 [00:37<00:44,  2.11s/it][A
loss=2.770:  34%|███▍      | 11/32 [00:37<00:44,  2.11s/it][A07/18/2022 01:03:08 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:03:08 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:03:08 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋   

{"eval_acc": 0.23585313174946004, "eval_f1": 0.03193983438850933, "eval_acc_and_f1": 0.1338964830689847, "eval_loss": 2.764160684176854, "learning_rate": 1.931034482758621e-05, "train_loss": 2.9860145648320517, "step": 12}



loss=2.904:  41%|████      | 13/32 [01:12<02:42,  8.57s/it][A
loss=2.809:  41%|████      | 13/32 [01:13<02:42,  8.57s/it][A
loss=2.809:  44%|████▍     | 14/32 [01:13<01:50,  6.15s/it][A
loss=2.602:  44%|████▍     | 14/32 [01:13<01:50,  6.15s/it][A
loss=2.602:  47%|████▋     | 15/32 [01:13<01:15,  4.46s/it][A
loss=2.673:  47%|████▋     | 15/32 [01:14<01:15,  4.46s/it][A
loss=2.673:  50%|█████     | 16/32 [01:14<00:52,  3.28s/it][A
loss=2.608:  50%|█████     | 16/32 [01:14<00:52,  3.28s/it][A
loss=2.608:  53%|█████▎    | 17/32 [01:14<00:36,  2.45s/it][A
loss=2.670:  53%|█████▎    | 17/32 [01:15<00:36,  2.45s/it][A07/18/2022 01:03:45 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:03:45 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:03:45 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.2660907127429806, "eval_f1": 0.03881337863610577, "eval_acc_and_f1": 0.15245204568954318, "eval_loss": 2.4951909610203336, "learning_rate": 1.7931034482758623e-05, "train_loss": 2.7109891970952353, "step": 18}



loss=2.455:  59%|█████▉    | 19/32 [01:51<01:55,  8.90s/it][A
loss=2.406:  59%|█████▉    | 19/32 [01:51<01:55,  8.90s/it][A
loss=2.406:  62%|██████▎   | 20/32 [01:51<01:16,  6.39s/it][A
loss=2.339:  62%|██████▎   | 20/32 [01:52<01:16,  6.39s/it][A
loss=2.339:  66%|██████▌   | 21/32 [01:52<00:50,  4.63s/it][A
loss=2.293:  66%|██████▌   | 21/32 [01:52<00:50,  4.63s/it][A
loss=2.293:  69%|██████▉   | 22/32 [01:52<00:33,  3.39s/it][A
loss=2.523:  69%|██████▉   | 22/32 [01:53<00:33,  3.39s/it][A
loss=2.523:  72%|███████▏  | 23/32 [01:53<00:22,  2.53s/it][A
loss=2.259:  72%|███████▏  | 23/32 [01:53<00:22,  2.53s/it][A07/18/2022 01:04:24 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:04:24 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:04:24 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.4033117350611951, "eval_f1": 0.07500273223464125, "eval_acc_and_f1": 0.2391572336479182, "eval_loss": 2.352418916566031, "learning_rate": 1.6551724137931037e-05, "train_loss": 2.379199822743734, "step": 24}



loss=2.307:  78%|███████▊  | 25/32 [02:30<01:02,  8.97s/it][A
loss=2.568:  78%|███████▊  | 25/32 [02:30<01:02,  8.97s/it][A
loss=2.568:  81%|████████▏ | 26/32 [02:30<00:38,  6.44s/it][A
loss=2.089:  81%|████████▏ | 26/32 [02:30<00:38,  6.44s/it][A
loss=2.089:  84%|████████▍ | 27/32 [02:31<00:23,  4.66s/it][A
loss=1.926:  84%|████████▍ | 27/32 [02:31<00:23,  4.66s/it][A
loss=1.926:  88%|████████▊ | 28/32 [02:31<00:13,  3.42s/it][A
loss=2.533:  88%|████████▊ | 28/32 [02:31<00:13,  3.42s/it][A
loss=2.533:  91%|█████████ | 29/32 [02:32<00:07,  2.55s/it][A
loss=1.903:  91%|█████████ | 29/32 [02:32<00:07,  2.55s/it][A07/18/2022 01:05:03 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:05:03 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:05:03 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.4528437724982001, "eval_f1": 0.0776219186432606, "eval_acc_and_f1": 0.26523284557073035, "eval_loss": 2.2466768452099393, "learning_rate": 1.5172413793103448e-05, "train_loss": 2.221080402533213, "step": 30}



loss=1.943:  94%|█████████▍| 30/32 [03:07<00:24, 12.45s/it][A
loss=1.943:  97%|█████████▋| 31/32 [03:08<00:08,  8.87s/it][A
loss=2.071:  97%|█████████▋| 31/32 [03:08<00:08,  8.87s/it][A
loss=2.071: 100%|██████████| 32/32 [03:08<00:00,  5.89s/it]
Epoch:  33%|███▎      | 1/3 [03:08<06:16, 188.42s/it]
Iteration:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=2.190:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=2.190:   3%|▎         | 1/32 [00:00<00:16,  1.87it/s][A
loss=2.452:   3%|▎         | 1/32 [00:00<00:16,  1.87it/s][A
loss=2.452:   6%|▋         | 2/32 [00:01<00:15,  1.89it/s][A
loss=1.973:   6%|▋         | 2/32 [00:01<00:15,  1.89it/s][A
loss=1.973:   9%|▉         | 3/32 [00:01<00:15,  1.89it/s][A
loss=1.764:   9%|▉         | 3/32 [00:01<00:15,  1.89it/s][A07/18/2022 01:05:40 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:05:40 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:05:40 - INFO - utilities.trainers -     Batch 

{"eval_acc": 0.4658027357811375, "eval_f1": 0.08149134811412932, "eval_acc_and_f1": 0.2736470419476334, "eval_loss": 2.159139241491045, "learning_rate": 1.3793103448275863e-05, "train_loss": 2.065390944480896, "step": 36}



loss=2.358:  16%|█▌        | 5/32 [00:35<03:23,  7.52s/it][A
loss=1.948:  16%|█▌        | 5/32 [00:36<03:23,  7.52s/it][A
loss=1.948:  19%|█▉        | 6/32 [00:36<02:20,  5.42s/it][A
loss=2.032:  19%|█▉        | 6/32 [00:36<02:20,  5.42s/it][A
loss=2.032:  22%|██▏       | 7/32 [00:36<01:38,  3.95s/it][A
loss=1.988:  22%|██▏       | 7/32 [00:37<01:38,  3.95s/it][A
loss=1.988:  25%|██▌       | 8/32 [00:37<01:10,  2.92s/it][A
loss=1.788:  25%|██▌       | 8/32 [00:37<01:10,  2.92s/it][A
loss=1.788:  28%|██▊       | 9/32 [00:38<00:50,  2.21s/it][A
loss=2.026:  28%|██▊       | 9/32 [00:38<00:50,  2.21s/it][A07/18/2022 01:06:17 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:06:17 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:06:17 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋       

{"eval_acc": 0.5065514758819294, "eval_f1": 0.09405141580716943, "eval_acc_and_f1": 0.30030144584454943, "eval_loss": 2.0681132120745525, "learning_rate": 1.2413793103448277e-05, "train_loss": 2.0233128865559897, "step": 42}



loss=2.053:  34%|███▍      | 11/32 [01:14<03:04,  8.77s/it][A
loss=2.360:  34%|███▍      | 11/32 [01:14<03:04,  8.77s/it][A
loss=2.360:  38%|███▊      | 12/32 [01:14<02:05,  6.29s/it][A
loss=2.036:  38%|███▊      | 12/32 [01:15<02:05,  6.29s/it][A
loss=2.036:  41%|████      | 13/32 [01:15<01:26,  4.56s/it][A
loss=1.931:  41%|████      | 13/32 [01:15<01:26,  4.56s/it][A
loss=1.931:  44%|████▍     | 14/32 [01:15<01:00,  3.35s/it][A
loss=1.912:  44%|████▍     | 14/32 [01:16<01:00,  3.35s/it][A
loss=1.912:  47%|████▋     | 15/32 [01:16<00:42,  2.50s/it][A
loss=1.831:  47%|████▋     | 15/32 [01:16<00:42,  2.50s/it][A07/18/2022 01:06:55 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:06:55 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:06:55 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.01s/it][A[A

Evaluating:   7

{"eval_acc": 0.5555075593952484, "eval_f1": 0.10468066858650546, "eval_acc_and_f1": 0.33009411399087696, "eval_loss": 1.984877837555749, "learning_rate": 1.103448275862069e-05, "train_loss": 2.0203805367151895, "step": 48}



loss=2.093:  53%|█████▎    | 17/32 [01:50<02:07,  8.50s/it][A
loss=1.888:  53%|█████▎    | 17/32 [01:51<02:07,  8.50s/it][A
loss=1.888:  56%|█████▋    | 18/32 [01:51<01:25,  6.10s/it][A
loss=1.942:  56%|█████▋    | 18/32 [01:51<01:25,  6.10s/it][A
loss=1.942:  59%|█████▉    | 19/32 [01:51<00:57,  4.43s/it][A
loss=1.944:  59%|█████▉    | 19/32 [01:52<00:57,  4.43s/it][A
loss=1.944:  62%|██████▎   | 20/32 [01:52<00:39,  3.26s/it][A
loss=1.475:  62%|██████▎   | 20/32 [01:52<00:39,  3.26s/it][A
loss=1.475:  66%|██████▌   | 21/32 [01:52<00:26,  2.44s/it][A
loss=1.774:  66%|██████▌   | 21/32 [01:53<00:26,  2.44s/it][A07/18/2022 01:07:32 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:07:32 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:07:32 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.5569474442044636, "eval_f1": 0.1063135191894195, "eval_acc_and_f1": 0.33163048169694154, "eval_loss": 1.908826972757067, "learning_rate": 9.655172413793105e-06, "train_loss": 1.8527223666508992, "step": 54}



loss=1.793:  72%|███████▏  | 23/32 [02:29<01:19,  8.83s/it][A
loss=1.869:  72%|███████▏  | 23/32 [02:29<01:19,  8.83s/it][A
loss=1.869:  75%|███████▌  | 24/32 [02:29<00:50,  6.34s/it][A
loss=1.591:  75%|███████▌  | 24/32 [02:29<00:50,  6.34s/it][A
loss=1.591:  78%|███████▊  | 25/32 [02:30<00:32,  4.59s/it][A
loss=1.420:  78%|███████▊  | 25/32 [02:30<00:32,  4.59s/it][A
loss=1.420:  81%|████████▏ | 26/32 [02:30<00:20,  3.37s/it][A
loss=2.092:  81%|████████▏ | 26/32 [02:30<00:20,  3.37s/it][A
loss=2.092:  84%|████████▍ | 27/32 [02:31<00:12,  2.52s/it][A
loss=1.905:  84%|████████▍ | 27/32 [02:31<00:12,  2.52s/it][A07/18/2022 01:08:10 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:08:10 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:08:10 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.5650107991360691, "eval_f1": 0.10746818557836907, "eval_acc_and_f1": 0.33623949235721906, "eval_loss": 1.8482070352349962, "learning_rate": 8.275862068965518e-06, "train_loss": 1.778429905573527, "step": 60}



loss=1.862:  91%|█████████ | 29/32 [03:04<00:24,  8.30s/it][A
loss=1.589:  91%|█████████ | 29/32 [03:04<00:24,  8.30s/it][A
loss=1.589:  94%|█████████▍| 30/32 [03:05<00:11,  5.97s/it][A
loss=2.169:  94%|█████████▍| 30/32 [03:05<00:11,  5.97s/it][A
loss=2.169:  97%|█████████▋| 31/32 [03:05<00:04,  4.33s/it][A
loss=1.252:  97%|█████████▋| 31/32 [03:05<00:04,  4.33s/it][A
loss=1.252: 100%|██████████| 32/32 [03:05<00:00,  5.81s/it]
Epoch:  67%|██████▋   | 2/3 [06:14<03:07, 187.65s/it]
Iteration:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.574:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.574:   3%|▎         | 1/32 [00:00<00:15,  1.95it/s][A
loss=1.671:   3%|▎         | 1/32 [00:00<00:15,  1.95it/s][A07/18/2022 01:08:45 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:08:45 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:08:45 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?i

{"eval_acc": 0.5635709143268538, "eval_f1": 0.1072212346074532, "eval_acc_and_f1": 0.33539607446715347, "eval_loss": 1.8170259084020342, "learning_rate": 6.896551724137932e-06, "train_loss": 1.6862494548161824, "step": 66}



loss=1.879:   9%|▉         | 3/32 [00:34<03:36,  7.46s/it][A
loss=1.816:   9%|▉         | 3/32 [00:34<03:36,  7.46s/it][A
loss=1.816:  12%|█▎        | 4/32 [00:35<02:30,  5.38s/it][A
loss=1.581:  12%|█▎        | 4/32 [00:35<02:30,  5.38s/it][A
loss=1.581:  16%|█▌        | 5/32 [00:35<01:45,  3.92s/it][A
loss=1.545:  16%|█▌        | 5/32 [00:35<01:45,  3.92s/it][A
loss=1.545:  19%|█▉        | 6/32 [00:36<01:15,  2.90s/it][A
loss=1.850:  19%|█▉        | 6/32 [00:36<01:15,  2.90s/it][A
loss=1.850:  22%|██▏       | 7/32 [00:36<00:54,  2.19s/it][A
loss=1.749:  22%|██▏       | 7/32 [00:36<00:54,  2.19s/it][A07/18/2022 01:09:21 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:09:21 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:09:21 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋       

{"eval_acc": 0.6156947444204464, "eval_f1": 0.11733976020495954, "eval_acc_and_f1": 0.36651725231270293, "eval_loss": 1.7427486862455095, "learning_rate": 5.517241379310345e-06, "train_loss": 1.7368279496828716, "step": 72}



loss=1.254:  28%|██▊       | 9/32 [01:12<03:17,  8.58s/it][A
loss=1.566:  28%|██▊       | 9/32 [01:12<03:17,  8.58s/it][A
loss=1.566:  31%|███▏      | 10/32 [01:12<02:15,  6.16s/it][A
loss=1.564:  31%|███▏      | 10/32 [01:12<02:15,  6.16s/it][A
loss=1.564:  34%|███▍      | 11/32 [01:13<01:33,  4.47s/it][A
loss=1.923:  34%|███▍      | 11/32 [01:13<01:33,  4.47s/it][A
loss=1.923:  38%|███▊      | 12/32 [01:13<01:05,  3.29s/it][A
loss=1.511:  38%|███▊      | 12/32 [01:13<01:05,  3.29s/it][A
loss=1.511:  41%|████      | 13/32 [01:14<00:46,  2.45s/it][A
loss=1.508:  41%|████      | 13/32 [01:14<00:46,  2.45s/it][A07/18/2022 01:09:59 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:09:59 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:09:59 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|

{"eval_acc": 0.6224622030237581, "eval_f1": 0.11881277650448684, "eval_acc_and_f1": 0.3706374897641225, "eval_loss": 1.6979927463190896, "learning_rate": 4.137931034482759e-06, "train_loss": 1.5542619228363037, "step": 78}



loss=1.631:  47%|████▋     | 15/32 [01:50<02:30,  8.85s/it][A
loss=1.332:  47%|████▋     | 15/32 [01:50<02:30,  8.85s/it][A
loss=1.332:  50%|█████     | 16/32 [01:51<01:41,  6.35s/it][A
loss=1.560:  50%|█████     | 16/32 [01:51<01:41,  6.35s/it][A
loss=1.560:  53%|█████▎    | 17/32 [01:51<01:09,  4.60s/it][A
loss=1.513:  53%|█████▎    | 17/32 [01:51<01:09,  4.60s/it][A
loss=1.513:  56%|█████▋    | 18/32 [01:52<00:47,  3.38s/it][A
loss=1.647:  56%|█████▋    | 18/32 [01:52<00:47,  3.38s/it][A
loss=1.647:  59%|█████▉    | 19/32 [01:52<00:32,  2.52s/it][A
loss=1.741:  59%|█████▉    | 19/32 [01:52<00:32,  2.52s/it][A07/18/2022 01:10:37 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:10:37 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:10:37 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.6215982721382289, "eval_f1": 0.11859082199334262, "eval_acc_and_f1": 0.37009454706578576, "eval_loss": 1.6722789577075414, "learning_rate": 2.7586206896551725e-06, "train_loss": 1.570828636487325, "step": 84}



loss=1.189:  66%|██████▌   | 21/32 [02:28<01:38,  8.92s/it][A
loss=1.391:  66%|██████▌   | 21/32 [02:29<01:38,  8.92s/it][A
loss=1.391:  69%|██████▉   | 22/32 [02:29<01:04,  6.40s/it][A
loss=1.821:  69%|██████▉   | 22/32 [02:29<01:04,  6.40s/it][A
loss=1.821:  72%|███████▏  | 23/32 [02:29<00:41,  4.64s/it][A
loss=1.414:  72%|███████▏  | 23/32 [02:30<00:41,  4.64s/it][A
loss=1.414:  75%|███████▌  | 24/32 [02:30<00:27,  3.40s/it][A
loss=1.866:  75%|███████▌  | 24/32 [02:30<00:27,  3.40s/it][A
loss=1.866:  78%|███████▊  | 25/32 [02:31<00:17,  2.54s/it][A
loss=1.488:  78%|███████▊  | 25/32 [02:31<00:17,  2.54s/it][A07/18/2022 01:11:16 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:11:16 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:11:16 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.6256299496040317, "eval_f1": 0.11946385516444968, "eval_acc_and_f1": 0.3725469023842407, "eval_loss": 1.653119010584695, "learning_rate": 1.3793103448275862e-06, "train_loss": 1.528130034605662, "step": 90}



loss=1.955:  84%|████████▍ | 27/32 [03:04<00:41,  8.28s/it][A
loss=1.628:  84%|████████▍ | 27/32 [03:04<00:41,  8.28s/it][A
loss=1.628:  88%|████████▊ | 28/32 [03:04<00:23,  5.95s/it][A
loss=1.829:  88%|████████▊ | 28/32 [03:04<00:23,  5.95s/it][A
loss=1.829:  91%|█████████ | 29/32 [03:05<00:12,  4.32s/it][A
loss=1.508:  91%|█████████ | 29/32 [03:05<00:12,  4.32s/it][A
loss=1.508:  94%|█████████▍| 30/32 [03:05<00:06,  3.18s/it][A
loss=1.741:  94%|█████████▍| 30/32 [03:06<00:06,  3.18s/it][A
loss=1.741:  97%|█████████▋| 31/32 [03:06<00:02,  2.38s/it][A
loss=1.830:  97%|█████████▋| 31/32 [03:06<00:02,  2.38s/it][A07/18/2022 01:11:51 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:11:51 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:11:51 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.03s/it][A[A

Evaluating:   7

{"eval_acc": 0.6263498920086393, "eval_f1": 0.11956868755554904, "eval_acc_and_f1": 0.3729592897820942, "eval_loss": 1.646863886288234, "learning_rate": 0.0, "train_loss": 1.7483650644620259, "step": 96}


07/18/2022 01:12:28 - INFO - utilities.trainers -   ***** Running evaluation iter-2_trial1 *****
07/18/2022 01:12:28 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:12:28 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.00it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 01:12:56 - INFO - utilities.trainers -   ***** Eval results iter-2_trial1 *****
07/18/2022 01:12:56 - INFO - utilities.trainers -     acc = 0.6263498920086393
07/18/2022 01:12:56 - INFO - utilities.trainers -     acc_and_f1 = 0.3729592897820942
07/18/2022 01:12:56 - INFO - utilities.trainers -     f1 = 0.11956868755554904
07/18/2022 01:12:58 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 01:13:04 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:13:04 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 01:13:04 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 01:15:24 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 01:15:24 - INFO - utilities.trainers -     acc = 0.62793041875468
07/18/2022 01:15:24 - INFO - utilities.trainers -     acc_and_f1 = 0.3715934475231534
07/18/2022 01:15:24 - INFO - utilities.trainers -     f1 = 0.11525647629162673



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 01:15:24 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 01:15:43 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 01:15:43 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:15:43 - INFO - utilities.trainers -     Num examples = 49000


MC samples N=None


07/18/2022 01:15:49 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 192/192 [06:31<00:00,  2.04s/it]
07/18/2022 01:22:21 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 01:22:21 - INFO - utilities.trainers -     acc = 0.6259183673469387
07/18/2022 01:22:21 - INFO - utilities.trainers -     acc_and_f1 = 0.3726963380585738
07/18/2022 01:22:21 - INFO - utilities.trainers -     f1 = 0.11947430877020886
07/18/2022 01:22:21 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 01:22:39 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 01:22:39 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:22:39 - INFO - utilities.trainers -     Num examples = 1000
07/18/2022 01:22:39 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|███



************
End of iteration 2:
Train loss 2.0757, Val loss 1.646863886288234, Test loss 1.6494404594687855
Annotated 500 samples
Current labeled (training) data: 1500 samples
Remaining budget: 2500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 3!



07/18/2022 01:25:04 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.6263498920086393, acc_best_iteration=2, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-2', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-3', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 14
total steps: 140
logging steps: 9
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.145:   2%|▏         | 1/47 [00:00<00:22,  2.03it/s][A
loss=3.177:   2%|▏         | 1/47 [00:00<00:22,  2.03it/s][A
loss=3.177:   4%|▍         | 2/47 [00:01<00:22,  1.99it/s][A
loss=3.128:   4%|▍         | 2/47 [00:01<00:22,  1.99it/s][A
loss=3.128:   6%|▋         | 3/47 [00:01<00:22,  1.96it/s][A
loss=3.267:   6%|▋         | 3/47 [00:01<00:22,  1.96it/s][A
loss=3.267:   9%|▊         | 4/47 [00:02<00:22,  1.94it/s][A
loss=3.212:   9%|▊         | 4/47 [00:02<00:22,  1.94it/s][A
loss=3.212:  11%|█         | 5/47 [00:02<00:21,  1.92it/s][A
loss=3.161:  11%|█         | 5/47 [00:02<00:21,  1.92it/s][A
loss=3.161:  13%|█▎        | 6/47 [00:03<00:21,  1.91it/s][A
loss=3.117:  13%|█▎        | 6/47 [00:03<00:21,  1.91it/s][A
loss=3.117:  15%|█▍        | 7/47 [00:03<00:21,  1.90it/s][A
loss=3.149:  15%|█▍        | 7/47 [00:03<00:21,  1.90it/s][A
loss=3.149:  17%|█▋        | 8/47 [00:04<00:20,  1.90it/s][A
loss=3.136:  17%|█▋        | 8/47 [00:04<00:20,  1.90it/s][A07/18/20

{"eval_acc": 0.1007919366450684, "eval_f1": 0.013322601830382324, "eval_acc_and_f1": 0.05705726923772536, "eval_loss": 3.048706693308694, "learning_rate": 1.2857142857142859e-05, "train_loss": 3.1657301319970026, "step": 9}



loss=3.098:  21%|██▏       | 10/47 [00:36<04:25,  7.17s/it][A
loss=2.981:  21%|██▏       | 10/47 [00:37<04:25,  7.17s/it][A
loss=2.981:  23%|██▎       | 11/47 [00:37<03:06,  5.18s/it][A
loss=2.937:  23%|██▎       | 11/47 [00:37<03:06,  5.18s/it][A
loss=2.937:  26%|██▌       | 12/47 [00:37<02:12,  3.79s/it][A
loss=2.881:  26%|██▌       | 12/47 [00:38<02:12,  3.79s/it][A
loss=2.881:  28%|██▊       | 13/47 [00:38<01:35,  2.81s/it][A
loss=2.846:  28%|██▊       | 13/47 [00:38<01:35,  2.81s/it][A
loss=2.846:  30%|██▉       | 14/47 [00:39<01:10,  2.13s/it][A
loss=2.805:  30%|██▉       | 14/47 [00:39<01:10,  2.13s/it][A
loss=2.805:  32%|███▏      | 15/47 [00:39<00:52,  1.65s/it][A
loss=2.723:  32%|███▏      | 15/47 [00:39<00:52,  1.65s/it][A
loss=2.723:  34%|███▍      | 16/47 [00:40<00:40,  1.32s/it][A
loss=2.764:  34%|███▍      | 16/47 [00:40<00:40,  1.32s/it][A
loss=2.764:  36%|███▌      | 17/47 [00:40<00:32,  1.08s/it][A
loss=2.556:  36%|███▌      | 17/47 [00:40<00:32,  1.08

{"eval_acc": 0.2790496760259179, "eval_f1": 0.03547700702227171, "eval_acc_and_f1": 0.1572633415240948, "eval_loss": 2.620921552181244, "learning_rate": 1.937007874015748e-05, "train_loss": 2.843491236368815, "step": 18}



loss=2.725:  40%|████      | 19/47 [01:16<03:47,  8.13s/it][A
loss=2.582:  40%|████      | 19/47 [01:16<03:47,  8.13s/it][A
loss=2.582:  43%|████▎     | 20/47 [01:17<02:37,  5.85s/it][A
loss=2.625:  43%|████▎     | 20/47 [01:17<02:37,  5.85s/it][A
loss=2.625:  45%|████▍     | 21/47 [01:17<01:50,  4.25s/it][A
loss=2.657:  45%|████▍     | 21/47 [01:17<01:50,  4.25s/it][A
loss=2.657:  47%|████▋     | 22/47 [01:18<01:18,  3.14s/it][A
loss=2.673:  47%|████▋     | 22/47 [01:18<01:18,  3.14s/it][A
loss=2.673:  49%|████▉     | 23/47 [01:18<00:56,  2.36s/it][A
loss=2.604:  49%|████▉     | 23/47 [01:18<00:56,  2.36s/it][A
loss=2.604:  51%|█████     | 24/47 [01:19<00:41,  1.81s/it][A
loss=2.386:  51%|█████     | 24/47 [01:19<00:41,  1.81s/it][A
loss=2.386:  53%|█████▎    | 25/47 [01:19<00:31,  1.43s/it][A
loss=2.302:  53%|█████▎    | 25/47 [01:19<00:31,  1.43s/it][A
loss=2.302:  55%|█████▌    | 26/47 [01:20<00:24,  1.16s/it][A
loss=2.676:  55%|█████▌    | 26/47 [01:20<00:24,  1.16

{"eval_acc": 0.2017278617710583, "eval_f1": 0.02423223185597107, "eval_acc_and_f1": 0.11298004681351469, "eval_loss": 2.4362751245498657, "learning_rate": 1.7952755905511813e-05, "train_loss": 2.58099537425571, "step": 27}



loss=2.524:  60%|█████▉    | 28/47 [01:56<02:34,  8.13s/it][A
loss=2.205:  60%|█████▉    | 28/47 [01:56<02:34,  8.13s/it][A
loss=2.205:  62%|██████▏   | 29/47 [01:56<01:45,  5.85s/it][A
loss=2.601:  62%|██████▏   | 29/47 [01:56<01:45,  5.85s/it][A
loss=2.601:  64%|██████▍   | 30/47 [01:57<01:12,  4.26s/it][A
loss=2.226:  64%|██████▍   | 30/47 [01:57<01:12,  4.26s/it][A
loss=2.226:  66%|██████▌   | 31/47 [01:57<00:50,  3.14s/it][A
loss=2.342:  66%|██████▌   | 31/47 [01:57<00:50,  3.14s/it][A
loss=2.342:  68%|██████▊   | 32/47 [01:58<00:35,  2.36s/it][A
loss=2.247:  68%|██████▊   | 32/47 [01:58<00:35,  2.36s/it][A
loss=2.247:  70%|███████   | 33/47 [01:58<00:25,  1.81s/it][A
loss=2.470:  70%|███████   | 33/47 [01:58<00:25,  1.81s/it][A
loss=2.470:  72%|███████▏  | 34/47 [01:59<00:18,  1.43s/it][A
loss=2.127:  72%|███████▏  | 34/47 [01:59<00:18,  1.43s/it][A
loss=2.127:  74%|███████▍  | 35/47 [01:59<00:13,  1.16s/it][A
loss=2.267:  74%|███████▍  | 35/47 [01:59<00:13,  1.16

{"eval_acc": 0.32138228941684666, "eval_f1": 0.0385867701544241, "eval_acc_and_f1": 0.17998452978563537, "eval_loss": 2.3039209757532393, "learning_rate": 1.6535433070866142e-05, "train_loss": 2.3343594074249268, "step": 36}



loss=2.439:  77%|███████▋  | 36/47 [02:36<02:09, 11.79s/it][A
loss=2.439:  79%|███████▊  | 37/47 [02:36<01:24,  8.41s/it][A
loss=2.106:  79%|███████▊  | 37/47 [02:37<01:24,  8.41s/it][A
loss=2.106:  81%|████████  | 38/47 [02:37<00:54,  6.05s/it][A
loss=2.096:  81%|████████  | 38/47 [02:37<00:54,  6.05s/it][A
loss=2.096:  83%|████████▎ | 39/47 [02:38<00:35,  4.40s/it][A
loss=2.376:  83%|████████▎ | 39/47 [02:38<00:35,  4.40s/it][A
loss=2.376:  85%|████████▌ | 40/47 [02:38<00:22,  3.24s/it][A
loss=2.223:  85%|████████▌ | 40/47 [02:38<00:22,  3.24s/it][A
loss=2.223:  87%|████████▋ | 41/47 [02:39<00:14,  2.43s/it][A
loss=2.217:  87%|████████▋ | 41/47 [02:39<00:14,  2.43s/it][A
loss=2.217:  89%|████████▉ | 42/47 [02:39<00:09,  1.86s/it][A
loss=2.079:  89%|████████▉ | 42/47 [02:39<00:09,  1.86s/it][A
loss=2.079:  91%|█████████▏| 43/47 [02:40<00:05,  1.47s/it][A
loss=2.059:  91%|█████████▏| 43/47 [02:40<00:05,  1.47s/it][A
loss=2.059:  94%|█████████▎| 44/47 [02:40<00:03,  1.19

{"eval_acc": 0.4427645788336933, "eval_f1": 0.07376395424853018, "eval_acc_and_f1": 0.25826426654111173, "eval_loss": 2.118187210389546, "learning_rate": 1.5118110236220473e-05, "train_loss": 2.2392293877071805, "step": 45}



loss=2.226:  98%|█████████▊| 46/47 [03:14<00:07,  7.63s/it][A
loss=2.401:  98%|█████████▊| 46/47 [03:14<00:07,  7.63s/it][A
loss=2.401: 100%|██████████| 47/47 [03:14<00:00,  4.14s/it]
Epoch:  33%|███▎      | 1/3 [03:14<06:29, 194.54s/it]
Iteration:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.923:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.923:   2%|▏         | 1/47 [00:00<00:24,  1.87it/s][A
loss=1.988:   2%|▏         | 1/47 [00:00<00:24,  1.87it/s][A
loss=1.988:   4%|▍         | 2/47 [00:01<00:23,  1.88it/s][A
loss=1.936:   4%|▍         | 2/47 [00:01<00:23,  1.88it/s][A
loss=1.936:   6%|▋         | 3/47 [00:01<00:23,  1.88it/s][A
loss=1.863:   6%|▋         | 3/47 [00:01<00:23,  1.88it/s][A
loss=1.863:   9%|▊         | 4/47 [00:02<00:23,  1.87it/s][A
loss=2.128:   9%|▊         | 4/47 [00:02<00:23,  1.87it/s][A
loss=2.128:  11%|█         | 5/47 [00:02<00:22,  1.86it/s][A
loss=2.162:  11%|█         | 5/47 [00:02<00:22,  1.86it/s][A
loss=2.162:  13%|█▎        | 6/4

{"eval_acc": 0.484809215262779, "eval_f1": 0.08233762160036356, "eval_acc_and_f1": 0.28357341843157124, "eval_loss": 1.9979033427579063, "learning_rate": 1.3700787401574804e-05, "train_loss": 2.1117942465676203, "step": 54}



loss=1.970:  17%|█▋        | 8/47 [00:37<04:56,  7.60s/it][A
loss=1.884:  17%|█▋        | 8/47 [00:38<04:56,  7.60s/it][A
loss=1.884:  19%|█▉        | 9/47 [00:38<03:28,  5.48s/it][A
loss=1.754:  19%|█▉        | 9/47 [00:38<03:28,  5.48s/it][A
loss=1.754:  21%|██▏       | 10/47 [00:38<02:27,  3.99s/it][A
loss=2.188:  21%|██▏       | 10/47 [00:39<02:27,  3.99s/it][A
loss=2.188:  23%|██▎       | 11/47 [00:39<01:46,  2.96s/it][A
loss=1.728:  23%|██▎       | 11/47 [00:39<01:46,  2.96s/it][A
loss=1.728:  26%|██▌       | 12/47 [00:40<01:18,  2.23s/it][A
loss=2.360:  26%|██▌       | 12/47 [00:40<01:18,  2.23s/it][A
loss=2.360:  28%|██▊       | 13/47 [00:40<00:58,  1.72s/it][A
loss=2.182:  28%|██▊       | 13/47 [00:40<00:58,  1.72s/it][A
loss=2.182:  30%|██▉       | 14/47 [00:41<00:45,  1.37s/it][A
loss=1.940:  30%|██▉       | 14/47 [00:41<00:45,  1.37s/it][A
loss=1.940:  32%|███▏      | 15/47 [00:41<00:35,  1.12s/it][A
loss=1.762:  32%|███▏      | 15/47 [00:41<00:35,  1.12s/it

{"eval_acc": 0.47976961843052557, "eval_f1": 0.080551980378738, "eval_acc_and_f1": 0.2801607994046318, "eval_loss": 1.8806081031050002, "learning_rate": 1.2283464566929135e-05, "train_loss": 1.974281562699212, "step": 63}



loss=2.046:  36%|███▌      | 17/47 [01:16<03:59,  7.98s/it][A
loss=1.850:  36%|███▌      | 17/47 [01:16<03:59,  7.98s/it][A
loss=1.850:  38%|███▊      | 18/47 [01:17<02:46,  5.74s/it][A
loss=2.042:  38%|███▊      | 18/47 [01:17<02:46,  5.74s/it][A
loss=2.042:  40%|████      | 19/47 [01:17<01:57,  4.18s/it][A
loss=1.912:  40%|████      | 19/47 [01:18<01:57,  4.18s/it][A
loss=1.912:  43%|████▎     | 20/47 [01:18<01:23,  3.09s/it][A
loss=1.831:  43%|████▎     | 20/47 [01:18<01:23,  3.09s/it][A
loss=1.831:  45%|████▍     | 21/47 [01:18<01:00,  2.32s/it][A
loss=1.678:  45%|████▍     | 21/47 [01:19<01:00,  2.32s/it][A
loss=1.678:  47%|████▋     | 22/47 [01:19<00:44,  1.79s/it][A
loss=2.039:  47%|████▋     | 22/47 [01:19<00:44,  1.79s/it][A
loss=2.039:  49%|████▉     | 23/47 [01:20<00:33,  1.41s/it][A
loss=2.024:  49%|████▉     | 23/47 [01:20<00:33,  1.41s/it][A
loss=2.024:  51%|█████     | 24/47 [01:20<00:26,  1.15s/it][A
loss=1.606:  51%|█████     | 24/47 [01:20<00:26,  1.15

{"eval_acc": 0.553491720662347, "eval_f1": 0.11463776912878139, "eval_acc_and_f1": 0.3340647448955642, "eval_loss": 1.7553656995296478, "learning_rate": 1.0866141732283466e-05, "train_loss": 1.8921272489759657, "step": 72}



loss=1.776:  55%|█████▌    | 26/47 [01:53<02:39,  7.59s/it][A
loss=1.661:  55%|█████▌    | 26/47 [01:53<02:39,  7.59s/it][A
loss=1.661:  57%|█████▋    | 27/47 [01:54<01:49,  5.48s/it][A
loss=1.556:  57%|█████▋    | 27/47 [01:54<01:49,  5.48s/it][A
loss=1.556:  60%|█████▉    | 28/47 [01:54<01:15,  3.99s/it][A
loss=2.049:  60%|█████▉    | 28/47 [01:55<01:15,  3.99s/it][A
loss=2.049:  62%|██████▏   | 29/47 [01:55<00:53,  2.95s/it][A
loss=1.564:  62%|██████▏   | 29/47 [01:55<00:53,  2.95s/it][A
loss=1.564:  64%|██████▍   | 30/47 [01:55<00:37,  2.23s/it][A
loss=2.200:  64%|██████▍   | 30/47 [01:56<00:37,  2.23s/it][A
loss=2.200:  66%|██████▌   | 31/47 [01:56<00:27,  1.72s/it][A
loss=1.505:  66%|██████▌   | 31/47 [01:56<00:27,  1.72s/it][A
loss=1.505:  68%|██████▊   | 32/47 [01:57<00:20,  1.37s/it][A
loss=1.814:  68%|██████▊   | 32/47 [01:57<00:20,  1.37s/it][A
loss=1.814:  70%|███████   | 33/47 [01:57<00:15,  1.12s/it][A
loss=1.852:  70%|███████   | 33/47 [01:57<00:15,  1.12

{"eval_acc": 0.5527717782577394, "eval_f1": 0.11064374700842837, "eval_acc_and_f1": 0.3317077626330839, "eval_loss": 1.6482871600559779, "learning_rate": 9.448818897637797e-06, "train_loss": 1.7753097878562079, "step": 81}



loss=1.639:  74%|███████▍  | 35/47 [02:32<01:35,  7.95s/it][A
loss=1.632:  74%|███████▍  | 35/47 [02:32<01:35,  7.95s/it][A
loss=1.632:  77%|███████▋  | 36/47 [02:33<01:02,  5.72s/it][A
loss=1.625:  77%|███████▋  | 36/47 [02:33<01:02,  5.72s/it][A
loss=1.625:  79%|███████▊  | 37/47 [02:33<00:41,  4.17s/it][A
loss=1.683:  79%|███████▊  | 37/47 [02:33<00:41,  4.17s/it][A
loss=1.683:  81%|████████  | 38/47 [02:34<00:27,  3.08s/it][A
loss=1.340:  81%|████████  | 38/47 [02:34<00:27,  3.08s/it][A
loss=1.340:  83%|████████▎ | 39/47 [02:34<00:18,  2.32s/it][A
loss=1.567:  83%|████████▎ | 39/47 [02:34<00:18,  2.32s/it][A
loss=1.567:  85%|████████▌ | 40/47 [02:35<00:12,  1.79s/it][A
loss=1.399:  85%|████████▌ | 40/47 [02:35<00:12,  1.79s/it][A
loss=1.399:  87%|████████▋ | 41/47 [02:35<00:08,  1.41s/it][A
loss=1.360:  87%|████████▋ | 41/47 [02:35<00:08,  1.41s/it][A
loss=1.360:  89%|████████▉ | 42/47 [02:36<00:05,  1.15s/it][A
loss=1.487:  89%|████████▉ | 42/47 [02:36<00:05,  1.15

{"eval_acc": 0.6080633549316055, "eval_f1": 0.1338420949470995, "eval_acc_and_f1": 0.3709527249393525, "eval_loss": 1.561890870332718, "learning_rate": 8.031496062992128e-06, "train_loss": 1.52582761976454, "step": 90}



loss=1.714:  94%|█████████▎| 44/47 [03:09<00:22,  7.59s/it][A
loss=1.619:  94%|█████████▎| 44/47 [03:09<00:22,  7.59s/it][A
loss=1.619:  96%|█████████▌| 45/47 [03:10<00:10,  5.47s/it][A
loss=1.657:  96%|█████████▌| 45/47 [03:10<00:10,  5.47s/it][A
loss=1.657:  98%|█████████▊| 46/47 [03:10<00:03,  3.99s/it][A
loss=1.644:  98%|█████████▊| 46/47 [03:10<00:03,  3.99s/it][A
loss=1.644: 100%|██████████| 47/47 [03:11<00:00,  4.07s/it]
Epoch:  67%|██████▋   | 2/3 [06:25<03:13, 193.52s/it]
Iteration:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.788:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.788:   2%|▏         | 1/47 [00:00<00:24,  1.87it/s][A
loss=1.428:   2%|▏         | 1/47 [00:00<00:24,  1.87it/s][A
loss=1.428:   4%|▍         | 2/47 [00:01<00:23,  1.88it/s][A
loss=1.546:   4%|▍         | 2/47 [00:01<00:23,  1.88it/s][A
loss=1.546:   6%|▋         | 3/47 [00:01<00:23,  1.87it/s][A
loss=1.357:   6%|▋         | 3/47 [00:01<00:23,  1.87it/s][A
loss=1.357:   9%|▊         |

{"eval_acc": 0.6344132469402448, "eval_f1": 0.14294335908347863, "eval_acc_and_f1": 0.38867830301186174, "eval_loss": 1.4759231805801392, "learning_rate": 6.614173228346458e-06, "train_loss": 1.5650029977162678, "step": 99}



loss=1.507:  13%|█▎        | 6/47 [00:36<05:08,  7.53s/it][A
loss=1.571:  13%|█▎        | 6/47 [00:36<05:08,  7.53s/it][A
loss=1.571:  15%|█▍        | 7/47 [00:37<03:37,  5.43s/it][A
loss=1.226:  15%|█▍        | 7/47 [00:37<03:37,  5.43s/it][A
loss=1.226:  17%|█▋        | 8/47 [00:37<02:34,  3.96s/it][A
loss=1.659:  17%|█▋        | 8/47 [00:37<02:34,  3.96s/it][A
loss=1.659:  19%|█▉        | 9/47 [00:38<01:51,  2.94s/it][A
loss=1.574:  19%|█▉        | 9/47 [00:38<01:51,  2.94s/it][A
loss=1.574:  21%|██▏       | 10/47 [00:38<01:22,  2.22s/it][A
loss=1.976:  21%|██▏       | 10/47 [00:38<01:22,  2.22s/it][A
loss=1.976:  23%|██▎       | 11/47 [00:39<01:01,  1.72s/it][A
loss=1.457:  23%|██▎       | 11/47 [00:39<01:01,  1.72s/it][A
loss=1.457:  26%|██▌       | 12/47 [00:39<00:47,  1.36s/it][A
loss=1.650:  26%|██▌       | 12/47 [00:39<00:47,  1.36s/it][A
loss=1.650:  28%|██▊       | 13/47 [00:40<00:38,  1.12s/it][A
loss=1.166:  28%|██▊       | 13/47 [00:40<00:38,  1.12s/it][A

{"eval_acc": 0.6802015838732901, "eval_f1": 0.1611361340053509, "eval_acc_and_f1": 0.4206688589393205, "eval_loss": 1.411988194499697, "learning_rate": 5.196850393700788e-06, "train_loss": 1.5318672921922472, "step": 108}



loss=0.968:  32%|███▏      | 15/47 [01:13<04:03,  7.61s/it][A
loss=1.347:  32%|███▏      | 15/47 [01:13<04:03,  7.61s/it][A
loss=1.347:  34%|███▍      | 16/47 [01:14<02:50,  5.49s/it][A
loss=1.359:  34%|███▍      | 16/47 [01:14<02:50,  5.49s/it][A
loss=1.359:  36%|███▌      | 17/47 [01:14<02:00,  4.00s/it][A
loss=1.384:  36%|███▌      | 17/47 [01:14<02:00,  4.00s/it][A
loss=1.384:  38%|███▊      | 18/47 [01:15<01:25,  2.96s/it][A
loss=1.364:  38%|███▊      | 18/47 [01:15<01:25,  2.96s/it][A
loss=1.364:  40%|████      | 19/47 [01:15<01:02,  2.23s/it][A
loss=1.278:  40%|████      | 19/47 [01:15<01:02,  2.23s/it][A
loss=1.278:  43%|████▎     | 20/47 [01:16<00:46,  1.73s/it][A
loss=1.429:  43%|████▎     | 20/47 [01:16<00:46,  1.73s/it][A
loss=1.429:  45%|████▍     | 21/47 [01:16<00:35,  1.37s/it][A
loss=1.451:  45%|████▍     | 21/47 [01:17<00:35,  1.37s/it][A
loss=1.451:  47%|████▋     | 22/47 [01:17<00:28,  1.13s/it][A
loss=1.244:  47%|████▋     | 22/47 [01:17<00:28,  1.13

{"eval_acc": 0.6796256299496041, "eval_f1": 0.16120009865866344, "eval_acc_and_f1": 0.42041286430413377, "eval_loss": 1.3682175832135337, "learning_rate": 3.7795275590551182e-06, "train_loss": 1.3136788209279378, "step": 117}



loss=1.332:  51%|█████     | 24/47 [01:53<03:06,  8.12s/it][A
loss=1.574:  51%|█████     | 24/47 [01:53<03:06,  8.12s/it][A
loss=1.574:  53%|█████▎    | 25/47 [01:53<02:08,  5.84s/it][A
loss=1.331:  53%|█████▎    | 25/47 [01:53<02:08,  5.84s/it][A
loss=1.331:  55%|█████▌    | 26/47 [01:54<01:29,  4.26s/it][A
loss=1.762:  55%|█████▌    | 26/47 [01:54<01:29,  4.26s/it][A
loss=1.762:  57%|█████▋    | 27/47 [01:54<01:02,  3.14s/it][A
loss=1.348:  57%|█████▋    | 27/47 [01:55<01:02,  3.14s/it][A
loss=1.348:  60%|█████▉    | 28/47 [01:55<00:44,  2.36s/it][A
loss=1.230:  60%|█████▉    | 28/47 [01:55<00:44,  2.36s/it][A
loss=1.230:  62%|██████▏   | 29/47 [01:56<00:32,  1.82s/it][A
loss=1.090:  62%|██████▏   | 29/47 [01:56<00:32,  1.82s/it][A
loss=1.090:  64%|██████▍   | 30/47 [01:56<00:24,  1.44s/it][A
loss=1.494:  64%|██████▍   | 30/47 [01:56<00:24,  1.44s/it][A
loss=1.494:  66%|██████▌   | 31/47 [01:57<00:18,  1.17s/it][A
loss=1.994:  66%|██████▌   | 31/47 [01:57<00:18,  1.17

{"eval_acc": 0.6915766738660907, "eval_f1": 0.16693332289744015, "eval_acc_and_f1": 0.4292549983817654, "eval_loss": 1.3397679584366935, "learning_rate": 2.362204724409449e-06, "train_loss": 1.4617384407255385, "step": 126}



loss=1.212:  70%|███████   | 33/47 [02:32<01:52,  8.04s/it][A
loss=1.091:  70%|███████   | 33/47 [02:32<01:52,  8.04s/it][A
loss=1.091:  72%|███████▏  | 34/47 [02:32<01:15,  5.79s/it][A
loss=1.122:  72%|███████▏  | 34/47 [02:33<01:15,  5.79s/it][A
loss=1.122:  74%|███████▍  | 35/47 [02:33<00:50,  4.21s/it][A
loss=1.351:  74%|███████▍  | 35/47 [02:33<00:50,  4.21s/it][A
loss=1.351:  77%|███████▋  | 36/47 [02:34<00:34,  3.11s/it][A
loss=1.526:  77%|███████▋  | 36/47 [02:34<00:34,  3.11s/it][A
loss=1.526:  79%|███████▊  | 37/47 [02:34<00:23,  2.34s/it][A
loss=1.329:  79%|███████▊  | 37/47 [02:34<00:23,  2.34s/it][A
loss=1.329:  81%|████████  | 38/47 [02:35<00:16,  1.80s/it][A
loss=1.231:  81%|████████  | 38/47 [02:35<00:16,  1.80s/it][A
loss=1.231:  83%|████████▎ | 39/47 [02:35<00:11,  1.42s/it][A
loss=1.465:  83%|████████▎ | 39/47 [02:35<00:11,  1.42s/it][A
loss=1.465:  85%|████████▌ | 40/47 [02:36<00:08,  1.15s/it][A
loss=1.469:  85%|████████▌ | 40/47 [02:36<00:08,  1.15

{"eval_acc": 0.6937365010799136, "eval_f1": 0.16682206415252146, "eval_acc_and_f1": 0.4302792826162175, "eval_loss": 1.3204791290419442, "learning_rate": 9.448818897637796e-07, "train_loss": 1.3108174006144206, "step": 135}



loss=1.261:  89%|████████▉ | 42/47 [03:11<00:39,  7.93s/it][A
loss=1.481:  89%|████████▉ | 42/47 [03:11<00:39,  7.93s/it][A
loss=1.481:  91%|█████████▏| 43/47 [03:11<00:22,  5.72s/it][A
loss=1.454:  91%|█████████▏| 43/47 [03:11<00:22,  5.72s/it][A
loss=1.454:  94%|█████████▎| 44/47 [03:12<00:12,  4.17s/it][A
loss=1.202:  94%|█████████▎| 44/47 [03:12<00:12,  4.17s/it][A
loss=1.202:  96%|█████████▌| 45/47 [03:12<00:06,  3.08s/it][A
loss=1.643:  96%|█████████▌| 45/47 [03:12<00:06,  3.08s/it][A
loss=1.643:  98%|█████████▊| 46/47 [03:13<00:02,  2.32s/it][A
loss=1.633:  98%|█████████▊| 46/47 [03:13<00:02,  2.32s/it][A07/18/2022 01:35:11 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:35:11 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:35:11 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.6937365010799136, "eval_f1": 0.1664935089683954, "eval_acc_and_f1": 0.4301150050241545, "eval_loss": 1.3151823622839791, "learning_rate": 0.0, "train_loss": 0.9638173580169678, "step": 141}


07/18/2022 01:35:46 - INFO - utilities.trainers -   ***** Running evaluation iter-3_trial1 *****
07/18/2022 01:35:46 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:35:46 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 01:36:14 - INFO - utilities.trainers -   ***** Eval results iter-3_trial1 *****
07/18/2022 01:36:14 - INFO - utilities.trainers -     acc = 0.6937365010799136
07/18/2022 01:36:14 - INFO - utilities.trainers -     acc_and_f1 = 0.4301150050241545
07/18/2022 01:36:14 - INFO - utilities.trainers -     f1 = 0.1664935089683954
07/18/2022 01:36:17 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 01:36:23 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:36:23 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 01:36:23 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 01:38:42 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 01:38:42 - INFO - utilities.trainers -     acc = 0.69189562813202
07/18/2022 01:38:42 - INFO - utilities.trainers -     acc_and_f1 = 0.42692301098806806
07/18/2022 01:38:42 - INFO - utilities.trainers -     f1 = 0.16195039384411616



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 01:38:43 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 01:39:01 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 01:39:06 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:39:06 - INFO - utilities.trainers -     Num examples = 48500


MC samples N=None


07/18/2022 01:39:07 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 190/190 [06:27<00:00,  2.04s/it]
07/18/2022 01:45:35 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 01:45:35 - INFO - utilities.trainers -     acc = 0.6930927835051547
07/18/2022 01:45:35 - INFO - utilities.trainers -     acc_and_f1 = 0.4293444936929285
07/18/2022 01:45:35 - INFO - utilities.trainers -     f1 = 0.16559620388070223
07/18/2022 01:45:35 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 01:45:54 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 01:45:58 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 01:45:58 - INFO - utilities.trainers -     Num examples = 1500
07/18/2022 01:45:58 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|███



************
End of iteration 3:
Train loss 1.9526, Val loss 1.3151823622839791, Test loss 1.3184103930697721
Annotated 500 samples
Current labeled (training) data: 2000 samples
Remaining budget: 2000 (in samples)
************

Saving json with the results....

 Start Training model of iteration 4!



07/18/2022 01:48:47 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.6937365010799136, acc_best_iteration=3, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-3', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-4', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 18
total steps: 187
logging steps: 12
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.222:   2%|▏         | 1/63 [00:00<00:30,  2.03it/s][A
loss=3.175:   2%|▏         | 1/63 [00:00<00:30,  2.03it/s][A
loss=3.175:   3%|▎         | 2/63 [00:01<00:31,  1.92it/s][A
loss=3.313:   3%|▎         | 2/63 [00:01<00:31,  1.92it/s][A
loss=3.313:   5%|▍         | 3/63 [00:01<00:31,  1.89it/s][A
loss=3.118:   5%|▍         | 3/63 [00:01<00:31,  1.89it/s][A
loss=3.118:   6%|▋         | 4/63 [00:02<00:31,  1.87it/s][A
loss=3.282:   6%|▋         | 4/63 [00:02<00:31,  1.87it/s][A
loss=3.282:   8%|▊         | 5/63 [00:02<00:31,  1.86it/s][A
loss=3.209:   8%|▊         | 5/63 [00:02<00:31,  1.86it/s][A
loss=3.209:  10%|▉         | 6/63 [00:03<00:30,  1.84it/s][A
loss=3.137:  10%|▉         | 6/63 [00:03<00:30,  1.84it/s][A
loss=3.137:  11%|█         | 7/63 [00:03<00:30,  1.84it/s][A
loss=3.065:  11%|█         | 7/63 [00:03<00:30,  1.84it/s][A
loss=3.065:  13%|█▎        | 8/63 [00:04<00:29,  1.84it/s][A
loss=3.214:  13%|█▎        | 8/63 [00:04<00:29,  1.84it/s][A
loss=3.

{"eval_acc": 0.17249820014398848, "eval_f1": 0.01585395706568669, "eval_acc_and_f1": 0.09417607860483758, "eval_loss": 2.9654017090797424, "learning_rate": 1.3333333333333333e-05, "train_loss": 3.164909064769745, "step": 12}



loss=2.897:  21%|██        | 13/63 [00:38<05:59,  7.18s/it][A
loss=2.882:  21%|██        | 13/63 [00:38<05:59,  7.18s/it][A
loss=2.882:  22%|██▏       | 14/63 [00:39<04:14,  5.19s/it][A
loss=2.920:  22%|██▏       | 14/63 [00:39<04:14,  5.19s/it][A
loss=2.920:  24%|██▍       | 15/63 [00:39<03:02,  3.80s/it][A
loss=2.931:  24%|██▍       | 15/63 [00:39<03:02,  3.80s/it][A
loss=2.931:  25%|██▌       | 16/63 [00:40<02:12,  2.83s/it][A
loss=2.841:  25%|██▌       | 16/63 [00:40<02:12,  2.83s/it][A
loss=2.841:  27%|██▋       | 17/63 [00:40<01:38,  2.15s/it][A
loss=2.841:  27%|██▋       | 17/63 [00:41<01:38,  2.15s/it][A
loss=2.841:  29%|██▊       | 18/63 [00:41<01:15,  1.67s/it][A
loss=2.594:  29%|██▊       | 18/63 [00:41<01:15,  1.67s/it][A
loss=2.594:  30%|███       | 19/63 [00:42<00:58,  1.34s/it][A
loss=2.661:  30%|███       | 19/63 [00:42<00:58,  1.34s/it][A
loss=2.661:  32%|███▏      | 20/63 [00:42<00:47,  1.10s/it][A
loss=2.638:  32%|███▏      | 20/63 [00:42<00:47,  1.10

{"eval_acc": 0.30107991360691144, "eval_f1": 0.049364013223255225, "eval_acc_and_f1": 0.17522196341508334, "eval_loss": 2.46597444159644, "learning_rate": 1.929824561403509e-05, "train_loss": 2.737689177195231, "step": 24}



loss=2.467:  40%|███▉      | 25/63 [01:17<04:43,  7.47s/it][A
loss=2.303:  40%|███▉      | 25/63 [01:18<04:43,  7.47s/it][A
loss=2.303:  41%|████▏     | 26/63 [01:18<03:19,  5.39s/it][A
loss=2.512:  41%|████▏     | 26/63 [01:18<03:19,  5.39s/it][A
loss=2.512:  43%|████▎     | 27/63 [01:18<02:21,  3.94s/it][A
loss=2.491:  43%|████▎     | 27/63 [01:19<02:21,  3.94s/it][A
loss=2.491:  44%|████▍     | 28/63 [01:19<01:42,  2.93s/it][A
loss=2.202:  44%|████▍     | 28/63 [01:19<01:42,  2.93s/it][A
loss=2.202:  46%|████▌     | 29/63 [01:20<01:15,  2.21s/it][A
loss=2.464:  46%|████▌     | 29/63 [01:20<01:15,  2.21s/it][A
loss=2.464:  48%|████▊     | 30/63 [01:20<00:56,  1.71s/it][A
loss=2.493:  48%|████▊     | 30/63 [01:20<00:56,  1.71s/it][A
loss=2.493:  49%|████▉     | 31/63 [01:21<00:43,  1.37s/it][A
loss=2.163:  49%|████▉     | 31/63 [01:21<00:43,  1.37s/it][A
loss=2.163:  51%|█████     | 32/63 [01:21<00:34,  1.13s/it][A
loss=2.242:  51%|█████     | 32/63 [01:21<00:34,  1.13

{"eval_acc": 0.32167026637868973, "eval_f1": 0.05598067210270592, "eval_acc_and_f1": 0.18882546924069782, "eval_loss": 2.270237147808075, "learning_rate": 1.7894736842105264e-05, "train_loss": 2.366247753302256, "step": 36}



loss=2.467:  59%|█████▊    | 37/63 [01:58<03:21,  7.73s/it][A
loss=2.604:  59%|█████▊    | 37/63 [01:58<03:21,  7.73s/it][A
loss=2.604:  60%|██████    | 38/63 [01:58<02:19,  5.58s/it][A
loss=2.241:  60%|██████    | 38/63 [01:58<02:19,  5.58s/it][A
loss=2.241:  62%|██████▏   | 39/63 [01:59<01:37,  4.08s/it][A
loss=2.353:  62%|██████▏   | 39/63 [01:59<01:37,  4.08s/it][A
loss=2.353:  63%|██████▎   | 40/63 [01:59<01:09,  3.02s/it][A
loss=2.640:  63%|██████▎   | 40/63 [02:00<01:09,  3.02s/it][A
loss=2.640:  65%|██████▌   | 41/63 [02:00<00:50,  2.28s/it][A
loss=2.251:  65%|██████▌   | 41/63 [02:00<00:50,  2.28s/it][A
loss=2.251:  67%|██████▋   | 42/63 [02:01<00:37,  1.77s/it][A
loss=2.344:  67%|██████▋   | 42/63 [02:01<00:37,  1.77s/it][A
loss=2.344:  68%|██████▊   | 43/63 [02:01<00:28,  1.40s/it][A
loss=2.692:  68%|██████▊   | 43/63 [02:01<00:28,  1.40s/it][A
loss=2.692:  70%|██████▉   | 44/63 [02:02<00:21,  1.15s/it][A
loss=2.024:  70%|██████▉   | 44/63 [02:02<00:21,  1.15

{"eval_acc": 0.4794816414686825, "eval_f1": 0.09344344627260312, "eval_acc_and_f1": 0.2864625438706428, "eval_loss": 2.1201805600098202, "learning_rate": 1.649122807017544e-05, "train_loss": 2.3707149227460227, "step": 48}



loss=2.300:  78%|███████▊  | 49/63 [02:37<01:43,  7.39s/it][A
loss=2.195:  78%|███████▊  | 49/63 [02:37<01:43,  7.39s/it][A
loss=2.195:  79%|███████▉  | 50/63 [02:37<01:09,  5.34s/it][A
loss=2.370:  79%|███████▉  | 50/63 [02:37<01:09,  5.34s/it][A
loss=2.370:  81%|████████  | 51/63 [02:38<00:46,  3.90s/it][A
loss=2.008:  81%|████████  | 51/63 [02:38<00:46,  3.90s/it][A
loss=2.008:  83%|████████▎ | 52/63 [02:38<00:31,  2.90s/it][A
loss=2.031:  83%|████████▎ | 52/63 [02:38<00:31,  2.90s/it][A
loss=2.031:  84%|████████▍ | 53/63 [02:39<00:21,  2.19s/it][A
loss=1.834:  84%|████████▍ | 53/63 [02:39<00:21,  2.19s/it][A
loss=1.834:  86%|████████▌ | 54/63 [02:39<00:15,  1.70s/it][A
loss=2.027:  86%|████████▌ | 54/63 [02:39<00:15,  1.70s/it][A
loss=2.027:  87%|████████▋ | 55/63 [02:40<00:10,  1.36s/it][A
loss=1.977:  87%|████████▋ | 55/63 [02:40<00:10,  1.36s/it][A
loss=1.977:  89%|████████▉ | 56/63 [02:40<00:07,  1.12s/it][A
loss=2.204:  89%|████████▉ | 56/63 [02:41<00:07,  1.12

{"eval_acc": 0.5868970482361411, "eval_f1": 0.12203973391020277, "eval_acc_and_f1": 0.3544683910731719, "eval_loss": 1.946656448500497, "learning_rate": 1.5087719298245615e-05, "train_loss": 2.139861742655436, "step": 60}



loss=2.024:  97%|█████████▋| 61/63 [03:19<00:16,  8.10s/it][A
loss=2.101:  97%|█████████▋| 61/63 [03:19<00:16,  8.10s/it][A
loss=2.101:  98%|█████████▊| 62/63 [03:19<00:05,  5.83s/it][A
loss=2.124:  98%|█████████▊| 62/63 [03:19<00:05,  5.83s/it][A
loss=2.124: 100%|██████████| 63/63 [03:19<00:00,  3.17s/it]
Epoch:  33%|███▎      | 1/3 [03:19<06:39, 200.00s/it]
Iteration:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=2.103:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=2.103:   2%|▏         | 1/63 [00:00<00:34,  1.82it/s][A
loss=1.839:   2%|▏         | 1/63 [00:00<00:34,  1.82it/s][A
loss=1.839:   3%|▎         | 2/63 [00:01<00:33,  1.81it/s][A
loss=1.925:   3%|▎         | 2/63 [00:01<00:33,  1.81it/s][A
loss=1.925:   5%|▍         | 3/63 [00:01<00:33,  1.81it/s][A
loss=1.696:   5%|▍         | 3/63 [00:01<00:33,  1.81it/s][A
loss=1.696:   6%|▋         | 4/63 [00:02<00:32,  1.81it/s][A
loss=1.947:   6%|▋         | 4/63 [00:02<00:32,  1.81it/s][A
loss=1.947:   8%|▊         | 5

{"eval_acc": 0.649532037437005, "eval_f1": 0.1493759297985099, "eval_acc_and_f1": 0.39945398361775747, "eval_loss": 1.7702497754778181, "learning_rate": 1.3684210526315791e-05, "train_loss": 1.9676024913787842, "step": 72}



loss=2.004:  16%|█▌        | 10/63 [00:37<06:26,  7.30s/it][A
loss=1.638:  16%|█▌        | 10/63 [00:37<06:26,  7.30s/it][A
loss=1.638:  17%|█▋        | 11/63 [00:38<04:34,  5.28s/it][A
loss=1.879:  17%|█▋        | 11/63 [00:38<04:34,  5.28s/it][A
loss=1.879:  19%|█▉        | 12/63 [00:38<03:16,  3.86s/it][A
loss=1.918:  19%|█▉        | 12/63 [00:38<03:16,  3.86s/it][A
loss=1.918:  21%|██        | 13/63 [00:39<02:23,  2.87s/it][A
loss=2.053:  21%|██        | 13/63 [00:39<02:23,  2.87s/it][A
loss=2.053:  22%|██▏       | 14/63 [00:39<01:46,  2.17s/it][A
loss=2.023:  22%|██▏       | 14/63 [00:40<01:46,  2.17s/it][A
loss=2.023:  24%|██▍       | 15/63 [00:40<01:21,  1.69s/it][A
loss=1.680:  24%|██▍       | 15/63 [00:40<01:21,  1.69s/it][A
loss=1.680:  25%|██▌       | 16/63 [00:41<01:03,  1.35s/it][A
loss=1.910:  25%|██▌       | 16/63 [00:41<01:03,  1.35s/it][A
loss=1.910:  27%|██▋       | 17/63 [00:41<00:51,  1.11s/it][A
loss=1.499:  27%|██▋       | 17/63 [00:41<00:51,  1.11

{"eval_acc": 0.7046796256299496, "eval_f1": 0.1618130911557963, "eval_acc_and_f1": 0.43324635839287295, "eval_loss": 1.5833151553358351, "learning_rate": 1.2280701754385966e-05, "train_loss": 1.8186583121617634, "step": 84}



loss=1.581:  35%|███▍      | 22/63 [01:18<05:17,  7.74s/it][A
loss=1.683:  35%|███▍      | 22/63 [01:18<05:17,  7.74s/it][A
loss=1.683:  37%|███▋      | 23/63 [01:18<03:43,  5.58s/it][A
loss=1.390:  37%|███▋      | 23/63 [01:18<03:43,  5.58s/it][A
loss=1.390:  38%|███▊      | 24/63 [01:19<02:38,  4.08s/it][A
loss=1.942:  38%|███▊      | 24/63 [01:19<02:38,  4.08s/it][A
loss=1.942:  40%|███▉      | 25/63 [01:19<01:54,  3.02s/it][A
loss=1.884:  40%|███▉      | 25/63 [01:19<01:54,  3.02s/it][A
loss=1.884:  41%|████▏     | 26/63 [01:20<01:24,  2.28s/it][A
loss=2.071:  41%|████▏     | 26/63 [01:20<01:24,  2.28s/it][A
loss=2.071:  43%|████▎     | 27/63 [01:20<01:03,  1.76s/it][A
loss=1.596:  43%|████▎     | 27/63 [01:21<01:03,  1.76s/it][A
loss=1.596:  44%|████▍     | 28/63 [01:21<00:49,  1.40s/it][A
loss=1.911:  44%|████▍     | 28/63 [01:21<00:49,  1.40s/it][A
loss=1.911:  46%|████▌     | 29/63 [01:22<00:39,  1.15s/it][A
loss=2.057:  46%|████▌     | 29/63 [01:22<00:39,  1.15

{"eval_acc": 0.7160547156227501, "eval_f1": 0.164876761311959, "eval_acc_and_f1": 0.4404657384673546, "eval_loss": 1.4089573664324624, "learning_rate": 1.0877192982456142e-05, "train_loss": 1.7335827847321827, "step": 96}



loss=1.641:  54%|█████▍    | 34/63 [01:56<03:33,  7.38s/it][A
loss=1.806:  54%|█████▍    | 34/63 [01:56<03:33,  7.38s/it][A
loss=1.806:  56%|█████▌    | 35/63 [01:57<02:29,  5.33s/it][A
loss=1.756:  56%|█████▌    | 35/63 [01:57<02:29,  5.33s/it][A
loss=1.756:  57%|█████▋    | 36/63 [01:57<01:45,  3.89s/it][A
loss=1.543:  57%|█████▋    | 36/63 [01:58<01:45,  3.89s/it][A
loss=1.543:  59%|█████▊    | 37/63 [01:58<01:15,  2.89s/it][A
loss=1.393:  59%|█████▊    | 37/63 [01:58<01:15,  2.89s/it][A
loss=1.393:  60%|██████    | 38/63 [01:58<00:54,  2.19s/it][A
loss=1.498:  60%|██████    | 38/63 [01:59<00:54,  2.19s/it][A
loss=1.498:  62%|██████▏   | 39/63 [01:59<00:40,  1.70s/it][A
loss=1.517:  62%|██████▏   | 39/63 [01:59<00:40,  1.70s/it][A
loss=1.517:  63%|██████▎   | 40/63 [02:00<00:31,  1.36s/it][A
loss=1.690:  63%|██████▎   | 40/63 [02:00<00:31,  1.36s/it][A
loss=1.690:  65%|██████▌   | 41/63 [02:00<00:24,  1.12s/it][A
loss=1.455:  65%|██████▌   | 41/63 [02:00<00:24,  1.12

{"eval_acc": 0.7249820014398848, "eval_f1": 0.17124160707503028, "eval_acc_and_f1": 0.44811180425745756, "eval_loss": 1.2947829876627241, "learning_rate": 9.473684210526315e-06, "train_loss": 1.5165987213452656, "step": 108}



loss=1.555:  73%|███████▎  | 46/63 [02:35<02:05,  7.39s/it][A
loss=1.327:  73%|███████▎  | 46/63 [02:35<02:05,  7.39s/it][A
loss=1.327:  75%|███████▍  | 47/63 [02:36<01:25,  5.34s/it][A
loss=1.461:  75%|███████▍  | 47/63 [02:36<01:25,  5.34s/it][A
loss=1.461:  76%|███████▌  | 48/63 [02:36<00:58,  3.90s/it][A
loss=1.345:  76%|███████▌  | 48/63 [02:36<00:58,  3.90s/it][A
loss=1.345:  78%|███████▊  | 49/63 [02:37<00:40,  2.89s/it][A
loss=1.546:  78%|███████▊  | 49/63 [02:37<00:40,  2.89s/it][A
loss=1.546:  79%|███████▉  | 50/63 [02:37<00:28,  2.19s/it][A
loss=1.466:  79%|███████▉  | 50/63 [02:37<00:28,  2.19s/it][A
loss=1.466:  81%|████████  | 51/63 [02:38<00:20,  1.70s/it][A
loss=1.347:  81%|████████  | 51/63 [02:38<00:20,  1.70s/it][A
loss=1.347:  83%|████████▎ | 52/63 [02:38<00:14,  1.36s/it][A
loss=1.161:  83%|████████▎ | 52/63 [02:39<00:14,  1.36s/it][A
loss=1.161:  84%|████████▍ | 53/63 [02:39<00:11,  1.12s/it][A
loss=1.524:  84%|████████▍ | 53/63 [02:39<00:11,  1.12

{"eval_acc": 0.7544996400287977, "eval_f1": 0.19285219625547298, "eval_acc_and_f1": 0.47367591814213533, "eval_loss": 1.1825341497148787, "learning_rate": 8.070175438596492e-06, "train_loss": 1.417107840379079, "step": 120}



loss=1.266:  92%|█████████▏| 58/63 [03:15<00:38,  7.70s/it][A
loss=1.307:  92%|█████████▏| 58/63 [03:15<00:38,  7.70s/it][A
loss=1.307:  94%|█████████▎| 59/63 [03:16<00:22,  5.55s/it][A
loss=1.520:  94%|█████████▎| 59/63 [03:16<00:22,  5.55s/it][A
loss=1.520:  95%|█████████▌| 60/63 [03:16<00:12,  4.06s/it][A
loss=1.293:  95%|█████████▌| 60/63 [03:17<00:12,  4.06s/it][A
loss=1.293:  97%|█████████▋| 61/63 [03:17<00:06,  3.00s/it][A
loss=1.329:  97%|█████████▋| 61/63 [03:17<00:06,  3.00s/it][A
loss=1.329:  98%|█████████▊| 62/63 [03:17<00:02,  2.27s/it][A
loss=1.353:  98%|█████████▊| 62/63 [03:18<00:02,  2.27s/it][A
loss=1.353: 100%|██████████| 63/63 [03:18<00:00,  3.15s/it]
Epoch:  67%|██████▋   | 2/3 [06:38<03:19, 199.48s/it]
Iteration:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=0.923:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=0.923:   2%|▏         | 1/63 [00:00<00:34,  1.80it/s][A
loss=1.230:   2%|▏         | 1/63 [00:00<00:34,  1.80it/s][A
loss=1.230:   3%|▎      

{"eval_acc": 0.7470122390208783, "eval_f1": 0.19040112021403538, "eval_acc_and_f1": 0.4687066796174568, "eval_loss": 1.1184390591723579, "learning_rate": 6.666666666666667e-06, "train_loss": 1.247430106004079, "step": 132}



loss=1.664:  11%|█         | 7/63 [00:35<06:46,  7.26s/it][A
loss=1.181:  11%|█         | 7/63 [00:35<06:46,  7.26s/it][A
loss=1.181:  13%|█▎        | 8/63 [00:36<04:48,  5.24s/it][A
loss=1.304:  13%|█▎        | 8/63 [00:36<04:48,  5.24s/it][A
loss=1.304:  14%|█▍        | 9/63 [00:36<03:27,  3.83s/it][A
loss=1.141:  14%|█▍        | 9/63 [00:37<03:27,  3.83s/it][A
loss=1.141:  16%|█▌        | 10/63 [00:37<02:31,  2.85s/it][A
loss=1.308:  16%|█▌        | 10/63 [00:37<02:31,  2.85s/it][A
loss=1.308:  17%|█▋        | 11/63 [00:38<01:52,  2.16s/it][A
loss=1.031:  17%|█▋        | 11/63 [00:38<01:52,  2.16s/it][A
loss=1.031:  19%|█▉        | 12/63 [00:38<01:25,  1.68s/it][A
loss=1.367:  19%|█▉        | 12/63 [00:38<01:25,  1.68s/it][A
loss=1.367:  21%|██        | 13/63 [00:39<01:07,  1.34s/it][A
loss=1.269:  21%|██        | 13/63 [00:39<01:07,  1.34s/it][A
loss=1.269:  22%|██▏       | 14/63 [00:39<00:54,  1.11s/it][A
loss=1.247:  22%|██▏       | 14/63 [00:39<00:54,  1.11s/it]

{"eval_acc": 0.7670266378689705, "eval_f1": 0.20303180732805845, "eval_acc_and_f1": 0.4850292225985145, "eval_loss": 1.0501477697065897, "learning_rate": 5.263157894736842e-06, "train_loss": 1.2617046137650807, "step": 144}



loss=1.504:  30%|███       | 19/63 [01:15<05:37,  7.66s/it][A
loss=1.150:  30%|███       | 19/63 [01:15<05:37,  7.66s/it][A
loss=1.150:  32%|███▏      | 20/63 [01:16<03:57,  5.53s/it][A
loss=1.197:  32%|███▏      | 20/63 [01:16<03:57,  5.53s/it][A
loss=1.197:  33%|███▎      | 21/63 [01:16<02:49,  4.04s/it][A
loss=1.042:  33%|███▎      | 21/63 [01:17<02:49,  4.04s/it][A
loss=1.042:  35%|███▍      | 22/63 [01:17<02:02,  2.99s/it][A
loss=0.822:  35%|███▍      | 22/63 [01:17<02:02,  2.99s/it][A
loss=0.822:  37%|███▋      | 23/63 [01:18<01:30,  2.26s/it][A
loss=1.102:  37%|███▋      | 23/63 [01:18<01:30,  2.26s/it][A
loss=1.102:  38%|███▊      | 24/63 [01:18<01:08,  1.75s/it][A
loss=0.917:  38%|███▊      | 24/63 [01:18<01:08,  1.75s/it][A
loss=0.917:  40%|███▉      | 25/63 [01:19<00:52,  1.39s/it][A
loss=1.428:  40%|███▉      | 25/63 [01:19<00:52,  1.39s/it][A
loss=1.428:  41%|████▏     | 26/63 [01:19<00:42,  1.14s/it][A
loss=1.127:  41%|████▏     | 26/63 [01:19<00:42,  1.14

{"eval_acc": 0.77264218862491, "eval_f1": 0.20714244787858174, "eval_acc_and_f1": 0.48989231825174584, "eval_loss": 1.0064596512487956, "learning_rate": 3.859649122807018e-06, "train_loss": 1.1641214142243068, "step": 156}



loss=0.798:  49%|████▉     | 31/63 [01:54<03:56,  7.38s/it][A
loss=1.115:  49%|████▉     | 31/63 [01:54<03:56,  7.38s/it][A
loss=1.115:  51%|█████     | 32/63 [01:55<02:45,  5.33s/it][A
loss=1.235:  51%|█████     | 32/63 [01:55<02:45,  5.33s/it][A
loss=1.235:  52%|█████▏    | 33/63 [01:55<01:56,  3.90s/it][A
loss=1.202:  52%|█████▏    | 33/63 [01:55<01:56,  3.90s/it][A
loss=1.202:  54%|█████▍    | 34/63 [01:56<01:23,  2.89s/it][A
loss=1.301:  54%|█████▍    | 34/63 [01:56<01:23,  2.89s/it][A
loss=1.301:  56%|█████▌    | 35/63 [01:56<01:01,  2.19s/it][A
loss=1.140:  56%|█████▌    | 35/63 [01:56<01:01,  2.19s/it][A
loss=1.140:  57%|█████▋    | 36/63 [01:57<00:46,  1.71s/it][A
loss=1.326:  57%|█████▋    | 36/63 [01:57<00:46,  1.71s/it][A
loss=1.326:  59%|█████▊    | 37/63 [01:57<00:35,  1.36s/it][A
loss=1.121:  59%|█████▊    | 37/63 [01:58<00:35,  1.36s/it][A
loss=1.121:  60%|██████    | 38/63 [01:58<00:28,  1.12s/it][A
loss=0.746:  60%|██████    | 38/63 [01:58<00:28,  1.12

{"eval_acc": 0.7801295896328294, "eval_f1": 0.21770662399381444, "eval_acc_and_f1": 0.49891810681332194, "eval_loss": 0.9801827264683587, "learning_rate": 2.456140350877193e-06, "train_loss": 1.1099049647649128, "step": 168}



loss=1.332:  68%|██████▊   | 43/63 [02:34<02:34,  7.72s/it][A
loss=1.160:  68%|██████▊   | 43/63 [02:35<02:34,  7.72s/it][A
loss=1.160:  70%|██████▉   | 44/63 [02:35<01:45,  5.57s/it][A
loss=1.338:  70%|██████▉   | 44/63 [02:35<01:45,  5.57s/it][A
loss=1.338:  71%|███████▏  | 45/63 [02:35<01:13,  4.07s/it][A
loss=1.191:  71%|███████▏  | 45/63 [02:36<01:13,  4.07s/it][A
loss=1.191:  73%|███████▎  | 46/63 [02:36<00:51,  3.02s/it][A
loss=1.491:  73%|███████▎  | 46/63 [02:36<00:51,  3.02s/it][A
loss=1.491:  75%|███████▍  | 47/63 [02:37<00:36,  2.28s/it][A
loss=0.993:  75%|███████▍  | 47/63 [02:37<00:36,  2.28s/it][A
loss=0.993:  76%|███████▌  | 48/63 [02:37<00:26,  1.76s/it][A
loss=1.071:  76%|███████▌  | 48/63 [02:37<00:26,  1.76s/it][A
loss=1.071:  78%|███████▊  | 49/63 [02:38<00:19,  1.40s/it][A
loss=0.749:  78%|███████▊  | 49/63 [02:38<00:19,  1.40s/it][A
loss=0.749:  79%|███████▉  | 50/63 [02:38<00:14,  1.15s/it][A
loss=0.983:  79%|███████▉  | 50/63 [02:38<00:14,  1.15

{"eval_acc": 0.7847372210223182, "eval_f1": 0.22303088659622802, "eval_acc_and_f1": 0.5038840538092731, "eval_loss": 0.9653739333152771, "learning_rate": 1.0526315789473685e-06, "train_loss": 1.1752779185771942, "step": 180}



loss=1.048:  87%|████████▋ | 55/63 [03:15<01:02,  7.77s/it][A
loss=1.063:  87%|████████▋ | 55/63 [03:15<01:02,  7.77s/it][A
loss=1.063:  89%|████████▉ | 56/63 [03:15<00:39,  5.60s/it][A
loss=1.172:  89%|████████▉ | 56/63 [03:16<00:39,  5.60s/it][A
loss=1.172:  90%|█████████ | 57/63 [03:16<00:24,  4.09s/it][A
loss=0.720:  90%|█████████ | 57/63 [03:16<00:24,  4.09s/it][A
loss=0.720:  92%|█████████▏| 58/63 [03:17<00:15,  3.03s/it][A
loss=0.973:  92%|█████████▏| 58/63 [03:17<00:15,  3.03s/it][A
loss=0.973:  94%|█████████▎| 59/63 [03:17<00:09,  2.29s/it][A
loss=1.062:  94%|█████████▎| 59/63 [03:17<00:09,  2.29s/it][A
loss=1.062:  95%|█████████▌| 60/63 [03:18<00:05,  1.77s/it][A
loss=1.245:  95%|█████████▌| 60/63 [03:18<00:05,  1.77s/it][A
loss=1.245:  97%|█████████▋| 61/63 [03:18<00:02,  1.40s/it][A
loss=0.852:  97%|█████████▋| 61/63 [03:18<00:02,  1.40s/it][A
loss=0.852:  98%|█████████▊| 62/63 [03:19<00:01,  1.15s/it][A
loss=1.520:  98%|█████████▊| 62/63 [03:19<00:01,  1.15

{"eval_acc": 0.7867530597552196, "eval_f1": 0.22342957599750207, "eval_acc_and_f1": 0.5050913178763609, "eval_loss": 0.9591873799051557, "learning_rate": 0.0, "train_loss": 0.8044830660025278, "step": 189}


07/18/2022 01:59:48 - INFO - utilities.trainers -   ***** Running evaluation iter-4_trial1 *****
07/18/2022 01:59:48 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 01:59:48 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.00it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 02:00:16 - INFO - utilities.trainers -   ***** Eval results iter-4_trial1 *****
07/18/2022 02:00:16 - INFO - utilities.trainers -     acc = 0.7867530597552196
07/18/2022 02:00:16 - INFO - utilities.trainers -     acc_and_f1 = 0.5050913178763609
07/18/2022 02:00:16 - INFO - utilities.trainers -     f1 = 0.22342957599750207
07/18/2022 02:00:19 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 02:00:24 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 02:00:24 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 02:00:25 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 02:02:44 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 02:02:44 - INFO - utilities.trainers -     acc = 0.7844594205402915
07/18/2022 02:02:44 - INFO - utilities.trainers -     acc_and_f1 = 0.49805967293981596
07/18/2022 02:02:44 - INFO - utilities.trainers -     f1 = 0.21165992533934042



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 02:02:45 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 02:03:03 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 02:03:09 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 02:03:09 - INFO - utilities.trainers -     Num examples = 48000
07/18/2022 02:03:09 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/188 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 188/188 [06:22<00:00,  2.04s/it]
07/18/2022 02:09:33 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 02:09:33 - INFO - utilities.trainers -     acc = 0.7841458333333333
07/18/2022 02:09:33 - INFO - utilities.trainers -     acc_and_f1 = 0.5010036364898176
07/18/2022 02:09:33 - INFO - utilities.trainers -     f1 = 0.2178614396463018
07/18/2022 02:09:33 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 02:09:51 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 02:09:56 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 02:09:56 - INFO - utilities.trainers -     Num examples = 2000
07/18/2022 02:09:56 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 8/8 [00:16<00:00,  2.00s/it]
07/18/2022 02:10:12 - INFO - utili



************
End of iteration 4:
Train loss 1.7775, Val loss 0.9591873799051557, Test loss 0.9687804347451996
Annotated 500 samples
Current labeled (training) data: 2500 samples
Remaining budget: 1500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 5!



07/18/2022 02:12:44 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.7867530597552196, acc_best_iteration=4, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-4', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-5', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 23
total steps: 234
logging steps: 15
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.357:   1%|▏         | 1/79 [00:00<00:37,  2.07it/s][A
loss=3.450:   1%|▏         | 1/79 [00:00<00:37,  2.07it/s][A
loss=3.450:   3%|▎         | 2/79 [00:01<00:38,  1.99it/s][A
loss=3.367:   3%|▎         | 2/79 [00:01<00:38,  1.99it/s][A
loss=3.367:   4%|▍         | 3/79 [00:01<00:39,  1.94it/s][A
loss=3.340:   4%|▍         | 3/79 [00:01<00:39,  1.94it/s][A
loss=3.340:   5%|▌         | 4/79 [00:02<00:39,  1.90it/s][A
loss=3.308:   5%|▌         | 4/79 [00:02<00:39,  1.90it/s][A
loss=3.308:   6%|▋         | 5/79 [00:02<00:39,  1.87it/s][A
loss=3.332:   6%|▋         | 5/79 [00:02<00:39,  1.87it/s][A
loss=3.332:   8%|▊         | 6/79 [00:03<00:39,  1.86it/s][A
loss=3.410:   8%|▊         | 6/79 [00:03<00:39,  1.86it/s][A
loss=3.410:   9%|▉         | 7/79 [00:03<00:38,  1.85it/s][A
loss=3.263:   9%|▉         | 7/79 [00:03<00:38,  1.85it/s][A
loss=3.263:  10%|█         | 8/79 [00:04<00:38,  1.83it/s][A
loss=3.263:  10%|█         | 8/79 [00:04<00:38,  1.83it/s][A
loss=3.

{"eval_acc": 0.17566594672426206, "eval_f1": 0.014586252591186668, "eval_acc_and_f1": 0.09512609965772437, "eval_loss": 3.0129567044121877, "learning_rate": 1.3043478260869566e-05, "train_loss": 3.2584966818491616, "step": 15}



loss=3.045:  20%|██        | 16/79 [00:40<07:31,  7.16s/it][A
loss=3.093:  20%|██        | 16/79 [00:40<07:31,  7.16s/it][A
loss=3.093:  22%|██▏       | 17/79 [00:40<05:21,  5.18s/it][A
loss=2.923:  22%|██▏       | 17/79 [00:40<05:21,  5.18s/it][A
loss=2.923:  23%|██▎       | 18/79 [00:41<03:51,  3.79s/it][A
loss=2.967:  23%|██▎       | 18/79 [00:41<03:51,  3.79s/it][A
loss=2.967:  24%|██▍       | 19/79 [00:41<02:49,  2.82s/it][A
loss=2.827:  24%|██▍       | 19/79 [00:42<02:49,  2.82s/it][A
loss=2.827:  25%|██▌       | 20/79 [00:42<02:06,  2.14s/it][A
loss=2.887:  25%|██▌       | 20/79 [00:42<02:06,  2.14s/it][A
loss=2.887:  27%|██▋       | 21/79 [00:42<01:36,  1.66s/it][A
loss=2.780:  27%|██▋       | 21/79 [00:43<01:36,  1.66s/it][A
loss=2.780:  28%|██▊       | 22/79 [00:43<01:15,  1.33s/it][A
loss=2.676:  28%|██▊       | 22/79 [00:43<01:15,  1.33s/it][A
loss=2.676:  29%|██▉       | 23/79 [00:44<01:01,  1.10s/it][A
loss=2.704:  29%|██▉       | 23/79 [00:44<01:01,  1.10

{"eval_acc": 0.29344852411807054, "eval_f1": 0.06881475556114931, "eval_acc_and_f1": 0.18113163983960992, "eval_loss": 2.4748197197914124, "learning_rate": 1.9345794392523366e-05, "train_loss": 2.809955342610677, "step": 30}



loss=2.619:  39%|███▉      | 31/79 [01:22<06:06,  7.64s/it][A
loss=2.319:  39%|███▉      | 31/79 [01:22<06:06,  7.64s/it][A
loss=2.319:  41%|████      | 32/79 [01:22<04:18,  5.51s/it][A
loss=2.577:  41%|████      | 32/79 [01:22<04:18,  5.51s/it][A
loss=2.577:  42%|████▏     | 33/79 [01:23<03:05,  4.02s/it][A
loss=2.304:  42%|████▏     | 33/79 [01:23<03:05,  4.02s/it][A
loss=2.304:  43%|████▎     | 34/79 [01:23<02:14,  2.98s/it][A
loss=2.280:  43%|████▎     | 34/79 [01:23<02:14,  2.98s/it][A
loss=2.280:  44%|████▍     | 35/79 [01:24<01:39,  2.26s/it][A
loss=2.406:  44%|████▍     | 35/79 [01:24<01:39,  2.26s/it][A
loss=2.406:  46%|████▌     | 36/79 [01:24<01:15,  1.75s/it][A
loss=2.448:  46%|████▌     | 36/79 [01:25<01:15,  1.75s/it][A
loss=2.448:  47%|████▋     | 37/79 [01:25<00:58,  1.39s/it][A
loss=2.450:  47%|████▋     | 37/79 [01:25<00:58,  1.39s/it][A
loss=2.450:  48%|████▊     | 38/79 [01:26<00:46,  1.14s/it][A
loss=2.412:  48%|████▊     | 38/79 [01:26<00:46,  1.14

{"eval_acc": 0.43628509719222464, "eval_f1": 0.10591639427145878, "eval_acc_and_f1": 0.27110074573184173, "eval_loss": 2.2196123940604076, "learning_rate": 1.794392523364486e-05, "train_loss": 2.4217097123463946, "step": 45}



loss=2.565:  58%|█████▊    | 46/79 [02:02<04:00,  7.30s/it][A
loss=2.343:  58%|█████▊    | 46/79 [02:02<04:00,  7.30s/it][A
loss=2.343:  59%|█████▉    | 47/79 [02:02<02:48,  5.27s/it][A
loss=1.980:  59%|█████▉    | 47/79 [02:03<02:48,  5.27s/it][A
loss=1.980:  61%|██████    | 48/79 [02:03<01:59,  3.86s/it][A
loss=2.307:  61%|██████    | 48/79 [02:03<01:59,  3.86s/it][A
loss=2.307:  62%|██████▏   | 49/79 [02:04<01:25,  2.87s/it][A
loss=1.988:  62%|██████▏   | 49/79 [02:04<01:25,  2.87s/it][A
loss=1.988:  63%|██████▎   | 50/79 [02:04<01:02,  2.17s/it][A
loss=2.251:  63%|██████▎   | 50/79 [02:04<01:02,  2.17s/it][A
loss=2.251:  65%|██████▍   | 51/79 [02:05<00:47,  1.69s/it][A
loss=2.462:  65%|██████▍   | 51/79 [02:05<00:47,  1.69s/it][A
loss=2.462:  66%|██████▌   | 52/79 [02:05<00:36,  1.35s/it][A
loss=2.227:  66%|██████▌   | 52/79 [02:05<00:36,  1.35s/it][A
loss=2.227:  67%|██████▋   | 53/79 [02:06<00:28,  1.11s/it][A
loss=1.902:  67%|██████▋   | 53/79 [02:06<00:28,  1.11

{"eval_acc": 0.5529157667386609, "eval_f1": 0.1407104547228736, "eval_acc_and_f1": 0.34681311073076726, "eval_loss": 2.0175274908542633, "learning_rate": 1.6542056074766357e-05, "train_loss": 2.302168917655945, "step": 60}



loss=1.985:  77%|███████▋  | 61/79 [02:46<02:25,  8.07s/it][A
loss=2.154:  77%|███████▋  | 61/79 [02:46<02:25,  8.07s/it][A
loss=2.154:  78%|███████▊  | 62/79 [02:46<01:38,  5.82s/it][A
loss=1.803:  78%|███████▊  | 62/79 [02:47<01:38,  5.82s/it][A
loss=1.803:  80%|███████▉  | 63/79 [02:47<01:07,  4.24s/it][A
loss=2.276:  80%|███████▉  | 63/79 [02:47<01:07,  4.24s/it][A
loss=2.276:  81%|████████  | 64/79 [02:48<00:46,  3.13s/it][A
loss=2.068:  81%|████████  | 64/79 [02:48<00:46,  3.13s/it][A
loss=2.068:  82%|████████▏ | 65/79 [02:48<00:33,  2.36s/it][A
loss=2.283:  82%|████████▏ | 65/79 [02:48<00:33,  2.36s/it][A
loss=2.283:  84%|████████▎ | 66/79 [02:49<00:23,  1.82s/it][A
loss=1.999:  84%|████████▎ | 66/79 [02:49<00:23,  1.82s/it][A
loss=1.999:  85%|████████▍ | 67/79 [02:49<00:17,  1.44s/it][A
loss=2.062:  85%|████████▍ | 67/79 [02:49<00:17,  1.44s/it][A
loss=2.062:  86%|████████▌ | 68/79 [02:50<00:12,  1.18s/it][A
loss=2.202:  86%|████████▌ | 68/79 [02:50<00:12,  1.18

{"eval_acc": 0.6722822174226062, "eval_f1": 0.16390625080035245, "eval_acc_and_f1": 0.41809423411147933, "eval_loss": 1.7390439084597997, "learning_rate": 1.5140186915887852e-05, "train_loss": 2.0640811125437417, "step": 75}



loss=1.846:  96%|█████████▌| 76/79 [03:26<00:21,  7.27s/it][A
loss=2.078:  96%|█████████▌| 76/79 [03:26<00:21,  7.27s/it][A
loss=2.078:  97%|█████████▋| 77/79 [03:27<00:10,  5.26s/it][A
loss=1.853:  97%|█████████▋| 77/79 [03:27<00:10,  5.26s/it][A
loss=1.853:  99%|█████████▊| 78/79 [03:27<00:03,  3.85s/it][A
loss=2.272:  99%|█████████▊| 78/79 [03:27<00:03,  3.85s/it][A
loss=2.272: 100%|██████████| 79/79 [03:27<00:00,  2.63s/it]
Epoch:  33%|███▎      | 1/3 [03:27<06:55, 207.76s/it]
Iteration:   0%|          | 0/79 [00:00<?, ?it/s][A
loss=1.773:   0%|          | 0/79 [00:00<?, ?it/s][A
loss=1.773:   1%|▏         | 1/79 [00:00<00:42,  1.82it/s][A
loss=1.715:   1%|▏         | 1/79 [00:00<00:42,  1.82it/s][A
loss=1.715:   3%|▎         | 2/79 [00:01<00:42,  1.82it/s][A
loss=1.721:   3%|▎         | 2/79 [00:01<00:42,  1.82it/s][A
loss=1.721:   4%|▍         | 3/79 [00:01<00:41,  1.81it/s][A
loss=1.758:   4%|▍         | 3/79 [00:01<00:41,  1.81it/s][A
loss=1.758:   5%|▌         |

{"eval_acc": 0.6910007199424046, "eval_f1": 0.1724953119192882, "eval_acc_and_f1": 0.43174801593084644, "eval_loss": 1.433093045439039, "learning_rate": 1.3738317757009347e-05, "train_loss": 1.8108073552449544, "step": 90}



loss=1.358:  15%|█▌        | 12/79 [00:41<08:53,  7.96s/it][A
loss=1.405:  15%|█▌        | 12/79 [00:42<08:53,  7.96s/it][A
loss=1.405:  16%|█▋        | 13/79 [00:42<06:18,  5.74s/it][A
loss=1.715:  16%|█▋        | 13/79 [00:42<06:18,  5.74s/it][A
loss=1.715:  18%|█▊        | 14/79 [00:43<04:31,  4.18s/it][A
loss=1.941:  18%|█▊        | 14/79 [00:43<04:31,  4.18s/it][A
loss=1.941:  19%|█▉        | 15/79 [00:43<03:18,  3.09s/it][A
loss=1.686:  19%|█▉        | 15/79 [00:43<03:18,  3.09s/it][A
loss=1.686:  20%|██        | 16/79 [00:44<02:26,  2.33s/it][A
loss=1.558:  20%|██        | 16/79 [00:44<02:26,  2.33s/it][A
loss=1.558:  22%|██▏       | 17/79 [00:44<01:51,  1.80s/it][A
loss=1.951:  22%|██▏       | 17/79 [00:44<01:51,  1.80s/it][A
loss=1.951:  23%|██▎       | 18/79 [00:45<01:26,  1.42s/it][A
loss=1.267:  23%|██▎       | 18/79 [00:45<01:26,  1.42s/it][A
loss=1.267:  24%|██▍       | 19/79 [00:45<01:09,  1.16s/it][A
loss=1.477:  24%|██▍       | 19/79 [00:45<01:09,  1.16

{"eval_acc": 0.7429805615550756, "eval_f1": 0.21006282029779139, "eval_acc_and_f1": 0.4765216909264335, "eval_loss": 1.255710972206933, "learning_rate": 1.233644859813084e-05, "train_loss": 1.5772854566574097, "step": 105}



loss=1.143:  34%|███▍      | 27/79 [01:23<06:39,  7.69s/it][A
loss=1.267:  34%|███▍      | 27/79 [01:24<06:39,  7.69s/it][A
loss=1.267:  35%|███▌      | 28/79 [01:24<04:42,  5.54s/it][A
loss=1.291:  35%|███▌      | 28/79 [01:24<04:42,  5.54s/it][A
loss=1.291:  37%|███▋      | 29/79 [01:25<03:22,  4.05s/it][A
loss=1.617:  37%|███▋      | 29/79 [01:25<03:22,  4.05s/it][A
loss=1.617:  38%|███▊      | 30/79 [01:25<02:27,  3.01s/it][A
loss=1.769:  38%|███▊      | 30/79 [01:25<02:27,  3.01s/it][A
loss=1.769:  39%|███▉      | 31/79 [01:26<01:49,  2.27s/it][A
loss=1.412:  39%|███▉      | 31/79 [01:26<01:49,  2.27s/it][A
loss=1.412:  41%|████      | 32/79 [01:26<01:22,  1.76s/it][A
loss=1.285:  41%|████      | 32/79 [01:26<01:22,  1.76s/it][A
loss=1.285:  42%|████▏     | 33/79 [01:27<01:04,  1.40s/it][A
loss=1.101:  42%|████▏     | 33/79 [01:27<01:04,  1.40s/it][A
loss=1.101:  43%|████▎     | 34/79 [01:27<00:51,  1.14s/it][A
loss=1.439:  43%|████▎     | 34/79 [01:28<00:51,  1.14

{"eval_acc": 0.7670266378689705, "eval_f1": 0.23704962646050634, "eval_acc_and_f1": 0.5020381321647385, "eval_loss": 1.064920676606042, "learning_rate": 1.0934579439252338e-05, "train_loss": 1.3712127606074016, "step": 120}



loss=1.561:  53%|█████▎    | 42/79 [02:04<04:29,  7.28s/it][A
loss=1.381:  53%|█████▎    | 42/79 [02:04<04:29,  7.28s/it][A
loss=1.381:  54%|█████▍    | 43/79 [02:04<03:09,  5.26s/it][A
loss=1.368:  54%|█████▍    | 43/79 [02:04<03:09,  5.26s/it][A
loss=1.368:  56%|█████▌    | 44/79 [02:05<02:14,  3.85s/it][A
loss=1.670:  56%|█████▌    | 44/79 [02:05<02:14,  3.85s/it][A
loss=1.670:  57%|█████▋    | 45/79 [02:05<01:37,  2.86s/it][A
loss=1.607:  57%|█████▋    | 45/79 [02:06<01:37,  2.86s/it][A
loss=1.607:  58%|█████▊    | 46/79 [02:06<01:11,  2.17s/it][A
loss=1.447:  58%|█████▊    | 46/79 [02:06<01:11,  2.17s/it][A
loss=1.447:  59%|█████▉    | 47/79 [02:07<00:53,  1.69s/it][A
loss=1.157:  59%|█████▉    | 47/79 [02:07<00:53,  1.69s/it][A
loss=1.157:  61%|██████    | 48/79 [02:07<00:41,  1.35s/it][A
loss=1.060:  61%|██████    | 48/79 [02:07<00:41,  1.35s/it][A
loss=1.060:  62%|██████▏   | 49/79 [02:08<00:33,  1.11s/it][A
loss=1.234:  62%|██████▏   | 49/79 [02:08<00:33,  1.11

{"eval_acc": 0.7929445644348452, "eval_f1": 0.25545867569352704, "eval_acc_and_f1": 0.5242016200641861, "eval_loss": 0.9332072756120137, "learning_rate": 9.532710280373833e-06, "train_loss": 1.300875715414683, "step": 135}



loss=1.496:  72%|███████▏  | 57/79 [02:44<02:40,  7.27s/it][A
loss=1.334:  72%|███████▏  | 57/79 [02:44<02:40,  7.27s/it][A
loss=1.334:  73%|███████▎  | 58/79 [02:44<01:50,  5.26s/it][A
loss=1.120:  73%|███████▎  | 58/79 [02:45<01:50,  5.26s/it][A
loss=1.120:  75%|███████▍  | 59/79 [02:45<01:16,  3.84s/it][A
loss=1.223:  75%|███████▍  | 59/79 [02:45<01:16,  3.84s/it][A
loss=1.223:  76%|███████▌  | 60/79 [02:46<00:54,  2.86s/it][A
loss=1.147:  76%|███████▌  | 60/79 [02:46<00:54,  2.86s/it][A
loss=1.147:  77%|███████▋  | 61/79 [02:46<00:39,  2.17s/it][A
loss=0.804:  77%|███████▋  | 61/79 [02:46<00:39,  2.17s/it][A
loss=0.804:  78%|███████▊  | 62/79 [02:47<00:28,  1.68s/it][A
loss=1.062:  78%|███████▊  | 62/79 [02:47<00:28,  1.68s/it][A
loss=1.062:  80%|███████▉  | 63/79 [02:47<00:21,  1.34s/it][A
loss=1.177:  80%|███████▉  | 63/79 [02:47<00:21,  1.34s/it][A
loss=1.177:  81%|████████  | 64/79 [02:48<00:16,  1.11s/it][A
loss=0.860:  81%|████████  | 64/79 [02:48<00:16,  1.11

{"eval_acc": 0.8197264218862491, "eval_f1": 0.29935605984343217, "eval_acc_and_f1": 0.5595412408648406, "eval_loss": 0.8464933740241187, "learning_rate": 8.130841121495327e-06, "train_loss": 1.150140357017517, "step": 150}



loss=0.891:  91%|█████████ | 72/79 [03:24<00:51,  7.29s/it][A
loss=0.962:  91%|█████████ | 72/79 [03:24<00:51,  7.29s/it][A
loss=0.962:  92%|█████████▏| 73/79 [03:25<00:31,  5.27s/it][A
loss=1.250:  92%|█████████▏| 73/79 [03:25<00:31,  5.27s/it][A
loss=1.250:  94%|█████████▎| 74/79 [03:25<00:19,  3.86s/it][A
loss=0.873:  94%|█████████▎| 74/79 [03:25<00:19,  3.86s/it][A
loss=0.873:  95%|█████████▍| 75/79 [03:26<00:11,  2.87s/it][A
loss=1.149:  95%|█████████▍| 75/79 [03:26<00:11,  2.87s/it][A
loss=1.149:  96%|█████████▌| 76/79 [03:26<00:06,  2.17s/it][A
loss=1.369:  96%|█████████▌| 76/79 [03:27<00:06,  2.17s/it][A
loss=1.369:  97%|█████████▋| 77/79 [03:27<00:03,  1.69s/it][A
loss=1.330:  97%|█████████▋| 77/79 [03:27<00:03,  1.69s/it][A
loss=1.330:  99%|█████████▊| 78/79 [03:27<00:01,  1.35s/it][A
loss=0.905:  99%|█████████▊| 78/79 [03:28<00:01,  1.35s/it][A
loss=0.905: 100%|██████████| 79/79 [03:28<00:00,  2.63s/it]
Epoch:  67%|██████▋   | 2/3 [06:55<03:27, 207.87s/it]
Ite

{"eval_acc": 0.8236141108711303, "eval_f1": 0.30511061168914183, "eval_acc_and_f1": 0.5643623612801361, "eval_loss": 0.7900410826717105, "learning_rate": 6.728971962616823e-06, "train_loss": 1.0733359575271606, "step": 165}



loss=1.221:  10%|█         | 8/79 [00:39<09:23,  7.94s/it][A
loss=0.949:  10%|█         | 8/79 [00:39<09:23,  7.94s/it][A
loss=0.949:  11%|█▏        | 9/79 [00:40<06:40,  5.73s/it][A
loss=0.877:  11%|█▏        | 9/79 [00:40<06:40,  5.73s/it][A
loss=0.877:  13%|█▎        | 10/79 [00:40<04:48,  4.18s/it][A
loss=0.983:  13%|█▎        | 10/79 [00:40<04:48,  4.18s/it][A
loss=0.983:  14%|█▍        | 11/79 [00:41<03:29,  3.09s/it][A
loss=0.689:  14%|█▍        | 11/79 [00:41<03:29,  3.09s/it][A
loss=0.689:  15%|█▌        | 12/79 [00:41<02:35,  2.33s/it][A
loss=1.010:  15%|█▌        | 12/79 [00:41<02:35,  2.33s/it][A
loss=1.010:  16%|█▋        | 13/79 [00:42<01:58,  1.79s/it][A
loss=0.929:  16%|█▋        | 13/79 [00:42<01:58,  1.79s/it][A
loss=0.929:  18%|█▊        | 14/79 [00:42<01:32,  1.42s/it][A
loss=1.107:  18%|█▊        | 14/79 [00:43<01:32,  1.42s/it][A
loss=1.107:  19%|█▉        | 15/79 [00:43<01:14,  1.16s/it][A
loss=0.788:  19%|█▉        | 15/79 [00:43<01:14,  1.16s/it

{"eval_acc": 0.859899208063355, "eval_f1": 0.3473396905415529, "eval_acc_and_f1": 0.6036194493024539, "eval_loss": 0.7415435803788049, "learning_rate": 5.3271028037383174e-06, "train_loss": 0.9964715520540873, "step": 180}



loss=1.012:  29%|██▉       | 23/79 [01:19<06:48,  7.29s/it][A
loss=0.822:  29%|██▉       | 23/79 [01:20<06:48,  7.29s/it][A
loss=0.822:  30%|███       | 24/79 [01:20<04:49,  5.27s/it][A
loss=0.618:  30%|███       | 24/79 [01:20<04:49,  5.27s/it][A
loss=0.618:  32%|███▏      | 25/79 [01:20<03:28,  3.86s/it][A
loss=0.951:  32%|███▏      | 25/79 [01:21<03:28,  3.86s/it][A
loss=0.951:  33%|███▎      | 26/79 [01:21<02:31,  2.86s/it][A
loss=0.708:  33%|███▎      | 26/79 [01:21<02:31,  2.86s/it][A
loss=0.708:  34%|███▍      | 27/79 [01:22<01:52,  2.17s/it][A
loss=0.875:  34%|███▍      | 27/79 [01:22<01:52,  2.17s/it][A
loss=0.875:  35%|███▌      | 28/79 [01:22<01:26,  1.69s/it][A
loss=0.541:  35%|███▌      | 28/79 [01:22<01:26,  1.69s/it][A
loss=0.541:  37%|███▋      | 29/79 [01:23<01:07,  1.35s/it][A
loss=1.077:  37%|███▋      | 29/79 [01:23<01:07,  1.35s/it][A
loss=1.077:  38%|███▊      | 30/79 [01:23<00:54,  1.11s/it][A
loss=1.104:  38%|███▊      | 30/79 [01:23<00:54,  1.11

{"eval_acc": 0.8617710583153347, "eval_f1": 0.3492497456701865, "eval_acc_and_f1": 0.6055104019927606, "eval_loss": 0.7038035328899112, "learning_rate": 3.925233644859814e-06, "train_loss": 0.8238467375437418, "step": 195}



loss=0.876:  48%|████▊     | 38/79 [02:01<05:13,  7.65s/it][A
loss=0.814:  48%|████▊     | 38/79 [02:01<05:13,  7.65s/it][A
loss=0.814:  49%|████▉     | 39/79 [02:02<03:40,  5.52s/it][A
loss=1.155:  49%|████▉     | 39/79 [02:02<03:40,  5.52s/it][A
loss=1.155:  51%|█████     | 40/79 [02:02<02:37,  4.04s/it][A
loss=0.990:  51%|█████     | 40/79 [02:03<02:37,  4.04s/it][A
loss=0.990:  52%|█████▏    | 41/79 [02:03<01:53,  3.00s/it][A
loss=0.940:  52%|█████▏    | 41/79 [02:03<01:53,  3.00s/it][A
loss=0.940:  53%|█████▎    | 42/79 [02:04<01:23,  2.26s/it][A
loss=0.641:  53%|█████▎    | 42/79 [02:04<01:23,  2.26s/it][A
loss=0.641:  54%|█████▍    | 43/79 [02:04<01:03,  1.75s/it][A
loss=0.880:  54%|█████▍    | 43/79 [02:04<01:03,  1.75s/it][A
loss=0.880:  56%|█████▌    | 44/79 [02:05<00:48,  1.39s/it][A
loss=0.692:  56%|█████▌    | 44/79 [02:05<00:48,  1.39s/it][A
loss=0.692:  57%|█████▋    | 45/79 [02:05<00:38,  1.14s/it][A
loss=1.015:  57%|█████▋    | 45/79 [02:05<00:38,  1.14

{"eval_acc": 0.8578833693304536, "eval_f1": 0.34141935202300994, "eval_acc_and_f1": 0.5996513606767317, "eval_loss": 0.6859496682882309, "learning_rate": 2.5233644859813085e-06, "train_loss": 0.8778717676798503, "step": 210}



loss=0.998:  67%|██████▋   | 53/79 [02:44<03:21,  7.74s/it][A
loss=0.897:  67%|██████▋   | 53/79 [02:44<03:21,  7.74s/it][A
loss=0.897:  68%|██████▊   | 54/79 [02:44<02:19,  5.58s/it][A
loss=0.820:  68%|██████▊   | 54/79 [02:44<02:19,  5.58s/it][A
loss=0.820:  70%|██████▉   | 55/79 [02:45<01:37,  4.07s/it][A
loss=0.642:  70%|██████▉   | 55/79 [02:45<01:37,  4.07s/it][A
loss=0.642:  71%|███████   | 56/79 [02:45<01:09,  3.02s/it][A
loss=1.061:  71%|███████   | 56/79 [02:46<01:09,  3.02s/it][A
loss=1.061:  72%|███████▏  | 57/79 [02:46<00:50,  2.28s/it][A
loss=0.782:  72%|███████▏  | 57/79 [02:46<00:50,  2.28s/it][A
loss=0.782:  73%|███████▎  | 58/79 [02:47<00:37,  1.76s/it][A
loss=1.280:  73%|███████▎  | 58/79 [02:47<00:37,  1.76s/it][A
loss=1.280:  75%|███████▍  | 59/79 [02:47<00:28,  1.40s/it][A
loss=0.916:  75%|███████▍  | 59/79 [02:47<00:28,  1.40s/it][A
loss=0.916:  76%|███████▌  | 60/79 [02:48<00:21,  1.15s/it][A
loss=1.129:  76%|███████▌  | 60/79 [02:48<00:21,  1.15

{"eval_acc": 0.8581713462922966, "eval_f1": 0.343781487161583, "eval_acc_and_f1": 0.6009764167269398, "eval_loss": 0.6800087924514499, "learning_rate": 1.1214953271028038e-06, "train_loss": 0.9688850124677022, "step": 225}



loss=0.964:  86%|████████▌ | 68/79 [03:28<01:28,  8.07s/it][A
loss=0.506:  86%|████████▌ | 68/79 [03:28<01:28,  8.07s/it][A
loss=0.506:  87%|████████▋ | 69/79 [03:28<00:58,  5.82s/it][A
loss=0.700:  87%|████████▋ | 69/79 [03:28<00:58,  5.82s/it][A
loss=0.700:  89%|████████▊ | 70/79 [03:29<00:38,  4.24s/it][A
loss=0.689:  89%|████████▊ | 70/79 [03:29<00:38,  4.24s/it][A
loss=0.689:  90%|████████▉ | 71/79 [03:29<00:25,  3.13s/it][A
loss=0.733:  90%|████████▉ | 71/79 [03:30<00:25,  3.13s/it][A
loss=0.733:  91%|█████████ | 72/79 [03:30<00:16,  2.36s/it][A
loss=0.993:  91%|█████████ | 72/79 [03:30<00:16,  2.36s/it][A
loss=0.993:  92%|█████████▏| 73/79 [03:30<00:10,  1.82s/it][A
loss=0.799:  92%|█████████▏| 73/79 [03:31<00:10,  1.82s/it][A
loss=0.799:  94%|█████████▎| 74/79 [03:31<00:07,  1.44s/it][A
loss=1.061:  94%|█████████▎| 74/79 [03:31<00:07,  1.44s/it][A
loss=1.061:  95%|█████████▍| 75/79 [03:32<00:04,  1.17s/it][A
loss=0.554:  95%|█████████▍| 75/79 [03:32<00:04,  1.17

{"eval_acc": 0.8580273578113751, "eval_f1": 0.34403718424604646, "eval_acc_and_f1": 0.6010322710287108, "eval_loss": 0.6784219912120274, "learning_rate": 0.0, "train_loss": 0.6456904252370198, "step": 237}


07/18/2022 02:24:17 - INFO - utilities.trainers -   ***** Running evaluation iter-5_trial1 *****
07/18/2022 02:24:17 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 02:24:17 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 02:24:45 - INFO - utilities.trainers -   ***** Eval results iter-5_trial1 *****
07/18/2022 02:24:45 - INFO - utilities.trainers -     acc = 0.8580273578113751
07/18/2022 02:24:45 - INFO - utilities.trainers -     acc_and_f1 = 0.6010322710287108
07/18/2022 02:24:45 - INFO - utilities.trainers -     f1 = 0.34403718424604646
07/18/2022 02:24:48 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 02:24:54 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 02:24:54 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 02:24:54 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 02:27:13 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 02:27:13 - INFO - utilities.trainers -     acc = 0.8593686999596798
07/18/2022 02:27:13 - INFO - utilities.trainers -     acc_and_f1 = 0.5935776156748097
07/18/2022 02:27:13 - INFO - utilities.trainers -     f1 = 0.32778653138993963



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 02:27:14 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 02:27:33 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 02:27:38 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 02:27:38 - INFO - utilities.trainers -     Num examples = 47500
07/18/2022 02:27:38 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/186 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 186/186 [06:19<00:00,  2.04s/it]
07/18/2022 02:33:58 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 02:33:58 - INFO - utilities.trainers -     acc = 0.8629052631578947
07/18/2022 02:33:58 - INFO - utilities.trainers -     acc_and_f1 = 0.6015990841881464
07/18/2022 02:33:58 - INFO - utilities.trainers -     f1 = 0.34029290521839817
07/18/2022 02:33:58 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 02:34:16 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 02:34:20 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 02:34:20 - INFO - utilities.trainers -     Num examples = 2500
07/18/2022 02:34:20 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 10/10 [00:19<00:00,  1.99s/it]
07/18/2022 02:34:40 - INFO - ut



************
End of iteration 5:
Train loss 1.6109, Val loss 0.6784219912120274, Test loss 0.6951640686568092
Annotated 500 samples
Current labeled (training) data: 3000 samples
Remaining budget: 1000 (in samples)
************

Saving json with the results....

 Start Training model of iteration 6!



07/18/2022 02:37:26 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.8580273578113751, acc_best_iteration=5, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-5', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-6', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 28
total steps: 281
logging steps: 18
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.227:   1%|          | 1/94 [00:00<00:47,  1.95it/s][A
loss=3.216:   1%|          | 1/94 [00:00<00:47,  1.95it/s][A
loss=3.216:   2%|▏         | 2/94 [00:01<00:49,  1.87it/s][A
loss=3.191:   2%|▏         | 2/94 [00:01<00:49,  1.87it/s][A
loss=3.191:   3%|▎         | 3/94 [00:01<00:50,  1.82it/s][A
loss=3.127:   3%|▎         | 3/94 [00:01<00:50,  1.82it/s][A
loss=3.127:   4%|▍         | 4/94 [00:02<00:50,  1.79it/s][A
loss=3.233:   4%|▍         | 4/94 [00:02<00:50,  1.79it/s][A
loss=3.233:   5%|▌         | 5/94 [00:02<00:50,  1.77it/s][A
loss=3.157:   5%|▌         | 5/94 [00:02<00:50,  1.77it/s][A
loss=3.157:   6%|▋         | 6/94 [00:03<00:50,  1.75it/s][A
loss=3.132:   6%|▋         | 6/94 [00:03<00:50,  1.75it/s][A
loss=3.132:   7%|▋         | 7/94 [00:04<00:49,  1.74it/s][A
loss=3.064:   7%|▋         | 7/94 [00:04<00:49,  1.74it/s][A
loss=3.064:   9%|▊         | 8/94 [00:04<00:49,  1.74it/s][A
loss=3.137:   9%|▊         | 8/94 [00:04<00:49,  1.74it/s][A
loss=3.

{"eval_acc": 0.18416126709863212, "eval_f1": 0.01937594481008751, "eval_acc_and_f1": 0.10176860595435981, "eval_loss": 2.808576830795833, "learning_rate": 1.2857142857142859e-05, "train_loss": 3.0869629118177624, "step": 18}



loss=2.863:  20%|██        | 19/94 [00:42<08:59,  7.20s/it][A
loss=2.844:  20%|██        | 19/94 [00:42<08:59,  7.20s/it][A
loss=2.844:  21%|██▏       | 20/94 [00:43<06:26,  5.22s/it][A
loss=2.606:  21%|██▏       | 20/94 [00:43<06:26,  5.22s/it][A
loss=2.606:  22%|██▏       | 21/94 [00:43<04:39,  3.83s/it][A
loss=2.818:  22%|██▏       | 21/94 [00:43<04:39,  3.83s/it][A
loss=2.818:  23%|██▎       | 22/94 [00:44<03:25,  2.85s/it][A
loss=2.822:  23%|██▎       | 22/94 [00:44<03:25,  2.85s/it][A
loss=2.822:  24%|██▍       | 23/94 [00:44<02:34,  2.17s/it][A
loss=2.856:  24%|██▍       | 23/94 [00:44<02:34,  2.17s/it][A
loss=2.856:  26%|██▌       | 24/94 [00:45<01:58,  1.69s/it][A
loss=2.620:  26%|██▌       | 24/94 [00:45<01:58,  1.69s/it][A
loss=2.620:  27%|██▋       | 25/94 [00:45<01:33,  1.36s/it][A
loss=2.734:  27%|██▋       | 25/94 [00:46<01:33,  1.36s/it][A
loss=2.734:  28%|██▊       | 26/94 [00:46<01:16,  1.13s/it][A
loss=2.917:  28%|██▊       | 26/94 [00:46<01:16,  1.13

{"eval_acc": 0.26220302375809934, "eval_f1": 0.05250830869968747, "eval_acc_and_f1": 0.1573556662288934, "eval_loss": 2.4072379214423045, "learning_rate": 1.937007874015748e-05, "train_loss": 2.650799592336019, "step": 36}



loss=2.714:  39%|███▉      | 37/94 [01:24<06:56,  7.30s/it][A
loss=2.494:  39%|███▉      | 37/94 [01:25<06:56,  7.30s/it][A
loss=2.494:  40%|████      | 38/94 [01:25<04:56,  5.29s/it][A
loss=2.331:  40%|████      | 38/94 [01:25<04:56,  5.29s/it][A
loss=2.331:  41%|████▏     | 39/94 [01:26<03:33,  3.88s/it][A
loss=2.549:  41%|████▏     | 39/94 [01:26<03:33,  3.88s/it][A
loss=2.549:  43%|████▎     | 40/94 [01:26<02:36,  2.89s/it][A
loss=2.294:  43%|████▎     | 40/94 [01:26<02:36,  2.89s/it][A
loss=2.294:  44%|████▎     | 41/94 [01:27<01:56,  2.20s/it][A
loss=2.405:  44%|████▎     | 41/94 [01:27<01:56,  2.20s/it][A
loss=2.405:  45%|████▍     | 42/94 [01:27<01:29,  1.71s/it][A
loss=2.595:  45%|████▍     | 42/94 [01:28<01:29,  1.71s/it][A
loss=2.595:  46%|████▌     | 43/94 [01:28<01:09,  1.37s/it][A
loss=2.470:  46%|████▌     | 43/94 [01:28<01:09,  1.37s/it][A
loss=2.470:  47%|████▋     | 44/94 [01:29<00:56,  1.14s/it][A
loss=2.293:  47%|████▋     | 44/94 [01:29<00:56,  1.14

{"eval_acc": 0.3337652987760979, "eval_f1": 0.07894043552929013, "eval_acc_and_f1": 0.20635286715269402, "eval_loss": 2.193633190223149, "learning_rate": 1.7952755905511813e-05, "train_loss": 2.406378322177463, "step": 54}



loss=2.151:  59%|█████▊    | 55/94 [02:08<04:57,  7.62s/it][A
loss=2.429:  59%|█████▊    | 55/94 [02:09<04:57,  7.62s/it][A
loss=2.429:  60%|█████▉    | 56/94 [02:09<03:29,  5.51s/it][A
loss=2.046:  60%|█████▉    | 56/94 [02:09<03:29,  5.51s/it][A
loss=2.046:  61%|██████    | 57/94 [02:10<02:29,  4.04s/it][A
loss=2.389:  61%|██████    | 57/94 [02:10<02:29,  4.04s/it][A
loss=2.389:  62%|██████▏   | 58/94 [02:10<01:47,  3.00s/it][A
loss=2.243:  62%|██████▏   | 58/94 [02:10<01:47,  3.00s/it][A
loss=2.243:  63%|██████▎   | 59/94 [02:11<01:19,  2.28s/it][A
loss=2.282:  63%|██████▎   | 59/94 [02:11<01:19,  2.28s/it][A
loss=2.282:  64%|██████▍   | 60/94 [02:11<01:00,  1.77s/it][A
loss=2.248:  64%|██████▍   | 60/94 [02:11<01:00,  1.77s/it][A
loss=2.248:  65%|██████▍   | 61/94 [02:12<00:46,  1.41s/it][A
loss=2.320:  65%|██████▍   | 61/94 [02:12<00:46,  1.41s/it][A
loss=2.320:  66%|██████▌   | 62/94 [02:12<00:37,  1.16s/it][A
loss=2.322:  66%|██████▌   | 62/94 [02:13<00:37,  1.16

{"eval_acc": 0.5174946004319655, "eval_f1": 0.13582198294868036, "eval_acc_and_f1": 0.3266582916903229, "eval_loss": 1.9283049745219094, "learning_rate": 1.6535433070866142e-05, "train_loss": 2.2216269108984203, "step": 72}



loss=2.313:  78%|███████▊  | 73/94 [02:52<02:39,  7.61s/it][A
loss=2.221:  78%|███████▊  | 73/94 [02:52<02:39,  7.61s/it][A
loss=2.221:  79%|███████▊  | 74/94 [02:53<01:49,  5.50s/it][A
loss=2.092:  79%|███████▊  | 74/94 [02:53<01:49,  5.50s/it][A
loss=2.092:  80%|███████▉  | 75/94 [02:53<01:16,  4.02s/it][A
loss=1.805:  80%|███████▉  | 75/94 [02:54<01:16,  4.02s/it][A
loss=1.805:  81%|████████  | 76/94 [02:54<00:53,  2.99s/it][A
loss=1.940:  81%|████████  | 76/94 [02:54<00:53,  2.99s/it][A
loss=1.940:  82%|████████▏ | 77/94 [02:55<00:38,  2.27s/it][A
loss=2.007:  82%|████████▏ | 77/94 [02:55<00:38,  2.27s/it][A
loss=2.007:  83%|████████▎ | 78/94 [02:55<00:28,  1.76s/it][A
loss=2.084:  83%|████████▎ | 78/94 [02:55<00:28,  1.76s/it][A
loss=2.084:  84%|████████▍ | 79/94 [02:56<00:21,  1.41s/it][A
loss=1.978:  84%|████████▍ | 79/94 [02:56<00:21,  1.41s/it][A
loss=1.978:  85%|████████▌ | 80/94 [02:56<00:16,  1.16s/it][A
loss=1.930:  85%|████████▌ | 80/94 [02:57<00:16,  1.16

{"eval_acc": 0.7245500359971202, "eval_f1": 0.20002771673721736, "eval_acc_and_f1": 0.46228887636716876, "eval_loss": 1.5831841145242964, "learning_rate": 1.5118110236220473e-05, "train_loss": 1.9754418267144098, "step": 90}



loss=1.929:  97%|█████████▋| 91/94 [03:36<00:22,  7.52s/it][A
loss=1.860:  97%|█████████▋| 91/94 [03:36<00:22,  7.52s/it][A
loss=1.860:  98%|█████████▊| 92/94 [03:36<00:10,  5.44s/it][A
loss=1.478:  98%|█████████▊| 92/94 [03:37<00:10,  5.44s/it][A
loss=1.478:  99%|█████████▉| 93/94 [03:37<00:03,  3.98s/it][A
loss=2.244:  99%|█████████▉| 93/94 [03:37<00:03,  3.98s/it][A
loss=2.244: 100%|██████████| 94/94 [03:37<00:00,  2.32s/it]
Epoch:  33%|███▎      | 1/3 [03:37<07:15, 217.93s/it]
Iteration:   0%|          | 0/94 [00:00<?, ?it/s][A
loss=1.954:   0%|          | 0/94 [00:00<?, ?it/s][A
loss=1.954:   1%|          | 1/94 [00:00<00:53,  1.73it/s][A
loss=1.790:   1%|          | 1/94 [00:00<00:53,  1.73it/s][A
loss=1.790:   2%|▏         | 2/94 [00:01<00:53,  1.72it/s][A
loss=1.643:   2%|▏         | 2/94 [00:01<00:53,  1.72it/s][A
loss=1.643:   3%|▎         | 3/94 [00:01<00:52,  1.73it/s][A
loss=1.758:   3%|▎         | 3/94 [00:01<00:52,  1.73it/s][A
loss=1.758:   4%|▍         |

{"eval_acc": 0.7500359971202304, "eval_f1": 0.20969044908061119, "eval_acc_and_f1": 0.4798632231004208, "eval_loss": 1.2519713597638267, "learning_rate": 1.3700787401574804e-05, "train_loss": 1.7229860027631123, "step": 108}



loss=1.531:  16%|█▌        | 15/94 [00:43<10:22,  7.88s/it][A
loss=1.619:  16%|█▌        | 15/94 [00:43<10:22,  7.88s/it][A
loss=1.619:  17%|█▋        | 16/94 [00:44<07:23,  5.69s/it][A
loss=1.448:  17%|█▋        | 16/94 [00:44<07:23,  5.69s/it][A
loss=1.448:  18%|█▊        | 17/94 [00:44<05:20,  4.16s/it][A
loss=1.468:  18%|█▊        | 17/94 [00:44<05:20,  4.16s/it][A
loss=1.468:  19%|█▉        | 18/94 [00:45<03:54,  3.09s/it][A
loss=1.506:  19%|█▉        | 18/94 [00:45<03:54,  3.09s/it][A
loss=1.506:  20%|██        | 19/94 [00:45<02:55,  2.34s/it][A
loss=1.240:  20%|██        | 19/94 [00:45<02:55,  2.34s/it][A
loss=1.240:  21%|██▏       | 20/94 [00:46<02:13,  1.81s/it][A
loss=1.373:  21%|██▏       | 20/94 [00:46<02:13,  1.81s/it][A
loss=1.373:  22%|██▏       | 21/94 [00:46<01:45,  1.44s/it][A
loss=1.401:  22%|██▏       | 21/94 [00:47<01:45,  1.44s/it][A
loss=1.401:  23%|██▎       | 22/94 [00:47<01:25,  1.18s/it][A
loss=1.011:  23%|██▎       | 22/94 [00:47<01:25,  1.18

{"eval_acc": 0.7582433405327573, "eval_f1": 0.21733916312351867, "eval_acc_and_f1": 0.487791251828138, "eval_loss": 1.0643449872732162, "learning_rate": 1.2283464566929135e-05, "train_loss": 1.4087227450476751, "step": 126}



loss=1.320:  35%|███▌      | 33/94 [01:29<08:13,  8.08s/it][A
loss=1.562:  35%|███▌      | 33/94 [01:29<08:13,  8.08s/it][A
loss=1.562:  36%|███▌      | 34/94 [01:30<05:49,  5.83s/it][A
loss=1.615:  36%|███▌      | 34/94 [01:30<05:49,  5.83s/it][A
loss=1.615:  37%|███▋      | 35/94 [01:30<04:11,  4.26s/it][A
loss=1.234:  37%|███▋      | 35/94 [01:30<04:11,  4.26s/it][A
loss=1.234:  38%|███▊      | 36/94 [01:31<03:02,  3.15s/it][A
loss=1.188:  38%|███▊      | 36/94 [01:31<03:02,  3.15s/it][A
loss=1.188:  39%|███▉      | 37/94 [01:31<02:15,  2.38s/it][A
loss=1.089:  39%|███▉      | 37/94 [01:32<02:15,  2.38s/it][A
loss=1.089:  40%|████      | 38/94 [01:32<01:43,  1.84s/it][A
loss=1.255:  40%|████      | 38/94 [01:32<01:43,  1.84s/it][A
loss=1.255:  41%|████▏     | 39/94 [01:33<01:20,  1.46s/it][A
loss=1.181:  41%|████▏     | 39/94 [01:33<01:20,  1.46s/it][A
loss=1.181:  43%|████▎     | 40/94 [01:33<01:04,  1.20s/it][A
loss=1.417:  43%|████▎     | 40/94 [01:33<01:04,  1.20

{"eval_acc": 0.772354211663067, "eval_f1": 0.23121964699908637, "eval_acc_and_f1": 0.5017869293310766, "eval_loss": 0.9240171419722694, "learning_rate": 1.0866141732283466e-05, "train_loss": 1.2942378123601277, "step": 144}



loss=1.228:  54%|█████▍    | 51/94 [02:11<05:13,  7.29s/it][A
loss=1.060:  54%|█████▍    | 51/94 [02:12<05:13,  7.29s/it][A
loss=1.060:  55%|█████▌    | 52/94 [02:12<03:41,  5.28s/it][A
loss=1.192:  55%|█████▌    | 52/94 [02:12<03:41,  5.28s/it][A
loss=1.192:  56%|█████▋    | 53/94 [02:13<02:38,  3.87s/it][A
loss=1.445:  56%|█████▋    | 53/94 [02:13<02:38,  3.87s/it][A
loss=1.445:  57%|█████▋    | 54/94 [02:13<01:55,  2.88s/it][A
loss=1.443:  57%|█████▋    | 54/94 [02:13<01:55,  2.88s/it][A
loss=1.443:  59%|█████▊    | 55/94 [02:14<01:25,  2.19s/it][A
loss=1.022:  59%|█████▊    | 55/94 [02:14<01:25,  2.19s/it][A
loss=1.022:  60%|█████▉    | 56/94 [02:14<01:04,  1.71s/it][A
loss=1.358:  60%|█████▉    | 56/94 [02:15<01:04,  1.71s/it][A
loss=1.358:  61%|██████    | 57/94 [02:15<00:50,  1.37s/it][A
loss=1.374:  61%|██████    | 57/94 [02:15<00:50,  1.37s/it][A
loss=1.374:  62%|██████▏   | 58/94 [02:16<00:40,  1.14s/it][A
loss=1.141:  62%|██████▏   | 58/94 [02:16<00:40,  1.14

{"eval_acc": 0.8083513318934485, "eval_f1": 0.29340080872577334, "eval_acc_and_f1": 0.5508760703096109, "eval_loss": 0.8428762022938047, "learning_rate": 9.448818897637797e-06, "train_loss": 1.2675085332658556, "step": 162}



loss=1.287:  73%|███████▎  | 69/94 [02:56<03:14,  7.80s/it][A
loss=1.038:  73%|███████▎  | 69/94 [02:56<03:14,  7.80s/it][A
loss=1.038:  74%|███████▍  | 70/94 [02:57<02:15,  5.63s/it][A
loss=1.153:  74%|███████▍  | 70/94 [02:57<02:15,  5.63s/it][A
loss=1.153:  76%|███████▌  | 71/94 [02:57<01:34,  4.12s/it][A
loss=1.121:  76%|███████▌  | 71/94 [02:58<01:34,  4.12s/it][A
loss=1.121:  77%|███████▋  | 72/94 [02:58<01:07,  3.06s/it][A
loss=1.102:  77%|███████▋  | 72/94 [02:58<01:07,  3.06s/it][A
loss=1.102:  78%|███████▊  | 73/94 [02:59<00:48,  2.31s/it][A
loss=0.798:  78%|███████▊  | 73/94 [02:59<00:48,  2.31s/it][A
loss=0.798:  79%|███████▊  | 74/94 [02:59<00:35,  1.80s/it][A
loss=0.981:  79%|███████▊  | 74/94 [02:59<00:35,  1.80s/it][A
loss=0.981:  80%|███████▉  | 75/94 [03:00<00:27,  1.43s/it][A
loss=1.221:  80%|███████▉  | 75/94 [03:00<00:27,  1.43s/it][A
loss=1.221:  81%|████████  | 76/94 [03:00<00:21,  1.18s/it][A
loss=1.052:  81%|████████  | 76/94 [03:01<00:21,  1.18

{"eval_acc": 0.8254859611231101, "eval_f1": 0.3364281529028746, "eval_acc_and_f1": 0.5809570570129924, "eval_loss": 0.7571811548301152, "learning_rate": 8.031496062992128e-06, "train_loss": 1.0811875330077276, "step": 180}



loss=0.761:  93%|█████████▎| 87/94 [03:40<00:53,  7.62s/it][A
loss=0.920:  93%|█████████▎| 87/94 [03:40<00:53,  7.62s/it][A
loss=0.920:  94%|█████████▎| 88/94 [03:41<00:33,  5.51s/it][A
loss=0.941:  94%|█████████▎| 88/94 [03:41<00:33,  5.51s/it][A
loss=0.941:  95%|█████████▍| 89/94 [03:41<00:20,  4.03s/it][A
loss=1.343:  95%|█████████▍| 89/94 [03:42<00:20,  4.03s/it][A
loss=1.343:  96%|█████████▌| 90/94 [03:42<00:11,  3.00s/it][A
loss=0.848:  96%|█████████▌| 90/94 [03:42<00:11,  3.00s/it][A
loss=0.848:  97%|█████████▋| 91/94 [03:43<00:06,  2.27s/it][A
loss=1.028:  97%|█████████▋| 91/94 [03:43<00:06,  2.27s/it][A
loss=1.028:  98%|█████████▊| 92/94 [03:43<00:03,  1.77s/it][A
loss=1.178:  98%|█████████▊| 92/94 [03:43<00:03,  1.77s/it][A
loss=1.178:  99%|█████████▉| 93/94 [03:44<00:01,  1.41s/it][A
loss=1.106:  99%|█████████▉| 93/94 [03:44<00:01,  1.41s/it][A
loss=1.106: 100%|██████████| 94/94 [03:44<00:00,  2.39s/it]
Epoch:  67%|██████▋   | 2/3 [07:22<03:39, 219.97s/it]
Ite

{"eval_acc": 0.8613390928725702, "eval_f1": 0.368748546237746, "eval_acc_and_f1": 0.6150438195551581, "eval_loss": 0.6970530769654683, "learning_rate": 6.614173228346458e-06, "train_loss": 0.9873553878731198, "step": 198}



loss=1.067:  12%|█▏        | 11/94 [00:40<10:38,  7.69s/it][A
loss=0.908:  12%|█▏        | 11/94 [00:40<10:38,  7.69s/it][A
loss=0.908:  13%|█▎        | 12/94 [00:40<07:36,  5.56s/it][A
loss=0.801:  13%|█▎        | 12/94 [00:41<07:36,  5.56s/it][A
loss=0.801:  14%|█▍        | 13/94 [00:41<05:29,  4.07s/it][A
loss=1.135:  14%|█▍        | 13/94 [00:41<05:29,  4.07s/it][A
loss=1.135:  15%|█▍        | 14/94 [00:42<04:01,  3.02s/it][A
loss=0.857:  15%|█▍        | 14/94 [00:42<04:01,  3.02s/it][A
loss=0.857:  16%|█▌        | 15/94 [00:42<03:00,  2.29s/it][A
loss=1.155:  16%|█▌        | 15/94 [00:42<03:00,  2.29s/it][A
loss=1.155:  17%|█▋        | 16/94 [00:43<02:18,  1.78s/it][A
loss=1.171:  17%|█▋        | 16/94 [00:43<02:18,  1.78s/it][A
loss=1.171:  18%|█▊        | 17/94 [00:43<01:49,  1.42s/it][A
loss=0.677:  18%|█▊        | 17/94 [00:43<01:49,  1.42s/it][A
loss=0.677:  19%|█▉        | 18/94 [00:44<01:28,  1.17s/it][A
loss=0.968:  19%|█▉        | 18/94 [00:44<01:28,  1.17

{"eval_acc": 0.8735781137508999, "eval_f1": 0.3809668531060372, "eval_acc_and_f1": 0.6272724834284685, "eval_loss": 0.6538536101579666, "learning_rate": 5.196850393700788e-06, "train_loss": 0.961915608909395, "step": 216}



loss=0.683:  31%|███       | 29/94 [01:24<08:16,  7.63s/it][A
loss=0.886:  31%|███       | 29/94 [01:24<08:16,  7.63s/it][A
loss=0.886:  32%|███▏      | 30/94 [01:24<05:53,  5.52s/it][A
loss=0.932:  32%|███▏      | 30/94 [01:25<05:53,  5.52s/it][A
loss=0.932:  33%|███▎      | 31/94 [01:25<04:14,  4.04s/it][A
loss=1.230:  33%|███▎      | 31/94 [01:25<04:14,  4.04s/it][A
loss=1.230:  34%|███▍      | 32/94 [01:26<03:06,  3.00s/it][A
loss=0.787:  34%|███▍      | 32/94 [01:26<03:06,  3.00s/it][A
loss=0.787:  35%|███▌      | 33/94 [01:26<02:18,  2.28s/it][A
loss=0.682:  35%|███▌      | 33/94 [01:26<02:18,  2.28s/it][A
loss=0.682:  36%|███▌      | 34/94 [01:27<01:46,  1.77s/it][A
loss=0.940:  36%|███▌      | 34/94 [01:27<01:46,  1.77s/it][A
loss=0.940:  37%|███▋      | 35/94 [01:27<01:23,  1.41s/it][A
loss=0.958:  37%|███▋      | 35/94 [01:27<01:23,  1.41s/it][A
loss=0.958:  38%|███▊      | 36/94 [01:28<01:07,  1.16s/it][A
loss=0.729:  38%|███▊      | 36/94 [01:28<01:07,  1.16

{"eval_acc": 0.8758819294456444, "eval_f1": 0.38495395762690726, "eval_acc_and_f1": 0.6304179435362758, "eval_loss": 0.628968748663153, "learning_rate": 3.7795275590551182e-06, "train_loss": 0.8162025080786811, "step": 234}



loss=0.899:  50%|█████     | 47/94 [02:07<05:54,  7.55s/it][A
loss=0.860:  50%|█████     | 47/94 [02:08<05:54,  7.55s/it][A
loss=0.860:  51%|█████     | 48/94 [02:08<04:11,  5.46s/it][A
loss=0.752:  51%|█████     | 48/94 [02:08<04:11,  5.46s/it][A
loss=0.752:  52%|█████▏    | 49/94 [02:09<02:59,  4.00s/it][A
loss=0.574:  52%|█████▏    | 49/94 [02:09<02:59,  4.00s/it][A
loss=0.574:  53%|█████▎    | 50/94 [02:09<02:10,  2.98s/it][A
loss=0.935:  53%|█████▎    | 50/94 [02:09<02:10,  2.98s/it][A
loss=0.935:  54%|█████▍    | 51/94 [02:10<01:37,  2.26s/it][A
loss=0.717:  54%|█████▍    | 51/94 [02:10<01:37,  2.26s/it][A
loss=0.717:  55%|█████▌    | 52/94 [02:10<01:13,  1.76s/it][A
loss=0.628:  55%|█████▌    | 52/94 [02:11<01:13,  1.76s/it][A
loss=0.628:  56%|█████▋    | 53/94 [02:11<00:57,  1.41s/it][A
loss=0.947:  56%|█████▋    | 53/94 [02:11<00:57,  1.41s/it][A
loss=0.947:  57%|█████▋    | 54/94 [02:12<00:46,  1.16s/it][A
loss=0.615:  57%|█████▋    | 54/94 [02:12<00:46,  1.16

{"eval_acc": 0.8771778257739381, "eval_f1": 0.3825651558481817, "eval_acc_and_f1": 0.6298714908110599, "eval_loss": 0.6089815774134227, "learning_rate": 2.362204724409449e-06, "train_loss": 0.7823153369956546, "step": 252}



loss=0.641:  69%|██████▉   | 65/94 [02:51<03:41,  7.63s/it][A
loss=0.671:  69%|██████▉   | 65/94 [02:52<03:41,  7.63s/it][A
loss=0.671:  70%|███████   | 66/94 [02:52<02:34,  5.51s/it][A
loss=0.939:  70%|███████   | 66/94 [02:52<02:34,  5.51s/it][A
loss=0.939:  71%|███████▏  | 67/94 [02:53<01:48,  4.04s/it][A
loss=0.751:  71%|███████▏  | 67/94 [02:53<01:48,  4.04s/it][A
loss=0.751:  72%|███████▏  | 68/94 [02:53<01:18,  3.00s/it][A
loss=0.967:  72%|███████▏  | 68/94 [02:53<01:18,  3.00s/it][A
loss=0.967:  73%|███████▎  | 69/94 [02:54<00:56,  2.28s/it][A
loss=0.901:  73%|███████▎  | 69/94 [02:54<00:56,  2.28s/it][A
loss=0.901:  74%|███████▍  | 70/94 [02:54<00:42,  1.77s/it][A
loss=0.909:  74%|███████▍  | 70/94 [02:55<00:42,  1.77s/it][A
loss=0.909:  76%|███████▌  | 71/94 [02:55<00:32,  1.41s/it][A
loss=0.679:  76%|███████▌  | 71/94 [02:55<00:32,  1.41s/it][A
loss=0.679:  77%|███████▋  | 72/94 [02:56<00:25,  1.17s/it][A
loss=1.011:  77%|███████▋  | 72/94 [02:56<00:25,  1.17

{"eval_acc": 0.874154067674586, "eval_f1": 0.38112068171478536, "eval_acc_and_f1": 0.6276373746946857, "eval_loss": 0.6018256108675685, "learning_rate": 9.448818897637796e-07, "train_loss": 0.8393709162871043, "step": 270}



loss=0.752:  88%|████████▊ | 83/94 [03:34<01:20,  7.30s/it][A
loss=0.948:  88%|████████▊ | 83/94 [03:34<01:20,  7.30s/it][A
loss=0.948:  89%|████████▉ | 84/94 [03:34<00:52,  5.28s/it][A
loss=0.959:  89%|████████▉ | 84/94 [03:35<00:52,  5.28s/it][A
loss=0.959:  90%|█████████ | 85/94 [03:35<00:34,  3.87s/it][A
loss=0.695:  90%|█████████ | 85/94 [03:35<00:34,  3.87s/it][A
loss=0.695:  91%|█████████▏| 86/94 [03:36<00:23,  2.88s/it][A
loss=0.986:  91%|█████████▏| 86/94 [03:36<00:23,  2.88s/it][A
loss=0.986:  93%|█████████▎| 87/94 [03:36<00:15,  2.19s/it][A
loss=0.691:  93%|█████████▎| 87/94 [03:36<00:15,  2.19s/it][A
loss=0.691:  94%|█████████▎| 88/94 [03:37<00:10,  1.71s/it][A
loss=0.809:  94%|█████████▎| 88/94 [03:37<00:10,  1.71s/it][A
loss=0.809:  95%|█████████▍| 89/94 [03:37<00:06,  1.37s/it][A
loss=0.823:  95%|█████████▍| 89/94 [03:37<00:06,  1.37s/it][A
loss=0.823:  96%|█████████▌| 90/94 [03:38<00:04,  1.13s/it][A
loss=0.785:  96%|█████████▌| 90/94 [03:38<00:04,  1.13

{"eval_acc": 0.8777537796976241, "eval_f1": 0.3848272948604038, "eval_acc_and_f1": 0.631290537279014, "eval_loss": 0.5975852587393352, "learning_rate": 0.0, "train_loss": 0.5433742337756686, "step": 282}


07/18/2022 02:49:31 - INFO - utilities.trainers -   ***** Running evaluation iter-6_trial1 *****
07/18/2022 02:49:31 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 02:49:31 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 02:49:58 - INFO - utilities.trainers -   ***** Eval results iter-6_trial1 *****
07/18/2022 02:49:58 - INFO - utilities.trainers -     acc = 0.8777537796976241
07/18/2022 02:49:58 - INFO - utilities.trainers -     acc_and_f1 = 0.631290537279014
07/18/2022 02:49:58 - INFO - utilities.trainers -     f1 = 0.3848272948604038
07/18/2022 02:50:01 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 02:50:07 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 02:50:07 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 02:50:07 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 02:52:26 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 02:52:26 - INFO - utilities.trainers -     acc = 0.8743159956223719
07/18/2022 02:52:26 - INFO - utilities.trainers -     acc_and_f1 = 0.6200347864826964
07/18/2022 02:52:26 - INFO - utilities.trainers -     f1 = 0.3657535773430209



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 02:52:27 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 02:52:45 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 02:52:51 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 02:52:51 - INFO - utilities.trainers -     Num examples = 47000
07/18/2022 02:52:51 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/184 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 184/184 [06:15<00:00,  2.04s/it]
07/18/2022 02:59:06 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 02:59:06 - INFO - utilities.trainers -     acc = 0.8804255319148936
07/18/2022 02:59:06 - INFO - utilities.trainers -     acc_and_f1 = 0.6304528721367622
07/18/2022 02:59:06 - INFO - utilities.trainers -     f1 = 0.3804802123586308
07/18/2022 02:59:06 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 02:59:25 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 02:59:30 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 02:59:30 - INFO - utilities.trainers -     Num examples = 3000
07/18/2022 02:59:30 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 12/12 [00:23<00:00,  1.99s/it]
07/18/2022 02:59:53 - INFO - uti



************
End of iteration 6:
Train loss 1.5349, Val loss 0.5975852587393352, Test loss 0.6115047309328528
Annotated 500 samples
Current labeled (training) data: 3500 samples
Remaining budget: 500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 7!



07/18/2022 03:03:12 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.8777537796976241, acc_best_iteration=6, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-6', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_2451/ornl20_bert-cls/iter-7', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 32
total steps: 328
logging steps: 21
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.355:   1%|          | 1/110 [00:00<00:57,  1.89it/s][A
loss=3.456:   1%|          | 1/110 [00:00<00:57,  1.89it/s][A
loss=3.456:   2%|▏         | 2/110 [00:01<00:59,  1.80it/s][A
loss=3.350:   2%|▏         | 2/110 [00:01<00:59,  1.80it/s][A
loss=3.350:   3%|▎         | 3/110 [00:01<01:00,  1.76it/s][A
loss=3.372:   3%|▎         | 3/110 [00:01<01:00,  1.76it/s][A
loss=3.372:   4%|▎         | 4/110 [00:02<01:01,  1.72it/s][A
loss=3.390:   4%|▎         | 4/110 [00:02<01:01,  1.72it/s][A
loss=3.390:   5%|▍         | 5/110 [00:02<01:01,  1.70it/s][A
loss=3.320:   5%|▍         | 5/110 [00:03<01:01,  1.70it/s][A
loss=3.320:   5%|▌         | 6/110 [00:03<01:01,  1.69it/s][A
loss=3.202:   5%|▌         | 6/110 [00:03<01:01,  1.69it/s][A
loss=3.202:   6%|▋         | 7/110 [00:04<01:01,  1.68it/s][A
loss=3.246:   6%|▋         | 7/110 [00:04<01:01,  1.68it/s][A
loss=3.246:   7%|▋         | 8/110 [00:04<01:00,  1.67it/s][A
loss=3.335:   7%|▋         | 8/110 [00:04<01:00,  1.67

{"eval_acc": 0.2224622030237581, "eval_f1": 0.02934208716703572, "eval_acc_and_f1": 0.1259021450953969, "eval_loss": 2.7748415895870755, "learning_rate": 1.3125e-05, "train_loss": 3.2081542015075684, "step": 21}



loss=2.878:  20%|██        | 22/110 [00:44<10:31,  7.18s/it][A
loss=2.876:  20%|██        | 22/110 [00:44<10:31,  7.18s/it][A
loss=2.876:  21%|██        | 23/110 [00:45<07:32,  5.20s/it][A
loss=2.910:  21%|██        | 23/110 [00:45<07:32,  5.20s/it][A
loss=2.910:  22%|██▏       | 24/110 [00:45<05:29,  3.83s/it][A
loss=2.745:  22%|██▏       | 24/110 [00:45<05:29,  3.83s/it][A
loss=2.745:  23%|██▎       | 25/110 [00:46<04:02,  2.86s/it][A
loss=2.903:  23%|██▎       | 25/110 [00:46<04:02,  2.86s/it][A
loss=2.903:  24%|██▎       | 26/110 [00:47<03:03,  2.18s/it][A
loss=2.866:  24%|██▎       | 26/110 [00:47<03:03,  2.18s/it][A
loss=2.866:  25%|██▍       | 27/110 [00:47<02:21,  1.71s/it][A
loss=2.710:  25%|██▍       | 27/110 [00:47<02:21,  1.71s/it][A
loss=2.710:  25%|██▌       | 28/110 [00:48<01:53,  1.38s/it][A
loss=2.632:  25%|██▌       | 28/110 [00:48<01:53,  1.38s/it][A
loss=2.632:  26%|██▋       | 29/110 [00:48<01:33,  1.15s/it][A
loss=2.939:  26%|██▋       | 29/110 [00

{"eval_acc": 0.4118070554355652, "eval_f1": 0.06797865707292158, "eval_acc_and_f1": 0.2398928562542434, "eval_loss": 2.3173706446375166, "learning_rate": 1.9328859060402687e-05, "train_loss": 2.704624448503767, "step": 42}



loss=2.413:  39%|███▉      | 43/110 [01:30<08:33,  7.66s/it][A
loss=2.311:  39%|███▉      | 43/110 [01:31<08:33,  7.66s/it][A
loss=2.311:  40%|████      | 44/110 [01:31<06:05,  5.54s/it][A
loss=2.436:  40%|████      | 44/110 [01:31<06:05,  5.54s/it][A
loss=2.436:  41%|████      | 45/110 [01:32<04:24,  4.06s/it][A
loss=2.588:  41%|████      | 45/110 [01:32<04:24,  4.06s/it][A
loss=2.588:  42%|████▏     | 46/110 [01:32<03:13,  3.03s/it][A
loss=2.638:  42%|████▏     | 46/110 [01:32<03:13,  3.03s/it][A
loss=2.638:  43%|████▎     | 47/110 [01:33<02:25,  2.30s/it][A
loss=2.650:  43%|████▎     | 47/110 [01:33<02:25,  2.30s/it][A
loss=2.650:  44%|████▎     | 48/110 [01:34<01:51,  1.79s/it][A
loss=2.627:  44%|████▎     | 48/110 [01:34<01:51,  1.79s/it][A
loss=2.627:  45%|████▍     | 49/110 [01:34<01:28,  1.44s/it][A
loss=2.398:  45%|████▍     | 49/110 [01:34<01:28,  1.44s/it][A
loss=2.398:  45%|████▌     | 50/110 [01:35<01:11,  1.19s/it][A
loss=2.455:  45%|████▌     | 50/110 [01

{"eval_acc": 0.54528437724982, "eval_f1": 0.14520412494113494, "eval_acc_and_f1": 0.3452442510954775, "eval_loss": 2.018208916698183, "learning_rate": 1.7919463087248323e-05, "train_loss": 2.480471213658651, "step": 63}



loss=2.395:  58%|█████▊    | 64/110 [02:17<05:54,  7.72s/it][A
loss=2.398:  58%|█████▊    | 64/110 [02:17<05:54,  7.72s/it][A
loss=2.398:  59%|█████▉    | 65/110 [02:18<04:11,  5.59s/it][A
loss=2.321:  59%|█████▉    | 65/110 [02:18<04:11,  5.59s/it][A
loss=2.321:  60%|██████    | 66/110 [02:18<03:00,  4.09s/it][A
loss=2.093:  60%|██████    | 66/110 [02:19<03:00,  4.09s/it][A
loss=2.093:  61%|██████    | 67/110 [02:19<02:11,  3.05s/it][A
loss=2.009:  61%|██████    | 67/110 [02:19<02:11,  3.05s/it][A
loss=2.009:  62%|██████▏   | 68/110 [02:20<01:37,  2.32s/it][A
loss=2.064:  62%|██████▏   | 68/110 [02:20<01:37,  2.32s/it][A
loss=2.064:  63%|██████▎   | 69/110 [02:20<01:14,  1.81s/it][A
loss=2.330:  63%|██████▎   | 69/110 [02:20<01:14,  1.81s/it][A
loss=2.330:  64%|██████▎   | 70/110 [02:21<00:57,  1.45s/it][A
loss=2.131:  64%|██████▎   | 70/110 [02:21<00:57,  1.45s/it][A
loss=2.131:  65%|██████▍   | 71/110 [02:21<00:46,  1.20s/it][A
loss=2.028:  65%|██████▍   | 71/110 [02

{"eval_acc": 0.7185025197984162, "eval_f1": 0.19000916152478328, "eval_acc_and_f1": 0.45425584066159974, "eval_loss": 1.52665856054851, "learning_rate": 1.6510067114093962e-05, "train_loss": 2.0761623098736717, "step": 84}



loss=1.665:  77%|███████▋  | 85/110 [03:02<03:03,  7.32s/it][A
loss=1.903:  77%|███████▋  | 85/110 [03:02<03:03,  7.32s/it][A
loss=1.903:  78%|███████▊  | 86/110 [03:03<02:07,  5.31s/it][A
loss=2.160:  78%|███████▊  | 86/110 [03:03<02:07,  5.31s/it][A
loss=2.160:  79%|███████▉  | 87/110 [03:03<01:29,  3.90s/it][A
loss=1.858:  79%|███████▉  | 87/110 [03:03<01:29,  3.90s/it][A
loss=1.858:  80%|████████  | 88/110 [03:04<01:04,  2.91s/it][A
loss=1.871:  80%|████████  | 88/110 [03:04<01:04,  2.91s/it][A
loss=1.871:  81%|████████  | 89/110 [03:04<00:46,  2.22s/it][A
loss=2.105:  81%|████████  | 89/110 [03:05<00:46,  2.22s/it][A
loss=2.105:  82%|████████▏ | 90/110 [03:05<00:34,  1.74s/it][A
loss=2.038:  82%|████████▏ | 90/110 [03:05<00:34,  1.74s/it][A
loss=2.038:  83%|████████▎ | 91/110 [03:06<00:26,  1.40s/it][A
loss=1.841:  83%|████████▎ | 91/110 [03:06<00:26,  1.40s/it][A
loss=1.841:  84%|████████▎ | 92/110 [03:06<00:20,  1.16s/it][A
loss=1.946:  84%|████████▎ | 92/110 [03

{"eval_acc": 0.7624190064794817, "eval_f1": 0.20592755707083868, "eval_acc_and_f1": 0.48417328177516017, "eval_loss": 1.1645348540374212, "learning_rate": 1.5100671140939598e-05, "train_loss": 1.821563618523734, "step": 105}



loss=1.786:  96%|█████████▋| 106/110 [03:48<00:30,  7.53s/it][A
loss=1.892:  96%|█████████▋| 106/110 [03:48<00:30,  7.53s/it][A
loss=1.892:  97%|█████████▋| 107/110 [03:48<00:16,  5.45s/it][A
loss=1.788:  97%|█████████▋| 107/110 [03:48<00:16,  5.45s/it][A
loss=1.788:  98%|█████████▊| 108/110 [03:49<00:08,  4.00s/it][A
loss=1.616:  98%|█████████▊| 108/110 [03:49<00:08,  4.00s/it][A
loss=1.616:  99%|█████████▉| 109/110 [03:50<00:02,  2.99s/it][A
loss=2.318:  99%|█████████▉| 109/110 [03:50<00:02,  2.99s/it][A
loss=2.318: 100%|██████████| 110/110 [03:50<00:00,  2.09s/it]
Epoch:  33%|███▎      | 1/3 [03:50<07:40, 230.34s/it]
Iteration:   0%|          | 0/110 [00:00<?, ?it/s][A
loss=1.810:   0%|          | 0/110 [00:00<?, ?it/s][A
loss=1.810:   1%|          | 1/110 [00:00<01:05,  1.66it/s][A
loss=1.812:   1%|          | 1/110 [00:00<01:05,  1.66it/s][A
loss=1.812:   2%|▏         | 2/110 [00:01<01:05,  1.65it/s][A
loss=1.601:   2%|▏         | 2/110 [00:01<01:05,  1.65it/s][A
lo

{"eval_acc": 0.7948164146868251, "eval_f1": 0.24018842351369837, "eval_acc_and_f1": 0.5175024191002617, "eval_loss": 0.9799151590892247, "learning_rate": 1.3691275167785237e-05, "train_loss": 1.6740345330465407, "step": 126}



loss=1.336:  15%|█▌        | 17/110 [00:42<11:19,  7.30s/it][A
loss=1.345:  15%|█▌        | 17/110 [00:42<11:19,  7.30s/it][A
loss=1.345:  16%|█▋        | 18/110 [00:42<08:07,  5.30s/it][A
loss=1.304:  16%|█▋        | 18/110 [00:43<08:07,  5.30s/it][A
loss=1.304:  17%|█▋        | 19/110 [00:43<05:54,  3.89s/it][A
loss=1.460:  17%|█▋        | 19/110 [00:43<05:54,  3.89s/it][A
loss=1.460:  18%|█▊        | 20/110 [00:44<04:21,  2.90s/it][A
loss=1.307:  18%|█▊        | 20/110 [00:44<04:21,  2.90s/it][A
loss=1.307:  19%|█▉        | 21/110 [00:44<03:17,  2.22s/it][A
loss=1.381:  19%|█▉        | 21/110 [00:44<03:17,  2.22s/it][A
loss=1.381:  20%|██        | 22/110 [00:45<02:32,  1.74s/it][A
loss=1.546:  20%|██        | 22/110 [00:45<02:32,  1.74s/it][A
loss=1.546:  21%|██        | 23/110 [00:45<02:01,  1.40s/it][A
loss=1.688:  21%|██        | 23/110 [00:46<02:01,  1.40s/it][A
loss=1.688:  22%|██▏       | 24/110 [00:46<01:39,  1.16s/it][A
loss=1.205:  22%|██▏       | 24/110 [00

{"eval_acc": 0.8174226061915046, "eval_f1": 0.2708537072632396, "eval_acc_and_f1": 0.5441381567273721, "eval_loss": 0.8202474202428546, "learning_rate": 1.2281879194630872e-05, "train_loss": 1.3276293107441492, "step": 147}



loss=1.216:  35%|███▍      | 38/110 [01:30<09:38,  8.03s/it][A
loss=1.423:  35%|███▍      | 38/110 [01:30<09:38,  8.03s/it][A
loss=1.423:  35%|███▌      | 39/110 [01:30<06:52,  5.80s/it][A
loss=1.276:  35%|███▌      | 39/110 [01:31<06:52,  5.80s/it][A
loss=1.276:  36%|███▋      | 40/110 [01:31<04:57,  4.25s/it][A
loss=0.925:  36%|███▋      | 40/110 [01:31<04:57,  4.25s/it][A
loss=0.925:  37%|███▋      | 41/110 [01:32<03:37,  3.16s/it][A
loss=1.057:  37%|███▋      | 41/110 [01:32<03:37,  3.16s/it][A
loss=1.057:  38%|███▊      | 42/110 [01:32<02:42,  2.39s/it][A
loss=1.253:  38%|███▊      | 42/110 [01:32<02:42,  2.39s/it][A
loss=1.253:  39%|███▉      | 43/110 [01:33<02:04,  1.86s/it][A
loss=1.070:  39%|███▉      | 43/110 [01:33<02:04,  1.86s/it][A
loss=1.070:  40%|████      | 44/110 [01:34<01:37,  1.48s/it][A
loss=1.276:  40%|████      | 44/110 [01:34<01:37,  1.48s/it][A
loss=1.276:  41%|████      | 45/110 [01:34<01:19,  1.22s/it][A
loss=1.451:  41%|████      | 45/110 [01

{"eval_acc": 0.8469402447804175, "eval_f1": 0.32582338670542604, "eval_acc_and_f1": 0.5863818157429218, "eval_loss": 0.7254863104649952, "learning_rate": 1.0872483221476512e-05, "train_loss": 1.1970604062080383, "step": 168}



loss=1.152:  54%|█████▎    | 59/110 [02:15<06:15,  7.37s/it][A
loss=0.833:  54%|█████▎    | 59/110 [02:15<06:15,  7.37s/it][A
loss=0.833:  55%|█████▍    | 60/110 [02:15<04:26,  5.34s/it][A
loss=1.091:  55%|█████▍    | 60/110 [02:16<04:26,  5.34s/it][A
loss=1.091:  55%|█████▌    | 61/110 [02:16<03:12,  3.92s/it][A
loss=0.993:  55%|█████▌    | 61/110 [02:16<03:12,  3.92s/it][A
loss=0.993:  56%|█████▋    | 62/110 [02:17<02:20,  2.92s/it][A
loss=1.017:  56%|█████▋    | 62/110 [02:17<02:20,  2.92s/it][A
loss=1.017:  57%|█████▋    | 63/110 [02:17<01:44,  2.23s/it][A
loss=0.996:  57%|█████▋    | 63/110 [02:17<01:44,  2.23s/it][A
loss=0.996:  58%|█████▊    | 64/110 [02:18<01:20,  1.74s/it][A
loss=1.242:  58%|█████▊    | 64/110 [02:18<01:20,  1.74s/it][A
loss=1.242:  59%|█████▉    | 65/110 [02:18<01:03,  1.40s/it][A
loss=1.502:  59%|█████▉    | 65/110 [02:19<01:03,  1.40s/it][A
loss=1.502:  60%|██████    | 66/110 [02:19<00:51,  1.16s/it][A
loss=1.340:  60%|██████    | 66/110 [02

{"eval_acc": 0.8571634269258459, "eval_f1": 0.3630691997185055, "eval_acc_and_f1": 0.6101163133221756, "eval_loss": 0.6571985525744302, "learning_rate": 9.463087248322147e-06, "train_loss": 1.1035321383249193, "step": 189}



loss=1.314:  73%|███████▎  | 80/110 [03:03<04:02,  8.08s/it][A
loss=1.034:  73%|███████▎  | 80/110 [03:03<04:02,  8.08s/it][A
loss=1.034:  74%|███████▎  | 81/110 [03:04<02:49,  5.84s/it][A
loss=1.013:  74%|███████▎  | 81/110 [03:04<02:49,  5.84s/it][A
loss=1.013:  75%|███████▍  | 82/110 [03:04<01:59,  4.27s/it][A
loss=1.273:  75%|███████▍  | 82/110 [03:05<01:59,  4.27s/it][A
loss=1.273:  75%|███████▌  | 83/110 [03:05<01:25,  3.17s/it][A
loss=0.716:  75%|███████▌  | 83/110 [03:05<01:25,  3.17s/it][A
loss=0.716:  76%|███████▋  | 84/110 [03:06<01:02,  2.40s/it][A
loss=0.855:  76%|███████▋  | 84/110 [03:06<01:02,  2.40s/it][A
loss=0.855:  77%|███████▋  | 85/110 [03:06<00:46,  1.87s/it][A
loss=1.146:  77%|███████▋  | 85/110 [03:06<00:46,  1.87s/it][A
loss=1.146:  78%|███████▊  | 86/110 [03:07<00:35,  1.49s/it][A
loss=1.116:  78%|███████▊  | 86/110 [03:07<00:35,  1.49s/it][A
loss=1.116:  79%|███████▉  | 87/110 [03:07<00:28,  1.23s/it][A
loss=1.135:  79%|███████▉  | 87/110 [03

{"eval_acc": 0.8925845932325414, "eval_f1": 0.387732303216131, "eval_acc_and_f1": 0.6401584482243362, "eval_loss": 0.6087275126150676, "learning_rate": 8.053691275167785e-06, "train_loss": 1.05350756361371, "step": 210}



loss=0.923:  92%|█████████▏| 101/110 [03:48<01:05,  7.32s/it][A
loss=1.014:  92%|█████████▏| 101/110 [03:48<01:05,  7.32s/it][A
loss=1.014:  93%|█████████▎| 102/110 [03:48<00:42,  5.30s/it][A
loss=0.952:  93%|█████████▎| 102/110 [03:49<00:42,  5.30s/it][A
loss=0.952:  94%|█████████▎| 103/110 [03:49<00:27,  3.89s/it][A
loss=0.628:  94%|█████████▎| 103/110 [03:49<00:27,  3.89s/it][A
loss=0.628:  95%|█████████▍| 104/110 [03:50<00:17,  2.91s/it][A
loss=1.130:  95%|█████████▍| 104/110 [03:50<00:17,  2.91s/it][A
loss=1.130:  95%|█████████▌| 105/110 [03:50<00:11,  2.22s/it][A
loss=0.903:  95%|█████████▌| 105/110 [03:51<00:11,  2.22s/it][A
loss=0.903:  96%|█████████▋| 106/110 [03:51<00:06,  1.74s/it][A
loss=0.709:  96%|█████████▋| 106/110 [03:51<00:06,  1.74s/it][A
loss=0.709:  97%|█████████▋| 107/110 [03:52<00:04,  1.41s/it][A
loss=0.881:  97%|█████████▋| 107/110 [03:52<00:04,  1.41s/it][A
loss=0.881:  98%|█████████▊| 108/110 [03:52<00:02,  1.17s/it][A
loss=1.054:  98%|███████

{"eval_acc": 0.8937365010799136, "eval_f1": 0.39649086021631147, "eval_acc_and_f1": 0.6451136806481126, "eval_loss": 0.5727360025048256, "learning_rate": 6.644295302013424e-06, "train_loss": 0.9266140517734346, "step": 231}



loss=0.951:  11%|█         | 12/110 [00:43<13:20,  8.16s/it][A
loss=0.909:  11%|█         | 12/110 [00:43<13:20,  8.16s/it][A
loss=0.909:  12%|█▏        | 13/110 [00:43<09:31,  5.90s/it][A
loss=1.010:  12%|█▏        | 13/110 [00:44<09:31,  5.90s/it][A
loss=1.010:  13%|█▎        | 14/110 [00:44<06:53,  4.31s/it][A
loss=1.057:  13%|█▎        | 14/110 [00:44<06:53,  4.31s/it][A
loss=1.057:  14%|█▎        | 15/110 [00:45<05:03,  3.20s/it][A
loss=0.758:  14%|█▎        | 15/110 [00:45<05:03,  3.20s/it][A
loss=0.758:  15%|█▍        | 16/110 [00:45<03:47,  2.43s/it][A
loss=0.901:  15%|█▍        | 16/110 [00:45<03:47,  2.43s/it][A
loss=0.901:  15%|█▌        | 17/110 [00:46<02:54,  1.88s/it][A
loss=0.707:  15%|█▌        | 17/110 [00:46<02:54,  1.88s/it][A
loss=0.707:  16%|█▋        | 18/110 [00:46<02:18,  1.50s/it][A
loss=1.031:  16%|█▋        | 18/110 [00:47<02:18,  1.50s/it][A
loss=1.031:  17%|█▋        | 19/110 [00:47<01:52,  1.24s/it][A
loss=0.945:  17%|█▋        | 19/110 [00

{"eval_acc": 0.8970482361411087, "eval_f1": 0.40132353835327195, "eval_acc_and_f1": 0.6491858872471903, "eval_loss": 0.5350929413522992, "learning_rate": 5.234899328859061e-06, "train_loss": 0.8778626464662098, "step": 252}



loss=0.911:  30%|███       | 33/110 [01:27<09:22,  7.31s/it][A
loss=0.905:  30%|███       | 33/110 [01:28<09:22,  7.31s/it][A
loss=0.905:  31%|███       | 34/110 [01:28<06:42,  5.30s/it][A
loss=0.825:  31%|███       | 34/110 [01:28<06:42,  5.30s/it][A
loss=0.825:  32%|███▏      | 35/110 [01:29<04:51,  3.89s/it][A
loss=0.558:  32%|███▏      | 35/110 [01:29<04:51,  3.89s/it][A
loss=0.558:  33%|███▎      | 36/110 [01:29<03:34,  2.90s/it][A
loss=0.614:  33%|███▎      | 36/110 [01:29<03:34,  2.90s/it][A
loss=0.614:  34%|███▎      | 37/110 [01:30<02:41,  2.22s/it][A
loss=0.814:  34%|███▎      | 37/110 [01:30<02:41,  2.22s/it][A
loss=0.814:  35%|███▍      | 38/110 [01:30<02:04,  1.73s/it][A
loss=1.103:  35%|███▍      | 38/110 [01:31<02:04,  1.73s/it][A
loss=1.103:  35%|███▌      | 39/110 [01:31<01:39,  1.40s/it][A
loss=0.602:  35%|███▌      | 39/110 [01:31<01:39,  1.40s/it][A
loss=0.602:  36%|███▋      | 40/110 [01:32<01:21,  1.16s/it][A
loss=0.584:  36%|███▋      | 40/110 [01

{"eval_acc": 0.90093592512599, "eval_f1": 0.40954200710734784, "eval_acc_and_f1": 0.6552389661166689, "eval_loss": 0.5187803709081241, "learning_rate": 3.825503355704698e-06, "train_loss": 0.7934767632257371, "step": 273}



loss=0.923:  49%|████▉     | 54/110 [02:16<07:31,  8.06s/it][A
loss=0.933:  49%|████▉     | 54/110 [02:16<07:31,  8.06s/it][A
loss=0.933:  50%|█████     | 55/110 [02:16<05:20,  5.82s/it][A
loss=0.775:  50%|█████     | 55/110 [02:16<05:20,  5.82s/it][A
loss=0.775:  51%|█████     | 56/110 [02:17<03:50,  4.26s/it][A
loss=0.964:  51%|█████     | 56/110 [02:17<03:50,  4.26s/it][A
loss=0.964:  52%|█████▏    | 57/110 [02:18<02:47,  3.17s/it][A
loss=0.885:  52%|█████▏    | 57/110 [02:18<02:47,  3.17s/it][A
loss=0.885:  53%|█████▎    | 58/110 [02:18<02:04,  2.40s/it][A
loss=0.943:  53%|█████▎    | 58/110 [02:18<02:04,  2.40s/it][A
loss=0.943:  54%|█████▎    | 59/110 [02:19<01:34,  1.86s/it][A
loss=0.714:  54%|█████▎    | 59/110 [02:19<01:34,  1.86s/it][A
loss=0.714:  55%|█████▍    | 60/110 [02:19<01:14,  1.49s/it][A
loss=0.579:  55%|█████▍    | 60/110 [02:20<01:14,  1.49s/it][A
loss=0.579:  55%|█████▌    | 61/110 [02:20<01:00,  1.23s/it][A
loss=0.788:  55%|█████▌    | 61/110 [02

{"eval_acc": 0.9016558675305976, "eval_f1": 0.4100877404037594, "eval_acc_and_f1": 0.6558718039671785, "eval_loss": 0.504720209964684, "learning_rate": 2.416107382550336e-06, "train_loss": 0.8189332939329601, "step": 294}



loss=0.820:  68%|██████▊   | 75/110 [03:03<04:36,  7.89s/it][A
loss=1.212:  68%|██████▊   | 75/110 [03:03<04:36,  7.89s/it][A
loss=1.212:  69%|██████▉   | 76/110 [03:04<03:13,  5.70s/it][A
loss=0.407:  69%|██████▉   | 76/110 [03:04<03:13,  5.70s/it][A
loss=0.407:  70%|███████   | 77/110 [03:04<02:17,  4.17s/it][A
loss=0.828:  70%|███████   | 77/110 [03:05<02:17,  4.17s/it][A
loss=0.828:  71%|███████   | 78/110 [03:05<01:39,  3.11s/it][A
loss=0.601:  71%|███████   | 78/110 [03:05<01:39,  3.11s/it][A
loss=0.601:  72%|███████▏  | 79/110 [03:06<01:13,  2.36s/it][A
loss=0.648:  72%|███████▏  | 79/110 [03:06<01:13,  2.36s/it][A
loss=0.648:  73%|███████▎  | 80/110 [03:06<00:54,  1.83s/it][A
loss=1.157:  73%|███████▎  | 80/110 [03:06<00:54,  1.83s/it][A
loss=1.157:  74%|███████▎  | 81/110 [03:07<00:42,  1.47s/it][A
loss=0.824:  74%|███████▎  | 81/110 [03:07<00:42,  1.47s/it][A
loss=0.824:  75%|███████▍  | 82/110 [03:07<00:33,  1.21s/it][A
loss=0.784:  75%|███████▍  | 82/110 [03

{"eval_acc": 0.9026637868970482, "eval_f1": 0.40928326849783125, "eval_acc_and_f1": 0.6559735276974397, "eval_loss": 0.49590870099408285, "learning_rate": 1.006711409395973e-06, "train_loss": 0.7919908137548537, "step": 315}



loss=0.850:  87%|████████▋ | 96/110 [03:48<01:42,  7.31s/it][A
loss=0.694:  87%|████████▋ | 96/110 [03:48<01:42,  7.31s/it][A
loss=0.694:  88%|████████▊ | 97/110 [03:48<01:08,  5.30s/it][A
loss=0.810:  88%|████████▊ | 97/110 [03:49<01:08,  5.30s/it][A
loss=0.810:  89%|████████▉ | 98/110 [03:49<00:46,  3.89s/it][A
loss=0.825:  89%|████████▉ | 98/110 [03:49<00:46,  3.89s/it][A
loss=0.825:  90%|█████████ | 99/110 [03:50<00:31,  2.91s/it][A
loss=0.763:  90%|█████████ | 99/110 [03:50<00:31,  2.91s/it][A
loss=0.763:  91%|█████████ | 100/110 [03:50<00:22,  2.22s/it][A
loss=0.702:  91%|█████████ | 100/110 [03:50<00:22,  2.22s/it][A
loss=0.702:  92%|█████████▏| 101/110 [03:51<00:15,  1.74s/it][A
loss=0.927:  92%|█████████▏| 101/110 [03:51<00:15,  1.74s/it][A
loss=0.927:  93%|█████████▎| 102/110 [03:52<00:11,  1.40s/it][A
loss=1.063:  93%|█████████▎| 102/110 [03:52<00:11,  1.40s/it][A
loss=1.063:  94%|█████████▎| 103/110 [03:52<00:08,  1.16s/it][A
loss=0.939:  94%|█████████▎| 103

{"eval_acc": 0.9035277177825773, "eval_f1": 0.40953971134996464, "eval_acc_and_f1": 0.656533714566271, "eval_loss": 0.49445513635873795, "learning_rate": 0.0, "train_loss": 0.5693298535687583, "step": 330}


07/18/2022 03:15:55 - INFO - utilities.trainers -   ***** Running evaluation iter-7_trial1 *****
07/18/2022 03:15:55 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:15:55 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 03:16:22 - INFO - utilities.trainers -   ***** Eval results iter-7_trial1 *****
07/18/2022 03:16:22 - INFO - utilities.trainers -     acc = 0.9035277177825773
07/18/2022 03:16:22 - INFO - utilities.trainers -     acc_and_f1 = 0.656533714566271
07/18/2022 03:16:22 - INFO - utilities.trainers -     f1 = 0.40953971134996464
07/18/2022 03:16:25 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 03:16:31 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:16:31 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 03:16:31 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/18/2022 03:18:50 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 03:18:50 - INFO - utilities.trainers -     acc = 0.9037785841829388
07/18/2022 03:18:50 - INFO - utilities.trainers -     acc_and_f1 = 0.6480507615299635
07/18/2022 03:18:50 - INFO - utilities.trainers -     f1 = 0.3923229388769883



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 03:18:51 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 03:19:10 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 03:19:15 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:19:15 - INFO - utilities.trainers -     Num examples = 46500
07/18/2022 03:19:15 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/182 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 182/182 [06:11<00:00,  2.04s/it]
07/18/2022 03:25:27 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 03:25:27 - INFO - utilities.trainers -     acc = 0.9117204301075269
07/18/2022 03:25:27 - INFO - utilities.trainers -     acc_and_f1 = 0.6606430162383117
07/18/2022 03:25:27 - INFO - utilities.trainers -     f1 = 0.40956560236909634
07/18/2022 03:25:27 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 03:25:46 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 03:25:49 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:25:49 - INFO - utilities.trainers -     Num examples = 3500
07/18/2022 03:25:49 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 14/14 [00:27<00:00,  1.99s/it]
07/18/2022 03:26:17 - INFO - ut



************
End of iteration 7:
Train loss 1.4907, Val loss 0.49445513635873795, Test loss 0.503455628586166
Annotated 500 samples
Current labeled (training) data: 4000 samples
Remaining budget: 0 (in samples)
************

Saving json with the results....




The end!....
5262

 --dataset_name ornl20 --budget 8% --per_gpu_train_batch_size 32 --max_seq_length 256 --resume False --cap_training_pool 50000 --init random --init_train_data 1% --acquisition_size 1% --model_name_or_path wietsedv/bert-base-dutch-cased --acquisition cal --seed 5262 

device: cuda:0
output_dir=/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls
Created /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls



 /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20 





07/18/2022 03:29:32 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_ornl20_original
07/18/2022 03:29:36 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_dev_ornl20_original
07/18/2022 03:29:37 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_ornl20_original



train set stats: class 21: 17% class 20: 17% class 17: 7% class 25: 17% class 3: 17% class 2: 3% class 23: 5% class 18: 1% class 4: 1% class 1: 5% class 9: 0% class 11: 3% class 14: 0% class 5: 1% class 15: 1% class 13: 0% class 6: 2% class 22: 0% class 10: 0% class 12: 1% class 19: 0% class 0: 0% class 16: 0% class 8: 0% class 7: 0% 
validation set stats: class 25: 17% class 20: 17% class 3: 17% class 21: 17% class 17: 7% class 0: 0% class 6: 2% class 23: 5% class 2: 3% class 1: 5% class 11: 3% class 12: 1% class 4: 1% class 18: 1% class 15: 1% class 14: 0% class 5: 1% class 10: 0% class 9: 0% class 19: 0% class 8: 0% class 22: 1% class 16: 0% class 13: 0% class 7: 0% 
test set stats: class 3: 17% class 5: 1% class 25: 18% class 1: 5% class 15: 1% class 20: 17% class 21: 17% class 2: 3% class 17: 7% class 6: 2% class 4: 1% class 23: 5% class 10: 0% class 0: 0% class 11: 2% class 8: 0% class 12: 1% class 22: 0% class 9: 0% class 16: 0% class 18: 1% class 14: 0% class 13: 0% class 19: 

07/18/2022 03:30:45 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0, acc_best_iteration=0, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-1', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, evaluate_during_training=True, fp16=False, fp16_opt_level='O1', gpu='0', gradient_accumulation_steps=1, indicator=None, init='random', init_train_data=500, knn_lab=False, learning_rate=2e-05, local_rank=-1,

warmup steps: 4
total steps: 46
logging steps: 3
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.310:   6%|▋         | 1/16 [00:00<00:07,  1.88it/s][A
loss=3.416:   6%|▋         | 1/16 [00:00<00:07,  1.88it/s][A
loss=3.416:  12%|█▎        | 2/16 [00:01<00:07,  1.81it/s][A
loss=3.278:  12%|█▎        | 2/16 [00:01<00:07,  1.81it/s][A07/18/2022 03:31:15 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:31:15 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:31:15 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.06263498920086392, "eval_f1": 0.012319526158824295, "eval_acc_and_f1": 0.03747725767984411, "eval_loss": 3.236840239592961, "learning_rate": 1.5000000000000002e-05, "train_loss": 3.334469715754191, "step": 3}



loss=3.243:  25%|██▌       | 4/16 [00:34<01:27,  7.28s/it][A
loss=3.112:  25%|██▌       | 4/16 [00:34<01:27,  7.28s/it][A
loss=3.112:  31%|███▏      | 5/16 [00:34<00:58,  5.28s/it][A
loss=3.061:  31%|███▏      | 5/16 [00:35<00:58,  5.28s/it][A07/18/2022 03:31:48 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:31:48 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:31:48 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.20259179265658747, "eval_f1": 0.03011173131549667, "eval_acc_and_f1": 0.11635176198604207, "eval_loss": 3.056640233312334, "learning_rate": 1.9090909090909094e-05, "train_loss": 3.138713836669922, "step": 6}



loss=3.106:  44%|████▍     | 7/16 [01:11<01:33, 10.37s/it][A
loss=2.861:  44%|████▍     | 7/16 [01:11<01:33, 10.37s/it][A
loss=2.861:  50%|█████     | 8/16 [01:12<00:59,  7.44s/it][A
loss=2.942:  50%|█████     | 8/16 [01:12<00:59,  7.44s/it][A07/18/2022 03:32:26 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:32:26 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:32:26 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.03s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.03s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.03s/it][A[A

E

{"eval_acc": 0.2969042476601872, "eval_f1": 0.04680004254144593, "eval_acc_and_f1": 0.17185214510081656, "eval_loss": 2.849212280341557, "learning_rate": 1.772727272727273e-05, "train_loss": 2.9696566263834634, "step": 9}



loss=2.834:  62%|██████▎   | 10/16 [01:45<01:03, 10.63s/it][A
loss=2.903:  62%|██████▎   | 10/16 [01:45<01:03, 10.63s/it][A
loss=2.903:  69%|██████▉   | 11/16 [01:45<00:38,  7.62s/it][A
loss=2.750:  69%|██████▉   | 11/16 [01:46<00:38,  7.62s/it][A07/18/2022 03:32:59 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:32:59 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:32:59 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.03s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.03s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.03s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.03s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.03s/it][A[

{"eval_acc": 0.3056875449964003, "eval_f1": 0.04853263312631875, "eval_acc_and_f1": 0.1771100890613595, "eval_loss": 2.70573137487684, "learning_rate": 1.6363636363636366e-05, "train_loss": 2.829087257385254, "step": 12}



loss=2.809:  81%|████████▏ | 13/16 [02:22<00:34, 11.58s/it][A
loss=2.757:  81%|████████▏ | 13/16 [02:23<00:34, 11.58s/it][A
loss=2.757:  88%|████████▊ | 14/16 [02:23<00:16,  8.29s/it][A
loss=2.759:  88%|████████▊ | 14/16 [02:23<00:16,  8.29s/it][A07/18/2022 03:33:37 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:33:37 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:33:37 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.26119510439164867, "eval_f1": 0.040220392557941374, "eval_acc_and_f1": 0.150707748474795, "eval_loss": 2.6037780046463013, "learning_rate": 1.5000000000000002e-05, "train_loss": 2.775038957595825, "step": 15}



loss=2.519: 100%|██████████| 16/16 [02:57<00:00, 11.09s/it]
Epoch:  33%|███▎      | 1/3 [02:57<05:54, 177.42s/it]
Iteration:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.472:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.472:   6%|▋         | 1/16 [00:00<00:08,  1.70it/s][A
loss=2.548:   6%|▋         | 1/16 [00:00<00:08,  1.70it/s][A07/18/2022 03:34:11 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:34:11 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:34:11 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [0

{"eval_acc": 0.22001439884809215, "eval_f1": 0.03300502641146546, "eval_acc_and_f1": 0.1265097126297788, "eval_loss": 2.5142789483070374, "learning_rate": 1.3636363636363637e-05, "train_loss": 2.512741724650065, "step": 18}



loss=2.588:  19%|█▉        | 3/16 [00:33<01:35,  7.32s/it][A
loss=2.487:  19%|█▉        | 3/16 [00:33<01:35,  7.32s/it][A
loss=2.487:  25%|██▌       | 4/16 [00:34<01:03,  5.30s/it][A
loss=2.579:  25%|██▌       | 4/16 [00:34<01:03,  5.30s/it][A07/18/2022 03:34:45 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:34:45 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:34:45 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.03s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.27429805615550756, "eval_f1": 0.0420396231511011, "eval_acc_and_f1": 0.15816883965330433, "eval_loss": 2.456186967236655, "learning_rate": 1.2272727272727274e-05, "train_loss": 2.5510865847269693, "step": 21}



loss=2.502:  38%|███▊      | 6/16 [01:09<01:39,  9.99s/it][A
loss=2.315:  38%|███▊      | 6/16 [01:09<01:39,  9.99s/it][A
loss=2.315:  44%|████▍     | 7/16 [01:09<01:04,  7.17s/it][A
loss=2.522:  44%|████▍     | 7/16 [01:10<01:04,  7.17s/it][A07/18/2022 03:35:21 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:35:21 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:35:21 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.28192944564434846, "eval_f1": 0.050963084888806326, "eval_acc_and_f1": 0.1664462652665774, "eval_loss": 2.4027508412088667, "learning_rate": 1.0909090909090909e-05, "train_loss": 2.446011940638224, "step": 24}



loss=2.708:  56%|█████▋    | 9/16 [01:45<01:17, 11.03s/it][A
loss=2.514:  56%|█████▋    | 9/16 [01:45<01:17, 11.03s/it][A
loss=2.514:  62%|██████▎   | 10/16 [01:46<00:47,  7.90s/it][A
loss=2.404:  62%|██████▎   | 10/16 [01:46<00:47,  7.90s/it][A07/18/2022 03:35:57 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:35:57 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:35:57 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.03s/it][A[A


{"eval_acc": 0.3359251259899208, "eval_f1": 0.06361594310000915, "eval_acc_and_f1": 0.19977053454496496, "eval_loss": 2.360721903187888, "learning_rate": 9.545454545454547e-06, "train_loss": 2.542139927546183, "step": 27}



loss=2.399:  75%|███████▌  | 12/16 [02:19<00:43, 10.89s/it][A
loss=2.138:  75%|███████▌  | 12/16 [02:19<00:43, 10.89s/it][A
loss=2.138:  81%|████████▏ | 13/16 [02:19<00:23,  7.80s/it][A
loss=2.469:  81%|████████▏ | 13/16 [02:19<00:23,  7.80s/it][A07/18/2022 03:36:31 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:36:31 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:36:31 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.39136069114470845, "eval_f1": 0.073887747984277, "eval_acc_and_f1": 0.23262421956449272, "eval_loss": 2.3219669376100813, "learning_rate": 8.181818181818183e-06, "train_loss": 2.3353039423624673, "step": 30}



loss=2.269:  94%|█████████▍| 15/16 [02:57<00:11, 11.70s/it][A
loss=2.192:  94%|█████████▍| 15/16 [02:57<00:11, 11.70s/it][A
loss=2.192: 100%|██████████| 16/16 [02:57<00:00, 11.09s/it]
Epoch:  67%|██████▋   | 2/3 [05:54<02:57, 177.43s/it]
Iteration:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.330:   0%|          | 0/16 [00:00<?, ?it/s][A07/18/2022 03:37:08 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:37:08 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:37:08 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 

{"eval_acc": 0.4673866090712743, "eval_f1": 0.09003262355460084, "eval_acc_and_f1": 0.27870961631293756, "eval_loss": 2.284376331738063, "learning_rate": 6.818181818181818e-06, "train_loss": 2.263935168584188, "step": 33}



loss=2.335:  12%|█▎        | 2/16 [00:35<05:43, 24.53s/it][A
loss=2.398:  12%|█▎        | 2/16 [00:35<05:43, 24.53s/it][A
loss=2.398:  19%|█▉        | 3/16 [00:36<03:45, 17.35s/it][A
loss=2.220:  19%|█▉        | 3/16 [00:36<03:45, 17.35s/it][A07/18/2022 03:37:44 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:37:44 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:37:44 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.03s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.03s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.03s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.03s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.03s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.03s/it][A[A

E

{"eval_acc": 0.5064074874010079, "eval_f1": 0.09617568429051145, "eval_acc_and_f1": 0.3012915858457597, "eval_loss": 2.2557651826313565, "learning_rate": 5.4545454545454545e-06, "train_loss": 2.31777556737264, "step": 36}



loss=2.058:  31%|███▏      | 5/16 [01:11<02:56, 16.04s/it][A
loss=2.255:  31%|███▏      | 5/16 [01:11<02:56, 16.04s/it][A
loss=2.255:  38%|███▊      | 6/16 [01:12<01:54, 11.41s/it][A
loss=2.050:  38%|███▊      | 6/16 [01:12<01:54, 11.41s/it][A07/18/2022 03:38:20 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:38:20 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:38:20 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.03s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.03s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.03s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.03s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.03s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.03s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.03s/it][A[A

E

{"eval_acc": 0.5229661627069835, "eval_f1": 0.0991122985109193, "eval_acc_and_f1": 0.3110392306089514, "eval_loss": 2.2335645045552934, "learning_rate": 4.0909090909090915e-06, "train_loss": 2.1209096908569336, "step": 39}



loss=2.310:  50%|█████     | 8/16 [01:47<01:44, 13.02s/it][A
loss=2.405:  50%|█████     | 8/16 [01:47<01:44, 13.02s/it][A
loss=2.405:  56%|█████▋    | 9/16 [01:47<01:05,  9.29s/it][A
loss=2.225:  56%|█████▋    | 9/16 [01:48<01:05,  9.29s/it][A07/18/2022 03:38:56 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:38:56 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:38:56 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[A

E

{"eval_acc": 0.5333333333333333, "eval_f1": 0.10126331010567391, "eval_acc_and_f1": 0.3172983217195036, "eval_loss": 2.214598689760481, "learning_rate": 2.7272727272727272e-06, "train_loss": 2.313279072443644, "step": 42}



loss=2.077:  69%|██████▉   | 11/16 [02:22<00:59, 11.81s/it][A
loss=2.313:  69%|██████▉   | 11/16 [02:22<00:59, 11.81s/it][A
loss=2.313:  75%|███████▌  | 12/16 [02:22<00:33,  8.44s/it][A
loss=2.135:  75%|███████▌  | 12/16 [02:22<00:33,  8.44s/it][A07/18/2022 03:39:31 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:39:31 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:39:31 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.03s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.5337652987760979, "eval_f1": 0.10163163479156634, "eval_acc_and_f1": 0.3176984667838321, "eval_loss": 2.2044726269585744, "learning_rate": 1.3636363636363636e-06, "train_loss": 2.1748218536376953, "step": 45}



loss=2.162:  88%|████████▊ | 14/16 [02:59<00:23, 11.90s/it][A
loss=2.483:  88%|████████▊ | 14/16 [02:59<00:23, 11.90s/it][A
loss=2.483:  94%|█████████▍| 15/16 [03:00<00:08,  8.52s/it][A
loss=2.007:  94%|█████████▍| 15/16 [03:00<00:08,  8.52s/it][A07/18/2022 03:40:08 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:40:08 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:40:08 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:26,  1.02s/it][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.02s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.02s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.02s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.02s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.02s/it][A[

{"eval_acc": 0.534341252699784, "eval_f1": 0.1017801511054394, "eval_acc_and_f1": 0.3180607019026117, "eval_loss": 2.20024060351508, "learning_rate": 0.0, "train_loss": 2.21732505162557, "step": 48}


07/18/2022 03:40:43 - INFO - utilities.trainers -   ***** Running evaluation iter-1_trial1 *****
07/18/2022 03:40:43 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:40:43 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 03:41:11 - INFO - utilities.trainers -   ***** Eval results iter-1_trial1 *****
07/18/2022 03:41:11 - INFO - utilities.trainers -     acc = 0.534341252699784
07/18/2022 03:41:11 - INFO - utilities.trainers -     acc_and_f1 = 0.3180607019026117
07/18/2022 03:41:11 - INFO - utilities.trainers -     f1 = 0.1017801511054394
07/18/2022 03:41:14 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 03:41:19 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:41:19 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 03:41:19 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 03:43:39 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 03:43:39 - INFO - utilities.trainers -     acc = 0.5305857957490928
07/18/2022 03:43:39 - INFO - utilities.trainers -     acc_and_f1 = 0.3139096058608465
07/18/2022 03:43:39 - INFO - utilities.trainers -     f1 = 0.09723341597260018



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 03:43:39 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 03:43:57 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 03:44:03 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:44:03 - INFO - utilities.trainers -     Num examples = 49500
07/18/2022 03:44:03 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/194 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 194/194 [06:35<00:00,  2.04s/it]
07/18/2022 03:50:39 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 03:50:39 - INFO - utilities.trainers -     acc = 0.5292323232323233
07/18/2022 03:50:39 - INFO - utilities.trainers -     acc_and_f1 = 0.3150475759276026
07/18/2022 03:50:39 - INFO - utilities.trainers -     f1 = 0.10086282862288193
07/18/2022 03:50:39 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 03:50:58 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 03:51:02 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:51:02 - INFO - utilities.trainers -     Num examples = 500
07/18/2022 03:51:02 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 2/2 [00:03<00:00,  2.00s/it]
07/18/2022 03:51:06 - INFO - utili



************
End of iteration 1:
Train loss 2.5526, Val loss 2.20024060351508, Test loss 2.1994151928845573
Annotated 500 samples
Current labeled (training) data: 1000 samples
Remaining budget: 3000 (in samples)
************

Saving json with the results....

 Start Training model of iteration 2!



07/18/2022 03:52:48 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.534341252699784, acc_best_iteration=1, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-1', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-2', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, f

warmup steps: 9
total steps: 93
logging steps: 6
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.398:   3%|▎         | 1/32 [00:00<00:16,  1.93it/s][A
loss=3.297:   3%|▎         | 1/32 [00:00<00:16,  1.93it/s][A
loss=3.297:   6%|▋         | 2/32 [00:01<00:16,  1.85it/s][A
loss=3.269:   6%|▋         | 2/32 [00:01<00:16,  1.85it/s][A
loss=3.269:   9%|▉         | 3/32 [00:01<00:16,  1.81it/s][A
loss=3.335:   9%|▉         | 3/32 [00:01<00:16,  1.81it/s][A
loss=3.335:  12%|█▎        | 4/32 [00:02<00:15,  1.79it/s][A
loss=3.229:  12%|█▎        | 4/32 [00:02<00:15,  1.79it/s][A
loss=3.229:  16%|█▌        | 5/32 [00:02<00:15,  1.76it/s][A
loss=3.133:  16%|█▌        | 5/32 [00:03<00:15,  1.76it/s][A07/18/2022 03:53:19 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:53:19 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:53:19 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋       

{"eval_acc": 0.06335493160547156, "eval_f1": 0.022570542393022634, "eval_acc_and_f1": 0.0429627369992471, "eval_loss": 3.1349602171352933, "learning_rate": 1.3333333333333333e-05, "train_loss": 3.27695894241333, "step": 6}



loss=3.078:  19%|█▉        | 6/32 [00:38<04:45, 11.00s/it][A
loss=3.078:  22%|██▏       | 7/32 [00:38<03:16,  7.88s/it][A
loss=2.955:  22%|██▏       | 7/32 [00:38<03:16,  7.88s/it][A
loss=2.955:  25%|██▌       | 8/32 [00:39<02:16,  5.69s/it][A
loss=2.964:  25%|██▌       | 8/32 [00:39<02:16,  5.69s/it][A
loss=2.964:  28%|██▊       | 9/32 [00:39<01:35,  4.16s/it][A
loss=2.661:  28%|██▊       | 9/32 [00:40<01:35,  4.16s/it][A
loss=2.661:  31%|███▏      | 10/32 [00:40<01:07,  3.09s/it][A
loss=2.502:  31%|███▏      | 10/32 [00:40<01:07,  3.09s/it][A
loss=2.502:  34%|███▍      | 11/32 [00:41<00:49,  2.34s/it][A
loss=2.587:  34%|███▍      | 11/32 [00:41<00:49,  2.34s/it][A07/18/2022 03:53:57 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:53:57 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:53:57 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎       

{"eval_acc": 0.20849532037437005, "eval_f1": 0.02519119706117968, "eval_acc_and_f1": 0.11684325871777486, "eval_loss": 2.7838916267667497, "learning_rate": 1.931034482758621e-05, "train_loss": 2.791354179382324, "step": 12}



loss=2.383:  41%|████      | 13/32 [01:15<02:39,  8.39s/it][A
loss=2.336:  41%|████      | 13/32 [01:15<02:39,  8.39s/it][A
loss=2.336:  44%|████▍     | 14/32 [01:15<01:48,  6.04s/it][A
loss=2.583:  44%|████▍     | 14/32 [01:16<01:48,  6.04s/it][A
loss=2.583:  47%|████▋     | 15/32 [01:16<01:14,  4.41s/it][A
loss=2.317:  47%|████▋     | 15/32 [01:16<01:14,  4.41s/it][A
loss=2.317:  50%|█████     | 16/32 [01:17<00:52,  3.26s/it][A
loss=2.406:  50%|█████     | 16/32 [01:17<00:52,  3.26s/it][A
loss=2.406:  53%|█████▎    | 17/32 [01:17<00:36,  2.46s/it][A
loss=2.494:  53%|█████▎    | 17/32 [01:17<00:36,  2.46s/it][A07/18/2022 03:54:34 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:54:34 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:54:34 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.03s/it][A[A

Evaluating:   7

{"eval_acc": 0.17350611951043918, "eval_f1": 0.012189697525355965, "eval_acc_and_f1": 0.09284790851789756, "eval_loss": 2.58600823368345, "learning_rate": 1.7931034482758623e-05, "train_loss": 2.419809182484945, "step": 18}



loss=2.066:  59%|█████▉    | 19/32 [01:52<01:51,  8.56s/it][A
loss=2.069:  59%|█████▉    | 19/32 [01:52<01:51,  8.56s/it][A
loss=2.069:  62%|██████▎   | 20/32 [01:53<01:13,  6.16s/it][A
loss=2.046:  62%|██████▎   | 20/32 [01:53<01:13,  6.16s/it][A
loss=2.046:  66%|██████▌   | 21/32 [01:53<00:49,  4.49s/it][A
loss=2.100:  66%|██████▌   | 21/32 [01:53<00:49,  4.49s/it][A
loss=2.100:  69%|██████▉   | 22/32 [01:54<00:33,  3.32s/it][A
loss=1.618:  69%|██████▉   | 22/32 [01:54<00:33,  3.32s/it][A
loss=1.618:  72%|███████▏  | 23/32 [01:54<00:22,  2.50s/it][A
loss=2.001:  72%|███████▏  | 23/32 [01:54<00:22,  2.50s/it][A07/18/2022 03:55:11 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:55:11 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:55:11 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.17350611951043918, "eval_f1": 0.012189697525355965, "eval_acc_and_f1": 0.09284790851789756, "eval_loss": 2.430539846420288, "learning_rate": 1.6551724137931037e-05, "train_loss": 1.9833906094233196, "step": 24}



loss=1.935:  78%|███████▊  | 25/32 [02:27<00:57,  8.22s/it][A
loss=1.948:  78%|███████▊  | 25/32 [02:28<00:57,  8.22s/it][A
loss=1.948:  81%|████████▏ | 26/32 [02:28<00:35,  5.93s/it][A
loss=1.948:  81%|████████▏ | 26/32 [02:28<00:35,  5.93s/it][A
loss=1.948:  84%|████████▍ | 27/32 [02:29<00:21,  4.33s/it][A
loss=2.087:  84%|████████▍ | 27/32 [02:29<00:21,  4.33s/it][A
loss=2.087:  88%|████████▊ | 28/32 [02:29<00:12,  3.20s/it][A
loss=2.026:  88%|████████▊ | 28/32 [02:29<00:12,  3.20s/it][A
loss=2.026:  91%|█████████ | 29/32 [02:30<00:07,  2.42s/it][A
loss=1.985:  91%|█████████ | 29/32 [02:30<00:07,  2.42s/it][A07/18/2022 03:55:46 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:55:46 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:55:46 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.03s/it][A[A

Evaluating:   7

{"eval_acc": 0.17350611951043918, "eval_f1": 0.012189697525355965, "eval_acc_and_f1": 0.09284790851789756, "eval_loss": 2.3195667607443675, "learning_rate": 1.5172413793103448e-05, "train_loss": 1.9883235891660054, "step": 30}



loss=1.921:  97%|█████████▋| 31/32 [03:06<00:08,  8.95s/it][A
loss=1.894:  97%|█████████▋| 31/32 [03:06<00:08,  8.95s/it][A
loss=1.894: 100%|██████████| 32/32 [03:07<00:00,  5.85s/it]
Epoch:  33%|███▎      | 1/3 [03:07<06:14, 187.11s/it]
Iteration:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.516:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.516:   3%|▎         | 1/32 [00:00<00:18,  1.70it/s][A
loss=1.997:   3%|▎         | 1/32 [00:00<00:18,  1.70it/s][A
loss=1.997:   6%|▋         | 2/32 [00:01<00:17,  1.69it/s][A
loss=1.749:   6%|▋         | 2/32 [00:01<00:17,  1.69it/s][A
loss=1.749:   9%|▉         | 3/32 [00:01<00:17,  1.69it/s][A
loss=1.892:   9%|▉         | 3/32 [00:01<00:17,  1.69it/s][A07/18/2022 03:56:25 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:56:25 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:56:25 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s]

{"eval_acc": 0.21324694024478041, "eval_f1": 0.02659415404598259, "eval_acc_and_f1": 0.1199205471453815, "eval_loss": 2.209874834333147, "learning_rate": 1.3793103448275863e-05, "train_loss": 1.8282071948051453, "step": 36}



loss=1.691:  16%|█▌        | 5/32 [00:34<03:16,  7.28s/it][A
loss=2.047:  16%|█▌        | 5/32 [00:34<03:16,  7.28s/it][A
loss=2.047:  19%|█▉        | 6/32 [00:35<02:17,  5.27s/it][A
loss=1.833:  19%|█▉        | 6/32 [00:35<02:17,  5.27s/it][A
loss=1.833:  22%|██▏       | 7/32 [00:35<01:36,  3.86s/it][A
loss=1.375:  22%|██▏       | 7/32 [00:36<01:36,  3.86s/it][A
loss=1.375:  25%|██▌       | 8/32 [00:36<01:09,  2.88s/it][A
loss=1.747:  25%|██▌       | 8/32 [00:36<01:09,  2.88s/it][A
loss=1.747:  28%|██▊       | 9/32 [00:37<00:50,  2.19s/it][A
loss=1.822:  28%|██▊       | 9/32 [00:37<00:50,  2.19s/it][A07/18/2022 03:57:00 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:57:00 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:57:00 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋       

{"eval_acc": 0.2859611231101512, "eval_f1": 0.04673093538570078, "eval_acc_and_f1": 0.166346029247926, "eval_loss": 2.140527273927416, "learning_rate": 1.2413793103448277e-05, "train_loss": 1.7525269587834675, "step": 42}



loss=1.519:  34%|███▍      | 11/32 [01:10<02:49,  8.08s/it][A
loss=1.503:  34%|███▍      | 11/32 [01:10<02:49,  8.08s/it][A
loss=1.503:  38%|███▊      | 12/32 [01:10<01:56,  5.83s/it][A
loss=1.629:  38%|███▊      | 12/32 [01:10<01:56,  5.83s/it][A
loss=1.629:  41%|████      | 13/32 [01:11<01:20,  4.26s/it][A
loss=1.660:  41%|████      | 13/32 [01:11<01:20,  4.26s/it][A
loss=1.660:  44%|████▍     | 14/32 [01:11<00:56,  3.16s/it][A
loss=1.571:  44%|████▍     | 14/32 [01:12<00:56,  3.16s/it][A
loss=1.571:  47%|████▋     | 15/32 [01:12<00:40,  2.39s/it][A
loss=1.651:  47%|████▋     | 15/32 [01:12<00:40,  2.39s/it][A07/18/2022 03:57:35 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:57:35 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:57:35 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.36961843052555793, "eval_f1": 0.07087742012066417, "eval_acc_and_f1": 0.22024792532311105, "eval_loss": 2.078400569302695, "learning_rate": 1.103448275862069e-05, "train_loss": 1.5889016389846802, "step": 48}



loss=1.534:  53%|█████▎    | 17/32 [01:47<02:09,  8.62s/it][A
loss=1.684:  53%|█████▎    | 17/32 [01:47<02:09,  8.62s/it][A
loss=1.684:  56%|█████▋    | 18/32 [01:48<01:26,  6.21s/it][A
loss=1.531:  56%|█████▋    | 18/32 [01:48<01:26,  6.21s/it][A
loss=1.531:  59%|█████▉    | 19/32 [01:48<00:58,  4.53s/it][A
loss=1.975:  59%|█████▉    | 19/32 [01:49<00:58,  4.53s/it][A
loss=1.975:  62%|██████▎   | 20/32 [01:49<00:40,  3.34s/it][A
loss=1.405:  62%|██████▎   | 20/32 [01:49<00:40,  3.34s/it][A
loss=1.405:  66%|██████▌   | 21/32 [01:50<00:27,  2.52s/it][A
loss=1.605:  66%|██████▌   | 21/32 [01:50<00:27,  2.52s/it][A07/18/2022 03:58:13 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:58:13 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:58:13 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.5225341972642189, "eval_f1": 0.11649138940435964, "eval_acc_and_f1": 0.3195127933342893, "eval_loss": 1.9763809995991843, "learning_rate": 9.655172413793105e-06, "train_loss": 1.6220930417378743, "step": 54}



loss=1.462:  72%|███████▏  | 23/32 [02:23<01:14,  8.26s/it][A
loss=1.701:  72%|███████▏  | 23/32 [02:23<01:14,  8.26s/it][A
loss=1.701:  75%|███████▌  | 24/32 [02:23<00:47,  5.96s/it][A
loss=1.937:  75%|███████▌  | 24/32 [02:24<00:47,  5.96s/it][A
loss=1.937:  78%|███████▊  | 25/32 [02:24<00:30,  4.35s/it][A
loss=1.546:  78%|███████▊  | 25/32 [02:24<00:30,  4.35s/it][A
loss=1.546:  81%|████████▏ | 26/32 [02:25<00:19,  3.22s/it][A
loss=1.832:  81%|████████▏ | 26/32 [02:25<00:19,  3.22s/it][A
loss=1.832:  84%|████████▍ | 27/32 [02:25<00:12,  2.43s/it][A
loss=1.612:  84%|████████▍ | 27/32 [02:25<00:12,  2.43s/it][A07/18/2022 03:58:49 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:58:49 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:58:49 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.5208063354931606, "eval_f1": 0.12172738529740118, "eval_acc_and_f1": 0.32126686039528085, "eval_loss": 1.944155467408044, "learning_rate": 8.275862068965518e-06, "train_loss": 1.681554098924001, "step": 60}



loss=1.280:  91%|█████████ | 29/32 [02:59<00:25,  8.44s/it][A
loss=1.467:  91%|█████████ | 29/32 [03:00<00:25,  8.44s/it][A
loss=1.467:  94%|█████████▍| 30/32 [03:00<00:12,  6.08s/it][A
loss=1.745:  94%|█████████▍| 30/32 [03:00<00:12,  6.08s/it][A
loss=1.745:  97%|█████████▋| 31/32 [03:01<00:04,  4.43s/it][A
loss=1.290:  97%|█████████▋| 31/32 [03:01<00:04,  4.43s/it][A
loss=1.290: 100%|██████████| 32/32 [03:01<00:00,  5.67s/it]
Epoch:  67%|██████▋   | 2/3 [06:08<03:05, 185.37s/it]
Iteration:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.337:   0%|          | 0/32 [00:00<?, ?it/s][A
loss=1.337:   3%|▎         | 1/32 [00:00<00:17,  1.74it/s][A
loss=1.228:   3%|▎         | 1/32 [00:00<00:17,  1.74it/s][A07/18/2022 03:59:25 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 03:59:25 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 03:59:25 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?i

{"eval_acc": 0.5693304535637149, "eval_f1": 0.13094857241998978, "eval_acc_and_f1": 0.35013951299185236, "eval_loss": 1.8813251767839705, "learning_rate": 6.896551724137932e-06, "train_loss": 1.3913299043973286, "step": 66}



loss=1.580:   9%|▉         | 3/32 [00:33<03:32,  7.33s/it][A
loss=1.204:   9%|▉         | 3/32 [00:34<03:32,  7.33s/it][A
loss=1.204:  12%|█▎        | 4/32 [00:34<02:28,  5.31s/it][A
loss=1.409:  12%|█▎        | 4/32 [00:34<02:28,  5.31s/it][A
loss=1.409:  16%|█▌        | 5/32 [00:35<01:45,  3.89s/it][A
loss=1.452:  16%|█▌        | 5/32 [00:35<01:45,  3.89s/it][A
loss=1.452:  19%|█▉        | 6/32 [00:35<01:15,  2.90s/it][A
loss=1.110:  19%|█▉        | 6/32 [00:35<01:15,  2.90s/it][A
loss=1.110:  22%|██▏       | 7/32 [00:36<00:55,  2.20s/it][A
loss=1.612:  22%|██▏       | 7/32 [00:36<00:55,  2.20s/it][A07/18/2022 04:00:00 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:00:00 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 04:00:00 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|▋       

{"eval_acc": 0.5916486681065515, "eval_f1": 0.13539807044330365, "eval_acc_and_f1": 0.36352336927492757, "eval_loss": 1.8346771895885468, "learning_rate": 5.517241379310345e-06, "train_loss": 1.3945782780647278, "step": 72}



loss=1.272:  28%|██▊       | 9/32 [01:12<03:23,  8.85s/it][A
loss=1.327:  28%|██▊       | 9/32 [01:13<03:23,  8.85s/it][A
loss=1.327:  31%|███▏      | 10/32 [01:13<02:20,  6.37s/it][A
loss=1.839:  31%|███▏      | 10/32 [01:13<02:20,  6.37s/it][A
loss=1.839:  34%|███▍      | 11/32 [01:14<01:37,  4.63s/it][A
loss=1.275:  34%|███▍      | 11/32 [01:14<01:37,  4.63s/it][A
loss=1.275:  38%|███▊      | 12/32 [01:14<01:08,  3.42s/it][A
loss=1.194:  38%|███▊      | 12/32 [01:14<01:08,  3.42s/it][A
loss=1.194:  41%|████      | 13/32 [01:15<00:48,  2.57s/it][A
loss=1.633:  41%|████      | 13/32 [01:15<00:48,  2.57s/it][A07/18/2022 04:00:40 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:00:40 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 04:00:40 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7%|

{"eval_acc": 0.5985601151907848, "eval_f1": 0.13819429956962925, "eval_acc_and_f1": 0.368377207380207, "eval_loss": 1.8156553634575434, "learning_rate": 4.137931034482759e-06, "train_loss": 1.4233122269312541, "step": 78}



loss=1.289:  47%|████▋     | 15/32 [01:48<02:20,  8.25s/it][A
loss=1.392:  47%|████▋     | 15/32 [01:48<02:20,  8.25s/it][A
loss=1.392:  50%|█████     | 16/32 [01:48<01:35,  5.95s/it][A
loss=1.572:  50%|█████     | 16/32 [01:49<01:35,  5.95s/it][A
loss=1.572:  53%|█████▎    | 17/32 [01:49<01:05,  4.34s/it][A
loss=1.415:  53%|█████▎    | 17/32 [01:49<01:05,  4.34s/it][A
loss=1.415:  56%|█████▋    | 18/32 [01:50<00:44,  3.21s/it][A
loss=1.670:  56%|█████▋    | 18/32 [01:50<00:44,  3.21s/it][A
loss=1.670:  59%|█████▉    | 19/32 [01:50<00:31,  2.43s/it][A
loss=1.466:  59%|█████▉    | 19/32 [01:50<00:31,  2.43s/it][A07/18/2022 04:01:15 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:01:15 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 04:01:15 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.5966882649388049, "eval_f1": 0.1393429197494153, "eval_acc_and_f1": 0.3680155923441101, "eval_loss": 1.8091620590005602, "learning_rate": 2.7586206896551725e-06, "train_loss": 1.4675812323888142, "step": 84}



loss=1.324:  66%|██████▌   | 21/32 [02:27<01:38,  8.99s/it][A
loss=1.361:  66%|██████▌   | 21/32 [02:27<01:38,  8.99s/it][A
loss=1.361:  69%|██████▉   | 22/32 [02:28<01:04,  6.47s/it][A
loss=1.211:  69%|██████▉   | 22/32 [02:28<01:04,  6.47s/it][A
loss=1.211:  72%|███████▏  | 23/32 [02:28<00:42,  4.71s/it][A
loss=1.475:  72%|███████▏  | 23/32 [02:28<00:42,  4.71s/it][A
loss=1.475:  75%|███████▌  | 24/32 [02:29<00:27,  3.47s/it][A
loss=1.488:  75%|███████▌  | 24/32 [02:29<00:27,  3.47s/it][A
loss=1.488:  78%|███████▊  | 25/32 [02:29<00:18,  2.60s/it][A
loss=1.520:  78%|███████▊  | 25/32 [02:30<00:18,  2.60s/it][A07/18/2022 04:01:54 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:01:54 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 04:01:54 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.6096472282217422, "eval_f1": 0.14079904911174712, "eval_acc_and_f1": 0.37522313866674467, "eval_loss": 1.7806347608566284, "learning_rate": 1.3793103448275862e-06, "train_loss": 1.396402359008789, "step": 90}



loss=1.153:  84%|████████▍ | 27/32 [03:06<00:44,  8.95s/it][A
loss=1.396:  84%|████████▍ | 27/32 [03:06<00:44,  8.95s/it][A
loss=1.396:  88%|████████▊ | 28/32 [03:06<00:25,  6.44s/it][A
loss=2.220:  88%|████████▊ | 28/32 [03:06<00:25,  6.44s/it][A
loss=2.220:  91%|█████████ | 29/32 [03:07<00:14,  4.69s/it][A
loss=1.510:  91%|█████████ | 29/32 [03:07<00:14,  4.69s/it][A
loss=1.510:  94%|█████████▍| 30/32 [03:07<00:06,  3.46s/it][A
loss=1.415:  94%|█████████▍| 30/32 [03:08<00:06,  3.46s/it][A
loss=1.415:  97%|█████████▋| 31/32 [03:08<00:02,  2.60s/it][A
loss=0.859:  97%|█████████▋| 31/32 [03:08<00:02,  2.60s/it][A07/18/2022 04:02:33 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:02:33 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 04:02:33 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.6143988480921526, "eval_f1": 0.1414942362606392, "eval_acc_and_f1": 0.3779465421763959, "eval_loss": 1.7706135085650854, "learning_rate": 0.0, "train_loss": 1.4256490270296733, "step": 96}


07/18/2022 04:03:09 - INFO - utilities.trainers -   ***** Running evaluation iter-2_trial1 *****
07/18/2022 04:03:09 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 04:03:09 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 04:03:37 - INFO - utilities.trainers -   ***** Eval results iter-2_trial1 *****
07/18/2022 04:03:37 - INFO - utilities.trainers -     acc = 0.6143988480921526
07/18/2022 04:03:37 - INFO - utilities.trainers -     acc_and_f1 = 0.3779465421763959
07/18/2022 04:03:37 - INFO - utilities.trainers -     f1 = 0.1414942362606392
07/18/2022 04:03:40 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 04:03:45 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:03:45 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 04:03:45 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 04:06:04 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 04:06:04 - INFO - utilities.trainers -     acc = 0.6137031277000172
07/18/2022 04:06:04 - INFO - utilities.trainers -     acc_and_f1 = 0.37444096860976256
07/18/2022 04:06:04 - INFO - utilities.trainers -     f1 = 0.13517880951950792



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 04:06:05 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 04:06:24 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 04:06:30 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:06:30 - INFO - utilities.trainers -     Num examples = 49000
07/18/2022 04:06:30 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/192 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 192/192 [06:31<00:00,  2.04s/it]
07/18/2022 04:13:01 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 04:13:01 - INFO - utilities.trainers -     acc = 0.6117551020408163
07/18/2022 04:13:01 - INFO - utilities.trainers -     acc_and_f1 = 0.3761978755046071
07/18/2022 04:13:01 - INFO - utilities.trainers -     f1 = 0.14064064896839792
07/18/2022 04:13:01 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 04:13:19 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 04:13:23 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:13:23 - INFO - utilities.trainers -     Num examples = 1000
07/18/2022 04:13:23 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 4/4 [00:07<00:00,  1.99s/it]
07/18/2022 04:13:31 - INFO - util



************
End of iteration 2:
Train loss 1.8395, Val loss 1.7706135085650854, Test loss 1.773132531081929
Annotated 500 samples
Current labeled (training) data: 1500 samples
Remaining budget: 2500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 3!



07/18/2022 04:15:32 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.6143988480921526, acc_best_iteration=2, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-2', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-3', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 14
total steps: 140
logging steps: 9
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.344:   2%|▏         | 1/47 [00:00<00:23,  1.92it/s][A
loss=3.256:   2%|▏         | 1/47 [00:00<00:23,  1.92it/s][A
loss=3.256:   4%|▍         | 2/47 [00:01<00:24,  1.85it/s][A
loss=3.276:   4%|▍         | 2/47 [00:01<00:24,  1.85it/s][A
loss=3.276:   6%|▋         | 3/47 [00:01<00:24,  1.80it/s][A
loss=3.258:   6%|▋         | 3/47 [00:01<00:24,  1.80it/s][A
loss=3.258:   9%|▊         | 4/47 [00:02<00:24,  1.78it/s][A
loss=3.265:   9%|▊         | 4/47 [00:02<00:24,  1.78it/s][A
loss=3.265:  11%|█         | 5/47 [00:02<00:23,  1.76it/s][A
loss=3.201:  11%|█         | 5/47 [00:03<00:23,  1.76it/s][A
loss=3.201:  13%|█▎        | 6/47 [00:03<00:23,  1.75it/s][A
loss=3.122:  13%|█▎        | 6/47 [00:03<00:23,  1.75it/s][A
loss=3.122:  15%|█▍        | 7/47 [00:04<00:22,  1.74it/s][A
loss=2.976:  15%|█▍        | 7/47 [00:04<00:22,  1.74it/s][A
loss=2.976:  17%|█▋        | 8/47 [00:04<00:22,  1.74it/s][A
loss=3.085:  17%|█▋        | 8/47 [00:04<00:22,  1.74it/s][A07/18/20

{"eval_acc": 0.17264218862491001, "eval_f1": 0.011798277982779827, "eval_acc_and_f1": 0.09222023330384492, "eval_loss": 3.0688792126519338, "learning_rate": 1.2857142857142859e-05, "train_loss": 3.1980236636267767, "step": 9}



loss=2.980:  21%|██▏       | 10/47 [00:37<04:24,  7.16s/it][A
loss=2.901:  21%|██▏       | 10/47 [00:37<04:24,  7.16s/it][A
loss=2.901:  23%|██▎       | 11/47 [00:37<03:06,  5.19s/it][A
loss=2.900:  23%|██▎       | 11/47 [00:37<03:06,  5.19s/it][A
loss=2.900:  26%|██▌       | 12/47 [00:38<02:13,  3.81s/it][A
loss=2.954:  26%|██▌       | 12/47 [00:38<02:13,  3.81s/it][A
loss=2.954:  28%|██▊       | 13/47 [00:38<01:36,  2.84s/it][A
loss=2.782:  28%|██▊       | 13/47 [00:38<01:36,  2.84s/it][A
loss=2.782:  30%|██▉       | 14/47 [00:39<01:11,  2.16s/it][A
loss=3.052:  30%|██▉       | 14/47 [00:39<01:11,  2.16s/it][A
loss=3.052:  32%|███▏      | 15/47 [00:40<00:54,  1.69s/it][A
loss=2.759:  32%|███▏      | 15/47 [00:40<00:54,  1.69s/it][A
loss=2.759:  34%|███▍      | 16/47 [00:40<00:42,  1.36s/it][A
loss=2.476:  34%|███▍      | 16/47 [00:40<00:42,  1.36s/it][A
loss=2.476:  36%|███▌      | 17/47 [00:41<00:33,  1.12s/it][A
loss=2.648:  36%|███▌      | 17/47 [00:41<00:33,  1.12

{"eval_acc": 0.17379409647228222, "eval_f1": 0.012327729459857877, "eval_acc_and_f1": 0.09306091296607005, "eval_loss": 2.6638072984559193, "learning_rate": 1.937007874015748e-05, "train_loss": 2.8280161486731634, "step": 18}



loss=2.666:  40%|████      | 19/47 [01:15<03:41,  7.91s/it][A
loss=2.346:  40%|████      | 19/47 [01:16<03:41,  7.91s/it][A
loss=2.346:  43%|████▎     | 20/47 [01:16<02:34,  5.71s/it][A
loss=2.318:  43%|████▎     | 20/47 [01:16<02:34,  5.71s/it][A
loss=2.318:  45%|████▍     | 21/47 [01:17<01:48,  4.18s/it][A
loss=2.469:  45%|████▍     | 21/47 [01:17<01:48,  4.18s/it][A
loss=2.469:  47%|████▋     | 22/47 [01:17<01:17,  3.10s/it][A
loss=2.239:  47%|████▋     | 22/47 [01:17<01:17,  3.10s/it][A
loss=2.239:  49%|████▉     | 23/47 [01:18<00:56,  2.35s/it][A
loss=2.401:  49%|████▉     | 23/47 [01:18<00:56,  2.35s/it][A
loss=2.401:  51%|█████     | 24/47 [01:18<00:41,  1.82s/it][A
loss=2.596:  51%|█████     | 24/47 [01:19<00:41,  1.82s/it][A
loss=2.596:  53%|█████▎    | 25/47 [01:19<00:31,  1.45s/it][A
loss=2.011:  53%|█████▎    | 25/47 [01:19<00:31,  1.45s/it][A
loss=2.011:  55%|█████▌    | 26/47 [01:20<00:24,  1.19s/it][A
loss=2.362:  55%|█████▌    | 26/47 [01:20<00:24,  1.19

{"eval_acc": 0.25457163426925844, "eval_f1": 0.03175651505229114, "eval_acc_and_f1": 0.1431640746607748, "eval_loss": 2.482103339263371, "learning_rate": 1.7952755905511813e-05, "train_loss": 2.378466659122043, "step": 27}



loss=2.275:  60%|█████▉    | 28/47 [01:55<02:35,  8.17s/it][A
loss=2.199:  60%|█████▉    | 28/47 [01:56<02:35,  8.17s/it][A
loss=2.199:  62%|██████▏   | 29/47 [01:56<01:46,  5.89s/it][A
loss=2.134:  62%|██████▏   | 29/47 [01:56<01:46,  5.89s/it][A
loss=2.134:  64%|██████▍   | 30/47 [01:57<01:13,  4.30s/it][A
loss=1.992:  64%|██████▍   | 30/47 [01:57<01:13,  4.30s/it][A
loss=1.992:  66%|██████▌   | 31/47 [01:57<00:50,  3.19s/it][A
loss=2.158:  66%|██████▌   | 31/47 [01:57<00:50,  3.19s/it][A
loss=2.158:  68%|██████▊   | 32/47 [01:58<00:36,  2.41s/it][A
loss=2.157:  68%|██████▊   | 32/47 [01:58<00:36,  2.41s/it][A
loss=2.157:  70%|███████   | 33/47 [01:58<00:26,  1.86s/it][A
loss=2.408:  70%|███████   | 33/47 [01:58<00:26,  1.86s/it][A
loss=2.408:  72%|███████▏  | 34/47 [01:59<00:19,  1.48s/it][A
loss=2.062:  72%|███████▏  | 34/47 [01:59<00:19,  1.48s/it][A
loss=2.062:  74%|███████▍  | 35/47 [02:00<00:14,  1.21s/it][A
loss=2.052:  74%|███████▍  | 35/47 [02:00<00:14,  1.21

{"eval_acc": 0.30309575233981284, "eval_f1": 0.03760685562291322, "eval_acc_and_f1": 0.17035130398136303, "eval_loss": 2.3384896772248402, "learning_rate": 1.6535433070866142e-05, "train_loss": 2.1595403220918445, "step": 36}



loss=1.915:  79%|███████▊  | 37/47 [02:37<01:24,  8.44s/it][A
loss=1.539:  79%|███████▊  | 37/47 [02:37<01:24,  8.44s/it][A
loss=1.539:  81%|████████  | 38/47 [02:37<00:54,  6.08s/it][A
loss=2.149:  81%|████████  | 38/47 [02:37<00:54,  6.08s/it][A
loss=2.149:  83%|████████▎ | 39/47 [02:38<00:35,  4.44s/it][A
loss=1.599:  83%|████████▎ | 39/47 [02:38<00:35,  4.44s/it][A
loss=1.599:  85%|████████▌ | 40/47 [02:38<00:22,  3.28s/it][A
loss=1.874:  85%|████████▌ | 40/47 [02:39<00:22,  3.28s/it][A
loss=1.874:  87%|████████▋ | 41/47 [02:39<00:14,  2.47s/it][A
loss=1.886:  87%|████████▋ | 41/47 [02:39<00:14,  2.47s/it][A
loss=1.886:  89%|████████▉ | 42/47 [02:40<00:09,  1.91s/it][A
loss=2.110:  89%|████████▉ | 42/47 [02:40<00:09,  1.91s/it][A
loss=2.110:  91%|█████████▏| 43/47 [02:40<00:06,  1.51s/it][A
loss=1.844:  91%|█████████▏| 43/47 [02:40<00:06,  1.51s/it][A
loss=1.844:  94%|█████████▎| 44/47 [02:41<00:03,  1.24s/it][A
loss=1.876:  94%|█████████▎| 44/47 [02:41<00:03,  1.24

{"eval_acc": 0.32973362131029516, "eval_f1": 0.04620343730061923, "eval_acc_and_f1": 0.18796852930545718, "eval_loss": 2.175414408956255, "learning_rate": 1.5118110236220473e-05, "train_loss": 1.8657180468241374, "step": 45}



loss=1.617:  98%|█████████▊| 46/47 [03:16<00:07,  7.98s/it][A
loss=1.851:  98%|█████████▊| 46/47 [03:16<00:07,  7.98s/it][A
loss=1.851: 100%|██████████| 47/47 [03:16<00:00,  4.18s/it]
Epoch:  33%|███▎      | 1/3 [03:16<06:33, 196.63s/it]
Iteration:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.783:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.783:   2%|▏         | 1/47 [00:00<00:26,  1.73it/s][A
loss=1.710:   2%|▏         | 1/47 [00:00<00:26,  1.73it/s][A
loss=1.710:   4%|▍         | 2/47 [00:01<00:26,  1.73it/s][A
loss=1.847:   4%|▍         | 2/47 [00:01<00:26,  1.73it/s][A
loss=1.847:   6%|▋         | 3/47 [00:01<00:25,  1.72it/s][A
loss=1.572:   6%|▋         | 3/47 [00:01<00:25,  1.72it/s][A
loss=1.572:   9%|▊         | 4/47 [00:02<00:25,  1.71it/s][A
loss=2.063:   9%|▊         | 4/47 [00:02<00:25,  1.71it/s][A
loss=2.063:  11%|█         | 5/47 [00:02<00:24,  1.71it/s][A
loss=1.425:  11%|█         | 5/47 [00:03<00:24,  1.71it/s][A
loss=1.425:  13%|█▎        | 6/4

{"eval_acc": 0.4349892008639309, "eval_f1": 0.08053745195771182, "eval_acc_and_f1": 0.25776332641082134, "eval_loss": 1.9781050341469901, "learning_rate": 1.3700787401574804e-05, "train_loss": 1.7308219406339858, "step": 54}



loss=1.865:  17%|█▋        | 8/47 [00:36<04:43,  7.28s/it][A
loss=1.876:  17%|█▋        | 8/47 [00:36<04:43,  7.28s/it][A
loss=1.876:  19%|█▉        | 9/47 [00:37<03:20,  5.27s/it][A
loss=1.593:  19%|█▉        | 9/47 [00:37<03:20,  5.27s/it][A
loss=1.593:  21%|██▏       | 10/47 [00:37<02:22,  3.86s/it][A
loss=1.825:  21%|██▏       | 10/47 [00:37<02:22,  3.86s/it][A
loss=1.825:  23%|██▎       | 11/47 [00:38<01:43,  2.88s/it][A
loss=1.435:  23%|██▎       | 11/47 [00:38<01:43,  2.88s/it][A
loss=1.435:  26%|██▌       | 12/47 [00:38<01:16,  2.20s/it][A
loss=1.535:  26%|██▌       | 12/47 [00:39<01:16,  2.20s/it][A
loss=1.535:  28%|██▊       | 13/47 [00:39<00:58,  1.72s/it][A
loss=1.718:  28%|██▊       | 13/47 [00:39<00:58,  1.72s/it][A
loss=1.718:  30%|██▉       | 14/47 [00:40<00:45,  1.38s/it][A
loss=1.567:  30%|██▉       | 14/47 [00:40<00:45,  1.38s/it][A
loss=1.567:  32%|███▏      | 15/47 [00:40<00:36,  1.14s/it][A
loss=1.036:  32%|███▏      | 15/47 [00:40<00:36,  1.14s/it

{"eval_acc": 0.5615550755939525, "eval_f1": 0.12876474632984125, "eval_acc_and_f1": 0.34515991096189685, "eval_loss": 1.7595109726701463, "learning_rate": 1.2283464566929135e-05, "train_loss": 1.6054498884412978, "step": 63}



loss=1.414:  36%|███▌      | 17/47 [01:14<03:49,  7.67s/it][A
loss=1.513:  36%|███▌      | 17/47 [01:14<03:49,  7.67s/it][A
loss=1.513:  38%|███▊      | 18/47 [01:14<02:40,  5.54s/it][A
loss=1.312:  38%|███▊      | 18/47 [01:14<02:40,  5.54s/it][A
loss=1.312:  40%|████      | 19/47 [01:15<01:53,  4.05s/it][A
loss=1.185:  40%|████      | 19/47 [01:15<01:53,  4.05s/it][A
loss=1.185:  43%|████▎     | 20/47 [01:15<01:21,  3.01s/it][A
loss=1.099:  43%|████▎     | 20/47 [01:16<01:21,  3.01s/it][A
loss=1.099:  45%|████▍     | 21/47 [01:16<00:59,  2.29s/it][A
loss=1.110:  45%|████▍     | 21/47 [01:16<00:59,  2.29s/it][A
loss=1.110:  47%|████▋     | 22/47 [01:17<00:44,  1.78s/it][A
loss=1.663:  47%|████▋     | 22/47 [01:17<00:44,  1.78s/it][A
loss=1.663:  49%|████▉     | 23/47 [01:17<00:34,  1.42s/it][A
loss=1.296:  49%|████▉     | 23/47 [01:17<00:34,  1.42s/it][A
loss=1.296:  51%|█████     | 24/47 [01:18<00:26,  1.17s/it][A
loss=1.636:  51%|█████     | 24/47 [01:18<00:26,  1.17

{"eval_acc": 0.6375809935205183, "eval_f1": 0.1543033962885763, "eval_acc_and_f1": 0.3959421949045473, "eval_loss": 1.6055403905255454, "learning_rate": 1.0866141732283466e-05, "train_loss": 1.358741111225552, "step": 72}



loss=1.184:  55%|█████▌    | 26/47 [01:51<02:38,  7.56s/it][A
loss=1.505:  55%|█████▌    | 26/47 [01:51<02:38,  7.56s/it][A
loss=1.505:  57%|█████▋    | 27/47 [01:51<01:49,  5.47s/it][A
loss=1.301:  57%|█████▋    | 27/47 [01:52<01:49,  5.47s/it][A
loss=1.301:  60%|█████▉    | 28/47 [01:52<01:16,  4.00s/it][A
loss=1.022:  60%|█████▉    | 28/47 [01:52<01:16,  4.00s/it][A
loss=1.022:  62%|██████▏   | 29/47 [01:53<00:53,  2.98s/it][A
loss=1.355:  62%|██████▏   | 29/47 [01:53<00:53,  2.98s/it][A
loss=1.355:  64%|██████▍   | 30/47 [01:53<00:38,  2.26s/it][A
loss=1.054:  64%|██████▍   | 30/47 [01:53<00:38,  2.26s/it][A
loss=1.054:  66%|██████▌   | 31/47 [01:54<00:28,  1.76s/it][A
loss=1.411:  66%|██████▌   | 31/47 [01:54<00:28,  1.76s/it][A
loss=1.411:  68%|██████▊   | 32/47 [01:54<00:21,  1.41s/it][A
loss=1.486:  68%|██████▊   | 32/47 [01:55<00:21,  1.41s/it][A
loss=1.486:  70%|███████   | 33/47 [01:55<00:16,  1.17s/it][A
loss=1.328:  70%|███████   | 33/47 [01:55<00:16,  1.17

{"eval_acc": 0.7259899208063355, "eval_f1": 0.18053552748729818, "eval_acc_and_f1": 0.45326272414681684, "eval_loss": 1.4408876172133855, "learning_rate": 9.448818897637797e-06, "train_loss": 1.2941493458218045, "step": 81}



loss=1.334:  74%|███████▍  | 35/47 [02:28<01:30,  7.58s/it][A
loss=1.242:  74%|███████▍  | 35/47 [02:28<01:30,  7.58s/it][A
loss=1.242:  77%|███████▋  | 36/47 [02:29<01:00,  5.48s/it][A
loss=1.217:  77%|███████▋  | 36/47 [02:29<01:00,  5.48s/it][A
loss=1.217:  79%|███████▊  | 37/47 [02:29<00:40,  4.01s/it][A
loss=1.184:  79%|███████▊  | 37/47 [02:29<00:40,  4.01s/it][A
loss=1.184:  81%|████████  | 38/47 [02:30<00:26,  2.99s/it][A
loss=1.196:  81%|████████  | 38/47 [02:30<00:26,  2.99s/it][A
loss=1.196:  83%|████████▎ | 39/47 [02:30<00:18,  2.26s/it][A
loss=1.478:  83%|████████▎ | 39/47 [02:31<00:18,  2.26s/it][A
loss=1.478:  85%|████████▌ | 40/47 [02:31<00:12,  1.76s/it][A
loss=1.210:  85%|████████▌ | 40/47 [02:31<00:12,  1.76s/it][A
loss=1.210:  87%|████████▋ | 41/47 [02:32<00:08,  1.41s/it][A
loss=1.351:  87%|████████▋ | 41/47 [02:32<00:08,  1.41s/it][A
loss=1.351:  89%|████████▉ | 42/47 [02:32<00:05,  1.16s/it][A
loss=1.151:  89%|████████▉ | 42/47 [02:32<00:05,  1.16

{"eval_acc": 0.7386609071274298, "eval_f1": 0.18574843097988095, "eval_acc_and_f1": 0.46220466905365537, "eval_loss": 1.3193321824073792, "learning_rate": 8.031496062992128e-06, "train_loss": 1.2625792821248372, "step": 90}



loss=1.180:  94%|█████████▎| 44/47 [03:05<00:22,  7.56s/it][A
loss=0.963:  94%|█████████▎| 44/47 [03:05<00:22,  7.56s/it][A
loss=0.963:  96%|█████████▌| 45/47 [03:06<00:10,  5.47s/it][A
loss=1.382:  96%|█████████▌| 45/47 [03:06<00:10,  5.47s/it][A
loss=1.382:  98%|█████████▊| 46/47 [03:06<00:04,  4.00s/it][A
loss=1.020:  98%|█████████▊| 46/47 [03:07<00:04,  4.00s/it][A
loss=1.020: 100%|██████████| 47/47 [03:07<00:00,  3.99s/it]
Epoch:  67%|██████▋   | 2/3 [06:24<03:13, 193.88s/it]
Iteration:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.000:   0%|          | 0/47 [00:00<?, ?it/s][A
loss=1.000:   2%|▏         | 1/47 [00:00<00:26,  1.73it/s][A
loss=1.056:   2%|▏         | 1/47 [00:00<00:26,  1.73it/s][A
loss=1.056:   4%|▍         | 2/47 [00:01<00:26,  1.72it/s][A
loss=1.066:   4%|▍         | 2/47 [00:01<00:26,  1.72it/s][A
loss=1.066:   6%|▋         | 3/47 [00:01<00:25,  1.71it/s][A
loss=0.909:   6%|▋         | 3/47 [00:01<00:25,  1.71it/s][A
loss=0.909:   9%|▊         |

{"eval_acc": 0.7329013678905687, "eval_f1": 0.1830486744479777, "eval_acc_and_f1": 0.4579750211692732, "eval_loss": 1.2652011173112052, "learning_rate": 6.614173228346458e-06, "train_loss": 1.0379890137248569, "step": 99}



loss=0.944:  13%|█▎        | 6/47 [00:38<05:21,  7.84s/it][A
loss=1.255:  13%|█▎        | 6/47 [00:38<05:21,  7.84s/it][A
loss=1.255:  15%|█▍        | 7/47 [00:38<03:46,  5.66s/it][A
loss=0.960:  15%|█▍        | 7/47 [00:38<03:46,  5.66s/it][A
loss=0.960:  17%|█▋        | 8/47 [00:39<02:41,  4.14s/it][A
loss=1.057:  17%|█▋        | 8/47 [00:39<02:41,  4.14s/it][A
loss=1.057:  19%|█▉        | 9/47 [00:39<01:56,  3.07s/it][A
loss=1.240:  19%|█▉        | 9/47 [00:39<01:56,  3.07s/it][A
loss=1.240:  21%|██▏       | 10/47 [00:40<01:26,  2.32s/it][A
loss=1.537:  21%|██▏       | 10/47 [00:40<01:26,  2.32s/it][A
loss=1.537:  23%|██▎       | 11/47 [00:40<01:04,  1.80s/it][A
loss=0.825:  23%|██▎       | 11/47 [00:41<01:04,  1.80s/it][A
loss=0.825:  26%|██▌       | 12/47 [00:41<00:50,  1.44s/it][A
loss=1.086:  26%|██▌       | 12/47 [00:41<00:50,  1.44s/it][A
loss=1.086:  28%|██▊       | 13/47 [00:42<00:40,  1.18s/it][A
loss=0.613:  28%|██▊       | 13/47 [00:42<00:40,  1.18s/it][A

{"eval_acc": 0.7681785457163427, "eval_f1": 0.20670140318877178, "eval_acc_and_f1": 0.48743997445255727, "eval_loss": 1.1749011193002974, "learning_rate": 5.196850393700788e-06, "train_loss": 1.057367079787784, "step": 108}



loss=1.164:  32%|███▏      | 15/47 [01:15<04:04,  7.63s/it][A
loss=0.781:  32%|███▏      | 15/47 [01:15<04:04,  7.63s/it][A
loss=0.781:  34%|███▍      | 16/47 [01:16<02:51,  5.52s/it][A
loss=0.793:  34%|███▍      | 16/47 [01:16<02:51,  5.52s/it][A
loss=0.793:  36%|███▌      | 17/47 [01:16<02:01,  4.03s/it][A
loss=1.182:  36%|███▌      | 17/47 [01:16<02:01,  4.03s/it][A
loss=1.182:  38%|███▊      | 18/47 [01:17<01:26,  3.00s/it][A
loss=1.070:  38%|███▊      | 18/47 [01:17<01:26,  3.00s/it][A
loss=1.070:  40%|████      | 19/47 [01:17<01:03,  2.27s/it][A
loss=1.016:  40%|████      | 19/47 [01:17<01:03,  2.27s/it][A
loss=1.016:  43%|████▎     | 20/47 [01:18<00:47,  1.77s/it][A
loss=1.136:  43%|████▎     | 20/47 [01:18<00:47,  1.77s/it][A
loss=1.136:  45%|████▍     | 21/47 [01:18<00:36,  1.41s/it][A
loss=0.942:  45%|████▍     | 21/47 [01:19<00:36,  1.41s/it][A
loss=0.942:  47%|████▋     | 22/47 [01:19<00:29,  1.17s/it][A
loss=0.782:  47%|████▋     | 22/47 [01:19<00:29,  1.17

{"eval_acc": 0.7553635709143268, "eval_f1": 0.19546624696985432, "eval_acc_and_f1": 0.4754149089420906, "eval_loss": 1.151938749211175, "learning_rate": 3.7795275590551182e-06, "train_loss": 0.985237287150489, "step": 117}



loss=0.961:  51%|█████     | 24/47 [01:54<03:02,  7.92s/it][A
loss=0.783:  51%|█████     | 24/47 [01:54<03:02,  7.92s/it][A
loss=0.783:  53%|█████▎    | 25/47 [01:54<02:05,  5.71s/it][A
loss=1.094:  53%|█████▎    | 25/47 [01:55<02:05,  5.71s/it][A
loss=1.094:  55%|█████▌    | 26/47 [01:55<01:27,  4.18s/it][A
loss=0.829:  55%|█████▌    | 26/47 [01:55<01:27,  4.18s/it][A
loss=0.829:  57%|█████▋    | 27/47 [01:56<01:01,  3.10s/it][A
loss=1.064:  57%|█████▋    | 27/47 [01:56<01:01,  3.10s/it][A
loss=1.064:  60%|█████▉    | 28/47 [01:56<00:44,  2.34s/it][A
loss=0.862:  60%|█████▉    | 28/47 [01:56<00:44,  2.34s/it][A
loss=0.862:  62%|██████▏   | 29/47 [01:57<00:32,  1.82s/it][A
loss=0.766:  62%|██████▏   | 29/47 [01:57<00:32,  1.82s/it][A
loss=0.766:  64%|██████▍   | 30/47 [01:57<00:24,  1.45s/it][A
loss=1.026:  64%|██████▍   | 30/47 [01:57<00:24,  1.45s/it][A
loss=1.026:  66%|██████▌   | 31/47 [01:58<00:19,  1.19s/it][A
loss=0.903:  66%|██████▌   | 31/47 [01:58<00:19,  1.19

{"eval_acc": 0.770194384449244, "eval_f1": 0.2093566710170193, "eval_acc_and_f1": 0.48977552773313165, "eval_loss": 1.1104440199477332, "learning_rate": 2.362204724409449e-06, "train_loss": 0.9208823376231723, "step": 126}



loss=0.978:  70%|███████   | 33/47 [02:32<01:48,  7.73s/it][A
loss=1.074:  70%|███████   | 33/47 [02:32<01:48,  7.73s/it][A
loss=1.074:  72%|███████▏  | 34/47 [02:32<01:12,  5.58s/it][A
loss=0.717:  72%|███████▏  | 34/47 [02:32<01:12,  5.58s/it][A
loss=0.717:  74%|███████▍  | 35/47 [02:33<00:48,  4.08s/it][A
loss=0.679:  74%|███████▍  | 35/47 [02:33<00:48,  4.08s/it][A
loss=0.679:  77%|███████▋  | 36/47 [02:33<00:33,  3.03s/it][A
loss=0.979:  77%|███████▋  | 36/47 [02:34<00:33,  3.03s/it][A
loss=0.979:  79%|███████▊  | 37/47 [02:34<00:22,  2.30s/it][A
loss=0.934:  79%|███████▊  | 37/47 [02:34<00:22,  2.30s/it][A
loss=0.934:  81%|████████  | 38/47 [02:35<00:16,  1.78s/it][A
loss=0.880:  81%|████████  | 38/47 [02:35<00:16,  1.78s/it][A
loss=0.880:  83%|████████▎ | 39/47 [02:35<00:11,  1.42s/it][A
loss=1.058:  83%|████████▎ | 39/47 [02:35<00:11,  1.42s/it][A
loss=1.058:  85%|████████▌ | 40/47 [02:36<00:08,  1.17s/it][A
loss=0.858:  85%|████████▌ | 40/47 [02:36<00:08,  1.17

{"eval_acc": 0.7736501079913607, "eval_f1": 0.21296652252526527, "eval_acc_and_f1": 0.493308315258313, "eval_loss": 1.0905309404645647, "learning_rate": 9.448818897637796e-07, "train_loss": 0.9062284562322829, "step": 135}



loss=0.903:  89%|████████▉ | 42/47 [03:10<00:39,  7.90s/it][A
loss=0.659:  89%|████████▉ | 42/47 [03:10<00:39,  7.90s/it][A
loss=0.659:  91%|█████████▏| 43/47 [03:11<00:22,  5.71s/it][A
loss=1.607:  91%|█████████▏| 43/47 [03:11<00:22,  5.71s/it][A
loss=1.607:  94%|█████████▎| 44/47 [03:12<00:12,  4.17s/it][A
loss=0.663:  94%|█████████▎| 44/47 [03:12<00:12,  4.17s/it][A
loss=0.663:  96%|█████████▌| 45/47 [03:12<00:06,  3.09s/it][A
loss=0.832:  96%|█████████▌| 45/47 [03:12<00:06,  3.09s/it][A
loss=0.832:  98%|█████████▊| 46/47 [03:13<00:02,  2.34s/it][A
loss=1.152:  98%|█████████▊| 46/47 [03:13<00:02,  2.34s/it][A07/18/2022 04:25:37 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:25:37 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 04:25:37 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:27,  1.02s/it][A[A

Evaluating:   7

{"eval_acc": 0.7748020158387329, "eval_f1": 0.21372699823569696, "eval_acc_and_f1": 0.4942645070372149, "eval_loss": 1.0849478713103704, "learning_rate": 0.0, "train_loss": 0.6461824907196893, "step": 141}


07/18/2022 04:26:12 - INFO - utilities.trainers -   ***** Running evaluation iter-3_trial1 *****
07/18/2022 04:26:12 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 04:26:12 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 04:26:39 - INFO - utilities.trainers -   ***** Eval results iter-3_trial1 *****
07/18/2022 04:26:39 - INFO - utilities.trainers -     acc = 0.7748020158387329
07/18/2022 04:26:39 - INFO - utilities.trainers -     acc_and_f1 = 0.4942645070372149
07/18/2022 04:26:39 - INFO - utilities.trainers -     f1 = 0.21372699823569696
07/18/2022 04:26:42 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 04:26:48 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:26:48 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 04:26:48 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:18<00:00,  1.02s/it]
07/18/2022 04:29:07 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 04:29:07 - INFO - utilities.trainers -     acc = 0.7746385576867691
07/18/2022 04:29:07 - INFO - utilities.trainers -     acc_and_f1 = 0.4906188036406792
07/18/2022 04:29:07 - INFO - utilities.trainers -     f1 = 0.20659904959458933



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 04:29:08 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 04:29:26 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 04:29:31 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:29:31 - INFO - utilities.trainers -     Num examples = 48500
07/18/2022 04:29:31 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/190 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 190/190 [06:27<00:00,  2.04s/it]
07/18/2022 04:35:59 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 04:35:59 - INFO - utilities.trainers -     acc = 0.7733608247422681
07/18/2022 04:35:59 - INFO - utilities.trainers -     acc_and_f1 = 0.49326727366157763
07/18/2022 04:35:59 - INFO - utilities.trainers -     f1 = 0.21317372258088718
07/18/2022 04:35:59 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 04:36:17 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 04:36:21 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:36:21 - INFO - utilities.trainers -     Num examples = 1500
07/18/2022 04:36:21 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 6/6 [00:11<00:00,  1.99s/it]
07/18/2022 04:36:33 - INFO - uti



************
End of iteration 3:
Train loss 1.6108, Val loss 1.0849478713103704, Test loss 1.0915694210459204
Annotated 500 samples
Current labeled (training) data: 2000 samples
Remaining budget: 2000 (in samples)
************

Saving json with the results....

 Start Training model of iteration 4!



07/18/2022 04:38:55 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.7748020158387329, acc_best_iteration=3, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-3', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-4', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 18
total steps: 187
logging steps: 12
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.308:   2%|▏         | 1/63 [00:00<00:32,  1.90it/s][A
loss=3.310:   2%|▏         | 1/63 [00:00<00:32,  1.90it/s][A
loss=3.310:   3%|▎         | 2/63 [00:01<00:33,  1.84it/s][A
loss=3.235:   3%|▎         | 2/63 [00:01<00:33,  1.84it/s][A
loss=3.235:   5%|▍         | 3/63 [00:01<00:33,  1.81it/s][A
loss=3.241:   5%|▍         | 3/63 [00:01<00:33,  1.81it/s][A
loss=3.241:   6%|▋         | 4/63 [00:02<00:32,  1.79it/s][A
loss=3.195:   6%|▋         | 4/63 [00:02<00:32,  1.79it/s][A
loss=3.195:   8%|▊         | 5/63 [00:02<00:32,  1.77it/s][A
loss=3.248:   8%|▊         | 5/63 [00:02<00:32,  1.77it/s][A
loss=3.248:  10%|▉         | 6/63 [00:03<00:32,  1.75it/s][A
loss=3.135:  10%|▉         | 6/63 [00:03<00:32,  1.75it/s][A
loss=3.135:  11%|█         | 7/63 [00:03<00:32,  1.75it/s][A
loss=3.183:  11%|█         | 7/63 [00:04<00:32,  1.75it/s][A
loss=3.183:  13%|█▎        | 8/63 [00:04<00:31,  1.75it/s][A
loss=3.009:  13%|█▎        | 8/63 [00:04<00:31,  1.75it/s][A
loss=3.

{"eval_acc": 0.17638588912886968, "eval_f1": 0.016350186067888038, "eval_acc_and_f1": 0.09636803759837886, "eval_loss": 2.917318659169333, "learning_rate": 1.3333333333333333e-05, "train_loss": 3.1475822925567627, "step": 12}



loss=2.796:  21%|██        | 13/63 [00:39<06:03,  7.28s/it][A
loss=2.850:  21%|██        | 13/63 [00:39<06:03,  7.28s/it][A
loss=2.850:  22%|██▏       | 14/63 [00:39<04:18,  5.27s/it][A
loss=2.949:  22%|██▏       | 14/63 [00:40<04:18,  5.27s/it][A
loss=2.949:  24%|██▍       | 15/63 [00:40<03:05,  3.86s/it][A
loss=2.701:  24%|██▍       | 15/63 [00:40<03:05,  3.86s/it][A
loss=2.701:  25%|██▌       | 16/63 [00:41<02:15,  2.88s/it][A
loss=2.698:  25%|██▌       | 16/63 [00:41<02:15,  2.88s/it][A
loss=2.698:  27%|██▋       | 17/63 [00:41<01:40,  2.19s/it][A
loss=2.738:  27%|██▋       | 17/63 [00:41<01:40,  2.19s/it][A
loss=2.738:  29%|██▊       | 18/63 [00:42<01:16,  1.71s/it][A
loss=2.616:  29%|██▊       | 18/63 [00:42<01:16,  1.71s/it][A
loss=2.616:  30%|███       | 19/63 [00:42<01:00,  1.37s/it][A
loss=2.362:  30%|███       | 19/63 [00:42<01:00,  1.37s/it][A
loss=2.362:  32%|███▏      | 20/63 [00:43<00:48,  1.13s/it][A
loss=2.543:  32%|███▏      | 20/63 [00:43<00:48,  1.13

{"eval_acc": 0.18329733621310296, "eval_f1": 0.016655312864916533, "eval_acc_and_f1": 0.09997632453900974, "eval_loss": 2.5389986634254456, "learning_rate": 1.929824561403509e-05, "train_loss": 2.633288860321045, "step": 24}



loss=2.174:  40%|███▉      | 25/63 [01:17<04:38,  7.32s/it][A
loss=2.203:  40%|███▉      | 25/63 [01:18<04:38,  7.32s/it][A
loss=2.203:  41%|████▏     | 26/63 [01:18<03:16,  5.30s/it][A
loss=2.501:  41%|████▏     | 26/63 [01:18<03:16,  5.30s/it][A
loss=2.501:  43%|████▎     | 27/63 [01:19<02:19,  3.88s/it][A
loss=2.449:  43%|████▎     | 27/63 [01:19<02:19,  3.88s/it][A
loss=2.449:  44%|████▍     | 28/63 [01:19<01:41,  2.89s/it][A
loss=2.110:  44%|████▍     | 28/63 [01:19<01:41,  2.89s/it][A
loss=2.110:  46%|████▌     | 29/63 [01:20<01:14,  2.20s/it][A
loss=2.208:  46%|████▌     | 29/63 [01:20<01:14,  2.20s/it][A
loss=2.208:  48%|████▊     | 30/63 [01:20<00:56,  1.72s/it][A
loss=1.977:  48%|████▊     | 30/63 [01:21<00:56,  1.72s/it][A
loss=1.977:  49%|████▉     | 31/63 [01:21<00:44,  1.38s/it][A
loss=2.147:  49%|████▉     | 31/63 [01:21<00:44,  1.38s/it][A
loss=2.147:  51%|█████     | 32/63 [01:22<00:35,  1.14s/it][A
loss=1.986:  51%|█████     | 32/63 [01:22<00:35,  1.14

{"eval_acc": 0.1812814974802016, "eval_f1": 0.0183898963660972, "eval_acc_and_f1": 0.09983569692314939, "eval_loss": 2.38749166897365, "learning_rate": 1.7894736842105264e-05, "train_loss": 2.2031860053539276, "step": 36}



loss=2.096:  59%|█████▊    | 37/63 [01:58<03:20,  7.69s/it][A
loss=1.853:  59%|█████▊    | 37/63 [01:58<03:20,  7.69s/it][A
loss=1.853:  60%|██████    | 38/63 [01:58<02:19,  5.56s/it][A
loss=1.986:  60%|██████    | 38/63 [01:59<02:19,  5.56s/it][A
loss=1.986:  62%|██████▏   | 39/63 [01:59<01:37,  4.07s/it][A
loss=1.714:  62%|██████▏   | 39/63 [01:59<01:37,  4.07s/it][A
loss=1.714:  63%|██████▎   | 40/63 [02:00<01:09,  3.02s/it][A
loss=2.111:  63%|██████▎   | 40/63 [02:00<01:09,  3.02s/it][A
loss=2.111:  65%|██████▌   | 41/63 [02:00<00:50,  2.29s/it][A
loss=2.247:  65%|██████▌   | 41/63 [02:00<00:50,  2.29s/it][A
loss=2.247:  67%|██████▋   | 42/63 [02:01<00:37,  1.78s/it][A
loss=2.106:  67%|██████▋   | 42/63 [02:01<00:37,  1.78s/it][A
loss=2.106:  68%|██████▊   | 43/63 [02:01<00:28,  1.42s/it][A
loss=1.907:  68%|██████▊   | 43/63 [02:02<00:28,  1.42s/it][A
loss=1.907:  70%|██████▉   | 44/63 [02:02<00:22,  1.17s/it][A
loss=2.205:  70%|██████▉   | 44/63 [02:02<00:22,  1.17

{"eval_acc": 0.34528437724982003, "eval_f1": 0.0651485521165747, "eval_acc_and_f1": 0.20521646468319737, "eval_loss": 2.1632068157196045, "learning_rate": 1.649122807017544e-05, "train_loss": 2.002869486808777, "step": 48}



loss=1.848:  78%|███████▊  | 49/63 [02:37<01:43,  7.38s/it][A
loss=1.945:  78%|███████▊  | 49/63 [02:37<01:43,  7.38s/it][A
loss=1.945:  79%|███████▉  | 50/63 [02:37<01:09,  5.34s/it][A
loss=2.241:  79%|███████▉  | 50/63 [02:37<01:09,  5.34s/it][A
loss=2.241:  81%|████████  | 51/63 [02:38<00:46,  3.91s/it][A
loss=1.918:  81%|████████  | 51/63 [02:38<00:46,  3.91s/it][A
loss=1.918:  83%|████████▎ | 52/63 [02:38<00:32,  2.91s/it][A
loss=2.214:  83%|████████▎ | 52/63 [02:39<00:32,  2.91s/it][A
loss=2.214:  84%|████████▍ | 53/63 [02:39<00:22,  2.21s/it][A
loss=2.196:  84%|████████▍ | 53/63 [02:39<00:22,  2.21s/it][A
loss=2.196:  86%|████████▌ | 54/63 [02:40<00:15,  1.72s/it][A
loss=1.660:  86%|████████▌ | 54/63 [02:40<00:15,  1.72s/it][A
loss=1.660:  87%|████████▋ | 55/63 [02:40<00:11,  1.38s/it][A
loss=1.754:  87%|████████▋ | 55/63 [02:40<00:11,  1.38s/it][A
loss=1.754:  89%|████████▉ | 56/63 [02:41<00:07,  1.14s/it][A
loss=1.774:  89%|████████▉ | 56/63 [02:41<00:07,  1.14

{"eval_acc": 0.4295176385889129, "eval_f1": 0.09351545402924946, "eval_acc_and_f1": 0.26151654630908117, "eval_loss": 2.0017681377274648, "learning_rate": 1.5087719298245615e-05, "train_loss": 1.96704896291097, "step": 60}



loss=1.933:  97%|█████████▋| 61/63 [03:19<00:16,  8.12s/it][A
loss=1.693:  97%|█████████▋| 61/63 [03:19<00:16,  8.12s/it][A
loss=1.693:  98%|█████████▊| 62/63 [03:20<00:05,  5.86s/it][A
loss=1.969:  98%|█████████▊| 62/63 [03:20<00:05,  5.86s/it][A
loss=1.969: 100%|██████████| 63/63 [03:20<00:00,  3.18s/it]
Epoch:  33%|███▎      | 1/3 [03:20<06:41, 200.62s/it]
Iteration:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=2.012:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=2.012:   2%|▏         | 1/63 [00:00<00:35,  1.73it/s][A
loss=1.819:   2%|▏         | 1/63 [00:00<00:35,  1.73it/s][A
loss=1.819:   3%|▎         | 2/63 [00:01<00:35,  1.72it/s][A
loss=1.704:   3%|▎         | 2/63 [00:01<00:35,  1.72it/s][A
loss=1.704:   5%|▍         | 3/63 [00:01<00:34,  1.72it/s][A
loss=1.892:   5%|▍         | 3/63 [00:01<00:34,  1.72it/s][A
loss=1.892:   6%|▋         | 4/63 [00:02<00:34,  1.73it/s][A
loss=1.586:   6%|▋         | 4/63 [00:02<00:34,  1.73it/s][A
loss=1.586:   8%|▊         | 5

{"eval_acc": 0.5290136789056875, "eval_f1": 0.10987451296596841, "eval_acc_and_f1": 0.31944409593582795, "eval_loss": 1.8181994259357452, "learning_rate": 1.3684210526315791e-05, "train_loss": 1.793828119834264, "step": 72}



loss=1.689:  16%|█▌        | 10/63 [00:37<06:23,  7.24s/it][A
loss=1.407:  16%|█▌        | 10/63 [00:37<06:23,  7.24s/it][A
loss=1.407:  17%|█▋        | 11/63 [00:38<04:32,  5.24s/it][A
loss=1.551:  17%|█▋        | 11/63 [00:38<04:32,  5.24s/it][A
loss=1.551:  19%|█▉        | 12/63 [00:38<03:16,  3.84s/it][A
loss=1.398:  19%|█▉        | 12/63 [00:38<03:16,  3.84s/it][A
loss=1.398:  21%|██        | 13/63 [00:39<02:23,  2.86s/it][A
loss=1.470:  21%|██        | 13/63 [00:39<02:23,  2.86s/it][A
loss=1.470:  22%|██▏       | 14/63 [00:39<01:46,  2.18s/it][A
loss=1.755:  22%|██▏       | 14/63 [00:39<01:46,  2.18s/it][A
loss=1.755:  24%|██▍       | 15/63 [00:40<01:21,  1.70s/it][A
loss=1.379:  24%|██▍       | 15/63 [00:40<01:21,  1.70s/it][A
loss=1.379:  25%|██▌       | 16/63 [00:41<01:04,  1.37s/it][A
loss=1.724:  25%|██▌       | 16/63 [00:41<01:04,  1.37s/it][A
loss=1.724:  27%|██▋       | 17/63 [00:41<00:51,  1.13s/it][A
loss=1.913:  27%|██▋       | 17/63 [00:41<00:51,  1.13

{"eval_acc": 0.5380849532037437, "eval_f1": 0.11027931943110258, "eval_acc_and_f1": 0.32418213631742315, "eval_loss": 1.6778610008103507, "learning_rate": 1.2280701754385966e-05, "train_loss": 1.5764617522557576, "step": 84}



loss=1.373:  35%|███▍      | 22/63 [01:16<05:02,  7.37s/it][A
loss=1.281:  35%|███▍      | 22/63 [01:16<05:02,  7.37s/it][A
loss=1.281:  37%|███▋      | 23/63 [01:16<03:33,  5.33s/it][A
loss=1.321:  37%|███▋      | 23/63 [01:17<03:33,  5.33s/it][A
loss=1.321:  38%|███▊      | 24/63 [01:17<02:32,  3.91s/it][A
loss=1.531:  38%|███▊      | 24/63 [01:17<02:32,  3.91s/it][A
loss=1.531:  40%|███▉      | 25/63 [01:18<01:50,  2.91s/it][A
loss=1.174:  40%|███▉      | 25/63 [01:18<01:50,  2.91s/it][A
loss=1.174:  41%|████▏     | 26/63 [01:18<01:21,  2.21s/it][A
loss=1.632:  41%|████▏     | 26/63 [01:18<01:21,  2.21s/it][A
loss=1.632:  43%|████▎     | 27/63 [01:19<01:02,  1.73s/it][A
loss=1.341:  43%|████▎     | 27/63 [01:19<01:02,  1.73s/it][A
loss=1.341:  44%|████▍     | 28/63 [01:19<00:48,  1.39s/it][A
loss=1.163:  44%|████▍     | 28/63 [01:20<00:48,  1.39s/it][A
loss=1.163:  46%|████▌     | 29/63 [01:20<00:38,  1.15s/it][A
loss=1.260:  46%|████▌     | 29/63 [01:20<00:38,  1.15

{"eval_acc": 0.5928005759539237, "eval_f1": 0.12338607720254241, "eval_acc_and_f1": 0.358093326578233, "eval_loss": 1.5590890177658625, "learning_rate": 1.0877192982456142e-05, "train_loss": 1.3629393875598907, "step": 96}



loss=1.348:  54%|█████▍    | 34/63 [01:55<03:35,  7.42s/it][A
loss=1.247:  54%|█████▍    | 34/63 [01:55<03:35,  7.42s/it][A
loss=1.247:  56%|█████▌    | 35/63 [01:56<02:30,  5.37s/it][A
loss=1.944:  56%|█████▌    | 35/63 [01:56<02:30,  5.37s/it][A
loss=1.944:  57%|█████▋    | 36/63 [01:56<01:46,  3.93s/it][A
loss=0.993:  57%|█████▋    | 36/63 [01:56<01:46,  3.93s/it][A
loss=0.993:  59%|█████▊    | 37/63 [01:57<01:16,  2.93s/it][A
loss=1.650:  59%|█████▊    | 37/63 [01:57<01:16,  2.93s/it][A
loss=1.650:  60%|██████    | 38/63 [01:57<00:55,  2.22s/it][A
loss=1.631:  60%|██████    | 38/63 [01:57<00:55,  2.22s/it][A
loss=1.631:  62%|██████▏   | 39/63 [01:58<00:41,  1.73s/it][A
loss=1.385:  62%|██████▏   | 39/63 [01:58<00:41,  1.73s/it][A
loss=1.385:  63%|██████▎   | 40/63 [01:58<00:31,  1.39s/it][A
loss=1.612:  63%|██████▎   | 40/63 [01:59<00:31,  1.39s/it][A
loss=1.612:  65%|██████▌   | 41/63 [01:59<00:25,  1.14s/it][A
loss=1.156:  65%|██████▌   | 41/63 [01:59<00:25,  1.14

{"eval_acc": 0.6031677465802736, "eval_f1": 0.12743274644285038, "eval_acc_and_f1": 0.365300246511562, "eval_loss": 1.5023436078003474, "learning_rate": 9.473684210526315e-06, "train_loss": 1.3910537660121918, "step": 108}



loss=1.114:  73%|███████▎  | 46/63 [02:34<02:05,  7.38s/it][A
loss=1.416:  73%|███████▎  | 46/63 [02:34<02:05,  7.38s/it][A
loss=1.416:  75%|███████▍  | 47/63 [02:34<01:25,  5.34s/it][A
loss=1.282:  75%|███████▍  | 47/63 [02:35<01:25,  5.34s/it][A
loss=1.282:  76%|███████▌  | 48/63 [02:35<00:58,  3.91s/it][A
loss=1.556:  76%|███████▌  | 48/63 [02:35<00:58,  3.91s/it][A
loss=1.556:  78%|███████▊  | 49/63 [02:36<00:40,  2.91s/it][A
loss=1.557:  78%|███████▊  | 49/63 [02:36<00:40,  2.91s/it][A
loss=1.557:  79%|███████▉  | 50/63 [02:36<00:28,  2.21s/it][A
loss=1.038:  79%|███████▉  | 50/63 [02:36<00:28,  2.21s/it][A
loss=1.038:  81%|████████  | 51/63 [02:37<00:20,  1.72s/it][A
loss=1.195:  81%|████████  | 51/63 [02:37<00:20,  1.72s/it][A
loss=1.195:  83%|████████▎ | 52/63 [02:37<00:15,  1.38s/it][A
loss=1.656:  83%|████████▎ | 52/63 [02:37<00:15,  1.38s/it][A
loss=1.656:  84%|████████▍ | 53/63 [02:38<00:11,  1.14s/it][A
loss=1.466:  84%|████████▍ | 53/63 [02:38<00:11,  1.14

{"eval_acc": 0.6429085673146149, "eval_f1": 0.13581800047456008, "eval_acc_and_f1": 0.38936328389458746, "eval_loss": 1.4242424155984605, "learning_rate": 8.070175438596492e-06, "train_loss": 1.3212155401706696, "step": 120}



loss=1.334:  92%|█████████▏| 58/63 [03:16<00:40,  8.10s/it][A
loss=1.616:  92%|█████████▏| 58/63 [03:16<00:40,  8.10s/it][A
loss=1.616:  94%|█████████▎| 59/63 [03:17<00:23,  5.84s/it][A
loss=1.275:  94%|█████████▎| 59/63 [03:17<00:23,  5.84s/it][A
loss=1.275:  95%|█████████▌| 60/63 [03:17<00:12,  4.27s/it][A
loss=1.194:  95%|█████████▌| 60/63 [03:17<00:12,  4.27s/it][A
loss=1.194:  97%|█████████▋| 61/63 [03:18<00:06,  3.16s/it][A
loss=1.462:  97%|█████████▋| 61/63 [03:18<00:06,  3.16s/it][A
loss=1.462:  98%|█████████▊| 62/63 [03:19<00:02,  2.39s/it][A
loss=0.813:  98%|█████████▊| 62/63 [03:19<00:02,  2.39s/it][A
loss=0.813: 100%|██████████| 63/63 [03:19<00:00,  3.16s/it]
Epoch:  67%|██████▋   | 2/3 [06:39<03:20, 200.24s/it]
Iteration:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=1.057:   0%|          | 0/63 [00:00<?, ?it/s][A
loss=1.057:   2%|▏         | 1/63 [00:00<00:35,  1.73it/s][A
loss=0.773:   2%|▏         | 1/63 [00:00<00:35,  1.73it/s][A
loss=0.773:   3%|▎      

{"eval_acc": 0.6393088552915767, "eval_f1": 0.14121461403460078, "eval_acc_and_f1": 0.3902617346630887, "eval_loss": 1.3525660336017609, "learning_rate": 6.666666666666667e-06, "train_loss": 1.2249694367249806, "step": 132}



loss=1.241:  11%|█         | 7/63 [00:36<06:55,  7.43s/it][A
loss=0.842:  11%|█         | 7/63 [00:36<06:55,  7.43s/it][A
loss=0.842:  13%|█▎        | 8/63 [00:37<04:55,  5.37s/it][A
loss=1.260:  13%|█▎        | 8/63 [00:37<04:55,  5.37s/it][A
loss=1.260:  14%|█▍        | 9/63 [00:37<03:32,  3.94s/it][A
loss=1.381:  14%|█▍        | 9/63 [00:37<03:32,  3.94s/it][A
loss=1.381:  16%|█▌        | 10/63 [00:38<02:35,  2.93s/it][A
loss=1.065:  16%|█▌        | 10/63 [00:38<02:35,  2.93s/it][A
loss=1.065:  17%|█▋        | 11/63 [00:39<01:55,  2.23s/it][A
loss=1.369:  17%|█▋        | 11/63 [00:39<01:55,  2.23s/it][A
loss=1.369:  19%|█▉        | 12/63 [00:39<01:28,  1.73s/it][A
loss=1.195:  19%|█▉        | 12/63 [00:39<01:28,  1.73s/it][A
loss=1.195:  21%|██        | 13/63 [00:40<01:09,  1.39s/it][A
loss=1.346:  21%|██        | 13/63 [00:40<01:09,  1.39s/it][A
loss=1.346:  22%|██▏       | 14/63 [00:40<00:56,  1.15s/it][A
loss=1.144:  22%|██▏       | 14/63 [00:40<00:56,  1.15s/it]

{"eval_acc": 0.6735781137508999, "eval_f1": 0.15690778994124543, "eval_acc_and_f1": 0.4152429518460727, "eval_loss": 1.2935803915773119, "learning_rate": 5.263157894736842e-06, "train_loss": 1.1992519895235698, "step": 144}



loss=0.982:  30%|███       | 19/63 [01:19<05:57,  8.13s/it][A
loss=1.138:  30%|███       | 19/63 [01:19<05:57,  8.13s/it][A
loss=1.138:  32%|███▏      | 20/63 [01:19<04:12,  5.87s/it][A
loss=1.165:  32%|███▏      | 20/63 [01:19<04:12,  5.87s/it][A
loss=1.165:  33%|███▎      | 21/63 [01:20<02:59,  4.28s/it][A
loss=1.340:  33%|███▎      | 21/63 [01:20<02:59,  4.28s/it][A
loss=1.340:  35%|███▍      | 22/63 [01:20<02:10,  3.17s/it][A
loss=0.902:  35%|███▍      | 22/63 [01:21<02:10,  3.17s/it][A
loss=0.902:  37%|███▋      | 23/63 [01:21<01:35,  2.40s/it][A
loss=1.244:  37%|███▋      | 23/63 [01:21<01:35,  2.40s/it][A
loss=1.244:  38%|███▊      | 24/63 [01:22<01:12,  1.85s/it][A
loss=1.794:  38%|███▊      | 24/63 [01:22<01:12,  1.85s/it][A
loss=1.794:  40%|███▉      | 25/63 [01:22<00:56,  1.48s/it][A
loss=1.105:  40%|███▉      | 25/63 [01:22<00:56,  1.48s/it][A
loss=1.105:  41%|████▏     | 26/63 [01:23<00:44,  1.21s/it][A
loss=1.100:  41%|████▏     | 26/63 [01:23<00:44,  1.21

{"eval_acc": 0.6787616990640749, "eval_f1": 0.16057973552576033, "eval_acc_and_f1": 0.4196707172949176, "eval_loss": 1.2666080594062805, "learning_rate": 3.859649122807018e-06, "train_loss": 1.176384150981903, "step": 156}



loss=1.198:  49%|████▉     | 31/63 [01:58<03:56,  7.39s/it][A
loss=1.082:  49%|████▉     | 31/63 [01:58<03:56,  7.39s/it][A
loss=1.082:  51%|█████     | 32/63 [01:58<02:45,  5.35s/it][A
loss=1.058:  51%|█████     | 32/63 [01:58<02:45,  5.35s/it][A
loss=1.058:  52%|█████▏    | 33/63 [01:59<01:57,  3.92s/it][A
loss=0.942:  52%|█████▏    | 33/63 [01:59<01:57,  3.92s/it][A
loss=0.942:  54%|█████▍    | 34/63 [01:59<01:24,  2.91s/it][A
loss=1.053:  54%|█████▍    | 34/63 [02:00<01:24,  2.91s/it][A
loss=1.053:  56%|█████▌    | 35/63 [02:00<01:02,  2.22s/it][A
loss=0.950:  56%|█████▌    | 35/63 [02:00<01:02,  2.22s/it][A
loss=0.950:  57%|█████▋    | 36/63 [02:01<00:46,  1.73s/it][A
loss=1.002:  57%|█████▋    | 36/63 [02:01<00:46,  1.73s/it][A
loss=1.002:  59%|█████▊    | 37/63 [02:01<00:35,  1.38s/it][A
loss=0.743:  59%|█████▊    | 37/63 [02:01<00:35,  1.38s/it][A
loss=0.743:  60%|██████    | 38/63 [02:02<00:28,  1.14s/it][A
loss=1.100:  60%|██████    | 38/63 [02:02<00:28,  1.14

{"eval_acc": 0.6902807775377969, "eval_f1": 0.16376429386246244, "eval_acc_and_f1": 0.4270225357001297, "eval_loss": 1.2243195048400335, "learning_rate": 2.456140350877193e-06, "train_loss": 1.0348477164904277, "step": 168}



loss=0.980:  68%|██████▊   | 43/63 [02:38<02:33,  7.69s/it][A
loss=1.095:  68%|██████▊   | 43/63 [02:38<02:33,  7.69s/it][A
loss=1.095:  70%|██████▉   | 44/63 [02:39<01:45,  5.56s/it][A
loss=0.969:  70%|██████▉   | 44/63 [02:39<01:45,  5.56s/it][A
loss=0.969:  71%|███████▏  | 45/63 [02:39<01:13,  4.06s/it][A
loss=1.107:  71%|███████▏  | 45/63 [02:39<01:13,  4.06s/it][A
loss=1.107:  73%|███████▎  | 46/63 [02:40<00:51,  3.02s/it][A
loss=0.996:  73%|███████▎  | 46/63 [02:40<00:51,  3.02s/it][A
loss=0.996:  75%|███████▍  | 47/63 [02:40<00:36,  2.29s/it][A
loss=1.073:  75%|███████▍  | 47/63 [02:40<00:36,  2.29s/it][A
loss=1.073:  76%|███████▌  | 48/63 [02:41<00:26,  1.78s/it][A
loss=0.880:  76%|███████▌  | 48/63 [02:41<00:26,  1.78s/it][A
loss=0.880:  78%|███████▊  | 49/63 [02:41<00:19,  1.42s/it][A
loss=1.237:  78%|███████▊  | 49/63 [02:42<00:19,  1.42s/it][A
loss=1.237:  79%|███████▉  | 50/63 [02:42<00:15,  1.17s/it][A
loss=0.976:  79%|███████▉  | 50/63 [02:42<00:15,  1.17

{"eval_acc": 0.6891288696904248, "eval_f1": 0.16583481801037261, "eval_acc_and_f1": 0.42748184385039867, "eval_loss": 1.2256096601486206, "learning_rate": 1.0526315789473685e-06, "train_loss": 1.0471435735623043, "step": 180}



loss=1.056:  87%|████████▋ | 55/63 [03:13<00:52,  6.55s/it][A
loss=1.377:  87%|████████▋ | 55/63 [03:13<00:52,  6.55s/it][A
loss=1.377:  89%|████████▉ | 56/63 [03:14<00:33,  4.76s/it][A
loss=1.264:  89%|████████▉ | 56/63 [03:14<00:33,  4.76s/it][A
loss=1.264:  90%|█████████ | 57/63 [03:14<00:21,  3.51s/it][A
loss=1.121:  90%|█████████ | 57/63 [03:14<00:21,  3.51s/it][A
loss=1.121:  92%|█████████▏| 58/63 [03:15<00:13,  2.63s/it][A
loss=1.159:  92%|█████████▏| 58/63 [03:15<00:13,  2.63s/it][A
loss=1.159:  94%|█████████▎| 59/63 [03:15<00:08,  2.02s/it][A
loss=1.301:  94%|█████████▎| 59/63 [03:15<00:08,  2.02s/it][A
loss=1.301:  95%|█████████▌| 60/63 [03:16<00:04,  1.59s/it][A
loss=1.089:  95%|█████████▌| 60/63 [03:16<00:04,  1.59s/it][A
loss=1.089:  97%|█████████▋| 61/63 [03:16<00:02,  1.28s/it][A
loss=0.903:  97%|█████████▋| 61/63 [03:17<00:02,  1.28s/it][A
loss=0.903:  98%|█████████▊| 62/63 [03:17<00:01,  1.07s/it][A
loss=0.953:  98%|█████████▊| 62/63 [03:17<00:01,  1.07

{"eval_acc": 0.6915766738660907, "eval_f1": 0.16681684120395107, "eval_acc_and_f1": 0.42919675753502085, "eval_loss": 1.2170370050839014, "learning_rate": 0.0, "train_loss": 0.8519768168528875, "step": 189}


07/18/2022 04:49:54 - INFO - utilities.trainers -   ***** Running evaluation iter-4_trial1 *****
07/18/2022 04:49:54 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 04:49:54 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 04:50:22 - INFO - utilities.trainers -   ***** Eval results iter-4_trial1 *****
07/18/2022 04:50:22 - INFO - utilities.trainers -     acc = 0.6915766738660907
07/18/2022 04:50:22 - INFO - utilities.trainers -     acc_and_f1 = 0.42919675753502085
07/18/2022 04:50:22 - INFO - utilities.trainers -     f1 = 0.16681684120395107
07/18/2022 04:50:25 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 04:50:31 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:50:31 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 04:50:31 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 04:52:50 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 04:52:50 - INFO - utilities.trainers -     acc = 0.6834571741259144
07/18/2022 04:52:50 - INFO - utilities.trainers -     acc_and_f1 = 0.4194919541525205
07/18/2022 04:52:50 - INFO - utilities.trainers -     f1 = 0.15552673417912655



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 04:52:51 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 04:53:09 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 04:53:14 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 04:53:14 - INFO - utilities.trainers -     Num examples = 48000
07/18/2022 04:53:14 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/188 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 188/188 [06:23<00:00,  2.04s/it]
07/18/2022 04:59:38 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 04:59:38 - INFO - utilities.trainers -     acc = 0.6865625
07/18/2022 04:59:38 - INFO - utilities.trainers -     acc_and_f1 = 0.4250003391143463
07/18/2022 04:59:38 - INFO - utilities.trainers -     f1 = 0.1634381782286927
07/18/2022 04:59:38 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 04:59:56 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 05:00:00 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 05:00:00 - INFO - utilities.trainers -     Num examples = 2000
07/18/2022 05:00:00 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 8/8 [00:15<00:00,  1.99s/it]
07/18/2022 05:00:16 - INFO - utilities.trai



************
End of iteration 4:
Train loss 1.6466, Val loss 1.2170370050839014, Test loss 1.233336124350043
Annotated 500 samples
Current labeled (training) data: 2500 samples
Remaining budget: 1500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 5!



07/18/2022 05:02:37 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.7748020158387329, acc_best_iteration=3, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-3', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-5', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 23
total steps: 234
logging steps: 15
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.307:   1%|▏         | 1/79 [00:00<00:40,  1.91it/s][A
loss=3.337:   1%|▏         | 1/79 [00:00<00:40,  1.91it/s][A
loss=3.337:   3%|▎         | 2/79 [00:01<00:41,  1.85it/s][A
loss=3.254:   3%|▎         | 2/79 [00:01<00:41,  1.85it/s][A
loss=3.254:   4%|▍         | 3/79 [00:01<00:41,  1.82it/s][A
loss=3.278:   4%|▍         | 3/79 [00:01<00:41,  1.82it/s][A
loss=3.278:   5%|▌         | 4/79 [00:02<00:41,  1.79it/s][A
loss=3.300:   5%|▌         | 4/79 [00:02<00:41,  1.79it/s][A
loss=3.300:   6%|▋         | 5/79 [00:02<00:41,  1.77it/s][A
loss=3.106:   6%|▋         | 5/79 [00:02<00:41,  1.77it/s][A
loss=3.106:   8%|▊         | 6/79 [00:03<00:41,  1.76it/s][A
loss=3.168:   8%|▊         | 6/79 [00:03<00:41,  1.76it/s][A
loss=3.168:   9%|▉         | 7/79 [00:03<00:40,  1.76it/s][A
loss=3.098:   9%|▉         | 7/79 [00:04<00:40,  1.76it/s][A
loss=3.098:  10%|█         | 8/79 [00:04<00:40,  1.75it/s][A
loss=3.050:  10%|█         | 8/79 [00:04<00:40,  1.75it/s][A
loss=3.

{"eval_acc": 0.17350611951043918, "eval_f1": 0.012191147122900717, "eval_acc_and_f1": 0.09284863331666995, "eval_loss": 2.8972533345222473, "learning_rate": 1.3043478260869566e-05, "train_loss": 3.106929127375285, "step": 15}



loss=2.907:  20%|██        | 16/79 [00:44<08:17,  7.90s/it][A
loss=2.733:  20%|██        | 16/79 [00:44<08:17,  7.90s/it][A
loss=2.733:  22%|██▏       | 17/79 [00:44<05:53,  5.70s/it][A
loss=2.872:  22%|██▏       | 17/79 [00:44<05:53,  5.70s/it][A
loss=2.872:  23%|██▎       | 18/79 [00:45<04:13,  4.16s/it][A
loss=2.644:  23%|██▎       | 18/79 [00:45<04:13,  4.16s/it][A
loss=2.644:  24%|██▍       | 19/79 [00:45<03:05,  3.09s/it][A
loss=2.651:  24%|██▍       | 19/79 [00:45<03:05,  3.09s/it][A
loss=2.651:  25%|██▌       | 20/79 [00:46<02:17,  2.34s/it][A
loss=2.555:  25%|██▌       | 20/79 [00:46<02:17,  2.34s/it][A
loss=2.555:  27%|██▋       | 21/79 [00:46<01:45,  1.81s/it][A
loss=2.369:  27%|██▋       | 21/79 [00:47<01:45,  1.81s/it][A
loss=2.369:  28%|██▊       | 22/79 [00:47<01:22,  1.45s/it][A
loss=2.353:  28%|██▊       | 22/79 [00:47<01:22,  1.45s/it][A
loss=2.353:  29%|██▉       | 23/79 [00:48<01:06,  1.19s/it][A
loss=2.676:  29%|██▉       | 23/79 [00:48<01:06,  1.19

{"eval_acc": 0.2316774658027358, "eval_f1": 0.03142756833640784, "eval_acc_and_f1": 0.13155251706957183, "eval_loss": 2.617457934788295, "learning_rate": 1.9345794392523366e-05, "train_loss": 2.571739753087362, "step": 30}



loss=2.613:  39%|███▉      | 31/79 [01:25<06:01,  7.52s/it][A
loss=2.383:  39%|███▉      | 31/79 [01:25<06:01,  7.52s/it][A
loss=2.383:  41%|████      | 32/79 [01:26<04:15,  5.44s/it][A
loss=2.293:  41%|████      | 32/79 [01:26<04:15,  5.44s/it][A
loss=2.293:  42%|████▏     | 33/79 [01:26<03:03,  3.98s/it][A
loss=2.349:  42%|████▏     | 33/79 [01:26<03:03,  3.98s/it][A
loss=2.349:  43%|████▎     | 34/79 [01:27<02:13,  2.96s/it][A
loss=2.046:  43%|████▎     | 34/79 [01:27<02:13,  2.96s/it][A
loss=2.046:  44%|████▍     | 35/79 [01:28<01:38,  2.25s/it][A
loss=2.178:  44%|████▍     | 35/79 [01:28<01:38,  2.25s/it][A
loss=2.178:  46%|████▌     | 36/79 [01:28<01:15,  1.75s/it][A
loss=2.282:  46%|████▌     | 36/79 [01:28<01:15,  1.75s/it][A
loss=2.282:  47%|████▋     | 37/79 [01:29<00:58,  1.40s/it][A
loss=2.085:  47%|████▋     | 37/79 [01:29<00:58,  1.40s/it][A
loss=2.085:  48%|████▊     | 38/79 [01:29<00:47,  1.16s/it][A
loss=2.296:  48%|████▊     | 38/79 [01:29<00:47,  1.16

{"eval_acc": 0.3091432685385169, "eval_f1": 0.045098503767768465, "eval_acc_and_f1": 0.1771208861531427, "eval_loss": 2.309927191053118, "learning_rate": 1.794392523364486e-05, "train_loss": 2.245981041590373, "step": 45}



loss=1.995:  58%|█████▊    | 46/79 [02:08<04:13,  7.67s/it][A
loss=2.108:  58%|█████▊    | 46/79 [02:08<04:13,  7.67s/it][A
loss=2.108:  59%|█████▉    | 47/79 [02:08<02:57,  5.55s/it][A
loss=2.091:  59%|█████▉    | 47/79 [02:08<02:57,  5.55s/it][A
loss=2.091:  61%|██████    | 48/79 [02:09<02:05,  4.06s/it][A
loss=1.962:  61%|██████    | 48/79 [02:09<02:05,  4.06s/it][A
loss=1.962:  62%|██████▏   | 49/79 [02:09<01:30,  3.01s/it][A
loss=2.188:  62%|██████▏   | 49/79 [02:09<01:30,  3.01s/it][A
loss=2.188:  63%|██████▎   | 50/79 [02:10<01:06,  2.29s/it][A
loss=2.051:  63%|██████▎   | 50/79 [02:10<01:06,  2.29s/it][A
loss=2.051:  65%|██████▍   | 51/79 [02:10<00:49,  1.77s/it][A
loss=2.006:  65%|██████▍   | 51/79 [02:11<00:49,  1.77s/it][A
loss=2.006:  66%|██████▌   | 52/79 [02:11<00:38,  1.42s/it][A
loss=2.117:  66%|██████▌   | 52/79 [02:11<00:38,  1.42s/it][A
loss=2.117:  67%|██████▋   | 53/79 [02:12<00:30,  1.17s/it][A
loss=2.451:  67%|██████▋   | 53/79 [02:12<00:30,  1.17

{"eval_acc": 0.3647228221742261, "eval_f1": 0.06034789215004535, "eval_acc_and_f1": 0.21253535716213573, "eval_loss": 2.1019162578242168, "learning_rate": 1.6542056074766357e-05, "train_loss": 2.0653997659683228, "step": 60}



loss=1.977:  77%|███████▋  | 61/79 [02:48<02:11,  7.31s/it][A
loss=1.766:  77%|███████▋  | 61/79 [02:48<02:11,  7.31s/it][A
loss=1.766:  78%|███████▊  | 62/79 [02:49<01:29,  5.29s/it][A
loss=1.887:  78%|███████▊  | 62/79 [02:49<01:29,  5.29s/it][A
loss=1.887:  80%|███████▉  | 63/79 [02:49<01:02,  3.88s/it][A
loss=1.746:  80%|███████▉  | 63/79 [02:50<01:02,  3.88s/it][A
loss=1.746:  81%|████████  | 64/79 [02:50<00:43,  2.89s/it][A
loss=1.902:  81%|████████  | 64/79 [02:50<00:43,  2.89s/it][A
loss=1.902:  82%|████████▏ | 65/79 [02:51<00:30,  2.20s/it][A
loss=1.634:  82%|████████▏ | 65/79 [02:51<00:30,  2.20s/it][A
loss=1.634:  84%|████████▎ | 66/79 [02:51<00:22,  1.71s/it][A
loss=1.848:  84%|████████▎ | 66/79 [02:51<00:22,  1.71s/it][A
loss=1.848:  85%|████████▍ | 67/79 [02:52<00:16,  1.37s/it][A
loss=1.787:  85%|████████▍ | 67/79 [02:52<00:16,  1.37s/it][A
loss=1.787:  86%|████████▌ | 68/79 [02:52<00:12,  1.13s/it][A
loss=1.815:  86%|████████▌ | 68/79 [02:52<00:12,  1.13

{"eval_acc": 0.6660907127429806, "eval_f1": 0.14715182348495015, "eval_acc_and_f1": 0.4066212681139654, "eval_loss": 1.7148667063031877, "learning_rate": 1.5140186915887852e-05, "train_loss": 1.8196422338485718, "step": 75}



loss=1.846:  96%|█████████▌| 76/79 [03:33<00:24,  8.20s/it][A
loss=2.066:  96%|█████████▌| 76/79 [03:33<00:24,  8.20s/it][A
loss=2.066:  97%|█████████▋| 77/79 [03:34<00:11,  5.92s/it][A
loss=1.726:  97%|█████████▋| 77/79 [03:34<00:11,  5.92s/it][A
loss=1.726:  99%|█████████▊| 78/79 [03:34<00:04,  4.32s/it][A
loss=1.138:  99%|█████████▊| 78/79 [03:34<00:04,  4.32s/it][A
loss=1.138: 100%|██████████| 79/79 [03:34<00:00,  2.72s/it]
Epoch:  33%|███▎      | 1/3 [03:34<07:09, 214.84s/it]
Iteration:   0%|          | 0/79 [00:00<?, ?it/s][A
loss=1.632:   0%|          | 0/79 [00:00<?, ?it/s][A
loss=1.632:   1%|▏         | 1/79 [00:00<00:45,  1.72it/s][A
loss=1.588:   1%|▏         | 1/79 [00:00<00:45,  1.72it/s][A
loss=1.588:   3%|▎         | 2/79 [00:01<00:44,  1.73it/s][A
loss=1.701:   3%|▎         | 2/79 [00:01<00:44,  1.73it/s][A
loss=1.701:   4%|▍         | 3/79 [00:01<00:43,  1.73it/s][A
loss=1.757:   4%|▍         | 3/79 [00:01<00:43,  1.73it/s][A
loss=1.757:   5%|▌         |

{"eval_acc": 0.7110151187904967, "eval_f1": 0.17301215372463244, "eval_acc_and_f1": 0.44201363625756457, "eval_loss": 1.4992633845124925, "learning_rate": 1.3738317757009347e-05, "train_loss": 1.5779810269673666, "step": 90}



loss=1.579:  15%|█▌        | 12/79 [00:38<08:07,  7.28s/it][A
loss=1.609:  15%|█▌        | 12/79 [00:38<08:07,  7.28s/it][A
loss=1.609:  16%|█▋        | 13/79 [00:39<05:47,  5.27s/it][A
loss=1.667:  16%|█▋        | 13/79 [00:39<05:47,  5.27s/it][A
loss=1.667:  18%|█▊        | 14/79 [00:39<04:10,  3.86s/it][A
loss=1.391:  18%|█▊        | 14/79 [00:40<04:10,  3.86s/it][A
loss=1.391:  19%|█▉        | 15/79 [00:40<03:03,  2.87s/it][A
loss=1.492:  19%|█▉        | 15/79 [00:40<03:03,  2.87s/it][A
loss=1.492:  20%|██        | 16/79 [00:41<02:17,  2.19s/it][A
loss=1.741:  20%|██        | 16/79 [00:41<02:17,  2.19s/it][A
loss=1.741:  22%|██▏       | 17/79 [00:41<01:45,  1.71s/it][A
loss=1.201:  22%|██▏       | 17/79 [00:41<01:45,  1.71s/it][A
loss=1.201:  23%|██▎       | 18/79 [00:42<01:23,  1.37s/it][A
loss=1.372:  23%|██▎       | 18/79 [00:42<01:23,  1.37s/it][A
loss=1.372:  24%|██▍       | 19/79 [00:42<01:08,  1.13s/it][A
loss=1.552:  24%|██▍       | 19/79 [00:43<01:08,  1.13

{"eval_acc": 0.7281497480201584, "eval_f1": 0.17606503721253372, "eval_acc_and_f1": 0.4521073926163461, "eval_loss": 1.304234423807689, "learning_rate": 1.233644859813084e-05, "train_loss": 1.409789009888967, "step": 105}



loss=1.373:  34%|███▍      | 27/79 [01:19<06:20,  7.31s/it][A
loss=1.290:  34%|███▍      | 27/79 [01:19<06:20,  7.31s/it][A
loss=1.290:  35%|███▌      | 28/79 [01:20<04:30,  5.30s/it][A
loss=1.261:  35%|███▌      | 28/79 [01:20<04:30,  5.30s/it][A
loss=1.261:  37%|███▋      | 29/79 [01:20<03:14,  3.88s/it][A
loss=1.349:  37%|███▋      | 29/79 [01:20<03:14,  3.88s/it][A
loss=1.349:  38%|███▊      | 30/79 [01:21<02:21,  2.89s/it][A
loss=1.194:  38%|███▊      | 30/79 [01:21<02:21,  2.89s/it][A
loss=1.194:  39%|███▉      | 31/79 [01:21<01:45,  2.20s/it][A
loss=1.335:  39%|███▉      | 31/79 [01:21<01:45,  2.20s/it][A
loss=1.335:  41%|████      | 32/79 [01:22<01:20,  1.71s/it][A
loss=1.289:  41%|████      | 32/79 [01:22<01:20,  1.71s/it][A
loss=1.289:  42%|████▏     | 33/79 [01:22<01:03,  1.38s/it][A
loss=1.284:  42%|████▏     | 33/79 [01:23<01:03,  1.38s/it][A
loss=1.284:  43%|████▎     | 34/79 [01:23<00:51,  1.14s/it][A
loss=1.469:  43%|████▎     | 34/79 [01:23<00:51,  1.14

{"eval_acc": 0.7634269258459323, "eval_f1": 0.1938684297728023, "eval_acc_and_f1": 0.4786476778093673, "eval_loss": 1.1068145824330193, "learning_rate": 1.0934579439252338e-05, "train_loss": 1.237258005142212, "step": 120}



loss=0.891:  53%|█████▎    | 42/79 [02:00<04:30,  7.31s/it][A
loss=1.070:  53%|█████▎    | 42/79 [02:00<04:30,  7.31s/it][A
loss=1.070:  54%|█████▍    | 43/79 [02:00<03:10,  5.29s/it][A
loss=0.766:  54%|█████▍    | 43/79 [02:00<03:10,  5.29s/it][A
loss=0.766:  56%|█████▌    | 44/79 [02:01<02:15,  3.88s/it][A
loss=0.811:  56%|█████▌    | 44/79 [02:01<02:15,  3.88s/it][A
loss=0.811:  57%|█████▋    | 45/79 [02:01<01:38,  2.89s/it][A
loss=0.913:  57%|█████▋    | 45/79 [02:01<01:38,  2.89s/it][A
loss=0.913:  58%|█████▊    | 46/79 [02:02<01:12,  2.19s/it][A
loss=0.837:  58%|█████▊    | 46/79 [02:02<01:12,  2.19s/it][A
loss=0.837:  59%|█████▉    | 47/79 [02:02<00:54,  1.71s/it][A
loss=1.106:  59%|█████▉    | 47/79 [02:03<00:54,  1.71s/it][A
loss=1.106:  61%|██████    | 48/79 [02:03<00:42,  1.37s/it][A
loss=1.201:  61%|██████    | 48/79 [02:03<00:42,  1.37s/it][A
loss=1.201:  62%|██████▏   | 49/79 [02:04<00:34,  1.14s/it][A
loss=1.167:  62%|██████▏   | 49/79 [02:04<00:34,  1.14

{"eval_acc": 0.7742260619150468, "eval_f1": 0.206741669850943, "eval_acc_and_f1": 0.4904838658829949, "eval_loss": 1.0079429894685745, "learning_rate": 9.532710280373833e-06, "train_loss": 1.0218055407206217, "step": 135}



loss=0.927:  72%|███████▏  | 57/79 [02:40<02:40,  7.30s/it][A
loss=1.153:  72%|███████▏  | 57/79 [02:40<02:40,  7.30s/it][A
loss=1.153:  73%|███████▎  | 58/79 [02:41<01:50,  5.28s/it][A
loss=0.872:  73%|███████▎  | 58/79 [02:41<01:50,  5.28s/it][A
loss=0.872:  75%|███████▍  | 59/79 [02:41<01:17,  3.87s/it][A
loss=1.108:  75%|███████▍  | 59/79 [02:41<01:17,  3.87s/it][A
loss=1.108:  76%|███████▌  | 60/79 [02:42<00:54,  2.88s/it][A
loss=0.956:  76%|███████▌  | 60/79 [02:42<00:54,  2.88s/it][A
loss=0.956:  77%|███████▋  | 61/79 [02:42<00:39,  2.19s/it][A
loss=0.954:  77%|███████▋  | 61/79 [02:43<00:39,  2.19s/it][A
loss=0.954:  78%|███████▊  | 62/79 [02:43<00:29,  1.71s/it][A
loss=1.104:  78%|███████▊  | 62/79 [02:43<00:29,  1.71s/it][A
loss=1.104:  80%|███████▉  | 63/79 [02:44<00:21,  1.37s/it][A
loss=0.913:  80%|███████▉  | 63/79 [02:44<00:21,  1.37s/it][A
loss=0.913:  81%|████████  | 64/79 [02:44<00:17,  1.14s/it][A
loss=1.020:  81%|████████  | 64/79 [02:44<00:17,  1.14

{"eval_acc": 0.7930885529157667, "eval_f1": 0.21954812179150018, "eval_acc_and_f1": 0.5063183373536335, "eval_loss": 0.8992360298122678, "learning_rate": 8.130841121495327e-06, "train_loss": 1.0207442363103232, "step": 150}



loss=0.979:  91%|█████████ | 72/79 [03:24<00:56,  8.01s/it][A
loss=1.193:  91%|█████████ | 72/79 [03:24<00:56,  8.01s/it][A
loss=1.193:  92%|█████████▏| 73/79 [03:25<00:34,  5.78s/it][A
loss=0.972:  92%|█████████▏| 73/79 [03:25<00:34,  5.78s/it][A
loss=0.972:  94%|█████████▎| 74/79 [03:25<00:21,  4.22s/it][A
loss=0.771:  94%|█████████▎| 74/79 [03:25<00:21,  4.22s/it][A
loss=0.771:  95%|█████████▍| 75/79 [03:26<00:12,  3.13s/it][A
loss=0.822:  95%|█████████▍| 75/79 [03:26<00:12,  3.13s/it][A
loss=0.822:  96%|█████████▌| 76/79 [03:26<00:07,  2.37s/it][A
loss=0.855:  96%|█████████▌| 76/79 [03:27<00:07,  2.37s/it][A
loss=0.855:  97%|█████████▋| 77/79 [03:27<00:03,  1.83s/it][A
loss=0.899:  97%|█████████▋| 77/79 [03:27<00:03,  1.83s/it][A
loss=0.899:  99%|█████████▊| 78/79 [03:28<00:01,  1.46s/it][A
loss=0.811:  99%|█████████▊| 78/79 [03:28<00:01,  1.46s/it][A
loss=0.811: 100%|██████████| 79/79 [03:28<00:00,  2.63s/it]
Epoch:  67%|██████▋   | 2/3 [07:03<03:32, 212.84s/it]
Ite

{"eval_acc": 0.8074874010079194, "eval_f1": 0.23678267967876995, "eval_acc_and_f1": 0.5221350403433447, "eval_loss": 0.8500771054199764, "learning_rate": 6.728971962616823e-06, "train_loss": 0.876128242413203, "step": 165}



loss=0.768:  10%|█         | 8/79 [00:36<08:39,  7.32s/it][A
loss=0.883:  10%|█         | 8/79 [00:36<08:39,  7.32s/it][A
loss=0.883:  11%|█▏        | 9/79 [00:37<06:10,  5.30s/it][A
loss=0.704:  11%|█▏        | 9/79 [00:37<06:10,  5.30s/it][A
loss=0.704:  13%|█▎        | 10/79 [00:37<04:28,  3.88s/it][A
loss=0.776:  13%|█▎        | 10/79 [00:38<04:28,  3.88s/it][A
loss=0.776:  14%|█▍        | 11/79 [00:38<03:16,  2.89s/it][A
loss=1.012:  14%|█▍        | 11/79 [00:38<03:16,  2.89s/it][A
loss=1.012:  15%|█▌        | 12/79 [00:39<02:27,  2.20s/it][A
loss=0.501:  15%|█▌        | 12/79 [00:39<02:27,  2.20s/it][A
loss=0.501:  16%|█▋        | 13/79 [00:39<01:53,  1.71s/it][A
loss=0.783:  16%|█▋        | 13/79 [00:39<01:53,  1.71s/it][A
loss=0.783:  18%|█▊        | 14/79 [00:40<01:29,  1.37s/it][A
loss=0.811:  18%|█▊        | 14/79 [00:40<01:29,  1.37s/it][A
loss=0.811:  19%|█▉        | 15/79 [00:40<01:12,  1.13s/it][A
loss=0.751:  19%|█▉        | 15/79 [00:40<01:12,  1.13s/it

{"eval_acc": 0.8123830093592512, "eval_f1": 0.23784482080550198, "eval_acc_and_f1": 0.5251139150823766, "eval_loss": 0.8151919692754745, "learning_rate": 5.3271028037383174e-06, "train_loss": 0.8094080766042073, "step": 180}



loss=0.911:  29%|██▉       | 23/79 [01:21<07:34,  8.12s/it][A
loss=0.661:  29%|██▉       | 23/79 [01:21<07:34,  8.12s/it][A
loss=0.661:  30%|███       | 24/79 [01:21<05:22,  5.86s/it][A
loss=1.013:  30%|███       | 24/79 [01:21<05:22,  5.86s/it][A
loss=1.013:  32%|███▏      | 25/79 [01:22<03:51,  4.28s/it][A
loss=0.754:  32%|███▏      | 25/79 [01:22<03:51,  4.28s/it][A
loss=0.754:  33%|███▎      | 26/79 [01:22<02:48,  3.17s/it][A
loss=0.845:  33%|███▎      | 26/79 [01:23<02:48,  3.17s/it][A
loss=0.845:  34%|███▍      | 27/79 [01:23<02:04,  2.40s/it][A
loss=0.915:  34%|███▍      | 27/79 [01:23<02:04,  2.40s/it][A
loss=0.915:  35%|███▌      | 28/79 [01:24<01:34,  1.85s/it][A
loss=0.668:  35%|███▌      | 28/79 [01:24<01:34,  1.85s/it][A
loss=0.668:  37%|███▋      | 29/79 [01:24<01:13,  1.47s/it][A
loss=0.605:  37%|███▋      | 29/79 [01:24<01:13,  1.47s/it][A
loss=0.605:  38%|███▊      | 30/79 [01:25<00:58,  1.20s/it][A
loss=0.922:  38%|███▊      | 30/79 [01:25<00:58,  1.20

{"eval_acc": 0.8223182145428366, "eval_f1": 0.24552727342560993, "eval_acc_and_f1": 0.5339227439842232, "eval_loss": 0.7748749511582511, "learning_rate": 3.925233644859814e-06, "train_loss": 0.7723236838976543, "step": 195}



loss=0.906:  48%|████▊     | 38/79 [02:04<05:20,  7.81s/it][A
loss=0.642:  48%|████▊     | 38/79 [02:04<05:20,  7.81s/it][A
loss=0.642:  49%|████▉     | 39/79 [02:04<03:45,  5.64s/it][A
loss=0.721:  49%|████▉     | 39/79 [02:04<03:45,  5.64s/it][A
loss=0.721:  51%|█████     | 40/79 [02:05<02:40,  4.12s/it][A
loss=0.728:  51%|█████     | 40/79 [02:05<02:40,  4.12s/it][A
loss=0.728:  52%|█████▏    | 41/79 [02:05<01:56,  3.06s/it][A
loss=0.407:  52%|█████▏    | 41/79 [02:06<01:56,  3.06s/it][A
loss=0.407:  53%|█████▎    | 42/79 [02:06<01:25,  2.32s/it][A
loss=0.654:  53%|█████▎    | 42/79 [02:06<01:25,  2.32s/it][A
loss=0.654:  54%|█████▍    | 43/79 [02:07<01:04,  1.80s/it][A
loss=0.631:  54%|█████▍    | 43/79 [02:07<01:04,  1.80s/it][A
loss=0.631:  56%|█████▌    | 44/79 [02:07<00:50,  1.44s/it][A
loss=1.040:  56%|█████▌    | 44/79 [02:07<00:50,  1.44s/it][A
loss=1.040:  57%|█████▋    | 45/79 [02:08<00:40,  1.18s/it][A
loss=0.562:  57%|█████▋    | 45/79 [02:08<00:40,  1.18

{"eval_acc": 0.82735781137509, "eval_f1": 0.2539362044204405, "eval_acc_and_f1": 0.5406470078977652, "eval_loss": 0.7629671437399728, "learning_rate": 2.5233644859813085e-06, "train_loss": 0.7302893002827963, "step": 210}



loss=0.564:  67%|██████▋   | 53/79 [02:48<03:31,  8.15s/it][A
loss=0.669:  67%|██████▋   | 53/79 [02:48<03:31,  8.15s/it][A
loss=0.669:  68%|██████▊   | 54/79 [02:49<02:27,  5.88s/it][A
loss=0.734:  68%|██████▊   | 54/79 [02:49<02:27,  5.88s/it][A
loss=0.734:  70%|██████▉   | 55/79 [02:49<01:42,  4.29s/it][A
loss=1.428:  70%|██████▉   | 55/79 [02:50<01:42,  4.29s/it][A
loss=1.428:  71%|███████   | 56/79 [02:50<01:13,  3.18s/it][A
loss=0.887:  71%|███████   | 56/79 [02:50<01:13,  3.18s/it][A
loss=0.887:  72%|███████▏  | 57/79 [02:51<00:52,  2.40s/it][A
loss=0.586:  72%|███████▏  | 57/79 [02:51<00:52,  2.40s/it][A
loss=0.586:  73%|███████▎  | 58/79 [02:51<00:38,  1.86s/it][A
loss=0.707:  73%|███████▎  | 58/79 [02:51<00:38,  1.86s/it][A
loss=0.707:  75%|███████▍  | 59/79 [02:52<00:29,  1.47s/it][A
loss=0.696:  75%|███████▍  | 59/79 [02:52<00:29,  1.47s/it][A
loss=0.696:  76%|███████▌  | 60/79 [02:52<00:22,  1.21s/it][A
loss=0.619:  76%|███████▌  | 60/79 [02:53<00:22,  1.21

{"eval_acc": 0.827933765298776, "eval_f1": 0.2552329172807623, "eval_acc_and_f1": 0.5415833412897691, "eval_loss": 0.7497879351888385, "learning_rate": 1.1214953271028038e-06, "train_loss": 0.7553534785906474, "step": 225}



loss=0.585:  86%|████████▌ | 68/79 [03:31<01:24,  7.67s/it][A
loss=0.712:  86%|████████▌ | 68/79 [03:31<01:24,  7.67s/it][A
loss=0.712:  87%|████████▋ | 69/79 [03:31<00:55,  5.54s/it][A
loss=0.821:  87%|████████▋ | 69/79 [03:31<00:55,  5.54s/it][A
loss=0.821:  89%|████████▊ | 70/79 [03:32<00:36,  4.05s/it][A
loss=0.550:  89%|████████▊ | 70/79 [03:32<00:36,  4.05s/it][A
loss=0.550:  90%|████████▉ | 71/79 [03:32<00:24,  3.01s/it][A
loss=0.668:  90%|████████▉ | 71/79 [03:32<00:24,  3.01s/it][A
loss=0.668:  91%|█████████ | 72/79 [03:33<00:16,  2.29s/it][A
loss=0.624:  91%|█████████ | 72/79 [03:33<00:16,  2.29s/it][A
loss=0.624:  92%|█████████▏| 73/79 [03:34<00:10,  1.77s/it][A
loss=1.064:  92%|█████████▏| 73/79 [03:34<00:10,  1.77s/it][A
loss=1.064:  94%|█████████▎| 74/79 [03:34<00:07,  1.42s/it][A
loss=0.716:  94%|█████████▎| 74/79 [03:34<00:07,  1.42s/it][A
loss=0.716:  95%|█████████▍| 75/79 [03:35<00:04,  1.17s/it][A
loss=0.546:  95%|█████████▍| 75/79 [03:35<00:04,  1.17

{"eval_acc": 0.8280777537796976, "eval_f1": 0.25601604470575157, "eval_acc_and_f1": 0.5420468992427245, "eval_loss": 0.7462226705891746, "learning_rate": 0.0, "train_loss": 0.5829422076543173, "step": 237}


07/18/2022 05:14:18 - INFO - utilities.trainers -   ***** Running evaluation iter-5_trial1 *****
07/18/2022 05:14:18 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 05:14:18 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 05:14:46 - INFO - utilities.trainers -   ***** Eval results iter-5_trial1 *****
07/18/2022 05:14:46 - INFO - utilities.trainers -     acc = 0.8280777537796976
07/18/2022 05:14:46 - INFO - utilities.trainers -     acc_and_f1 = 0.5420468992427245
07/18/2022 05:14:46 - INFO - utilities.trainers -     f1 = 0.25601604470575157
07/18/2022 05:14:49 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 05:14:55 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 05:14:55 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 05:14:55 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 05:17:15 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 05:17:15 - INFO - utilities.trainers -     acc = 0.8295893093715799
07/18/2022 05:17:15 - INFO - utilities.trainers -     acc_and_f1 = 0.5368866869626376
07/18/2022 05:17:15 - INFO - utilities.trainers -     f1 = 0.24418406455369546



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 05:17:15 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 05:17:34 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 05:17:39 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 05:17:39 - INFO - utilities.trainers -     Num examples = 47500
07/18/2022 05:17:39 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/186 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 186/186 [06:19<00:00,  2.04s/it]
07/18/2022 05:23:59 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 05:23:59 - INFO - utilities.trainers -     acc = 0.8290315789473685
07/18/2022 05:23:59 - INFO - utilities.trainers -     acc_and_f1 = 0.5404298903519439
07/18/2022 05:23:59 - INFO - utilities.trainers -     f1 = 0.2518282017565193
07/18/2022 05:23:59 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 05:24:17 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 05:24:21 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 05:24:21 - INFO - utilities.trainers -     Num examples = 2500
07/18/2022 05:24:21 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 10/10 [00:19<00:00,  1.99s/it]
07/18/2022 05:24:41 - INFO - uti



************
End of iteration 5:
Train loss 1.4306, Val loss 0.7462226705891746, Test loss 0.7516504456015194
Annotated 500 samples
Current labeled (training) data: 3000 samples
Remaining budget: 1000 (in samples)
************

Saving json with the results....

 Start Training model of iteration 6!



07/18/2022 05:27:52 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.8280777537796976, acc_best_iteration=5, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-5', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-6', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 28
total steps: 281
logging steps: 18
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.233:   1%|          | 1/94 [00:00<00:48,  1.91it/s][A
loss=3.290:   1%|          | 1/94 [00:00<00:48,  1.91it/s][A
loss=3.290:   2%|▏         | 2/94 [00:01<00:50,  1.82it/s][A
loss=3.446:   2%|▏         | 2/94 [00:01<00:50,  1.82it/s][A
loss=3.446:   3%|▎         | 3/94 [00:01<00:50,  1.79it/s][A
loss=3.316:   3%|▎         | 3/94 [00:01<00:50,  1.79it/s][A
loss=3.316:   4%|▍         | 4/94 [00:02<00:50,  1.77it/s][A
loss=3.416:   4%|▍         | 4/94 [00:02<00:50,  1.77it/s][A
loss=3.416:   5%|▌         | 5/94 [00:02<00:50,  1.76it/s][A
loss=3.205:   5%|▌         | 5/94 [00:03<00:50,  1.76it/s][A
loss=3.205:   6%|▋         | 6/94 [00:03<00:50,  1.75it/s][A
loss=3.188:   6%|▋         | 6/94 [00:03<00:50,  1.75it/s][A
loss=3.188:   7%|▋         | 7/94 [00:04<00:49,  1.75it/s][A
loss=3.318:   7%|▋         | 7/94 [00:04<00:49,  1.75it/s][A
loss=3.318:   9%|▊         | 8/94 [00:04<00:49,  1.74it/s][A
loss=3.243:   9%|▊         | 8/94 [00:04<00:49,  1.74it/s][A
loss=3.

{"eval_acc": 0.17566594672426206, "eval_f1": 0.013104394280422689, "eval_acc_and_f1": 0.09438517050234237, "eval_loss": 2.8836023211479187, "learning_rate": 1.2857142857142859e-05, "train_loss": 3.1689089139302573, "step": 18}



loss=2.920:  20%|██        | 19/94 [00:42<08:59,  7.19s/it][A
loss=2.705:  20%|██        | 19/94 [00:42<08:59,  7.19s/it][A
loss=2.705:  21%|██▏       | 20/94 [00:43<06:25,  5.21s/it][A
loss=2.862:  21%|██▏       | 20/94 [00:43<06:25,  5.21s/it][A
loss=2.862:  22%|██▏       | 21/94 [00:43<04:38,  3.82s/it][A
loss=2.782:  22%|██▏       | 21/94 [00:43<04:38,  3.82s/it][A
loss=2.782:  23%|██▎       | 22/94 [00:44<03:25,  2.85s/it][A
loss=2.873:  23%|██▎       | 22/94 [00:44<03:25,  2.85s/it][A
loss=2.873:  24%|██▍       | 23/94 [00:44<02:33,  2.17s/it][A
loss=2.898:  24%|██▍       | 23/94 [00:44<02:33,  2.17s/it][A
loss=2.898:  26%|██▌       | 24/94 [00:45<01:58,  1.69s/it][A
loss=2.696:  26%|██▌       | 24/94 [00:45<01:58,  1.69s/it][A
loss=2.696:  27%|██▋       | 25/94 [00:45<01:33,  1.36s/it][A
loss=2.578:  27%|██▋       | 25/94 [00:46<01:33,  1.36s/it][A
loss=2.578:  28%|██▊       | 26/94 [00:46<01:17,  1.13s/it][A
loss=2.812:  28%|██▊       | 26/94 [00:46<01:17,  1.13

{"eval_acc": 0.17494600431965443, "eval_f1": 0.012568183479181476, "eval_acc_and_f1": 0.09375709389941796, "eval_loss": 2.484618195465633, "learning_rate": 1.937007874015748e-05, "train_loss": 2.639375752872891, "step": 36}



loss=2.497:  39%|███▉      | 37/94 [01:28<07:33,  7.96s/it][A
loss=2.274:  39%|███▉      | 37/94 [01:28<07:33,  7.96s/it][A
loss=2.274:  40%|████      | 38/94 [01:28<05:21,  5.75s/it][A
loss=2.259:  40%|████      | 38/94 [01:28<05:21,  5.75s/it][A
loss=2.259:  41%|████▏     | 39/94 [01:29<03:50,  4.20s/it][A
loss=2.312:  41%|████▏     | 39/94 [01:29<03:50,  4.20s/it][A
loss=2.312:  43%|████▎     | 40/94 [01:29<02:48,  3.11s/it][A
loss=2.610:  43%|████▎     | 40/94 [01:29<02:48,  3.11s/it][A
loss=2.610:  44%|████▎     | 41/94 [01:30<02:04,  2.36s/it][A
loss=2.331:  44%|████▎     | 41/94 [01:30<02:04,  2.36s/it][A
loss=2.331:  45%|████▍     | 42/94 [01:31<01:34,  1.83s/it][A
loss=2.387:  45%|████▍     | 42/94 [01:31<01:34,  1.83s/it][A
loss=2.387:  46%|████▌     | 43/94 [01:31<01:14,  1.45s/it][A
loss=2.426:  46%|████▌     | 43/94 [01:31<01:14,  1.45s/it][A
loss=2.426:  47%|████▋     | 44/94 [01:32<00:59,  1.19s/it][A
loss=2.446:  47%|████▋     | 44/94 [01:32<00:59,  1.19

{"eval_acc": 0.38646508279337655, "eval_f1": 0.06730920224378667, "eval_acc_and_f1": 0.2268871425185816, "eval_loss": 2.239226222038269, "learning_rate": 1.7952755905511813e-05, "train_loss": 2.3824996021058826, "step": 54}



loss=2.067:  59%|█████▊    | 55/94 [02:12<04:57,  7.63s/it][A
loss=2.153:  59%|█████▊    | 55/94 [02:12<04:57,  7.63s/it][A
loss=2.153:  60%|█████▉    | 56/94 [02:12<03:29,  5.51s/it][A
loss=2.298:  60%|█████▉    | 56/94 [02:12<03:29,  5.51s/it][A
loss=2.298:  61%|██████    | 57/94 [02:13<02:29,  4.03s/it][A
loss=2.530:  61%|██████    | 57/94 [02:13<02:29,  4.03s/it][A
loss=2.530:  62%|██████▏   | 58/94 [02:13<01:47,  3.00s/it][A
loss=2.032:  62%|██████▏   | 58/94 [02:13<01:47,  3.00s/it][A
loss=2.032:  63%|██████▎   | 59/94 [02:14<01:19,  2.27s/it][A
loss=2.052:  63%|██████▎   | 59/94 [02:14<01:19,  2.27s/it][A
loss=2.052:  64%|██████▍   | 60/94 [02:14<01:00,  1.77s/it][A
loss=2.144:  64%|██████▍   | 60/94 [02:15<01:00,  1.77s/it][A
loss=2.144:  65%|██████▍   | 61/94 [02:15<00:46,  1.41s/it][A
loss=2.201:  65%|██████▍   | 61/94 [02:15<00:46,  1.41s/it][A
loss=2.201:  66%|██████▌   | 62/94 [02:16<00:37,  1.16s/it][A
loss=2.178:  66%|██████▌   | 62/94 [02:16<00:37,  1.16

{"eval_acc": 0.3886249100071994, "eval_f1": 0.07913459781632824, "eval_acc_and_f1": 0.23387975391176383, "eval_loss": 2.0379689250673567, "learning_rate": 1.6535433070866142e-05, "train_loss": 2.1105780137909784, "step": 72}



loss=2.076:  78%|███████▊  | 73/94 [02:54<02:33,  7.31s/it][A
loss=1.955:  78%|███████▊  | 73/94 [02:54<02:33,  7.31s/it][A
loss=1.955:  79%|███████▊  | 74/94 [02:55<01:45,  5.29s/it][A
loss=1.927:  79%|███████▊  | 74/94 [02:55<01:45,  5.29s/it][A
loss=1.927:  80%|███████▉  | 75/94 [02:55<01:13,  3.88s/it][A
loss=2.254:  80%|███████▉  | 75/94 [02:55<01:13,  3.88s/it][A
loss=2.254:  81%|████████  | 76/94 [02:56<00:52,  2.89s/it][A
loss=2.289:  81%|████████  | 76/94 [02:56<00:52,  2.89s/it][A
loss=2.289:  82%|████████▏ | 77/94 [02:56<00:37,  2.20s/it][A
loss=1.917:  82%|████████▏ | 77/94 [02:57<00:37,  2.20s/it][A
loss=1.917:  83%|████████▎ | 78/94 [02:57<00:27,  1.72s/it][A
loss=2.060:  83%|████████▎ | 78/94 [02:57<00:27,  1.72s/it][A
loss=2.060:  84%|████████▍ | 79/94 [02:58<00:20,  1.38s/it][A
loss=2.058:  84%|████████▍ | 79/94 [02:58<00:20,  1.38s/it][A
loss=2.058:  85%|████████▌ | 80/94 [02:58<00:15,  1.14s/it][A
loss=2.046:  85%|████████▌ | 80/94 [02:58<00:15,  1.14

{"eval_acc": 0.5701943844492441, "eval_f1": 0.13958370981793147, "eval_acc_and_f1": 0.35488904713358776, "eval_loss": 1.7797475244317735, "learning_rate": 1.5118110236220473e-05, "train_loss": 2.115152186817593, "step": 90}



loss=1.785:  97%|█████████▋| 91/94 [03:39<00:23,  7.89s/it][A
loss=1.959:  97%|█████████▋| 91/94 [03:39<00:23,  7.89s/it][A
loss=1.959:  98%|█████████▊| 92/94 [03:40<00:11,  5.70s/it][A
loss=2.298:  98%|█████████▊| 92/94 [03:40<00:11,  5.70s/it][A
loss=2.298:  99%|█████████▉| 93/94 [03:40<00:04,  4.16s/it][A
loss=1.707:  99%|█████████▉| 93/94 [03:41<00:04,  4.16s/it][A
loss=1.707: 100%|██████████| 94/94 [03:41<00:00,  2.36s/it]
Epoch:  33%|███▎      | 1/3 [03:41<07:22, 221.42s/it]
Iteration:   0%|          | 0/94 [00:00<?, ?it/s][A
loss=1.807:   0%|          | 0/94 [00:00<?, ?it/s][A
loss=1.807:   1%|          | 1/94 [00:00<00:53,  1.73it/s][A
loss=1.574:   1%|          | 1/94 [00:00<00:53,  1.73it/s][A
loss=1.574:   2%|▏         | 2/94 [00:01<00:53,  1.72it/s][A
loss=1.501:   2%|▏         | 2/94 [00:01<00:53,  1.72it/s][A
loss=1.501:   3%|▎         | 3/94 [00:01<00:52,  1.72it/s][A
loss=1.907:   3%|▎         | 3/94 [00:01<00:52,  1.72it/s][A
loss=1.907:   4%|▍         |

{"eval_acc": 0.621886249100072, "eval_f1": 0.15963814569875268, "eval_acc_and_f1": 0.3907621973994123, "eval_loss": 1.5815428367682867, "learning_rate": 1.3700787401574804e-05, "train_loss": 1.7655362685521443, "step": 108}



loss=2.004:  16%|█▌        | 15/94 [00:40<09:38,  7.32s/it][A
loss=1.838:  16%|█▌        | 15/94 [00:40<09:38,  7.32s/it][A
loss=1.838:  17%|█▋        | 16/94 [00:41<06:53,  5.30s/it][A
loss=1.872:  17%|█▋        | 16/94 [00:41<06:53,  5.30s/it][A
loss=1.872:  18%|█▊        | 17/94 [00:41<04:58,  3.88s/it][A
loss=1.496:  18%|█▊        | 17/94 [00:42<04:58,  3.88s/it][A
loss=1.496:  19%|█▉        | 18/94 [00:42<03:39,  2.89s/it][A
loss=1.752:  19%|█▉        | 18/94 [00:42<03:39,  2.89s/it][A
loss=1.752:  20%|██        | 19/94 [00:43<02:44,  2.20s/it][A
loss=1.806:  20%|██        | 19/94 [00:43<02:44,  2.20s/it][A
loss=1.806:  21%|██▏       | 20/94 [00:43<02:06,  1.71s/it][A
loss=1.224:  21%|██▏       | 20/94 [00:43<02:06,  1.71s/it][A
loss=1.224:  22%|██▏       | 21/94 [00:44<01:40,  1.37s/it][A
loss=1.674:  22%|██▏       | 21/94 [00:44<01:40,  1.37s/it][A
loss=1.674:  23%|██▎       | 22/94 [00:44<01:21,  1.13s/it][A
loss=1.567:  23%|██▎       | 22/94 [00:45<01:21,  1.13

{"eval_acc": 0.6852411807055435, "eval_f1": 0.18023634851461565, "eval_acc_and_f1": 0.4327387646100796, "eval_loss": 1.386972474200385, "learning_rate": 1.2283464566929135e-05, "train_loss": 1.6281807753774855, "step": 126}



loss=2.045:  35%|███▌      | 33/94 [01:26<08:10,  8.03s/it][A
loss=1.226:  35%|███▌      | 33/94 [01:26<08:10,  8.03s/it][A
loss=1.226:  36%|███▌      | 34/94 [01:27<05:47,  5.80s/it][A
loss=1.325:  36%|███▌      | 34/94 [01:27<05:47,  5.80s/it][A
loss=1.325:  37%|███▋      | 35/94 [01:27<04:09,  4.23s/it][A
loss=1.368:  37%|███▋      | 35/94 [01:28<04:09,  4.23s/it][A
loss=1.368:  38%|███▊      | 36/94 [01:28<03:01,  3.14s/it][A
loss=1.835:  38%|███▊      | 36/94 [01:28<03:01,  3.14s/it][A
loss=1.835:  39%|███▉      | 37/94 [01:29<02:15,  2.37s/it][A
loss=1.571:  39%|███▉      | 37/94 [01:29<02:15,  2.37s/it][A
loss=1.571:  40%|████      | 38/94 [01:29<01:42,  1.84s/it][A
loss=1.440:  40%|████      | 38/94 [01:29<01:42,  1.84s/it][A
loss=1.440:  41%|████▏     | 39/94 [01:30<01:20,  1.46s/it][A
loss=1.471:  41%|████▏     | 39/94 [01:30<01:20,  1.46s/it][A
loss=1.471:  43%|████▎     | 40/94 [01:30<01:04,  1.20s/it][A
loss=1.214:  43%|████▎     | 40/94 [01:30<01:04,  1.20

{"eval_acc": 0.7245500359971202, "eval_f1": 0.19205772233266127, "eval_acc_and_f1": 0.45830387916489074, "eval_loss": 1.2375805037362235, "learning_rate": 1.0866141732283466e-05, "train_loss": 1.452769160270691, "step": 144}



loss=1.454:  54%|█████▍    | 51/94 [02:10<05:27,  7.61s/it][A
loss=1.205:  54%|█████▍    | 51/94 [02:10<05:27,  7.61s/it][A
loss=1.205:  55%|█████▌    | 52/94 [02:11<03:51,  5.50s/it][A
loss=1.849:  55%|█████▌    | 52/94 [02:11<03:51,  5.50s/it][A
loss=1.849:  56%|█████▋    | 53/94 [02:11<02:45,  4.03s/it][A
loss=1.091:  56%|█████▋    | 53/94 [02:11<02:45,  4.03s/it][A
loss=1.091:  57%|█████▋    | 54/94 [02:12<01:59,  3.00s/it][A
loss=1.581:  57%|█████▋    | 54/94 [02:12<01:59,  3.00s/it][A
loss=1.581:  59%|█████▊    | 55/94 [02:12<01:28,  2.27s/it][A
loss=1.280:  59%|█████▊    | 55/94 [02:13<01:28,  2.27s/it][A
loss=1.280:  60%|█████▉    | 56/94 [02:13<01:07,  1.77s/it][A
loss=1.476:  60%|█████▉    | 56/94 [02:13<01:07,  1.77s/it][A
loss=1.476:  61%|██████    | 57/94 [02:14<00:52,  1.41s/it][A
loss=1.252:  61%|██████    | 57/94 [02:14<00:52,  1.41s/it][A
loss=1.252:  62%|██████▏   | 58/94 [02:14<00:41,  1.16s/it][A
loss=1.627:  62%|██████▏   | 58/94 [02:14<00:41,  1.16

{"eval_acc": 0.744708423326134, "eval_f1": 0.20591702716285212, "eval_acc_and_f1": 0.47531272524449303, "eval_loss": 1.1160817997796195, "learning_rate": 9.448818897637797e-06, "train_loss": 1.336315976248847, "step": 162}



loss=0.993:  73%|███████▎  | 69/94 [02:57<03:23,  8.14s/it][A
loss=1.124:  73%|███████▎  | 69/94 [02:57<03:23,  8.14s/it][A
loss=1.124:  74%|███████▍  | 70/94 [02:57<02:20,  5.87s/it][A
loss=1.540:  74%|███████▍  | 70/94 [02:57<02:20,  5.87s/it][A
loss=1.540:  76%|███████▌  | 71/94 [02:58<01:38,  4.29s/it][A
loss=1.046:  76%|███████▌  | 71/94 [02:58<01:38,  4.29s/it][A
loss=1.046:  77%|███████▋  | 72/94 [02:58<01:09,  3.18s/it][A
loss=0.960:  77%|███████▋  | 72/94 [02:58<01:09,  3.18s/it][A
loss=0.960:  78%|███████▊  | 73/94 [02:59<00:50,  2.40s/it][A
loss=1.239:  78%|███████▊  | 73/94 [02:59<00:50,  2.40s/it][A
loss=1.239:  79%|███████▊  | 74/94 [02:59<00:37,  1.86s/it][A
loss=1.006:  79%|███████▊  | 74/94 [03:00<00:37,  1.86s/it][A
loss=1.006:  80%|███████▉  | 75/94 [03:00<00:28,  1.47s/it][A
loss=1.340:  80%|███████▉  | 75/94 [03:00<00:28,  1.47s/it][A
loss=1.340:  81%|████████  | 76/94 [03:01<00:21,  1.21s/it][A
loss=1.171:  81%|████████  | 76/94 [03:01<00:21,  1.21

{"eval_acc": 0.7591072714182865, "eval_f1": 0.21309013960781262, "eval_acc_and_f1": 0.48609870551304957, "eval_loss": 1.0124303294079644, "learning_rate": 8.031496062992128e-06, "train_loss": 1.2314215931627486, "step": 180}



loss=1.301:  93%|█████████▎| 87/94 [03:40<00:52,  7.54s/it][A
loss=1.426:  93%|█████████▎| 87/94 [03:40<00:52,  7.54s/it][A
loss=1.426:  94%|█████████▎| 88/94 [03:41<00:32,  5.45s/it][A
loss=1.302:  94%|█████████▎| 88/94 [03:41<00:32,  5.45s/it][A
loss=1.302:  95%|█████████▍| 89/94 [03:41<00:19,  3.99s/it][A
loss=0.801:  95%|█████████▍| 89/94 [03:41<00:19,  3.99s/it][A
loss=0.801:  96%|█████████▌| 90/94 [03:42<00:11,  2.97s/it][A
loss=1.403:  96%|█████████▌| 90/94 [03:42<00:11,  2.97s/it][A
loss=1.403:  97%|█████████▋| 91/94 [03:42<00:06,  2.25s/it][A
loss=1.349:  97%|█████████▋| 91/94 [03:43<00:06,  2.25s/it][A
loss=1.349:  98%|█████████▊| 92/94 [03:43<00:03,  1.75s/it][A
loss=1.056:  98%|█████████▊| 92/94 [03:43<00:03,  1.75s/it][A
loss=1.056:  99%|█████████▉| 93/94 [03:44<00:01,  1.40s/it][A
loss=0.879:  99%|█████████▉| 93/94 [03:44<00:01,  1.40s/it][A
loss=0.879: 100%|██████████| 94/94 [03:44<00:00,  2.39s/it]
Epoch:  67%|██████▋   | 2/3 [07:25<03:42, 222.37s/it]
Ite

{"eval_acc": 0.7765298776097912, "eval_f1": 0.2274344406911537, "eval_acc_and_f1": 0.5019821591504725, "eval_loss": 0.9376573839357921, "learning_rate": 6.614173228346458e-06, "train_loss": 1.1241870489385393, "step": 198}



loss=1.126:  12%|█▏        | 11/94 [00:40<10:40,  7.71s/it][A
loss=1.596:  12%|█▏        | 11/94 [00:40<10:40,  7.71s/it][A
loss=1.596:  13%|█▎        | 12/94 [00:40<07:36,  5.57s/it][A
loss=1.137:  13%|█▎        | 12/94 [00:41<07:36,  5.57s/it][A
loss=1.137:  14%|█▍        | 13/94 [00:41<05:30,  4.08s/it][A
loss=1.142:  14%|█▍        | 13/94 [00:41<05:30,  4.08s/it][A
loss=1.142:  15%|█▍        | 14/94 [00:42<04:02,  3.03s/it][A
loss=0.939:  15%|█▍        | 14/94 [00:42<04:02,  3.03s/it][A
loss=0.939:  16%|█▌        | 15/94 [00:42<03:01,  2.29s/it][A
loss=1.020:  16%|█▌        | 15/94 [00:42<03:01,  2.29s/it][A
loss=1.020:  17%|█▋        | 16/94 [00:43<02:18,  1.78s/it][A
loss=1.328:  17%|█▋        | 16/94 [00:43<02:18,  1.78s/it][A
loss=1.328:  18%|█▊        | 17/94 [00:43<01:49,  1.43s/it][A
loss=1.048:  18%|█▊        | 17/94 [00:44<01:49,  1.43s/it][A
loss=1.048:  19%|█▉        | 18/94 [00:44<01:29,  1.17s/it][A
loss=1.133:  19%|█▉        | 18/94 [00:44<01:29,  1.17

{"eval_acc": 0.7861771058315334, "eval_f1": 0.23854817421078317, "eval_acc_and_f1": 0.5123626400211583, "eval_loss": 0.8710545769759587, "learning_rate": 5.196850393700788e-06, "train_loss": 1.0419583817323048, "step": 216}



loss=1.066:  31%|███       | 29/94 [01:25<08:37,  7.96s/it][A
loss=1.249:  31%|███       | 29/94 [01:26<08:37,  7.96s/it][A
loss=1.249:  32%|███▏      | 30/94 [01:26<06:07,  5.75s/it][A
loss=0.811:  32%|███▏      | 30/94 [01:26<06:07,  5.75s/it][A
loss=0.811:  33%|███▎      | 31/94 [01:27<04:24,  4.20s/it][A
loss=0.973:  33%|███▎      | 31/94 [01:27<04:24,  4.20s/it][A
loss=0.973:  34%|███▍      | 32/94 [01:27<03:12,  3.11s/it][A
loss=1.328:  34%|███▍      | 32/94 [01:27<03:12,  3.11s/it][A
loss=1.328:  35%|███▌      | 33/94 [01:28<02:23,  2.36s/it][A
loss=1.146:  35%|███▌      | 33/94 [01:28<02:23,  2.36s/it][A
loss=1.146:  36%|███▌      | 34/94 [01:28<01:49,  1.82s/it][A
loss=0.985:  36%|███▌      | 34/94 [01:29<01:49,  1.82s/it][A
loss=0.985:  37%|███▋      | 35/94 [01:29<01:25,  1.45s/it][A
loss=1.063:  37%|███▋      | 35/94 [01:29<01:25,  1.45s/it][A
loss=1.063:  38%|███▊      | 36/94 [01:30<01:09,  1.19s/it][A
loss=0.951:  38%|███▊      | 36/94 [01:30<01:09,  1.19

{"eval_acc": 0.8028797696184305, "eval_f1": 0.24570431053545255, "eval_acc_and_f1": 0.5242920400769415, "eval_loss": 0.8430559784173965, "learning_rate": 3.7795275590551182e-06, "train_loss": 0.9999731613530053, "step": 234}



loss=0.666:  50%|█████     | 47/94 [02:11<06:11,  7.90s/it][A
loss=1.073:  50%|█████     | 47/94 [02:11<06:11,  7.90s/it][A
loss=1.073:  51%|█████     | 48/94 [02:11<04:22,  5.70s/it][A
loss=1.022:  51%|█████     | 48/94 [02:11<04:22,  5.70s/it][A
loss=1.022:  52%|█████▏    | 49/94 [02:12<03:07,  4.16s/it][A
loss=0.845:  52%|█████▏    | 49/94 [02:12<03:07,  4.16s/it][A
loss=0.845:  53%|█████▎    | 50/94 [02:12<02:15,  3.09s/it][A
loss=0.995:  53%|█████▎    | 50/94 [02:13<02:15,  3.09s/it][A
loss=0.995:  54%|█████▍    | 51/94 [02:13<01:40,  2.34s/it][A
loss=0.773:  54%|█████▍    | 51/94 [02:13<01:40,  2.34s/it][A
loss=0.773:  55%|█████▌    | 52/94 [02:14<01:16,  1.81s/it][A
loss=1.077:  55%|█████▌    | 52/94 [02:14<01:16,  1.81s/it][A
loss=1.077:  56%|█████▋    | 53/94 [02:14<00:59,  1.44s/it][A
loss=1.005:  56%|█████▋    | 53/94 [02:14<00:59,  1.44s/it][A
loss=1.005:  57%|█████▋    | 54/94 [02:15<00:47,  1.18s/it][A
loss=0.840:  57%|█████▋    | 54/94 [02:15<00:47,  1.18

{"eval_acc": 0.8028797696184305, "eval_f1": 0.2541191913672134, "eval_acc_and_f1": 0.528499480492822, "eval_loss": 0.8169779883963721, "learning_rate": 2.362204724409449e-06, "train_loss": 0.9244548115465376, "step": 252}



loss=0.833:  69%|██████▉   | 65/94 [02:54<03:35,  7.44s/it][A
loss=1.047:  69%|██████▉   | 65/94 [02:54<03:35,  7.44s/it][A
loss=1.047:  70%|███████   | 66/94 [02:54<02:30,  5.38s/it][A
loss=0.757:  70%|███████   | 66/94 [02:55<02:30,  5.38s/it][A
loss=0.757:  71%|███████▏  | 67/94 [02:55<01:46,  3.94s/it][A
loss=0.845:  71%|███████▏  | 67/94 [02:55<01:46,  3.94s/it][A
loss=0.845:  72%|███████▏  | 68/94 [02:56<01:16,  2.93s/it][A
loss=0.766:  72%|███████▏  | 68/94 [02:56<01:16,  2.93s/it][A
loss=0.766:  73%|███████▎  | 69/94 [02:56<00:55,  2.23s/it][A
loss=0.919:  73%|███████▎  | 69/94 [02:56<00:55,  2.23s/it][A
loss=0.919:  74%|███████▍  | 70/94 [02:57<00:41,  1.73s/it][A
loss=0.585:  74%|███████▍  | 70/94 [02:57<00:41,  1.73s/it][A
loss=0.585:  76%|███████▌  | 71/94 [02:57<00:31,  1.39s/it][A
loss=0.899:  76%|███████▌  | 71/94 [02:57<00:31,  1.39s/it][A
loss=0.899:  77%|███████▋  | 72/94 [02:58<00:25,  1.15s/it][A
loss=1.192:  77%|███████▋  | 72/94 [02:58<00:25,  1.15

{"eval_acc": 0.807919366450684, "eval_f1": 0.2592617668294975, "eval_acc_and_f1": 0.5335905666400907, "eval_loss": 0.8068710501704898, "learning_rate": 9.448818897637796e-07, "train_loss": 0.9300331506464217, "step": 270}



loss=0.891:  88%|████████▊ | 83/94 [03:36<01:20,  7.30s/it][A
loss=0.861:  88%|████████▊ | 83/94 [03:36<01:20,  7.30s/it][A
loss=0.861:  89%|████████▉ | 84/94 [03:37<00:52,  5.29s/it][A
loss=1.130:  89%|████████▉ | 84/94 [03:37<00:52,  5.29s/it][A
loss=1.130:  90%|█████████ | 85/94 [03:37<00:34,  3.88s/it][A
loss=1.351:  90%|█████████ | 85/94 [03:38<00:34,  3.88s/it][A
loss=1.351:  91%|█████████▏| 86/94 [03:38<00:23,  2.89s/it][A
loss=0.748:  91%|█████████▏| 86/94 [03:38<00:23,  2.89s/it][A
loss=0.748:  93%|█████████▎| 87/94 [03:39<00:15,  2.20s/it][A
loss=0.946:  93%|█████████▎| 87/94 [03:39<00:15,  2.20s/it][A
loss=0.946:  94%|█████████▎| 88/94 [03:39<00:10,  1.71s/it][A
loss=1.109:  94%|█████████▎| 88/94 [03:39<00:10,  1.71s/it][A
loss=1.109:  95%|█████████▍| 89/94 [03:40<00:06,  1.38s/it][A
loss=0.896:  95%|█████████▍| 89/94 [03:40<00:06,  1.38s/it][A
loss=0.896:  96%|█████████▌| 90/94 [03:40<00:04,  1.14s/it][A
loss=1.013:  96%|█████████▌| 90/94 [03:40<00:04,  1.14

{"eval_acc": 0.8092152627789777, "eval_f1": 0.2603742312425326, "eval_acc_and_f1": 0.5347947470107551, "eval_loss": 0.8040440444435392, "learning_rate": 0.0, "train_loss": 0.662816713253657, "step": 282}


07/18/2022 05:40:06 - INFO - utilities.trainers -   ***** Running evaluation iter-6_trial1 *****
07/18/2022 05:40:06 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 05:40:06 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 05:40:33 - INFO - utilities.trainers -   ***** Eval results iter-6_trial1 *****
07/18/2022 05:40:33 - INFO - utilities.trainers -     acc = 0.8092152627789777
07/18/2022 05:40:33 - INFO - utilities.trainers -     acc_and_f1 = 0.5347947470107551
07/18/2022 05:40:33 - INFO - utilities.trainers -     f1 = 0.2603742312425326
07/18/2022 05:40:36 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 05:40:42 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 05:40:42 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 05:40:42 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 05:43:01 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 05:43:01 - INFO - utilities.trainers -     acc = 0.808910777028973
07/18/2022 05:43:01 - INFO - utilities.trainers -     acc_and_f1 = 0.5274827924243888
07/18/2022 05:43:01 - INFO - utilities.trainers -     f1 = 0.2460548078198047



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 05:43:02 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 05:43:20 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 05:43:24 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 05:43:24 - INFO - utilities.trainers -     Num examples = 47000
07/18/2022 05:43:24 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/184 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 184/184 [06:15<00:00,  2.04s/it]
07/18/2022 05:49:40 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 05:49:40 - INFO - utilities.trainers -     acc = 0.8119787234042554
07/18/2022 05:49:40 - INFO - utilities.trainers -     acc_and_f1 = 0.5327138896815442
07/18/2022 05:49:40 - INFO - utilities.trainers -     f1 = 0.2534490559588331
07/18/2022 05:49:40 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 05:49:59 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 05:50:02 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 05:50:02 - INFO - utilities.trainers -     Num examples = 3000
07/18/2022 05:50:02 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 12/12 [00:23<00:00,  1.99s/it]
07/18/2022 05:50:26 - INFO - uti



************
End of iteration 6:
Train loss 1.6286, Val loss 0.8040440444435392, Test loss 0.8092774981084991
Annotated 500 samples
Current labeled (training) data: 3500 samples
Remaining budget: 500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 7!



07/18/2022 05:53:54 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.8280777537796976, acc_best_iteration=5, acquisition='cal', acquisition_size=500, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-5', binary=False, budget=(8, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_cal_5262/ornl20_bert-cls/iter-7', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_training=True, 

warmup steps: 32
total steps: 328
logging steps: 21
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.273:   1%|          | 1/110 [00:00<00:57,  1.90it/s][A
loss=3.355:   1%|          | 1/110 [00:00<00:57,  1.90it/s][A
loss=3.355:   2%|▏         | 2/110 [00:01<00:58,  1.84it/s][A
loss=3.325:   2%|▏         | 2/110 [00:01<00:58,  1.84it/s][A
loss=3.325:   3%|▎         | 3/110 [00:01<00:59,  1.81it/s][A
loss=3.246:   3%|▎         | 3/110 [00:01<00:59,  1.81it/s][A
loss=3.246:   4%|▎         | 4/110 [00:02<00:59,  1.79it/s][A
loss=3.366:   4%|▎         | 4/110 [00:02<00:59,  1.79it/s][A
loss=3.366:   5%|▍         | 5/110 [00:02<00:59,  1.77it/s][A
loss=3.371:   5%|▍         | 5/110 [00:02<00:59,  1.77it/s][A
loss=3.371:   5%|▌         | 6/110 [00:03<00:59,  1.76it/s][A
loss=3.296:   5%|▌         | 6/110 [00:03<00:59,  1.76it/s][A
loss=3.296:   6%|▋         | 7/110 [00:03<00:59,  1.75it/s][A
loss=3.262:   6%|▋         | 7/110 [00:04<00:59,  1.75it/s][A
loss=3.262:   7%|▋         | 8/110 [00:04<00:58,  1.74it/s][A
loss=3.298:   7%|▋         | 8/110 [00:04<00:58,  1.74

{"eval_acc": 0.20230381569474443, "eval_f1": 0.024720956360228658, "eval_acc_and_f1": 0.11351238602748655, "eval_loss": 2.7883573940822055, "learning_rate": 1.3125e-05, "train_loss": 3.1330576056525823, "step": 21}



loss=2.692:  20%|██        | 22/110 [00:44<10:37,  7.25s/it][A
loss=2.740:  20%|██        | 22/110 [00:44<10:37,  7.25s/it][A
loss=2.740:  21%|██        | 23/110 [00:45<07:36,  5.25s/it][A
loss=2.828:  21%|██        | 23/110 [00:45<07:36,  5.25s/it][A
loss=2.828:  22%|██▏       | 24/110 [00:45<05:30,  3.85s/it][A
loss=2.723:  22%|██▏       | 24/110 [00:45<05:30,  3.85s/it][A
loss=2.723:  23%|██▎       | 25/110 [00:46<04:03,  2.87s/it][A
loss=2.711:  23%|██▎       | 25/110 [00:46<04:03,  2.87s/it][A
loss=2.711:  24%|██▎       | 26/110 [00:46<03:03,  2.18s/it][A
loss=2.509:  24%|██▎       | 26/110 [00:46<03:03,  2.18s/it][A
loss=2.509:  25%|██▍       | 27/110 [00:47<02:21,  1.70s/it][A
loss=2.680:  25%|██▍       | 27/110 [00:47<02:21,  1.70s/it][A
loss=2.680:  25%|██▌       | 28/110 [00:47<01:51,  1.37s/it][A
loss=2.764:  25%|██▌       | 28/110 [00:48<01:51,  1.37s/it][A
loss=2.764:  26%|██▋       | 29/110 [00:48<01:31,  1.13s/it][A
loss=2.530:  26%|██▋       | 29/110 [00

{"eval_acc": 0.2542836573074154, "eval_f1": 0.04577015082865323, "eval_acc_and_f1": 0.15002690406803432, "eval_loss": 2.420615630490439, "learning_rate": 1.9328859060402687e-05, "train_loss": 2.5976464407784596, "step": 42}



loss=2.511:  39%|███▉      | 43/110 [01:32<08:56,  8.01s/it][A
loss=2.459:  39%|███▉      | 43/110 [01:32<08:56,  8.01s/it][A
loss=2.459:  40%|████      | 44/110 [01:32<06:21,  5.78s/it][A
loss=2.639:  40%|████      | 44/110 [01:32<06:21,  5.78s/it][A
loss=2.639:  41%|████      | 45/110 [01:33<04:34,  4.22s/it][A
loss=2.382:  41%|████      | 45/110 [01:33<04:34,  4.22s/it][A
loss=2.382:  42%|████▏     | 46/110 [01:33<03:20,  3.13s/it][A
loss=2.463:  42%|████▏     | 46/110 [01:33<03:20,  3.13s/it][A
loss=2.463:  43%|████▎     | 47/110 [01:34<02:29,  2.37s/it][A
loss=2.508:  43%|████▎     | 47/110 [01:34<02:29,  2.37s/it][A
loss=2.508:  44%|████▎     | 48/110 [01:34<01:53,  1.83s/it][A
loss=2.313:  44%|████▎     | 48/110 [01:35<01:53,  1.83s/it][A
loss=2.313:  45%|████▍     | 49/110 [01:35<01:29,  1.46s/it][A
loss=2.398:  45%|████▍     | 49/110 [01:35<01:29,  1.46s/it][A
loss=2.398:  45%|████▌     | 50/110 [01:36<01:11,  1.20s/it][A
loss=2.365:  45%|████▌     | 50/110 [01

{"eval_acc": 0.45932325413966885, "eval_f1": 0.10503713826155721, "eval_acc_and_f1": 0.282180196200613, "eval_loss": 2.1106906703540256, "learning_rate": 1.7919463087248323e-05, "train_loss": 2.3811644599551247, "step": 63}



loss=2.037:  58%|█████▊    | 64/110 [02:19<06:02,  7.89s/it][A
loss=2.163:  58%|█████▊    | 64/110 [02:19<06:02,  7.89s/it][A
loss=2.163:  59%|█████▉    | 65/110 [02:19<04:16,  5.70s/it][A
loss=2.046:  59%|█████▉    | 65/110 [02:19<04:16,  5.70s/it][A
loss=2.046:  60%|██████    | 66/110 [02:20<03:03,  4.17s/it][A
loss=2.305:  60%|██████    | 66/110 [02:20<03:03,  4.17s/it][A
loss=2.305:  61%|██████    | 67/110 [02:20<02:13,  3.10s/it][A
loss=2.016:  61%|██████    | 67/110 [02:21<02:13,  3.10s/it][A
loss=2.016:  62%|██████▏   | 68/110 [02:21<01:38,  2.34s/it][A
loss=2.447:  62%|██████▏   | 68/110 [02:21<01:38,  2.34s/it][A
loss=2.447:  63%|██████▎   | 69/110 [02:22<01:14,  1.82s/it][A
loss=2.180:  63%|██████▎   | 69/110 [02:22<01:14,  1.82s/it][A
loss=2.180:  64%|██████▎   | 70/110 [02:22<00:57,  1.45s/it][A
loss=2.224:  64%|██████▎   | 70/110 [02:22<00:57,  1.45s/it][A
loss=2.224:  65%|██████▍   | 71/110 [02:23<00:46,  1.19s/it][A
loss=2.293:  65%|██████▍   | 71/110 [02

{"eval_acc": 0.6564434845212384, "eval_f1": 0.1634305146135568, "eval_acc_and_f1": 0.4099369995673976, "eval_loss": 1.7428641319274902, "learning_rate": 1.6510067114093962e-05, "train_loss": 2.106461445490519, "step": 84}



loss=2.150:  77%|███████▋  | 85/110 [03:03<03:03,  7.34s/it][A
loss=2.006:  77%|███████▋  | 85/110 [03:03<03:03,  7.34s/it][A
loss=2.006:  78%|███████▊  | 86/110 [03:04<02:07,  5.32s/it][A
loss=1.894:  78%|███████▊  | 86/110 [03:04<02:07,  5.32s/it][A
loss=1.894:  79%|███████▉  | 87/110 [03:04<01:29,  3.90s/it][A
loss=1.763:  79%|███████▉  | 87/110 [03:04<01:29,  3.90s/it][A
loss=1.763:  80%|████████  | 88/110 [03:05<01:03,  2.90s/it][A
loss=1.867:  80%|████████  | 88/110 [03:05<01:03,  2.90s/it][A
loss=1.867:  81%|████████  | 89/110 [03:05<00:46,  2.20s/it][A
loss=2.103:  81%|████████  | 89/110 [03:06<00:46,  2.20s/it][A
loss=2.103:  82%|████████▏ | 90/110 [03:06<00:34,  1.72s/it][A
loss=1.852:  82%|████████▏ | 90/110 [03:06<00:34,  1.72s/it][A
loss=1.852:  83%|████████▎ | 91/110 [03:07<00:26,  1.38s/it][A
loss=1.923:  83%|████████▎ | 91/110 [03:07<00:26,  1.38s/it][A
loss=1.923:  84%|████████▎ | 92/110 [03:07<00:20,  1.14s/it][A
loss=1.662:  84%|████████▎ | 92/110 [03

{"eval_acc": 0.7448524118070554, "eval_f1": 0.1895436674108066, "eval_acc_and_f1": 0.467198039608931, "eval_loss": 1.4570284868989671, "learning_rate": 1.5100671140939598e-05, "train_loss": 1.8719186271939958, "step": 105}



loss=1.575:  96%|█████████▋| 106/110 [03:48<00:29,  7.39s/it][A
loss=1.861:  96%|█████████▋| 106/110 [03:48<00:29,  7.39s/it][A
loss=1.861:  97%|█████████▋| 107/110 [03:48<00:16,  5.35s/it][A
loss=1.621:  97%|█████████▋| 107/110 [03:49<00:16,  5.35s/it][A
loss=1.621:  98%|█████████▊| 108/110 [03:49<00:07,  3.92s/it][A
loss=1.595:  98%|█████████▊| 108/110 [03:49<00:07,  3.92s/it][A
loss=1.595:  99%|█████████▉| 109/110 [03:50<00:02,  2.92s/it][A
loss=1.863:  99%|█████████▉| 109/110 [03:50<00:02,  2.92s/it][A
loss=1.863: 100%|██████████| 110/110 [03:50<00:00,  2.09s/it]
Epoch:  33%|███▎      | 1/3 [03:50<07:40, 230.29s/it]
Iteration:   0%|          | 0/110 [00:00<?, ?it/s][A
loss=1.598:   0%|          | 0/110 [00:00<?, ?it/s][A
loss=1.598:   1%|          | 1/110 [00:00<01:03,  1.72it/s][A
loss=1.887:   1%|          | 1/110 [00:00<01:03,  1.72it/s][A
loss=1.887:   2%|▏         | 2/110 [00:01<01:02,  1.72it/s][A
loss=1.655:   2%|▏         | 2/110 [00:01<01:02,  1.72it/s][A
lo

{"eval_acc": 0.7749460043196544, "eval_f1": 0.21044113221543193, "eval_acc_and_f1": 0.4926935682675432, "eval_loss": 1.192844033241272, "learning_rate": 1.3691275167785237e-05, "train_loss": 1.617232186453683, "step": 126}



loss=1.587:  15%|█▌        | 17/110 [00:45<12:23,  8.00s/it][A
loss=1.538:  15%|█▌        | 17/110 [00:45<12:23,  8.00s/it][A
loss=1.538:  16%|█▋        | 18/110 [00:45<08:51,  5.77s/it][A
loss=1.204:  16%|█▋        | 18/110 [00:45<08:51,  5.77s/it][A
loss=1.204:  17%|█▋        | 19/110 [00:46<06:23,  4.21s/it][A
loss=1.210:  17%|█▋        | 19/110 [00:46<06:23,  4.21s/it][A
loss=1.210:  18%|█▊        | 20/110 [00:46<04:41,  3.13s/it][A
loss=1.323:  18%|█▊        | 20/110 [00:47<04:41,  3.13s/it][A
loss=1.323:  19%|█▉        | 21/110 [00:47<03:30,  2.36s/it][A
loss=1.189:  19%|█▉        | 21/110 [00:47<03:30,  2.36s/it][A
loss=1.189:  20%|██        | 22/110 [00:48<02:40,  1.83s/it][A
loss=1.591:  20%|██        | 22/110 [00:48<02:40,  1.83s/it][A
loss=1.591:  21%|██        | 23/110 [00:48<02:06,  1.45s/it][A
loss=1.545:  21%|██        | 23/110 [00:48<02:06,  1.45s/it][A
loss=1.545:  22%|██▏       | 24/110 [00:49<01:42,  1.19s/it][A
loss=1.465:  22%|██▏       | 24/110 [00

{"eval_acc": 0.7946724262059035, "eval_f1": 0.2370041873077946, "eval_acc_and_f1": 0.515838306756849, "eval_loss": 1.0291313167129243, "learning_rate": 1.2281879194630872e-05, "train_loss": 1.3748396039009094, "step": 147}



loss=1.616:  35%|███▍      | 38/110 [01:30<08:58,  7.48s/it][A
loss=1.050:  35%|███▍      | 38/110 [01:30<08:58,  7.48s/it][A
loss=1.050:  35%|███▌      | 39/110 [01:30<06:24,  5.41s/it][A
loss=1.152:  35%|███▌      | 39/110 [01:31<06:24,  5.41s/it][A
loss=1.152:  36%|███▋      | 40/110 [01:31<04:37,  3.96s/it][A
loss=1.136:  36%|███▋      | 40/110 [01:31<04:37,  3.96s/it][A
loss=1.136:  37%|███▋      | 41/110 [01:32<03:23,  2.95s/it][A
loss=1.137:  37%|███▋      | 41/110 [01:32<03:23,  2.95s/it][A
loss=1.137:  38%|███▊      | 42/110 [01:32<02:32,  2.24s/it][A
loss=1.303:  38%|███▊      | 42/110 [01:32<02:32,  2.24s/it][A
loss=1.303:  39%|███▉      | 43/110 [01:33<01:56,  1.74s/it][A
loss=1.154:  39%|███▉      | 43/110 [01:33<01:56,  1.74s/it][A
loss=1.154:  40%|████      | 44/110 [01:33<01:31,  1.39s/it][A
loss=1.318:  40%|████      | 44/110 [01:33<01:31,  1.39s/it][A
loss=1.318:  41%|████      | 45/110 [01:34<01:14,  1.15s/it][A
loss=1.527:  41%|████      | 45/110 [01

{"eval_acc": 0.803023758099352, "eval_f1": 0.24441916571695402, "eval_acc_and_f1": 0.523721461908153, "eval_loss": 0.889022314122745, "learning_rate": 1.0872483221476512e-05, "train_loss": 1.2785647028968448, "step": 168}



loss=1.085:  54%|█████▎    | 59/110 [02:16<06:31,  7.67s/it][A
loss=0.915:  54%|█████▎    | 59/110 [02:16<06:31,  7.67s/it][A
loss=0.915:  55%|█████▍    | 60/110 [02:16<04:37,  5.55s/it][A
loss=1.168:  55%|█████▍    | 60/110 [02:17<04:37,  5.55s/it][A
loss=1.168:  55%|█████▌    | 61/110 [02:17<03:18,  4.06s/it][A
loss=1.082:  55%|█████▌    | 61/110 [02:17<03:18,  4.06s/it][A
loss=1.082:  56%|█████▋    | 62/110 [02:18<02:24,  3.02s/it][A
loss=0.848:  56%|█████▋    | 62/110 [02:18<02:24,  3.02s/it][A
loss=0.848:  57%|█████▋    | 63/110 [02:18<01:47,  2.29s/it][A
loss=1.242:  57%|█████▋    | 63/110 [02:18<01:47,  2.29s/it][A
loss=1.242:  58%|█████▊    | 64/110 [02:19<01:21,  1.78s/it][A
loss=0.796:  58%|█████▊    | 64/110 [02:19<01:21,  1.78s/it][A
loss=0.796:  59%|█████▉    | 65/110 [02:19<01:03,  1.42s/it][A
loss=1.095:  59%|█████▉    | 65/110 [02:19<01:03,  1.42s/it][A
loss=1.095:  60%|██████    | 66/110 [02:20<00:51,  1.17s/it][A
loss=1.116:  60%|██████    | 66/110 [02

{"eval_acc": 0.8119510439164866, "eval_f1": 0.2567279900637141, "eval_acc_and_f1": 0.5343395169901004, "eval_loss": 0.8102056682109833, "learning_rate": 9.463087248322147e-06, "train_loss": 1.0574223739760262, "step": 189}



loss=0.998:  73%|███████▎  | 80/110 [03:00<03:40,  7.34s/it][A
loss=0.956:  73%|███████▎  | 80/110 [03:00<03:40,  7.34s/it][A
loss=0.956:  74%|███████▎  | 81/110 [03:01<02:34,  5.31s/it][A
loss=0.980:  74%|███████▎  | 81/110 [03:01<02:34,  5.31s/it][A
loss=0.980:  75%|███████▍  | 82/110 [03:01<01:49,  3.89s/it][A
loss=1.023:  75%|███████▍  | 82/110 [03:02<01:49,  3.89s/it][A
loss=1.023:  75%|███████▌  | 83/110 [03:02<01:18,  2.90s/it][A
loss=0.872:  75%|███████▌  | 83/110 [03:02<01:18,  2.90s/it][A
loss=0.872:  76%|███████▋  | 84/110 [03:03<00:57,  2.20s/it][A
loss=0.789:  76%|███████▋  | 84/110 [03:03<00:57,  2.20s/it][A
loss=0.789:  77%|███████▋  | 85/110 [03:03<00:42,  1.72s/it][A
loss=0.986:  77%|███████▋  | 85/110 [03:03<00:42,  1.72s/it][A
loss=0.986:  78%|███████▊  | 86/110 [03:04<00:33,  1.38s/it][A
loss=1.128:  78%|███████▊  | 86/110 [03:04<00:33,  1.38s/it][A
loss=1.128:  79%|███████▉  | 87/110 [03:04<00:26,  1.14s/it][A
loss=0.861:  79%|███████▉  | 87/110 [03

{"eval_acc": 0.8184305255579554, "eval_f1": 0.26038531708509743, "eval_acc_and_f1": 0.5394079213215264, "eval_loss": 0.7462562131030219, "learning_rate": 8.053691275167785e-06, "train_loss": 0.9859031949724469, "step": 210}



loss=0.925:  92%|█████████▏| 101/110 [03:48<01:12,  8.06s/it][A
loss=1.390:  92%|█████████▏| 101/110 [03:48<01:12,  8.06s/it][A
loss=1.390:  93%|█████████▎| 102/110 [03:49<00:46,  5.82s/it][A
loss=1.144:  93%|█████████▎| 102/110 [03:49<00:46,  5.82s/it][A
loss=1.144:  94%|█████████▎| 103/110 [03:49<00:29,  4.25s/it][A
loss=1.068:  94%|█████████▎| 103/110 [03:49<00:29,  4.25s/it][A
loss=1.068:  95%|█████████▍| 104/110 [03:50<00:18,  3.15s/it][A
loss=0.933:  95%|█████████▍| 104/110 [03:50<00:18,  3.15s/it][A
loss=0.933:  95%|█████████▌| 105/110 [03:50<00:11,  2.38s/it][A
loss=0.871:  95%|█████████▌| 105/110 [03:51<00:11,  2.38s/it][A
loss=0.871:  96%|█████████▋| 106/110 [03:51<00:07,  1.84s/it][A
loss=0.852:  96%|█████████▋| 106/110 [03:51<00:07,  1.84s/it][A
loss=0.852:  97%|█████████▋| 107/110 [03:52<00:04,  1.47s/it][A
loss=0.663:  97%|█████████▋| 107/110 [03:52<00:04,  1.47s/it][A
loss=0.663:  98%|█████████▊| 108/110 [03:52<00:02,  1.20s/it][A
loss=0.808:  98%|███████

{"eval_acc": 0.8267818574514039, "eval_f1": 0.27229871076128537, "eval_acc_and_f1": 0.5495402841063446, "eval_loss": 0.696358071906226, "learning_rate": 6.644295302013424e-06, "train_loss": 0.8890522093999953, "step": 231}



loss=0.757:  11%|█         | 12/110 [00:39<11:59,  7.34s/it][A
loss=0.904:  11%|█         | 12/110 [00:39<11:59,  7.34s/it][A
loss=0.904:  12%|█▏        | 13/110 [00:39<08:35,  5.31s/it][A
loss=0.852:  12%|█▏        | 13/110 [00:39<08:35,  5.31s/it][A
loss=0.852:  13%|█▎        | 14/110 [00:40<06:13,  3.89s/it][A
loss=0.740:  13%|█▎        | 14/110 [00:40<06:13,  3.89s/it][A
loss=0.740:  14%|█▎        | 15/110 [00:40<04:35,  2.90s/it][A
loss=0.934:  14%|█▎        | 15/110 [00:41<04:35,  2.90s/it][A
loss=0.934:  15%|█▍        | 16/110 [00:41<03:27,  2.20s/it][A
loss=0.732:  15%|█▍        | 16/110 [00:41<03:27,  2.20s/it][A
loss=0.732:  15%|█▌        | 17/110 [00:42<02:39,  1.72s/it][A
loss=0.789:  15%|█▌        | 17/110 [00:42<02:39,  1.72s/it][A
loss=0.789:  16%|█▋        | 18/110 [00:42<02:06,  1.38s/it][A
loss=0.855:  16%|█▋        | 18/110 [00:42<02:06,  1.38s/it][A
loss=0.855:  17%|█▋        | 19/110 [00:43<01:43,  1.14s/it][A
loss=0.946:  17%|█▋        | 19/110 [00

{"eval_acc": 0.8515478761699065, "eval_f1": 0.299044461807361, "eval_acc_and_f1": 0.5752961689886338, "eval_loss": 0.6571116117494447, "learning_rate": 5.234899328859061e-06, "train_loss": 0.8671237457366217, "step": 252}



loss=0.743:  29%|██▉       | 32/110 [01:23<13:30, 10.40s/it][A
loss=0.743:  30%|███       | 33/110 [01:24<09:34,  7.46s/it][A
loss=0.809:  30%|███       | 33/110 [01:24<09:34,  7.46s/it][A
loss=0.809:  31%|███       | 34/110 [01:24<06:49,  5.39s/it][A
loss=1.009:  31%|███       | 34/110 [01:24<06:49,  5.39s/it][A
loss=1.009:  32%|███▏      | 35/110 [01:25<04:56,  3.95s/it][A
loss=0.708:  32%|███▏      | 35/110 [01:25<04:56,  3.95s/it][A
loss=0.708:  33%|███▎      | 36/110 [01:25<03:37,  2.94s/it][A
loss=0.694:  33%|███▎      | 36/110 [01:26<03:37,  2.94s/it][A
loss=0.694:  34%|███▎      | 37/110 [01:26<02:43,  2.24s/it][A
loss=0.880:  34%|███▎      | 37/110 [01:26<02:43,  2.24s/it][A
loss=0.880:  35%|███▍      | 38/110 [01:27<02:05,  1.74s/it][A
loss=1.004:  35%|███▍      | 38/110 [01:27<02:05,  1.74s/it][A
loss=1.004:  35%|███▌      | 39/110 [01:27<01:38,  1.39s/it][A
loss=0.602:  35%|███▌      | 39/110 [01:27<01:38,  1.39s/it][A
loss=0.602:  36%|███▋      | 40/110 [01

{"eval_acc": 0.8529877609791217, "eval_f1": 0.2984261943415732, "eval_acc_and_f1": 0.5757069776603474, "eval_loss": 0.6291373382721629, "learning_rate": 3.825503355704698e-06, "train_loss": 0.8172115953195662, "step": 273}



loss=0.940:  49%|████▉     | 54/110 [02:08<06:53,  7.38s/it][A
loss=0.675:  49%|████▉     | 54/110 [02:08<06:53,  7.38s/it][A
loss=0.675:  50%|█████     | 55/110 [02:09<04:53,  5.34s/it][A
loss=0.598:  50%|█████     | 55/110 [02:09<04:53,  5.34s/it][A
loss=0.598:  51%|█████     | 56/110 [02:09<03:31,  3.91s/it][A
loss=0.775:  51%|█████     | 56/110 [02:10<03:31,  3.91s/it][A
loss=0.775:  52%|█████▏    | 57/110 [02:10<02:34,  2.91s/it][A
loss=1.244:  52%|█████▏    | 57/110 [02:10<02:34,  2.91s/it][A
loss=1.244:  53%|█████▎    | 58/110 [02:11<01:55,  2.22s/it][A
loss=0.955:  53%|█████▎    | 58/110 [02:11<01:55,  2.22s/it][A
loss=0.955:  54%|█████▎    | 59/110 [02:11<01:28,  1.73s/it][A
loss=0.670:  54%|█████▎    | 59/110 [02:11<01:28,  1.73s/it][A
loss=0.670:  55%|█████▍    | 60/110 [02:12<01:09,  1.39s/it][A
loss=0.857:  55%|█████▍    | 60/110 [02:12<01:09,  1.39s/it][A
loss=0.857:  55%|█████▌    | 61/110 [02:12<00:56,  1.15s/it][A
loss=0.653:  55%|█████▌    | 61/110 [02

{"eval_acc": 0.8587473002159827, "eval_f1": 0.30749935809427337, "eval_acc_and_f1": 0.583123329155128, "eval_loss": 0.6126495716827256, "learning_rate": 2.416107382550336e-06, "train_loss": 0.7582741890634809, "step": 294}



loss=0.608:  68%|██████▊   | 75/110 [02:53<04:19,  7.40s/it][A
loss=0.572:  68%|██████▊   | 75/110 [02:53<04:19,  7.40s/it][A
loss=0.572:  69%|██████▉   | 76/110 [02:54<03:02,  5.35s/it][A
loss=0.927:  69%|██████▉   | 76/110 [02:54<03:02,  5.35s/it][A
loss=0.927:  70%|███████   | 77/110 [02:54<02:09,  3.92s/it][A
loss=0.703:  70%|███████   | 77/110 [02:54<02:09,  3.92s/it][A
loss=0.703:  71%|███████   | 78/110 [02:55<01:33,  2.92s/it][A
loss=0.421:  71%|███████   | 78/110 [02:55<01:33,  2.92s/it][A
loss=0.421:  72%|███████▏  | 79/110 [02:55<01:08,  2.22s/it][A
loss=0.718:  72%|███████▏  | 79/110 [02:56<01:08,  2.22s/it][A
loss=0.718:  73%|███████▎  | 80/110 [02:56<00:51,  1.73s/it][A
loss=0.977:  73%|███████▎  | 80/110 [02:56<00:51,  1.73s/it][A
loss=0.977:  74%|███████▎  | 81/110 [02:57<00:40,  1.39s/it][A
loss=1.036:  74%|███████▎  | 81/110 [02:57<00:40,  1.39s/it][A
loss=1.036:  75%|███████▍  | 82/110 [02:57<00:32,  1.15s/it][A
loss=0.743:  75%|███████▍  | 82/110 [02

{"eval_acc": 0.8567314614830813, "eval_f1": 0.30874982914439214, "eval_acc_and_f1": 0.5827406453137367, "eval_loss": 0.602686168892043, "learning_rate": 1.006711409395973e-06, "train_loss": 0.7309000591437022, "step": 315}



loss=0.830:  87%|████████▋ | 96/110 [03:37<01:42,  7.33s/it][A
loss=1.069:  87%|████████▋ | 96/110 [03:38<01:42,  7.33s/it][A
loss=1.069:  88%|████████▊ | 97/110 [03:38<01:08,  5.31s/it][A
loss=0.652:  88%|████████▊ | 97/110 [03:38<01:08,  5.31s/it][A
loss=0.652:  89%|████████▉ | 98/110 [03:39<00:46,  3.89s/it][A
loss=0.472:  89%|████████▉ | 98/110 [03:39<00:46,  3.89s/it][A
loss=0.472:  90%|█████████ | 99/110 [03:39<00:31,  2.89s/it][A
loss=0.776:  90%|█████████ | 99/110 [03:39<00:31,  2.89s/it][A
loss=0.776:  91%|█████████ | 100/110 [03:40<00:22,  2.20s/it][A
loss=0.630:  91%|█████████ | 100/110 [03:40<00:22,  2.20s/it][A
loss=0.630:  92%|█████████▏| 101/110 [03:40<00:15,  1.72s/it][A
loss=0.634:  92%|█████████▏| 101/110 [03:40<00:15,  1.72s/it][A
loss=0.634:  93%|█████████▎| 102/110 [03:41<00:11,  1.38s/it][A
loss=0.884:  93%|█████████▎| 102/110 [03:41<00:11,  1.38s/it][A
loss=0.884:  94%|█████████▎| 103/110 [03:41<00:07,  1.14s/it][A
loss=0.469:  94%|█████████▎| 103

{"eval_acc": 0.8584593232541397, "eval_f1": 0.31056509215756994, "eval_acc_and_f1": 0.5845122077058548, "eval_loss": 0.6009200492075512, "learning_rate": 0.0, "train_loss": 0.5434147133713677, "step": 330}


07/18/2022 06:06:24 - INFO - utilities.trainers -   ***** Running evaluation iter-7_trial1 *****
07/18/2022 06:06:24 - INFO - utilities.trainers -     Num examples = 6945
07/18/2022 06:06:24 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.01it/s]
  'precision', 'predicted', average, warn_for)
07/18/2022 06:06:52 - INFO - utilities.trainers -   ***** Eval results iter-7_trial1 *****
07/18/2022 06:06:52 - INFO - utilities.trainers -     acc = 0.8584593232541397
07/18/2022 06:06:52 - INFO - utilities.trainers -     acc_and_f1 = 0.5845122077058548
07/18/2022 06:06:52 - INFO - utilities.trainers -     f1 = 0.31056509215756994
07/18/2022 06:06:55 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/18/2022 06:07:01 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 06:07:01 - INFO - utilities.trainers -     Num examples = 34722
07/18/2022 06:07:01 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:19<00:00,  1.02s/it]
07/18/2022 06:09:20 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 06:09:20 - INFO - utilities.trainers -     acc = 0.8637175277921779
07/18/2022 06:09:20 - INFO - utilities.trainers -     acc_and_f1 = 0.5849697054832969
07/18/2022 06:09:20 - INFO - utilities.trainers -     f1 = 0.3062218831744158



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



07/18/2022 06:09:21 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 06:09:39 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 06:09:45 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 06:09:45 - INFO - utilities.trainers -     Num examples = 46500
07/18/2022 06:09:45 - INFO - utilities.trainers -     Batch size = 256
Evaluating:   0%|          | 0/182 [00:00<?, ?it/s]

MC samples N=None


Evaluating: 100%|██████████| 182/182 [06:11<00:00,  2.04s/it]
07/18/2022 06:15:56 - INFO - utilities.trainers -   ***** Eval results  *****
07/18/2022 06:15:56 - INFO - utilities.trainers -     acc = 0.8680430107526882
07/18/2022 06:15:56 - INFO - utilities.trainers -     acc_and_f1 = 0.5937345925936267
07/18/2022 06:15:56 - INFO - utilities.trainers -     f1 = 0.3194261744345652
07/18/2022 06:15:56 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_bert-base-dutch-cased_256_ornl20_original
07/18/2022 06:16:16 - INFO - utilities.data_loader -   Selecting subsample...
07/18/2022 06:16:20 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/18/2022 06:16:20 - INFO - utilities.trainers -     Num examples = 3500
07/18/2022 06:16:20 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 14/14 [00:27<00:00,  1.99s/it]
07/18/2022 06:16:48 - INFO - uti



************
End of iteration 7:
Train loss 1.4643, Val loss 0.6009200492075512, Test loss 0.6063534968039569
Annotated 500 samples
Current labeled (training) data: 4000 samples
Remaining budget: 0 (in samples)
************

Saving json with the results....
The end!....


In [None]:
flags = make_flags(params, acq = 'random', seed = 5262, dataset = 'ornl20')
%run $AL_SCRIPT $flags

In [None]:
params['acquisition_size'] = '98%' 
params['init_train_data'] = '1%'
params['budget'] = '100%'

flags = make_flags(params, acq = 'random', seed = 5262, dataset = 'ornl20')
%run $AL_SCRIPT $flags




 --dataset_name ornl20 --budget 100% --per_gpu_train_batch_size 32 --max_seq_length 256 --resume False --cap_training_pool 50000 --init random --init_train_data 1% --acquisition_size 98% --model_name_or_path wietsedv/bert-base-dutch-cased --acquisition random --seed 5262 

device: cuda:0
output_dir=/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_random_5262/ornl20_bert-cls



 /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20 





07/17/2022 12:18:33 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_train_ornl20_original
07/17/2022 12:18:35 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_dev_ornl20_original
07/17/2022 12:18:36 - INFO - utilities.data_loader -   Loading dataset from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_ornl20_original



train set stats: class 21: 17% class 20: 17% class 17: 7% class 25: 17% class 3: 17% class 2: 3% class 23: 5% class 18: 1% class 4: 1% class 1: 5% class 9: 0% class 11: 3% class 14: 0% class 5: 1% class 15: 1% class 13: 0% class 6: 2% class 22: 0% class 10: 0% class 12: 1% class 19: 0% class 0: 0% class 16: 0% class 8: 0% class 7: 0% 
validation set stats: class 25: 17% class 20: 17% class 3: 17% class 21: 17% class 17: 7% class 0: 0% class 6: 2% class 23: 5% class 2: 3% class 1: 5% class 11: 3% class 12: 1% class 4: 1% class 18: 1% class 15: 1% class 14: 0% class 5: 1% class 10: 0% class 9: 0% class 19: 0% class 8: 0% class 22: 1% class 16: 0% class 13: 0% class 7: 0% 
test set stats: class 3: 17% class 5: 1% class 25: 18% class 1: 5% class 15: 1% class 20: 17% class 21: 17% class 2: 3% class 17: 7% class 6: 2% class 4: 1% class 23: 5% class 10: 0% class 0: 0% class 11: 2% class 8: 0% class 12: 1% class 22: 0% class 9: 0% class 16: 0% class 18: 1% class 14: 0% class 13: 0% class 19: 

07/17/2022 12:19:43 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0, acc_best_iteration=0, acquisition='random', acquisition_size=49000, adam_epsilon=1e-08, bert_rep=False, bert_score=False, binary=False, budget=(100, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_random_5262/ornl20_bert-cls/iter-1', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, evaluate_during_training=True, fp16=False, fp16_opt_level='O1', gpu='0', gradient_accumulation_steps=1, indicator=None, init='random', init_train_data=500, knn_lab=False, learning_rate=2e-05, loca

warmup steps: 4
total steps: 46
logging steps: 3
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.310:   6%|▋         | 1/16 [00:00<00:07,  2.08it/s][A
loss=3.416:   6%|▋         | 1/16 [00:00<00:07,  2.08it/s][A
loss=3.416:  12%|█▎        | 2/16 [00:01<00:07,  1.98it/s][A
loss=3.278:  12%|█▎        | 2/16 [00:01<00:07,  1.98it/s][A07/17/2022 12:20:18 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:20:18 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:20:18 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.01it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.01it/s][A[A

Evaluating:  11%|█         | 3/28 [00:02<00:24,  1.01it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:03<00:23,  1.00it/s][A[A

Evaluating:  18%|█▊        | 5/28 [00:04<00:22,  1.00it/s][A[A

Evaluating:  21%|██▏       | 6/28 [00:05<00:21,  1.00it/s][A[A

Evaluating:  25%|██▌       | 7/28 [00:06<00:20,  1.00it/s][A[A

E

{"eval_acc": 0.06263498920086392, "eval_f1": 0.012319526158824295, "eval_acc_and_f1": 0.03747725767984411, "eval_loss": 3.236840239592961, "learning_rate": 1.5000000000000002e-05, "train_loss": 3.334469715754191, "step": 3}



loss=3.243:  25%|██▌       | 4/16 [00:35<01:30,  7.50s/it][A
loss=3.112:  25%|██▌       | 4/16 [00:35<01:30,  7.50s/it][A
loss=3.112:  31%|███▏      | 5/16 [00:35<00:59,  5.42s/it][A
loss=3.061:  31%|███▏      | 5/16 [00:36<00:59,  5.42s/it][A07/17/2022 12:20:53 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:20:53 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:20:53 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:02<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:03<00:23,  1.00it/s][A[A

Evaluating:  18%|█▊        | 5/28 [00:04<00:22,  1.00it/s][A[A

Evaluating:  21%|██▏       | 6/28 [00:05<00:21,  1.00it/s][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[A

E

{"eval_acc": 0.20259179265658747, "eval_f1": 0.03011173131549667, "eval_acc_and_f1": 0.11635176198604207, "eval_loss": 3.056640233312334, "learning_rate": 1.9090909090909094e-05, "train_loss": 3.138713836669922, "step": 6}



loss=3.106:  44%|████▍     | 7/16 [01:08<01:25,  9.54s/it][A
loss=2.861:  44%|████▍     | 7/16 [01:08<01:25,  9.54s/it][A
loss=2.861:  50%|█████     | 8/16 [01:08<00:54,  6.84s/it][A
loss=2.942:  50%|█████     | 8/16 [01:09<00:54,  6.84s/it][A07/17/2022 12:21:26 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:21:26 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:21:26 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:02<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:03<00:23,  1.00it/s][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[A

E

{"eval_acc": 0.2969042476601872, "eval_f1": 0.04680004254144593, "eval_acc_and_f1": 0.17185214510081656, "eval_loss": 2.849212280341557, "learning_rate": 1.772727272727273e-05, "train_loss": 2.9696566263834634, "step": 9}



loss=2.834:  62%|██████▎   | 10/16 [01:43<01:03, 10.64s/it][A
loss=2.903:  62%|██████▎   | 10/16 [01:43<01:03, 10.64s/it][A
loss=2.903:  69%|██████▉   | 11/16 [01:44<00:38,  7.62s/it][A
loss=2.750:  69%|██████▉   | 11/16 [01:44<00:38,  7.62s/it][A07/17/2022 12:22:01 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:22:01 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:22:01 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.01it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:02<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:23,  1.00it/s][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[

{"eval_acc": 0.3056875449964003, "eval_f1": 0.04853263312631875, "eval_acc_and_f1": 0.1771100890613595, "eval_loss": 2.70573137487684, "learning_rate": 1.6363636363636366e-05, "train_loss": 2.829087257385254, "step": 12}



loss=2.809:  81%|████████▏ | 13/16 [02:18<00:32, 10.98s/it][A
loss=2.757:  81%|████████▏ | 13/16 [02:18<00:32, 10.98s/it][A
loss=2.757:  88%|████████▊ | 14/16 [02:18<00:15,  7.85s/it][A
loss=2.759:  88%|████████▊ | 14/16 [02:18<00:15,  7.85s/it][A07/17/2022 12:22:36 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:22:36 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:22:36 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:02<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:23,  1.00it/s][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[

{"eval_acc": 0.26119510439164867, "eval_f1": 0.040220392557941374, "eval_acc_and_f1": 0.150707748474795, "eval_loss": 2.6037780046463013, "learning_rate": 1.5000000000000002e-05, "train_loss": 2.775038957595825, "step": 15}



loss=2.519: 100%|██████████| 16/16 [02:51<00:00, 10.74s/it]
Epoch:  33%|███▎      | 1/3 [02:51<05:43, 171.89s/it]
Iteration:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.472:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.472:   6%|▋         | 1/16 [00:00<00:08,  1.80it/s][A
loss=2.548:   6%|▋         | 1/16 [00:00<00:08,  1.80it/s][A07/17/2022 12:23:09 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:23:09 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:23:09 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.00s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.00s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [0

{"eval_acc": 0.22001439884809215, "eval_f1": 0.03300502641146546, "eval_acc_and_f1": 0.1265097126297788, "eval_loss": 2.5142789483070374, "learning_rate": 1.3636363636363637e-05, "train_loss": 2.512741724650065, "step": 18}



loss=2.588:  19%|█▉        | 3/16 [00:35<01:39,  7.63s/it][A
loss=2.487:  19%|█▉        | 3/16 [00:35<01:39,  7.63s/it][A
loss=2.487:  25%|██▌       | 4/16 [00:35<01:06,  5.52s/it][A
loss=2.579:  25%|██▌       | 4/16 [00:36<01:06,  5.52s/it][A07/17/2022 12:23:45 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:23:45 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:23:45 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:23,  1.00it/s][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[A

E

{"eval_acc": 0.27429805615550756, "eval_f1": 0.0420396231511011, "eval_acc_and_f1": 0.15816883965330433, "eval_loss": 2.456186967236655, "learning_rate": 1.2272727272727274e-05, "train_loss": 2.5510865847269693, "step": 21}



loss=2.502:  38%|███▊      | 6/16 [01:08<01:36,  9.68s/it][A
loss=2.315:  38%|███▊      | 6/16 [01:09<01:36,  9.68s/it][A
loss=2.315:  44%|████▍     | 7/16 [01:09<01:02,  6.95s/it][A
loss=2.522:  44%|████▍     | 7/16 [01:09<01:02,  6.95s/it][A07/17/2022 12:24:18 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:24:18 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:24:18 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.01it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:02<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:03<00:23,  1.00it/s][A[A

Evaluating:  18%|█▊        | 5/28 [00:04<00:22,  1.00it/s][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:21,  1.00it/s][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[A

E

{"eval_acc": 0.28192944564434846, "eval_f1": 0.050963084888806326, "eval_acc_and_f1": 0.1664462652665774, "eval_loss": 2.4027508412088667, "learning_rate": 1.0909090909090909e-05, "train_loss": 2.446011940638224, "step": 24}



loss=2.708:  56%|█████▋    | 9/16 [01:43<01:14, 10.62s/it][A
loss=2.514:  56%|█████▋    | 9/16 [01:43<01:14, 10.62s/it][A
loss=2.514:  62%|██████▎   | 10/16 [01:44<00:45,  7.60s/it][A
loss=2.404:  62%|██████▎   | 10/16 [01:44<00:45,  7.60s/it][A07/17/2022 12:24:53 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:24:53 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:24:53 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:23,  1.00it/s][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[A


{"eval_acc": 0.3359251259899208, "eval_f1": 0.06361594310000915, "eval_acc_and_f1": 0.19977053454496496, "eval_loss": 2.360721903187888, "learning_rate": 9.545454545454547e-06, "train_loss": 2.542139927546183, "step": 27}



loss=2.399:  75%|███████▌  | 12/16 [02:17<00:43, 10.79s/it][A
loss=2.138:  75%|███████▌  | 12/16 [02:17<00:43, 10.79s/it][A
loss=2.138:  81%|████████▏ | 13/16 [02:18<00:23,  7.72s/it][A
loss=2.469:  81%|████████▏ | 13/16 [02:18<00:23,  7.72s/it][A07/17/2022 12:25:27 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:25:27 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:25:27 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.00s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.00s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[

{"eval_acc": 0.39136069114470845, "eval_f1": 0.073887747984277, "eval_acc_and_f1": 0.23262421956449272, "eval_loss": 2.3219669376100813, "learning_rate": 8.181818181818183e-06, "train_loss": 2.3353039423624673, "step": 30}



loss=2.269:  94%|█████████▍| 15/16 [02:52<00:10, 10.98s/it][A
loss=2.192:  94%|█████████▍| 15/16 [02:52<00:10, 10.98s/it][A
loss=2.192: 100%|██████████| 16/16 [02:52<00:00, 10.79s/it]
Epoch:  67%|██████▋   | 2/3 [05:44<02:52, 172.10s/it]
Iteration:   0%|          | 0/16 [00:00<?, ?it/s][A
loss=2.330:   0%|          | 0/16 [00:00<?, ?it/s][A07/17/2022 12:26:01 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:26:01 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:26:01 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.00s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.00s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 

{"eval_acc": 0.4673866090712743, "eval_f1": 0.09003262355460084, "eval_acc_and_f1": 0.27870961631293756, "eval_loss": 2.284376331738063, "learning_rate": 6.818181818181818e-06, "train_loss": 2.263935168584188, "step": 33}



loss=2.335:   6%|▋         | 1/16 [00:32<08:12, 32.81s/it][A
loss=2.335:  12%|█▎        | 2/16 [00:33<05:23, 23.13s/it][A
loss=2.398:  12%|█▎        | 2/16 [00:33<05:23, 23.13s/it][A
loss=2.398:  19%|█▉        | 3/16 [00:33<03:32, 16.36s/it][A
loss=2.220:  19%|█▉        | 3/16 [00:34<03:32, 16.36s/it][A07/17/2022 12:26:35 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:26:35 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:26:35 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:25,  1.00s/it][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.00s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evalu

{"eval_acc": 0.5064074874010079, "eval_f1": 0.09617568429051145, "eval_acc_and_f1": 0.3012915858457597, "eval_loss": 2.2557651826313565, "learning_rate": 5.4545454545454545e-06, "train_loss": 2.31777556737264, "step": 36}



loss=2.058:  31%|███▏      | 5/16 [01:06<02:44, 14.91s/it][A
loss=2.255:  31%|███▏      | 5/16 [01:06<02:44, 14.91s/it][A
loss=2.255:  38%|███▊      | 6/16 [01:07<01:46, 10.61s/it][A
loss=2.050:  38%|███▊      | 6/16 [01:07<01:46, 10.61s/it][A07/17/2022 12:27:08 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:27:08 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:27:08 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.00s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[A

E

{"eval_acc": 0.5229661627069835, "eval_f1": 0.0991122985109193, "eval_acc_and_f1": 0.3110392306089514, "eval_loss": 2.2335645045552934, "learning_rate": 4.0909090909090915e-06, "train_loss": 2.1209096908569336, "step": 39}



loss=2.310:  50%|█████     | 8/16 [01:39<01:36, 12.10s/it][A
loss=2.405:  50%|█████     | 8/16 [01:39<01:36, 12.10s/it][A
loss=2.405:  56%|█████▋    | 9/16 [01:40<01:00,  8.64s/it][A
loss=2.225:  56%|█████▋    | 9/16 [01:40<01:00,  8.64s/it][A07/17/2022 12:27:42 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:27:42 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:27:42 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:02<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:03<00:23,  1.00it/s][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:22,  1.00it/s][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[A

E

{"eval_acc": 0.5333333333333333, "eval_f1": 0.10126331010567391, "eval_acc_and_f1": 0.3172983217195036, "eval_loss": 2.214598689760481, "learning_rate": 2.7272727272727272e-06, "train_loss": 2.313279072443644, "step": 42}



loss=2.077:  69%|██████▉   | 11/16 [02:12<00:55, 11.10s/it][A
loss=2.313:  69%|██████▉   | 11/16 [02:12<00:55, 11.10s/it][A
loss=2.313:  75%|███████▌  | 12/16 [02:13<00:31,  7.94s/it][A
loss=2.135:  75%|███████▌  | 12/16 [02:13<00:31,  7.94s/it][A07/17/2022 12:28:15 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:28:15 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:28:15 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:02<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:03<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.00s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[

{"eval_acc": 0.5337652987760979, "eval_f1": 0.10163163479156634, "eval_acc_and_f1": 0.3176984667838321, "eval_loss": 2.2044726269585744, "learning_rate": 1.3636363636363636e-06, "train_loss": 2.1748218536376953, "step": 45}



loss=2.162:  88%|████████▊ | 14/16 [02:47<00:22, 11.06s/it][A
loss=2.483:  88%|████████▊ | 14/16 [02:47<00:22, 11.06s/it][A
loss=2.483:  94%|█████████▍| 15/16 [02:47<00:07,  7.91s/it][A
loss=2.007:  94%|█████████▍| 15/16 [02:47<00:07,  7.91s/it][A07/17/2022 12:28:49 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:28:49 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:28:49 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:00<00:26,  1.00it/s][A[A

Evaluating:   7%|▋         | 2/28 [00:01<00:25,  1.00it/s][A[A

Evaluating:  11%|█         | 3/28 [00:02<00:24,  1.00it/s][A[A

Evaluating:  14%|█▍        | 4/28 [00:04<00:24,  1.00s/it][A[A

Evaluating:  18%|█▊        | 5/28 [00:05<00:23,  1.00s/it][A[A

Evaluating:  21%|██▏       | 6/28 [00:06<00:22,  1.00s/it][A[A

Evaluating:  25%|██▌       | 7/28 [00:07<00:21,  1.00s/it][A[

{"eval_acc": 0.534341252699784, "eval_f1": 0.1017801511054394, "eval_acc_and_f1": 0.3180607019026117, "eval_loss": 2.20024060351508, "learning_rate": 0.0, "train_loss": 2.21732505162557, "step": 48}


07/17/2022 12:29:25 - INFO - utilities.trainers -   ***** Running evaluation iter-1_trial1 *****
07/17/2022 12:29:25 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 12:29:25 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.03it/s]
  'precision', 'predicted', average, warn_for)
07/17/2022 12:29:52 - INFO - utilities.trainers -   ***** Eval results iter-1_trial1 *****
07/17/2022 12:29:52 - INFO - utilities.trainers -     acc = 0.534341252699784
07/17/2022 12:29:52 - INFO - utilities.trainers -     acc_and_f1 = 0.3180607019026117
07/17/2022 12:29:52 - INFO - utilities.trainers -     f1 = 0.1017801511054394
07/17/2022 12:29:55 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/17/2022 12:30:01 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 12:30:01 - INFO - utilities.trainers -     Num examples = 34722
07/17/2022 12:30:01 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:15<00:00,  1.00it/s]
07/17/2022 12:32:17 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 12:32:17 - INFO - utilities.trainers -     acc = 0.5305857957490928
07/17/2022 12:32:17 - INFO - utilities.trainers -     acc_and_f1 = 0.3139096058608465
07/17/2022 12:32:17 - INFO - utilities.trainers -     f1 = 0.09723341597260018



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



************
End of iteration 1:
Train loss 2.5526, Val loss 2.20024060351508, Test loss 2.1994151928845573
Annotated 49000 samples
Current labeled (training) data: 49500 samples
Remaining budget: 500 (in samples)
************

Saving json with the results....

 Start Training model of iteration 2!



07/17/2022 12:32:20 - INFO - utilities.trainers -   Training/evaluation parameters Namespace(acc_best=0.534341252699784, acc_best_iteration=1, acquisition='random', acquisition_size=49000, adam_epsilon=1e-08, bert_rep=False, bert_score=False, best_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_random_5262/ornl20_bert-cls/iter-1', binary=False, budget=(100, True), cache_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/cache', cap_training_pool=50000, ce=False, cls=True, conf_mask=False, conf_thresh=0.0, config_name='', current_output_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/checkpoints/ornl20_bert_random_5262/ornl20_bert-cls/iter-2', data_dir='/content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20', dataset_name='ornl20', device=device(type='cuda', index=0), do_eval=True, do_lower_case=False, do_train=True, eval_all_checkpoints=False, eval_batch_size=256, evaluate_during_tra

warmup steps: 464
total steps: 4640
logging steps: 309
Total Params: 109.1M
Total Trainable Params: 109.1M



loss=3.415:   0%|          | 1/1547 [00:00<12:29,  2.06it/s][A
loss=3.322:   0%|          | 1/1547 [00:00<12:29,  2.06it/s][A
loss=3.322:   0%|          | 2/1547 [00:01<12:46,  2.02it/s][A
loss=3.264:   0%|          | 2/1547 [00:01<12:46,  2.02it/s][A
loss=3.264:   0%|          | 3/1547 [00:01<12:56,  1.99it/s][A
loss=3.353:   0%|          | 3/1547 [00:01<12:56,  1.99it/s][A
loss=3.353:   0%|          | 4/1547 [00:02<13:03,  1.97it/s][A
loss=3.481:   0%|          | 4/1547 [00:02<13:03,  1.97it/s][A
loss=3.481:   0%|          | 5/1547 [00:02<13:07,  1.96it/s][A
loss=3.408:   0%|          | 5/1547 [00:02<13:07,  1.96it/s][A
loss=3.408:   0%|          | 6/1547 [00:03<13:10,  1.95it/s][A
loss=3.330:   0%|          | 6/1547 [00:03<13:10,  1.95it/s][A
loss=3.330:   0%|          | 7/1547 [00:03<13:11,  1.95it/s][A
loss=3.337:   0%|          | 7/1547 [00:03<13:11,  1.95it/s][A
loss=3.337:   1%|          | 8/1547 [00:04<13:14,  1.94it/s][A
loss=3.276:   1%|          | 8/1547 [00

{"eval_acc": 0.8181425485961124, "eval_f1": 0.26945960612746694, "eval_acc_and_f1": 0.5438010773617896, "eval_loss": 0.6630330745662961, "learning_rate": 1.331896551724138e-05, "train_loss": 1.944005975541945, "step": 309}



loss=0.802:  20%|██        | 310/1547 [03:12<2:24:50,  7.03s/it][A
loss=0.682:  20%|██        | 310/1547 [03:12<2:24:50,  7.03s/it][A
loss=0.682:  20%|██        | 311/1547 [03:13<1:44:29,  5.07s/it][A
loss=1.032:  20%|██        | 311/1547 [03:13<1:44:29,  5.07s/it][A
loss=1.032:  20%|██        | 312/1547 [03:13<1:16:18,  3.71s/it][A
loss=0.573:  20%|██        | 312/1547 [03:13<1:16:18,  3.71s/it][A
loss=0.573:  20%|██        | 313/1547 [03:14<56:36,  2.75s/it]  [A
loss=0.586:  20%|██        | 313/1547 [03:14<56:36,  2.75s/it][A
loss=0.586:  20%|██        | 314/1547 [03:14<42:49,  2.08s/it][A
loss=0.721:  20%|██        | 314/1547 [03:15<42:49,  2.08s/it][A
loss=0.721:  20%|██        | 315/1547 [03:15<33:10,  1.62s/it][A
loss=0.700:  20%|██        | 315/1547 [03:15<33:10,  1.62s/it][A
loss=0.700:  20%|██        | 316/1547 [03:15<26:23,  1.29s/it][A
loss=0.478:  20%|██        | 316/1547 [03:16<26:23,  1.29s/it][A
loss=0.478:  20%|██        | 317/1547 [03:16<21:42,  1.06s/it

{"eval_acc": 0.9269978401727862, "eval_f1": 0.4823734953466436, "eval_acc_and_f1": 0.7046856677597149, "eval_loss": 0.28570242651871275, "learning_rate": 1.926262868087144e-05, "train_loss": 0.40374421837067526, "step": 618}



loss=0.253:  40%|████      | 619/1547 [06:27<1:54:46,  7.42s/it][A
loss=0.204:  40%|████      | 619/1547 [06:27<1:54:46,  7.42s/it][A
loss=0.204:  40%|████      | 620/1547 [06:27<1:22:39,  5.35s/it][A
loss=0.218:  40%|████      | 620/1547 [06:28<1:22:39,  5.35s/it][A
loss=0.218:  40%|████      | 621/1547 [06:28<1:00:13,  3.90s/it][A
loss=0.103:  40%|████      | 621/1547 [06:28<1:00:13,  3.90s/it][A
loss=0.103:  40%|████      | 622/1547 [06:29<44:32,  2.89s/it]  [A
loss=0.505:  40%|████      | 622/1547 [06:29<44:32,  2.89s/it][A
loss=0.505:  40%|████      | 623/1547 [06:29<33:34,  2.18s/it][A
loss=0.324:  40%|████      | 623/1547 [06:29<33:34,  2.18s/it][A
loss=0.324:  40%|████      | 624/1547 [06:30<25:55,  1.68s/it][A
loss=0.359:  40%|████      | 624/1547 [06:30<25:55,  1.68s/it][A
loss=0.359:  40%|████      | 625/1547 [06:30<20:31,  1.34s/it][A
loss=0.422:  40%|████      | 625/1547 [06:30<20:31,  1.34s/it][A
loss=0.422:  40%|████      | 626/1547 [06:31<16:45,  1.09s/it

{"eval_acc": 0.9406767458603311, "eval_f1": 0.581172059941383, "eval_acc_and_f1": 0.760924402900857, "eval_loss": 0.22643685420708998, "learning_rate": 1.778309791716543e-05, "train_loss": 0.26419918459439257, "step": 927}



loss=0.404:  60%|█████▉    | 928/1547 [09:42<1:17:00,  7.46s/it][A
loss=0.084:  60%|█████▉    | 928/1547 [09:42<1:17:00,  7.46s/it][A
loss=0.084:  60%|██████    | 929/1547 [09:42<55:26,  5.38s/it]  [A
loss=0.239:  60%|██████    | 929/1547 [09:43<55:26,  5.38s/it][A
loss=0.239:  60%|██████    | 930/1547 [09:43<40:24,  3.93s/it][A
loss=0.150:  60%|██████    | 930/1547 [09:43<40:24,  3.93s/it][A
loss=0.150:  60%|██████    | 931/1547 [09:43<29:50,  2.91s/it][A
loss=0.253:  60%|██████    | 931/1547 [09:44<29:50,  2.91s/it][A
loss=0.253:  60%|██████    | 932/1547 [09:44<22:29,  2.19s/it][A
loss=0.376:  60%|██████    | 932/1547 [09:44<22:29,  2.19s/it][A
loss=0.376:  60%|██████    | 933/1547 [09:44<17:21,  1.70s/it][A
loss=0.256:  60%|██████    | 933/1547 [09:45<17:21,  1.70s/it][A
loss=0.256:  60%|██████    | 934/1547 [09:45<13:45,  1.35s/it][A
loss=0.177:  60%|██████    | 934/1547 [09:45<13:45,  1.35s/it][A
loss=0.177:  60%|██████    | 935/1547 [09:46<11:15,  1.10s/it][A
los

{"eval_acc": 0.9457163426925846, "eval_f1": 0.6005432417643776, "eval_acc_and_f1": 0.7731297922284811, "eval_loss": 0.20267304166087083, "learning_rate": 1.630356715345942e-05, "train_loss": 0.2318937395756588, "step": 1236}



loss=0.073:  80%|███████▉  | 1237/1547 [12:55<36:43,  7.11s/it][A
loss=0.117:  80%|███████▉  | 1237/1547 [12:55<36:43,  7.11s/it][A
loss=0.117:  80%|████████  | 1238/1547 [12:55<26:25,  5.13s/it][A
loss=0.293:  80%|████████  | 1238/1547 [12:55<26:25,  5.13s/it][A
loss=0.293:  80%|████████  | 1239/1547 [12:56<19:14,  3.75s/it][A
loss=0.472:  80%|████████  | 1239/1547 [12:56<19:14,  3.75s/it][A
loss=0.472:  80%|████████  | 1240/1547 [12:56<14:13,  2.78s/it][A
loss=0.492:  80%|████████  | 1240/1547 [12:56<14:13,  2.78s/it][A
loss=0.492:  80%|████████  | 1241/1547 [12:57<10:43,  2.10s/it][A
loss=0.264:  80%|████████  | 1241/1547 [12:57<10:43,  2.10s/it][A
loss=0.264:  80%|████████  | 1242/1547 [12:57<08:16,  1.63s/it][A
loss=0.146:  80%|████████  | 1242/1547 [12:58<08:16,  1.63s/it][A
loss=0.146:  80%|████████  | 1243/1547 [12:58<06:34,  1.30s/it][A
loss=0.244:  80%|████████  | 1243/1547 [12:58<06:34,  1.30s/it][A
loss=0.244:  80%|████████  | 1244/1547 [12:58<05:22,  1.06s/i

{"eval_acc": 0.9506119510439165, "eval_f1": 0.6239344931924, "eval_acc_and_f1": 0.7872732221181582, "eval_loss": 0.18489824581359113, "learning_rate": 1.4824036389753414e-05, "train_loss": 0.2032768332631505, "step": 1545}



loss=0.279: 100%|█████████▉| 1546/1547 [16:11<00:07,  7.76s/it][A
loss=0.231: 100%|█████████▉| 1546/1547 [16:11<00:07,  7.76s/it][A
loss=0.231: 100%|██████████| 1547/1547 [16:11<00:00,  1.59it/s]
Epoch:  33%|███▎      | 1/3 [16:11<32:23, 971.67s/it]
Iteration:   0%|          | 0/1547 [00:00<?, ?it/s][A
loss=0.038:   0%|          | 0/1547 [00:00<?, ?it/s][A
loss=0.038:   0%|          | 1/1547 [00:00<13:48,  1.87it/s][A
loss=0.077:   0%|          | 1/1547 [00:00<13:48,  1.87it/s][A
loss=0.077:   0%|          | 2/1547 [00:01<13:48,  1.86it/s][A
loss=0.303:   0%|          | 2/1547 [00:01<13:48,  1.86it/s][A
loss=0.303:   0%|          | 3/1547 [00:01<13:39,  1.88it/s][A
loss=0.166:   0%|          | 3/1547 [00:01<13:39,  1.88it/s][A
loss=0.166:   0%|          | 4/1547 [00:02<13:34,  1.89it/s][A
loss=0.138:   0%|          | 4/1547 [00:02<13:34,  1.89it/s][A
loss=0.138:   0%|          | 5/1547 [00:02<13:39,  1.88it/s][A
loss=0.202:   0%|          | 5/1547 [00:02<13:39,  1.88it/s]

{"eval_acc": 0.9556515478761699, "eval_f1": 0.6327094407297164, "eval_acc_and_f1": 0.7941804943029431, "eval_loss": 0.1662155965875302, "learning_rate": 1.3344505626047404e-05, "train_loss": 0.14923108788357872, "step": 1854}



loss=0.061:  20%|█▉        | 308/1547 [03:14<2:34:14,  7.47s/it][A
loss=0.027:  20%|█▉        | 308/1547 [03:14<2:34:14,  7.47s/it][A
loss=0.027:  20%|█▉        | 309/1547 [03:14<1:51:07,  5.39s/it][A
loss=0.043:  20%|█▉        | 309/1547 [03:14<1:51:07,  5.39s/it][A
loss=0.043:  20%|██        | 310/1547 [03:15<1:20:58,  3.93s/it][A
loss=0.027:  20%|██        | 310/1547 [03:15<1:20:58,  3.93s/it][A
loss=0.027:  20%|██        | 311/1547 [03:15<59:53,  2.91s/it]  [A
loss=0.020:  20%|██        | 311/1547 [03:15<59:53,  2.91s/it][A
loss=0.020:  20%|██        | 312/1547 [03:16<45:06,  2.19s/it][A
loss=0.103:  20%|██        | 312/1547 [03:16<45:06,  2.19s/it][A
loss=0.103:  20%|██        | 313/1547 [03:16<34:46,  1.69s/it][A
loss=0.073:  20%|██        | 313/1547 [03:16<34:46,  1.69s/it][A
loss=0.073:  20%|██        | 314/1547 [03:17<27:36,  1.34s/it][A
loss=0.079:  20%|██        | 314/1547 [03:17<27:36,  1.34s/it][A
loss=0.079:  20%|██        | 315/1547 [03:17<22:30,  1.10s/it

{"eval_acc": 0.9547876169906407, "eval_f1": 0.6902901700449633, "eval_acc_and_f1": 0.822538893517802, "eval_loss": 0.16376650373318366, "learning_rate": 1.1864974862341394e-05, "train_loss": 0.1533379883182907, "step": 2163}



loss=0.026:  40%|███▉      | 617/1547 [06:30<2:01:03,  7.81s/it][A
loss=0.716:  40%|███▉      | 617/1547 [06:30<2:01:03,  7.81s/it][A
loss=0.716:  40%|███▉      | 618/1547 [06:30<1:27:03,  5.62s/it][A
loss=0.317:  40%|███▉      | 618/1547 [06:31<1:27:03,  5.62s/it][A
loss=0.317:  40%|████      | 619/1547 [06:31<1:03:19,  4.09s/it][A
loss=0.340:  40%|████      | 619/1547 [06:31<1:03:19,  4.09s/it][A
loss=0.340:  40%|████      | 620/1547 [06:31<46:40,  3.02s/it]  [A
loss=0.099:  40%|████      | 620/1547 [06:32<46:40,  3.02s/it][A
loss=0.099:  40%|████      | 621/1547 [06:32<35:07,  2.28s/it][A
loss=0.220:  40%|████      | 621/1547 [06:32<35:07,  2.28s/it][A
loss=0.220:  40%|████      | 622/1547 [06:33<27:02,  1.75s/it][A
loss=0.025:  40%|████      | 622/1547 [06:33<27:02,  1.75s/it][A
loss=0.025:  40%|████      | 623/1547 [06:33<21:19,  1.38s/it][A
loss=0.101:  40%|████      | 623/1547 [06:33<21:19,  1.38s/it][A
loss=0.101:  40%|████      | 624/1547 [06:34<17:20,  1.13s/it

{"eval_acc": 0.9582433405327574, "eval_f1": 0.6979874855539449, "eval_acc_and_f1": 0.8281154130433511, "eval_loss": 0.15737635642290115, "learning_rate": 1.0385444098635385e-05, "train_loss": 0.14975398885761312, "step": 2472}



loss=0.052:  60%|█████▉    | 926/1547 [09:43<1:13:14,  7.08s/it][A
loss=0.154:  60%|█████▉    | 926/1547 [09:43<1:13:14,  7.08s/it][A
loss=0.154:  60%|█████▉    | 927/1547 [09:43<52:47,  5.11s/it]  [A
loss=0.030:  60%|█████▉    | 927/1547 [09:43<52:47,  5.11s/it][A
loss=0.030:  60%|█████▉    | 928/1547 [09:44<38:30,  3.73s/it][A
loss=0.172:  60%|█████▉    | 928/1547 [09:44<38:30,  3.73s/it][A
loss=0.172:  60%|██████    | 929/1547 [09:44<28:33,  2.77s/it][A
loss=0.354:  60%|██████    | 929/1547 [09:44<28:33,  2.77s/it][A
loss=0.354:  60%|██████    | 930/1547 [09:45<21:34,  2.10s/it][A
loss=0.505:  60%|██████    | 930/1547 [09:45<21:34,  2.10s/it][A
loss=0.505:  60%|██████    | 931/1547 [09:45<16:41,  1.63s/it][A
loss=0.051:  60%|██████    | 931/1547 [09:45<16:41,  1.63s/it][A
loss=0.051:  60%|██████    | 932/1547 [09:46<13:17,  1.30s/it][A
loss=0.105:  60%|██████    | 932/1547 [09:46<13:17,  1.30s/it][A
loss=0.105:  60%|██████    | 933/1547 [09:46<10:54,  1.07s/it][A
los

{"eval_acc": 0.9585313174946004, "eval_f1": 0.7016493213862836, "eval_acc_and_f1": 0.830090319440442, "eval_loss": 0.15270331488656147, "learning_rate": 8.905913334929376e-06, "train_loss": 0.12956041623732756, "step": 2781}



loss=0.029:  80%|███████▉  | 1235/1547 [12:56<37:01,  7.12s/it][A
loss=0.027:  80%|███████▉  | 1235/1547 [12:56<37:01,  7.12s/it][A
loss=0.027:  80%|███████▉  | 1236/1547 [12:56<26:38,  5.14s/it][A
loss=0.191:  80%|███████▉  | 1236/1547 [12:56<26:38,  5.14s/it][A
loss=0.191:  80%|███████▉  | 1237/1547 [12:57<19:23,  3.75s/it][A
loss=0.012:  80%|███████▉  | 1237/1547 [12:57<19:23,  3.75s/it][A
loss=0.012:  80%|████████  | 1238/1547 [12:57<14:20,  2.78s/it][A
loss=0.144:  80%|████████  | 1238/1547 [12:57<14:20,  2.78s/it][A
loss=0.144:  80%|████████  | 1239/1547 [12:58<10:48,  2.11s/it][A
loss=0.297:  80%|████████  | 1239/1547 [12:58<10:48,  2.11s/it][A
loss=0.297:  80%|████████  | 1240/1547 [12:58<08:20,  1.63s/it][A
loss=0.345:  80%|████████  | 1240/1547 [12:58<08:20,  1.63s/it][A
loss=0.345:  80%|████████  | 1241/1547 [12:59<06:37,  1.30s/it][A
loss=0.083:  80%|████████  | 1241/1547 [12:59<06:37,  1.30s/it][A
loss=0.083:  80%|████████  | 1242/1547 [12:59<05:25,  1.07s/i

{"eval_acc": 0.9614110871130309, "eval_f1": 0.7035285570454611, "eval_acc_and_f1": 0.832469822079246, "eval_loss": 0.14353272039443254, "learning_rate": 7.426382571223366e-06, "train_loss": 0.13594945164210492, "step": 3090}



loss=0.022: 100%|█████████▉| 1544/1547 [16:11<00:23,  7.68s/it][A
loss=0.138: 100%|█████████▉| 1544/1547 [16:11<00:23,  7.68s/it][A
loss=0.138: 100%|█████████▉| 1545/1547 [16:12<00:11,  5.54s/it][A
loss=0.012: 100%|█████████▉| 1545/1547 [16:12<00:11,  5.54s/it][A
loss=0.012: 100%|█████████▉| 1546/1547 [16:12<00:04,  4.03s/it][A
loss=0.159: 100%|█████████▉| 1546/1547 [16:13<00:04,  4.03s/it][A
loss=0.159: 100%|██████████| 1547/1547 [16:13<00:00,  1.59it/s]
Epoch:  67%|██████▋   | 2/3 [32:25<16:12, 972.19s/it]
Iteration:   0%|          | 0/1547 [00:00<?, ?it/s][A
loss=0.034:   0%|          | 0/1547 [00:00<?, ?it/s][A
loss=0.034:   0%|          | 1/1547 [00:00<13:40,  1.88it/s][A
loss=0.073:   0%|          | 1/1547 [00:00<13:40,  1.88it/s][A
loss=0.073:   0%|          | 2/1547 [00:01<13:42,  1.88it/s][A
loss=0.027:   0%|          | 2/1547 [00:01<13:42,  1.88it/s][A
loss=0.027:   0%|          | 3/1547 [00:01<13:36,  1.89it/s][A
loss=0.032:   0%|          | 3/1547 [00:01<13:36

{"eval_acc": 0.9608351331893449, "eval_f1": 0.7112586202870165, "eval_acc_and_f1": 0.8360468767381807, "eval_loss": 0.15092272856937988, "learning_rate": 5.9468518075173575e-06, "train_loss": 0.10629290815301143, "step": 3399}



loss=0.024:  20%|█▉        | 306/1547 [03:07<2:09:10,  6.25s/it][A
loss=0.017:  20%|█▉        | 306/1547 [03:07<2:09:10,  6.25s/it][A
loss=0.017:  20%|█▉        | 307/1547 [03:07<1:33:35,  4.53s/it][A
loss=0.410:  20%|█▉        | 307/1547 [03:07<1:33:35,  4.53s/it][A
loss=0.410:  20%|█▉        | 308/1547 [03:08<1:08:40,  3.33s/it][A
loss=0.016:  20%|█▉        | 308/1547 [03:08<1:08:40,  3.33s/it][A
loss=0.016:  20%|█▉        | 309/1547 [03:08<51:17,  2.49s/it]  [A
loss=0.112:  20%|█▉        | 309/1547 [03:08<51:17,  2.49s/it][A
loss=0.112:  20%|██        | 310/1547 [03:09<39:06,  1.90s/it][A
loss=0.372:  20%|██        | 310/1547 [03:09<39:06,  1.90s/it][A
loss=0.372:  20%|██        | 311/1547 [03:09<30:34,  1.48s/it][A
loss=0.029:  20%|██        | 311/1547 [03:09<30:34,  1.48s/it][A
loss=0.029:  20%|██        | 312/1547 [03:10<24:35,  1.19s/it][A
loss=0.077:  20%|██        | 312/1547 [03:10<24:35,  1.19s/it][A
loss=0.077:  20%|██        | 313/1547 [03:10<20:24,  1.01it/s

{"eval_acc": 0.9614110871130309, "eval_f1": 0.7238084479752154, "eval_acc_and_f1": 0.8426097675441231, "eval_loss": 0.1476629033152546, "learning_rate": 4.467321043811348e-06, "train_loss": 0.091090877692616, "step": 3708}



loss=0.076:  40%|███▉      | 615/1547 [06:15<1:36:45,  6.23s/it][A
loss=0.122:  40%|███▉      | 615/1547 [06:15<1:36:45,  6.23s/it][A
loss=0.122:  40%|███▉      | 616/1547 [06:15<1:10:06,  4.52s/it][A
loss=0.223:  40%|███▉      | 616/1547 [06:15<1:10:06,  4.52s/it][A
loss=0.223:  40%|███▉      | 617/1547 [06:16<51:25,  3.32s/it]  [A
loss=0.082:  40%|███▉      | 617/1547 [06:16<51:25,  3.32s/it][A
loss=0.082:  40%|███▉      | 618/1547 [06:16<38:21,  2.48s/it][A
loss=0.192:  40%|███▉      | 618/1547 [06:16<38:21,  2.48s/it][A
loss=0.192:  40%|████      | 619/1547 [06:17<29:14,  1.89s/it][A
loss=0.158:  40%|████      | 619/1547 [06:17<29:14,  1.89s/it][A
loss=0.158:  40%|████      | 620/1547 [06:17<22:50,  1.48s/it][A
loss=0.015:  40%|████      | 620/1547 [06:17<22:50,  1.48s/it][A
loss=0.015:  40%|████      | 621/1547 [06:18<18:22,  1.19s/it][A
loss=0.193:  40%|████      | 621/1547 [06:18<18:22,  1.19s/it][A
loss=0.193:  40%|████      | 622/1547 [06:18<15:14,  1.01it/s][A

{"eval_acc": 0.9605471562275018, "eval_f1": 0.7251323002651604, "eval_acc_and_f1": 0.8428397282463311, "eval_loss": 0.15239618931497848, "learning_rate": 2.9877902801053386e-06, "train_loss": 0.09415632954867961, "step": 4017}



loss=0.015:  60%|█████▉    | 924/1547 [09:23<1:05:27,  6.30s/it][A
loss=0.010:  60%|█████▉    | 924/1547 [09:23<1:05:27,  6.30s/it][A
loss=0.010:  60%|█████▉    | 925/1547 [09:24<47:23,  4.57s/it]  [A
loss=0.094:  60%|█████▉    | 925/1547 [09:24<47:23,  4.57s/it][A
loss=0.094:  60%|█████▉    | 926/1547 [09:24<34:45,  3.36s/it][A
loss=0.253:  60%|█████▉    | 926/1547 [09:24<34:45,  3.36s/it][A
loss=0.253:  60%|█████▉    | 927/1547 [09:25<25:55,  2.51s/it][A
loss=0.011:  60%|█████▉    | 927/1547 [09:25<25:55,  2.51s/it][A
loss=0.011:  60%|█████▉    | 928/1547 [09:25<19:43,  1.91s/it][A
loss=0.021:  60%|█████▉    | 928/1547 [09:25<19:43,  1.91s/it][A
loss=0.021:  60%|██████    | 929/1547 [09:26<15:23,  1.50s/it][A
loss=0.011:  60%|██████    | 929/1547 [09:26<15:23,  1.50s/it][A
loss=0.011:  60%|██████    | 930/1547 [09:26<12:22,  1.20s/it][A
loss=0.030:  60%|██████    | 930/1547 [09:26<12:22,  1.20s/it][A
loss=0.030:  60%|██████    | 931/1547 [09:27<10:15,  1.00it/s][A
los

{"eval_acc": 0.9604031677465803, "eval_f1": 0.7268272832518544, "eval_acc_and_f1": 0.8436152254992173, "eval_loss": 0.15122054889798164, "learning_rate": 1.5082595163993296e-06, "train_loss": 0.08751939691853344, "step": 4326}



loss=0.100:  80%|███████▉  | 1233/1547 [12:32<32:43,  6.25s/it][A
loss=0.019:  80%|███████▉  | 1233/1547 [12:32<32:43,  6.25s/it][A
loss=0.019:  80%|███████▉  | 1234/1547 [12:32<23:39,  4.54s/it][A
loss=0.006:  80%|███████▉  | 1234/1547 [12:33<23:39,  4.54s/it][A
loss=0.006:  80%|███████▉  | 1235/1547 [12:33<17:19,  3.33s/it][A
loss=0.044:  80%|███████▉  | 1235/1547 [12:33<17:19,  3.33s/it][A
loss=0.044:  80%|███████▉  | 1236/1547 [12:33<12:54,  2.49s/it][A
loss=0.023:  80%|███████▉  | 1236/1547 [12:34<12:54,  2.49s/it][A
loss=0.023:  80%|███████▉  | 1237/1547 [12:34<09:48,  1.90s/it][A
loss=0.106:  80%|███████▉  | 1237/1547 [12:34<09:48,  1.90s/it][A
loss=0.106:  80%|████████  | 1238/1547 [12:35<07:39,  1.49s/it][A
loss=0.254:  80%|████████  | 1238/1547 [12:35<07:39,  1.49s/it][A
loss=0.254:  80%|████████  | 1239/1547 [12:35<06:08,  1.20s/it][A
loss=0.118:  80%|████████  | 1239/1547 [12:35<06:08,  1.20s/it][A
loss=0.118:  80%|████████  | 1240/1547 [12:36<05:05,  1.00it/

{"eval_acc": 0.961699064074874, "eval_f1": 0.7416385472342719, "eval_acc_and_f1": 0.8516688056545729, "eval_loss": 0.14804813465369598, "learning_rate": 2.872875269332057e-08, "train_loss": 0.100553522410902, "step": 4635}



loss=0.007: 100%|█████████▉| 1542/1547 [15:41<00:31,  6.25s/it][A
loss=0.077: 100%|█████████▉| 1542/1547 [15:41<00:31,  6.25s/it][A
loss=0.077: 100%|█████████▉| 1543/1547 [15:41<00:18,  4.53s/it][A
loss=0.045: 100%|█████████▉| 1543/1547 [15:41<00:18,  4.53s/it][A
loss=0.045: 100%|█████████▉| 1544/1547 [15:42<00:09,  3.33s/it][A
loss=0.122: 100%|█████████▉| 1544/1547 [15:42<00:09,  3.33s/it][A
loss=0.122: 100%|█████████▉| 1545/1547 [15:42<00:04,  2.49s/it][A
loss=0.047: 100%|█████████▉| 1545/1547 [15:42<00:04,  2.49s/it][A
loss=0.047: 100%|█████████▉| 1546/1547 [15:43<00:01,  1.90s/it][A
loss=0.030: 100%|█████████▉| 1546/1547 [15:43<00:01,  1.90s/it][A07/17/2022 13:21:01 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 13:21:01 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 13:21:01 - INFO - utilities.trainers -     Batch size = 256


Evaluating:   0%|          | 0/28 [00:00<?, ?it/s][A[A

Evaluating:   4%|▎         | 1/28 [00:01<

{"eval_acc": 0.961699064074874, "eval_f1": 0.7416385472342719, "eval_acc_and_f1": 0.8516688056545729, "eval_loss": 0.14804877447230474, "learning_rate": 0.0, "train_loss": 0.001062721211439101, "step": 4641}


07/17/2022 13:21:31 - INFO - utilities.trainers -   ***** Running evaluation iter-2_trial1 *****
07/17/2022 13:21:31 - INFO - utilities.trainers -     Num examples = 6945
07/17/2022 13:21:31 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 28/28 [00:27<00:00,  1.03it/s]
07/17/2022 13:21:58 - INFO - utilities.trainers -   ***** Eval results iter-2_trial1 *****
07/17/2022 13:21:58 - INFO - utilities.trainers -     acc = 0.9614110871130309
07/17/2022 13:21:58 - INFO - utilities.trainers -     acc_and_f1 = 0.832469822079246
07/17/2022 13:21:58 - INFO - utilities.trainers -     f1 = 0.7035285570454611
07/17/2022 13:22:01 - INFO - utilities.data_loader -   Loading features from cached file /content/drive/MyDrive/UvA/thesis/contrastive-active-learning/data/ORNL20/cached_test_bert-base-dutch-cased_256_ornl20_original



Done Training!


Start Testing on test set!



07/17/2022 13:22:07 - INFO - utilities.trainers -   ***** Running evaluation  *****
07/17/2022 13:22:07 - INFO - utilities.trainers -     Num examples = 34722
07/17/2022 13:22:08 - INFO - utilities.trainers -     Batch size = 256
Evaluating: 100%|██████████| 136/136 [02:15<00:00,  1.00it/s]
07/17/2022 13:24:23 - INFO - utilities.trainers -   ***** Eval results  *****
07/17/2022 13:24:23 - INFO - utilities.trainers -     acc = 0.9580957318126836
07/17/2022 13:24:23 - INFO - utilities.trainers -     acc_and_f1 = 0.8038705445329088
07/17/2022 13:24:23 - INFO - utilities.trainers -     f1 = 0.6496453572531341



Evaluating robustness! Start testing on OOD test set!


Evaluating Dpool!



************
End of iteration 2:
Train loss 0.2827, Val loss 0.14353272039443254, Test loss 0.15530538356260343
Annotated 500 samples
Current labeled (training) data: 50000 samples
Remaining budget: 0 (in samples)
************

Saving json with the results....
The end!....


## new bert loop

In [None]:
for current_seed in seeds:
  print(current_seed)
  flags = make_flags(params, acq = 'cal', seed = current_seed)
  %run $AL_SCRIPT $flags

In [None]:
params['model_name_or_path'] = TAPT_BERTJE

for current_seed in seeds:
  print(current_seed)
  flags = make_flags(params, acq = 'cal', seed = current_seed)
  %run $AL_SCRIPT $flags

In [None]:
params['model_name_or_path'] = TAPT_BERTJE

for current_seed in seeds:
  print(current_seed)
  flags = make_flags(params, acq = 'random', seed = current_seed)
  %run $AL_SCRIPT $flags

---
from here after we may again

In [None]:
seeds = [3, 4, 5]

params['model_name_or_path'] = 'wietsedv/bert-base-dutch-cased'
params['max_seq_length'] = 256

for current_seed in seeds:
  print(current_seed)
  flags = make_flags(params, acq = 'random', seed = current_seed)
  %run $AL_SCRIPT $flags

In [None]:
seeds = [512001, 512002, 512003, 512004, 512005]
params['model_name_or_path'] = TAPT_BERTJE
params['max_seq_length'] = 512 # <---------------------- this one
params['per_gpu_train_batch_size'] = 16
params['acquisition_size'] = '250' # <------------------ and this 

for current_seed in seeds:
  print(current_seed)
  flags = make_flags(params, acq = 'entropy', seed = current_seed)
  %run $AL_SCRIPT $flags

## full supervision

In [None]:
params['model_name_or_path'] = TAPT_BERTJE
params['acquisition_size'] = '1%' # <------------------ and this 
params['init_train_data'] = '98%' # <------------------ and this 
params['budget'] = '100%' # <------------------ and this 

for current_seed in seeds:
  print(current_seed)
  flags = make_flags(params, acq = 'random', seed = current_seed)
  %run $AL_SCRIPT $flags