In [1]:
import evaluate
import tokenizers
import numpy as np
import pandas as pd
import transformers
from datetime import datetime
from datasets import load_dataset, load_from_disk, concatenate_datasets
from transformers import AutoTokenizer, DataCollatorForLanguageModeling, AlbertForMaskedLM, AutoConfig, TrainingArguments, Trainer, pipeline

Carregando o dataset:

In [2]:
# Load the dataset previously saved to disk; later cells index it by split
# name ("train", "val", "test").
dataset = load_from_disk(dataset_path='../CLIdata/datasets/cuneiform-spaced-indexed')

Carregando o tokenizer:

In [3]:
# Load the custom tokenizer from its local directory, then read the two
# properties used further down: the maximum sequence length and the vocabulary size.
my_tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path='../tokenizers/bert-base-uncased_train_val_test_maxlen_512_vocab_size_1000'
)
max_len = my_tokenizer.model_max_length
vocab_size = my_tokenizer.vocab_size

Criamos uma função que tokeniza os textos e devolve, para cada um, a sequência de índices dos tokens (`input_ids`):

In [4]:
def tokenize_function(example):
    """Tokenize a batch of texts and keep only the token-id sequences.

    Args:
        example: Batch mapping whose "text" entry holds the raw strings
            (the function is applied with ``batched=True``).

    Returns:
        A dict with a single "input_ids" key: a list containing one token-id
        sequence per input text, truncated to ``max_len`` tokens.
    """
    encoded = my_tokenizer(example["text"], truncation=True, max_length=max_len)
    # Only the ids are returned; every other tokenizer output is dropped here.
    return {"input_ids": list(encoded["input_ids"])}

In [5]:
# Tokenize every split in batches. The columns of the "train" split are removed
# from the result, leaving what tokenize_function returns ("input_ids").
tokenized_dataset = dataset.map(
    function=tokenize_function,
    remove_columns=dataset["train"].column_names,
    batched=True,
)

Loading cached processed dataset at ../CLIdata/datasets/cuneiform-spaced-indexed/train\cache-585e058a74ca0651.arrow
Loading cached processed dataset at ../CLIdata/datasets/cuneiform-spaced-indexed/val\cache-95b8b34ca1812b1e.arrow


  0%|          | 0/7 [00:00<?, ?ba/s]

In [6]:
# Batch collator for the masked-language-modelling objective: masking is
# enabled (mlm=True) with a 15% masking probability.
data_collator = DataCollatorForLanguageModeling(
    mlm_probability=0.15,
    mlm=True,
    tokenizer=my_tokenizer,
)

In [7]:
# Fetch the stock 'albert-base-v2' configuration and construct the masked-LM
# model from that configuration alone (no from_pretrained call on the model).
# NOTE(review): `vocab_size` read from the custom tokenizer is not passed to the
# configuration, so the model keeps the configuration's own vocabulary size —
# confirm this is intended.
config = AutoConfig.from_pretrained(pretrained_model_name_or_path='albert-base-v2')
model = AlbertForMaskedLM(config=config)

In [32]:
# Timestamp that makes the checkpoint directory unique per run.
# A single now() call keeps the date and the time consistent with each other,
# and %m (month) is used for the date: the previous %M (minute) produced
# directory names such as "2022-45-13".
dia, hora = datetime.now().strftime("%Y-%m-%d %H-%M").split(" ")

batch = 16
training_args = TrainingArguments(
    output_dir=f'../checkpoints-albert/standard_batch{batch}_{dia}_hora_{hora}',
    overwrite_output_dir=True,
    per_device_train_batch_size=batch,
    learning_rate=1e-5,
    logging_strategy='steps',
    logging_steps=1_000,   # integer step count (was the float 1e3)
    save_strategy='steps',
    save_steps=10_000,
    max_steps=100_000,     # when set, overrides num_train_epochs (see the warning printed below)
)

# NOTE(review): the train, val and test splits are concatenated into a single
# training set, so nothing is held out for evaluation in this notebook —
# confirm this is intended.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=concatenate_datasets([
        tokenized_dataset['train'],
        tokenized_dataset['val'],
        tokenized_dataset['test'],
    ]),
)

max_steps is given, it will override any value given in num_train_epochs


In [33]:
# Run the training loop configured above; the value returned by train() is
# displayed as the cell result.
trainer.train()

***** Running training *****
  Num examples = 98025
  Num Epochs = 17
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 100000
  Number of trainable parameters = 11221680


  0%|          | 0/100000 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 9.1386, 'learning_rate': 9.9e-06, 'epoch': 0.16}
{'loss': 7.2398, 'learning_rate': 9.800000000000001e-06, 'epoch': 0.33}
{'loss': 5.884, 'learning_rate': 9.7e-06, 'epoch': 0.49}
{'loss': 5.2498, 'learning_rate': 9.600000000000001e-06, 'epoch': 0.65}
{'loss': 4.9791, 'learning_rate': 9.5e-06, 'epoch': 0.82}
{'loss': 4.8579, 'learning_rate': 9.4e-06, 'epoch': 0.98}
{'loss': 4.7726, 'learning_rate': 9.3e-06, 'epoch': 1.14}
{'loss': 4.7219, 'learning_rate': 9.200000000000002e-06, 'epoch': 1.31}
{'loss': 4.671, 'learning_rate': 9.100000000000001e-06, 'epoch': 1.47}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-10000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-10000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-10000\pytorch_model.bin


{'loss': 4.6209, 'learning_rate': 9e-06, 'epoch': 1.63}
{'loss': 4.6037, 'learning_rate': 8.900000000000001e-06, 'epoch': 1.8}
{'loss': 4.5712, 'learning_rate': 8.8e-06, 'epoch': 1.96}
{'loss': 4.5376, 'learning_rate': 8.700000000000001e-06, 'epoch': 2.12}
{'loss': 4.4857, 'learning_rate': 8.6e-06, 'epoch': 2.28}
{'loss': 4.4838, 'learning_rate': 8.5e-06, 'epoch': 2.45}
{'loss': 4.4549, 'learning_rate': 8.400000000000001e-06, 'epoch': 2.61}
{'loss': 4.4351, 'learning_rate': 8.3e-06, 'epoch': 2.77}
{'loss': 4.3956, 'learning_rate': 8.2e-06, 'epoch': 2.94}
{'loss': 4.3481, 'learning_rate': 8.1e-06, 'epoch': 3.1}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-20000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-20000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-20000\pytorch_model.bin


{'loss': 4.3467, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.26}
{'loss': 4.328, 'learning_rate': 7.9e-06, 'epoch': 3.43}
{'loss': 4.2787, 'learning_rate': 7.800000000000002e-06, 'epoch': 3.59}
{'loss': 4.291, 'learning_rate': 7.7e-06, 'epoch': 3.75}
{'loss': 4.2648, 'learning_rate': 7.600000000000001e-06, 'epoch': 3.92}
{'loss': 4.2271, 'learning_rate': 7.500000000000001e-06, 'epoch': 4.08}
{'loss': 4.2156, 'learning_rate': 7.4e-06, 'epoch': 4.24}
{'loss': 4.1876, 'learning_rate': 7.3e-06, 'epoch': 4.41}
{'loss': 4.1721, 'learning_rate': 7.2000000000000005e-06, 'epoch': 4.57}
{'loss': 4.1284, 'learning_rate': 7.100000000000001e-06, 'epoch': 4.73}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-30000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-30000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-30000\pytorch_model.bin


{'loss': 4.1358, 'learning_rate': 7e-06, 'epoch': 4.9}
{'loss': 4.088, 'learning_rate': 6.9e-06, 'epoch': 5.06}
{'loss': 4.0977, 'learning_rate': 6.800000000000001e-06, 'epoch': 5.22}
{'loss': 4.0407, 'learning_rate': 6.700000000000001e-06, 'epoch': 5.39}
{'loss': 4.0515, 'learning_rate': 6.600000000000001e-06, 'epoch': 5.55}
{'loss': 4.0205, 'learning_rate': 6.5000000000000004e-06, 'epoch': 5.71}
{'loss': 4.0264, 'learning_rate': 6.4000000000000006e-06, 'epoch': 5.88}
{'loss': 4.0084, 'learning_rate': 6.300000000000001e-06, 'epoch': 6.04}
{'loss': 3.9846, 'learning_rate': 6.200000000000001e-06, 'epoch': 6.2}
{'loss': 3.9477, 'learning_rate': 6.1e-06, 'epoch': 6.37}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-40000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-40000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-40000\pytorch_model.bin


{'loss': 3.9614, 'learning_rate': 6e-06, 'epoch': 6.53}
{'loss': 3.9349, 'learning_rate': 5.9e-06, 'epoch': 6.69}
{'loss': 3.9652, 'learning_rate': 5.8e-06, 'epoch': 6.85}
{'loss': 3.9117, 'learning_rate': 5.7e-06, 'epoch': 7.02}
{'loss': 3.8932, 'learning_rate': 5.600000000000001e-06, 'epoch': 7.18}
{'loss': 3.8878, 'learning_rate': 5.500000000000001e-06, 'epoch': 7.34}
{'loss': 3.8775, 'learning_rate': 5.400000000000001e-06, 'epoch': 7.51}
{'loss': 3.8836, 'learning_rate': 5.300000000000001e-06, 'epoch': 7.67}
{'loss': 3.8713, 'learning_rate': 5.2e-06, 'epoch': 7.83}
{'loss': 3.832, 'learning_rate': 5.1e-06, 'epoch': 8.0}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-50000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-50000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-50000\pytorch_model.bin


{'loss': 3.8226, 'learning_rate': 5e-06, 'epoch': 8.16}
{'loss': 3.8308, 'learning_rate': 4.9000000000000005e-06, 'epoch': 8.32}
{'loss': 3.7984, 'learning_rate': 4.800000000000001e-06, 'epoch': 8.49}
{'loss': 3.8132, 'learning_rate': 4.7e-06, 'epoch': 8.65}
{'loss': 3.789, 'learning_rate': 4.600000000000001e-06, 'epoch': 8.81}
{'loss': 3.7819, 'learning_rate': 4.5e-06, 'epoch': 8.98}
{'loss': 3.7386, 'learning_rate': 4.4e-06, 'epoch': 9.14}
{'loss': 3.7403, 'learning_rate': 4.3e-06, 'epoch': 9.3}
{'loss': 3.7543, 'learning_rate': 4.2000000000000004e-06, 'epoch': 9.47}
{'loss': 3.758, 'learning_rate': 4.1e-06, 'epoch': 9.63}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-60000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-60000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-60000\pytorch_model.bin


{'loss': 3.7227, 'learning_rate': 4.000000000000001e-06, 'epoch': 9.79}
{'loss': 3.7348, 'learning_rate': 3.900000000000001e-06, 'epoch': 9.96}
{'loss': 3.719, 'learning_rate': 3.8000000000000005e-06, 'epoch': 10.12}
{'loss': 3.7045, 'learning_rate': 3.7e-06, 'epoch': 10.28}
{'loss': 3.7225, 'learning_rate': 3.6000000000000003e-06, 'epoch': 10.45}
{'loss': 3.69, 'learning_rate': 3.5e-06, 'epoch': 10.61}
{'loss': 3.7049, 'learning_rate': 3.4000000000000005e-06, 'epoch': 10.77}
{'loss': 3.7147, 'learning_rate': 3.3000000000000006e-06, 'epoch': 10.94}
{'loss': 3.6917, 'learning_rate': 3.2000000000000003e-06, 'epoch': 11.1}
{'loss': 3.6662, 'learning_rate': 3.1000000000000004e-06, 'epoch': 11.26}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-70000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-70000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-70000\pytorch_model.bin


{'loss': 3.6389, 'learning_rate': 3e-06, 'epoch': 11.42}
{'loss': 3.6691, 'learning_rate': 2.9e-06, 'epoch': 11.59}
{'loss': 3.6529, 'learning_rate': 2.8000000000000003e-06, 'epoch': 11.75}
{'loss': 3.6718, 'learning_rate': 2.7000000000000004e-06, 'epoch': 11.91}
{'loss': 3.6396, 'learning_rate': 2.6e-06, 'epoch': 12.08}
{'loss': 3.6319, 'learning_rate': 2.5e-06, 'epoch': 12.24}
{'loss': 3.6372, 'learning_rate': 2.4000000000000003e-06, 'epoch': 12.4}
{'loss': 3.6291, 'learning_rate': 2.3000000000000004e-06, 'epoch': 12.57}
{'loss': 3.628, 'learning_rate': 2.2e-06, 'epoch': 12.73}
{'loss': 3.635, 'learning_rate': 2.1000000000000002e-06, 'epoch': 12.89}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-80000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-80000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-80000\pytorch_model.bin


{'loss': 3.6176, 'learning_rate': 2.0000000000000003e-06, 'epoch': 13.06}
{'loss': 3.613, 'learning_rate': 1.9000000000000002e-06, 'epoch': 13.22}
{'loss': 3.6113, 'learning_rate': 1.8000000000000001e-06, 'epoch': 13.38}
{'loss': 3.5979, 'learning_rate': 1.7000000000000002e-06, 'epoch': 13.55}
{'loss': 3.5798, 'learning_rate': 1.6000000000000001e-06, 'epoch': 13.71}
{'loss': 3.5874, 'learning_rate': 1.5e-06, 'epoch': 13.87}
{'loss': 3.5889, 'learning_rate': 1.4000000000000001e-06, 'epoch': 14.04}
{'loss': 3.5911, 'learning_rate': 1.3e-06, 'epoch': 14.2}
{'loss': 3.5953, 'learning_rate': 1.2000000000000002e-06, 'epoch': 14.36}
{'loss': 3.5914, 'learning_rate': 1.1e-06, 'epoch': 14.53}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-90000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-90000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-90000\pytorch_model.bin


{'loss': 3.5963, 'learning_rate': 1.0000000000000002e-06, 'epoch': 14.69}
{'loss': 3.5634, 'learning_rate': 9.000000000000001e-07, 'epoch': 14.85}
{'loss': 3.557, 'learning_rate': 8.000000000000001e-07, 'epoch': 15.02}
{'loss': 3.5716, 'learning_rate': 7.000000000000001e-07, 'epoch': 15.18}
{'loss': 3.5871, 'learning_rate': 6.000000000000001e-07, 'epoch': 15.34}
{'loss': 3.5734, 'learning_rate': 5.000000000000001e-07, 'epoch': 15.51}
{'loss': 3.5658, 'learning_rate': 4.0000000000000003e-07, 'epoch': 15.67}
{'loss': 3.5825, 'learning_rate': 3.0000000000000004e-07, 'epoch': 15.83}
{'loss': 3.5668, 'learning_rate': 2.0000000000000002e-07, 'epoch': 15.99}
{'loss': 3.5574, 'learning_rate': 1.0000000000000001e-07, 'epoch': 16.16}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-100000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-100000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-100000\pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)




{'loss': 3.5532, 'learning_rate': 0.0, 'epoch': 16.32}
{'train_runtime': 9804.3097, 'train_samples_per_second': 163.194, 'train_steps_per_second': 10.2, 'train_loss': 4.048913425292969, 'epoch': 16.32}


TrainOutput(global_step=100000, training_loss=4.048913425292969, metrics={'train_runtime': 9804.3097, 'train_samples_per_second': 163.194, 'train_steps_per_second': 10.2, 'train_loss': 4.048913425292969, 'epoch': 16.32})

In [8]:
# Re-create the model architecture from the configuration stored with the
# step-100000 checkpoint. Only the configuration is read here; the model is
# constructed from it directly.
# NOTE(review): the weights are presumably restored later by
# trainer.train(resume_from_checkpoint=True) — confirm.
config = AutoConfig.from_pretrained(
    pretrained_model_name_or_path='../checkpoints-albert/standard_2022-45-13_hora_22-45/checkpoint-100000/'
)
model2 = AlbertForMaskedLM(config=config)

In [9]:
# Date/time stamps (not used by this cell's output_dir, kept for later use).
# %m (month) replaces the previous %M (minute) in the date format.
dia = datetime.today().strftime("%Y-%m-%d")
hora = datetime.now().strftime("%H-%M")

batch = 16
lr = 1e-5
training_args = TrainingArguments(
    # Must match the directory of the earlier run exactly so its checkpoints are
    # found; the "2022-45-13" part is a leftover of the old minute-for-month
    # formatting and must not be "corrected" here.
    output_dir='../checkpoints-albert/standard_2022-45-13_hora_22-45',
    overwrite_output_dir=True,
    per_device_train_batch_size=batch,
    learning_rate=lr,
    logging_strategy='steps',
    logging_steps=1_000,   # integer step count (was the float 1e3)
    save_strategy='steps',
    save_steps=10_000,
    # NOTE(review): raising max_steps from 100_000 to 1_000_000 changes the
    # learning-rate schedule on resume: the logs show the rate going from 0.0
    # at the end of the first run back to 8.99e-06 — confirm this is intended.
    max_steps=1_000_000,
)

trainer = Trainer(
    model=model2,
    args=training_args,
    data_collator=data_collator,
    train_dataset=concatenate_datasets([
        tokenized_dataset['train'],
        tokenized_dataset['val'],
        tokenized_dataset['test'],
    ]),
)

max_steps is given, it will override any value given in num_train_epochs


In [10]:
# Continue training from a checkpoint stored under training_args.output_dir
# (the log below shows checkpoint-100000 being loaded) instead of starting
# from step 0.
trainer.train(resume_from_checkpoint = True)

Loading model from ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-100000.
***** Running training *****
  Num examples = 98025
  Num Epochs = 164
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 1000000
  Number of trainable parameters = 11221680
  Continuing training from checkpoint, will skip to saved global_step
  Continuing training from epoch 16
  Continuing training from global step 100000
  Will skip the first 16 epochs then the first 1968 batches in the first epoch. If this takes a lot of time, you can add the `--ignore_data_skip` flag to your launch command, but you will resume the training on data already seen by your model.


  0%|          | 0/1968 [00:00<?, ?it/s]

  0%|          | 0/1000000 [00:00<?, ?it/s]

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 3.6283, 'learning_rate': 8.99e-06, 'epoch': 16.48}
{'loss': 3.6444, 'learning_rate': 8.98e-06, 'epoch': 16.65}
{'loss': 3.6182, 'learning_rate': 8.97e-06, 'epoch': 16.81}
{'loss': 3.625, 'learning_rate': 8.96e-06, 'epoch': 16.97}
{'loss': 3.5537, 'learning_rate': 8.95e-06, 'epoch': 17.14}
{'loss': 3.582, 'learning_rate': 8.94e-06, 'epoch': 17.3}
{'loss': 3.5884, 'learning_rate': 8.930000000000001e-06, 'epoch': 17.46}
{'loss': 3.5586, 'learning_rate': 8.920000000000001e-06, 'epoch': 17.63}
{'loss': 3.5367, 'learning_rate': 8.910000000000001e-06, 'epoch': 17.79}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-110000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-110000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-110000\pytorch_model.bin


{'loss': 3.541, 'learning_rate': 8.900000000000001e-06, 'epoch': 17.95}
{'loss': 3.5078, 'learning_rate': 8.890000000000001e-06, 'epoch': 18.12}
{'loss': 3.5412, 'learning_rate': 8.880000000000001e-06, 'epoch': 18.28}
{'loss': 3.4905, 'learning_rate': 8.870000000000001e-06, 'epoch': 18.44}
{'loss': 3.4996, 'learning_rate': 8.860000000000002e-06, 'epoch': 18.61}
{'loss': 3.5079, 'learning_rate': 8.85e-06, 'epoch': 18.77}
{'loss': 3.4609, 'learning_rate': 8.84e-06, 'epoch': 18.93}
{'loss': 3.4458, 'learning_rate': 8.83e-06, 'epoch': 19.1}
{'loss': 3.4647, 'learning_rate': 8.82e-06, 'epoch': 19.26}
{'loss': 3.4444, 'learning_rate': 8.81e-06, 'epoch': 19.42}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-120000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-120000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-120000\pytorch_model.bin


{'loss': 3.4377, 'learning_rate': 8.8e-06, 'epoch': 19.59}
{'loss': 3.4709, 'learning_rate': 8.79e-06, 'epoch': 19.75}
{'loss': 3.4103, 'learning_rate': 8.78e-06, 'epoch': 19.91}
{'loss': 3.41, 'learning_rate': 8.77e-06, 'epoch': 20.08}
{'loss': 3.3592, 'learning_rate': 8.76e-06, 'epoch': 20.24}
{'loss': 3.3858, 'learning_rate': 8.750000000000001e-06, 'epoch': 20.4}
{'loss': 3.3856, 'learning_rate': 8.740000000000001e-06, 'epoch': 20.56}
{'loss': 3.415, 'learning_rate': 8.730000000000001e-06, 'epoch': 20.73}
{'loss': 3.3894, 'learning_rate': 8.720000000000001e-06, 'epoch': 20.89}
{'loss': 3.3504, 'learning_rate': 8.710000000000001e-06, 'epoch': 21.05}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-130000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-130000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-130000\pytorch_model.bin


{'loss': 3.3372, 'learning_rate': 8.700000000000001e-06, 'epoch': 21.22}
{'loss': 3.3583, 'learning_rate': 8.690000000000002e-06, 'epoch': 21.38}
{'loss': 3.3481, 'learning_rate': 8.68e-06, 'epoch': 21.54}
{'loss': 3.3259, 'learning_rate': 8.67e-06, 'epoch': 21.71}
{'loss': 3.3322, 'learning_rate': 8.66e-06, 'epoch': 21.87}
{'loss': 3.3475, 'learning_rate': 8.65e-06, 'epoch': 22.03}
{'loss': 3.2954, 'learning_rate': 8.64e-06, 'epoch': 22.2}
{'loss': 3.2648, 'learning_rate': 8.63e-06, 'epoch': 22.36}
{'loss': 3.3037, 'learning_rate': 8.62e-06, 'epoch': 22.52}
{'loss': 3.2697, 'learning_rate': 8.61e-06, 'epoch': 22.69}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-140000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-140000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-140000\pytorch_model.bin


{'loss': 3.2891, 'learning_rate': 8.6e-06, 'epoch': 22.85}
{'loss': 3.2473, 'learning_rate': 8.59e-06, 'epoch': 23.01}
{'loss': 3.267, 'learning_rate': 8.580000000000001e-06, 'epoch': 23.18}
{'loss': 3.2732, 'learning_rate': 8.570000000000001e-06, 'epoch': 23.34}
{'loss': 3.2599, 'learning_rate': 8.560000000000001e-06, 'epoch': 23.5}
{'loss': 3.2553, 'learning_rate': 8.550000000000001e-06, 'epoch': 23.67}
{'loss': 3.235, 'learning_rate': 8.540000000000001e-06, 'epoch': 23.83}
{'loss': 3.1958, 'learning_rate': 8.530000000000001e-06, 'epoch': 23.99}
{'loss': 3.2346, 'learning_rate': 8.52e-06, 'epoch': 24.16}
{'loss': 3.206, 'learning_rate': 8.51e-06, 'epoch': 24.32}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-150000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-150000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-150000\pytorch_model.bin


{'loss': 3.1827, 'learning_rate': 8.5e-06, 'epoch': 24.48}
{'loss': 3.2255, 'learning_rate': 8.49e-06, 'epoch': 24.65}
{'loss': 3.2132, 'learning_rate': 8.48e-06, 'epoch': 24.81}
{'loss': 3.1884, 'learning_rate': 8.47e-06, 'epoch': 24.97}
{'loss': 3.1938, 'learning_rate': 8.46e-06, 'epoch': 25.13}
{'loss': 3.1613, 'learning_rate': 8.45e-06, 'epoch': 25.3}
{'loss': 3.1887, 'learning_rate': 8.44e-06, 'epoch': 25.46}
{'loss': 3.1678, 'learning_rate': 8.43e-06, 'epoch': 25.62}
{'loss': 3.1853, 'learning_rate': 8.42e-06, 'epoch': 25.79}
{'loss': 3.1495, 'learning_rate': 8.41e-06, 'epoch': 25.95}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-160000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-160000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-160000\pytorch_model.bin


{'loss': 3.144, 'learning_rate': 8.400000000000001e-06, 'epoch': 26.11}
{'loss': 3.1172, 'learning_rate': 8.390000000000001e-06, 'epoch': 26.28}
{'loss': 3.1315, 'learning_rate': 8.380000000000001e-06, 'epoch': 26.44}
{'loss': 3.1555, 'learning_rate': 8.370000000000001e-06, 'epoch': 26.6}
{'loss': 3.1045, 'learning_rate': 8.36e-06, 'epoch': 26.77}
{'loss': 3.1219, 'learning_rate': 8.35e-06, 'epoch': 26.93}
{'loss': 3.1216, 'learning_rate': 8.34e-06, 'epoch': 27.09}
{'loss': 3.101, 'learning_rate': 8.33e-06, 'epoch': 27.26}
{'loss': 3.1016, 'learning_rate': 8.32e-06, 'epoch': 27.42}
{'loss': 3.0999, 'learning_rate': 8.31e-06, 'epoch': 27.58}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-170000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-170000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-170000\pytorch_model.bin


{'loss': 3.1186, 'learning_rate': 8.3e-06, 'epoch': 27.75}
{'loss': 3.1171, 'learning_rate': 8.29e-06, 'epoch': 27.91}
{'loss': 3.0963, 'learning_rate': 8.28e-06, 'epoch': 28.07}
{'loss': 3.0553, 'learning_rate': 8.27e-06, 'epoch': 28.24}
{'loss': 3.0639, 'learning_rate': 8.26e-06, 'epoch': 28.4}
{'loss': 3.0864, 'learning_rate': 8.25e-06, 'epoch': 28.56}
{'loss': 3.0356, 'learning_rate': 8.24e-06, 'epoch': 28.73}
{'loss': 3.0735, 'learning_rate': 8.23e-06, 'epoch': 28.89}
{'loss': 3.0606, 'learning_rate': 8.220000000000001e-06, 'epoch': 29.05}
{'loss': 3.0091, 'learning_rate': 8.210000000000001e-06, 'epoch': 29.21}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-180000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-180000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-180000\pytorch_model.bin


{'loss': 3.0393, 'learning_rate': 8.2e-06, 'epoch': 29.38}
{'loss': 3.0577, 'learning_rate': 8.19e-06, 'epoch': 29.54}
{'loss': 3.0658, 'learning_rate': 8.18e-06, 'epoch': 29.7}
{'loss': 3.0327, 'learning_rate': 8.17e-06, 'epoch': 29.87}
{'loss': 3.0223, 'learning_rate': 8.16e-06, 'epoch': 30.03}
{'loss': 2.9989, 'learning_rate': 8.15e-06, 'epoch': 30.19}
{'loss': 3.0129, 'learning_rate': 8.14e-06, 'epoch': 30.36}
{'loss': 3.0026, 'learning_rate': 8.13e-06, 'epoch': 30.52}
{'loss': 3.0059, 'learning_rate': 8.120000000000002e-06, 'epoch': 30.68}
{'loss': 2.9748, 'learning_rate': 8.110000000000002e-06, 'epoch': 30.85}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-190000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-190000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-190000\pytorch_model.bin


{'loss': 3.0374, 'learning_rate': 8.1e-06, 'epoch': 31.01}
{'loss': 2.954, 'learning_rate': 8.09e-06, 'epoch': 31.17}
{'loss': 3.0148, 'learning_rate': 8.08e-06, 'epoch': 31.34}
{'loss': 2.9684, 'learning_rate': 8.07e-06, 'epoch': 31.5}
{'loss': 2.9785, 'learning_rate': 8.06e-06, 'epoch': 31.66}
{'loss': 2.9689, 'learning_rate': 8.050000000000001e-06, 'epoch': 31.83}
{'loss': 2.9934, 'learning_rate': 8.040000000000001e-06, 'epoch': 31.99}
{'loss': 2.9801, 'learning_rate': 8.030000000000001e-06, 'epoch': 32.15}
{'loss': 2.9514, 'learning_rate': 8.020000000000001e-06, 'epoch': 32.32}
{'loss': 2.9616, 'learning_rate': 8.010000000000001e-06, 'epoch': 32.48}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-200000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-200000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-200000\pytorch_model.bin


{'loss': 2.9112, 'learning_rate': 8.000000000000001e-06, 'epoch': 32.64}
{'loss': 2.9786, 'learning_rate': 7.990000000000001e-06, 'epoch': 32.81}
{'loss': 2.9273, 'learning_rate': 7.980000000000002e-06, 'epoch': 32.97}
{'loss': 2.9341, 'learning_rate': 7.970000000000002e-06, 'epoch': 33.13}
{'loss': 2.8807, 'learning_rate': 7.960000000000002e-06, 'epoch': 33.3}
{'loss': 2.9515, 'learning_rate': 7.950000000000002e-06, 'epoch': 33.46}
{'loss': 2.9556, 'learning_rate': 7.94e-06, 'epoch': 33.62}
{'loss': 2.9239, 'learning_rate': 7.93e-06, 'epoch': 33.78}
{'loss': 2.8907, 'learning_rate': 7.92e-06, 'epoch': 33.95}
{'loss': 2.8744, 'learning_rate': 7.91e-06, 'epoch': 34.11}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-210000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-210000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-210000\pytorch_model.bin


{'loss': 2.8999, 'learning_rate': 7.9e-06, 'epoch': 34.27}
{'loss': 2.8693, 'learning_rate': 7.89e-06, 'epoch': 34.44}
{'loss': 2.8958, 'learning_rate': 7.88e-06, 'epoch': 34.6}
{'loss': 2.911, 'learning_rate': 7.870000000000001e-06, 'epoch': 34.76}
{'loss': 2.9163, 'learning_rate': 7.860000000000001e-06, 'epoch': 34.93}
{'loss': 2.8849, 'learning_rate': 7.850000000000001e-06, 'epoch': 35.09}
{'loss': 2.9245, 'learning_rate': 7.840000000000001e-06, 'epoch': 35.25}
{'loss': 2.8361, 'learning_rate': 7.830000000000001e-06, 'epoch': 35.42}
{'loss': 2.8692, 'learning_rate': 7.820000000000001e-06, 'epoch': 35.58}
{'loss': 2.89, 'learning_rate': 7.810000000000001e-06, 'epoch': 35.74}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-220000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-220000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-220000\pytorch_model.bin


{'loss': 2.8561, 'learning_rate': 7.800000000000002e-06, 'epoch': 35.91}
{'loss': 2.8711, 'learning_rate': 7.790000000000002e-06, 'epoch': 36.07}
{'loss': 2.8467, 'learning_rate': 7.78e-06, 'epoch': 36.23}
{'loss': 2.8571, 'learning_rate': 7.77e-06, 'epoch': 36.4}
{'loss': 2.8663, 'learning_rate': 7.76e-06, 'epoch': 36.56}
{'loss': 2.8663, 'learning_rate': 7.75e-06, 'epoch': 36.72}
{'loss': 2.8524, 'learning_rate': 7.74e-06, 'epoch': 36.89}
{'loss': 2.8639, 'learning_rate': 7.73e-06, 'epoch': 37.05}
{'loss': 2.8655, 'learning_rate': 7.72e-06, 'epoch': 37.21}
{'loss': 2.8318, 'learning_rate': 7.71e-06, 'epoch': 37.38}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-230000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-230000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-230000\pytorch_model.bin


{'loss': 2.8469, 'learning_rate': 7.7e-06, 'epoch': 37.54}
{'loss': 2.8345, 'learning_rate': 7.690000000000001e-06, 'epoch': 37.7}
{'loss': 2.8309, 'learning_rate': 7.680000000000001e-06, 'epoch': 37.87}
{'loss': 2.7899, 'learning_rate': 7.670000000000001e-06, 'epoch': 38.03}
{'loss': 2.8153, 'learning_rate': 7.660000000000001e-06, 'epoch': 38.19}
{'loss': 2.8135, 'learning_rate': 7.650000000000001e-06, 'epoch': 38.35}
{'loss': 2.8299, 'learning_rate': 7.640000000000001e-06, 'epoch': 38.52}
{'loss': 2.8067, 'learning_rate': 7.630000000000001e-06, 'epoch': 38.68}
{'loss': 2.8107, 'learning_rate': 7.620000000000001e-06, 'epoch': 38.84}
{'loss': 2.7944, 'learning_rate': 7.610000000000001e-06, 'epoch': 39.01}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-240000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-240000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-240000\pytorch_model.bin


{'loss': 2.8002, 'learning_rate': 7.600000000000001e-06, 'epoch': 39.17}
{'loss': 2.7891, 'learning_rate': 7.590000000000001e-06, 'epoch': 39.33}
{'loss': 2.7567, 'learning_rate': 7.58e-06, 'epoch': 39.5}
{'loss': 2.7914, 'learning_rate': 7.57e-06, 'epoch': 39.66}
{'loss': 2.8079, 'learning_rate': 7.5600000000000005e-06, 'epoch': 39.82}
{'loss': 2.8048, 'learning_rate': 7.5500000000000006e-06, 'epoch': 39.99}
{'loss': 2.7957, 'learning_rate': 7.540000000000001e-06, 'epoch': 40.15}
{'loss': 2.7487, 'learning_rate': 7.530000000000001e-06, 'epoch': 40.31}
{'loss': 2.7751, 'learning_rate': 7.520000000000001e-06, 'epoch': 40.48}
{'loss': 2.8296, 'learning_rate': 7.510000000000001e-06, 'epoch': 40.64}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-250000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-250000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-250000\pytorch_model.bin


{'loss': 2.7544, 'learning_rate': 7.500000000000001e-06, 'epoch': 40.8}
{'loss': 2.7661, 'learning_rate': 7.49e-06, 'epoch': 40.97}
{'loss': 2.7766, 'learning_rate': 7.48e-06, 'epoch': 41.13}
{'loss': 2.7226, 'learning_rate': 7.4700000000000005e-06, 'epoch': 41.29}
{'loss': 2.7344, 'learning_rate': 7.4600000000000006e-06, 'epoch': 41.46}
{'loss': 2.788, 'learning_rate': 7.450000000000001e-06, 'epoch': 41.62}
{'loss': 2.7557, 'learning_rate': 7.440000000000001e-06, 'epoch': 41.78}
{'loss': 2.7399, 'learning_rate': 7.430000000000001e-06, 'epoch': 41.95}
{'loss': 2.7301, 'learning_rate': 7.420000000000001e-06, 'epoch': 42.11}
{'loss': 2.7431, 'learning_rate': 7.41e-06, 'epoch': 42.27}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-260000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-260000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-260000\pytorch_model.bin


{'loss': 2.7333, 'learning_rate': 7.4e-06, 'epoch': 42.44}
{'loss': 2.74, 'learning_rate': 7.39e-06, 'epoch': 42.6}
{'loss': 2.7413, 'learning_rate': 7.3800000000000005e-06, 'epoch': 42.76}
{'loss': 2.7592, 'learning_rate': 7.370000000000001e-06, 'epoch': 42.92}
{'loss': 2.7553, 'learning_rate': 7.360000000000001e-06, 'epoch': 43.09}
{'loss': 2.7072, 'learning_rate': 7.350000000000001e-06, 'epoch': 43.25}
{'loss': 2.7073, 'learning_rate': 7.340000000000001e-06, 'epoch': 43.41}
{'loss': 2.7334, 'learning_rate': 7.33e-06, 'epoch': 43.58}
{'loss': 2.744, 'learning_rate': 7.32e-06, 'epoch': 43.74}
{'loss': 2.7016, 'learning_rate': 7.31e-06, 'epoch': 43.9}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-270000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-270000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-270000\pytorch_model.bin


{'loss': 2.7351, 'learning_rate': 7.3e-06, 'epoch': 44.07}
{'loss': 2.7002, 'learning_rate': 7.2900000000000005e-06, 'epoch': 44.23}
{'loss': 2.6831, 'learning_rate': 7.280000000000001e-06, 'epoch': 44.39}
{'loss': 2.6824, 'learning_rate': 7.270000000000001e-06, 'epoch': 44.56}
{'loss': 2.673, 'learning_rate': 7.260000000000001e-06, 'epoch': 44.72}
{'loss': 2.6932, 'learning_rate': 7.25e-06, 'epoch': 44.88}
{'loss': 2.7025, 'learning_rate': 7.24e-06, 'epoch': 45.05}
{'loss': 2.6779, 'learning_rate': 7.23e-06, 'epoch': 45.21}
{'loss': 2.6641, 'learning_rate': 7.22e-06, 'epoch': 45.37}
{'loss': 2.6705, 'learning_rate': 7.2100000000000004e-06, 'epoch': 45.54}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-280000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-280000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-280000\pytorch_model.bin


{'loss': 2.6759, 'learning_rate': 7.2000000000000005e-06, 'epoch': 45.7}
{'loss': 2.6866, 'learning_rate': 7.190000000000001e-06, 'epoch': 45.86}
{'loss': 2.6699, 'learning_rate': 7.180000000000001e-06, 'epoch': 46.03}
{'loss': 2.6437, 'learning_rate': 7.17e-06, 'epoch': 46.19}
{'loss': 2.6819, 'learning_rate': 7.16e-06, 'epoch': 46.35}
{'loss': 2.6838, 'learning_rate': 7.15e-06, 'epoch': 46.52}
{'loss': 2.6476, 'learning_rate': 7.14e-06, 'epoch': 46.68}
{'loss': 2.6657, 'learning_rate': 7.13e-06, 'epoch': 46.84}
{'loss': 2.6733, 'learning_rate': 7.1200000000000004e-06, 'epoch': 47.01}
{'loss': 2.62, 'learning_rate': 7.1100000000000005e-06, 'epoch': 47.17}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-290000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-290000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-290000\pytorch_model.bin


{'loss': 2.6543, 'learning_rate': 7.100000000000001e-06, 'epoch': 47.33}
{'loss': 2.639, 'learning_rate': 7.09e-06, 'epoch': 47.49}
{'loss': 2.651, 'learning_rate': 7.08e-06, 'epoch': 47.66}
{'loss': 2.6849, 'learning_rate': 7.07e-06, 'epoch': 47.82}
{'loss': 2.6314, 'learning_rate': 7.06e-06, 'epoch': 47.98}
{'loss': 2.6364, 'learning_rate': 7.05e-06, 'epoch': 48.15}
{'loss': 2.5986, 'learning_rate': 7.04e-06, 'epoch': 48.31}
{'loss': 2.6203, 'learning_rate': 7.0300000000000005e-06, 'epoch': 48.47}
{'loss': 2.6359, 'learning_rate': 7.0200000000000006e-06, 'epoch': 48.64}
{'loss': 2.6496, 'learning_rate': 7.01e-06, 'epoch': 48.8}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-300000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-300000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-300000\pytorch_model.bin


{'loss': 2.6575, 'learning_rate': 7e-06, 'epoch': 48.96}
{'loss': 2.624, 'learning_rate': 6.99e-06, 'epoch': 49.13}
{'loss': 2.6168, 'learning_rate': 6.98e-06, 'epoch': 49.29}
{'loss': 2.6279, 'learning_rate': 6.97e-06, 'epoch': 49.45}
{'loss': 2.6271, 'learning_rate': 6.96e-06, 'epoch': 49.62}
{'loss': 2.6409, 'learning_rate': 6.95e-06, 'epoch': 49.78}
{'loss': 2.648, 'learning_rate': 6.9400000000000005e-06, 'epoch': 49.94}
{'loss': 2.5807, 'learning_rate': 6.93e-06, 'epoch': 50.11}
{'loss': 2.5805, 'learning_rate': 6.92e-06, 'epoch': 50.27}
{'loss': 2.6078, 'learning_rate': 6.91e-06, 'epoch': 50.43}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-310000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-310000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-310000\pytorch_model.bin


{'loss': 2.5779, 'learning_rate': 6.9e-06, 'epoch': 50.6}
{'loss': 2.6371, 'learning_rate': 6.89e-06, 'epoch': 50.76}
{'loss': 2.6301, 'learning_rate': 6.88e-06, 'epoch': 50.92}
{'loss': 2.6182, 'learning_rate': 6.870000000000001e-06, 'epoch': 51.09}
{'loss': 2.6005, 'learning_rate': 6.860000000000001e-06, 'epoch': 51.25}
{'loss': 2.582, 'learning_rate': 6.850000000000001e-06, 'epoch': 51.41}
{'loss': 2.6015, 'learning_rate': 6.8400000000000014e-06, 'epoch': 51.57}
{'loss': 2.5957, 'learning_rate': 6.830000000000001e-06, 'epoch': 51.74}
{'loss': 2.584, 'learning_rate': 6.820000000000001e-06, 'epoch': 51.9}
{'loss': 2.5926, 'learning_rate': 6.810000000000001e-06, 'epoch': 52.06}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-320000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-320000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-320000\pytorch_model.bin


{'loss': 2.5647, 'learning_rate': 6.800000000000001e-06, 'epoch': 52.23}
{'loss': 2.562, 'learning_rate': 6.790000000000001e-06, 'epoch': 52.39}
{'loss': 2.575, 'learning_rate': 6.780000000000001e-06, 'epoch': 52.55}
{'loss': 2.5811, 'learning_rate': 6.770000000000001e-06, 'epoch': 52.72}
{'loss': 2.5929, 'learning_rate': 6.760000000000001e-06, 'epoch': 52.88}
{'loss': 2.5687, 'learning_rate': 6.750000000000001e-06, 'epoch': 53.04}
{'loss': 2.5515, 'learning_rate': 6.740000000000001e-06, 'epoch': 53.21}
{'loss': 2.6032, 'learning_rate': 6.730000000000001e-06, 'epoch': 53.37}
{'loss': 2.5623, 'learning_rate': 6.720000000000001e-06, 'epoch': 53.53}
{'loss': 2.5515, 'learning_rate': 6.710000000000001e-06, 'epoch': 53.7}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-330000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-330000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-330000\pytorch_model.bin


{'loss': 2.5488, 'learning_rate': 6.700000000000001e-06, 'epoch': 53.86}
{'loss': 2.5482, 'learning_rate': 6.690000000000001e-06, 'epoch': 54.02}
{'loss': 2.5477, 'learning_rate': 6.680000000000001e-06, 'epoch': 54.19}
{'loss': 2.5518, 'learning_rate': 6.6700000000000005e-06, 'epoch': 54.35}
{'loss': 2.5962, 'learning_rate': 6.660000000000001e-06, 'epoch': 54.51}
{'loss': 2.5256, 'learning_rate': 6.650000000000001e-06, 'epoch': 54.68}
{'loss': 2.5179, 'learning_rate': 6.640000000000001e-06, 'epoch': 54.84}
{'loss': 2.5279, 'learning_rate': 6.630000000000001e-06, 'epoch': 55.0}
{'loss': 2.5305, 'learning_rate': 6.620000000000001e-06, 'epoch': 55.17}
{'loss': 2.5507, 'learning_rate': 6.610000000000001e-06, 'epoch': 55.33}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-340000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-340000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-340000\pytorch_model.bin


{'loss': 2.5306, 'learning_rate': 6.600000000000001e-06, 'epoch': 55.49}
{'loss': 2.5096, 'learning_rate': 6.5900000000000004e-06, 'epoch': 55.66}
{'loss': 2.5112, 'learning_rate': 6.5800000000000005e-06, 'epoch': 55.82}
{'loss': 2.5565, 'learning_rate': 6.570000000000001e-06, 'epoch': 55.98}
{'loss': 2.5069, 'learning_rate': 6.560000000000001e-06, 'epoch': 56.14}
{'loss': 2.5191, 'learning_rate': 6.550000000000001e-06, 'epoch': 56.31}
{'loss': 2.4963, 'learning_rate': 6.540000000000001e-06, 'epoch': 56.47}
{'loss': 2.5385, 'learning_rate': 6.530000000000001e-06, 'epoch': 56.63}
{'loss': 2.5324, 'learning_rate': 6.520000000000001e-06, 'epoch': 56.8}
{'loss': 2.5136, 'learning_rate': 6.51e-06, 'epoch': 56.96}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-350000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-350000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-350000\pytorch_model.bin


{'loss': 2.5085, 'learning_rate': 6.5000000000000004e-06, 'epoch': 57.12}
{'loss': 2.5083, 'learning_rate': 6.4900000000000005e-06, 'epoch': 57.29}
{'loss': 2.4833, 'learning_rate': 6.480000000000001e-06, 'epoch': 57.45}
{'loss': 2.4872, 'learning_rate': 6.470000000000001e-06, 'epoch': 57.61}
{'loss': 2.5084, 'learning_rate': 6.460000000000001e-06, 'epoch': 57.78}
{'loss': 2.5178, 'learning_rate': 6.450000000000001e-06, 'epoch': 57.94}
{'loss': 2.4648, 'learning_rate': 6.440000000000001e-06, 'epoch': 58.1}
{'loss': 2.4808, 'learning_rate': 6.43e-06, 'epoch': 58.27}
{'loss': 2.466, 'learning_rate': 6.42e-06, 'epoch': 58.43}
{'loss': 2.5225, 'learning_rate': 6.4100000000000005e-06, 'epoch': 58.59}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-360000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-360000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-360000\pytorch_model.bin


{'loss': 2.4988, 'learning_rate': 6.4000000000000006e-06, 'epoch': 58.76}
{'loss': 2.4955, 'learning_rate': 6.390000000000001e-06, 'epoch': 58.92}
{'loss': 2.4873, 'learning_rate': 6.380000000000001e-06, 'epoch': 59.08}
{'loss': 2.4836, 'learning_rate': 6.370000000000001e-06, 'epoch': 59.25}
{'loss': 2.4747, 'learning_rate': 6.360000000000001e-06, 'epoch': 59.41}
{'loss': 2.4968, 'learning_rate': 6.35e-06, 'epoch': 59.57}
{'loss': 2.4622, 'learning_rate': 6.34e-06, 'epoch': 59.74}
{'loss': 2.4754, 'learning_rate': 6.33e-06, 'epoch': 59.9}
{'loss': 2.4625, 'learning_rate': 6.3200000000000005e-06, 'epoch': 60.06}
{'loss': 2.5135, 'learning_rate': 6.3100000000000006e-06, 'epoch': 60.23}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-370000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-370000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-370000\pytorch_model.bin


{'loss': 2.443, 'learning_rate': 6.300000000000001e-06, 'epoch': 60.39}
{'loss': 2.4544, 'learning_rate': 6.290000000000001e-06, 'epoch': 60.55}
{'loss': 2.4738, 'learning_rate': 6.280000000000001e-06, 'epoch': 60.71}
{'loss': 2.4904, 'learning_rate': 6.27e-06, 'epoch': 60.88}
{'loss': 2.4468, 'learning_rate': 6.26e-06, 'epoch': 61.04}
{'loss': 2.4649, 'learning_rate': 6.25e-06, 'epoch': 61.2}
{'loss': 2.432, 'learning_rate': 6.24e-06, 'epoch': 61.37}
{'loss': 2.4421, 'learning_rate': 6.2300000000000005e-06, 'epoch': 61.53}
{'loss': 2.4335, 'learning_rate': 6.220000000000001e-06, 'epoch': 61.69}
{'loss': 2.4714, 'learning_rate': 6.210000000000001e-06, 'epoch': 61.86}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-380000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-380000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-380000\pytorch_model.bin


{'loss': 2.4898, 'learning_rate': 6.200000000000001e-06, 'epoch': 62.02}
{'loss': 2.46, 'learning_rate': 6.190000000000001e-06, 'epoch': 62.18}
{'loss': 2.4412, 'learning_rate': 6.18e-06, 'epoch': 62.35}
{'loss': 2.4419, 'learning_rate': 6.17e-06, 'epoch': 62.51}
{'loss': 2.4678, 'learning_rate': 6.16e-06, 'epoch': 62.67}
{'loss': 2.4324, 'learning_rate': 6.15e-06, 'epoch': 62.84}
{'loss': 2.4258, 'learning_rate': 6.1400000000000005e-06, 'epoch': 63.0}
{'loss': 2.4076, 'learning_rate': 6.130000000000001e-06, 'epoch': 63.16}
{'loss': 2.4312, 'learning_rate': 6.120000000000001e-06, 'epoch': 63.33}
{'loss': 2.4164, 'learning_rate': 6.110000000000001e-06, 'epoch': 63.49}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-390000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-390000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-390000\pytorch_model.bin


{'loss': 2.4164, 'learning_rate': 6.1e-06, 'epoch': 63.65}
{'loss': 2.4512, 'learning_rate': 6.09e-06, 'epoch': 63.82}
{'loss': 2.4254, 'learning_rate': 6.08e-06, 'epoch': 63.98}
{'loss': 2.4296, 'learning_rate': 6.07e-06, 'epoch': 64.14}
{'loss': 2.402, 'learning_rate': 6.0600000000000004e-06, 'epoch': 64.31}
{'loss': 2.4182, 'learning_rate': 6.0500000000000005e-06, 'epoch': 64.47}
{'loss': 2.4264, 'learning_rate': 6.040000000000001e-06, 'epoch': 64.63}
{'loss': 2.4125, 'learning_rate': 6.030000000000001e-06, 'epoch': 64.8}
{'loss': 2.4519, 'learning_rate': 6.02e-06, 'epoch': 64.96}
{'loss': 2.4203, 'learning_rate': 6.01e-06, 'epoch': 65.12}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-400000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-400000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-400000\pytorch_model.bin


{'loss': 2.4281, 'learning_rate': 6e-06, 'epoch': 65.28}
{'loss': 2.4379, 'learning_rate': 5.99e-06, 'epoch': 65.45}
{'loss': 2.418, 'learning_rate': 5.98e-06, 'epoch': 65.61}
{'loss': 2.4065, 'learning_rate': 5.9700000000000004e-06, 'epoch': 65.77}
{'loss': 2.4055, 'learning_rate': 5.9600000000000005e-06, 'epoch': 65.94}
{'loss': 2.4087, 'learning_rate': 5.950000000000001e-06, 'epoch': 66.1}
{'loss': 2.4066, 'learning_rate': 5.94e-06, 'epoch': 66.26}
{'loss': 2.4025, 'learning_rate': 5.93e-06, 'epoch': 66.43}
{'loss': 2.3918, 'learning_rate': 5.92e-06, 'epoch': 66.59}
{'loss': 2.4272, 'learning_rate': 5.91e-06, 'epoch': 66.75}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-410000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-410000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-410000\pytorch_model.bin


{'loss': 2.4058, 'learning_rate': 5.9e-06, 'epoch': 66.92}
{'loss': 2.3663, 'learning_rate': 5.89e-06, 'epoch': 67.08}
{'loss': 2.4219, 'learning_rate': 5.8800000000000005e-06, 'epoch': 67.24}
{'loss': 2.3814, 'learning_rate': 5.8700000000000005e-06, 'epoch': 67.41}
{'loss': 2.3473, 'learning_rate': 5.86e-06, 'epoch': 67.57}
{'loss': 2.4249, 'learning_rate': 5.85e-06, 'epoch': 67.73}
{'loss': 2.3765, 'learning_rate': 5.84e-06, 'epoch': 67.9}
{'loss': 2.3785, 'learning_rate': 5.83e-06, 'epoch': 68.06}
{'loss': 2.3455, 'learning_rate': 5.82e-06, 'epoch': 68.22}
{'loss': 2.377, 'learning_rate': 5.81e-06, 'epoch': 68.39}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-420000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-420000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-420000\pytorch_model.bin


{'loss': 2.3802, 'learning_rate': 5.8e-06, 'epoch': 68.55}
{'loss': 2.4114, 'learning_rate': 5.7900000000000005e-06, 'epoch': 68.71}
{'loss': 2.3869, 'learning_rate': 5.78e-06, 'epoch': 68.88}
{'loss': 2.3881, 'learning_rate': 5.77e-06, 'epoch': 69.04}
{'loss': 2.389, 'learning_rate': 5.76e-06, 'epoch': 69.2}
{'loss': 2.3344, 'learning_rate': 5.75e-06, 'epoch': 69.37}
{'loss': 2.3592, 'learning_rate': 5.74e-06, 'epoch': 69.53}
{'loss': 2.3597, 'learning_rate': 5.73e-06, 'epoch': 69.69}
{'loss': 2.355, 'learning_rate': 5.72e-06, 'epoch': 69.85}
{'loss': 2.3747, 'learning_rate': 5.71e-06, 'epoch': 70.02}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-430000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-430000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-430000\pytorch_model.bin


{'loss': 2.3249, 'learning_rate': 5.7e-06, 'epoch': 70.18}
{'loss': 2.3258, 'learning_rate': 5.69e-06, 'epoch': 70.34}
{'loss': 2.3685, 'learning_rate': 5.68e-06, 'epoch': 70.51}
{'loss': 2.3691, 'learning_rate': 5.67e-06, 'epoch': 70.67}
{'loss': 2.3434, 'learning_rate': 5.66e-06, 'epoch': 70.83}
{'loss': 2.3527, 'learning_rate': 5.65e-06, 'epoch': 71.0}
{'loss': 2.3417, 'learning_rate': 5.64e-06, 'epoch': 71.16}
{'loss': 2.3454, 'learning_rate': 5.63e-06, 'epoch': 71.32}
{'loss': 2.3822, 'learning_rate': 5.620000000000001e-06, 'epoch': 71.49}
{'loss': 2.3345, 'learning_rate': 5.610000000000001e-06, 'epoch': 71.65}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-440000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-440000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-440000\pytorch_model.bin


{'loss': 2.3366, 'learning_rate': 5.600000000000001e-06, 'epoch': 71.81}
{'loss': 2.3355, 'learning_rate': 5.590000000000001e-06, 'epoch': 71.98}
{'loss': 2.3254, 'learning_rate': 5.580000000000001e-06, 'epoch': 72.14}
{'loss': 2.3134, 'learning_rate': 5.570000000000001e-06, 'epoch': 72.3}
{'loss': 2.3322, 'learning_rate': 5.560000000000001e-06, 'epoch': 72.47}
{'loss': 2.3156, 'learning_rate': 5.550000000000001e-06, 'epoch': 72.63}
{'loss': 2.3586, 'learning_rate': 5.540000000000001e-06, 'epoch': 72.79}
{'loss': 2.3578, 'learning_rate': 5.530000000000001e-06, 'epoch': 72.96}
{'loss': 2.3458, 'learning_rate': 5.5200000000000005e-06, 'epoch': 73.12}
{'loss': 2.3172, 'learning_rate': 5.510000000000001e-06, 'epoch': 73.28}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-450000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-450000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-450000\pytorch_model.bin


{'loss': 2.322, 'learning_rate': 5.500000000000001e-06, 'epoch': 73.45}
{'loss': 2.3346, 'learning_rate': 5.490000000000001e-06, 'epoch': 73.61}
{'loss': 2.3289, 'learning_rate': 5.480000000000001e-06, 'epoch': 73.77}
{'loss': 2.3466, 'learning_rate': 5.470000000000001e-06, 'epoch': 73.94}
{'loss': 2.2849, 'learning_rate': 5.460000000000001e-06, 'epoch': 74.1}
{'loss': 2.3057, 'learning_rate': 5.450000000000001e-06, 'epoch': 74.26}
{'loss': 2.3193, 'learning_rate': 5.4400000000000004e-06, 'epoch': 74.42}
{'loss': 2.3356, 'learning_rate': 5.4300000000000005e-06, 'epoch': 74.59}
{'loss': 2.3276, 'learning_rate': 5.420000000000001e-06, 'epoch': 74.75}
{'loss': 2.335, 'learning_rate': 5.410000000000001e-06, 'epoch': 74.91}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-460000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-460000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-460000\pytorch_model.bin


{'loss': 2.306, 'learning_rate': 5.400000000000001e-06, 'epoch': 75.08}
{'loss': 2.2874, 'learning_rate': 5.390000000000001e-06, 'epoch': 75.24}
{'loss': 2.3317, 'learning_rate': 5.380000000000001e-06, 'epoch': 75.4}
{'loss': 2.3076, 'learning_rate': 5.370000000000001e-06, 'epoch': 75.57}
{'loss': 2.3296, 'learning_rate': 5.36e-06, 'epoch': 75.73}
{'loss': 2.3142, 'learning_rate': 5.3500000000000004e-06, 'epoch': 75.89}
{'loss': 2.2961, 'learning_rate': 5.3400000000000005e-06, 'epoch': 76.06}
{'loss': 2.2844, 'learning_rate': 5.330000000000001e-06, 'epoch': 76.22}
{'loss': 2.3301, 'learning_rate': 5.320000000000001e-06, 'epoch': 76.38}
{'loss': 2.3124, 'learning_rate': 5.310000000000001e-06, 'epoch': 76.55}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-470000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-470000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-470000\pytorch_model.bin


{'loss': 2.2994, 'learning_rate': 5.300000000000001e-06, 'epoch': 76.71}
{'loss': 2.3027, 'learning_rate': 5.290000000000001e-06, 'epoch': 76.87}
{'loss': 2.2801, 'learning_rate': 5.28e-06, 'epoch': 77.04}
{'loss': 2.2926, 'learning_rate': 5.27e-06, 'epoch': 77.2}
{'loss': 2.2755, 'learning_rate': 5.2600000000000005e-06, 'epoch': 77.36}
{'loss': 2.2852, 'learning_rate': 5.2500000000000006e-06, 'epoch': 77.53}
{'loss': 2.3031, 'learning_rate': 5.240000000000001e-06, 'epoch': 77.69}
{'loss': 2.2935, 'learning_rate': 5.230000000000001e-06, 'epoch': 77.85}
{'loss': 2.2906, 'learning_rate': 5.220000000000001e-06, 'epoch': 78.02}
{'loss': 2.233, 'learning_rate': 5.210000000000001e-06, 'epoch': 78.18}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-480000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-480000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-480000\pytorch_model.bin


{'loss': 2.3084, 'learning_rate': 5.2e-06, 'epoch': 78.34}
{'loss': 2.2635, 'learning_rate': 5.19e-06, 'epoch': 78.5}
{'loss': 2.2838, 'learning_rate': 5.18e-06, 'epoch': 78.67}
{'loss': 2.2576, 'learning_rate': 5.1700000000000005e-06, 'epoch': 78.83}
{'loss': 2.2866, 'learning_rate': 5.1600000000000006e-06, 'epoch': 78.99}
{'loss': 2.235, 'learning_rate': 5.150000000000001e-06, 'epoch': 79.16}
{'loss': 2.3075, 'learning_rate': 5.140000000000001e-06, 'epoch': 79.32}
{'loss': 2.2821, 'learning_rate': 5.130000000000001e-06, 'epoch': 79.48}
{'loss': 2.2754, 'learning_rate': 5.12e-06, 'epoch': 79.65}
{'loss': 2.2867, 'learning_rate': 5.11e-06, 'epoch': 79.81}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-490000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-490000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-490000\pytorch_model.bin


{'loss': 2.2673, 'learning_rate': 5.1e-06, 'epoch': 79.97}
{'loss': 2.2676, 'learning_rate': 5.09e-06, 'epoch': 80.14}
{'loss': 2.2556, 'learning_rate': 5.0800000000000005e-06, 'epoch': 80.3}
{'loss': 2.2314, 'learning_rate': 5.070000000000001e-06, 'epoch': 80.46}
{'loss': 2.2497, 'learning_rate': 5.060000000000001e-06, 'epoch': 80.63}
{'loss': 2.2535, 'learning_rate': 5.050000000000001e-06, 'epoch': 80.79}
{'loss': 2.2765, 'learning_rate': 5.04e-06, 'epoch': 80.95}
{'loss': 2.2598, 'learning_rate': 5.03e-06, 'epoch': 81.12}
{'loss': 2.2403, 'learning_rate': 5.02e-06, 'epoch': 81.28}
{'loss': 2.2532, 'learning_rate': 5.01e-06, 'epoch': 81.44}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-500000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-500000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-500000\pytorch_model.bin


{'loss': 2.2615, 'learning_rate': 5e-06, 'epoch': 81.61}
{'loss': 2.2379, 'learning_rate': 4.9900000000000005e-06, 'epoch': 81.77}
{'loss': 2.2483, 'learning_rate': 4.980000000000001e-06, 'epoch': 81.93}
{'loss': 2.2615, 'learning_rate': 4.970000000000001e-06, 'epoch': 82.1}
{'loss': 2.2283, 'learning_rate': 4.960000000000001e-06, 'epoch': 82.26}
{'loss': 2.1992, 'learning_rate': 4.95e-06, 'epoch': 82.42}
{'loss': 2.2385, 'learning_rate': 4.94e-06, 'epoch': 82.59}
{'loss': 2.2233, 'learning_rate': 4.93e-06, 'epoch': 82.75}
{'loss': 2.2609, 'learning_rate': 4.92e-06, 'epoch': 82.91}
{'loss': 2.2541, 'learning_rate': 4.9100000000000004e-06, 'epoch': 83.07}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-510000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-510000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-510000\pytorch_model.bin


{'loss': 2.2198, 'learning_rate': 4.9000000000000005e-06, 'epoch': 83.24}
{'loss': 2.2246, 'learning_rate': 4.890000000000001e-06, 'epoch': 83.4}
{'loss': 2.2413, 'learning_rate': 4.880000000000001e-06, 'epoch': 83.56}
{'loss': 2.2428, 'learning_rate': 4.87e-06, 'epoch': 83.73}
{'loss': 2.233, 'learning_rate': 4.86e-06, 'epoch': 83.89}
{'loss': 2.2116, 'learning_rate': 4.85e-06, 'epoch': 84.05}
{'loss': 2.2095, 'learning_rate': 4.84e-06, 'epoch': 84.22}
{'loss': 2.2227, 'learning_rate': 4.83e-06, 'epoch': 84.38}
{'loss': 2.2287, 'learning_rate': 4.8200000000000004e-06, 'epoch': 84.54}
{'loss': 2.2664, 'learning_rate': 4.8100000000000005e-06, 'epoch': 84.71}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-520000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-520000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-520000\pytorch_model.bin


{'loss': 2.274, 'learning_rate': 4.800000000000001e-06, 'epoch': 84.87}
{'loss': 2.2116, 'learning_rate': 4.79e-06, 'epoch': 85.03}
{'loss': 2.197, 'learning_rate': 4.78e-06, 'epoch': 85.2}
{'loss': 2.2349, 'learning_rate': 4.77e-06, 'epoch': 85.36}
{'loss': 2.2111, 'learning_rate': 4.76e-06, 'epoch': 85.52}
{'loss': 2.2233, 'learning_rate': 4.75e-06, 'epoch': 85.69}
{'loss': 2.2295, 'learning_rate': 4.74e-06, 'epoch': 85.85}
{'loss': 2.2513, 'learning_rate': 4.7300000000000005e-06, 'epoch': 86.01}
{'loss': 2.2355, 'learning_rate': 4.7200000000000005e-06, 'epoch': 86.18}
{'loss': 2.2502, 'learning_rate': 4.71e-06, 'epoch': 86.34}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-530000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-530000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-530000\pytorch_model.bin


{'loss': 2.2389, 'learning_rate': 4.7e-06, 'epoch': 86.5}
{'loss': 2.2265, 'learning_rate': 4.69e-06, 'epoch': 86.67}
{'loss': 2.2103, 'learning_rate': 4.680000000000001e-06, 'epoch': 86.83}
{'loss': 2.256, 'learning_rate': 4.670000000000001e-06, 'epoch': 86.99}
{'loss': 2.2089, 'learning_rate': 4.66e-06, 'epoch': 87.16}
{'loss': 2.2036, 'learning_rate': 4.65e-06, 'epoch': 87.32}
{'loss': 2.1986, 'learning_rate': 4.6400000000000005e-06, 'epoch': 87.48}
{'loss': 2.1968, 'learning_rate': 4.6300000000000006e-06, 'epoch': 87.64}
{'loss': 2.2076, 'learning_rate': 4.620000000000001e-06, 'epoch': 87.81}
{'loss': 2.2079, 'learning_rate': 4.610000000000001e-06, 'epoch': 87.97}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-540000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-540000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-540000\pytorch_model.bin


{'loss': 2.2151, 'learning_rate': 4.600000000000001e-06, 'epoch': 88.13}
{'loss': 2.1977, 'learning_rate': 4.590000000000001e-06, 'epoch': 88.3}
{'loss': 2.213, 'learning_rate': 4.58e-06, 'epoch': 88.46}
{'loss': 2.2025, 'learning_rate': 4.57e-06, 'epoch': 88.62}
{'loss': 2.2038, 'learning_rate': 4.56e-06, 'epoch': 88.79}
{'loss': 2.1821, 'learning_rate': 4.5500000000000005e-06, 'epoch': 88.95}
{'loss': 2.1908, 'learning_rate': 4.540000000000001e-06, 'epoch': 89.11}
{'loss': 2.1746, 'learning_rate': 4.530000000000001e-06, 'epoch': 89.28}
{'loss': 2.1494, 'learning_rate': 4.520000000000001e-06, 'epoch': 89.44}
{'loss': 2.1979, 'learning_rate': 4.510000000000001e-06, 'epoch': 89.6}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-550000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-550000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-550000\pytorch_model.bin


{'loss': 2.2172, 'learning_rate': 4.5e-06, 'epoch': 89.77}
{'loss': 2.1841, 'learning_rate': 4.49e-06, 'epoch': 89.93}
{'loss': 2.2212, 'learning_rate': 4.48e-06, 'epoch': 90.09}
{'loss': 2.203, 'learning_rate': 4.47e-06, 'epoch': 90.26}
{'loss': 2.1965, 'learning_rate': 4.4600000000000005e-06, 'epoch': 90.42}
{'loss': 2.1918, 'learning_rate': 4.450000000000001e-06, 'epoch': 90.58}
{'loss': 2.1749, 'learning_rate': 4.440000000000001e-06, 'epoch': 90.75}
{'loss': 2.1825, 'learning_rate': 4.430000000000001e-06, 'epoch': 90.91}
{'loss': 2.2016, 'learning_rate': 4.42e-06, 'epoch': 91.07}
{'loss': 2.1737, 'learning_rate': 4.41e-06, 'epoch': 91.24}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-560000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-560000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-560000\pytorch_model.bin


{'loss': 2.1663, 'learning_rate': 4.4e-06, 'epoch': 91.4}
{'loss': 2.167, 'learning_rate': 4.39e-06, 'epoch': 91.56}
{'loss': 2.2074, 'learning_rate': 4.38e-06, 'epoch': 91.73}
{'loss': 2.1557, 'learning_rate': 4.3700000000000005e-06, 'epoch': 91.89}
{'loss': 2.1818, 'learning_rate': 4.360000000000001e-06, 'epoch': 92.05}
{'loss': 2.183, 'learning_rate': 4.350000000000001e-06, 'epoch': 92.21}
{'loss': 2.1636, 'learning_rate': 4.34e-06, 'epoch': 92.38}
{'loss': 2.1709, 'learning_rate': 4.33e-06, 'epoch': 92.54}
{'loss': 2.1505, 'learning_rate': 4.32e-06, 'epoch': 92.7}
{'loss': 2.1696, 'learning_rate': 4.31e-06, 'epoch': 92.87}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-570000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-570000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-570000\pytorch_model.bin


{'loss': 2.1829, 'learning_rate': 4.3e-06, 'epoch': 93.03}
{'loss': 2.1623, 'learning_rate': 4.2900000000000004e-06, 'epoch': 93.19}
{'loss': 2.1536, 'learning_rate': 4.2800000000000005e-06, 'epoch': 93.36}
{'loss': 2.1727, 'learning_rate': 4.270000000000001e-06, 'epoch': 93.52}
{'loss': 2.1466, 'learning_rate': 4.26e-06, 'epoch': 93.68}
{'loss': 2.1779, 'learning_rate': 4.25e-06, 'epoch': 93.85}
{'loss': 2.1544, 'learning_rate': 4.24e-06, 'epoch': 94.01}
{'loss': 2.1449, 'learning_rate': 4.23e-06, 'epoch': 94.17}
{'loss': 2.1477, 'learning_rate': 4.22e-06, 'epoch': 94.34}
{'loss': 2.1426, 'learning_rate': 4.21e-06, 'epoch': 94.5}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-580000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-580000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-580000\pytorch_model.bin


{'loss': 2.1399, 'learning_rate': 4.2000000000000004e-06, 'epoch': 94.66}
{'loss': 2.1332, 'learning_rate': 4.1900000000000005e-06, 'epoch': 94.83}
{'loss': 2.1508, 'learning_rate': 4.18e-06, 'epoch': 94.99}
{'loss': 2.114, 'learning_rate': 4.17e-06, 'epoch': 95.15}
{'loss': 2.1496, 'learning_rate': 4.16e-06, 'epoch': 95.32}
{'loss': 2.1496, 'learning_rate': 4.15e-06, 'epoch': 95.48}
{'loss': 2.129, 'learning_rate': 4.14e-06, 'epoch': 95.64}
{'loss': 2.1657, 'learning_rate': 4.13e-06, 'epoch': 95.81}
{'loss': 2.1386, 'learning_rate': 4.12e-06, 'epoch': 95.97}
{'loss': 2.1688, 'learning_rate': 4.1100000000000005e-06, 'epoch': 96.13}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-590000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-590000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-590000\pytorch_model.bin


{'loss': 2.132, 'learning_rate': 4.1e-06, 'epoch': 96.3}
{'loss': 2.1318, 'learning_rate': 4.09e-06, 'epoch': 96.46}
{'loss': 2.1174, 'learning_rate': 4.08e-06, 'epoch': 96.62}
{'loss': 2.1378, 'learning_rate': 4.07e-06, 'epoch': 96.78}
{'loss': 2.1342, 'learning_rate': 4.060000000000001e-06, 'epoch': 96.95}
{'loss': 2.1252, 'learning_rate': 4.05e-06, 'epoch': 97.11}
{'loss': 2.1465, 'learning_rate': 4.04e-06, 'epoch': 97.27}
{'loss': 2.1854, 'learning_rate': 4.03e-06, 'epoch': 97.44}
{'loss': 2.1087, 'learning_rate': 4.0200000000000005e-06, 'epoch': 97.6}
{'loss': 2.1226, 'learning_rate': 4.0100000000000006e-06, 'epoch': 97.76}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-600000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-600000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-600000\pytorch_model.bin


{'loss': 2.1399, 'learning_rate': 4.000000000000001e-06, 'epoch': 97.93}
{'loss': 2.1559, 'learning_rate': 3.990000000000001e-06, 'epoch': 98.09}
{'loss': 2.1391, 'learning_rate': 3.980000000000001e-06, 'epoch': 98.25}
{'loss': 2.1034, 'learning_rate': 3.97e-06, 'epoch': 98.42}
{'loss': 2.1147, 'learning_rate': 3.96e-06, 'epoch': 98.58}
{'loss': 2.1124, 'learning_rate': 3.95e-06, 'epoch': 98.74}
{'loss': 2.1435, 'learning_rate': 3.94e-06, 'epoch': 98.91}
{'loss': 2.1169, 'learning_rate': 3.9300000000000005e-06, 'epoch': 99.07}
{'loss': 2.0895, 'learning_rate': 3.920000000000001e-06, 'epoch': 99.23}
{'loss': 2.1011, 'learning_rate': 3.910000000000001e-06, 'epoch': 99.4}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-610000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-610000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-610000\pytorch_model.bin


{'loss': 2.1231, 'learning_rate': 3.900000000000001e-06, 'epoch': 99.56}
{'loss': 2.1034, 'learning_rate': 3.89e-06, 'epoch': 99.72}
{'loss': 2.1396, 'learning_rate': 3.88e-06, 'epoch': 99.89}
{'loss': 2.137, 'learning_rate': 3.87e-06, 'epoch': 100.05}
{'loss': 2.1208, 'learning_rate': 3.86e-06, 'epoch': 100.21}
{'loss': 2.1206, 'learning_rate': 3.85e-06, 'epoch': 100.38}
{'loss': 2.1276, 'learning_rate': 3.8400000000000005e-06, 'epoch': 100.54}
{'loss': 2.0967, 'learning_rate': 3.830000000000001e-06, 'epoch': 100.7}
{'loss': 2.1526, 'learning_rate': 3.820000000000001e-06, 'epoch': 100.87}
{'loss': 2.1097, 'learning_rate': 3.8100000000000004e-06, 'epoch': 101.03}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-620000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-620000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-620000\pytorch_model.bin


{'loss': 2.1262, 'learning_rate': 3.8000000000000005e-06, 'epoch': 101.19}
{'loss': 2.1071, 'learning_rate': 3.79e-06, 'epoch': 101.35}
{'loss': 2.0893, 'learning_rate': 3.7800000000000002e-06, 'epoch': 101.52}
{'loss': 2.1094, 'learning_rate': 3.7700000000000003e-06, 'epoch': 101.68}
{'loss': 2.0627, 'learning_rate': 3.7600000000000004e-06, 'epoch': 101.84}
{'loss': 2.1024, 'learning_rate': 3.7500000000000005e-06, 'epoch': 102.01}
{'loss': 2.0547, 'learning_rate': 3.74e-06, 'epoch': 102.17}
{'loss': 2.0852, 'learning_rate': 3.7300000000000003e-06, 'epoch': 102.33}
{'loss': 2.1317, 'learning_rate': 3.7200000000000004e-06, 'epoch': 102.5}
{'loss': 2.1101, 'learning_rate': 3.7100000000000005e-06, 'epoch': 102.66}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-630000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-630000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-630000\pytorch_model.bin


{'loss': 2.1078, 'learning_rate': 3.7e-06, 'epoch': 102.82}
{'loss': 2.0789, 'learning_rate': 3.6900000000000002e-06, 'epoch': 102.99}
{'loss': 2.0724, 'learning_rate': 3.6800000000000003e-06, 'epoch': 103.15}
{'loss': 2.0863, 'learning_rate': 3.6700000000000004e-06, 'epoch': 103.31}
{'loss': 2.0889, 'learning_rate': 3.66e-06, 'epoch': 103.48}
{'loss': 2.0753, 'learning_rate': 3.65e-06, 'epoch': 103.64}
{'loss': 2.119, 'learning_rate': 3.6400000000000003e-06, 'epoch': 103.8}
{'loss': 2.0594, 'learning_rate': 3.6300000000000004e-06, 'epoch': 103.97}
{'loss': 2.099, 'learning_rate': 3.62e-06, 'epoch': 104.13}
{'loss': 2.047, 'learning_rate': 3.61e-06, 'epoch': 104.29}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-640000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-640000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-640000\pytorch_model.bin


{'loss': 2.0786, 'learning_rate': 3.6000000000000003e-06, 'epoch': 104.46}
{'loss': 2.1093, 'learning_rate': 3.5900000000000004e-06, 'epoch': 104.62}
{'loss': 2.0631, 'learning_rate': 3.58e-06, 'epoch': 104.78}
{'loss': 2.0791, 'learning_rate': 3.57e-06, 'epoch': 104.95}
{'loss': 2.1084, 'learning_rate': 3.5600000000000002e-06, 'epoch': 105.11}
{'loss': 2.0483, 'learning_rate': 3.5500000000000003e-06, 'epoch': 105.27}
{'loss': 2.0538, 'learning_rate': 3.54e-06, 'epoch': 105.43}
{'loss': 2.0802, 'learning_rate': 3.53e-06, 'epoch': 105.6}
{'loss': 2.0998, 'learning_rate': 3.52e-06, 'epoch': 105.76}
{'loss': 2.0954, 'learning_rate': 3.5100000000000003e-06, 'epoch': 105.92}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-650000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-650000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-650000\pytorch_model.bin


{'loss': 2.076, 'learning_rate': 3.5e-06, 'epoch': 106.09}
{'loss': 2.0614, 'learning_rate': 3.49e-06, 'epoch': 106.25}
{'loss': 2.0946, 'learning_rate': 3.48e-06, 'epoch': 106.41}
{'loss': 2.0638, 'learning_rate': 3.4700000000000002e-06, 'epoch': 106.58}
{'loss': 2.0703, 'learning_rate': 3.46e-06, 'epoch': 106.74}
{'loss': 2.071, 'learning_rate': 3.45e-06, 'epoch': 106.9}
{'loss': 2.0645, 'learning_rate': 3.44e-06, 'epoch': 107.07}
{'loss': 2.1063, 'learning_rate': 3.4300000000000006e-06, 'epoch': 107.23}
{'loss': 2.0753, 'learning_rate': 3.4200000000000007e-06, 'epoch': 107.39}
{'loss': 2.0269, 'learning_rate': 3.4100000000000004e-06, 'epoch': 107.56}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-660000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-660000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-660000\pytorch_model.bin


{'loss': 2.0658, 'learning_rate': 3.4000000000000005e-06, 'epoch': 107.72}
{'loss': 2.0782, 'learning_rate': 3.3900000000000006e-06, 'epoch': 107.88}
{'loss': 2.054, 'learning_rate': 3.3800000000000007e-06, 'epoch': 108.05}
{'loss': 2.0685, 'learning_rate': 3.3700000000000003e-06, 'epoch': 108.21}
{'loss': 2.0354, 'learning_rate': 3.3600000000000004e-06, 'epoch': 108.37}
{'loss': 2.0829, 'learning_rate': 3.3500000000000005e-06, 'epoch': 108.54}
{'loss': 2.0316, 'learning_rate': 3.3400000000000006e-06, 'epoch': 108.7}
{'loss': 2.0947, 'learning_rate': 3.3300000000000003e-06, 'epoch': 108.86}
{'loss': 2.0422, 'learning_rate': 3.3200000000000004e-06, 'epoch': 109.03}
{'loss': 2.1054, 'learning_rate': 3.3100000000000005e-06, 'epoch': 109.19}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-670000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-670000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-670000\pytorch_model.bin


{'loss': 2.0528, 'learning_rate': 3.3000000000000006e-06, 'epoch': 109.35}
{'loss': 2.0678, 'learning_rate': 3.2900000000000003e-06, 'epoch': 109.52}
{'loss': 2.0629, 'learning_rate': 3.2800000000000004e-06, 'epoch': 109.68}
{'loss': 2.0466, 'learning_rate': 3.2700000000000005e-06, 'epoch': 109.84}
{'loss': 2.0306, 'learning_rate': 3.2600000000000006e-06, 'epoch': 110.0}
{'loss': 2.0599, 'learning_rate': 3.2500000000000002e-06, 'epoch': 110.17}
{'loss': 2.0443, 'learning_rate': 3.2400000000000003e-06, 'epoch': 110.33}
{'loss': 2.0315, 'learning_rate': 3.2300000000000004e-06, 'epoch': 110.49}
{'loss': 2.0611, 'learning_rate': 3.2200000000000005e-06, 'epoch': 110.66}
{'loss': 2.0545, 'learning_rate': 3.21e-06, 'epoch': 110.82}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-680000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-680000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-680000\pytorch_model.bin


{'loss': 2.0377, 'learning_rate': 3.2000000000000003e-06, 'epoch': 110.98}
{'loss': 2.0202, 'learning_rate': 3.1900000000000004e-06, 'epoch': 111.15}
{'loss': 2.0551, 'learning_rate': 3.1800000000000005e-06, 'epoch': 111.31}
{'loss': 2.035, 'learning_rate': 3.17e-06, 'epoch': 111.47}
{'loss': 2.0275, 'learning_rate': 3.1600000000000002e-06, 'epoch': 111.64}
{'loss': 2.026, 'learning_rate': 3.1500000000000003e-06, 'epoch': 111.8}
{'loss': 2.0328, 'learning_rate': 3.1400000000000004e-06, 'epoch': 111.96}
{'loss': 2.0392, 'learning_rate': 3.13e-06, 'epoch': 112.13}
{'loss': 2.0057, 'learning_rate': 3.12e-06, 'epoch': 112.29}
{'loss': 2.0816, 'learning_rate': 3.1100000000000003e-06, 'epoch': 112.45}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-690000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-690000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-690000\pytorch_model.bin


{'loss': 2.0196, 'learning_rate': 3.1000000000000004e-06, 'epoch': 112.62}
{'loss': 2.054, 'learning_rate': 3.09e-06, 'epoch': 112.78}
{'loss': 2.0324, 'learning_rate': 3.08e-06, 'epoch': 112.94}
{'loss': 2.0219, 'learning_rate': 3.0700000000000003e-06, 'epoch': 113.11}
{'loss': 2.0085, 'learning_rate': 3.0600000000000003e-06, 'epoch': 113.27}
{'loss': 2.0334, 'learning_rate': 3.05e-06, 'epoch': 113.43}
{'loss': 2.0355, 'learning_rate': 3.04e-06, 'epoch': 113.6}
{'loss': 2.0137, 'learning_rate': 3.0300000000000002e-06, 'epoch': 113.76}
{'loss': 2.0472, 'learning_rate': 3.0200000000000003e-06, 'epoch': 113.92}
{'loss': 2.0401, 'learning_rate': 3.01e-06, 'epoch': 114.09}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-700000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-700000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-700000\pytorch_model.bin


{'loss': 2.0188, 'learning_rate': 3e-06, 'epoch': 114.25}
{'loss': 2.0212, 'learning_rate': 2.99e-06, 'epoch': 114.41}
{'loss': 2.0571, 'learning_rate': 2.9800000000000003e-06, 'epoch': 114.57}
{'loss': 2.0257, 'learning_rate': 2.97e-06, 'epoch': 114.74}
{'loss': 2.0192, 'learning_rate': 2.96e-06, 'epoch': 114.9}
{'loss': 2.018, 'learning_rate': 2.95e-06, 'epoch': 115.06}
{'loss': 2.0399, 'learning_rate': 2.9400000000000002e-06, 'epoch': 115.23}
{'loss': 2.008, 'learning_rate': 2.93e-06, 'epoch': 115.39}
{'loss': 2.0291, 'learning_rate': 2.92e-06, 'epoch': 115.55}
{'loss': 2.0025, 'learning_rate': 2.91e-06, 'epoch': 115.72}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-710000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-710000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-710000\pytorch_model.bin


{'loss': 2.019, 'learning_rate': 2.9e-06, 'epoch': 115.88}
{'loss': 2.0246, 'learning_rate': 2.89e-06, 'epoch': 116.04}
{'loss': 1.9965, 'learning_rate': 2.88e-06, 'epoch': 116.21}
{'loss': 2.0215, 'learning_rate': 2.87e-06, 'epoch': 116.37}
{'loss': 2.005, 'learning_rate': 2.86e-06, 'epoch': 116.53}
{'loss': 2.0414, 'learning_rate': 2.85e-06, 'epoch': 116.7}
{'loss': 2.0415, 'learning_rate': 2.84e-06, 'epoch': 116.86}
{'loss': 2.0359, 'learning_rate': 2.83e-06, 'epoch': 117.02}
{'loss': 2.0303, 'learning_rate': 2.82e-06, 'epoch': 117.19}
{'loss': 2.0251, 'learning_rate': 2.8100000000000006e-06, 'epoch': 117.35}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-720000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-720000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-720000\pytorch_model.bin


{'loss': 1.9977, 'learning_rate': 2.8000000000000003e-06, 'epoch': 117.51}
{'loss': 1.9783, 'learning_rate': 2.7900000000000004e-06, 'epoch': 117.68}
{'loss': 2.0371, 'learning_rate': 2.7800000000000005e-06, 'epoch': 117.84}
{'loss': 2.0122, 'learning_rate': 2.7700000000000006e-06, 'epoch': 118.0}
{'loss': 1.9567, 'learning_rate': 2.7600000000000003e-06, 'epoch': 118.17}
{'loss': 1.9937, 'learning_rate': 2.7500000000000004e-06, 'epoch': 118.33}
{'loss': 2.0172, 'learning_rate': 2.7400000000000004e-06, 'epoch': 118.49}
{'loss': 1.9631, 'learning_rate': 2.7300000000000005e-06, 'epoch': 118.66}
{'loss': 2.0233, 'learning_rate': 2.7200000000000002e-06, 'epoch': 118.82}
{'loss': 2.0341, 'learning_rate': 2.7100000000000003e-06, 'epoch': 118.98}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-730000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-730000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-730000\pytorch_model.bin


{'loss': 1.9776, 'learning_rate': 2.7000000000000004e-06, 'epoch': 119.14}
{'loss': 2.0062, 'learning_rate': 2.6900000000000005e-06, 'epoch': 119.31}
{'loss': 1.9568, 'learning_rate': 2.68e-06, 'epoch': 119.47}
{'loss': 1.9863, 'learning_rate': 2.6700000000000003e-06, 'epoch': 119.63}
{'loss': 1.9904, 'learning_rate': 2.6600000000000004e-06, 'epoch': 119.8}
{'loss': 1.9809, 'learning_rate': 2.6500000000000005e-06, 'epoch': 119.96}
{'loss': 2.0079, 'learning_rate': 2.64e-06, 'epoch': 120.12}
{'loss': 2.013, 'learning_rate': 2.6300000000000002e-06, 'epoch': 120.29}
{'loss': 1.9983, 'learning_rate': 2.6200000000000003e-06, 'epoch': 120.45}
{'loss': 1.996, 'learning_rate': 2.6100000000000004e-06, 'epoch': 120.61}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-740000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-740000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-740000\pytorch_model.bin


{'loss': 2.0077, 'learning_rate': 2.6e-06, 'epoch': 120.78}
{'loss': 2.0136, 'learning_rate': 2.59e-06, 'epoch': 120.94}
{'loss': 1.9544, 'learning_rate': 2.5800000000000003e-06, 'epoch': 121.1}
{'loss': 1.982, 'learning_rate': 2.5700000000000004e-06, 'epoch': 121.27}
{'loss': 1.9952, 'learning_rate': 2.56e-06, 'epoch': 121.43}
{'loss': 1.9844, 'learning_rate': 2.55e-06, 'epoch': 121.59}
{'loss': 1.9983, 'learning_rate': 2.5400000000000002e-06, 'epoch': 121.76}
{'loss': 1.9545, 'learning_rate': 2.5300000000000003e-06, 'epoch': 121.92}
{'loss': 1.9981, 'learning_rate': 2.52e-06, 'epoch': 122.08}
{'loss': 1.9824, 'learning_rate': 2.51e-06, 'epoch': 122.25}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-750000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-750000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-750000\pytorch_model.bin


{'loss': 2.0081, 'learning_rate': 2.5e-06, 'epoch': 122.41}
{'loss': 1.9895, 'learning_rate': 2.4900000000000003e-06, 'epoch': 122.57}
{'loss': 1.9943, 'learning_rate': 2.4800000000000004e-06, 'epoch': 122.74}
{'loss': 1.9762, 'learning_rate': 2.47e-06, 'epoch': 122.9}
{'loss': 1.9766, 'learning_rate': 2.46e-06, 'epoch': 123.06}
{'loss': 1.9542, 'learning_rate': 2.4500000000000003e-06, 'epoch': 123.23}
{'loss': 1.9849, 'learning_rate': 2.4400000000000004e-06, 'epoch': 123.39}
{'loss': 1.9998, 'learning_rate': 2.43e-06, 'epoch': 123.55}
{'loss': 1.9964, 'learning_rate': 2.42e-06, 'epoch': 123.71}
{'loss': 1.9968, 'learning_rate': 2.4100000000000002e-06, 'epoch': 123.88}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-760000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-760000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-760000\pytorch_model.bin


{'loss': 1.9831, 'learning_rate': 2.4000000000000003e-06, 'epoch': 124.04}
{'loss': 1.9569, 'learning_rate': 2.39e-06, 'epoch': 124.2}
{'loss': 1.9083, 'learning_rate': 2.38e-06, 'epoch': 124.37}
{'loss': 1.936, 'learning_rate': 2.37e-06, 'epoch': 124.53}
{'loss': 1.9443, 'learning_rate': 2.3600000000000003e-06, 'epoch': 124.69}
{'loss': 2.0281, 'learning_rate': 2.35e-06, 'epoch': 124.86}
{'loss': 1.954, 'learning_rate': 2.3400000000000005e-06, 'epoch': 125.02}
{'loss': 1.971, 'learning_rate': 2.33e-06, 'epoch': 125.18}
{'loss': 1.948, 'learning_rate': 2.3200000000000002e-06, 'epoch': 125.35}
{'loss': 2.0055, 'learning_rate': 2.3100000000000003e-06, 'epoch': 125.51}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-770000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-770000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-770000\pytorch_model.bin


{'loss': 1.9715, 'learning_rate': 2.3000000000000004e-06, 'epoch': 125.67}
{'loss': 1.9609, 'learning_rate': 2.29e-06, 'epoch': 125.84}
{'loss': 1.995, 'learning_rate': 2.28e-06, 'epoch': 126.0}
{'loss': 1.9464, 'learning_rate': 2.2700000000000003e-06, 'epoch': 126.16}
{'loss': 1.9584, 'learning_rate': 2.2600000000000004e-06, 'epoch': 126.33}
{'loss': 1.9682, 'learning_rate': 2.25e-06, 'epoch': 126.49}
{'loss': 1.961, 'learning_rate': 2.24e-06, 'epoch': 126.65}
{'loss': 1.9952, 'learning_rate': 2.2300000000000002e-06, 'epoch': 126.82}
{'loss': 1.9872, 'learning_rate': 2.2200000000000003e-06, 'epoch': 126.98}
{'loss': 1.957, 'learning_rate': 2.21e-06, 'epoch': 127.14}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-780000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-780000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-780000\pytorch_model.bin


{'loss': 1.9535, 'learning_rate': 2.2e-06, 'epoch': 127.31}
{'loss': 1.9638, 'learning_rate': 2.19e-06, 'epoch': 127.47}
{'loss': 1.9292, 'learning_rate': 2.1800000000000003e-06, 'epoch': 127.63}
{'loss': 1.9667, 'learning_rate': 2.17e-06, 'epoch': 127.8}
{'loss': 1.9689, 'learning_rate': 2.16e-06, 'epoch': 127.96}
{'loss': 1.9625, 'learning_rate': 2.15e-06, 'epoch': 128.12}
{'loss': 1.9531, 'learning_rate': 2.1400000000000003e-06, 'epoch': 128.28}
{'loss': 1.9361, 'learning_rate': 2.13e-06, 'epoch': 128.45}
{'loss': 1.9457, 'learning_rate': 2.12e-06, 'epoch': 128.61}
{'loss': 1.9829, 'learning_rate': 2.11e-06, 'epoch': 128.77}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-790000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-790000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-790000\pytorch_model.bin


{'loss': 1.942, 'learning_rate': 2.1000000000000002e-06, 'epoch': 128.94}
{'loss': 1.9532, 'learning_rate': 2.09e-06, 'epoch': 129.1}
{'loss': 1.9728, 'learning_rate': 2.08e-06, 'epoch': 129.26}
{'loss': 1.9385, 'learning_rate': 2.07e-06, 'epoch': 129.43}
{'loss': 1.9563, 'learning_rate': 2.06e-06, 'epoch': 129.59}
{'loss': 1.9551, 'learning_rate': 2.05e-06, 'epoch': 129.75}
{'loss': 1.942, 'learning_rate': 2.04e-06, 'epoch': 129.92}
{'loss': 1.9277, 'learning_rate': 2.0300000000000005e-06, 'epoch': 130.08}
{'loss': 1.9245, 'learning_rate': 2.02e-06, 'epoch': 130.24}
{'loss': 1.8995, 'learning_rate': 2.0100000000000002e-06, 'epoch': 130.41}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-800000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-800000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-800000\pytorch_model.bin


{'loss': 1.963, 'learning_rate': 2.0000000000000003e-06, 'epoch': 130.57}
{'loss': 1.9467, 'learning_rate': 1.9900000000000004e-06, 'epoch': 130.73}
{'loss': 1.9748, 'learning_rate': 1.98e-06, 'epoch': 130.9}
{'loss': 1.935, 'learning_rate': 1.97e-06, 'epoch': 131.06}
{'loss': 1.938, 'learning_rate': 1.9600000000000003e-06, 'epoch': 131.22}
{'loss': 1.9401, 'learning_rate': 1.9500000000000004e-06, 'epoch': 131.39}
{'loss': 1.9419, 'learning_rate': 1.94e-06, 'epoch': 131.55}
{'loss': 1.9516, 'learning_rate': 1.93e-06, 'epoch': 131.71}
{'loss': 1.9399, 'learning_rate': 1.9200000000000003e-06, 'epoch': 131.88}
{'loss': 1.9544, 'learning_rate': 1.9100000000000003e-06, 'epoch': 132.04}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-810000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-810000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-810000\pytorch_model.bin


{'loss': 1.9246, 'learning_rate': 1.9000000000000002e-06, 'epoch': 132.2}
{'loss': 1.9244, 'learning_rate': 1.8900000000000001e-06, 'epoch': 132.36}
{'loss': 1.9377, 'learning_rate': 1.8800000000000002e-06, 'epoch': 132.53}
{'loss': 1.9315, 'learning_rate': 1.87e-06, 'epoch': 132.69}
{'loss': 1.9758, 'learning_rate': 1.8600000000000002e-06, 'epoch': 132.85}
{'loss': 1.9502, 'learning_rate': 1.85e-06, 'epoch': 133.02}
{'loss': 1.9019, 'learning_rate': 1.8400000000000002e-06, 'epoch': 133.18}
{'loss': 1.9438, 'learning_rate': 1.83e-06, 'epoch': 133.34}
{'loss': 1.9415, 'learning_rate': 1.8200000000000002e-06, 'epoch': 133.51}
{'loss': 1.9643, 'learning_rate': 1.81e-06, 'epoch': 133.67}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-820000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-820000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-820000\pytorch_model.bin


{'loss': 1.9151, 'learning_rate': 1.8000000000000001e-06, 'epoch': 133.83}
{'loss': 1.9418, 'learning_rate': 1.79e-06, 'epoch': 134.0}
{'loss': 1.9411, 'learning_rate': 1.7800000000000001e-06, 'epoch': 134.16}
{'loss': 1.9573, 'learning_rate': 1.77e-06, 'epoch': 134.32}
{'loss': 1.9498, 'learning_rate': 1.76e-06, 'epoch': 134.49}
{'loss': 1.9254, 'learning_rate': 1.75e-06, 'epoch': 134.65}
{'loss': 1.9279, 'learning_rate': 1.74e-06, 'epoch': 134.81}
{'loss': 1.9607, 'learning_rate': 1.73e-06, 'epoch': 134.98}
{'loss': 1.9274, 'learning_rate': 1.72e-06, 'epoch': 135.14}
{'loss': 1.8913, 'learning_rate': 1.7100000000000004e-06, 'epoch': 135.3}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-830000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-830000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-830000\pytorch_model.bin


{'loss': 1.9324, 'learning_rate': 1.7000000000000002e-06, 'epoch': 135.47}
{'loss': 1.9343, 'learning_rate': 1.6900000000000003e-06, 'epoch': 135.63}
{'loss': 1.9195, 'learning_rate': 1.6800000000000002e-06, 'epoch': 135.79}
{'loss': 1.8931, 'learning_rate': 1.6700000000000003e-06, 'epoch': 135.96}
{'loss': 1.9294, 'learning_rate': 1.6600000000000002e-06, 'epoch': 136.12}
{'loss': 1.9263, 'learning_rate': 1.6500000000000003e-06, 'epoch': 136.28}
{'loss': 1.9389, 'learning_rate': 1.6400000000000002e-06, 'epoch': 136.45}
{'loss': 1.9449, 'learning_rate': 1.6300000000000003e-06, 'epoch': 136.61}
{'loss': 1.9153, 'learning_rate': 1.6200000000000002e-06, 'epoch': 136.77}
{'loss': 1.9292, 'learning_rate': 1.6100000000000003e-06, 'epoch': 136.93}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-840000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-840000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-840000\pytorch_model.bin


{'loss': 1.9199, 'learning_rate': 1.6000000000000001e-06, 'epoch': 137.1}
{'loss': 1.9371, 'learning_rate': 1.5900000000000002e-06, 'epoch': 137.26}
{'loss': 1.9578, 'learning_rate': 1.5800000000000001e-06, 'epoch': 137.42}
{'loss': 1.9393, 'learning_rate': 1.5700000000000002e-06, 'epoch': 137.59}
{'loss': 1.9597, 'learning_rate': 1.56e-06, 'epoch': 137.75}
{'loss': 1.9023, 'learning_rate': 1.5500000000000002e-06, 'epoch': 137.91}
{'loss': 1.9303, 'learning_rate': 1.54e-06, 'epoch': 138.08}
{'loss': 1.9162, 'learning_rate': 1.5300000000000002e-06, 'epoch': 138.24}
{'loss': 1.9199, 'learning_rate': 1.52e-06, 'epoch': 138.4}
{'loss': 1.9172, 'learning_rate': 1.5100000000000002e-06, 'epoch': 138.57}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-850000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-850000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-850000\pytorch_model.bin


{'loss': 1.9037, 'learning_rate': 1.5e-06, 'epoch': 138.73}
{'loss': 1.9223, 'learning_rate': 1.4900000000000001e-06, 'epoch': 138.89}
{'loss': 1.9047, 'learning_rate': 1.48e-06, 'epoch': 139.06}
{'loss': 1.8995, 'learning_rate': 1.4700000000000001e-06, 'epoch': 139.22}
{'loss': 1.9247, 'learning_rate': 1.46e-06, 'epoch': 139.38}
{'loss': 1.9229, 'learning_rate': 1.45e-06, 'epoch': 139.55}
{'loss': 1.9422, 'learning_rate': 1.44e-06, 'epoch': 139.71}
{'loss': 1.9114, 'learning_rate': 1.43e-06, 'epoch': 139.87}
{'loss': 1.9485, 'learning_rate': 1.42e-06, 'epoch': 140.04}
{'loss': 1.9096, 'learning_rate': 1.41e-06, 'epoch': 140.2}


Saving model checkpoint to ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-860000
Configuration saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-860000\config.json
Model weights saved in ../checkpoints-albert/standard_2022-45-13_hora_22-45\checkpoint-860000\pytorch_model.bin


{'loss': 1.9232, 'learning_rate': 1.4000000000000001e-06, 'epoch': 140.36}
{'loss': 1.908, 'learning_rate': 1.3900000000000002e-06, 'epoch': 140.53}
{'loss': 1.8975, 'learning_rate': 1.3800000000000001e-06, 'epoch': 140.69}
{'loss': 1.9163, 'learning_rate': 1.3700000000000002e-06, 'epoch': 140.85}
{'loss': 1.908, 'learning_rate': 1.3600000000000001e-06, 'epoch': 141.02}


KeyboardInterrupt: 