<a href="https://colab.research.google.com/github/ekaterinatao/NER_biomed_domain/blob/main/active_learning/%D0%92%D0%9A%D0%A0_nerel_bio_RuBioBERT_active_learning_v6_bald_seeds.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Обучение с разными сидами для расчета доверительных интервалов
# Инструменты
Предобработанный датасет [NEREL-BIO](https://huggingface.co/datasets/ekaterinatao/nerel_bio_ner_unnested)  

Исходная модель [RuBioBERT](https://huggingface.co/alexyalunin/RuBioBERT)

### Установка зависимостей

In [13]:
!pip install datasets accelerate evaluate wandb seqeval -q
!pip install baal -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.4/64.4 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.7/61.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/419.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m409.6/419.6 kB[0m [31m14.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m419.6/419.6 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import numpy as np
import pandas as pd
import random
import json
from dataclasses import dataclass
import torch
from torch.nn.functional import softmax
import datasets
from datasets import Dataset, DatasetDict, concatenate_datasets
from transformers import (AutoTokenizer,
                          DataCollatorForTokenClassification,
                          AutoModelForTokenClassification,
                          TrainingArguments, Trainer)
import evaluate
import warnings
warnings.filterwarnings("ignore")

In [3]:
@dataclass
class TrainingConfig:
    """Settings shared by every training run in this notebook.

    Each attribute carries a type annotation so that ``@dataclass`` registers
    it as a real field: the generated ``__init__`` accepts keyword overrides
    (e.g. ``TrainingConfig(n_epochs=3)``) and ``repr``/``==`` reflect the
    actual values. Calling ``TrainingConfig()`` with no arguments still yields
    the same defaults as before.
    """
    # Hugging Face dataset id passed to ``datasets.load_dataset``.
    dataset: str = 'ekaterinatao/nerel_bio_ner_unnested'
    # Base checkpoint used for both the tokenizer and the model.
    checkpoint: str = 'alexyalunin/RuBioBERT'
    # Used as the Trainer ``output_dir``.
    hf_repo_id: str = "ekaterinatao/nerel-bio-RuBioBERT-al"
    # Passed as ``num_labels`` when the classification model is created.
    n_labels: int = 45
    n_epochs: int = 10
    train_batch_size: int = 4
    eval_batch_size: int = 4
    # Evaluated once, when the class body is executed.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    # Learning rate, weight decay and warm-up ratio for ``TrainingArguments``.
    l_rate: float = 5e-05
    w_decay: float = 0.1
    warm_up: float = 0.1
    # Prefix of the W&B run name; the seed is appended to it.
    run_name: str = "RuBioBERT_AL_v6_3"

config = TrainingConfig()

In [4]:
def fix_seed(seed):
    """Seed Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: integer seed. NumPy integer scalars (such as the elements of an
            array produced by ``np.random.randint``) are accepted as well.
    """
    # ``random.seed`` only accepts built-in types on Python >= 3.11, so a
    # NumPy integer has to be converted to a plain ``int`` first.
    seed = int(seed)

    random.seed(seed)
    np.random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Trade cuDNN autotuning for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print("Current seed: ", seed)

In [5]:
# Generate a fixed list of seeds so the experiments are reproducible.
np.random.seed(64)
# ``.tolist()`` turns the NumPy integers into plain Python ints: the seeds are
# later handed to ``random.seed`` (which rejects NumPy scalars on
# Python >= 3.11) and to ``TrainingArguments``.
seed_lst = np.random.randint(65, 2024, size=4).tolist()

### Скачивание датасета

In [6]:
# Download the preprocessed dataset named in the config and display its splits.
dataset = datasets.load_dataset(config.dataset)
dataset

Downloading readme:   0%|          | 0.00/1.56k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/603k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/76.2k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/70.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/612 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/77 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/77 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'words', 'ner_tags'],
        num_rows: 612
    })
    valid: Dataset({
        features: ['id', 'words', 'ner_tags'],
        num_rows: 77
    })
    test: Dataset({
        features: ['id', 'words', 'ner_tags'],
        num_rows: 77
    })
})

In [7]:
# Labels
url = 'https://raw.githubusercontent.com/ekaterinatao/NER_biomed_domain/main/labels.txt'
# The column name is supplied explicitly, so every line of the file is read as a label.
tags = pd.read_csv(url, names=['tag'])['tag'].tolist()
# Two lookup tables between label strings and their integer ids
# (an id is the label's position in the file).
id_to_tag = dict(enumerate(tags))
tag_to_id = dict(zip(tags, range(len(tags))))

___
### Токенизация

In [8]:
# Load the tokenizer that belongs to the base checkpoint from the config.
tokenizer = AutoTokenizer.from_pretrained(config.checkpoint)

tokenizer_config.json:   0%|          | 0.00/413 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/1.78M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.75M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [9]:
def tokenize_and_align_labels(examples):
    """Tokenize a batch of pre-split sentences and align word tags to tokens.

    Reads ``examples["words"]`` and ``examples["ner_tags"]`` and uses the
    module-level ``tokenizer``. Only the first token of each word receives the
    word's tag; special tokens and the remaining tokens of a word get -100.

    Returns:
        The tokenizer output with an added ``"labels"`` entry holding one list
        of label ids per example.
    """
    encoded = tokenizer(
        examples["words"], truncation=True, max_length=512, is_split_into_words=True
    )

    aligned = []
    for row, word_tags in enumerate(examples["ner_tags"]):
        token_labels = []
        last_word = None
        # word_ids() maps every token to the index of its source word
        # (None for special tokens).
        for current_word in encoded.word_ids(batch_index=row):
            starts_new_word = current_word is not None and current_word != last_word
            token_labels.append(word_tags[current_word] if starts_new_word else -100)
            last_word = current_word
        aligned.append(token_labels)

    encoded["labels"] = aligned
    return encoded

In [10]:
# Tokenize every split in batches; the mapped function also adds the aligned
# "labels" column.
tokenized_dataset = dataset.map(
    tokenize_and_align_labels, batched=True
)
tokenized_dataset

Map:   0%|          | 0/612 [00:00<?, ? examples/s]

Map:   0%|          | 0/77 [00:00<?, ? examples/s]

Map:   0%|          | 0/77 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'words', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 612
    })
    valid: Dataset({
        features: ['id', 'words', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 77
    })
    test: Dataset({
        features: ['id', 'words', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 77
    })
})

# Обучение модели

In [11]:
seqeval = evaluate.load("seqeval")

def compute_metrics(p):
    """Compute overall seqeval metrics for a ``(predictions, labels)`` pair.

    ``predictions`` is arg-maxed over axis 2 to get one class id per token.
    Positions whose label is -100 are dropped from both predictions and
    references before the ids are mapped to tag strings through ``tags``.

    Returns:
        dict with keys ``precision``, ``recall``, ``f1`` and ``accuracy``.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions, true_labels = [], []
    for predicted_row, gold_row in zip(predicted_ids, gold_ids):
        # Keep only the positions that carry a real label.
        kept = [
            (guess, gold)
            for guess, gold in zip(predicted_row, gold_row)
            if gold != -100
        ]
        true_predictions.append([tags[guess] for guess, _ in kept])
        true_labels.append([tags[gold] for _, gold in kept])

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

In [14]:
def choose_samples_bald(test_predictions, n_tokens) -> np.ndarray:
    """Pick the pool samples to label next using the BALD heuristic.

    Uses the BALD implementation from baal
    (https://github.com/baal-org/baal/tree/master).

    Args:
        test_predictions: model predictions for the unlabelled pool; the last
            two axes are swapped before scoring.
        n_tokens: how many indices to return (despite the name, these are
            indices of samples, one per row of ``test_predictions``).

    Returns:
        The first ``n_tokens`` indices after sorting by descending score.
    """
    from baal.active.heuristics import BALD

    # After the transpose: (DATASET_LEN, NUM_CLASSES, NUM_TOKENS)
    # NOTE(review): padded token positions are part of this array and are
    # scored together with real tokens - confirm that this is intended.
    class_first = np.transpose(test_predictions, (0, 2, 1))
    uncertainty = BALD().get_uncertainties(class_first)
    ranking = np.argsort(-uncertainty)

    return ranking[:n_tokens]

In [15]:
# Batch collator for token-classification examples, built around the tokenizer;
# it is passed to every Trainer created below.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [16]:
# Show which device the config selected ("cuda" when available, otherwise "cpu").
print(f'device is {config.device}')

device is cuda


In [17]:
# Interactive Hugging Face Hub login; the training arguments below set
# push_to_hub=True.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [18]:
# Interactive Weights & Biases login; training metrics are reported to wandb.
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [19]:
def train_and_predict(
    al_train_dataset, eval_dataset, al_test_dataset, test_dataset,
    model, tokenizer, data_collator, compute_metrics, SEED
):
    """Train the model, then collect metrics and predictions for the pool.

    Args:
        al_train_dataset: currently labelled examples used for training.
        eval_dataset: validation split; evaluated after every epoch and once
            more after training.
        al_test_dataset: unlabelled pool; predicted only when it is non-empty.
        test_dataset: held-out test split.
        model, tokenizer, data_collator, compute_metrics: forwarded to
            ``Trainer`` unchanged.
        SEED: seed used for ``seed`` and ``data_seed`` of the training
            arguments; also appended to the W&B run name.

    Returns:
        Tuple ``(metrics, probas, trainer)``:
        metrics is a dict with ``train_loss`` plus whatever
        ``trainer.evaluate`` and the two ``trainer.predict`` calls report
        (the latter under the ``pred`` and ``test`` key prefixes);
        probas is the ``predictions`` array for the pool, or ``None`` when the
        pool is empty (presumably raw model outputs rather than probabilities
        - confirm);
        trainer is the ``Trainer`` instance that was used.
    """
    # Seeds drawn with NumPy are NumPy integers; convert to a built-in int so
    # that downstream seeding via ``random.seed`` keeps working on
    # Python >= 3.11.
    seed = int(SEED)

    training_args = TrainingArguments(
        output_dir=config.hf_repo_id,
        num_train_epochs=config.n_epochs,
        learning_rate=config.l_rate,
        weight_decay=config.w_decay,
        warmup_ratio=config.warm_up,
        per_device_train_batch_size=config.train_batch_size,
        per_device_eval_batch_size=config.eval_batch_size,
        group_by_length=True,
        optim="adamw_torch",
        lr_scheduler_type="cosine",
        evaluation_strategy="epoch",
        seed=seed,
        data_seed=seed,
        push_to_hub=True,
        save_strategy="no",
        report_to="wandb",
        logging_steps=16,
        run_name=config.run_name + f'{SEED}',
    )
    metrics = {}
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=al_train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )
    train_result = trainer.train()
    metrics['train_loss'] = train_result.metrics['train_loss']
    metrics.update(trainer.evaluate(eval_dataset))

    # Pool predictions feed the sample-selection step; skip them once the pool
    # has been exhausted.
    if al_test_dataset.num_rows > 0:
        al_test_predictions = trainer.predict(
            al_test_dataset, metric_key_prefix="pred"
        )
        probas = al_test_predictions.predictions
        metrics.update(al_test_predictions.metrics)
    else:
        probas = None

    test_predictions = trainer.predict(test_dataset, metric_key_prefix="test")
    metrics.update(test_predictions.metrics)

    return metrics, probas, trainer

In [20]:
def run_active_learning(
    dataset, al_iters, init_train_size, choose_tokens,
    tokenizer, data_collator, compute_metrics, SEED
):
    """Run the active-learning loop and collect metrics for every iteration.

    A random subset of ``dataset['train']`` is used as the initial labelled
    set; the rest of the train split is the unlabelled pool. On each iteration
    a fresh model is created from ``config.checkpoint``, trained on the
    labelled set, and ``choose_tokens`` pool samples picked by
    ``choose_samples_bald`` are moved into the labelled set.

    Args:
        dataset: mapping with ``'train'``, ``'valid'`` and ``'test'`` splits.
        al_iters: number of active-learning iterations.
        init_train_size: fraction of the train split used as the initial
            labelled set (the resulting count is truncated to an int).
        choose_tokens: number of pool samples added per iteration. Nothing is
            added once the pool holds fewer samples than this.
        tokenizer, data_collator, compute_metrics, SEED: forwarded to
            ``train_and_predict``.

    Returns:
        Tuple ``(all_metrics, trainer)``: all_metrics maps the iteration number
        (as a string) to ``{'train_size': ..., 'metrics': ...}``; trainer is
        the trainer of the last iteration, or ``None`` when ``al_iters`` is 0.
    """

    def unlabeled_pool(labeled):
        # Read the labelled ids once and test membership against a set,
        # instead of re-reading the whole id column for every filtered row.
        labeled_ids = set(labeled['id'])
        return dataset['train'].filter(lambda s: s['id'] not in labeled_ids)

    n_available = dataset['train'].num_rows
    train_dataset = dataset['train'].select(
        random.sample(range(n_available), int(n_available * init_train_size))
    )
    test_dataset = unlabeled_pool(train_dataset)
    all_metrics = {}
    trainer = None  # keeps the return value defined when al_iters == 0

    for al_iter in range(al_iters):
        train_size = train_dataset.num_rows
        print(f'\nIteration: {al_iter}. Training using {train_size} samples')
        # Start from the base checkpoint on every iteration rather than
        # continuing to train the previous model.
        model = AutoModelForTokenClassification.from_pretrained(
            config.checkpoint, num_labels=config.n_labels,
            id2label=id_to_tag, label2id=tag_to_id
        )

        metrics, test_predictions, trainer = train_and_predict(
            train_dataset, dataset['valid'], test_dataset, dataset['test'],
            model, tokenizer, data_collator, compute_metrics, SEED
        )

        all_metrics[f'{al_iter}'] = {'train_size': train_size, 'metrics': metrics}

        if test_dataset.num_rows >= choose_tokens:
            assert test_predictions is not None, "test_predictions is None, must be array"
            # Indices refer to rows of the current pool, in prediction order.
            new_args = choose_samples_bald(test_predictions, choose_tokens)
            train_dataset = concatenate_datasets(
                [train_dataset, test_dataset.select(new_args)]
            )
            test_dataset = unlabeled_pool(train_dataset)

    return all_metrics, trainer

# Сэмплирование по 4 примера (по ~0.7% данных, 30 итераций активного обучения) для каждого сида (4 цикла).

In [21]:
# One full active-learning cycle per seed; each cycle gets its own W&B run and
# its own metrics file.
for SEED in seed_lst:
    fix_seed(SEED)
    # Using the run as a context manager closes it when the block exits,
    # including when training raises, and replaces the deprecated run.join().
    with wandb.init(
        project="ner_bert_nerel_bio",
        name=config.run_name + f'{SEED}',
        reinit=True,
    ) as run:
        metrics, trainer = run_active_learning(
            dataset=tokenized_dataset,
            al_iters=30,
            init_train_size=0.007,
            choose_tokens=4,
            tokenizer=tokenizer,
            data_collator=data_collator,
            compute_metrics=compute_metrics,
            SEED=SEED
        )
    # Persist the per-iteration metrics of this seed.
    with open(f'/content/drive/MyDrive/data/VKR/metrics_30_bald_seed{SEED}.json', 'w') as file:
        json.dump(metrics, file)

[34m[1mwandb[0m: Currently logged in as: [33mtaoea[0m. Use [1m`wandb login --relogin`[0m to force relogin


Current seed:  517


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 0. Training using 4 samples


config.json:   0%|          | 0.00/889 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.847057,0.01114,0.013837,0.012343,0.02735
2,No log,3.642503,0.035247,0.008208,0.013316,0.158974
3,No log,3.487499,0.014493,0.000469,0.000909,0.156177
4,No log,3.388926,0.022472,0.000469,0.000919,0.1554
5,No log,3.318139,0.023529,0.000469,0.00092,0.1554
6,No log,3.258257,0.022989,0.000469,0.000919,0.155556
7,No log,3.207805,0.021053,0.000469,0.000918,0.156333
8,No log,3.175283,0.044643,0.001173,0.002285,0.158042
9,No log,3.159549,0.047619,0.001407,0.002733,0.159596
10,No log,3.155416,0.053846,0.001642,0.003186,0.160218


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 1. Training using 8 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.789123,0.018471,0.023452,0.020665,0.043823
2,No log,3.368628,0.02621,0.003049,0.005462,0.158974
3,No log,3.17942,0.0,0.0,0.0,0.155089
4,No log,3.076553,0.11985,0.007505,0.014125,0.169231
5,No log,2.990402,0.209945,0.026735,0.047431,0.19425
6,No log,2.917831,0.196429,0.030957,0.053485,0.209324
7,No log,2.858582,0.240642,0.052767,0.086555,0.240249
8,2.553400,2.820873,0.242734,0.072467,0.111613,0.267133
9,2.553400,2.804401,0.250543,0.081144,0.122586,0.27568
10,2.553400,2.800904,0.249115,0.082552,0.124009,0.277078


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 2. Training using 12 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.587806,0.044661,0.025797,0.032704,0.155711
2,No log,3.243721,0.068966,0.011257,0.019355,0.171562
3,No log,3.140085,0.144311,0.036585,0.058372,0.20404
4,No log,2.920457,0.159574,0.063321,0.090665,0.234654
5,No log,2.769568,0.181818,0.081144,0.11221,0.259052
6,2.479200,2.640341,0.231994,0.150328,0.182439,0.317949
7,2.479200,2.558438,0.251001,0.17636,0.207163,0.341259
8,2.479200,2.512323,0.270751,0.192777,0.225205,0.361461
9,2.479200,2.492836,0.278502,0.200516,0.233161,0.367521
10,2.479200,2.488683,0.279455,0.201923,0.234445,0.368609


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 3. Training using 16 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.476379,0.03848,0.018996,0.025436,0.159907
2,No log,3.245438,0.138462,0.023218,0.039767,0.189588
3,No log,3.020683,0.172529,0.048311,0.075486,0.224087
4,2.623200,2.782965,0.221063,0.109287,0.146265,0.289666
5,2.623200,2.548073,0.284586,0.194418,0.231016,0.371873
6,2.623200,2.411324,0.316184,0.217636,0.257814,0.403263
7,2.623200,2.305544,0.317436,0.245075,0.276601,0.428438
8,1.370500,2.253013,0.321713,0.255394,0.284743,0.442113
9,1.370500,2.231045,0.323898,0.260319,0.288649,0.44864
10,1.370500,2.227509,0.324001,0.260553,0.288834,0.449262


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 4. Training using 20 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.359592,0.06989,0.037289,0.048631,0.193162
2,No log,3.181652,0.152616,0.073874,0.099558,0.24258
3,No log,2.797266,0.281321,0.145872,0.192124,0.306915
4,2.628900,2.469681,0.340625,0.230066,0.274636,0.40202
5,2.628900,2.241732,0.343511,0.27439,0.305085,0.452525
6,2.628900,2.080451,0.366266,0.314728,0.338547,0.493706
7,1.297600,1.993412,0.400999,0.357645,0.378084,0.533644
8,1.297600,1.947694,0.422427,0.375469,0.397566,0.546853
9,1.297600,1.928213,0.429172,0.382974,0.404759,0.554157
10,0.900400,1.924026,0.430697,0.386961,0.407659,0.556333


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 5. Training using 24 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.307522,0.041139,0.003049,0.005677,0.158508
2,No log,3.054591,0.118312,0.06379,0.082889,0.237451
3,2.768700,2.714026,0.276199,0.152674,0.196647,0.327894
4,2.768700,2.377177,0.356473,0.222795,0.27421,0.409635
5,2.768700,2.139949,0.338968,0.263368,0.296423,0.450039
6,1.467500,1.982189,0.379915,0.335366,0.356253,0.50272
7,1.467500,1.882889,0.418864,0.38743,0.402534,0.549495
8,0.909700,1.833856,0.436226,0.40666,0.420925,0.563947
9,0.909700,1.810999,0.439356,0.409475,0.423889,0.569852
10,0.909700,1.807553,0.4399,0.411116,0.425021,0.570785


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 6. Training using 28 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.176668,0.058056,0.030253,0.039778,0.186169
2,No log,2.840337,0.161109,0.094043,0.118762,0.279876
3,2.832600,2.39422,0.320926,0.240619,0.27503,0.420979
4,2.832600,2.04153,0.400596,0.346857,0.371795,0.518104
5,1.439800,1.813824,0.466753,0.424719,0.444745,0.584615
6,1.439800,1.676438,0.516213,0.466698,0.490208,0.622999
7,0.802700,1.613358,0.52926,0.481473,0.504237,0.63683
8,0.802700,1.582423,0.537906,0.489212,0.512405,0.642269
9,0.802700,1.562863,0.545945,0.498827,0.521324,0.648485
10,0.596200,1.560484,0.54655,0.499765,0.522112,0.648951


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 7. Training using 32 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.172459,0.076142,0.003518,0.006725,0.159751
2,2.911700,2.767629,0.339936,0.124765,0.182536,0.298679
3,2.911700,2.267021,0.377443,0.258208,0.306643,0.437141
4,1.508300,1.876747,0.444921,0.389306,0.41526,0.551671
5,1.508300,1.688674,0.522562,0.472561,0.496305,0.627661
6,0.815700,1.542494,0.567023,0.521811,0.543478,0.664802
7,0.815700,1.494256,0.581307,0.538227,0.558938,0.677389
8,0.555600,1.452122,0.591103,0.551595,0.570666,0.684848
9,0.555600,1.445478,0.592358,0.549015,0.569864,0.685004
10,0.480700,1.443106,0.59277,0.549953,0.57056,0.685936


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 8. Training using 36 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.11995,0.104863,0.032364,0.049462,0.20373
2,3.008900,2.655017,0.306718,0.204503,0.245392,0.393939
3,3.008900,2.126846,0.347128,0.263602,0.299653,0.445688
4,1.638100,1.800061,0.481558,0.434803,0.456988,0.595027
5,1.638100,1.606968,0.544854,0.502814,0.522991,0.650194
6,0.942100,1.481544,0.582269,0.545263,0.563159,0.678632
7,0.942100,1.4381,0.580733,0.549953,0.564924,0.682517
8,0.580800,1.411844,0.591133,0.562852,0.576646,0.689977
9,0.494200,1.399602,0.595045,0.568949,0.581705,0.693551
10,0.494200,1.397121,0.595379,0.568011,0.581373,0.693395


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 9. Training using 40 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.087921,0.090947,0.050891,0.065263,0.209635
2,3.009900,2.503031,0.286584,0.220919,0.249503,0.40979
3,3.009900,2.010952,0.398879,0.35061,0.37319,0.508314
4,1.666900,1.704526,0.500641,0.458255,0.478511,0.60373
5,0.875700,1.504958,0.574526,0.540572,0.557032,0.67568
6,0.875700,1.432314,0.602901,0.565432,0.583565,0.698057
7,0.583800,1.392991,0.602828,0.569887,0.585895,0.695726
8,0.423700,1.355173,0.607677,0.571764,0.589174,0.703497
9,0.423700,1.343412,0.611568,0.575281,0.59287,0.706138
10,0.388700,1.342338,0.610199,0.575281,0.592226,0.705517


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 10. Training using 44 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.01748,0.127506,0.037289,0.057703,0.200155
2,3.135000,2.387503,0.332605,0.249765,0.285293,0.43481
3,1.684500,1.858739,0.449012,0.399625,0.422881,0.561305
4,1.684500,1.558378,0.571025,0.521341,0.545053,0.671329
5,0.889500,1.396228,0.607152,0.565432,0.585549,0.699145
6,0.597400,1.386569,0.601898,0.565197,0.58297,0.697125
7,0.597400,1.320364,0.612687,0.577627,0.59464,0.70303
8,0.431600,1.315306,0.61594,0.579972,0.597415,0.70474
9,0.351400,1.297845,0.619214,0.58349,0.600821,0.707537
10,0.351400,1.297753,0.619498,0.584193,0.601328,0.708159


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 11. Training using 48 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.987976,0.15602,0.064728,0.091497,0.237451
2,3.166800,2.291578,0.390354,0.263837,0.314861,0.442269
3,1.754400,1.771591,0.485161,0.440901,0.461973,0.596426
4,1.030600,1.531487,0.56997,0.53682,0.552899,0.666667
5,1.030600,1.368692,0.608364,0.579972,0.593829,0.700699
6,0.642100,1.292257,0.616392,0.596154,0.606104,0.707692
7,0.472400,1.274854,0.624181,0.602955,0.613384,0.71453
8,0.378100,1.255188,0.634531,0.612805,0.623479,0.723699
9,0.378100,1.248372,0.639368,0.617026,0.627999,0.72634
10,0.339200,1.247224,0.641312,0.618902,0.629908,0.727273


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 12. Training using 52 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.012698,0.122884,0.039165,0.059399,0.206371
2,3.147500,2.256155,0.321941,0.262899,0.28944,0.450816
3,1.903300,1.725279,0.521951,0.476782,0.498345,0.629526
4,1.020800,1.427104,0.599092,0.557223,0.5774,0.691686
5,0.632100,1.339022,0.616541,0.576923,0.596075,0.702409
6,0.632100,1.294938,0.624625,0.586538,0.604983,0.71049
7,0.453300,1.271958,0.637811,0.601313,0.619025,0.719503
8,0.377400,1.256621,0.64093,0.607411,0.623721,0.722922
9,0.307800,1.250237,0.642698,0.609991,0.625917,0.725408
10,0.305700,1.250798,0.644005,0.610929,0.627031,0.72603


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 13. Training using 56 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.968348,0.101493,0.007974,0.014786,0.174514
2,3.297000,2.083232,0.381693,0.310976,0.342724,0.483605
3,1.832200,1.56681,0.567615,0.528612,0.54742,0.666822
4,1.051700,1.377342,0.604279,0.57622,0.589916,0.695571
5,0.657300,1.327653,0.608941,0.581379,0.594841,0.698213
6,0.486600,1.237351,0.625788,0.605535,0.615495,0.716861
7,0.362500,1.243104,0.632505,0.61515,0.623707,0.719037
8,0.314100,1.196935,0.640779,0.625,0.632791,0.729293
9,0.314100,1.205043,0.644353,0.62758,0.635856,0.73007
10,0.283700,1.205087,0.644525,0.628049,0.63618,0.730381


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 14. Training using 60 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.954767,0.091176,0.036351,0.051979,0.20979
2,3.342400,2.082607,0.353541,0.292683,0.320246,0.465734
3,1.989700,1.574737,0.552073,0.512195,0.531387,0.652681
4,1.040400,1.371847,0.602733,0.568949,0.585354,0.697747
5,0.652200,1.285119,0.62962,0.591229,0.609821,0.714375
6,0.521200,1.22076,0.644614,0.614681,0.629292,0.726651
7,0.383300,1.216266,0.647994,0.621248,0.634339,0.732246
8,0.304700,1.182944,0.666339,0.635084,0.650336,0.740948
9,0.276700,1.177954,0.660706,0.632505,0.646298,0.739083
10,0.284900,1.178697,0.660303,0.632739,0.646228,0.738462


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 15. Training using 64 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.2248,2.826479,0.264877,0.171201,0.207977,0.357265
2,1.8543,1.831814,0.489926,0.433396,0.45993,0.591142
3,1.0054,1.459689,0.599851,0.567073,0.583002,0.690754
4,0.6339,1.299823,0.625712,0.592402,0.608601,0.710645
5,0.4697,1.195279,0.658638,0.628518,0.643226,0.733489
6,0.358,1.153823,0.662578,0.646107,0.654239,0.74157
7,0.2927,1.138869,0.678952,0.65666,0.66762,0.74965
8,0.2521,1.121179,0.687122,0.666979,0.676901,0.7554
9,0.2308,1.113508,0.688964,0.66909,0.678882,0.757576
10,0.2211,1.111345,0.688798,0.66909,0.678801,0.75742


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 16. Training using 68 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3139,2.834247,0.206092,0.149156,0.173061,0.321678
2,2.0489,1.880152,0.470465,0.422139,0.444994,0.575758
3,1.1087,1.455819,0.593161,0.553236,0.572503,0.687646
4,0.7409,1.294028,0.620465,0.581614,0.600412,0.704429
5,0.4625,1.22152,0.64571,0.614212,0.629567,0.728205
6,0.3974,1.151646,0.668412,0.628752,0.647976,0.743279
7,0.334,1.152725,0.668384,0.639071,0.653399,0.744056
8,0.2554,1.132454,0.673439,0.645169,0.659001,0.747009
9,0.253,1.131551,0.674829,0.646341,0.660278,0.747319
10,0.222,1.131073,0.673934,0.645169,0.659238,0.747164


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 17. Training using 72 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.29,2.754973,0.341696,0.159709,0.217676,0.344367
2,1.9391,1.717212,0.545924,0.496248,0.519902,0.646154
3,1.0462,1.391235,0.618474,0.577861,0.597478,0.70101
4,0.7413,1.253728,0.634975,0.604597,0.619414,0.719037
5,0.4858,1.188467,0.651493,0.624296,0.637605,0.732556
6,0.4105,1.118354,0.68584,0.659944,0.672643,0.757265
7,0.3204,1.113632,0.683991,0.654315,0.668824,0.754623
8,0.2229,1.074491,0.69335,0.660178,0.676358,0.760218
9,0.2093,1.07782,0.687945,0.657129,0.672184,0.758042
10,0.2101,1.074903,0.688513,0.657833,0.672823,0.758042


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 18. Training using 76 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3062,2.685866,0.298687,0.128049,0.179251,0.319503
2,2.0753,1.694705,0.550671,0.519934,0.534861,0.648329
3,1.0896,1.361425,0.613715,0.58349,0.598221,0.700078
4,0.7311,1.245251,0.641299,0.620544,0.630751,0.724942
5,0.5508,1.166549,0.662376,0.63954,0.650758,0.740016
6,0.3299,1.123062,0.673128,0.651501,0.662138,0.747786
7,0.3318,1.082754,0.681268,0.660178,0.670557,0.754934
8,0.229,1.07904,0.683802,0.661351,0.672389,0.756954
9,0.263,1.066511,0.68788,0.664165,0.675814,0.758508
10,0.2184,1.065985,0.687864,0.664634,0.67605,0.758664


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 19. Training using 80 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3603,2.699093,0.278122,0.07364,0.116447,0.271018
2,2.0867,1.727898,0.518336,0.467402,0.491553,0.62704
3,1.1924,1.363368,0.607286,0.566839,0.586366,0.696503
4,0.548,1.219623,0.65899,0.624062,0.64105,0.730536
5,0.4082,1.138313,0.671203,0.647749,0.659267,0.745921
6,0.3553,1.127323,0.673824,0.655019,0.664288,0.746542
7,0.2924,1.093029,0.689756,0.663227,0.676231,0.759441
8,0.2025,1.094357,0.684544,0.658537,0.671289,0.75742
9,0.2064,1.078669,0.686724,0.661116,0.673677,0.758353
10,0.1809,1.077039,0.687135,0.661351,0.673996,0.758508


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 20. Training using 84 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3575,2.688937,0.324182,0.130159,0.185743,0.308314
2,2.1266,1.636136,0.554924,0.503518,0.527972,0.643667
3,1.1838,1.312165,0.636822,0.592167,0.613683,0.711733
4,0.5346,1.210148,0.661797,0.63743,0.649385,0.736441
5,0.4592,1.136487,0.682019,0.652908,0.667146,0.756954
6,0.3282,1.084756,0.687333,0.665572,0.676278,0.762859
7,0.2475,1.063341,0.695919,0.663931,0.679549,0.765346
8,0.2103,1.052631,0.693674,0.668621,0.680917,0.766123
9,0.198,1.044942,0.69332,0.666979,0.679895,0.767366
10,0.1856,1.044137,0.695408,0.667683,0.681263,0.768454


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 21. Training using 88 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4023,2.591613,0.263864,0.207552,0.232344,0.386014
2,2.2035,1.593411,0.560429,0.527439,0.543434,0.659052
3,0.8115,1.326475,0.618989,0.5856,0.601832,0.705206
4,0.5272,1.205673,0.647738,0.631332,0.63943,0.731158
5,0.4536,1.106801,0.675122,0.649156,0.661884,0.750583
6,0.2971,1.097235,0.678908,0.659006,0.668809,0.753846
7,0.2723,1.080782,0.677365,0.658302,0.667697,0.755089
8,0.2059,1.058916,0.680719,0.66651,0.67354,0.757576
9,0.1871,1.062125,0.679723,0.667448,0.67353,0.75742
10,0.1816,1.05813,0.680708,0.666979,0.673774,0.757887


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 22. Training using 92 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3324,2.602158,0.324723,0.206379,0.252366,0.40575
2,2.1953,1.555535,0.569378,0.530253,0.54912,0.66589
3,0.8029,1.270256,0.629703,0.596623,0.612717,0.716706
4,0.5343,1.19568,0.64923,0.622889,0.635787,0.73209
5,0.3785,1.077473,0.678726,0.659944,0.669203,0.751981
6,0.3268,1.042439,0.689621,0.662289,0.675679,0.757887
7,0.2303,1.030876,0.693201,0.671904,0.682387,0.765035
8,0.1991,1.018183,0.691824,0.670732,0.681115,0.763636
9,0.1908,1.0123,0.694915,0.673077,0.683822,0.765657
10,0.1757,1.009703,0.695663,0.673311,0.684305,0.766434


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 23. Training using 96 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3817,2.58675,0.317779,0.252345,0.281307,0.436053
2,1.3245,1.538578,0.570561,0.529081,0.549039,0.666045
3,0.8082,1.271865,0.632504,0.60788,0.619947,0.716861
4,0.5087,1.164883,0.655452,0.644231,0.649793,0.737529
5,0.3868,1.067123,0.68706,0.663696,0.675176,0.759751
6,0.2889,1.074404,0.680989,0.665338,0.673072,0.756333
7,0.2352,1.035611,0.68963,0.67378,0.681613,0.763636
8,0.1795,1.02675,0.693726,0.679409,0.686493,0.765812
9,0.1773,1.022711,0.694883,0.68152,0.688136,0.767211
10,0.1688,1.02093,0.695767,0.682223,0.688928,0.767988


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 24. Training using 100 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3788,2.541668,0.300926,0.213415,0.249726,0.404662
2,1.3567,1.543968,0.575742,0.532129,0.553077,0.6662
3,0.8356,1.27838,0.635872,0.606942,0.62107,0.715307
4,0.4896,1.1647,0.670646,0.645638,0.657904,0.741725
5,0.4073,1.099839,0.689003,0.658302,0.673303,0.756954
6,0.2758,1.015761,0.69925,0.677767,0.688341,0.766744
7,0.2537,1.015395,0.69356,0.669325,0.681227,0.767211
8,0.1917,0.9795,0.71073,0.689728,0.700071,0.780264
9,0.1518,0.983902,0.71059,0.689259,0.699762,0.779176
10,0.1526,0.984695,0.709693,0.688555,0.698964,0.778555


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 25. Training using 104 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4522,2.438355,0.313809,0.242495,0.273581,0.429371
2,1.3054,1.408232,0.602651,0.565197,0.583323,0.690132
3,0.736,1.179763,0.650194,0.629456,0.639657,0.733333
4,0.4647,1.067172,0.68762,0.669559,0.67847,0.758664
5,0.3342,1.036979,0.697362,0.669559,0.683178,0.766278
6,0.2762,0.954025,0.71746,0.689024,0.702955,0.785703
7,0.2092,0.95516,0.716265,0.70333,0.709738,0.784771
8,0.1593,0.943076,0.724063,0.702158,0.712942,0.7885
9,0.1405,0.938096,0.722932,0.703096,0.712876,0.78819
10,0.1429,0.937072,0.723963,0.704268,0.71398,0.789744


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 26. Training using 108 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4761,2.481611,0.335161,0.229362,0.272348,0.4115
2,1.4135,1.435027,0.593006,0.560741,0.576422,0.686713
3,0.6757,1.250885,0.638458,0.617495,0.627802,0.723543
4,0.4779,1.126488,0.684185,0.662523,0.67318,0.75711
5,0.3449,1.002229,0.695789,0.67425,0.68485,0.769386
6,0.2601,1.007506,0.705312,0.681989,0.693454,0.774981
7,0.2092,0.968999,0.713324,0.695591,0.704346,0.783683
8,0.1713,0.977283,0.704801,0.692073,0.698379,0.780575
9,0.1474,0.965964,0.714763,0.701689,0.708166,0.785082
10,0.1402,0.965669,0.71316,0.700281,0.706662,0.78446


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 27. Training using 112 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4835,2.38437,0.361785,0.26243,0.3042,0.446309
2,1.3175,1.395252,0.59821,0.579972,0.58895,0.697125
3,0.6419,1.151085,0.674836,0.652205,0.663327,0.749806
4,0.4074,1.03397,0.699951,0.675657,0.687589,0.7669
5,0.3172,0.968768,0.707382,0.683161,0.695061,0.773893
6,0.2386,0.945136,0.728665,0.702861,0.715531,0.786325
7,0.1959,0.930359,0.730909,0.707083,0.718798,0.793939
8,0.1716,0.925776,0.728345,0.709897,0.719002,0.791453
9,0.1366,0.928671,0.730464,0.712477,0.721358,0.79425
10,0.1305,0.927904,0.729107,0.712008,0.720456,0.793784


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 28. Training using 116 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5261,2.390074,0.335972,0.248827,0.285907,0.431235
2,1.3532,1.403104,0.615289,0.577627,0.595863,0.699767
3,0.6918,1.194406,0.651392,0.636726,0.643975,0.733023
4,0.4421,1.066859,0.693517,0.672373,0.682782,0.760995
5,0.309,0.965151,0.703008,0.685038,0.693907,0.773737
6,0.249,0.908308,0.732247,0.706144,0.718959,0.794095
7,0.1879,0.910533,0.731011,0.708724,0.719695,0.793318
8,0.1593,0.900471,0.737386,0.719747,0.72846,0.799689
9,0.1352,0.895228,0.740785,0.725844,0.733239,0.802176
10,0.1218,0.89618,0.740546,0.72561,0.733002,0.802331


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 29. Training using 120 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4413,2.286133,0.371125,0.294794,0.328584,0.474437
2,1.3883,1.336217,0.617263,0.587008,0.601755,0.700544
3,0.6675,1.132516,0.662651,0.644934,0.653672,0.740326
4,0.4416,1.013066,0.686312,0.66909,0.677592,0.759285
5,0.3194,0.943479,0.717438,0.701454,0.709356,0.782751
6,0.2209,0.894394,0.721839,0.710835,0.716294,0.791919
7,0.1702,0.889227,0.728838,0.714822,0.721762,0.795338
8,0.1529,0.892551,0.725202,0.716698,0.720925,0.795804
9,0.1387,0.886435,0.730641,0.721388,0.725985,0.799223
10,0.1309,0.886574,0.73327,0.724672,0.728946,0.800311


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▂▁▃▄▃▅▆▅▃▇▇▆▃▅▇▇▇▂▇▇▇██▇▇████▇██▇▅██████
eval/f1,▁▁▂▃▂▄▅▄▂▆▆▅▂▄▇▇▇▁▇▇▇█▇▇▆▇▇█▇▆██▇▄██▇███
eval/loss,▇█▆▅▆▄▄▄▆▃▂▃▆▅▂▂▂▆▂▂▂▂▂▂▃▁▁▁▂▃▁▁▂▅▁▁▂▁▁▁
eval/precision,▁▁▃▃▃▄▅▄▂▆▆▆▂▄▇▇▇▂▇▇▇█▇▇▆███▇▆██▇▄██▇███
eval/recall,▁▁▂▂▁▄▅▄▂▆▆▅▁▃▇▇▆▁▇▇▇▇▇▇▆▇▇▇▇▆██▇▃██▇███
eval/runtime,▁▂▁▆▁▂▂▆▂▆▁▂▂▂▇▂▂▂▂▂▂▂▂▇▂▂▃▃▇▂▇▇▇█▂▂▂▂▂▂
eval/samples_per_second,█▇█▂█▇▇▂▇▃▇▇▇▆▂▇▆▇▇▇▇▆▇▂▆▆▅▅▂▇▂▁▂▁▇▆▆▆▇▇
eval/steps_per_second,█▇█▂█▇▇▂▇▃▇▇▇▆▂▇▆▇▇▇▇▆▇▂▆▆▅▅▂▇▂▁▂▁▇▆▆▆▇▇
train/epoch,▆█▂▃▃▄▃▃▃▁█▇▆▄▃█▆▄▂█▅▃█▅▂▇▄▁▆▃▇▄█▅▂▆▂▇▃▇
train/global_step,▁▁▁▁▁▂▂▂▂▁▄▄▃▃▂▅▄▃▂▅▄▂▆▄▂▆▄▁▅▃▇▄█▅▂▆▃▇▃█

0,1
eval/accuracy,0.80031
eval/f1,0.72895
eval/loss,0.88657
eval/precision,0.73327
eval/recall,0.72467
eval/runtime,0.9414
eval/samples_per_second,81.789
eval/steps_per_second,21.244
train/epoch,10.0
train/global_step,300.0


Current seed:  1063


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 0. Training using 4 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.816309,0.013294,0.016886,0.014876,0.02331
2,No log,3.56267,0.052768,0.047608,0.050055,0.153846
3,No log,3.350509,0.068852,0.02955,0.041352,0.192696
4,No log,3.205517,0.094703,0.027674,0.042831,0.193473
5,No log,3.121142,0.115806,0.034709,0.05341,0.200622
6,No log,3.070256,0.12987,0.044559,0.066352,0.214608
7,No log,3.039236,0.132959,0.049953,0.072622,0.222844
8,No log,3.022436,0.142183,0.05652,0.080886,0.230769
9,No log,3.014938,0.148469,0.060272,0.085738,0.233722
10,No log,3.012997,0.151376,0.061914,0.087883,0.235431


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 1. Training using 8 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.755104,0.019094,0.023218,0.020955,0.043512
2,No log,3.360649,0.038125,0.017355,0.023852,0.168609
3,No log,3.259086,0.067888,0.038227,0.048912,0.204662
4,No log,3.190733,0.094149,0.058865,0.072439,0.22129
5,No log,3.08628,0.094075,0.04878,0.064247,0.22129
6,No log,3.007193,0.105882,0.056989,0.074097,0.231235
7,No log,2.959579,0.122704,0.07364,0.092042,0.248019
8,2.691300,2.93162,0.135752,0.085131,0.104641,0.256099
9,2.691300,2.918194,0.140527,0.088884,0.108892,0.260451
10,2.691300,2.91554,0.142328,0.090056,0.110313,0.260917


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 2. Training using 12 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.580426,0.056949,0.045263,0.050438,0.174204
2,No log,3.166033,0.148148,0.028143,0.0473,0.195804
3,No log,3.064323,0.146701,0.079268,0.102923,0.249573
4,No log,2.910117,0.16516,0.094278,0.120036,0.261072
5,No log,2.746047,0.257531,0.142355,0.183356,0.298057
6,2.578000,2.616241,0.276453,0.174015,0.213587,0.323388
7,2.578000,2.531629,0.279086,0.189024,0.225391,0.341103
8,2.578000,2.487271,0.285619,0.201689,0.236426,0.353225
9,2.578000,2.468643,0.285295,0.205206,0.238712,0.358664
10,2.578000,2.465078,0.284695,0.20591,0.238977,0.360062


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 3. Training using 16 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.54383,0.068657,0.043152,0.052995,0.189588
2,No log,3.158704,0.157582,0.074578,0.101242,0.248019
3,No log,2.887411,0.208217,0.104597,0.139244,0.271173
4,2.638600,2.611889,0.303784,0.197702,0.239523,0.362393
5,2.638600,2.421229,0.32465,0.233818,0.271847,0.414297
6,2.638600,2.290671,0.337423,0.257974,0.292398,0.440249
7,2.638600,2.207434,0.353541,0.284475,0.31527,0.463714
8,1.268500,2.161084,0.366495,0.300657,0.330327,0.47972
9,1.268500,2.140799,0.376238,0.311914,0.341069,0.487179
10,1.268500,2.137136,0.377321,0.314493,0.343054,0.488423


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 4. Training using 20 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.377072,0.085962,0.051126,0.064118,0.200622
2,No log,3.175526,0.144279,0.074812,0.098533,0.236208
3,No log,2.852712,0.139656,0.081848,0.103209,0.257187
4,2.785500,2.577533,0.296577,0.193011,0.23384,0.364258
5,2.785500,2.363716,0.330376,0.253283,0.286738,0.431857
6,2.785500,2.213311,0.363461,0.309334,0.33422,0.481896
7,1.416100,2.106803,0.407943,0.363743,0.384577,0.525253
8,1.416100,2.068782,0.414899,0.367026,0.389497,0.532246
9,1.416100,2.036647,0.429401,0.381567,0.404073,0.541103
10,0.949900,2.030932,0.432154,0.383912,0.406607,0.542191


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 5. Training using 24 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.33381,0.076107,0.028612,0.041589,0.167677
2,No log,3.119035,0.13554,0.040338,0.062172,0.20979
3,2.785800,2.748423,0.213877,0.148921,0.175584,0.325097
4,2.785800,2.454937,0.321072,0.241557,0.275696,0.412121
5,2.785800,2.26304,0.352684,0.283537,0.314353,0.450816
6,1.517100,2.107753,0.3928,0.34803,0.369062,0.498524
7,1.517100,2.06951,0.401276,0.354128,0.37623,0.505828
8,0.959400,2.000135,0.417036,0.375469,0.395162,0.523854
9,0.959400,1.98256,0.420508,0.380863,0.399705,0.528982
10,0.959400,1.983643,0.420834,0.380863,0.399852,0.528671


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 6. Training using 28 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.178766,0.078593,0.042448,0.055124,0.2
2,No log,2.929467,0.103338,0.068246,0.082203,0.225175
3,2.831600,2.437527,0.352923,0.268996,0.305297,0.437607
4,2.831600,2.156038,0.394956,0.323171,0.355475,0.48951
5,1.471000,1.965089,0.422583,0.378283,0.399208,0.533023
6,1.471000,1.83748,0.470799,0.431051,0.450049,0.57467
7,0.904400,1.762492,0.491175,0.456848,0.47339,0.596115
8,0.904400,1.714839,0.504874,0.473734,0.488808,0.609324
9,0.904400,1.699306,0.510516,0.478189,0.493824,0.612432
10,0.651700,1.697505,0.511261,0.479128,0.494673,0.612898


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 7. Training using 32 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.288913,0.095914,0.052298,0.067689,0.198291
2,2.937100,2.790939,0.205548,0.144231,0.169515,0.325874
3,2.937100,2.307895,0.344599,0.267824,0.301399,0.446775
4,1.505100,1.982088,0.43352,0.381567,0.405887,0.542502
5,1.505100,1.794929,0.495133,0.44137,0.466708,0.593162
6,0.855700,1.667274,0.527842,0.482411,0.504105,0.61756
7,0.855700,1.600922,0.548715,0.505863,0.526419,0.637607
8,0.589700,1.5503,0.559142,0.519934,0.538826,0.648329
9,0.589700,1.526659,0.568405,0.529081,0.548038,0.654856
10,0.515700,1.523763,0.56884,0.530019,0.548743,0.656099


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 8. Training using 36 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.094755,0.120496,0.072936,0.090869,0.229526
2,2.985000,2.63111,0.311267,0.21834,0.256651,0.386169
3,2.985000,2.153486,0.382254,0.321295,0.349134,0.480031
4,1.540100,1.901663,0.459418,0.422139,0.43999,0.555556
5,1.540100,1.752885,0.521728,0.489916,0.505322,0.615074
6,0.922000,1.625602,0.539516,0.507505,0.523021,0.629837
7,0.922000,1.550448,0.54342,0.523921,0.533493,0.643512
8,0.626900,1.515378,0.553771,0.533771,0.543587,0.653458
9,0.483000,1.504197,0.559425,0.538696,0.548865,0.65843
10,0.483000,1.499876,0.561967,0.541276,0.551428,0.660451


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 9. Training using 40 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.118084,0.068476,0.026032,0.037723,0.167988
2,3.056700,2.608189,0.264031,0.205206,0.230932,0.379953
3,3.056700,2.09746,0.387025,0.327392,0.35472,0.510023
4,1.701200,1.793851,0.495261,0.441135,0.466634,0.600311
5,0.914200,1.637789,0.523306,0.487101,0.504555,0.62129
6,0.914200,1.526795,0.546723,0.51454,0.530144,0.643512
7,0.613500,1.483018,0.566235,0.533302,0.549275,0.662937
8,0.447700,1.431735,0.580238,0.549484,0.564442,0.678166
9,0.447700,1.426578,0.583869,0.553471,0.568264,0.68174
10,0.400700,1.425066,0.584364,0.55394,0.568745,0.682207


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 10. Training using 44 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.069347,0.06044,0.018058,0.027808,0.148096
2,3.110300,2.481267,0.314252,0.252345,0.279917,0.421445
3,1.779900,1.997308,0.431913,0.389775,0.409763,0.536131
4,1.779900,1.739589,0.521084,0.478189,0.498716,0.614608
5,0.973400,1.544136,0.558838,0.532364,0.54528,0.664336
6,0.657500,1.457237,0.586036,0.555113,0.570155,0.684071
7,0.657500,1.406315,0.597246,0.569653,0.583123,0.691375
8,0.464100,1.391734,0.600049,0.573874,0.58667,0.694639
9,0.390800,1.375417,0.601078,0.575281,0.587897,0.696348
10,0.390800,1.373194,0.601963,0.575281,0.58832,0.696659


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 11. Training using 48 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.032644,0.102241,0.060976,0.076392,0.224398
2,3.117300,2.250768,0.370506,0.285178,0.32229,0.463559
3,1.696300,1.792666,0.491308,0.430816,0.459078,0.584615
4,0.960700,1.555925,0.54789,0.508443,0.52743,0.646465
5,0.960700,1.433765,0.594407,0.563321,0.578447,0.68174
6,0.598600,1.315147,0.625554,0.595919,0.610377,0.711422
7,0.451000,1.302888,0.628177,0.597092,0.61224,0.716861
8,0.383300,1.259745,0.637674,0.612101,0.624626,0.719969
9,0.383300,1.25557,0.643836,0.617261,0.630268,0.724009
10,0.338700,1.254039,0.645374,0.618433,0.631617,0.724631


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 12. Training using 52 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.970105,0.096039,0.04151,0.057966,0.218803
2,3.135500,2.199432,0.357748,0.277205,0.312368,0.458741
3,1.756100,1.737606,0.501727,0.442777,0.470412,0.593318
4,1.037300,1.485208,0.584562,0.54878,0.566106,0.674903
5,0.631000,1.361985,0.624066,0.587477,0.605219,0.705517
6,0.631000,1.286613,0.640497,0.604597,0.622029,0.718415
7,0.445500,1.28326,0.639081,0.606707,0.622474,0.71826
8,0.363900,1.251186,0.647998,0.618668,0.632993,0.726496
9,0.361600,1.249517,0.650296,0.617964,0.633718,0.726496
10,0.321600,1.248503,0.649568,0.61773,0.633249,0.726496


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 13. Training using 56 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.977922,0.086121,0.032598,0.047295,0.181197
2,3.202200,2.239472,0.39395,0.268762,0.319532,0.442424
3,1.951500,1.72609,0.523206,0.483818,0.502742,0.620202
4,1.128100,1.450418,0.587801,0.560507,0.57383,0.688889
5,0.716700,1.345013,0.610988,0.584193,0.59729,0.704429
6,0.503600,1.282381,0.618054,0.597326,0.607513,0.709713
7,0.373000,1.257709,0.628839,0.614681,0.621679,0.721834
8,0.355400,1.227377,0.642771,0.626642,0.634604,0.729915
9,0.355400,1.231078,0.639971,0.624062,0.631916,0.727584
10,0.303800,1.230656,0.63925,0.623358,0.631204,0.727584


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 14. Training using 60 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.889179,0.127014,0.031426,0.050385,0.197047
2,3.248400,1.927923,0.43731,0.370544,0.401168,0.533333
3,1.827300,1.534816,0.555556,0.525328,0.540019,0.664802
4,0.977800,1.369924,0.61147,0.577627,0.594067,0.700389
5,0.660500,1.283198,0.638465,0.608818,0.623289,0.720124
6,0.455600,1.217859,0.645723,0.626642,0.636039,0.729604
7,0.364100,1.201732,0.650109,0.63227,0.641065,0.73411
8,0.317800,1.176309,0.660486,0.643293,0.651776,0.74157
9,0.295200,1.176804,0.664327,0.643762,0.653883,0.742502
10,0.262500,1.175861,0.666264,0.646107,0.65603,0.743279


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 15. Training using 64 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.2348,2.830549,0.220008,0.132036,0.16503,0.291064
2,1.9002,1.902624,0.444415,0.390947,0.41597,0.550583
3,1.0467,1.50568,0.576271,0.526266,0.550135,0.670707
4,0.6727,1.340752,0.627038,0.586304,0.605987,0.709557
5,0.476,1.251584,0.641013,0.611398,0.625855,0.723077
6,0.3747,1.196057,0.663588,0.640244,0.651707,0.739549
7,0.306,1.177711,0.670351,0.654315,0.662236,0.747786
8,0.2683,1.160955,0.675826,0.657598,0.666587,0.751671
9,0.2448,1.159007,0.674812,0.654081,0.664285,0.752448
10,0.2389,1.157738,0.673435,0.653377,0.663254,0.752137


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 16. Training using 68 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3828,2.920998,0.106856,0.068715,0.083643,0.218648
2,2.1564,1.999503,0.40527,0.342636,0.371331,0.504584
3,1.2148,1.527885,0.567615,0.533537,0.550048,0.67366
4,0.7476,1.368683,0.608417,0.579737,0.593731,0.697747
5,0.5727,1.286131,0.626721,0.608349,0.617399,0.717793
6,0.4154,1.219355,0.64848,0.635553,0.641952,0.736597
7,0.3465,1.217751,0.653145,0.638133,0.645552,0.737995
8,0.3361,1.197182,0.660418,0.644465,0.652344,0.74359
9,0.242,1.200656,0.658261,0.642824,0.650451,0.742347
10,0.2681,1.197161,0.660264,0.644934,0.652509,0.743124


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 17. Training using 72 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3259,2.801848,0.200369,0.127345,0.155721,0.298368
2,2.1089,1.81069,0.482347,0.435741,0.457861,0.58042
3,1.1545,1.450163,0.595992,0.564962,0.580063,0.687646
4,0.6791,1.319178,0.627649,0.597326,0.612112,0.710179
5,0.4894,1.224007,0.662178,0.645872,0.653924,0.739394
6,0.429,1.178976,0.668189,0.651735,0.65986,0.74561
7,0.289,1.157301,0.665394,0.653846,0.659569,0.746542
8,0.2354,1.137621,0.674308,0.662289,0.668244,0.752448
9,0.2259,1.141252,0.675915,0.662758,0.669272,0.753069
10,0.206,1.140137,0.675191,0.662523,0.668797,0.75338


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 18. Training using 76 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.348,2.716462,0.241734,0.154315,0.188377,0.338928
2,1.962,1.6983,0.524706,0.470685,0.496229,0.62533
3,1.0845,1.362826,0.625966,0.589118,0.606983,0.707071
4,0.7075,1.274929,0.634164,0.608583,0.621111,0.717016
5,0.4932,1.172694,0.664004,0.643293,0.653484,0.741103
6,0.336,1.10303,0.679602,0.657129,0.668177,0.750738
7,0.2782,1.094868,0.69291,0.664634,0.678477,0.759285
8,0.2338,1.060915,0.696286,0.672608,0.684242,0.762549
9,0.2221,1.073089,0.698684,0.672139,0.685154,0.763481
10,0.2196,1.068404,0.69844,0.671904,0.684915,0.763326


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 19. Training using 80 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3783,2.764554,0.286465,0.166276,0.210417,0.36892
2,2.2473,1.749551,0.50721,0.470216,0.488013,0.614297
3,1.1928,1.379928,0.609521,0.579503,0.594133,0.698679
4,0.5677,1.267078,0.645667,0.627345,0.636374,0.726807
5,0.4163,1.189172,0.667805,0.64212,0.654711,0.740171
6,0.3693,1.164437,0.664578,0.646811,0.655574,0.745455
7,0.3031,1.140314,0.672322,0.650563,0.661263,0.74934
8,0.2228,1.12922,0.680995,0.661351,0.671029,0.755866
9,0.2155,1.123387,0.677528,0.658302,0.667777,0.754934
10,0.2019,1.122505,0.677131,0.657598,0.667222,0.754468


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 20. Training using 84 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3984,2.595992,0.322416,0.221623,0.262682,0.406838
2,2.0781,1.581993,0.558363,0.518293,0.537582,0.657343
3,1.1596,1.278306,0.630682,0.598734,0.614293,0.71655
4,0.5483,1.145473,0.670516,0.640009,0.654908,0.738306
5,0.4566,1.104085,0.685028,0.662054,0.673345,0.755245
6,0.3394,1.045916,0.701772,0.668856,0.684918,0.763015
7,0.2482,0.992926,0.705781,0.675657,0.690391,0.76488
8,0.2349,0.995462,0.716004,0.684099,0.699688,0.770629
9,0.202,0.992447,0.712818,0.683396,0.697797,0.770785
10,0.1931,0.98945,0.714425,0.685272,0.699545,0.771873


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 21. Training using 88 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4872,2.684583,0.263279,0.188321,0.219579,0.373893
2,2.1633,1.659387,0.51829,0.471857,0.493985,0.620668
3,0.7671,1.363068,0.599259,0.569184,0.583834,0.694794
4,0.5484,1.185782,0.655484,0.646107,0.650762,0.739239
5,0.4721,1.121909,0.677083,0.655488,0.666111,0.752448
6,0.2906,1.089431,0.680566,0.666041,0.673225,0.754157
7,0.2431,1.070067,0.688224,0.668856,0.678402,0.756643
8,0.1809,1.081403,0.684109,0.665338,0.674593,0.756022
9,0.1816,1.07477,0.685735,0.666276,0.675865,0.756488
10,0.1681,1.074116,0.684984,0.66651,0.675621,0.756333


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 22. Training using 92 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4054,2.563132,0.313102,0.251642,0.279027,0.435742
2,2.1497,1.520974,0.582075,0.542214,0.561438,0.676301
3,0.7779,1.256854,0.6388,0.609287,0.623695,0.720591
4,0.5923,1.151233,0.661391,0.646811,0.654019,0.743745
5,0.368,1.084201,0.691731,0.670966,0.68119,0.758974
6,0.3178,1.038329,0.702709,0.681285,0.691831,0.765812
7,0.2495,0.998838,0.709998,0.687852,0.698749,0.768298
8,0.2101,1.014286,0.708897,0.687617,0.698095,0.76892
9,0.1854,1.012299,0.708614,0.690666,0.699525,0.76892
10,0.1712,1.010746,0.709196,0.690901,0.699929,0.76892


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 23. Training using 96 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4233,2.545208,0.295928,0.223265,0.254511,0.408858
2,1.2048,1.509039,0.582981,0.549484,0.565737,0.682051
3,0.751,1.278402,0.632976,0.609522,0.621027,0.717483
4,0.4771,1.177438,0.665926,0.632974,0.649032,0.740171
5,0.378,1.105295,0.674765,0.657833,0.666192,0.749184
6,0.3121,1.088632,0.68626,0.665338,0.675637,0.755556
7,0.2679,1.02583,0.694451,0.672139,0.683113,0.762238
8,0.2157,1.020056,0.694337,0.672842,0.683421,0.762393
9,0.1913,1.01625,0.701286,0.677767,0.689326,0.765346
10,0.1834,1.016521,0.701384,0.677533,0.689252,0.765657


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 24. Training using 100 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4469,2.519763,0.288095,0.227017,0.253935,0.401709
2,1.2942,1.474061,0.589447,0.550188,0.569141,0.686558
3,0.7701,1.233314,0.64425,0.620075,0.631931,0.723232
4,0.4807,1.125418,0.666264,0.646107,0.65603,0.744833
5,0.4028,1.058836,0.679674,0.665807,0.672669,0.756799
6,0.2853,1.01987,0.689076,0.673077,0.680982,0.760839
7,0.2365,0.995606,0.695224,0.679409,0.687226,0.766744
8,0.1943,0.968338,0.697991,0.684568,0.691215,0.77265
9,0.1584,0.972372,0.700669,0.687852,0.694201,0.77467
10,0.1333,0.971724,0.699307,0.686679,0.692936,0.773271


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 25. Training using 104 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.424,2.49982,0.366605,0.231707,0.283949,0.41958
2,1.2996,1.481138,0.582206,0.544794,0.562879,0.676146
3,0.7937,1.236349,0.646743,0.621717,0.633983,0.725874
4,0.4839,1.144289,0.671674,0.650094,0.660708,0.746232
5,0.3626,1.075086,0.686117,0.66182,0.67375,0.758197
6,0.3096,1.049297,0.6916,0.670028,0.680643,0.763015
7,0.2347,0.981652,0.712497,0.693949,0.703101,0.779953
8,0.1745,0.998918,0.711256,0.692073,0.701533,0.779021
9,0.1532,0.992729,0.711506,0.694653,0.702979,0.780264
10,0.1542,0.996558,0.711127,0.693949,0.702433,0.779798


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 26. Training using 108 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4793,2.565167,0.252199,0.18152,0.211101,0.355245
2,1.46,1.483528,0.583144,0.540338,0.560925,0.677078
3,0.6897,1.225135,0.645319,0.619137,0.631957,0.727739
4,0.5305,1.138774,0.653498,0.643996,0.648712,0.738928
5,0.3947,1.045932,0.687122,0.666979,0.676901,0.760373
6,0.2778,0.970034,0.703207,0.689024,0.696044,0.77669
7,0.2328,0.954405,0.715392,0.693246,0.704145,0.784149
8,0.1817,0.944782,0.716375,0.70075,0.708477,0.787257
9,0.161,0.942552,0.717308,0.699812,0.708452,0.787257
10,0.1496,0.943636,0.719471,0.701923,0.710589,0.7885


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 27. Training using 112 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5393,2.479211,0.298114,0.215056,0.249864,0.397514
2,1.4543,1.469356,0.577772,0.546201,0.561543,0.66993
3,0.6822,1.206804,0.643629,0.628987,0.636223,0.728982
4,0.456,1.067938,0.679174,0.670732,0.674926,0.754779
5,0.331,1.032964,0.692989,0.679174,0.686012,0.764569
6,0.2589,0.972757,0.704583,0.695826,0.700177,0.775447
7,0.1786,0.953346,0.715278,0.700516,0.70782,0.783528
8,0.1673,0.947878,0.71466,0.703096,0.708831,0.784615
9,0.1458,0.943782,0.717107,0.706848,0.71194,0.786014
10,0.1296,0.944106,0.71704,0.706614,0.711788,0.786169


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 28. Training using 116 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4187,2.312635,0.376157,0.324109,0.348199,0.49324
2,1.359,1.388618,0.593902,0.57106,0.582257,0.691064
3,0.6487,1.180211,0.64837,0.634381,0.641299,0.729293
4,0.4448,1.03633,0.695536,0.672373,0.683759,0.768609
5,0.3571,0.972744,0.705631,0.696529,0.70105,0.776068
6,0.2744,0.955507,0.718066,0.700047,0.708942,0.781507
7,0.1939,0.921849,0.724509,0.709897,0.717129,0.787568
8,0.154,0.914483,0.727011,0.712008,0.719431,0.788345
9,0.1506,0.916057,0.726253,0.713649,0.719896,0.789277
10,0.1341,0.917348,0.724779,0.712711,0.718695,0.788811


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 29. Training using 120 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4581,2.319897,0.369139,0.291745,0.32591,0.475214
2,1.4133,1.375544,0.609498,0.58091,0.594861,0.700855
3,0.6299,1.176521,0.662416,0.645638,0.653919,0.739394
4,0.4803,1.05104,0.699203,0.678705,0.688802,0.765967
5,0.3424,1.016747,0.705586,0.690197,0.697807,0.774981
6,0.2514,0.983823,0.712885,0.699343,0.706049,0.779332
7,0.1929,0.957584,0.72181,0.707083,0.71437,0.785703
8,0.1601,0.957753,0.717686,0.707083,0.712345,0.787102
9,0.1491,0.956117,0.716865,0.707786,0.712296,0.787102
10,0.1488,0.955246,0.71595,0.706379,0.711132,0.786791


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▂▁▃▄▃▅▆▅▃▆▇▆▂▅▇▇▇▂▇▇▇███▆████▇███▅██▇███
eval/f1,▁▁▂▃▂▄▅▄▂▆▆▅▁▄▇▇▆▁▇▇▆▇▇▇▆█▇█▇▆██▇▄██▇███
eval/loss,▇█▆▅▆▄▄▄▆▃▃▃▆▅▂▂▂▆▂▂▂▂▂▂▃▁▂▁▂▂▁▁▂▅▁▁▂▁▁▁
eval/precision,▂▁▂▄▃▅▅▄▂▆▆▅▁▄▇▇▇▂▇▇▇▇▇▇▆████▇██▇▄██▇███
eval/recall,▁▁▂▃▂▄▄▄▁▆▆▅▁▃▇▇▆▁▇▇▆▇▇▇▆█▇█▇▆██▇▃██▇███
eval/runtime,▁▂█▁▂▂▇▇▁▆█▇▄▄▂▂▇▂▇▂▇▂▁▇▂▂▂▂█▆▁▂▁▁▁▆▆▁▆▁
eval/samples_per_second,█▇▁█▇▆▂▂█▂▁▂▄▄▇▇▁▇▁▆▂▇▇▂▆▇▇▇▁▂█▇▇█▇▂▂█▃█
eval/steps_per_second,█▇▁█▇▆▂▂█▂▁▂▄▄▇▇▁▇▁▆▂▇▇▂▆▇▇▇▁▂█▇▇█▇▂▂█▃█
train/epoch,▆█▂▃▃▄▃▃▃▁█▇▆▄▃█▆▄▂█▅▃█▅▂▇▄▁▆▃▇▄█▅▂▆▂▇▃▇
train/global_step,▁▁▁▁▁▂▂▂▂▁▄▄▃▃▂▅▄▃▂▅▄▂▆▄▂▆▄▁▅▃▇▄█▅▂▆▃▇▃█

0,1
eval/accuracy,0.78679
eval/f1,0.71113
eval/loss,0.95525
eval/precision,0.71595
eval/recall,0.70638
eval/runtime,1.1538
eval/samples_per_second,66.738
eval/steps_per_second,17.334
train/epoch,10.0
train/global_step,300.0


Current seed:  184


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 0. Training using 4 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.832788,0.015657,0.019934,0.017538,0.02906
2,No log,3.591454,0.053993,0.062148,0.057785,0.133023
3,No log,3.386785,0.085479,0.075516,0.080189,0.19627
4,No log,3.230706,0.098967,0.069653,0.081762,0.197358
5,No log,3.125157,0.108688,0.068949,0.084374,0.199845
6,No log,3.056917,0.127382,0.089353,0.105031,0.227195
7,No log,3.012968,0.143162,0.109991,0.124403,0.261228
8,No log,2.987821,0.148874,0.119371,0.1325,0.278322
9,No log,2.976237,0.156071,0.127814,0.140536,0.28951
10,No log,2.97325,0.157431,0.129925,0.142362,0.292152


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 1. Training using 8 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.793654,0.0206,0.026266,0.02309,0.046309
2,No log,3.388875,0.069714,0.057223,0.062854,0.194095
3,No log,3.120377,0.1011,0.068949,0.081985,0.230303
4,No log,2.962188,0.172423,0.152205,0.161684,0.324009
5,No log,2.858643,0.214718,0.216229,0.215471,0.361927
6,No log,2.783642,0.228158,0.236398,0.232205,0.375913
7,No log,2.726753,0.243584,0.249296,0.246407,0.391764
8,2.700700,2.691097,0.259379,0.262664,0.261011,0.404507
9,2.700700,2.675524,0.26416,0.266886,0.265516,0.408547
10,2.700700,2.67224,0.26532,0.268058,0.266682,0.409169


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 2. Training using 12 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.550951,0.072091,0.080206,0.075933,0.150427
2,No log,3.052217,0.155205,0.115385,0.132365,0.2777
3,No log,2.854486,0.235425,0.17425,0.20027,0.353069
4,No log,2.681999,0.25519,0.207552,0.228919,0.391142
5,No log,2.529655,0.302523,0.258677,0.278887,0.439627
6,2.749600,2.408495,0.337662,0.292683,0.313568,0.46791
7,2.749600,2.329152,0.344958,0.29925,0.320482,0.475058
8,2.749600,2.280247,0.356319,0.311445,0.332374,0.484071
9,2.749600,2.259464,0.360717,0.316135,0.336958,0.488112
10,2.749600,2.255306,0.363685,0.319418,0.340117,0.489821


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 3. Training using 16 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.456042,0.083913,0.079268,0.081524,0.194561
2,No log,2.946545,0.205923,0.104362,0.138521,0.293862
3,No log,2.721629,0.274331,0.226079,0.247879,0.409946
4,2.792700,2.487582,0.340437,0.292214,0.314488,0.465734
5,2.792700,2.283413,0.390211,0.342167,0.364613,0.501632
6,2.792700,2.120325,0.424767,0.374062,0.397805,0.524631
7,2.792700,2.011572,0.44996,0.395403,0.420921,0.54188
8,1.419700,1.955817,0.467945,0.417683,0.441388,0.555245
9,1.419700,1.933963,0.472987,0.427064,0.448854,0.56115
10,1.419700,1.930013,0.473916,0.428236,0.44992,0.562393


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 4. Training using 20 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.368667,0.081485,0.071529,0.076183,0.183217
2,No log,2.749434,0.268534,0.222561,0.243396,0.405128
3,No log,2.404435,0.320845,0.26712,0.291528,0.447086
4,2.786200,2.126762,0.397112,0.335366,0.363636,0.507071
5,2.786200,1.933183,0.452809,0.383677,0.415387,0.544367
6,2.786200,1.812306,0.471131,0.41909,0.443589,0.567832
7,1.285500,1.720006,0.498587,0.455206,0.47591,0.601088
8,1.285500,1.678463,0.520998,0.480066,0.499695,0.621445
9,1.285500,1.660562,0.532458,0.494371,0.512708,0.629837
10,0.787900,1.657614,0.533064,0.49531,0.513494,0.630614


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 5. Training using 24 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.276754,0.125623,0.094512,0.107869,0.247708
2,No log,2.725878,0.285267,0.213884,0.244471,0.408858
3,2.940100,2.330462,0.379908,0.30863,0.34058,0.478788
4,2.940100,2.018817,0.420208,0.350141,0.381988,0.514685
5,2.940100,1.820019,0.475455,0.440666,0.4574,0.58042
6,1.382200,1.698986,0.537074,0.502814,0.51938,0.631235
7,1.382200,1.616483,0.565466,0.533771,0.549162,0.656721
8,0.839300,1.578726,0.57543,0.54925,0.562035,0.669153
9,0.839300,1.560206,0.584608,0.555816,0.569849,0.675524
10,0.839300,1.557045,0.586249,0.557927,0.571738,0.675991


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 6. Training using 28 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.156243,0.099307,0.060507,0.075197,0.206216
2,No log,2.654757,0.259217,0.212711,0.233673,0.401865
3,2.978800,2.283491,0.321593,0.278377,0.298429,0.45035
4,2.978800,1.950148,0.438827,0.368433,0.400561,0.530225
5,1.577500,1.736984,0.499087,0.44864,0.472521,0.597824
6,1.577500,1.613524,0.540711,0.503049,0.5212,0.642269
7,0.867200,1.548353,0.562469,0.527908,0.544641,0.660606
8,0.867200,1.497907,0.577844,0.543152,0.559961,0.672572
9,0.867200,1.48181,0.579523,0.546904,0.562741,0.67397
10,0.628100,1.480028,0.579052,0.547139,0.562643,0.673815


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 7. Training using 32 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.153715,0.110199,0.066135,0.082662,0.217716
2,3.140700,2.663854,0.261426,0.2106,0.233277,0.392696
3,3.140700,2.239583,0.358656,0.297842,0.325432,0.478477
4,1.707600,1.869601,0.466977,0.422842,0.443815,0.577778
5,1.707600,1.641426,0.517474,0.486163,0.50133,0.622533
6,0.916000,1.510532,0.571322,0.541041,0.55577,0.669775
7,0.916000,1.424192,0.601179,0.574109,0.587332,0.694328
8,0.606900,1.388778,0.603245,0.575516,0.589054,0.696503
9,0.606900,1.376227,0.609613,0.583021,0.59602,0.70272
10,0.525300,1.374201,0.609069,0.582786,0.595638,0.70303


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 8. Training using 36 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.061728,0.111034,0.074812,0.089393,0.241803
2,3.086200,2.505948,0.268935,0.21318,0.237834,0.408081
3,3.086200,2.040352,0.411275,0.347326,0.376605,0.517327
4,1.666200,1.73554,0.507531,0.466229,0.486004,0.607148
5,1.666200,1.549294,0.567087,0.538227,0.55228,0.663248
6,0.851200,1.421699,0.59739,0.579737,0.588431,0.692463
7,0.851200,1.362944,0.615683,0.594747,0.605034,0.702875
8,0.603200,1.341423,0.618622,0.599906,0.60912,0.708314
9,0.442700,1.3264,0.623308,0.604597,0.61381,0.712044
10,0.442700,1.324455,0.623882,0.6053,0.614451,0.712665


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 9. Training using 40 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.025508,0.155045,0.128283,0.1404,0.27972
2,3.152900,2.420364,0.350562,0.306989,0.327332,0.468842
3,3.152900,1.889672,0.454545,0.416276,0.43457,0.558819
4,1.703900,1.634836,0.510755,0.478893,0.494311,0.626418
5,0.871900,1.47281,0.588567,0.562617,0.5753,0.682207
6,0.871900,1.361918,0.613183,0.597795,0.605391,0.701787
7,0.584900,1.330627,0.616965,0.605535,0.611197,0.707848
8,0.471100,1.297498,0.626408,0.613039,0.619652,0.714685
9,0.471100,1.291663,0.626884,0.614681,0.620722,0.716706
10,0.383700,1.290423,0.626016,0.613977,0.619938,0.716239


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 10. Training using 44 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.988254,0.158048,0.12758,0.141189,0.285625
2,3.181900,2.322536,0.368406,0.307927,0.335462,0.476768
3,1.810200,1.815834,0.478529,0.436445,0.456519,0.584615
4,1.810200,1.511238,0.568347,0.531426,0.549267,0.664025
5,0.959400,1.371263,0.615707,0.586538,0.600769,0.697436
6,0.601200,1.291371,0.639961,0.618199,0.628892,0.71826
7,0.601200,1.246436,0.640629,0.621248,0.630789,0.724786
8,0.446500,1.216518,0.646105,0.626407,0.636104,0.728827
9,0.362400,1.2135,0.650206,0.629925,0.639905,0.728516
10,0.362400,1.211525,0.649008,0.629221,0.638962,0.727894


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 11. Training using 48 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.927742,0.20682,0.109522,0.143208,0.297902
2,3.278100,2.224606,0.360606,0.306989,0.331644,0.480497
3,1.762400,1.706472,0.495533,0.46834,0.481553,0.606838
4,1.009300,1.422519,0.594402,0.567777,0.580784,0.685625
5,1.009300,1.2728,0.636605,0.619137,0.627749,0.723699
6,0.615600,1.211802,0.646678,0.63227,0.639393,0.733023
7,0.396900,1.179288,0.647676,0.637195,0.642393,0.734577
8,0.338000,1.159428,0.65291,0.641886,0.647351,0.73784
9,0.338000,1.147101,0.654753,0.644465,0.649569,0.739705
10,0.302400,1.145562,0.656748,0.647045,0.651861,0.741414


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 12. Training using 52 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.886175,0.166992,0.100375,0.125385,0.283916
2,3.188700,2.111768,0.373737,0.329737,0.350361,0.503186
3,1.798300,1.602789,0.54655,0.510788,0.528064,0.652836
4,0.979900,1.419958,0.584479,0.558161,0.571017,0.680963
5,0.652000,1.292043,0.621366,0.601548,0.611296,0.71049
6,0.652000,1.249439,0.624577,0.605535,0.614908,0.71453
7,0.463700,1.211706,0.636407,0.619841,0.628015,0.724165
8,0.392000,1.189236,0.639518,0.62242,0.630853,0.72603
9,0.289200,1.181742,0.645658,0.629456,0.637454,0.730381
10,0.310200,1.179618,0.645999,0.630394,0.638101,0.730536


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 13. Training using 56 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.847908,0.196363,0.14939,0.169686,0.32603
2,3.298700,2.008041,0.420765,0.379221,0.398915,0.52634
3,1.828500,1.542316,0.557298,0.516651,0.536205,0.656255
4,1.019800,1.325107,0.612604,0.58818,0.600144,0.703963
5,0.615900,1.228917,0.635427,0.621717,0.628497,0.724476
6,0.448700,1.185934,0.634302,0.62969,0.631988,0.72634
7,0.352600,1.158209,0.641777,0.636961,0.63936,0.731002
8,0.296100,1.139905,0.65052,0.645638,0.64807,0.73784
9,0.296100,1.125728,0.645941,0.645638,0.645789,0.736908
10,0.270600,1.125592,0.64757,0.646811,0.64719,0.737995


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 14. Training using 60 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.848681,0.228512,0.180816,0.201885,0.360528
2,3.410100,1.951282,0.448353,0.421435,0.434478,0.567988
3,1.970200,1.499933,0.580041,0.537054,0.55772,0.669153
4,1.044400,1.316278,0.631323,0.608818,0.619866,0.715618
5,0.702400,1.233721,0.645099,0.631332,0.638142,0.728361
6,0.454300,1.190235,0.65291,0.641886,0.647351,0.73582
7,0.393400,1.138526,0.664747,0.649625,0.657099,0.744522
8,0.304600,1.119721,0.663973,0.655253,0.659585,0.745765
9,0.279100,1.111937,0.665397,0.655722,0.660524,0.747941
10,0.261500,1.111099,0.664765,0.655722,0.660213,0.748096


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 15. Training using 64 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3207,2.750331,0.246694,0.140009,0.178636,0.329915
2,1.9367,1.781486,0.500256,0.458021,0.478208,0.608858
3,1.0179,1.413228,0.591181,0.55652,0.573327,0.687646
4,0.6633,1.273087,0.623454,0.614681,0.619036,0.716706
5,0.4945,1.180461,0.64626,0.636257,0.64122,0.730847
6,0.38,1.12305,0.66563,0.652674,0.659088,0.744056
7,0.3132,1.094038,0.673876,0.657598,0.665638,0.748873
8,0.2733,1.075075,0.671905,0.66182,0.666824,0.750427
9,0.2515,1.066127,0.675778,0.666745,0.671231,0.754623
10,0.2453,1.065061,0.677611,0.667917,0.672729,0.755556


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 16. Training using 68 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3922,2.801495,0.188865,0.143996,0.163407,0.342968
2,2.1059,1.82533,0.499871,0.454034,0.475851,0.60777
3,1.1765,1.399123,0.605451,0.57833,0.59158,0.70101
4,0.6977,1.226655,0.648378,0.623593,0.635744,0.727117
5,0.5207,1.163925,0.66294,0.639775,0.651152,0.734887
6,0.416,1.088259,0.669086,0.648687,0.658728,0.742191
7,0.3401,1.035872,0.684975,0.663931,0.674288,0.755245
8,0.2866,1.035697,0.681327,0.664869,0.672997,0.755556
9,0.2592,1.023591,0.685652,0.66909,0.67727,0.75711
10,0.2193,1.020002,0.68666,0.671201,0.678843,0.758042


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 17. Training using 72 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3031,2.662257,0.308995,0.136961,0.189795,0.326496
2,1.9874,1.61336,0.555867,0.511023,0.532502,0.65066
3,0.9094,1.329088,0.627484,0.607176,0.617163,0.714064
4,0.6978,1.218176,0.643684,0.626173,0.634807,0.725097
5,0.5367,1.131638,0.665627,0.650797,0.658129,0.74188
6,0.4226,1.088109,0.668447,0.645872,0.656966,0.743434
7,0.3287,1.055261,0.679769,0.661116,0.670313,0.752914
8,0.2512,1.025846,0.682078,0.671201,0.676596,0.756333
9,0.2299,1.020842,0.683445,0.671904,0.677625,0.758974
10,0.2222,1.01971,0.685088,0.674484,0.679745,0.760528


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 18. Training using 76 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3941,2.734189,0.253978,0.202158,0.225124,0.378555
2,2.1217,1.748775,0.51359,0.469747,0.490691,0.624242
3,1.2881,1.343322,0.616634,0.596388,0.606342,0.709091
4,0.7603,1.142291,0.648367,0.642589,0.645465,0.735198
5,0.5027,1.05351,0.670165,0.659006,0.664538,0.753069
6,0.3446,1.007001,0.691307,0.673311,0.682191,0.770163
7,0.2718,0.964285,0.707752,0.689493,0.698503,0.78244
8,0.2219,0.962081,0.711143,0.689962,0.700393,0.786169
9,0.227,0.9515,0.713702,0.696295,0.704891,0.789277
10,0.1955,0.951566,0.714699,0.695591,0.705015,0.789588


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 19. Training using 80 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3283,2.548198,0.322841,0.271811,0.295136,0.454701
2,1.9736,1.479122,0.591143,0.538462,0.563574,0.675058
3,1.0276,1.210596,0.651493,0.634381,0.642823,0.731624
4,0.4962,1.104774,0.67425,0.653377,0.663649,0.749029
5,0.3677,1.012306,0.681687,0.666979,0.674253,0.756799
6,0.3233,0.998499,0.686256,0.670966,0.678525,0.766589
7,0.2838,0.952481,0.7017,0.687383,0.694467,0.776068
8,0.2039,0.935088,0.706996,0.692073,0.699455,0.782906
9,0.1902,0.923792,0.713738,0.698171,0.705868,0.7885
10,0.1733,0.923969,0.713634,0.697233,0.705338,0.787257


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 20. Training using 84 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3902,2.619678,0.285064,0.234991,0.257617,0.425641
2,2.1924,1.564861,0.559284,0.519934,0.538892,0.660917
3,1.2454,1.250662,0.640675,0.614681,0.627409,0.724942
4,0.5513,1.11477,0.660658,0.6447,0.652582,0.740016
5,0.4143,0.996283,0.687665,0.673311,0.680412,0.761616
6,0.377,0.956427,0.702626,0.696529,0.699564,0.780886
7,0.2408,0.942303,0.717391,0.704268,0.710769,0.794716
8,0.2242,0.905167,0.726273,0.712477,0.719309,0.802642
9,0.1909,0.897379,0.729801,0.713884,0.721755,0.803885
10,0.175,0.899195,0.728363,0.712477,0.720332,0.803263


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 21. Training using 88 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3516,2.506042,0.306787,0.183396,0.229561,0.380264
2,2.0854,1.490291,0.57406,0.533537,0.553057,0.668376
3,0.754,1.242099,0.62674,0.612336,0.619454,0.719503
4,0.5231,1.144297,0.659513,0.641417,0.650339,0.740948
5,0.4597,1.049547,0.674297,0.663227,0.668716,0.755556
6,0.2869,0.997438,0.698568,0.686445,0.692453,0.778399
7,0.2673,0.949656,0.712937,0.708255,0.710588,0.795649
8,0.2025,0.947804,0.722975,0.713649,0.718282,0.800932
9,0.1742,0.946039,0.728121,0.714118,0.721051,0.802331
10,0.1959,0.944622,0.728445,0.715291,0.721808,0.802486


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 22. Training using 92 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4076,2.571599,0.270205,0.157598,0.199082,0.360373
2,2.276,1.477627,0.579846,0.54925,0.564133,0.675835
3,0.8433,1.209338,0.640707,0.628987,0.634793,0.727117
4,0.5947,1.099955,0.667393,0.646107,0.656578,0.739549
5,0.3855,1.021656,0.68753,0.672373,0.679867,0.760062
6,0.308,1.005449,0.691906,0.68363,0.687743,0.770629
7,0.2294,0.937839,0.714218,0.708021,0.711106,0.793318
8,0.183,0.939863,0.708658,0.704503,0.706574,0.792541
9,0.1935,0.935514,0.713576,0.707552,0.710551,0.795183
10,0.1654,0.935783,0.713576,0.707552,0.710551,0.795027


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 23. Training using 96 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.564,2.528886,0.314737,0.261961,0.285934,0.443357
2,1.2784,1.53672,0.557766,0.512899,0.534392,0.656876
3,0.8287,1.250051,0.632267,0.612101,0.622021,0.720124
4,0.4904,1.106842,0.670387,0.662054,0.666195,0.749184
5,0.3763,1.007008,0.690054,0.685038,0.687537,0.76892
6,0.2688,0.94485,0.723022,0.718105,0.720555,0.793318
7,0.2253,0.916477,0.727979,0.724906,0.726439,0.800311
8,0.17,0.908153,0.734911,0.728189,0.731535,0.806993
9,0.1663,0.904717,0.734655,0.729831,0.732235,0.808392
10,0.1464,0.904745,0.735947,0.730769,0.733349,0.808858


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 24. Training using 100 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4144,2.478706,0.338183,0.288931,0.311623,0.462626
2,1.3107,1.473666,0.582443,0.53682,0.558701,0.673504
3,0.8303,1.222361,0.655893,0.632974,0.64423,0.734887
4,0.5193,1.066852,0.684935,0.653612,0.668907,0.753691
5,0.4001,0.993581,0.698054,0.68152,0.689688,0.773427
6,0.2978,0.930497,0.714083,0.708724,0.711394,0.792541
7,0.2564,0.905799,0.731927,0.724203,0.728044,0.804662
8,0.1786,0.879379,0.731656,0.729597,0.730625,0.809635
9,0.1637,0.880103,0.733192,0.728893,0.731036,0.810101
10,0.1601,0.879455,0.732846,0.728893,0.730864,0.809946


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 25. Training using 104 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5073,2.480803,0.300599,0.211773,0.248487,0.411189
2,1.4456,1.482513,0.577277,0.529081,0.552129,0.666977
3,0.8349,1.224819,0.649577,0.629925,0.6396,0.731158
4,0.5082,1.08786,0.667452,0.664634,0.66604,0.750427
5,0.3216,1.010834,0.678369,0.675188,0.676775,0.761927
6,0.3062,0.991064,0.690088,0.684099,0.68708,0.77094
7,0.2179,0.959674,0.708098,0.705441,0.706767,0.790054
8,0.1692,0.946658,0.717736,0.710835,0.714269,0.799689
9,0.157,0.941604,0.720199,0.714118,0.717146,0.800622
10,0.1541,0.939831,0.721591,0.714822,0.71819,0.801243


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 26. Training using 108 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4994,2.3633,0.358707,0.314962,0.335415,0.502253
2,1.3564,1.417384,0.600707,0.558161,0.578653,0.680653
3,0.6449,1.135278,0.655422,0.637899,0.646541,0.735198
4,0.4731,1.026448,0.690751,0.672608,0.681559,0.759907
5,0.3312,0.94111,0.709809,0.695826,0.702748,0.783217
6,0.2465,0.942807,0.72556,0.713649,0.719555,0.799534
7,0.1814,0.893874,0.72933,0.717871,0.723555,0.804662
8,0.1561,0.899936,0.733971,0.719512,0.72667,0.806061
9,0.1356,0.905761,0.739234,0.724672,0.731881,0.809479
10,0.1284,0.903808,0.739172,0.724437,0.73173,0.809169


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 27. Training using 112 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5259,2.378392,0.365401,0.309099,0.3349,0.500389
2,1.3518,1.35686,0.615211,0.576689,0.595327,0.695571
3,0.7117,1.120319,0.672431,0.647514,0.659737,0.740948
4,0.4633,0.994407,0.689256,0.684568,0.686904,0.766278
5,0.3533,0.927958,0.717309,0.707552,0.712397,0.785237
6,0.2554,0.878371,0.727725,0.720216,0.723951,0.798601
7,0.1901,0.873335,0.736517,0.727017,0.731736,0.807459
8,0.1809,0.849808,0.748577,0.74015,0.74434,0.816317
9,0.1499,0.857966,0.745312,0.736398,0.740828,0.814763
10,0.1406,0.858013,0.744772,0.734991,0.739849,0.814141


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 28. Training using 116 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5099,2.333104,0.345602,0.299484,0.320895,0.498679
2,1.4702,1.380649,0.602689,0.567777,0.584712,0.688267
3,0.7303,1.147569,0.655663,0.638133,0.646779,0.733955
4,0.4495,0.997765,0.689147,0.680582,0.684838,0.760373
5,0.3394,0.928693,0.713713,0.710366,0.712036,0.792852
6,0.2466,0.873801,0.725772,0.721857,0.72381,0.803419
7,0.2043,0.848413,0.735232,0.732645,0.733936,0.810101
8,0.142,0.855859,0.733224,0.73288,0.733052,0.80979
9,0.123,0.846179,0.741071,0.739681,0.740376,0.815074
10,0.1297,0.847634,0.739948,0.738039,0.738993,0.814452


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 29. Training using 120 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.472,2.265273,0.394405,0.271107,0.321334,0.467444
2,1.3247,1.339891,0.621035,0.596857,0.608706,0.706449
3,0.6827,1.098383,0.670468,0.647983,0.659034,0.746076
4,0.4476,0.969859,0.699264,0.690901,0.695057,0.773893
5,0.312,0.89597,0.723544,0.699109,0.711116,0.790365
6,0.221,0.855653,0.73911,0.724203,0.73158,0.806527
7,0.1882,0.838854,0.745763,0.732645,0.739146,0.815229
8,0.1565,0.823306,0.74542,0.734756,0.74005,0.817249
9,0.1462,0.825532,0.744368,0.736163,0.740243,0.817871
10,0.1209,0.826857,0.744473,0.734522,0.739464,0.817716


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▂▁▄▅▄▆▆▆▄▇▇▆▃▅▇▇▇▄▇▇▇▇▇▇▆███▇▇██▇▄██▇███
eval/f1,▂▁▃▄▃▅▆▅▃▆▇▆▂▄▇▇▆▂▇▇▆▇▇▇▆███▇▆██▇▃██▇███
eval/loss,▇█▅▅▅▄▃▃▅▃▂▃▆▅▂▂▂▆▂▂▂▂▁▂▃▁▁▁▂▃▁▁▂▅▁▁▂▁▁▁
eval/precision,▂▁▃▄▃▅▆▅▃▆▇▆▂▄▇▇▆▃▇▇▇▇▇▇▆███▇▆██▇▄██▇███
eval/recall,▁▁▃▄▃▅▅▅▃▆▆▅▂▄▇▇▆▂▇▇▆▇▇▇▅███▇▆██▇▃██▇███
eval/runtime,▆▁▁▁▁▁▂▇▁█▁▁▁▁▇▁▂▂▁▁▁▂▁█▁▁▅▇▄▂█▇█▅▁▁▂▂▂▂
eval/samples_per_second,▂▇█▇▇█▇▁█▁████▂▇▇▇█▇█▇█▁█▇▃▂▄▇▁▂▁▃▇▇▇▇▇▇
eval/steps_per_second,▂▇█▇▇█▇▁█▁████▂▇▇▇█▇█▇█▁█▇▃▂▄▇▁▂▁▃▇▇▇▇▇▇
train/epoch,▆█▂▃▃▄▃▃▃▁█▇▆▄▃█▆▄▂█▅▃█▅▂▇▄▁▆▃▇▄█▅▂▆▂▇▃▇
train/global_step,▁▁▁▁▁▂▂▂▂▁▄▄▃▃▂▅▄▃▂▅▄▂▆▄▂▆▄▁▅▃▇▄█▅▂▆▃▇▃█

0,1
eval/accuracy,0.81772
eval/f1,0.73946
eval/loss,0.82686
eval/precision,0.74447
eval/recall,0.73452
eval/runtime,0.9353
eval/samples_per_second,82.322
eval/steps_per_second,21.382
train/epoch,10.0
train/global_step,300.0


Current seed:  1255


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 0. Training using 4 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.890504,0.007775,0.009615,0.008598,0.013831
2,No log,3.611541,0.066031,0.081144,0.072811,0.128671
3,No log,3.383992,0.106147,0.065197,0.080779,0.215851
4,No log,3.214814,0.109375,0.02955,0.046529,0.198291
5,No log,3.102307,0.111389,0.020872,0.035157,0.191142
6,No log,3.033006,0.130102,0.023921,0.040412,0.195493
7,No log,2.990445,0.153576,0.030722,0.051202,0.204507
8,No log,2.966095,0.166488,0.036351,0.059673,0.21181
9,No log,2.954934,0.179104,0.042214,0.068324,0.218959
10,No log,2.952078,0.182796,0.043856,0.07074,0.219891


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 1. Training using 8 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.713386,0.025788,0.032598,0.028796,0.051593
2,No log,3.280517,0.076668,0.037992,0.050808,0.201399
3,No log,3.037379,0.111528,0.04878,0.067874,0.223776
4,No log,2.916597,0.155747,0.068011,0.094678,0.251904
5,No log,2.830342,0.203803,0.115619,0.147539,0.306138
6,No log,2.750148,0.25494,0.154315,0.192257,0.340171
7,No log,2.687798,0.277305,0.175657,0.215075,0.365501
8,2.493900,2.650579,0.296069,0.192542,0.233338,0.383528
9,2.493900,2.634166,0.303226,0.198405,0.239864,0.389899
10,2.493900,2.630647,0.303355,0.199343,0.240589,0.390676


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 2. Training using 12 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.601207,0.043518,0.044325,0.043918,0.126185
2,No log,3.131835,0.065912,0.023218,0.034339,0.158664
3,No log,3.023146,0.09321,0.051829,0.066616,0.227195
4,No log,2.838238,0.210698,0.106238,0.141254,0.301943
5,No log,2.688649,0.222637,0.139775,0.171733,0.337529
6,2.603300,2.571664,0.265265,0.186445,0.218978,0.377001
7,2.603300,2.488896,0.29149,0.209662,0.243896,0.397203
8,2.603300,2.442782,0.301633,0.220919,0.255043,0.408236
9,2.603300,2.42165,0.309949,0.227955,0.262703,0.415695
10,2.603300,2.417741,0.310707,0.228659,0.263442,0.416628


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 3. Training using 16 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.485967,0.056822,0.042683,0.048748,0.163015
2,No log,3.039233,0.074377,0.040572,0.052504,0.205594
3,No log,2.826129,0.139535,0.088649,0.108418,0.271173
4,2.633100,2.597911,0.277814,0.197936,0.23117,0.3885
5,2.633100,2.397737,0.333839,0.258208,0.291193,0.44864
6,2.633100,2.265793,0.352445,0.284006,0.314545,0.472727
7,2.633100,2.177372,0.368143,0.310038,0.336601,0.489355
8,1.268600,2.127109,0.374554,0.320356,0.345342,0.498213
9,1.268600,2.104453,0.382666,0.329268,0.353964,0.504895
10,1.268600,2.100633,0.383715,0.330441,0.355091,0.505828


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 4. Training using 20 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.325193,0.09877,0.060272,0.074862,0.20777
2,No log,2.931303,0.158003,0.089822,0.114533,0.250505
3,No log,2.543418,0.300189,0.22303,0.25592,0.418803
4,2.687400,2.238042,0.363222,0.29925,0.328147,0.488112
5,2.687400,2.028999,0.411478,0.356473,0.382006,0.53007
6,2.687400,1.922613,0.42159,0.368199,0.39309,0.536597
7,1.151700,1.839787,0.450876,0.398218,0.422914,0.558508
8,1.151700,1.798867,0.460152,0.412992,0.435298,0.568298
9,1.151700,1.787171,0.463255,0.413931,0.437206,0.571406
10,0.793500,1.784303,0.464848,0.415572,0.438831,0.57265


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 5. Training using 24 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.281723,0.080477,0.050657,0.062176,0.206682
2,No log,2.851196,0.156126,0.082786,0.108199,0.262005
3,2.844300,2.469094,0.322341,0.275094,0.296849,0.45641
4,2.844300,2.192542,0.383218,0.336304,0.358231,0.513131
5,2.844300,2.023618,0.417772,0.369371,0.392084,0.536441
6,1.265000,1.915814,0.431393,0.384146,0.406401,0.549495
7,1.265000,1.829427,0.456784,0.409006,0.431576,0.564413
8,0.780200,1.802097,0.465903,0.419794,0.441648,0.573582
9,0.780200,1.794407,0.467239,0.421435,0.443157,0.575447
10,0.780200,1.791579,0.467654,0.422139,0.443732,0.575913


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 6. Training using 28 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.137723,0.055832,0.023687,0.033262,0.193939
2,No log,2.834068,0.158268,0.099437,0.122137,0.300389
3,2.905200,2.512609,0.28626,0.211069,0.242981,0.406061
4,2.905200,2.153671,0.403195,0.34334,0.370868,0.516861
5,1.533300,1.977503,0.412085,0.355066,0.381456,0.527739
6,1.533300,1.814011,0.453813,0.396341,0.423135,0.564103
7,0.884800,1.757059,0.459596,0.405488,0.43085,0.567677
8,0.884800,1.697637,0.47399,0.420966,0.445907,0.579176
9,0.884800,1.678308,0.478238,0.427767,0.451597,0.583528
10,0.652900,1.676999,0.480199,0.429409,0.453386,0.58446


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 7. Training using 32 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.151727,0.094189,0.058161,0.071915,0.225175
2,2.934500,2.666304,0.242496,0.168621,0.198921,0.357265
3,2.934500,2.181004,0.386188,0.327861,0.354642,0.507692
4,1.412200,1.862599,0.452726,0.403143,0.426498,0.558353
5,1.412200,1.682148,0.50503,0.459193,0.481022,0.60777
6,0.758500,1.604193,0.542543,0.508443,0.524939,0.6446
7,0.758500,1.52552,0.565878,0.532833,0.548859,0.662626
8,0.520600,1.508866,0.571145,0.5394,0.554818,0.665734
9,0.520600,1.495771,0.573165,0.541979,0.557136,0.66791
10,0.459200,1.493148,0.572208,0.540807,0.556065,0.667133


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 8. Training using 36 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.048205,0.077836,0.04151,0.054145,0.198601
2,3.041400,2.580792,0.325673,0.232645,0.271409,0.418337
3,3.041400,2.069303,0.409933,0.358114,0.382276,0.524786
4,1.556600,1.808725,0.466081,0.410882,0.436744,0.5669
5,1.556600,1.644331,0.522434,0.475141,0.497666,0.62129
6,0.827900,1.532792,0.553367,0.520403,0.536379,0.65641
7,0.827900,1.515862,0.560921,0.531191,0.545652,0.66014
8,0.565300,1.466919,0.573192,0.544559,0.558509,0.669775
9,0.438600,1.448312,0.578117,0.550188,0.563807,0.674281
10,0.438600,1.448256,0.579961,0.551126,0.565176,0.674126


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 9. Training using 40 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.026947,0.109499,0.054878,0.073114,0.20202
2,3.024100,2.382739,0.353683,0.2894,0.318328,0.469153
3,3.024100,1.840607,0.460443,0.394465,0.424908,0.56115
4,1.493400,1.60971,0.545857,0.491323,0.517156,0.636674
5,0.751300,1.494301,0.589684,0.538931,0.563166,0.671484
6,0.751300,1.463414,0.597141,0.548546,0.571813,0.673038
7,0.529800,1.372488,0.615462,0.571295,0.592557,0.695571
8,0.389800,1.373217,0.614,0.575985,0.594385,0.692929
9,0.389800,1.367298,0.615327,0.57622,0.595131,0.69526
10,0.349200,1.365945,0.616387,0.576923,0.596002,0.695726


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 10. Training using 44 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.990104,0.120214,0.06848,0.087255,0.236208
2,3.097500,2.235279,0.395769,0.329034,0.359329,0.513598
3,1.513200,1.778299,0.473515,0.415103,0.442389,0.567988
4,1.513200,1.554664,0.553273,0.505394,0.528251,0.647086
5,0.795800,1.42359,0.596851,0.560038,0.577858,0.689666
6,0.512000,1.361529,0.614888,0.581144,0.59754,0.700078
7,0.512000,1.319977,0.620715,0.590291,0.605121,0.705672
8,0.379000,1.300974,0.634074,0.602251,0.617753,0.71453
9,0.344800,1.296363,0.629412,0.602251,0.615532,0.713598
10,0.344800,1.295396,0.629993,0.602955,0.616177,0.713908


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 11. Training using 48 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.064032,0.0,0.0,0.0,0.126651
2,3.162500,2.363012,0.356743,0.299015,0.325338,0.484848
3,1.794900,1.800321,0.49327,0.438321,0.464175,0.599223
4,0.984100,1.552915,0.552378,0.498358,0.52398,0.636053
5,0.984100,1.412703,0.608468,0.572936,0.590168,0.697436
6,0.594600,1.35991,0.613205,0.583724,0.598102,0.702098
7,0.439500,1.309349,0.623738,0.594043,0.608529,0.710334
8,0.372500,1.28691,0.635874,0.608583,0.621929,0.717793
9,0.372500,1.27819,0.636629,0.613039,0.624612,0.720901
10,0.314500,1.278841,0.635102,0.611867,0.623268,0.720435


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 12. Training using 52 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.951779,0.08694,0.054174,0.066753,0.214297
2,3.258800,2.199098,0.384755,0.349203,0.366118,0.514219
3,1.784300,1.707309,0.506579,0.451454,0.477431,0.601243
4,1.007000,1.483258,0.581476,0.550657,0.565647,0.676301
5,0.627100,1.354479,0.61512,0.587711,0.601103,0.706915
6,0.627100,1.309456,0.631708,0.604597,0.617855,0.715152
7,0.476400,1.247551,0.639512,0.614916,0.626973,0.722611
8,0.373700,1.239515,0.64317,0.620544,0.631654,0.726962
9,0.309300,1.233958,0.643846,0.621951,0.632709,0.727739
10,0.294100,1.232585,0.643792,0.622655,0.633047,0.728205


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 13. Training using 56 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.962896,0.064682,0.02955,0.040567,0.197669
2,3.321000,2.047468,0.430204,0.370779,0.398287,0.544211
3,1.869200,1.647726,0.54904,0.496248,0.521311,0.640404
4,1.015300,1.401798,0.613847,0.575985,0.594313,0.699301
5,0.615300,1.318919,0.62411,0.596154,0.609812,0.707692
6,0.498900,1.232598,0.650062,0.61515,0.632124,0.725563
7,0.373100,1.204816,0.653951,0.619137,0.636068,0.729448
8,0.320800,1.167631,0.663083,0.636492,0.649515,0.737685
9,0.320800,1.156459,0.668939,0.644465,0.656474,0.742502
10,0.296000,1.156444,0.66927,0.643058,0.655902,0.742657


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 14. Training using 60 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.86922,0.117261,0.029315,0.046904,0.188656
2,3.252000,1.982887,0.424202,0.374062,0.397557,0.540637
3,1.891900,1.532121,0.564005,0.521811,0.542088,0.659829
4,0.981600,1.341087,0.610451,0.578096,0.593833,0.698834
5,0.644500,1.261297,0.628961,0.605066,0.616782,0.71251
6,0.446000,1.235061,0.631681,0.609756,0.620525,0.716239
7,0.351900,1.186879,0.657385,0.636726,0.646891,0.732246
8,0.308900,1.182373,0.665466,0.649859,0.65757,0.739083
9,0.261400,1.173005,0.662419,0.646107,0.654161,0.738306
10,0.246300,1.172778,0.662023,0.646341,0.654088,0.737995


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 15. Training using 64 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3237,2.874018,0.079386,0.042448,0.055318,0.222378
2,1.9403,1.949325,0.431898,0.374062,0.400905,0.538617
3,1.0635,1.509394,0.569146,0.534709,0.551391,0.673815
4,0.6803,1.344068,0.60468,0.57575,0.589861,0.698679
5,0.4931,1.25458,0.635503,0.613743,0.624433,0.72028
6,0.3806,1.180904,0.651435,0.638602,0.644955,0.736286
7,0.3129,1.145957,0.675077,0.665103,0.670053,0.750272
8,0.2703,1.146304,0.664446,0.654784,0.659579,0.744988
9,0.2488,1.13583,0.673013,0.663227,0.668084,0.748873
10,0.2433,1.135084,0.672299,0.662523,0.667375,0.748563


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 16. Training using 68 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3122,2.770393,0.179018,0.094043,0.123309,0.268842
2,1.981,1.822734,0.460758,0.404784,0.430961,0.570785
3,1.0406,1.419909,0.61857,0.578096,0.597648,0.702098
4,0.6542,1.29234,0.633751,0.605066,0.619076,0.713131
5,0.4789,1.205819,0.662363,0.637664,0.649779,0.734421
6,0.3702,1.169122,0.673509,0.654081,0.663653,0.742191
7,0.3137,1.114768,0.690436,0.672139,0.681165,0.759285
8,0.2574,1.115168,0.689904,0.673077,0.681387,0.759907
9,0.2483,1.106969,0.691215,0.673546,0.682266,0.760373
10,0.2204,1.107135,0.690293,0.67378,0.681937,0.760528


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 17. Training using 72 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3899,2.795191,0.170613,0.106473,0.131119,0.311733
2,2.0907,1.757347,0.508858,0.464822,0.485844,0.619736
3,1.1213,1.421838,0.596609,0.56121,0.578369,0.686713
4,0.6942,1.250222,0.64984,0.618902,0.633994,0.723543
5,0.5534,1.171601,0.659932,0.635788,0.647635,0.735664
6,0.3968,1.137339,0.66748,0.641182,0.654067,0.745921
7,0.3067,1.090373,0.687545,0.669325,0.678313,0.75913
8,0.2286,1.084964,0.685976,0.66651,0.676103,0.76115
9,0.2127,1.081091,0.692141,0.673311,0.682596,0.76488
10,0.2187,1.081777,0.6914,0.673077,0.682115,0.764569


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 18. Training using 76 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3455,2.729321,0.218666,0.152205,0.17948,0.350272
2,2.0705,1.782013,0.49479,0.456614,0.474936,0.609479
3,1.1754,1.383707,0.615616,0.576923,0.595642,0.70272
4,0.6937,1.266015,0.635092,0.606942,0.620698,0.713908
5,0.5879,1.182496,0.6584,0.635084,0.646532,0.734732
6,0.3432,1.163288,0.668589,0.652439,0.660415,0.742968
7,0.2706,1.119627,0.676527,0.659709,0.668012,0.751826
8,0.239,1.099651,0.684958,0.667448,0.67609,0.757265
9,0.232,1.096605,0.688825,0.670732,0.679658,0.761461
10,0.21,1.094746,0.689547,0.671435,0.680371,0.761616


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 19. Training using 80 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3805,2.670897,0.215446,0.12758,0.160259,0.324786
2,2.0984,1.599116,0.580325,0.535413,0.556965,0.668687
3,1.1624,1.315451,0.625188,0.584428,0.604121,0.707848
4,0.5318,1.18708,0.656995,0.626642,0.64146,0.731158
5,0.4001,1.11315,0.691652,0.66651,0.678849,0.754157
6,0.3342,1.042613,0.693263,0.680582,0.686864,0.764258
7,0.2744,1.054336,0.700533,0.677533,0.688841,0.763481
8,0.2041,1.035037,0.699524,0.689024,0.694234,0.769852
9,0.2031,1.021657,0.70491,0.690197,0.697476,0.772183
10,0.1837,1.02271,0.706165,0.690432,0.698209,0.77265


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 20. Training using 84 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.3585,2.628914,0.286379,0.202158,0.237009,0.390987
2,2.0882,1.614992,0.555915,0.523452,0.539196,0.655633
3,1.1523,1.331881,0.621548,0.585835,0.603163,0.707848
4,0.5568,1.20714,0.64972,0.626407,0.637851,0.728516
5,0.468,1.115555,0.676683,0.660178,0.668329,0.748096
6,0.3887,1.055319,0.702128,0.681051,0.691429,0.767055
7,0.2817,1.056989,0.695359,0.678236,0.686691,0.763326
8,0.22,1.032202,0.706167,0.684803,0.695321,0.772183
9,0.2125,1.026285,0.706495,0.68363,0.694875,0.772183
10,0.1895,1.025597,0.704787,0.68363,0.694048,0.772028


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 21. Training using 88 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4293,2.587524,0.26007,0.208959,0.23173,0.388967
2,2.1713,1.551776,0.558992,0.525563,0.541762,0.657498
3,0.7773,1.29799,0.624878,0.599672,0.612015,0.710179
4,0.5644,1.153079,0.667784,0.654315,0.660981,0.743434
5,0.4554,1.06573,0.685255,0.680113,0.682674,0.758197
6,0.3118,1.008718,0.693273,0.686445,0.689842,0.766278
7,0.2742,0.997295,0.699647,0.696529,0.698084,0.777778
8,0.1924,0.998177,0.698887,0.691839,0.695345,0.776224
9,0.1844,0.989357,0.703432,0.696998,0.7002,0.779487
10,0.1926,0.989197,0.70338,0.697936,0.700647,0.779332


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 22. Training using 92 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4057,2.635954,0.317956,0.210131,0.253036,0.395804
2,2.3146,1.573087,0.570303,0.526032,0.547273,0.660761
3,0.8036,1.276349,0.64251,0.619606,0.63085,0.721834
4,0.5755,1.11811,0.677603,0.668386,0.672963,0.752137
5,0.392,1.054956,0.688751,0.677767,0.683215,0.758353
6,0.33,0.996302,0.69668,0.684099,0.690333,0.769386
7,0.2468,0.964153,0.716017,0.706614,0.711284,0.786169
8,0.2017,0.959514,0.714999,0.705441,0.710188,0.783528
9,0.1931,0.955064,0.719742,0.707083,0.713356,0.786791
10,0.1684,0.953832,0.719085,0.707786,0.713391,0.787257


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 23. Training using 96 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4542,2.553689,0.329309,0.275094,0.29977,0.468842
2,1.2765,1.533355,0.568803,0.522514,0.544677,0.663248
3,0.8102,1.223104,0.640226,0.611398,0.62548,0.717483
4,0.46,1.091321,0.67954,0.65197,0.66547,0.746853
5,0.3827,1.010019,0.712239,0.687852,0.699833,0.77669
6,0.2668,0.971322,0.731457,0.70075,0.715774,0.786325
7,0.2262,0.933138,0.73621,0.704268,0.719885,0.787879
8,0.1734,0.931536,0.738014,0.707552,0.722462,0.788811
9,0.1413,0.928022,0.740059,0.707083,0.723195,0.789122
10,0.1508,0.92809,0.739525,0.707786,0.723307,0.789277


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 24. Training using 100 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4674,2.521307,0.324045,0.26454,0.291285,0.459829
2,1.2721,1.547225,0.557611,0.523218,0.539867,0.657187
3,0.8618,1.245033,0.63462,0.6053,0.619613,0.716395
4,0.5425,1.138188,0.660409,0.643527,0.651859,0.736908
5,0.3755,1.030425,0.689412,0.665807,0.677404,0.758508
6,0.2765,1.003682,0.707707,0.684803,0.696067,0.775291
7,0.2657,0.969275,0.715461,0.69348,0.704299,0.782906
8,0.1831,0.976463,0.713836,0.692073,0.702786,0.783061
9,0.1741,0.970639,0.716219,0.694887,0.705392,0.784615
10,0.1578,0.968994,0.717118,0.695591,0.70619,0.784926


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 25. Training using 104 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5216,2.492904,0.299857,0.245075,0.269712,0.426107
2,1.3677,1.487952,0.584839,0.546435,0.564985,0.677855
3,0.8337,1.213774,0.64249,0.614916,0.6284,0.724476
4,0.5112,1.091379,0.680871,0.666979,0.673854,0.751981
5,0.3444,1.025981,0.694631,0.679644,0.687055,0.766434
6,0.2873,0.975513,0.706462,0.689728,0.697995,0.777622
7,0.2022,0.955703,0.717344,0.699343,0.708229,0.783683
8,0.1623,0.966997,0.707817,0.696529,0.702128,0.781974
9,0.1619,0.952501,0.714968,0.702392,0.708624,0.786791
10,0.1368,0.953995,0.71323,0.701689,0.707412,0.786325


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 26. Training using 108 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5644,2.40791,0.321788,0.254925,0.284481,0.437141
2,1.3743,1.426884,0.5932,0.568715,0.580699,0.690909
3,0.6483,1.196895,0.653947,0.643058,0.648457,0.734577
4,0.5058,1.063665,0.67632,0.651735,0.6638,0.749495
5,0.3842,1.015208,0.696227,0.679409,0.687715,0.768298
6,0.2697,0.934404,0.714353,0.706144,0.710225,0.787257
7,0.2271,0.939092,0.717931,0.712711,0.715311,0.79021
8,0.1885,0.933801,0.724466,0.715291,0.719849,0.794095
9,0.1365,0.936417,0.722591,0.714118,0.71833,0.793007
10,0.1588,0.937502,0.724252,0.71576,0.719981,0.793473


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 27. Training using 112 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.44,2.352543,0.35817,0.286351,0.318259,0.465734
2,1.3259,1.37762,0.614004,0.577861,0.595385,0.703186
3,0.6898,1.140412,0.66634,0.636961,0.651319,0.73986
4,0.4485,1.049709,0.684324,0.669559,0.676861,0.756954
5,0.373,0.974364,0.704551,0.67894,0.691508,0.771406
6,0.2466,0.914313,0.727666,0.7106,0.719032,0.794095
7,0.2021,0.911079,0.729127,0.712711,0.720825,0.79596
8,0.1587,0.915472,0.727076,0.710366,0.718624,0.79596
9,0.1392,0.917554,0.730197,0.713415,0.721708,0.796581
10,0.1305,0.9176,0.730456,0.714353,0.722314,0.796581


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 28. Training using 116 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.469,2.353706,0.369668,0.329268,0.348301,0.511267
2,1.3611,1.41863,0.589391,0.562852,0.575816,0.690287
3,0.6968,1.17124,0.655333,0.628283,0.641523,0.734732
4,0.4744,1.043087,0.683446,0.662289,0.672701,0.75742
5,0.3586,0.990695,0.702532,0.676829,0.689441,0.770629
6,0.2976,0.936613,0.7093,0.695826,0.702498,0.780109
7,0.1947,0.916679,0.721495,0.710835,0.716125,0.7885
8,0.1579,0.916519,0.725243,0.716229,0.720708,0.794561
9,0.1486,0.917559,0.724015,0.715525,0.719745,0.794406
10,0.1395,0.919089,0.725891,0.716698,0.721265,0.795649


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 29. Training using 120 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.48,2.255658,0.370038,0.319184,0.342735,0.511422
2,1.3429,1.337731,0.626166,0.598265,0.611897,0.711577
3,0.6392,1.133266,0.682563,0.65455,0.668263,0.746542
4,0.4001,1.015189,0.697305,0.679644,0.688361,0.764103
5,0.2922,0.97734,0.715078,0.695122,0.704959,0.781041
6,0.2537,0.934822,0.722723,0.714587,0.718632,0.790987
7,0.1819,0.92006,0.726605,0.719278,0.722923,0.793939
8,0.1352,0.923239,0.729018,0.721154,0.725065,0.79627
9,0.1389,0.921641,0.729063,0.720685,0.72485,0.796892
10,0.1293,0.922404,0.730641,0.721388,0.725985,0.797514


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▂▁▄▄▃▅▆▆▃▆▇▆▂▅▇▇▇▂▇▇▇██▇▆████▇██▇▅██▇███
eval/f1,▁▁▃▃▂▄▅▅▂▅▆▅▁▄▇▇▆▁▇▇▆▇█▇▅████▆██▇▃██▇███
eval/loss,▇█▅▅▆▄▃▄▆▃▃▃▆▄▂▂▂▆▂▂▂▂▁▂▃▁▁▁▁▃▁▁▂▅▁▁▂▁▁▁
eval/precision,▂▁▄▃▂▅▅▅▂▅▆▅▂▅▇▇▆▁▇▇▆▇█▇▆███▇▆██▇▄██▇███
eval/recall,▁▁▃▃▂▄▅▄▂▅▆▅▁▄▇▇▆▁▇▇▆▇█▇▅████▆██▇▃██▇███
eval/runtime,▁▂▂▇▂▂▁█▁█▂▇▁▂█▁▂▁▂▂▄▁▂▄▂▂▄▅▇▂██▇▁▁▂▂▂▂▁
eval/samples_per_second,█▇▇▁▇▇█▁█▁▇▂█▇▁▇▇▇▇▇▅▇▇▄▇▇▅▃▂▇▁▁▂██▇▇▇▇█
eval/steps_per_second,█▇▇▁▇▇█▁█▁▇▂█▇▁▇▇▇▇▇▅▇▇▄▇▇▅▃▂▇▁▁▂██▇▇▇▇█
train/epoch,▆█▂▃▃▄▃▃▃▁█▇▆▄▃█▆▄▂█▅▃█▅▂▇▄▁▆▃▇▄█▅▂▆▂▇▃▇
train/global_step,▁▁▁▁▁▂▂▂▂▁▄▄▃▃▂▅▄▃▂▅▄▂▆▄▂▆▄▁▅▃▇▄█▅▂▆▃▇▃█

0,1
eval/accuracy,0.79751
eval/f1,0.72599
eval/loss,0.9224
eval/precision,0.73064
eval/recall,0.72139
eval/runtime,1.0925
eval/samples_per_second,70.479
eval/steps_per_second,18.306
train/epoch,10.0
train/global_step,300.0
