<a href="https://colab.research.google.com/github/ekaterinatao/NER_biomed_domain/blob/main/active_learning/%D0%92%D0%9A%D0%A0_nerel_bio_RuBioBERT_active_learning_v4_mnlp_seeds.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Обучение с разными сидами для расчета доверительных интервалов
# Инструменты
Предобработанный датасет [NEREL-BIO](https://huggingface.co/datasets/ekaterinatao/nerel_bio_ner_unnested)  

Исходная модель [RuBioBERT](https://huggingface.co/alexyalunin/RuBioBERT)

### Установка зависимостей

In [None]:
!pip install datasets accelerate evaluate wandb seqeval -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m3.2 M

In [None]:
import numpy as np
import pandas as pd
import random
import json
from dataclasses import dataclass
import torch
from torch.nn.functional import softmax
import datasets
from datasets import Dataset, DatasetDict, concatenate_datasets
from transformers import (AutoTokenizer,
                          DataCollatorForTokenClassification,
                          AutoModelForTokenClassification,
                          TrainingArguments, Trainer)
import evaluate
import warnings
warnings.filterwarnings("ignore")

In [None]:
@dataclass
class TrainingConfig:
    """Hyper-parameters and resource names shared by every training run.

    Every attribute is type-annotated so that ``@dataclass`` registers it as
    a real field: values can be overridden per instance
    (``TrainingConfig(n_epochs=3)``) and show up in ``repr``. Without the
    annotations the decorator sees no fields at all.
    """

    dataset: str = 'ekaterinatao/nerel_bio_ner_unnested'  # dataset id given to load_dataset
    checkpoint: str = 'alexyalunin/RuBioBERT'  # pretrained model / tokenizer id
    hf_repo_id: str = "ekaterinatao/nerel-bio-RuBioBERT-al"  # used as the Trainer output_dir
    n_labels: int = 45  # num_labels of the token-classification head
    n_epochs: int = 10
    train_batch_size: int = 4
    eval_batch_size: int = 4
    # Evaluated once, when the class body is executed.
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    l_rate: float = 5e-05
    w_decay: float = 0.1  # passed to TrainingArguments as weight_decay
    warm_up: float = 0.1  # passed to TrainingArguments as warmup_ratio
    run_name: str = "RuBioBERT_AL_v4_3"  # prefix of the W&B run name; the seed is appended

config = TrainingConfig()

In [None]:
def fix_seed(seed):
    """Seed every random number generator used in the notebook.

    Seeds Python's ``random``, NumPy's global RNG and PyTorch (CPU and all
    CUDA devices), and switches cuDNN to deterministic mode with
    auto-tuning disabled.

    Args:
        seed: Integer-like seed. NumPy integer scalars (such as the items of
            an array produced by ``np.random.randint``) are accepted.
    """
    # `random.seed` only accepts built-in types on Python >= 3.11, so a NumPy
    # integer has to be converted before it is used.
    seed_value = int(seed)

    random.seed(seed_value)
    np.random.seed(seed_value)

    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print("Current seed: ", seed_value)

In [None]:
# Generate a fixed list of seeds so that the experiments are reproducible.
# The legacy global NumPy RNG is kept on purpose: it pins the exact seed values.
np.random.seed(64)
# Store built-in ints rather than NumPy scalars: the seeds are later handed to
# `random.seed`, which rejects NumPy integers on Python >= 3.11.
seed_lst = np.random.randint(65, 2024, size=4).tolist()

### Скачивание датасета

In [None]:
# Load the dataset named in config.dataset; the bare `dataset` expression
# displays its splits (train / valid / test) and their sizes.
dataset = datasets.load_dataset(config.dataset)
dataset

Downloading readme:   0%|          | 0.00/1.56k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/603k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/76.2k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/70.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/612 [00:00<?, ? examples/s]

Generating valid split:   0%|          | 0/77 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/77 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'words', 'ner_tags'],
        num_rows: 612
    })
    valid: Dataset({
        features: ['id', 'words', 'ner_tags'],
        num_rows: 77
    })
    test: Dataset({
        features: ['id', 'words', 'ner_tags'],
        num_rows: 77
    })
})

In [None]:
# Label vocabulary: tags are read from labels.txt and numbered in file order,
# so a tag's position in `tags` is its integer id.
url = 'https://raw.githubusercontent.com/ekaterinatao/NER_biomed_domain/main/labels.txt'
label_rows = pd.read_csv(url, names=['tag']).values.tolist()
tags = []
for row in label_rows:
    tags.extend(row)
# Two lookup tables over the same enumeration: id -> tag and tag -> id.
id_to_tag = dict(enumerate(tags))
tag_to_id = {tag: idx for idx, tag in id_to_tag.items()}

___
### Токенизация

In [None]:
# Load the tokenizer that belongs to the pretrained checkpoint (config.checkpoint).
tokenizer = AutoTokenizer.from_pretrained(config.checkpoint)

tokenizer_config.json:   0%|          | 0.00/413 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/1.78M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.75M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
def tokenize_and_align_labels(examples):
    """Tokenize pre-split words and project word-level tags onto tokens.

    Uses the module-level ``tokenizer``. Only the first token of every word
    keeps the word's tag; special tokens and the remaining sub-word tokens
    receive ``-100``.

    Args:
        examples: Batch with a ``"words"`` column (lists of words) and a
            ``"ner_tags"`` column (one tag id per word).

    Returns:
        The tokenizer output with an added ``"labels"`` entry holding one
        list of label ids per example.
    """
    encoded = tokenizer(
        examples["words"],
        truncation=True,
        max_length=512,
        is_split_into_words=True,
    )

    aligned_labels = []
    for row, word_tags in enumerate(examples["ner_tags"]):
        # word_ids() gives, for every token, the index of its source word
        # (None for special tokens).
        token_to_word = encoded.word_ids(batch_index=row)
        row_labels = []
        last_word = None
        for current_word in token_to_word:
            starts_new_word = current_word is not None and current_word != last_word
            row_labels.append(word_tags[current_word] if starts_new_word else -100)
            last_word = current_word
        aligned_labels.append(row_labels)

    encoded["labels"] = aligned_labels
    return encoded

In [None]:
# Tokenize every split in batches; this adds the tokenizer outputs and the
# aligned `labels` column next to the original columns.
tokenized_dataset = dataset.map(
    tokenize_and_align_labels, batched=True
)
tokenized_dataset

Map:   0%|          | 0/612 [00:00<?, ? examples/s]

Map:   0%|          | 0/77 [00:00<?, ? examples/s]

Map:   0%|          | 0/77 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'words', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 612
    })
    valid: Dataset({
        features: ['id', 'words', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 77
    })
    test: Dataset({
        features: ['id', 'words', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 77
    })
})

# Обучение модели

In [None]:
# seqeval metric loaded through the `evaluate` library; compute_metrics calls it.
seqeval = evaluate.load("seqeval")

def compute_metrics(p):
    """Compute seqeval precision / recall / F1 / accuracy for one evaluation.

    Args:
        p: Pair ``(predictions, labels)``. ``predictions`` holds per-token
            class scores (arg-maxed over axis 2); ``labels`` holds the gold
            label ids, with ``-100`` marking positions to ignore.

    Returns:
        Dict with the keys ``precision``, ``recall``, ``f1`` and ``accuracy``
        taken from seqeval's ``overall_*`` results.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions = []
    true_labels = []
    for predicted_row, gold_row in zip(predicted_ids, gold_ids):
        # Keep only the positions that carry a real label, then map ids to tag names.
        kept = [(pred, gold) for pred, gold in zip(predicted_row, gold_row) if gold != -100]
        true_predictions.append([tags[pred] for pred, _ in kept])
        true_labels.append([tags[gold] for _, gold in kept])

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

Downloading builder script:   0%|          | 0.00/6.34k [00:00<?, ?B/s]

In [None]:
def choose_samples_mnlp(test_predictions, n_tokens) -> np.ndarray:
    """Pick the samples to annotate next with the MNLP strategy.

    MNLP (Maximum Normalized Log-Probability) scores a sequence by the
    length-normalised log-probability of its most likely tags; the least
    confident sequences are selected.

    ``Trainer.predict`` right-pads the concatenated predictions with ``-100``
    so that batches of different length fit into one array. Such rows turn
    into a uniform distribution after softmax (maximal uncertainty), so they
    are excluded here and the score is normalised by the number of remaining
    positions instead of the padded length. Otherwise short sequences would
    be favoured merely because they carry more padding.

    NOTE(review): padding inside a batch, special tokens and sub-word
    continuation tokens cannot be told apart from the logits alone and still
    contribute to the score.

    Args:
        test_predictions: NumPy array of raw logits, indexed as
            (sample, token position, label).
        n_tokens: Number of samples to return (despite the name, this counts
            samples, not tokens).

    Returns:
        Indices into ``test_predictions`` of the ``n_tokens`` most uncertain
        samples, most uncertain first.
    """
    logits = torch.from_numpy(test_predictions)
    probas = softmax(logits, dim=-1).detach().cpu().numpy()

    # Per-token uncertainty: negative log-probability of the most likely tag.
    token_nll = -np.log(probas.max(axis=-1))

    # A position whose logits are all -100 is concatenation padding, not a prediction.
    is_real_token = ~np.all(test_predictions == -100, axis=-1)
    # Guard against division by zero for a (degenerate) fully padded row.
    n_real_tokens = np.maximum(is_real_token.sum(axis=-1), 1)

    scores = np.where(is_real_token, token_nll, 0.0).sum(axis=-1) / n_real_tokens
    ranking = np.argsort(-scores)

    return ranking[:n_tokens]

In [None]:
# Batch collator for token classification, built around the tokenizer loaded above.
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [None]:
# Show which device TrainingConfig detected ("cuda" or "cpu").
print(f'device is {config.device}')

device is cuda


In [None]:
# Interactive Hugging Face Hub login; training is configured with push_to_hub=True.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Log in to Weights & Biases; the Trainer reports to it (report_to="wandb").
import wandb
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
def train_and_predict(
    al_train_dataset, eval_dataset, al_test_dataset, test_dataset,
    model, tokenizer, data_collator, compute_metrics, SEED
):
    """Train the model, then collect metrics and predictions for the pool.

    Hyper-parameters are read from the module-level ``config``.

    Args:
        al_train_dataset: Currently labelled samples used for training.
        eval_dataset: Validation split, evaluated after every epoch and once
            more after training.
        al_test_dataset: Pool of not-yet-selected training samples; predicted
            only when it is non-empty.
        test_dataset: Held-out test split.
        model: Model instance to fine-tune.
        tokenizer: Tokenizer handed to the Trainer.
        data_collator: Batch collator handed to the Trainer.
        compute_metrics: Metric callback handed to the Trainer.
        SEED: Integer-like seed used for ``seed`` / ``data_seed`` and
            appended to the run name.

    Returns:
        Tuple ``(metrics, probas, trainer)``: ``metrics`` merges the training
        loss with the evaluation, ``pred_*`` (pool) and ``test_*`` metrics;
        ``probas`` holds the raw predictions for the pool (unnormalised
        model outputs, despite the name) or ``None`` when the pool is empty;
        ``trainer`` is the fitted Trainer.
    """
    # Hand the Trainer a built-in int: it seeds Python's `random`, which does
    # not accept NumPy integer scalars on Python >= 3.11.
    seed = int(SEED)

    training_args = TrainingArguments(
        output_dir=config.hf_repo_id,
        num_train_epochs=config.n_epochs,
        learning_rate=config.l_rate,
        weight_decay=config.w_decay,
        warmup_ratio=config.warm_up,
        per_device_train_batch_size=config.train_batch_size,
        per_device_eval_batch_size=config.eval_batch_size,
        group_by_length=True,
        optim="adamw_torch",
        lr_scheduler_type="cosine",
        evaluation_strategy="epoch",
        seed=seed,
        data_seed=seed,
        push_to_hub=True,
        save_strategy="no",
        report_to="wandb",
        logging_steps=16,
        run_name=config.run_name + f'{SEED}',
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=al_train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
    )

    train_result = trainer.train()
    metrics = {'train_loss': train_result.metrics['train_loss']}
    metrics.update(trainer.evaluate(eval_dataset))

    # The pool can run empty at the end of active learning; nothing is left
    # to score then.
    probas = None
    if al_test_dataset.num_rows > 0:
        al_test_predictions = trainer.predict(
            al_test_dataset, metric_key_prefix="pred"
        )
        probas = al_test_predictions.predictions
        metrics.update(al_test_predictions.metrics)

    test_predictions = trainer.predict(test_dataset, metric_key_prefix="test")
    metrics.update(test_predictions.metrics)

    return metrics, probas, trainer

In [None]:
def run_active_learning(
    dataset, al_iters, init_train_size, choose_tokens,
    tokenizer, data_collator, compute_metrics, SEED
):
    """Run the active-learning loop.

    Starts from a random subset of the training split, then repeatedly
    trains a fresh model from ``config.checkpoint``, scores the remaining
    pool and moves the samples chosen by ``choose_samples_mnlp`` into the
    training set.

    Args:
        dataset: Mapping with ``'train'``, ``'valid'`` and ``'test'`` splits;
            the train split must expose an ``'id'`` column.
        al_iters: Number of active-learning iterations.
        init_train_size: Fraction of the train split used as the initial
            labelled set.
        choose_tokens: Number of samples added per iteration (samples, not
            tokens, despite the name).
        tokenizer: Tokenizer forwarded to ``train_and_predict``.
        data_collator: Batch collator forwarded to ``train_and_predict``.
        compute_metrics: Metric callback forwarded to ``train_and_predict``.
        SEED: Seed forwarded to ``train_and_predict``.

    Returns:
        Tuple ``(all_metrics, trainer)``. ``all_metrics`` maps the iteration
        number (as a string) to ``{'train_size': ..., 'metrics': ...}``;
        ``trainer`` is the Trainer of the last iteration, or ``None`` when
        ``al_iters`` is 0.
    """
    full_train = dataset['train']

    def unlabelled_pool(labelled):
        # Read the id column once and test membership against a set; looking
        # the column up inside the predicate would re-read it for every row.
        labelled_ids = set(labelled['id'])
        return full_train.filter(lambda s: s['id'] not in labelled_ids)

    train_dataset = full_train.select(
        random.sample(
            range(full_train.num_rows),
            int(full_train.num_rows * init_train_size)
        )
    )
    test_dataset = unlabelled_pool(train_dataset)
    all_metrics = {}
    trainer = None  # stays None when no iteration is executed

    for al_iter in range(al_iters):
        train_size = train_dataset.num_rows
        print(f'\nIteration: {al_iter}. Training using {train_size} samples')
        # Every iteration restarts from the pretrained checkpoint instead of
        # continuing from the previous iteration's weights.
        model = AutoModelForTokenClassification.from_pretrained(
            config.checkpoint, num_labels=config.n_labels,
            id2label=id_to_tag, label2id=tag_to_id
        )

        metrics, test_predictions, trainer = train_and_predict(
            train_dataset, dataset['valid'], test_dataset, dataset['test'],
            model, tokenizer, data_collator, compute_metrics, SEED
        )

        all_metrics[f'{al_iter}'] = {'train_size': train_size, 'metrics': metrics}

        # Grow the labelled set only while the pool can still supply a full batch.
        if test_dataset.num_rows >= choose_tokens:
            assert test_predictions is not None, "test_predictions is None, must be array"
            new_args = choose_samples_mnlp(test_predictions, choose_tokens)
            new_train_samples = test_dataset.select(new_args)
            train_dataset = concatenate_datasets(
                [train_dataset, new_train_samples]
            )
            test_dataset = unlabelled_pool(train_dataset)

    return all_metrics, trainer

# Сэмплирование по 4 примера (по ~0.7% данных, 30 итераций активного обучения) для каждого сида (4 цикла).

In [None]:
# One complete active-learning experiment per seed: 30 iterations, starting
# from 0.7% of the training split and adding 4 samples per iteration.
for SEED in seed_lst:
    fix_seed(SEED)
    run = wandb.init(
        project="ner_bert_nerel_bio",
        name=config.run_name + f'{SEED}',
        reinit=True)
    try:
        metrics, trainer = run_active_learning(
            dataset=tokenized_dataset,
            al_iters=30,
            init_train_size=0.007,
            choose_tokens=4,
            tokenizer=tokenizer,
            data_collator=data_collator,
            compute_metrics=compute_metrics,
            SEED=SEED
        )
    finally:
        # Close the W&B run even when training fails, so the next seed starts
        # a clean run. `finish()` replaces the deprecated `join()`.
        run.finish()
    # NOTE(review): the target is a Google Drive path; Drive has to be mounted
    # before this cell runs, otherwise the metrics of a finished run are lost.
    with open(f'/content/drive/MyDrive/data/VKR/metrics_30_mnlp_seed{SEED}.json', 'w') as file:
        json.dump(metrics, file)

[34m[1mwandb[0m: Currently logged in as: [33mtaoea[0m. Use [1m`wandb login --relogin`[0m to force relogin


Current seed:  517


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 0. Training using 4 samples


config.json:   0%|          | 0.00/889 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/714M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.847057,0.01114,0.013837,0.012343,0.02735
2,No log,3.642503,0.035247,0.008208,0.013316,0.158974
3,No log,3.487499,0.014493,0.000469,0.000909,0.156177
4,No log,3.388926,0.022472,0.000469,0.000919,0.1554
5,No log,3.318139,0.023529,0.000469,0.00092,0.1554
6,No log,3.258257,0.022989,0.000469,0.000919,0.155556
7,No log,3.207805,0.021053,0.000469,0.000918,0.156333
8,No log,3.175283,0.044643,0.001173,0.002285,0.158042
9,No log,3.159549,0.047619,0.001407,0.002733,0.159596
10,No log,3.155416,0.053846,0.001642,0.003186,0.160218


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 1. Training using 8 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.787425,0.015999,0.020403,0.017934,0.041181
2,No log,3.354702,0.05278,0.013133,0.021033,0.178244
3,No log,3.095957,0.051213,0.004456,0.008198,0.166123
4,No log,2.954611,0.096774,0.012664,0.022397,0.19021
5,No log,2.846002,0.178189,0.042917,0.069174,0.245998
6,No log,2.764034,0.237255,0.085131,0.125302,0.295882
7,No log,2.704937,0.278139,0.120544,0.168194,0.333489
8,2.653300,2.668714,0.289447,0.135084,0.184202,0.348407
9,2.653300,2.652343,0.289268,0.139071,0.187837,0.35338
10,2.653300,2.648855,0.288341,0.139775,0.18828,0.354934


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 2. Training using 12 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.589089,0.040587,0.030488,0.03482,0.128205
2,No log,3.098466,0.045267,0.00258,0.004881,0.161616
3,No log,2.899937,0.168109,0.031895,0.053617,0.20373
4,No log,2.738437,0.273314,0.109287,0.15614,0.298834
5,No log,2.612905,0.285253,0.145169,0.192415,0.346387
6,2.862300,2.492654,0.293474,0.176126,0.220138,0.383838
7,2.862300,2.410675,0.297515,0.190901,0.232571,0.399223
8,2.862300,2.363658,0.309434,0.211538,0.251288,0.418959
9,2.862300,2.342866,0.312187,0.219278,0.257611,0.427661
10,2.862300,2.338817,0.313018,0.221623,0.259508,0.430303


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 3. Training using 16 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.481695,0.05189,0.032833,0.040218,0.164569
2,No log,2.992096,0.077869,0.004456,0.008429,0.161461
3,No log,2.779248,0.185773,0.096154,0.126719,0.30303
4,2.971200,2.596755,0.25,0.146107,0.184429,0.361461
5,2.971200,2.401605,0.272629,0.177298,0.214864,0.394561
6,2.971200,2.256427,0.308553,0.219981,0.256846,0.44087
7,2.971200,2.154278,0.344132,0.264071,0.298832,0.476612
8,1.732400,2.091713,0.375804,0.301595,0.334634,0.505828
9,1.732400,2.065198,0.387069,0.315901,0.347882,0.515462
10,1.732400,2.060488,0.387568,0.317308,0.348936,0.516861


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 4. Training using 20 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.34452,0.098148,0.074578,0.084755,0.21958
2,No log,2.862185,0.206861,0.04667,0.076158,0.218182
3,No log,2.555754,0.270779,0.195591,0.227124,0.40373
4,2.992900,2.275092,0.339107,0.238743,0.280209,0.446775
5,2.992900,2.016285,0.458094,0.393527,0.423363,0.577467
6,2.992900,1.845031,0.491354,0.433161,0.460426,0.605284
7,1.594700,1.732212,0.533229,0.481707,0.506161,0.639005
8,1.594700,1.672123,0.545993,0.496951,0.520319,0.648329
9,1.594700,1.649526,0.547625,0.500235,0.522858,0.650194
10,1.065900,1.645935,0.547112,0.499765,0.522368,0.650194


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 5. Training using 24 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.319491,0.076087,0.026266,0.039052,0.179487
2,No log,2.871984,0.197248,0.030253,0.05246,0.20979
3,3.166300,2.566441,0.262103,0.162523,0.200637,0.370474
4,3.166300,2.275641,0.311221,0.206848,0.248521,0.41181
5,3.166300,1.987067,0.410112,0.342402,0.373211,0.535509
6,1.931700,1.786665,0.463135,0.408068,0.433861,0.591453
7,1.931700,1.661063,0.515657,0.471154,0.492402,0.634499
8,1.214800,1.583512,0.540493,0.503987,0.521602,0.656099
9,1.214800,1.554203,0.545477,0.510553,0.527438,0.660917
10,1.214800,1.549158,0.547165,0.511492,0.528727,0.66216


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 6. Training using 28 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.209203,0.106418,0.060272,0.076958,0.218959
2,No log,2.675682,0.2369,0.126173,0.164652,0.327117
3,3.138300,2.271835,0.344467,0.239447,0.282512,0.450816
4,3.138300,1.88472,0.477242,0.410647,0.441447,0.590521
5,1.837600,1.620907,0.54842,0.500704,0.523477,0.645066
6,1.837600,1.485182,0.567168,0.530722,0.54834,0.670241
7,1.073900,1.399244,0.583395,0.553705,0.568163,0.68749
8,1.073900,1.373864,0.588337,0.558396,0.572976,0.690132
9,1.073900,1.354843,0.590584,0.561914,0.575892,0.693551
10,0.796800,1.351038,0.590382,0.561445,0.57555,0.693706


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 7. Training using 32 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.159521,0.082814,0.021811,0.034528,0.183838
2,3.239300,2.633001,0.298345,0.147983,0.197837,0.342502
3,3.239300,2.146608,0.374065,0.281426,0.321199,0.490132
4,1.971000,1.708692,0.514332,0.450281,0.48018,0.619736
5,1.971000,1.44503,0.587535,0.543856,0.564852,0.684227
6,1.115900,1.29768,0.621047,0.589587,0.604909,0.717016
7,1.115900,1.239207,0.631996,0.598499,0.614792,0.730225
8,0.749300,1.198681,0.640937,0.609522,0.624835,0.735043
9,0.749300,1.183214,0.639312,0.610225,0.62443,0.737218
10,0.644200,1.180842,0.640541,0.611398,0.62563,0.73784


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 8. Training using 36 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.134123,0.095578,0.015713,0.026989,0.18073
2,3.317400,2.570153,0.274441,0.146811,0.191291,0.361927
3,3.317400,2.055993,0.378222,0.275328,0.318675,0.49899
4,2.069200,1.628941,0.540867,0.482645,0.5101,0.646931
5,2.069200,1.385005,0.595819,0.561445,0.578121,0.692152
6,1.215200,1.259236,0.628863,0.591698,0.609715,0.722766
7,1.215200,1.183887,0.640575,0.606473,0.623057,0.73411
8,0.801300,1.154824,0.647772,0.613743,0.630299,0.738151
9,0.661900,1.141389,0.647044,0.613508,0.62983,0.738928
10,0.661900,1.139234,0.647306,0.614212,0.630325,0.739083


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 9. Training using 40 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.116484,0.134975,0.074578,0.096073,0.230925
2,3.370300,2.550122,0.261364,0.18879,0.219227,0.408702
3,3.370300,1.944421,0.434614,0.342167,0.382889,0.549029
4,2.166900,1.472706,0.586474,0.551126,0.568251,0.682984
5,1.284500,1.25793,0.621476,0.594512,0.607695,0.720591
6,1.284500,1.172885,0.634526,0.611163,0.622626,0.732556
7,0.799300,1.102304,0.659208,0.628752,0.64362,0.750427
8,0.626000,1.074352,0.660068,0.633443,0.646482,0.750583
9,0.626000,1.067669,0.66325,0.63743,0.650084,0.753069
10,0.536600,1.066163,0.664144,0.638133,0.650879,0.753535


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 10. Training using 44 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.039701,0.041322,0.001173,0.002281,0.156643
2,3.344200,2.446092,0.308129,0.241792,0.270959,0.455322
3,2.244600,1.847264,0.502245,0.44606,0.472488,0.616472
4,2.244600,1.453327,0.568078,0.522514,0.544344,0.669308
5,1.302400,1.224705,0.641714,0.611163,0.626066,0.736597
6,0.845400,1.098187,0.666421,0.635319,0.650498,0.754157
7,0.845400,1.04926,0.679481,0.650797,0.66483,0.766744
8,0.596000,1.041324,0.676104,0.649625,0.6626,0.765657
9,0.484700,1.025252,0.676406,0.651501,0.66372,0.766278
10,0.484700,1.021745,0.678102,0.653612,0.665632,0.767055


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 11. Training using 48 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.041684,0.110248,0.016651,0.028932,0.184149
2,3.397400,2.382553,0.354731,0.224203,0.274752,0.426263
3,2.276300,1.665233,0.548654,0.497186,0.521654,0.643978
4,1.368200,1.372976,0.601657,0.562148,0.581232,0.695416
5,1.368200,1.149005,0.649404,0.626407,0.637698,0.740171
6,0.880400,1.066417,0.664244,0.643058,0.65348,0.75711
7,0.615300,1.00977,0.679922,0.657598,0.668574,0.770163
8,0.542300,1.003776,0.681719,0.658537,0.669927,0.770008
9,0.542300,1.003264,0.686709,0.661585,0.673913,0.771717
10,0.472200,0.99815,0.688624,0.6644,0.676295,0.773582


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 12. Training using 52 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.017379,0.010526,0.000235,0.000459,0.156022
2,3.355300,2.244639,0.380765,0.294325,0.332011,0.500078
3,2.287200,1.582116,0.575,0.523218,0.547888,0.66014
4,1.401300,1.280115,0.632468,0.605769,0.618831,0.725563
5,0.903500,1.121956,0.664791,0.637195,0.650701,0.755711
6,0.903500,1.01975,0.691252,0.661585,0.676093,0.77467
7,0.617200,0.983275,0.699804,0.668621,0.683857,0.778866
8,0.486900,0.962024,0.705122,0.678002,0.691296,0.784149
9,0.420400,0.960642,0.704634,0.677533,0.690818,0.782906
10,0.394500,0.959015,0.705279,0.676829,0.690761,0.782906


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 13. Training using 56 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.996972,0.050562,0.002111,0.004052,0.164724
2,3.440300,2.179257,0.387135,0.307692,0.342872,0.50474
3,2.359200,1.477777,0.578125,0.54667,0.561958,0.673504
4,1.416900,1.191051,0.642581,0.623593,0.632945,0.732867
5,0.888400,1.123356,0.65743,0.628752,0.642772,0.746076
6,0.677500,1.002895,0.685118,0.66182,0.673267,0.772805
7,0.494500,0.948457,0.694169,0.672842,0.683339,0.778866
8,0.438600,0.941756,0.697375,0.672842,0.684889,0.780575
9,0.438600,0.935292,0.700024,0.672608,0.686042,0.781352
10,0.362700,0.933192,0.70134,0.675188,0.688015,0.782129


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 14. Training using 60 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.001921,0.0,0.0,0.0,0.156643
2,3.501900,2.233052,0.278443,0.196295,0.230261,0.40404
3,2.463100,1.508115,0.559156,0.522045,0.539964,0.661538
4,1.548600,1.175967,0.628851,0.603189,0.615753,0.722145
5,0.941200,1.042124,0.668149,0.634146,0.650704,0.760218
6,0.724300,0.976383,0.683325,0.657364,0.670093,0.767832
7,0.519900,0.907643,0.697295,0.670966,0.683877,0.782595
8,0.402200,0.898163,0.69815,0.672608,0.685141,0.78446
9,0.360200,0.897224,0.701998,0.675657,0.688576,0.788656
10,0.309600,0.895651,0.703965,0.678705,0.691104,0.789277


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 15. Training using 64 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4219,2.897572,0.148256,0.023921,0.041195,0.204507
2,2.3386,1.892647,0.518346,0.443949,0.478272,0.61927
3,1.4125,1.361009,0.597576,0.566604,0.581678,0.683605
4,0.9376,1.130929,0.64553,0.623124,0.634129,0.737529
5,0.6644,1.008938,0.684851,0.659475,0.671924,0.767832
6,0.5091,0.894689,0.724071,0.699109,0.711371,0.797669
7,0.396,0.90472,0.696984,0.682927,0.689884,0.785082
8,0.3376,0.866089,0.723636,0.700047,0.711646,0.79798
9,0.3081,0.862327,0.725627,0.705206,0.715271,0.800155
10,0.2922,0.862979,0.723789,0.704268,0.713895,0.8


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 16. Training using 68 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4752,2.916662,0.167033,0.017824,0.03221,0.177312
2,2.5087,2.023971,0.435425,0.366088,0.397758,0.558819
3,1.6001,1.403841,0.578724,0.544794,0.561247,0.671329
4,1.0561,1.108828,0.65583,0.629221,0.64225,0.743745
5,0.7072,0.970115,0.693473,0.665338,0.679114,0.774825
6,0.5797,0.880708,0.710655,0.681989,0.696027,0.793784
7,0.4518,0.843542,0.721359,0.696998,0.708969,0.798446
8,0.364,0.825568,0.730524,0.699343,0.714594,0.806061
9,0.3477,0.817097,0.730386,0.705206,0.717575,0.808392
10,0.2538,0.816394,0.730919,0.705206,0.717832,0.808858


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 17. Training using 72 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4861,2.877302,0.236039,0.074343,0.113073,0.247242
2,2.3985,1.77783,0.523631,0.457317,0.488232,0.624242
3,1.496,1.231131,0.618273,0.606238,0.612197,0.711111
4,1.0194,0.998515,0.693284,0.665807,0.679268,0.773116
5,0.7036,0.920483,0.705668,0.680347,0.692776,0.786014
6,0.5107,0.842773,0.731891,0.706144,0.718787,0.803574
7,0.4204,0.823886,0.735694,0.711538,0.723414,0.809013
8,0.2893,0.838338,0.735459,0.708724,0.721844,0.80777
9,0.2599,0.82543,0.737871,0.716932,0.727251,0.811344
10,0.2217,0.825338,0.739004,0.717167,0.727922,0.81181


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 18. Training using 76 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4281,2.832917,0.151386,0.016651,0.030002,0.193318
2,2.433,1.788018,0.52516,0.482176,0.502751,0.632634
3,1.6118,1.22834,0.625395,0.604128,0.614577,0.716084
4,1.0375,1.043243,0.691216,0.662523,0.676566,0.772339
5,0.7486,0.936025,0.712954,0.68152,0.696882,0.784615
6,0.4651,0.906667,0.706821,0.685272,0.69588,0.787568
7,0.3398,0.824698,0.741614,0.715525,0.728336,0.809013
8,0.2988,0.837217,0.730473,0.7106,0.720399,0.804196
9,0.2811,0.839369,0.731408,0.712711,0.721938,0.805128
10,0.2598,0.839235,0.732113,0.712711,0.722282,0.805439


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 19. Training using 80 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5234,2.809046,0.287476,0.07106,0.113953,0.256566
2,2.5471,1.70015,0.521327,0.464353,0.491193,0.623155
3,1.6027,1.191771,0.632495,0.613508,0.622857,0.723699
4,0.7054,0.974846,0.690268,0.665338,0.677573,0.771406
5,0.5375,0.924431,0.707287,0.680582,0.693678,0.785859
6,0.412,0.851826,0.709793,0.69864,0.704172,0.793784
7,0.3206,0.820301,0.721824,0.708959,0.715334,0.807615
8,0.2369,0.812228,0.730687,0.720919,0.72577,0.813986
9,0.2089,0.813239,0.730979,0.723265,0.727101,0.814297
10,0.2085,0.812975,0.73191,0.723499,0.72768,0.814763


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 20. Training using 84 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4753,2.768477,0.23366,0.10061,0.140656,0.290754
2,2.4864,1.66803,0.539939,0.494606,0.516279,0.639938
3,1.6452,1.223183,0.633528,0.609756,0.621415,0.711733
4,0.7795,0.958106,0.70438,0.67894,0.691426,0.778244
5,0.5992,0.87395,0.7252,0.70122,0.713008,0.798601
6,0.4157,0.85047,0.723646,0.701923,0.712619,0.804662
7,0.2976,0.774102,0.745188,0.726313,0.735629,0.820202
8,0.2188,0.804754,0.736399,0.717402,0.726776,0.816783
9,0.2063,0.797843,0.746625,0.726313,0.736329,0.822222
10,0.1987,0.797053,0.746319,0.725141,0.735577,0.822067


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 21. Training using 88 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5223,2.708555,0.186934,0.111398,0.139603,0.305983
2,2.4737,1.571864,0.542663,0.505629,0.523492,0.647863
3,1.0087,1.202044,0.63044,0.608114,0.619076,0.719814
4,0.8597,0.971011,0.683939,0.653143,0.668186,0.766744
5,0.5835,0.86365,0.718134,0.700281,0.709095,0.797514
6,0.3755,0.825432,0.723957,0.712242,0.718052,0.806838
7,0.2887,0.792949,0.740803,0.727251,0.733964,0.82129
8,0.2281,0.814105,0.743029,0.724906,0.733856,0.818959
9,0.1925,0.808326,0.743719,0.728893,0.736231,0.821134
10,0.1799,0.810052,0.744147,0.730535,0.737278,0.821601


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 22. Training using 92 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4733,2.647995,0.29527,0.144934,0.194431,0.356177
2,2.4903,1.541512,0.563696,0.525094,0.543711,0.662471
3,1.1388,1.083191,0.672131,0.644231,0.657885,0.756799
4,0.7452,0.950677,0.701424,0.68152,0.691329,0.780886
5,0.4609,0.835722,0.730704,0.703799,0.716999,0.804351
6,0.3731,0.832369,0.728988,0.715994,0.722433,0.805594
7,0.258,0.788986,0.746841,0.734756,0.740749,0.821601
8,0.2122,0.776857,0.750835,0.737805,0.744263,0.824553
9,0.1921,0.788722,0.749104,0.735225,0.7421,0.823465
10,0.1671,0.789084,0.748327,0.734287,0.741241,0.822844


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 23. Training using 96 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5374,2.713603,0.310801,0.104597,0.156519,0.285159
2,1.7505,1.552012,0.555411,0.501876,0.527288,0.65066
3,1.1727,1.167483,0.630187,0.609053,0.619439,0.719503
4,0.6529,0.948804,0.688154,0.663462,0.675582,0.767988
5,0.4719,0.847323,0.708075,0.695122,0.701538,0.79798
6,0.3175,0.826683,0.724578,0.705206,0.714761,0.807304
7,0.2237,0.793909,0.739443,0.722795,0.731025,0.816317
8,0.1986,0.778927,0.7503,0.733583,0.741848,0.823776
9,0.1724,0.78801,0.743467,0.727251,0.73527,0.819891
10,0.1644,0.786266,0.742754,0.727251,0.734921,0.820047


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 24. Training using 100 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4936,2.655458,0.253953,0.124296,0.166903,0.326962
2,1.8089,1.514586,0.577078,0.537289,0.556473,0.674437
3,1.1581,1.100969,0.65894,0.626642,0.642385,0.735354
4,0.6426,0.930654,0.697212,0.674484,0.68566,0.780264
5,0.5198,0.838038,0.732555,0.706614,0.719351,0.809324
6,0.3135,0.779464,0.742446,0.726079,0.734171,0.820202
7,0.2897,0.777152,0.743393,0.72561,0.734394,0.824709
8,0.1899,0.786487,0.747482,0.731004,0.739151,0.825796
9,0.1804,0.787505,0.747303,0.731004,0.739063,0.825641
10,0.1581,0.785377,0.748621,0.731942,0.740187,0.826573


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 25. Training using 104 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5469,2.567386,0.28016,0.212946,0.241972,0.417871
2,1.6449,1.363401,0.608932,0.578799,0.593483,0.697902
3,1.0409,1.065211,0.662379,0.640009,0.651002,0.751049
4,0.6246,0.859106,0.716534,0.694184,0.705182,0.794095
5,0.3869,0.832638,0.724727,0.716229,0.720453,0.803419
6,0.337,0.782638,0.737933,0.734991,0.736459,0.81927
7,0.2329,0.762044,0.744867,0.74015,0.742501,0.823621
8,0.1911,0.787255,0.737127,0.735225,0.736175,0.818648
9,0.1718,0.777494,0.744153,0.738743,0.741438,0.822067
10,0.1469,0.774764,0.746343,0.741792,0.74406,0.82331


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 26. Training using 108 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5694,2.539439,0.276426,0.170497,0.210908,0.377312
2,1.6781,1.341456,0.599257,0.567073,0.582721,0.683761
3,0.8413,1.050796,0.666907,0.650797,0.658754,0.753225
4,0.6367,0.870247,0.715607,0.689259,0.702186,0.797514
5,0.4047,0.805802,0.744686,0.72303,0.733698,0.816006
6,0.2963,0.752219,0.75,0.731707,0.740741,0.827506
7,0.213,0.759399,0.755588,0.737336,0.74635,0.828904
8,0.1861,0.767062,0.752329,0.738743,0.745474,0.828749
9,0.1491,0.763031,0.750298,0.739212,0.744714,0.827661
10,0.1474,0.762074,0.751905,0.740619,0.746219,0.828594


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 27. Training using 112 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.6252,2.493941,0.291415,0.203799,0.239856,0.414452
2,1.7077,1.356443,0.599604,0.567542,0.583133,0.68547
3,0.8304,0.965137,0.699096,0.670732,0.68462,0.777622
4,0.5947,0.846415,0.715205,0.703799,0.709456,0.800777
5,0.4044,0.758088,0.747855,0.735929,0.741844,0.826107
6,0.2672,0.749125,0.744782,0.736398,0.740566,0.825641
7,0.2083,0.717062,0.758907,0.749296,0.754071,0.833411
8,0.1514,0.718931,0.763866,0.75258,0.758181,0.837451
9,0.1571,0.717397,0.763489,0.753283,0.758352,0.836985
10,0.136,0.717348,0.764384,0.753987,0.75915,0.837451


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 28. Training using 116 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.6131,2.443681,0.31484,0.216932,0.256873,0.427506
2,1.6399,1.26275,0.630731,0.596857,0.613327,0.704584
3,0.8562,0.989595,0.705738,0.672139,0.688529,0.770319
4,0.5622,0.838003,0.739547,0.705206,0.721969,0.801399
5,0.3732,0.758672,0.750906,0.728893,0.739736,0.82129
6,0.2854,0.758422,0.757267,0.733114,0.744995,0.826729
7,0.2101,0.740954,0.756724,0.738977,0.747746,0.832168
8,0.1655,0.726508,0.766467,0.750469,0.758384,0.837296
9,0.1336,0.736038,0.764131,0.751407,0.757716,0.837141
10,0.1398,0.736936,0.762393,0.750235,0.756265,0.836208


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 29. Training using 120 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5229,2.316834,0.333952,0.210835,0.258482,0.420357
2,1.6051,1.233456,0.633824,0.595919,0.614287,0.705983
3,0.8748,0.950409,0.702174,0.674015,0.687807,0.779798
4,0.5459,0.819946,0.73213,0.72303,0.727552,0.810256
5,0.3501,0.760504,0.753658,0.736867,0.745168,0.822999
6,0.2396,0.728964,0.762199,0.750938,0.756527,0.832479
7,0.1719,0.753414,0.754278,0.744371,0.749292,0.829992
8,0.1427,0.748697,0.754833,0.750938,0.75288,0.8331
9,0.1397,0.740422,0.755902,0.750938,0.753412,0.834343
10,0.1225,0.742464,0.755608,0.750469,0.75303,0.834965


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]



VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▂▁▄▄▃▅▆▅▄▇▇▆▃▅▇▇▇▂█▇▇███▆████▆██▇▄██▇███
eval/f1,▁▁▃▃▂▄▆▄▃▆▇▆▂▃▇▇▇▁▇▇▆██▇▆████▆██▇▃██▇███
eval/loss,▇█▅▅▆▄▃▄▅▂▂▃▆▅▂▂▂▆▁▂▂▁▁▁▃▁▁▁▁▃▁▁▁▅▁▁▂▁▁▁
eval/precision,▁▁▄▄▃▄▆▅▃▆▇▆▂▄▇▇▇▁▇▇▆██▇▆████▆██▇▃██▇███
eval/recall,▁▁▂▃▂▄▆▄▂▆▇▅▂▃▇▇▇▁▇▇▆█▇▇▅████▆██▇▃██▇███
eval/runtime,▂▂▁▁▁█▁▁▇▇▂▂▁▂▂▂▁▁▂▇▇▁▂▂▁▂▂▁█▂▃▂▂▂▂█▂▂▂▃
eval/samples_per_second,▇▇███▁▇█▂▂▆▇▇▇▇▇██▇▂▂▇▇▇▇▇▇▇▁▇▆▇▇▇▆▁▇▆▇▆
eval/steps_per_second,▇▇███▁▇█▂▂▆▇▇▇▇▇██▇▂▂▇▇▇▇▇▇▇▁▇▆▇▇▇▆▁▇▆▇▆
train/epoch,▆█▂▃▃▄▃▃▃▁█▇▆▄▃█▆▄▂█▅▃█▅▂▇▄▁▆▃▇▄█▅▂▆▂▇▃▇
train/global_step,▁▁▁▁▁▂▂▂▂▁▄▄▃▃▂▅▄▃▂▅▄▂▆▄▂▆▄▁▅▃▇▄█▅▂▆▃▇▃█

0,1
eval/accuracy,0.83497
eval/f1,0.75303
eval/loss,0.74246
eval/precision,0.75561
eval/recall,0.75047
eval/runtime,0.8753
eval/samples_per_second,87.967
eval/steps_per_second,22.849
train/epoch,10.0
train/global_step,300.0


Current seed:  1063


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 0. Training using 4 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.816309,0.013294,0.016886,0.014876,0.02331
2,No log,3.56267,0.052768,0.047608,0.050055,0.153846
3,No log,3.350509,0.068852,0.02955,0.041352,0.192696
4,No log,3.205517,0.094703,0.027674,0.042831,0.193473
5,No log,3.121142,0.115806,0.034709,0.05341,0.200622
6,No log,3.070256,0.12987,0.044559,0.066352,0.214608
7,No log,3.039236,0.132959,0.049953,0.072622,0.222844
8,No log,3.022436,0.142183,0.05652,0.080886,0.230769
9,No log,3.014938,0.148469,0.060272,0.085738,0.233722
10,No log,3.012997,0.151376,0.061914,0.087883,0.235431


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 1. Training using 8 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.759276,0.017337,0.021341,0.019132,0.032479
2,No log,3.338366,0.045177,0.02955,0.035729,0.167366
3,No log,3.110311,0.06238,0.037992,0.047223,0.200777
4,No log,2.985697,0.08526,0.052767,0.065189,0.223621
5,No log,2.898949,0.125984,0.086304,0.102436,0.263403
6,No log,2.830687,0.165934,0.11515,0.135955,0.304429
7,No log,2.776557,0.194859,0.143996,0.16561,0.34157
8,2.798200,2.740374,0.21033,0.158537,0.180797,0.3554
9,2.798200,2.723866,0.214792,0.163462,0.185644,0.358508
10,2.798200,2.720446,0.215578,0.164869,0.186844,0.360373


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 2. Training using 12 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.594489,0.054074,0.052298,0.053171,0.145765
2,No log,3.118058,0.137821,0.010084,0.018794,0.171873
3,No log,2.889461,0.319424,0.052064,0.089534,0.211655
4,No log,2.708612,0.272573,0.150797,0.194172,0.336131
5,No log,2.556704,0.318581,0.229597,0.266867,0.427661
6,2.816900,2.429217,0.359356,0.272045,0.309664,0.466977
7,2.816900,2.340124,0.386942,0.303002,0.339866,0.492463
8,2.816900,2.289098,0.392512,0.314728,0.349343,0.502875
9,2.816900,2.267062,0.393746,0.318949,0.352423,0.506605
10,2.816900,2.262834,0.394509,0.320122,0.353444,0.508159


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 3. Training using 16 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.556639,0.081479,0.068715,0.074555,0.192696
2,No log,3.008564,0.194969,0.036351,0.061277,0.19596
3,No log,2.719095,0.292348,0.184568,0.226279,0.385392
4,2.983700,2.46871,0.371187,0.288227,0.324488,0.494017
5,2.983700,2.232793,0.401201,0.313321,0.351857,0.517327
6,2.983700,2.057241,0.428979,0.35272,0.38713,0.545765
7,2.983700,1.953466,0.452727,0.38743,0.417541,0.569386
8,1.603600,1.899977,0.472634,0.411116,0.439734,0.586636
9,1.603600,1.877676,0.479095,0.421904,0.448684,0.595338
10,1.603600,1.873737,0.480595,0.424015,0.450536,0.596426


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 4. Training using 20 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.414847,0.095416,0.089822,0.092534,0.217249
2,No log,2.92525,0.136929,0.015478,0.027813,0.188345
3,No log,2.638446,0.30598,0.22561,0.259719,0.436364
4,3.123300,2.339202,0.376938,0.313555,0.342338,0.513753
5,3.123300,2.076754,0.43365,0.374765,0.402063,0.562393
6,3.123300,1.892395,0.48335,0.442542,0.462047,0.611966
7,1.801600,1.784086,0.515252,0.471388,0.492345,0.633256
8,1.801600,1.733835,0.527156,0.4803,0.502638,0.639938
9,1.801600,1.708127,0.532821,0.487336,0.509064,0.644444
10,1.202100,1.703635,0.533623,0.489447,0.510581,0.645221


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 5. Training using 24 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.326056,0.05454,0.042683,0.047888,0.194872
2,No log,2.843084,0.187189,0.061679,0.092785,0.242269
3,3.228400,2.543257,0.298265,0.189493,0.231751,0.397824
4,3.228400,2.208887,0.387207,0.318011,0.349215,0.509713
5,3.228400,1.929908,0.467804,0.431051,0.448676,0.593007
6,1.979600,1.764922,0.497713,0.459428,0.477805,0.613364
7,1.979600,1.634552,0.535291,0.508677,0.521645,0.646775
8,1.247600,1.591131,0.541232,0.511023,0.525694,0.65035
9,1.247600,1.576398,0.541947,0.510553,0.525782,0.65035
10,1.247600,1.572528,0.543938,0.51243,0.527714,0.651593


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 6. Training using 28 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.167044,0.080745,0.027439,0.040959,0.186636
2,No log,2.739403,0.185567,0.109756,0.137931,0.301632
3,3.230600,2.393369,0.302077,0.21834,0.253471,0.433566
4,3.230600,1.992198,0.475769,0.428236,0.450753,0.601709
5,2.030900,1.708775,0.525789,0.480535,0.502144,0.637296
6,2.030900,1.507609,0.593275,0.55863,0.575432,0.691064
7,1.189800,1.419972,0.606488,0.574343,0.589978,0.700233
8,1.189800,1.37589,0.613168,0.578799,0.595488,0.705051
9,1.189800,1.367283,0.609387,0.575516,0.591967,0.702564
10,0.869900,1.364167,0.610174,0.576689,0.592959,0.70303


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 7. Training using 32 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.292963,0.076186,0.037289,0.050071,0.197669
2,3.369900,2.663198,0.243508,0.169325,0.199751,0.372805
3,3.369900,2.153656,0.422813,0.361632,0.389837,0.555089
4,2.030000,1.722922,0.537696,0.481707,0.508164,0.635431
5,2.030000,1.422994,0.599198,0.560976,0.579457,0.693551
6,1.142400,1.315336,0.615099,0.582786,0.598507,0.712354
7,1.142400,1.246921,0.634921,0.609756,0.622084,0.729759
8,0.786000,1.19542,0.64808,0.621482,0.634503,0.742347
9,0.786000,1.18701,0.654006,0.625938,0.639664,0.746542
10,0.683200,1.185982,0.656548,0.628987,0.642472,0.747786


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 8. Training using 36 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.07546,0.140688,0.032598,0.052932,0.194872
2,3.298100,2.573756,0.30116,0.18879,0.232089,0.391453
3,3.298100,2.079532,0.433817,0.371248,0.400101,0.552292
4,2.163500,1.639437,0.552756,0.5197,0.535719,0.648329
5,2.163500,1.408283,0.588736,0.551595,0.56956,0.677078
6,1.231300,1.280264,0.621017,0.598734,0.609672,0.705828
7,1.231300,1.216596,0.635001,0.616088,0.625402,0.725408
8,0.838700,1.191208,0.64296,0.625469,0.634094,0.731313
9,0.672500,1.17892,0.645808,0.626876,0.636201,0.733955
10,0.672500,1.177445,0.645864,0.626173,0.635866,0.73411


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 9. Training using 40 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.09405,0.164726,0.06637,0.094617,0.228127
2,3.336400,2.577779,0.201729,0.164165,0.181019,0.369231
3,3.336400,2.014634,0.387819,0.292683,0.333601,0.501632
4,2.227100,1.561108,0.566462,0.5197,0.542074,0.665268
5,1.361200,1.310556,0.616774,0.584662,0.600289,0.71453
6,1.361200,1.222628,0.630717,0.604831,0.617503,0.728827
7,0.844600,1.146298,0.648721,0.624531,0.636396,0.742968
8,0.611300,1.118822,0.650859,0.630863,0.640705,0.745455
9,0.611300,1.111881,0.653865,0.63485,0.644217,0.748252
10,0.549100,1.110774,0.654348,0.635319,0.644693,0.748718


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 10. Training using 44 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.054187,0.103147,0.055347,0.072039,0.205594
2,3.371500,2.474145,0.274627,0.21576,0.24166,0.409479
3,2.245400,1.8104,0.514782,0.465525,0.488916,0.621445
4,2.245400,1.418429,0.588105,0.558865,0.573112,0.679099
5,1.316200,1.239083,0.628523,0.611867,0.620083,0.717949
6,0.848800,1.133702,0.663606,0.643996,0.653654,0.752603
7,0.848800,1.069846,0.676471,0.658068,0.667142,0.765346
8,0.605400,1.069685,0.671487,0.653377,0.662308,0.760684
9,0.509500,1.061606,0.674553,0.655253,0.664763,0.764103
10,0.509500,1.058568,0.676819,0.65666,0.666587,0.76519


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 11. Training using 48 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.018081,0.123499,0.067542,0.087326,0.231857
2,3.370800,2.251818,0.372628,0.280957,0.320364,0.506138
3,2.216000,1.599973,0.57792,0.527908,0.551783,0.665268
4,1.303100,1.284535,0.635274,0.596388,0.615217,0.715773
5,1.303100,1.147467,0.660701,0.640713,0.650554,0.745455
6,0.814800,1.120663,0.671756,0.639775,0.655375,0.753535
7,0.614400,1.026698,0.694183,0.674484,0.684192,0.774359
8,0.485000,0.998366,0.705897,0.676595,0.690935,0.78042
9,0.485000,0.983592,0.706683,0.681989,0.694116,0.784149
10,0.428900,0.981877,0.708646,0.68621,0.697248,0.785703


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 12. Training using 52 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.991682,0.137363,0.064493,0.087775,0.232168
2,3.400400,2.193237,0.366866,0.302767,0.331749,0.500544
3,2.318900,1.557302,0.56403,0.513368,0.537508,0.652991
4,1.350300,1.259782,0.623855,0.591229,0.607104,0.708469
5,0.877100,1.111969,0.66845,0.644465,0.656239,0.746232
6,0.877100,1.028041,0.685984,0.663462,0.674535,0.76892
7,0.625500,1.003414,0.687244,0.668386,0.677684,0.771717
8,0.469500,0.983081,0.69062,0.67167,0.681013,0.774825
9,0.415800,0.97654,0.698068,0.677767,0.687768,0.778555
10,0.385600,0.973949,0.697949,0.678471,0.688072,0.778866


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 13. Training using 56 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.009228,0.072619,0.028612,0.04105,0.156488
2,3.417700,2.252166,0.382799,0.25469,0.305872,0.461072
3,2.390300,1.566658,0.552258,0.510553,0.530587,0.654701
4,1.501600,1.262208,0.62653,0.600375,0.613174,0.71826
5,0.977400,1.118181,0.655049,0.643527,0.649237,0.748873
6,0.651600,1.068029,0.672269,0.65666,0.664373,0.765657
7,0.513700,1.008191,0.691623,0.67378,0.682585,0.777312
8,0.415600,0.978752,0.699064,0.683161,0.691021,0.782284
9,0.415600,0.974051,0.701675,0.687852,0.694694,0.783372
10,0.379400,0.975198,0.701557,0.686914,0.694158,0.783528


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 14. Training using 60 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.950734,0.176335,0.071998,0.102248,0.242735
2,3.435300,2.005845,0.46378,0.391886,0.424813,0.563792
3,2.326000,1.39835,0.597348,0.570591,0.583663,0.687335
4,1.466700,1.143171,0.668613,0.644465,0.656317,0.753535
5,0.924700,1.078432,0.682544,0.662054,0.672143,0.767521
6,0.673000,0.972006,0.696948,0.690901,0.693911,0.783683
7,0.454800,0.937102,0.69684,0.687852,0.692317,0.783838
8,0.381000,0.915141,0.704292,0.696529,0.700389,0.790831
9,0.362100,0.914339,0.70752,0.697233,0.702339,0.79021
10,0.292000,0.915771,0.70626,0.695826,0.701004,0.789744


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 15. Training using 64 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4232,2.934113,0.3472,0.050891,0.088771,0.203574
2,2.4245,1.980443,0.432231,0.340291,0.38079,0.532712
3,1.5299,1.393009,0.590522,0.552298,0.570771,0.678943
4,0.9909,1.108464,0.666028,0.652439,0.659164,0.756022
5,0.6898,0.980302,0.713016,0.696295,0.704556,0.791919
6,0.4908,0.946824,0.695231,0.690666,0.692941,0.786014
7,0.3761,0.90225,0.720376,0.701454,0.710789,0.797203
8,0.3194,0.902608,0.710433,0.704268,0.707337,0.798291
9,0.2852,0.902947,0.712456,0.704268,0.708338,0.799068
10,0.2702,0.902065,0.713438,0.704737,0.709061,0.799223


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 16. Training using 68 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.502,2.954286,0.202694,0.077627,0.11226,0.231857
2,2.5087,1.96636,0.440608,0.374062,0.404617,0.549495
3,1.5722,1.386872,0.596196,0.573405,0.584579,0.687179
4,1.0622,1.22015,0.635337,0.619841,0.627493,0.725874
5,0.7131,1.003691,0.6962,0.687383,0.691763,0.781818
6,0.544,0.939393,0.71008,0.707083,0.708578,0.795183
7,0.421,0.930527,0.704374,0.702392,0.703382,0.792385
8,0.3605,0.914665,0.706117,0.706614,0.706365,0.796426
9,0.2791,0.918413,0.708021,0.708021,0.708021,0.797358
10,0.252,0.918813,0.706585,0.707083,0.706834,0.797047


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 17. Training using 72 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4282,2.854589,0.313853,0.068011,0.111796,0.225641
2,2.5103,1.773399,0.544369,0.492026,0.516876,0.647086
3,1.5063,1.254969,0.616179,0.602017,0.609015,0.717483
4,1.0212,1.02893,0.696672,0.682458,0.689492,0.780886
5,0.7415,0.971995,0.713667,0.695591,0.704513,0.792696
6,0.4792,0.901762,0.725818,0.712711,0.719205,0.801865
7,0.4094,0.859896,0.7283,0.710366,0.719221,0.805439
8,0.2675,0.864185,0.729097,0.71576,0.722367,0.807304
9,0.2598,0.864338,0.730871,0.714587,0.722637,0.807304
10,0.2447,0.864151,0.730391,0.714118,0.722163,0.807304


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 18. Training using 76 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4561,2.827687,0.167803,0.075047,0.103711,0.256255
2,2.4384,1.683817,0.545716,0.489916,0.516312,0.640715
3,1.4895,1.231108,0.631399,0.607411,0.619173,0.713598
4,1.111,1.07669,0.672259,0.648452,0.660141,0.756333
5,0.7217,0.942139,0.706165,0.690432,0.698209,0.789433
6,0.4207,0.904339,0.707889,0.70075,0.704302,0.792385
7,0.3313,0.883108,0.712381,0.701689,0.706994,0.79425
8,0.2605,0.862634,0.71787,0.711304,0.714572,0.798135
9,0.2851,0.855831,0.722986,0.715525,0.719236,0.802797
10,0.2252,0.857141,0.726148,0.71576,0.720916,0.80404


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 19. Training using 80 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.557,2.82784,0.258065,0.090056,0.133519,0.266045
2,2.5628,1.773721,0.498705,0.451689,0.474034,0.60979
3,1.7422,1.269951,0.611532,0.587008,0.599019,0.699301
4,0.7895,1.031594,0.689639,0.675891,0.682696,0.774981
5,0.5531,0.975525,0.696667,0.68621,0.691399,0.783217
6,0.4801,0.882144,0.719791,0.709662,0.714691,0.80202
7,0.3374,0.889067,0.715677,0.706614,0.711116,0.800932
8,0.2524,0.897737,0.721891,0.709193,0.715486,0.802176
9,0.2357,0.89289,0.719648,0.710366,0.714977,0.802176
10,0.2156,0.890823,0.717236,0.70849,0.712836,0.801399


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 20. Training using 84 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5199,2.718597,0.182094,0.11257,0.13913,0.308159
2,2.5206,1.537079,0.559979,0.511257,0.53451,0.65439
3,1.4591,1.101242,0.666992,0.640713,0.653589,0.752914
4,0.7044,0.937914,0.723029,0.700985,0.711836,0.797203
5,0.6174,0.882017,0.715065,0.706848,0.710933,0.797514
6,0.4042,0.839958,0.727015,0.721388,0.724191,0.804973
7,0.2879,0.824033,0.741813,0.733114,0.737438,0.814918
8,0.2726,0.809556,0.745678,0.738508,0.742076,0.819425
9,0.2085,0.80218,0.749165,0.736867,0.742965,0.820202
10,0.1941,0.803721,0.750895,0.738039,0.744412,0.820513


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 21. Training using 88 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.6083,2.740337,0.234091,0.120779,0.159344,0.313131
2,2.5683,1.582384,0.57704,0.534006,0.554689,0.669308
3,1.1277,1.129697,0.666749,0.634381,0.650162,0.742191
4,0.8687,0.94782,0.706651,0.685272,0.695797,0.78819
5,0.5617,0.889793,0.721069,0.708724,0.714843,0.803263
6,0.3492,0.836552,0.739514,0.72772,0.73357,0.816006
7,0.2848,0.820438,0.745843,0.736398,0.74109,0.822999
8,0.1819,0.813206,0.744575,0.740385,0.742474,0.822533
9,0.1891,0.8101,0.745687,0.739916,0.74279,0.823465
10,0.1766,0.809488,0.745863,0.739916,0.742877,0.823465


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 22. Training using 92 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5354,2.679579,0.261712,0.15197,0.192285,0.367988
2,2.4943,1.458786,0.587189,0.541745,0.563552,0.674592
3,1.0792,1.120764,0.651599,0.635553,0.643476,0.743279
4,0.8567,0.906412,0.717491,0.697467,0.707337,0.798757
5,0.5026,0.84861,0.724357,0.72045,0.722399,0.809013
6,0.3536,0.803635,0.74089,0.734287,0.737574,0.823932
7,0.2674,0.812545,0.739606,0.730066,0.734805,0.818182
8,0.2199,0.802788,0.747137,0.734522,0.740776,0.823465
9,0.2096,0.803202,0.748811,0.738274,0.743505,0.826107
10,0.1699,0.802051,0.750238,0.738977,0.744565,0.826573


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 23. Training using 96 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.483,2.620537,0.271977,0.131332,0.177131,0.336131
2,1.6956,1.46112,0.585366,0.557223,0.570948,0.67397
3,1.1513,1.109813,0.658997,0.638133,0.648397,0.74965
4,0.6948,0.90369,0.715012,0.692542,0.703598,0.794716
5,0.4903,0.857468,0.722697,0.713884,0.718263,0.804973
6,0.3121,0.803779,0.74082,0.733349,0.737065,0.820047
7,0.2722,0.817005,0.747025,0.736163,0.741554,0.823155
8,0.2,0.808024,0.740076,0.734522,0.737288,0.81927
9,0.1893,0.823063,0.737328,0.730066,0.733679,0.817405
10,0.1626,0.822865,0.736144,0.728893,0.732501,0.816939


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 24. Training using 100 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5432,2.618511,0.28852,0.155019,0.201678,0.352292
2,1.7504,1.444179,0.586667,0.546904,0.566088,0.67195
3,1.2376,1.04199,0.683118,0.655722,0.66914,0.7669
4,0.6269,0.884577,0.726391,0.719747,0.723053,0.808547
5,0.4915,0.812671,0.735831,0.72772,0.731753,0.8115
6,0.3279,0.7602,0.750826,0.746248,0.74853,0.826263
7,0.2413,0.776948,0.74823,0.743668,0.745942,0.824864
8,0.1876,0.759794,0.756877,0.754925,0.7559,0.831391
9,0.1644,0.765708,0.753415,0.750235,0.751821,0.829526
10,0.1562,0.767306,0.75489,0.751173,0.753027,0.830148


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 25. Training using 104 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4868,2.61711,0.294178,0.135084,0.185149,0.337218
2,1.7549,1.398413,0.600855,0.560272,0.579854,0.688423
3,1.1633,1.118344,0.641845,0.626642,0.634152,0.729293
4,0.6738,0.88266,0.724404,0.705206,0.714676,0.801709
5,0.4275,0.819101,0.736691,0.72045,0.72848,0.810412
6,0.298,0.804624,0.736817,0.724203,0.730455,0.814141
7,0.2315,0.813697,0.74156,0.731473,0.736482,0.819425
8,0.1901,0.792958,0.750297,0.739916,0.74507,0.825175
9,0.1593,0.803884,0.746365,0.734287,0.740277,0.821756
10,0.1676,0.805484,0.746543,0.734287,0.740364,0.821911


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 26. Training using 108 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.583,2.634999,0.221631,0.117261,0.153374,0.319192
2,1.8848,1.418586,0.580905,0.542214,0.560893,0.670707
3,0.9019,0.984511,0.691636,0.66909,0.680176,0.770008
4,0.7,0.846984,0.72176,0.711773,0.716732,0.801399
5,0.4096,0.776346,0.741552,0.730769,0.736121,0.81756
6,0.2844,0.761168,0.751184,0.744137,0.747644,0.828283
7,0.225,0.773253,0.742424,0.73546,0.738926,0.821756
8,0.1658,0.773653,0.746286,0.742261,0.744268,0.825641
9,0.1534,0.769093,0.755703,0.745779,0.750708,0.829837
10,0.145,0.768742,0.755455,0.746951,0.751179,0.830148


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 27. Training using 112 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.6597,2.536334,0.331426,0.203799,0.252396,0.401088
2,1.8324,1.380962,0.59877,0.570826,0.584464,0.687024
3,0.9971,0.997183,0.691731,0.670966,0.68119,0.775913
4,0.551,0.827948,0.748028,0.733818,0.740855,0.81756
5,0.4131,0.776573,0.745317,0.737101,0.741186,0.821911
6,0.2655,0.785162,0.749055,0.743433,0.746234,0.824709
7,0.2104,0.775202,0.745472,0.743199,0.744334,0.825796
8,0.16,0.783951,0.749705,0.746013,0.747855,0.826729
9,0.1588,0.782745,0.748707,0.746951,0.747828,0.826884
10,0.1282,0.784031,0.748707,0.746951,0.747828,0.827195


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 28. Training using 116 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5252,2.492277,0.324342,0.228424,0.268061,0.432945
2,1.7847,1.321118,0.6089,0.577627,0.592851,0.683916
3,0.8667,0.950045,0.712524,0.691135,0.701667,0.787879
4,0.6041,0.819127,0.73575,0.723499,0.729573,0.809479
5,0.3813,0.810095,0.735322,0.734287,0.734804,0.813054
6,0.3021,0.747041,0.753912,0.745779,0.749823,0.826418
7,0.2132,0.750788,0.761399,0.755863,0.758621,0.832479
8,0.156,0.753693,0.757825,0.755159,0.75649,0.831702
9,0.1322,0.758969,0.754748,0.754925,0.754836,0.831235
10,0.1181,0.760787,0.758596,0.755394,0.756992,0.831702


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 29. Training using 120 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5298,2.466308,0.365815,0.21834,0.273462,0.416783
2,1.8692,1.324289,0.605825,0.585366,0.59542,0.694949
3,0.9251,0.974067,0.701888,0.671435,0.686324,0.777933
4,0.5866,0.834932,0.74445,0.723499,0.733825,0.81181
5,0.3541,0.788487,0.745991,0.741792,0.743885,0.821911
6,0.2648,0.778879,0.755603,0.751173,0.753381,0.829215
7,0.1832,0.741586,0.762972,0.758677,0.760818,0.837296
8,0.1361,0.76016,0.760752,0.759146,0.759948,0.835897
9,0.1366,0.767009,0.761613,0.757505,0.759553,0.835742
10,0.1226,0.766125,0.762736,0.758443,0.760583,0.836364


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▂▁▄▅▄▆▆▆▃▇▇▆▃▄▇▇▇▂██▇███▆████▇███▄██▇███
eval/f1,▁▁▃▄▃▅▆▅▂▆▇▆▂▃▇▇▇▁▇▇▆█▇▇▆████▆███▃██▇███
eval/loss,▇█▆▅▆▄▃▄▆▂▂▃▆▅▂▂▂▆▂▂▂▁▁▂▃▁▁▁▁▃▁▁▁▅▁▁▂▁▁▁
eval/precision,▂▁▃▄▄▅▆▅▃▇▇▆▂▃▇▇▇▂▇▇▆█▇█▆████▆███▄██▇███
eval/recall,▁▁▂▃▃▅▅▅▂▆▇▆▁▃▇▇▆▁▇▇▆▇█▇▅████▆███▂██▇███
eval/runtime,▁█▂▂▁▁▂▇▂▂▂▇▁▂▂▂▃▂▂▂▂▁▇▂▁▇▂▂▁▆▁█▂▁▁▂▆▁▂▁
eval/samples_per_second,█▁▇▇▇█▇▂▇▆▇▂█▆▇▇▆▆▇▇▇█▁▇▇▂▇▇█▂▇▁▇▇▇▇▂█▇▇
eval/steps_per_second,█▁▇▇▇█▇▂▇▆▇▂█▆▇▇▆▆▇▇▇█▁▇▇▂▇▇█▂▇▁▇▇▇▇▂█▇▇
train/epoch,▆█▂▃▃▄▃▃▃▁█▇▆▄▃█▆▄▂█▅▃█▅▂▇▄▁▆▃▇▄█▅▂▆▂▇▃▇
train/global_step,▁▁▁▁▁▂▂▂▂▁▄▄▃▃▂▅▄▃▂▅▄▂▆▄▂▆▄▁▅▃▇▄█▅▂▆▃▇▃█

0,1
eval/accuracy,0.83636
eval/f1,0.76058
eval/loss,0.76612
eval/precision,0.76274
eval/recall,0.75844
eval/runtime,0.8776
eval/samples_per_second,87.741
eval/steps_per_second,22.79
train/epoch,10.0
train/global_step,300.0


Current seed:  184


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113391933334545, max=1.0…

Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 0. Training using 4 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.832788,0.015657,0.019934,0.017538,0.02906
2,No log,3.591454,0.053993,0.062148,0.057785,0.133023
3,No log,3.386785,0.085479,0.075516,0.080189,0.19627
4,No log,3.230706,0.098967,0.069653,0.081762,0.197358
5,No log,3.125157,0.108688,0.068949,0.084374,0.199845
6,No log,3.056917,0.127382,0.089353,0.105031,0.227195
7,No log,3.012968,0.143162,0.109991,0.124403,0.261228
8,No log,2.987821,0.148874,0.119371,0.1325,0.278322
9,No log,2.976237,0.156071,0.127814,0.140536,0.28951
10,No log,2.97325,0.157431,0.129925,0.142362,0.292152


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 1. Training using 8 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.781943,0.022279,0.028612,0.025051,0.05066
2,No log,3.347039,0.120053,0.021341,0.036241,0.204662
3,No log,3.098431,0.152968,0.015713,0.028499,0.195804
4,No log,2.963582,0.158397,0.019465,0.03467,0.207459
5,No log,2.85311,0.241733,0.049719,0.082474,0.24662
6,No log,2.780179,0.310268,0.097795,0.148716,0.291686
7,No log,2.724097,0.320505,0.119137,0.173705,0.311267
8,2.739200,2.687713,0.318532,0.126173,0.180749,0.31826
9,2.739200,2.670902,0.318315,0.12758,0.182153,0.320435
10,2.739200,2.66727,0.319485,0.128049,0.182823,0.321057


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 2. Training using 12 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.556666,0.068207,0.072702,0.070383,0.147319
2,No log,3.060042,0.19491,0.079034,0.112465,0.269308
3,No log,2.851556,0.262626,0.079268,0.12178,0.273349
4,No log,2.682518,0.298375,0.14212,0.192534,0.337685
5,No log,2.541835,0.297266,0.181051,0.22504,0.381974
6,2.823600,2.408587,0.311771,0.206848,0.248696,0.410256
7,2.823600,2.320716,0.328333,0.225844,0.267612,0.429681
8,2.823600,2.27048,0.346329,0.25,0.290384,0.452059
9,2.823600,2.24889,0.352643,0.261257,0.300148,0.46216
10,2.823600,2.245061,0.357098,0.265478,0.304547,0.465579


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 3. Training using 16 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.489131,0.057782,0.043621,0.049713,0.169075
2,No log,3.049449,0.192529,0.031426,0.054032,0.20373
3,No log,2.801086,0.304014,0.120779,0.172877,0.314064
4,2.952400,2.592798,0.286842,0.17894,0.220393,0.384615
5,2.952400,2.383262,0.331404,0.214822,0.260672,0.420202
6,2.952400,2.221264,0.373633,0.280488,0.320429,0.479565
7,2.952400,2.11094,0.421884,0.344512,0.379293,0.528671
8,1.645900,2.046341,0.441886,0.373593,0.40488,0.550117
9,1.645900,2.018646,0.451942,0.38485,0.415706,0.558664
10,1.645900,2.013704,0.452747,0.386492,0.417004,0.560528


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 4. Training using 20 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.414795,0.075975,0.060741,0.067509,0.206527
2,No log,2.845922,0.239733,0.109522,0.150354,0.320746
3,No log,2.529422,0.328648,0.2303,0.270822,0.436208
4,3.034900,2.222703,0.404978,0.324343,0.360203,0.518104
5,3.034900,1.939201,0.512155,0.439728,0.473186,0.599845
6,3.034900,1.752738,0.5375,0.473968,0.503739,0.623155
7,1.606700,1.65886,0.545952,0.491792,0.517458,0.634499
8,1.606700,1.612535,0.551421,0.500469,0.524711,0.642735
9,1.606700,1.593443,0.555556,0.506567,0.529931,0.647397
10,1.050200,1.590124,0.554784,0.505863,0.529195,0.647863


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 5. Training using 24 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.351479,0.098879,0.057927,0.073055,0.225486
2,No log,2.806046,0.234244,0.098499,0.138683,0.282362
3,3.142100,2.4269,0.313014,0.214353,0.254454,0.408081
4,3.142100,2.044257,0.451323,0.388133,0.41735,0.563481
5,3.142100,1.799909,0.532789,0.474437,0.501923,0.624242
6,1.743600,1.66181,0.545949,0.494606,0.519011,0.630769
7,1.743600,1.570995,0.56218,0.515244,0.53769,0.645532
8,1.092000,1.529198,0.567779,0.526501,0.546362,0.653768
9,1.092000,1.512629,0.571824,0.533068,0.551766,0.659052
10,1.092000,1.509972,0.572904,0.533537,0.55252,0.659674


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 6. Training using 28 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.203233,0.107374,0.062148,0.078728,0.218026
2,No log,2.671754,0.29314,0.128283,0.178467,0.325097
3,3.139200,2.292637,0.317814,0.220919,0.260653,0.423932
4,3.139200,1.892574,0.48527,0.424953,0.453113,0.589744
5,1.885200,1.621811,0.563438,0.5197,0.540686,0.665268
6,1.885200,1.484559,0.595113,0.559803,0.576918,0.686092
7,1.033300,1.389648,0.606924,0.579737,0.593019,0.697436
8,1.033300,1.380163,0.61407,0.581379,0.597277,0.69899
9,1.033300,1.348852,0.616405,0.588649,0.602207,0.703341
10,0.812300,1.344871,0.616405,0.588649,0.602207,0.703807


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 7. Training using 32 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.221887,0.012821,0.000704,0.001334,0.158819
2,3.286800,2.711777,0.231813,0.13227,0.168434,0.330381
3,3.286800,2.194433,0.349244,0.243668,0.287056,0.447242
4,1.957400,1.742434,0.558506,0.498124,0.52659,0.651748
5,1.957400,1.502661,0.591204,0.554878,0.572466,0.683294
6,1.087100,1.36657,0.608006,0.584193,0.595862,0.696348
7,1.087100,1.3073,0.608421,0.586304,0.597158,0.698834
8,0.732100,1.272782,0.615891,0.592636,0.60404,0.70272
9,0.732100,1.257026,0.620244,0.596388,0.608082,0.705206
10,0.632100,1.254282,0.620185,0.596623,0.608176,0.705672


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 8. Training using 36 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.11676,0.129239,0.080441,0.099162,0.241958
2,3.313300,2.599294,0.306519,0.136726,0.189102,0.330847
3,3.313300,2.124947,0.345455,0.240619,0.28366,0.437918
4,2.043300,1.667845,0.560264,0.517824,0.538208,0.655322
5,2.043300,1.418526,0.593353,0.569418,0.581139,0.684382
6,1.144900,1.308892,0.613075,0.593809,0.603288,0.701166
7,1.144900,1.248114,0.622578,0.60272,0.612488,0.708936
8,0.750300,1.210075,0.638225,0.613977,0.625867,0.720124
9,0.583500,1.191091,0.641874,0.623358,0.632481,0.725874
10,0.583500,1.189168,0.642357,0.623827,0.632957,0.726807


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 9. Training using 40 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.105828,0.073529,0.007036,0.012842,0.171717
2,3.296300,2.499645,0.280423,0.161585,0.205029,0.365967
3,3.296300,1.97498,0.453067,0.398452,0.424008,0.581197
4,2.129200,1.550509,0.577789,0.541745,0.559187,0.672883
5,1.272800,1.353913,0.611396,0.586304,0.598587,0.703652
6,1.272800,1.243243,0.634259,0.61046,0.622132,0.720591
7,0.776900,1.192895,0.65211,0.626876,0.639244,0.737218
8,0.635500,1.16171,0.653453,0.630159,0.641595,0.737995
9,0.635500,1.151824,0.659606,0.635319,0.647235,0.740482
10,0.537000,1.149442,0.659766,0.635319,0.647312,0.740793


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 10. Training using 44 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.08224,0.141316,0.089118,0.109305,0.248951
2,3.363000,2.455989,0.297115,0.15455,0.203332,0.34561
3,2.224100,1.837755,0.513699,0.457317,0.483871,0.618803
4,2.224100,1.434615,0.594235,0.565666,0.579599,0.684693
5,1.310300,1.249902,0.621125,0.610929,0.615985,0.712354
6,0.855200,1.185719,0.641707,0.617026,0.629125,0.729448
7,0.855200,1.108101,0.653576,0.63227,0.642746,0.741259
8,0.621900,1.0778,0.667641,0.643058,0.655119,0.751826
9,0.506600,1.064881,0.674045,0.649859,0.661731,0.755556
10,0.506600,1.063754,0.674757,0.65197,0.663168,0.756799


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 11. Training using 48 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.017886,0.172727,0.02228,0.039468,0.190676
2,3.408000,2.40713,0.290951,0.147045,0.195358,0.350272
3,2.240300,1.760383,0.520334,0.468105,0.49284,0.622688
4,1.399700,1.397622,0.6,0.579034,0.58933,0.693862
5,1.399700,1.192012,0.641631,0.631098,0.636321,0.732556
6,0.832600,1.088544,0.6708,0.647045,0.658708,0.747319
7,0.613100,1.039042,0.683216,0.661585,0.672227,0.760062
8,0.481200,1.02087,0.683777,0.662289,0.672862,0.763015
9,0.481200,1.018067,0.685208,0.661585,0.673189,0.763947
10,0.423100,1.016996,0.686013,0.662523,0.674063,0.764724


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 12. Training using 52 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.967335,0.199566,0.086304,0.120498,0.268998
2,3.360100,2.262986,0.337937,0.258208,0.292741,0.46216
3,2.282900,1.647077,0.538034,0.499296,0.517942,0.641336
4,1.442900,1.328372,0.606529,0.570826,0.588136,0.690132
5,0.906300,1.18591,0.651475,0.626876,0.638939,0.739549
6,0.906300,1.077441,0.686072,0.655019,0.670186,0.760684
7,0.669500,1.011,0.696246,0.669794,0.682764,0.774359
8,0.528300,0.995091,0.701993,0.677298,0.689425,0.777778
9,0.444800,0.986641,0.700267,0.676126,0.687985,0.776535
10,0.400100,0.984325,0.701166,0.676829,0.688783,0.77669


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 13. Training using 56 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.919963,0.209953,0.03166,0.055023,0.196115
2,3.431200,2.140086,0.340748,0.237101,0.279629,0.435276
3,2.273800,1.52462,0.584259,0.555347,0.569436,0.675991
4,1.390800,1.253302,0.627873,0.602251,0.614795,0.712976
5,0.972700,1.101252,0.666181,0.643527,0.654658,0.746542
6,0.650600,1.014655,0.691562,0.668856,0.680019,0.767521
7,0.494400,0.982519,0.688604,0.667448,0.677861,0.768298
8,0.421200,0.967474,0.69377,0.67636,0.684954,0.776224
9,0.421200,0.956046,0.694911,0.67894,0.686833,0.77871
10,0.354000,0.954541,0.694932,0.678471,0.686603,0.779021


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 14. Training using 60 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.948535,0.178412,0.093809,0.122963,0.268531
2,3.565800,2.027055,0.455707,0.38485,0.417292,0.567521
3,2.304800,1.405475,0.604868,0.576923,0.590565,0.694794
4,1.463600,1.168481,0.65589,0.628049,0.641668,0.734266
5,0.926300,1.046125,0.692422,0.657833,0.674684,0.760995
6,0.619200,0.976923,0.695819,0.679174,0.687396,0.773737
7,0.477800,0.925523,0.708253,0.692308,0.70019,0.787257
8,0.392400,0.924362,0.712074,0.690197,0.700965,0.787102
9,0.345100,0.916223,0.713634,0.697233,0.705338,0.789588
10,0.308700,0.914938,0.711714,0.695356,0.70344,0.7885


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 15. Training using 64 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4349,2.874548,0.318777,0.05136,0.088467,0.212587
2,2.3486,1.916478,0.469758,0.413462,0.439815,0.588967
3,1.4357,1.333543,0.623291,0.598734,0.610766,0.706605
4,0.9258,1.116977,0.68265,0.647749,0.664741,0.758974
5,0.643,1.016135,0.695946,0.67636,0.686013,0.771251
6,0.4781,0.952749,0.701224,0.685272,0.693156,0.775136
7,0.3828,0.909283,0.716261,0.704503,0.710333,0.787568
8,0.3247,0.890552,0.718609,0.707317,0.712918,0.79021
9,0.2951,0.889914,0.718193,0.708255,0.713189,0.791142
10,0.2907,0.890968,0.718437,0.707317,0.712834,0.790676


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 16. Training using 68 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5205,2.929756,0.228328,0.069184,0.106192,0.232945
2,2.4523,1.997352,0.393852,0.288462,0.333017,0.490598
3,1.6401,1.346843,0.608111,0.583724,0.595668,0.696659
4,1.0224,1.137273,0.65252,0.634615,0.643443,0.740793
5,0.7474,0.968471,0.700097,0.679409,0.689598,0.779798
6,0.5294,0.914318,0.714354,0.699109,0.706649,0.796426
7,0.4147,0.90195,0.721916,0.706848,0.714303,0.79798
8,0.3773,0.873361,0.726256,0.715525,0.720851,0.803885
9,0.2997,0.873906,0.729162,0.715994,0.722518,0.804196
10,0.2986,0.87401,0.729704,0.716698,0.723142,0.804351


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 17. Training using 72 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4531,2.865665,0.252149,0.103189,0.146447,0.278166
2,2.4079,1.740667,0.546911,0.496248,0.520349,0.64087
3,1.4771,1.224596,0.636561,0.618199,0.627246,0.717638
4,0.9187,1.074604,0.68389,0.658068,0.67073,0.763636
5,0.7986,0.976819,0.697821,0.675891,0.686681,0.779798
6,0.5505,0.910327,0.715457,0.695826,0.705505,0.790054
7,0.4218,0.883951,0.725575,0.7106,0.718009,0.802797
8,0.3152,0.864562,0.727055,0.711538,0.719213,0.805284
9,0.281,0.868383,0.733828,0.71834,0.726001,0.808236
10,0.2706,0.866676,0.734611,0.719278,0.726863,0.808858


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 18. Training using 76 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5204,2.863238,0.153463,0.079503,0.104743,0.267754
2,2.54,1.782686,0.480562,0.417448,0.446787,0.599534
3,1.5824,1.213754,0.642247,0.616792,0.629262,0.723077
4,1.1622,1.009501,0.678658,0.654784,0.666508,0.761305
5,0.7212,0.867077,0.726502,0.70333,0.714728,0.802176
6,0.4282,0.8469,0.73892,0.715525,0.727034,0.805439
7,0.3519,0.84446,0.734948,0.718574,0.726669,0.807304
8,0.2529,0.805282,0.738237,0.724906,0.731511,0.810567
9,0.2458,0.815169,0.741533,0.723968,0.732645,0.8115
10,0.2465,0.815997,0.741239,0.724203,0.732622,0.811033


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 19. Training using 80 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4452,2.692529,0.274574,0.117261,0.164339,0.301321
2,2.33,1.569882,0.592122,0.535882,0.5626,0.675058
3,1.5034,1.133424,0.655863,0.621717,0.638334,0.738928
4,0.732,0.93938,0.718262,0.689962,0.703828,0.794561
5,0.5056,0.849569,0.731351,0.703565,0.717189,0.805284
6,0.3778,0.830911,0.735618,0.719747,0.727596,0.810723
7,0.3362,0.796996,0.745705,0.722795,0.734072,0.816939
8,0.2281,0.787434,0.745089,0.729362,0.737142,0.820357
9,0.2152,0.796701,0.743337,0.726079,0.734607,0.818493
10,0.1922,0.798776,0.743276,0.725844,0.734457,0.818493


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 20. Training using 84 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5177,2.71437,0.257042,0.136961,0.178703,0.335198
2,2.5041,1.566236,0.552989,0.505394,0.528122,0.64662
3,1.6006,1.098536,0.665779,0.645169,0.655312,0.751204
4,0.7203,0.933031,0.70625,0.689024,0.697531,0.789588
5,0.5141,0.842268,0.728148,0.709193,0.718546,0.804196
6,0.4469,0.834209,0.733253,0.713649,0.723318,0.806838
7,0.283,0.785638,0.746705,0.730769,0.738651,0.818648
8,0.2474,0.792267,0.743221,0.726313,0.73467,0.818337
9,0.1974,0.794481,0.747847,0.733114,0.740407,0.820513
10,0.1902,0.796108,0.746415,0.732411,0.739347,0.819736


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 21. Training using 88 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4992,2.665925,0.267201,0.148452,0.190864,0.346076
2,2.4644,1.525977,0.578621,0.547139,0.56244,0.668842
3,1.0379,1.099499,0.661368,0.643996,0.652567,0.747786
4,0.7195,0.945018,0.709858,0.682223,0.695767,0.787257
5,0.5724,0.83718,0.726813,0.707552,0.717053,0.804351
6,0.3382,0.830961,0.73176,0.719747,0.725703,0.808702
7,0.3013,0.79612,0.741095,0.727017,0.733988,0.818337
8,0.2328,0.803585,0.736679,0.72303,0.729791,0.815074
9,0.1817,0.804616,0.739079,0.726079,0.732521,0.816939
10,0.2116,0.8048,0.739265,0.726782,0.732971,0.817405


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 22. Training using 92 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5201,2.656676,0.322152,0.130629,0.185884,0.319814
2,2.4836,1.475407,0.574203,0.540807,0.557005,0.667599
3,1.1252,1.115788,0.666003,0.62758,0.646221,0.741725
4,0.8185,0.939785,0.714217,0.695122,0.70454,0.793939
5,0.4781,0.819287,0.730012,0.721623,0.725793,0.808702
6,0.3343,0.818916,0.736261,0.719512,0.72779,0.809169
7,0.2636,0.769499,0.745472,0.733583,0.73948,0.817716
8,0.193,0.790994,0.747091,0.737805,0.742419,0.819891
9,0.2249,0.789634,0.745476,0.734287,0.739839,0.820824
10,0.1706,0.790127,0.747266,0.737101,0.742149,0.822222


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 23. Training using 96 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.6531,2.674622,0.228811,0.153846,0.183985,0.344678
2,1.785,1.456024,0.595058,0.553471,0.573512,0.674126
3,1.1071,1.044384,0.679721,0.663462,0.671493,0.756022
4,0.6551,0.868393,0.737654,0.718105,0.727748,0.809479
5,0.4639,0.793002,0.753467,0.738977,0.746152,0.824553
6,0.3129,0.796789,0.747889,0.727017,0.737305,0.820513
7,0.2282,0.76861,0.759264,0.744841,0.751983,0.828127
8,0.184,0.759133,0.76317,0.74742,0.755213,0.831235
9,0.1666,0.759719,0.764565,0.747889,0.756135,0.832012
10,0.1599,0.759154,0.764368,0.748593,0.756398,0.832323


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 24. Training using 100 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5052,2.601119,0.282019,0.207083,0.23881,0.414918
2,1.7062,1.435568,0.606031,0.575047,0.590132,0.694949
3,1.1462,1.022162,0.700548,0.659475,0.679391,0.77094
4,0.5941,0.862916,0.72462,0.704737,0.71454,0.801554
5,0.4586,0.80527,0.747318,0.735225,0.741222,0.81958
6,0.3256,0.798512,0.749701,0.734053,0.741794,0.818959
7,0.2402,0.756686,0.76312,0.750235,0.756623,0.832012
8,0.2029,0.781841,0.756847,0.74531,0.751034,0.828594
9,0.1523,0.767526,0.762914,0.751642,0.757236,0.832168
10,0.1534,0.767498,0.763892,0.751173,0.757479,0.832323


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 25. Training using 104 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5949,2.599189,0.287506,0.145169,0.192925,0.345765
2,1.8036,1.401951,0.596251,0.552064,0.573307,0.682673
3,1.138,1.021175,0.691694,0.650328,0.670374,0.756954
4,0.6804,0.90682,0.725237,0.698874,0.711812,0.800622
5,0.4267,0.767014,0.762879,0.743199,0.75291,0.828438
6,0.3231,0.782669,0.754839,0.740854,0.747781,0.825796
7,0.2391,0.767538,0.761483,0.746482,0.753908,0.827661
8,0.1829,0.751637,0.770464,0.754925,0.762615,0.836053
9,0.1538,0.766503,0.765282,0.751642,0.7584,0.832479
10,0.1632,0.765914,0.764144,0.750704,0.757364,0.832168


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 26. Training using 108 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5534,2.502788,0.322252,0.222795,0.26345,0.42533
2,1.6893,1.313835,0.622161,0.584662,0.602829,0.700233
3,0.8836,0.993148,0.700789,0.666276,0.683097,0.77094
4,0.6732,0.846506,0.738951,0.713649,0.72608,0.806216
5,0.4154,0.789683,0.735148,0.728424,0.731771,0.815695
6,0.2945,0.771598,0.746089,0.727017,0.73643,0.820979
7,0.2375,0.767572,0.755131,0.742026,0.748521,0.829526
8,0.1851,0.754201,0.756731,0.744841,0.750739,0.831857
9,0.1583,0.75512,0.757634,0.744841,0.751183,0.831702
10,0.1511,0.75703,0.757807,0.745544,0.751625,0.832323


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 27. Training using 112 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5676,2.509139,0.347826,0.234522,0.280151,0.462782
2,1.7813,1.313878,0.615538,0.577861,0.596105,0.692774
3,0.896,0.968201,0.704734,0.677298,0.690744,0.781818
4,0.5168,0.806443,0.753325,0.730535,0.741755,0.821601
5,0.3984,0.760072,0.747224,0.741792,0.744498,0.826107
6,0.2736,0.740222,0.756705,0.747655,0.752153,0.830925
7,0.2079,0.748273,0.762402,0.753283,0.757815,0.834654
8,0.1673,0.747102,0.758523,0.756567,0.757544,0.833566
9,0.1452,0.74855,0.760237,0.753283,0.756744,0.833877
10,0.1458,0.74952,0.761106,0.755394,0.758239,0.833877


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 28. Training using 116 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5713,2.451866,0.291797,0.241088,0.26403,0.4446
2,1.8238,1.286317,0.618967,0.598499,0.608561,0.700233
3,0.8699,0.910021,0.724046,0.70333,0.713538,0.798757
4,0.5759,0.803459,0.747412,0.727955,0.737555,0.818182
5,0.349,0.774839,0.749819,0.726782,0.738121,0.817094
6,0.2651,0.757685,0.761984,0.749296,0.755587,0.828749
7,0.2062,0.737223,0.762234,0.748827,0.755471,0.832323
8,0.1514,0.743947,0.762728,0.755394,0.759043,0.8331
9,0.1345,0.744631,0.7638,0.756098,0.759929,0.834654
10,0.132,0.745827,0.763576,0.755159,0.759344,0.834188


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 29. Training using 120 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5339,2.441661,0.304404,0.165338,0.214286,0.367521
2,1.7963,1.325094,0.610657,0.577861,0.593806,0.693862
3,1.0037,0.950885,0.70183,0.683396,0.69249,0.780886
4,0.6115,0.790457,0.756296,0.739447,0.747777,0.827661
5,0.3862,0.74067,0.764607,0.748827,0.756635,0.831702
6,0.2646,0.72111,0.763446,0.752345,0.757855,0.836364
7,0.2461,0.744968,0.761735,0.749765,0.755703,0.83481
8,0.1609,0.728394,0.767101,0.760084,0.763576,0.840715
9,0.1569,0.737095,0.76699,0.759615,0.763285,0.840249
10,0.139,0.736802,0.766643,0.758912,0.762758,0.840249


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▂▁▃▄▃▆▆▆▃▇▇▆▂▄▇▇▇▂▇▇▇██▇▆████▆███▄██▇███
eval/f1,▂▁▃▃▂▅▆▆▃▆▆▆▁▃▇▇▆▁▇▇▇██▇▅████▆███▃██▇███
eval/loss,▇█▅▅▆▄▃▃▅▂▂▃▆▅▂▂▂▆▂▂▂▁▁▂▃▁▁▁▁▃▁▁▁▅▁▁▂▁▁▁
eval/precision,▂▁▄▄▄▅▆▆▄▇▇▆▁▄▇▇▆▃▇▇▇██▇▅████▆███▃██▇███
eval/recall,▂▁▂▃▂▅▆▅▂▆▆▆▁▂▇▇▆▁▇▇▇██▇▅████▆██▇▂██▇███
eval/runtime,█▂▃▁▁▇▁▁▂▁▂▃▄▁▂▁▂▁▂▁▁▂█▂▂▂▂▇▁▇▂▇▂▁▁▁█▂▁▂
eval/samples_per_second,▁▇▅██▁██▇█▇▅▅█▇▇▇█▇█▇▇▁▇▇▇▇▂▇▂▇▂▇▇▇▇▁▇█▆
eval/steps_per_second,▁▇▅██▁██▇█▇▅▅█▇▇▇█▇█▇▇▁▇▇▇▇▂▇▂▇▂▇▇▇▇▁▇█▆
train/epoch,▆█▂▃▃▄▃▃▃▁█▇▆▄▃█▆▄▂█▅▃█▅▂▇▄▁▆▃▇▄█▅▂▆▂▇▃▇
train/global_step,▁▁▁▁▁▂▂▂▂▁▄▄▃▃▂▅▄▃▂▅▄▂▆▄▂▆▄▁▅▃▇▄█▅▂▆▃▇▃█

0,1
eval/accuracy,0.84025
eval/f1,0.76276
eval/loss,0.7368
eval/precision,0.76664
eval/recall,0.75891
eval/runtime,0.9339
eval/samples_per_second,82.446
eval/steps_per_second,21.415
train/epoch,10.0
train/global_step,300.0


Current seed:  1255


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 0. Training using 4 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.890504,0.007775,0.009615,0.008598,0.013831
2,No log,3.611541,0.066031,0.081144,0.072811,0.128671
3,No log,3.383992,0.106147,0.065197,0.080779,0.215851
4,No log,3.214814,0.109375,0.02955,0.046529,0.198291
5,No log,3.102307,0.111389,0.020872,0.035157,0.191142
6,No log,3.033006,0.130102,0.023921,0.040412,0.195493
7,No log,2.990445,0.153576,0.030722,0.051202,0.204507
8,No log,2.966095,0.166488,0.036351,0.059673,0.21181
9,No log,2.954934,0.179104,0.042214,0.068324,0.218959
10,No log,2.952078,0.182796,0.043856,0.07074,0.219891


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 1. Training using 8 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.720268,0.023158,0.02955,0.025966,0.046465
2,No log,3.307484,0.094089,0.036585,0.052685,0.195027
3,No log,3.053767,0.172065,0.019934,0.035729,0.180264
4,No log,2.90645,0.266667,0.060976,0.099256,0.224553
5,No log,2.803865,0.294221,0.118199,0.168646,0.297902
6,No log,2.719258,0.304041,0.181754,0.227506,0.376845
7,No log,2.653835,0.332523,0.224672,0.26816,0.422844
8,2.761600,2.614486,0.34081,0.238743,0.280789,0.440093
9,2.761600,2.596271,0.346574,0.245544,0.28744,0.446775
10,2.761600,2.592486,0.34702,0.245779,0.287754,0.447397


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 2. Training using 12 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.59948,0.051413,0.058865,0.054887,0.101787
2,No log,3.051762,0.131649,0.023218,0.039474,0.195027
3,No log,2.825932,0.256136,0.09545,0.139074,0.282828
4,No log,2.654861,0.281706,0.181285,0.220605,0.390987
5,No log,2.498852,0.32291,0.230066,0.268694,0.443201
6,2.827100,2.362991,0.35689,0.260553,0.301206,0.469308
7,2.827100,2.270739,0.381887,0.282833,0.32498,0.486402
8,2.827100,2.219261,0.390826,0.291745,0.334094,0.493551
9,2.827100,2.198419,0.392657,0.295966,0.337523,0.495882
10,2.827100,2.194487,0.392224,0.295732,0.337211,0.496037


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 3. Training using 16 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.504719,0.073613,0.066604,0.069934,0.172805
2,No log,2.988256,0.158582,0.079737,0.106117,0.258741
3,No log,2.705151,0.286453,0.129925,0.178767,0.325719
4,2.956300,2.455514,0.308471,0.214353,0.25294,0.446154
5,2.956300,2.242633,0.377488,0.284709,0.324599,0.504429
6,2.956300,2.076386,0.432618,0.340291,0.38094,0.542347
7,2.956300,1.972882,0.45646,0.376173,0.412445,0.566434
8,1.621900,1.918095,0.468898,0.394231,0.428335,0.581197
9,1.621900,1.895371,0.475954,0.403846,0.436945,0.587257
10,1.621900,1.891112,0.477624,0.405488,0.43861,0.588345


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 4. Training using 20 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.389183,0.095967,0.076454,0.085106,0.201709
2,No log,2.860447,0.190257,0.135553,0.158313,0.314375
3,No log,2.576216,0.314848,0.243668,0.274722,0.456876
4,3.055300,2.287639,0.40393,0.347092,0.37336,0.538772
5,3.055300,2.00191,0.480233,0.424484,0.450641,0.599689
6,3.055300,1.828378,0.520463,0.474203,0.496257,0.630614
7,1.759800,1.72519,0.526978,0.481004,0.502943,0.634343
8,1.759800,1.670491,0.533981,0.49015,0.511127,0.64087
9,1.759800,1.648823,0.543528,0.499296,0.520474,0.646309
10,1.190300,1.645004,0.544711,0.5,0.521399,0.647086


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 5. Training using 24 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.372957,0.09507,0.08818,0.091495,0.219891
2,No log,2.845937,0.131232,0.083959,0.102403,0.254545
3,3.204100,2.50605,0.343208,0.301595,0.321059,0.498524
4,3.204100,2.125235,0.434586,0.382505,0.406885,0.5669
5,3.204100,1.825141,0.516475,0.474203,0.494437,0.62735
6,1.820600,1.648847,0.552362,0.518293,0.534785,0.655012
7,1.820600,1.547851,0.569314,0.541276,0.554941,0.66791
8,1.161700,1.496557,0.582695,0.552767,0.567337,0.675991
9,1.161700,1.476824,0.585986,0.556989,0.571119,0.678011
10,1.161700,1.47363,0.585721,0.556051,0.5705,0.6777


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 6. Training using 28 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.190048,0.118357,0.057458,0.07736,0.216472
2,No log,2.767704,0.239595,0.099906,0.141013,0.290287
3,3.211200,2.4086,0.324887,0.25258,0.284206,0.45843
4,3.211200,2.066744,0.439084,0.382036,0.408578,0.558197
5,2.028900,1.747082,0.527575,0.47561,0.500247,0.634188
6,2.028900,1.546829,0.562862,0.523921,0.542694,0.659674
7,1.266000,1.433958,0.590659,0.554644,0.572085,0.678788
8,1.266000,1.387473,0.605092,0.56848,0.586215,0.685625
9,1.266000,1.364448,0.608968,0.570122,0.588905,0.689821
10,0.885600,1.360871,0.608609,0.570356,0.588862,0.689821


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 7. Training using 32 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.18666,0.104635,0.069887,0.083802,0.227195
2,3.258900,2.611168,0.260568,0.193715,0.222222,0.404507
3,3.258900,2.076184,0.452176,0.3947,0.421488,0.58073
4,1.937500,1.680316,0.550194,0.5,0.523897,0.649883
5,1.937500,1.484196,0.576661,0.535413,0.555272,0.671018
6,1.119900,1.356922,0.614482,0.577158,0.595235,0.700855
7,1.119900,1.285288,0.627446,0.594043,0.610288,0.71453
8,0.775600,1.262433,0.627797,0.592167,0.609462,0.713598
9,0.775600,1.240436,0.632375,0.598265,0.614847,0.719658
10,0.673400,1.237524,0.633143,0.599437,0.615829,0.720435


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 8. Training using 36 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.096943,0.046154,0.004925,0.0089,0.16488
2,3.332500,2.561399,0.285911,0.195122,0.231949,0.398757
3,3.332500,2.037791,0.432555,0.356473,0.390846,0.545921
4,2.082200,1.623399,0.54697,0.484756,0.513987,0.638539
5,2.082200,1.353467,0.611607,0.57833,0.594503,0.699922
6,1.205200,1.24001,0.623196,0.587242,0.604685,0.710179
7,1.205200,1.17414,0.633505,0.604831,0.618836,0.719192
8,0.815100,1.138801,0.64636,0.612101,0.628764,0.725874
9,0.634500,1.115509,0.646611,0.617495,0.631718,0.728827
10,0.634500,1.114414,0.647925,0.618902,0.633081,0.729293


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 9. Training using 40 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.040952,0.227928,0.059334,0.094157,0.205905
2,3.301300,2.38486,0.369786,0.24742,0.296473,0.441026
3,3.301300,1.776514,0.536884,0.472795,0.502806,0.632789
4,1.974100,1.455062,0.6,0.561445,0.580082,0.6892
5,1.149900,1.247967,0.637847,0.614212,0.625806,0.719969
6,1.149900,1.158079,0.654771,0.632505,0.643445,0.739083
7,0.730600,1.100971,0.673359,0.644934,0.65884,0.74934
8,0.606200,1.073525,0.672852,0.646341,0.65933,0.75136
9,0.606200,1.067717,0.674731,0.647514,0.660843,0.754002
10,0.497300,1.067341,0.674652,0.64728,0.660682,0.754002


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 10. Training using 44 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.043948,0.207224,0.025563,0.045511,0.183061
2,3.361100,2.239484,0.381273,0.292214,0.330855,0.496348
3,1.983900,1.596344,0.567533,0.52228,0.543967,0.659207
4,1.983900,1.329935,0.612872,0.589587,0.601004,0.701166
5,1.180900,1.279725,0.618779,0.591932,0.605058,0.709246
6,0.807000,1.100255,0.653967,0.636023,0.64487,0.738462
7,0.807000,1.05181,0.668128,0.643527,0.655597,0.747164
8,0.588600,1.018771,0.674746,0.652908,0.663647,0.754312
9,0.485600,1.009335,0.670944,0.649859,0.660234,0.7554
10,0.485600,1.0075,0.671996,0.650563,0.661106,0.756333


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 11. Training using 48 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.053118,0.15625,0.015244,0.027778,0.174514
2,3.353800,2.397621,0.335025,0.247655,0.28479,0.455012
3,2.309900,1.716245,0.542421,0.491792,0.515867,0.637296
4,1.343600,1.335454,0.612195,0.588649,0.600191,0.697747
5,1.343600,1.175098,0.644133,0.625704,0.634785,0.730847
6,0.860000,1.108803,0.660131,0.63954,0.649672,0.746076
7,0.572300,1.047801,0.67316,0.656426,0.664688,0.758353
8,0.484300,1.029289,0.679894,0.662992,0.671337,0.765812
9,0.484300,1.025081,0.6808,0.662758,0.671658,0.767521
10,0.441000,1.024001,0.680866,0.663462,0.672051,0.767521


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 12. Training using 52 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.025859,0.016854,0.000704,0.001351,0.158042
2,3.463700,2.295817,0.331709,0.223499,0.267059,0.425486
3,2.325300,1.578774,0.556046,0.50258,0.527963,0.651127
4,1.427100,1.264231,0.627152,0.59803,0.612245,0.713908
5,0.888100,1.13893,0.651504,0.624765,0.637855,0.742036
6,0.888100,1.013039,0.686553,0.669325,0.677829,0.768298
7,0.680800,0.975444,0.704318,0.677064,0.690422,0.781974
8,0.534900,0.954745,0.706407,0.687852,0.697006,0.784615
9,0.418600,0.94245,0.708404,0.689962,0.699061,0.785859
10,0.394900,0.94067,0.709056,0.690432,0.69962,0.786636


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 13. Training using 56 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,3.033227,0.167588,0.07106,0.099802,0.231391
2,3.497600,2.212329,0.391071,0.324578,0.354735,0.528361
3,2.422000,1.562124,0.559696,0.517824,0.537946,0.657187
4,1.467400,1.276933,0.61913,0.587477,0.602888,0.703341
5,1.005400,1.035073,0.692967,0.663227,0.677771,0.769542
6,0.651800,0.937096,0.724542,0.695826,0.709894,0.798291
7,0.489800,0.903341,0.730621,0.711773,0.721074,0.804973
8,0.382500,0.888137,0.738611,0.711069,0.724579,0.808392
9,0.382500,0.883438,0.739961,0.717402,0.728507,0.811344
10,0.344600,0.882206,0.739583,0.715994,0.727598,0.810878


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 14. Training using 60 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,No log,2.91301,0.208912,0.067073,0.101544,0.236364
2,3.409700,2.03491,0.417951,0.36257,0.388296,0.543745
3,2.383100,1.440348,0.590772,0.552533,0.571013,0.680963
4,1.478100,1.120489,0.656113,0.631801,0.643728,0.742657
5,0.977100,1.019204,0.681807,0.665338,0.673472,0.770319
6,0.655000,0.940745,0.707838,0.684099,0.695766,0.786169
7,0.525200,0.90423,0.720077,0.699812,0.7098,0.79798
8,0.416200,0.897543,0.71999,0.706144,0.713,0.800466
9,0.344900,0.886363,0.724006,0.708724,0.716283,0.802953
10,0.340000,0.888486,0.723404,0.709662,0.716467,0.802797


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 15. Training using 64 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5108,2.943848,0.180873,0.020403,0.03667,0.186014
2,2.5127,2.050392,0.41801,0.35272,0.3826,0.543745
3,1.6126,1.421814,0.595564,0.560507,0.577504,0.682828
4,1.0348,1.086107,0.673129,0.643293,0.657873,0.757887
5,0.7277,0.974714,0.696933,0.671435,0.683946,0.778555
6,0.5293,0.903798,0.72346,0.708021,0.715657,0.800155
7,0.4204,0.87806,0.722663,0.705206,0.713828,0.802176
8,0.355,0.879864,0.724502,0.708021,0.716167,0.805439
9,0.3156,0.869398,0.725791,0.710131,0.717876,0.807148
10,0.3039,0.869048,0.726402,0.711069,0.718654,0.807459


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 16. Training using 68 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5029,2.862248,0.240409,0.066135,0.103734,0.24087
2,2.4547,1.880585,0.50436,0.434099,0.466599,0.611189
3,1.5383,1.30638,0.624209,0.578096,0.600268,0.708314
4,0.9724,1.122742,0.649766,0.619137,0.634082,0.740482
5,0.7554,0.936688,0.717084,0.678236,0.697119,0.789122
6,0.5689,0.873715,0.730732,0.711538,0.721008,0.806838
7,0.418,0.860825,0.729375,0.717402,0.723339,0.808858
8,0.3173,0.844457,0.739048,0.727955,0.733459,0.817405
9,0.2882,0.840512,0.739866,0.72772,0.733743,0.818803
10,0.291,0.839691,0.739618,0.726782,0.733144,0.818493


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 17. Training using 72 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5323,2.923572,0.179104,0.014071,0.026093,0.188034
2,2.5754,1.895197,0.460831,0.387664,0.421093,0.562859
3,1.621,1.358789,0.606383,0.561445,0.583049,0.689355
4,1.1174,1.066442,0.680364,0.648452,0.664025,0.753225
5,0.7402,0.940253,0.708995,0.69137,0.700071,0.788656
6,0.5778,0.863017,0.725411,0.714353,0.719839,0.807459
7,0.4568,0.833691,0.740005,0.724906,0.732378,0.814608
8,0.3214,0.830786,0.72933,0.717871,0.723555,0.809479
9,0.2719,0.827607,0.73688,0.724437,0.730605,0.81352
10,0.2697,0.826413,0.737431,0.725844,0.731592,0.814608


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 18. Training using 76 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4597,2.808935,0.269036,0.087008,0.13149,0.276768
2,2.5238,1.76019,0.501053,0.446295,0.472091,0.604196
3,1.6135,1.214684,0.627031,0.597326,0.611818,0.714841
4,0.9992,0.963155,0.703867,0.674484,0.688862,0.782129
5,0.7189,0.893165,0.719115,0.69348,0.706065,0.797047
6,0.4306,0.844855,0.721085,0.7106,0.715804,0.802486
7,0.3268,0.811811,0.738209,0.726782,0.732451,0.815695
8,0.2841,0.821868,0.737333,0.723499,0.73035,0.815695
9,0.2612,0.812123,0.73567,0.728424,0.732029,0.816472
10,0.2207,0.8107,0.736144,0.728893,0.732501,0.817094


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 19. Training using 80 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5755,2.803001,0.178105,0.025563,0.044709,0.21756
2,2.5204,1.660889,0.540072,0.489916,0.513773,0.637141
3,1.6299,1.198269,0.656662,0.631098,0.643626,0.736286
4,0.7949,0.989001,0.706739,0.671435,0.688635,0.777312
5,0.5939,0.894856,0.719797,0.700047,0.709785,0.798446
6,0.3988,0.831797,0.743596,0.728424,0.735932,0.817405
7,0.3957,0.825256,0.741269,0.731707,0.736457,0.81756
8,0.2404,0.819007,0.746148,0.738274,0.74219,0.822999
9,0.2277,0.826533,0.745549,0.736632,0.741064,0.820979
10,0.214,0.825189,0.745429,0.736163,0.740767,0.820668


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 20. Training using 84 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.4568,2.735788,0.208304,0.137664,0.165772,0.336286
2,2.5416,1.56783,0.553131,0.505394,0.528186,0.64864
3,1.6436,1.187577,0.634936,0.606942,0.620624,0.721834
4,0.7831,0.929066,0.710256,0.696764,0.703445,0.792385
5,0.6211,0.867917,0.728565,0.709428,0.718869,0.804662
6,0.4476,0.821905,0.732589,0.722795,0.727659,0.813054
7,0.2967,0.802018,0.73991,0.735225,0.73756,0.819425
8,0.2494,0.797528,0.745182,0.734522,0.739813,0.822222
9,0.2447,0.796089,0.749582,0.736398,0.742932,0.823776
10,0.1983,0.797986,0.749224,0.735694,0.742397,0.823465


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 21. Training using 88 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5376,2.684913,0.244652,0.128752,0.168715,0.323388
2,2.5624,1.542428,0.574615,0.525563,0.548996,0.661383
3,1.0884,1.137141,0.663112,0.641651,0.652205,0.74934
4,0.8495,0.928476,0.713977,0.697233,0.705505,0.794406
5,0.5941,0.851578,0.733397,0.722561,0.727939,0.808081
6,0.3533,0.823712,0.738197,0.726079,0.732088,0.816939
7,0.329,0.813296,0.745,0.733818,0.739367,0.820513
8,0.242,0.828376,0.739141,0.7303,0.734694,0.817871
9,0.1976,0.822903,0.742126,0.734991,0.738541,0.821756
10,0.1994,0.8251,0.743766,0.734522,0.739115,0.822067


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 22. Training using 92 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5256,2.759676,0.28128,0.068011,0.109537,0.255167
2,2.6811,1.569579,0.560491,0.503049,0.530219,0.644911
3,1.1621,1.080947,0.699382,0.663462,0.680948,0.765501
4,0.8861,0.880766,0.728716,0.7106,0.719544,0.803263
5,0.5084,0.847917,0.730778,0.719981,0.72534,0.807459
6,0.3672,0.775676,0.754448,0.745779,0.750088,0.826418
7,0.2534,0.782937,0.751072,0.739447,0.745214,0.825486
8,0.2192,0.784093,0.752019,0.742495,0.747227,0.826573
9,0.2065,0.780489,0.75447,0.742261,0.748315,0.826263
10,0.1887,0.779631,0.757208,0.74531,0.751211,0.827817


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 23. Training using 96 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.6068,2.681284,0.305422,0.104362,0.155567,0.285936
2,1.7305,1.514061,0.580595,0.522045,0.549765,0.662937
3,1.1147,1.039401,0.692765,0.655722,0.673735,0.768298
4,0.6372,0.939728,0.716019,0.691839,0.703721,0.792852
5,0.4618,0.823516,0.75186,0.734756,0.74321,0.822533
6,0.3236,0.80666,0.745093,0.730066,0.737503,0.816006
7,0.2721,0.793265,0.755136,0.741323,0.748166,0.825952
8,0.2359,0.792035,0.753013,0.747186,0.750088,0.828283
9,0.1834,0.794107,0.754574,0.744841,0.749675,0.828127
10,0.1817,0.795195,0.752438,0.742026,0.747196,0.82704


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 24. Training using 100 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5947,2.683186,0.275524,0.098734,0.145373,0.291375
2,1.8442,1.515877,0.552112,0.518058,0.534543,0.658741
3,1.247,1.089729,0.668035,0.648921,0.658339,0.751204
4,0.6249,0.856444,0.737894,0.71834,0.727986,0.808081
5,0.5231,0.794518,0.74591,0.727017,0.736342,0.816783
6,0.3532,0.807348,0.734,0.723499,0.728711,0.814918
7,0.2659,0.786152,0.746496,0.736867,0.74165,0.821756
8,0.2043,0.771951,0.754569,0.745544,0.750029,0.831235
9,0.1798,0.772934,0.755603,0.743199,0.74935,0.828749
10,0.1734,0.774465,0.755,0.743668,0.749291,0.828749


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 25. Training using 104 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5951,2.634899,0.204409,0.143527,0.168641,0.34359
2,1.8309,1.424772,0.583576,0.564962,0.574118,0.684848
3,1.3152,0.990592,0.694187,0.66651,0.680067,0.779798
4,0.6514,0.870328,0.722155,0.694887,0.708259,0.796737
5,0.4471,0.787902,0.736728,0.719278,0.727898,0.81352
6,0.3193,0.799106,0.735005,0.735694,0.735349,0.820047
7,0.2255,0.761326,0.760483,0.752814,0.756629,0.836208
8,0.1888,0.766338,0.755833,0.752111,0.753967,0.8331
9,0.1571,0.775813,0.7513,0.74531,0.748293,0.830458
10,0.1711,0.775147,0.75266,0.746482,0.749558,0.831391


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 26. Training using 108 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.6286,2.513445,0.262992,0.15666,0.196355,0.361461
2,1.7168,1.291208,0.642628,0.610225,0.626007,0.72028
3,0.8657,0.95593,0.693253,0.674719,0.68386,0.775447
4,0.6215,0.807473,0.734689,0.728659,0.731661,0.817249
5,0.3768,0.830033,0.732438,0.716463,0.724363,0.812121
6,0.2974,0.751787,0.754645,0.742964,0.748759,0.827972
7,0.2291,0.760545,0.74539,0.739447,0.742406,0.82735
8,0.1691,0.736781,0.75867,0.749062,0.753835,0.833722
9,0.1395,0.749432,0.755797,0.749062,0.752415,0.832323
10,0.1505,0.749112,0.754556,0.747655,0.75109,0.831857


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 27. Training using 112 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5399,2.472526,0.277232,0.158771,0.201909,0.359441
2,1.7797,1.295073,0.62359,0.596388,0.609686,0.712044
3,0.8738,0.95444,0.705551,0.676595,0.69077,0.785392
4,0.5827,0.815291,0.732044,0.709897,0.7208,0.807615
5,0.4096,0.745181,0.744564,0.730769,0.737602,0.821911
6,0.3015,0.781957,0.739807,0.736163,0.73798,0.822999
7,0.2326,0.798967,0.740364,0.734287,0.737313,0.824553
8,0.189,0.764144,0.748992,0.740385,0.744663,0.828283
9,0.1624,0.78175,0.748405,0.742964,0.745675,0.829371
10,0.1515,0.78612,0.746635,0.741557,0.744088,0.828594


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 28. Training using 116 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5351,2.405599,0.333669,0.23288,0.274309,0.424398
2,1.728,1.317265,0.59832,0.568011,0.582772,0.696348
3,0.9045,0.973773,0.697674,0.682458,0.689982,0.777778
4,0.5692,0.880774,0.702212,0.707317,0.704755,0.791142
5,0.3586,0.810493,0.736405,0.727251,0.731799,0.817249
6,0.3016,0.792487,0.744476,0.74273,0.743602,0.826107
7,0.235,0.786996,0.737326,0.733349,0.735332,0.823776
8,0.1774,0.781295,0.752354,0.749531,0.75094,0.829992
9,0.1314,0.789515,0.753726,0.747186,0.750442,0.829681
10,0.1317,0.787946,0.754543,0.749765,0.752147,0.831546


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]


Iteration: 29. Training using 120 samples


Some weights of BertForTokenClassification were not initialized from the model checkpoint at alexyalunin/RuBioBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Precision,Recall,F1,Accuracy
1,3.5585,2.422579,0.359664,0.241323,0.288842,0.437918
2,1.7754,1.246474,0.64417,0.624531,0.634199,0.728205
3,0.9116,0.914568,0.715934,0.683865,0.699532,0.789588
4,0.561,0.800967,0.730596,0.721857,0.7262,0.813209
5,0.3782,0.762823,0.749642,0.736632,0.74308,0.825175
6,0.2603,0.746773,0.741392,0.732176,0.736755,0.820824
7,0.1738,0.767431,0.744093,0.738508,0.74129,0.824864
8,0.1533,0.760718,0.750591,0.744606,0.747587,0.828283
9,0.1313,0.771901,0.752435,0.74273,0.747551,0.828283
10,0.1158,0.769717,0.751067,0.742964,0.746994,0.828749


Filter:   0%|          | 0/612 [00:00<?, ? examples/s]

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▂▁▅▅▃▆▆▆▃▇▇▆▂▅▇▇▇▃██▇███▆████▆███▄██████
eval/f1,▁▁▄▄▂▅▆▆▂▆▇▆▂▄▇▇▇▂█▇▆███▅████▆███▂██▇███
eval/loss,▇█▅▅▆▄▃▄▆▂▂▃▆▅▂▂▂▆▁▁▃▁▁▁▃▁▁▁▁▃▁▁▁▅▁▁▁▁▁▁
eval/precision,▂▁▄▄▄▅▆▆▃▇▇▆▃▄▇▇▇▂██▆███▆████▆███▃██████
eval/recall,▁▁▃▃▂▅▅▅▂▆▆▅▁▄▇▇▇▁█▇▆██▇▅████▆███▂██▇███
eval/runtime,▇▂▃▁▁▇▂▁▁▂▇▂▁▇▇█▇▇▁▁▂▂█▂▂▇▁▂▁▇▂▇▂▁▁▁█▂▁▁
eval/samples_per_second,▂▆▆█▇▂▇▇▇▇▂▇█▂▂▁▂▂██▇▇▁▆▇▂█▇▇▂▇▂▇█▇▇▁▇▇█
eval/steps_per_second,▂▆▆█▇▂▇▇▇▇▂▇█▂▂▁▂▂██▇▇▁▆▇▂█▇▇▂▇▂▇█▇▇▁▇▇█
train/epoch,▆█▂▃▃▄▃▃▃▁█▇▆▄▃█▆▄▂█▅▃█▅▂▇▄▁▆▃▇▄█▅▂▆▂▇▃▇
train/global_step,▁▁▁▁▁▂▂▂▂▁▄▄▃▃▂▅▄▃▂▅▄▂▆▄▂▆▄▁▅▃▇▄█▅▂▆▃▇▃█

0,1
eval/accuracy,0.82875
eval/f1,0.74699
eval/loss,0.76972
eval/precision,0.75107
eval/recall,0.74296
eval/runtime,0.8875
eval/samples_per_second,86.763
eval/steps_per_second,22.536
train/epoch,10.0
train/global_step,300.0
