In [None]:
!pip install transformers datasets evaluate seqeval


Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━

In [None]:
from transformers import AutoTokenizer, DataCollatorForTokenClassification, Trainer, TrainingArguments, AutoModelForTokenClassification
from datasets import Dataset
from sklearn.metrics import classification_report
import numpy as np
import torch


In [None]:
import os
import zipfile

# Archive location for every corpus, keyed by corpus name.
# All archives share the "/content/<corpus>-IOBES.zip" naming pattern, so the
# mapping is generated from the list of corpus names.
dataset_zip_files = {
    corpus: f"/content/{corpus}-IOBES.zip"
    for corpus in ("BC2GM", "BC4CHEMD", "BC5CDR", "JNLPBA", "NCBI")
}

def read_tsv(file_path):
    """Parse a token/tag file into sentences and their tag sequences.

    Each non-blank line must be ``token<TAB>tag``; blank lines separate
    sentences (consecutive blank lines are treated as one separator).

    Args:
        file_path: Path of the UTF-8 encoded ``.tsv`` file to read.

    Returns:
        A dict with two parallel lists: ``"sentences"`` (list of token lists)
        and ``"labels"`` (list of tag lists).

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            tab-separated fields; the message names the file and line.
    """
    sentences, labels = [], []
    sentence, label = [], []
    with open(file_path, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                # Sentence boundary: flush the sentence collected so far.
                if sentence:
                    sentences.append(sentence)
                    labels.append(label)
                    sentence, label = [], []
                continue
            fields = line.split("\t")
            if len(fields) != 2:
                # Same exception type as a failed tuple unpacking, but with
                # enough context to locate the offending line.
                raise ValueError(
                    f"{file_path}:{line_number}: expected 'token<TAB>tag', got {line!r}"
                )
            token, tag = fields
            sentence.append(token)
            label.append(tag)
    # The file may end without a trailing blank line.
    if sentence:
        sentences.append(sentence)
        labels.append(label)
    return {"sentences": sentences, "labels": labels}


# Extract every archive and load its train/test splits into memory.
all_datasets = {}
for task, zip_path in dataset_zip_files.items():
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(f"/content/{task}")
    train_file = f"/content/{task}/{task}-IOBES/train.tsv"
    test_file = f"/content/{task}/{task}-IOBES/test.tsv"

    all_datasets[task] = {
        "train": read_tsv(train_file),
        "test": read_tsv(test_file),
    }


In [None]:

# One BioBERT tokenizer shared by every corpus.
tokenizer = AutoTokenizer.from_pretrained("dmis-lab/biobert-base-cased-v1.1")

# Per-corpus tag vocabularies: tag -> integer id, and the inverse id -> tag.
# Tags are collected from both splits and numbered in sorted order.
label_to_id, id_to_label = {}, {}
for task, splits in all_datasets.items():
    unique_labels = set()
    for split_name in ("train", "test"):
        for tag_sequence in splits[split_name]["labels"]:
            unique_labels.update(tag_sequence)
    ordered_tags = sorted(unique_labels)
    label_to_id[task] = dict(zip(ordered_tags, range(len(ordered_tags))))
    id_to_label[task] = dict(enumerate(ordered_tags))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

In [None]:
from datasets import Dataset

def preprocess_data(sentences, labels, label_to_id, max_length=128, label_all_tokens=True):
    """Tokenize pre-split sentences and align word-level tags to sub-tokens.

    Uses the module-level ``tokenizer``. Positions that do not belong to any
    word (``word_ids`` reports ``None`` for them) receive the marker ``-100``,
    which ``compute_metrics`` filters out.

    Args:
        sentences: List of sentences, each a list of word strings.
        labels: List of tag sequences parallel to ``sentences``.
        label_to_id: Mapping from tag string to integer id.
        max_length: Maximum encoded length passed to the tokenizer
            (longer inputs are truncated). Defaults to 128.
        label_all_tokens: If True (default), every sub-token of a word gets
            that word's tag id. If False, only the first sub-token of each
            word is labelled and the remaining ones get ``-100``.

    Returns:
        The tokenizer output with an added ``"labels"`` entry holding one list
        of integer ids per sentence.

    Raises:
        ValueError: If ``sentences`` and ``labels`` differ in length.
    """
    if len(sentences) != len(labels):
        raise ValueError(
            f"Got {len(sentences)} sentences but {len(labels)} label sequences"
        )

    tokenized_inputs = tokenizer(
        sentences,
        truncation=True,
        padding=True,
        is_split_into_words=True,
        max_length=max_length,
    )

    tokenized_labels = []
    for i, label_seq in enumerate(labels):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        label_ids = []
        previous_word_idx = None
        for word_idx in word_ids:
            if word_idx is None:
                # Not part of any input word.
                label_ids.append(-100)
            elif label_all_tokens or word_idx != previous_word_idx:
                label_ids.append(label_to_id[label_seq[word_idx]])
            else:
                # Continuation sub-token of an already labelled word.
                label_ids.append(-100)
            previous_word_idx = word_idx
        tokenized_labels.append(label_ids)

    tokenized_inputs["labels"] = tokenized_labels
    return tokenized_inputs

# Encode the train and test split of every corpus and wrap each result in a
# `Dataset`, keyed first by corpus name and then by split name.
tokenized_datasets = {}
for task, splits in all_datasets.items():
    task_label_map = label_to_id[task]
    encoded_splits = {}
    for split_name in ("train", "test"):
        raw_split = splits[split_name]
        encoded = preprocess_data(
            raw_split["sentences"],
            raw_split["labels"],
            task_label_map,
        )
        encoded_splits[split_name] = Dataset.from_dict(encoded)
    tokenized_datasets[task] = encoded_splits


In [None]:
class MultiTaskNER(torch.nn.Module):
    """Holds one token-classification model per task, keyed by task name.

    Every task gets its own model loaded from the same pretrained checkpoint;
    the models are stored side by side in a ``ModuleDict`` named ``models``.
    """

    def __init__(self, num_labels_dict):
        """Load one pretrained model per entry of ``num_labels_dict``.

        Args:
            num_labels_dict: Mapping from task name to the number of labels
                that task's model should predict.
        """
        super().__init__()
        per_task_models = {}
        for task_name, label_count in num_labels_dict.items():
            per_task_models[task_name] = AutoModelForTokenClassification.from_pretrained(
                "dmis-lab/biobert-base-cased-v1.1",
                num_labels=label_count,
            )
        self.models = torch.nn.ModuleDict(per_task_models)

    def forward(self, input_ids, attention_mask, labels=None, task_name=None):
        """Run the model registered under ``task_name`` and return its output.

        ``input_ids``, ``attention_mask`` and ``labels`` are forwarded
        unchanged as keyword arguments.
        """
        task_model = self.models[task_name]
        return task_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels,
        )

# Number of distinct tags per task, passed as `num_labels` for that task's model.
num_labels_dict = dict((name, len(label_to_id[name])) for name in all_datasets)
# Building the wrapper loads one pretrained model per task.
multi_task_model = MultiTaskNER(num_labels_dict)


pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkpoint at dmis-lab/biobert-base-cased-v1.1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForTokenClassification were not initialized from the model checkp

In [None]:
def compute_metrics(predictions, labels, id_to_label):
    """Score token-level predictions, ignoring positions labelled ``-100``.

    The scores are computed per token over the flattened tag sequences (not
    per entity span).

    Args:
        predictions: Array-like of scores whose last (third) axis ranges over
            label ids; the arg-max over that axis is the predicted id.
        labels: Array-like of gold label ids with the same leading two
            dimensions as ``predictions``; ``-100`` marks positions to skip.
        id_to_label: Mapping from integer label id to tag string.

    Returns:
        A dict containing ``"classification_report"`` (the formatted text
        report) and, when the report provides them, the numeric entries
        ``"accuracy"``, ``"macro_precision"``, ``"macro_recall"`` and
        ``"macro_f1"``. The text report alone cannot be logged as a scalar,
        so the numeric entries are what make the metrics trackable.
    """
    predicted_ids = np.argmax(predictions, axis=2)
    label_ids = np.asarray(labels)

    # One boolean mask selects the scored positions for both arrays; boolean
    # indexing flattens row by row, i.e. sequence by sequence.
    keep = label_ids != -100
    true_predictions = [id_to_label[i] for i in predicted_ids[keep].tolist()]
    true_labels = [id_to_label[i] for i in label_ids[keep].tolist()]

    # Human-readable report (kept under its original key).
    report = classification_report(
        true_labels,
        true_predictions,
        zero_division=0,
    )
    # Same report as a dict, used to extract numeric summary metrics.
    report_dict = classification_report(
        true_labels,
        true_predictions,
        zero_division=0,
        output_dict=True,
    )

    metrics = {"classification_report": report}
    if "accuracy" in report_dict:
        metrics["accuracy"] = float(report_dict["accuracy"])
    macro = report_dict.get("macro avg")
    if macro:
        metrics["macro_precision"] = float(macro["precision"])
        metrics["macro_recall"] = float(macro["recall"])
        metrics["macro_f1"] = float(macro["f1-score"])
    return metrics

# Fine-tune and evaluate one model per task, keeping the final evaluation
# metrics of every task for later inspection.
#
# NOTE(review): the test split is used as `eval_dataset` and
# `load_best_model_at_end=True` selects the checkpoint with it, so the reported
# test scores also drove model selection — consider a separate validation split.
# NOTE(review): `evaluation_strategy` is, as far as I know, deprecated in favour
# of `eval_strategy` in recent transformers releases — confirm against the
# installed version before renaming.
evaluation_results = {}
for task in tokenized_datasets:
    training_args = TrainingArguments(
        output_dir=f"./results/{task}",
        evaluation_strategy="steps",
        save_strategy="steps",
        learning_rate=5e-5,
        per_device_train_batch_size=8,
        num_train_epochs=3,
        weight_decay=0.01,
        save_total_limit=2,
        load_best_model_at_end=True
    )

    trainer = Trainer(
        model=multi_task_model.models[task],
        args=training_args,
        train_dataset=tokenized_datasets[task]["train"],
        eval_dataset=tokenized_datasets[task]["test"],
        tokenizer=tokenizer,
        # Bind this task's id->tag map when the lambda is created (default
        # argument) instead of looking up the loop variable `task` at call time.
        compute_metrics=lambda p, task_id_to_label=id_to_label[task]: compute_metrics(
            p.predictions, p.label_ids, task_id_to_label
        ),
    )

    print(f"Training Task: {task}")
    trainer.train()
    print(f"Evaluation Results for {task}:")
    # A bare `trainer.evaluate()` inside a loop displays nothing, so capture
    # the returned metrics, keep them, and print them explicitly.
    eval_results = trainer.evaluate()
    evaluation_results[task] = eval_results
    for metric_name, metric_value in eval_results.items():
        if metric_name == "eval_classification_report":
            continue  # multi-line text, printed separately below
        print(f"  {metric_name}: {metric_value}")
    if "eval_classification_report" in eval_results:
        print(eval_results["eval_classification_report"])


  trainer = Trainer(


model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]



Training Task: BC2GM


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss,Validation Loss,Classification Report
500,0.208,0.132436,precision recall f1-score support  B-GENE 0.80 0.82 0.81 8118  E-GENE 0.78 0.82 0.80 5817  I-GENE 0.84 0.65 0.74 7691  O 0.98 0.98 0.98 163783  S-GENE 0.85 0.82 0.84 8057  accuracy 0.95 193466  macro avg 0.85 0.82 0.83 193466 weighted avg 0.95 0.95 0.95 193466
1000,0.1328,0.114,precision recall f1-score support  B-GENE 0.84 0.85 0.85 8118  E-GENE 0.81 0.83 0.82 5817  I-GENE 0.81 0.79 0.80 7691  O 0.98 0.98 0.98 163783  S-GENE 0.85 0.87 0.86 8057  accuracy 0.96 193466  macro avg 0.86 0.86 0.86 193466 weighted avg 0.96 0.96 0.96 193466
1500,0.1148,0.106856,precision recall f1-score support  B-GENE 0.85 0.85 0.85 8118  E-GENE 0.85 0.83 0.84 5817  I-GENE 0.83 0.74 0.79 7691  O 0.98 0.99 0.98 163783  S-GENE 0.87 0.85 0.86 8057  accuracy 0.96 193466  macro avg 0.88 0.85 0.86 193466 weighted avg 0.96 0.96 0.96 193466
2000,0.0726,0.118144,precision recall f1-score support  B-GENE 0.85 0.88 0.86 8118  E-GENE 0.82 0.85 0.84 5817  I-GENE 0.80 0.82 0.81 7691  O 0.99 0.98 0.98 163783  S-GENE 0.90 0.85 0.87 8057  accuracy 0.96 193466  macro avg 0.87 0.88 0.87 193466 weighted avg 0.96 0.96 0.96 193466
2500,0.068,0.118419,precision recall f1-score support  B-GENE 0.86 0.87 0.86 8118  E-GENE 0.85 0.83 0.84 5817  I-GENE 0.84 0.78 0.81 7691  O 0.98 0.99 0.98 163783  S-GENE 0.84 0.90 0.87 8057  accuracy 0.96 193466  macro avg 0.88 0.87 0.87 193466 weighted avg 0.96 0.96 0.96 193466
3000,0.0582,0.12044,precision recall f1-score support  B-GENE 0.84 0.90 0.87 8118  E-GENE 0.82 0.88 0.85 5817  I-GENE 0.80 0.85 0.82 7691  O 0.99 0.98 0.99 163783  S-GENE 0.88 0.88 0.88 8057  accuracy 0.96 193466  macro avg 0.87 0.90 0.88 193466 weighted avg 0.97 0.96 0.97 193466
3500,0.034,0.14614,precision recall f1-score support  B-GENE 0.86 0.88 0.87 8118  E-GENE 0.83 0.88 0.85 5817  I-GENE 0.79 0.86 0.82 7691  O 0.99 0.98 0.99 163783  S-GENE 0.88 0.89 0.88 8057  accuracy 0.97 193466  macro avg 0.87 0.90 0.88 193466 weighted avg 0.97 0.97 0.97 193466
4000,0.0288,0.139228,precision recall f1-score support  B-GENE 0.86 0.88 0.87 8118  E-GENE 0.85 0.86 0.85 5817  I-GENE 0.83 0.82 0.82 7691  O 0.99 0.99 0.99 163783  S-GENE 0.88 0.89 0.88 8057  accuracy 0.97 193466  macro avg 0.88 0.89 0.88 193466 weighted avg 0.97 0.97 0.97 193466
4500,0.029,0.13835,precision recall f1-score support  B-GENE 0.86 0.87 0.87 8118  E-GENE 0.85 0.85 0.85 5817  I-GENE 0.80 0.84 0.82 7691  O 0.99 0.98 0.99 163783  S-GENE 0.89 0.88 0.88 8057  accuracy 0.97 193466  macro avg 0.88 0.88 0.88 193466 weighted avg 0.97 0.97 0.97 193466


Trainer is attempting to log a value of "              precision    recall  f1-score   support

      B-GENE       0.80      0.82      0.81      8118
      E-GENE       0.78      0.82      0.80      5817
      I-GENE       0.84      0.65      0.74      7691
           O       0.98      0.98      0.98    163783
      S-GENE       0.85      0.82      0.84      8057

    accuracy                           0.95    193466
   macro avg       0.85      0.82      0.83    193466
weighted avg       0.95      0.95      0.95    193466
" of type <class 'str'> for key "eval/classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "              precision    recall  f1-score   support

      B-GENE       0.84      0.85      0.85      8118
      E-GENE       0.81      0.83      0.82      5817
      I-GENE       0.81      0.79      0.80      7691
           O       0.98      0.98      0.98

Evaluation Results for BC2GM:


Trainer is attempting to log a value of "              precision    recall  f1-score   support

      B-GENE       0.85      0.85      0.85      8118
      E-GENE       0.85      0.83      0.84      5817
      I-GENE       0.83      0.74      0.79      7691
           O       0.98      0.99      0.98    163783
      S-GENE       0.87      0.85      0.86      8057

    accuracy                           0.96    193466
   macro avg       0.88      0.85      0.86    193466
weighted avg       0.96      0.96      0.96    193466
" of type <class 'str'> for key "eval/classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
  trainer = Trainer(


Training Task: BC4CHEMD


Step,Training Loss,Validation Loss,Classification Report
500,0.14,0.073857,precision recall f1-score support  B-Chemical 0.84 0.87 0.85 14871  E-Chemical 0.81 0.90 0.85 16976  I-Chemical 0.90 0.92 0.91 27923  O 0.99 0.99 0.99 920458  S-Chemical 0.88 0.84 0.86 55344  accuracy 0.98 1035572  macro avg 0.88 0.90 0.89 1035572 weighted avg 0.98 0.98 0.98 1035572
1000,0.0771,0.059067,precision recall f1-score support  B-Chemical 0.90 0.83 0.86 14871  E-Chemical 0.92 0.85 0.88 16976  I-Chemical 0.93 0.93 0.93 27923  O 0.99 0.99 0.99 920458  S-Chemical 0.85 0.91 0.88 55344  accuracy 0.98 1035572  macro avg 0.92 0.90 0.91 1035572 weighted avg 0.98 0.98 0.98 1035572
1500,0.0621,0.058073,precision recall f1-score support  B-Chemical 0.90 0.86 0.88 14871  E-Chemical 0.94 0.83 0.88 16976  I-Chemical 0.91 0.94 0.93 27923  O 0.99 0.99 0.99 920458  S-Chemical 0.90 0.87 0.89 55344  accuracy 0.98 1035572  macro avg 0.93 0.90 0.91 1035572 weighted avg 0.98 0.98 0.98 1035572
2000,0.0548,0.06085,precision recall f1-score support  B-Chemical 0.87 0.86 0.86 14871  E-Chemical 0.90 0.86 0.88 16976  I-Chemical 0.92 0.94 0.93 27923  O 0.99 0.99 0.99 920458  S-Chemical 0.89 0.90 0.89 55344  accuracy 0.98 1035572  macro avg 0.91 0.91 0.91 1035572 weighted avg 0.98 0.98 0.98 1035572
2500,0.0528,0.04959,precision recall f1-score support  B-Chemical 0.87 0.92 0.89 14871  E-Chemical 0.87 0.93 0.90 16976  I-Chemical 0.95 0.93 0.94 27923  O 0.99 0.99 0.99 920458  S-Chemical 0.91 0.91 0.91 55344  accuracy 0.98 1035572  macro avg 0.92 0.93 0.93 1035572 weighted avg 0.98 0.98 0.98 1035572
3000,0.049,0.051985,precision recall f1-score support  B-Chemical 0.92 0.87 0.89 14871  E-Chemical 0.92 0.89 0.91 16976  I-Chemical 0.96 0.93 0.94 27923  O 0.99 0.99 0.99 920458  S-Chemical 0.90 0.92 0.91 55344  accuracy 0.98 1035572  macro avg 0.94 0.92 0.93 1035572 weighted avg 0.98 0.98 0.98 1035572
3500,0.0446,0.048456,precision recall f1-score support  B-Chemical 0.87 0.90 0.89 14871  E-Chemical 0.90 0.91 0.90 16976  I-Chemical 0.96 0.93 0.94 27923  O 0.99 0.99 0.99 920458  S-Chemical 0.88 0.94 0.91 55344  accuracy 0.98 1035572  macro avg 0.92 0.93 0.93 1035572 weighted avg 0.98 0.98 0.98 1035572
4000,0.0423,0.060815,precision recall f1-score support  B-Chemical 0.92 0.88 0.90 14871  E-Chemical 0.92 0.90 0.91 16976  I-Chemical 0.96 0.92 0.94 27923  O 0.99 1.00 0.99 920458  S-Chemical 0.95 0.86 0.90 55344  accuracy 0.98 1035572  macro avg 0.95 0.91 0.93 1035572 weighted avg 0.98 0.98 0.98 1035572
4500,0.0248,0.05577,precision recall f1-score support  B-Chemical 0.93 0.87 0.90 14871  E-Chemical 0.93 0.90 0.91 16976  I-Chemical 0.95 0.94 0.95 27923  O 0.99 0.99 0.99 920458  S-Chemical 0.91 0.91 0.91 55344  accuracy 0.99 1035572  macro avg 0.94 0.92 0.93 1035572 weighted avg 0.99 0.99 0.99 1035572
5000,0.0253,0.059323,precision recall f1-score support  B-Chemical 0.94 0.86 0.90 14871  E-Chemical 0.93 0.89 0.91 16976  I-Chemical 0.96 0.92 0.94 27923  O 0.99 0.99 0.99 920458  S-Chemical 0.90 0.92 0.91 55344  accuracy 0.98 1035572  macro avg 0.95 0.92 0.93 1035572 weighted avg 0.98 0.98 0.98 1035572


Trainer is attempting to log a value of "              precision    recall  f1-score   support

  B-Chemical       0.84      0.87      0.85     14871
  E-Chemical       0.81      0.90      0.85     16976
  I-Chemical       0.90      0.92      0.91     27923
           O       0.99      0.99      0.99    920458
  S-Chemical       0.88      0.84      0.86     55344

    accuracy                           0.98   1035572
   macro avg       0.88      0.90      0.89   1035572
weighted avg       0.98      0.98      0.98   1035572
" of type <class 'str'> for key "eval/classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "              precision    recall  f1-score   support

  B-Chemical       0.90      0.83      0.86     14871
  E-Chemical       0.92      0.85      0.88     16976
  I-Chemical       0.93      0.93      0.93     27923
           O       0.99      0.99      0.99

Evaluation Results for BC4CHEMD:


Trainer is attempting to log a value of "              precision    recall  f1-score   support

  B-Chemical       0.87      0.90      0.89     14871
  E-Chemical       0.90      0.91      0.90     16976
  I-Chemical       0.96      0.93      0.94     27923
           O       0.99      0.99      0.99    920458
  S-Chemical       0.88      0.94      0.91     55344

    accuracy                           0.98   1035572
   macro avg       0.92      0.93      0.93   1035572
weighted avg       0.98      0.98      0.98   1035572
" of type <class 'str'> for key "eval/classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
  trainer = Trainer(


Training Task: BC5CDR


Step,Training Loss,Validation Loss,Classification Report
500,0.2218,0.152157,precision recall f1-score support  B-Chemical 0.80 0.88 0.84 1421  B-Disease 0.78 0.77 0.77 3870  E-Chemical 0.85 0.87 0.86 1633  E-Disease 0.89 0.68 0.77 3832  I-Chemical 0.83 0.84 0.84 1277  I-Disease 0.89 0.52 0.66 1499  O 0.98 0.98 0.98 131255  S-Chemical 0.95 0.95 0.95 16588  S-Disease 0.81 0.94 0.87 8641  accuracy 0.96 170016  macro avg 0.87 0.83 0.84 170016 weighted avg 0.96 0.96 0.96 170016
1000,0.0625,0.168168,precision recall f1-score support  B-Chemical 0.85 0.84 0.84 1421  B-Disease 0.77 0.83 0.80 3870  E-Chemical 0.85 0.88 0.87 1633  E-Disease 0.78 0.89 0.83 3832  I-Chemical 0.85 0.86 0.86 1277  I-Disease 0.65 0.75 0.70 1499  O 0.98 0.98 0.98 131255  S-Chemical 0.96 0.95 0.95 16588  S-Disease 0.92 0.88 0.90 8641  accuracy 0.96 170016  macro avg 0.85 0.87 0.86 170016 weighted avg 0.96 0.96 0.96 170016
1500,0.0322,0.182332,precision recall f1-score support  B-Chemical 0.83 0.87 0.85 1421  B-Disease 0.78 0.81 0.79 3870  E-Chemical 0.84 0.90 0.87 1633  E-Disease 0.80 0.86 0.83 3832  I-Chemical 0.80 0.88 0.84 1277  I-Disease 0.63 0.80 0.70 1499  O 0.98 0.98 0.98 131255  S-Chemical 0.95 0.96 0.96 16588  S-Disease 0.91 0.89 0.90 8641  accuracy 0.96 170016  macro avg 0.84 0.88 0.86 170016 weighted avg 0.96 0.96 0.96 170016


Trainer is attempting to log a value of "              precision    recall  f1-score   support

  B-Chemical       0.80      0.88      0.84      1421
   B-Disease       0.78      0.77      0.77      3870
  E-Chemical       0.85      0.87      0.86      1633
   E-Disease       0.89      0.68      0.77      3832
  I-Chemical       0.83      0.84      0.84      1277
   I-Disease       0.89      0.52      0.66      1499
           O       0.98      0.98      0.98    131255
  S-Chemical       0.95      0.95      0.95     16588
   S-Disease       0.81      0.94      0.87      8641

    accuracy                           0.96    170016
   macro avg       0.87      0.83      0.84    170016
weighted avg       0.96      0.96      0.96    170016
" of type <class 'str'> for key "eval/classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "              precision    recall  f1-score 

Evaluation Results for BC5CDR:


Trainer is attempting to log a value of "              precision    recall  f1-score   support

  B-Chemical       0.80      0.88      0.84      1421
   B-Disease       0.78      0.77      0.77      3870
  E-Chemical       0.85      0.87      0.86      1633
   E-Disease       0.89      0.68      0.77      3832
  I-Chemical       0.83      0.84      0.84      1277
   I-Disease       0.89      0.52      0.66      1499
           O       0.98      0.98      0.98    131255
  S-Chemical       0.95      0.95      0.95     16588
   S-Disease       0.81      0.94      0.87      8641

    accuracy                           0.96    170016
   macro avg       0.87      0.83      0.84    170016
weighted avg       0.96      0.96      0.96    170016
" of type <class 'str'> for key "eval/classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
  trainer = Trainer(


Training Task: JNLPBA


Step,Training Loss,Validation Loss,Classification Report
500,0.5584,0.384406,precision recall f1-score support  B-DNA 0.69 0.86 0.76 2618  B-RNA 0.74 0.76 0.75 384  B-cell_line 0.50 0.75 0.60 1342  B-cell_type 0.80 0.62 0.70 3474  B-protein 0.74 0.82 0.78 7198  E-DNA 0.69 0.87 0.77 1108  E-RNA 0.72 0.90 0.80 206  E-cell_line 0.69 0.56 0.62 611  E-cell_type 0.85 0.82 0.84 2480  E-protein 0.73 0.79 0.76 4029  I-DNA 0.68 0.77 0.73 1807  I-RNA 0.59 0.69 0.64 153  I-cell_line 0.34 0.78 0.47 1056  I-cell_type 0.89 0.45 0.60 2697  I-protein 0.72 0.74 0.73 4603  O 0.96 0.93 0.95 111692  S-DNA 0.40 0.30 0.35 669  S-RNA 0.00 0.00 0.00 61  S-cell_line 0.53 0.37 0.44 289  S-cell_type 0.83 0.64 0.72 1463  S-protein 0.70 0.84 0.77 9842  accuracy 0.88 157782  macro avg 0.66 0.68 0.65 157782 weighted avg 0.89 0.88 0.88 157782
1000,0.342,0.39678,precision recall f1-score support  B-DNA 0.75 0.83 0.79 2618  B-RNA 0.73 0.84 0.78 384  B-cell_line 0.46 0.77 0.58 1342  B-cell_type 0.79 0.67 0.73 3474  B-protein 0.77 0.77 0.77 7198  E-DNA 0.71 0.88 0.78 1108  E-RNA 0.70 0.93 0.80 206  E-cell_line 0.60 0.67 0.63 611  E-cell_type 0.83 0.83 0.83 2480  E-protein 0.75 0.75 0.75 4029  I-DNA 0.70 0.79 0.74 1807  I-RNA 0.60 0.73 0.66 153  I-cell_line 0.43 0.71 0.53 1056  I-cell_type 0.80 0.66 0.72 2697  I-protein 0.79 0.65 0.71 4603  O 0.96 0.94 0.95 111692  S-DNA 0.62 0.20 0.30 669  S-RNA 0.20 0.03 0.06 61  S-cell_line 0.52 0.49 0.50 289  S-cell_type 0.63 0.74 0.68 1463  S-protein 0.67 0.85 0.75 9842  accuracy 0.88 157782  macro avg 0.67 0.70 0.67 157782 weighted avg 0.89 0.88 0.89 157782
1500,0.3171,0.390139,precision recall f1-score support  B-DNA 0.74 0.84 0.78 2618  B-RNA 0.73 0.82 0.77 384  B-cell_line 0.48 0.75 0.59 1342  B-cell_type 0.81 0.65 0.72 3474  B-protein 0.69 0.85 0.76 7198  E-DNA 0.80 0.84 0.82 1108  E-RNA 0.76 0.90 0.83 206  E-cell_line 0.51 0.74 0.60 611  E-cell_type 0.88 0.76 0.81 2480  E-protein 0.68 0.84 0.75 4029  I-DNA 0.76 0.74 0.75 1807  I-RNA 0.58 0.49 0.53 153  I-cell_line 0.34 0.74 0.47 1056  I-cell_type 0.83 0.48 0.61 2697  I-protein 0.75 0.67 0.71 4603  O 0.98 0.92 0.95 111692  S-DNA 0.58 0.33 0.42 669  S-RNA 0.21 0.11 0.15 61  S-cell_line 0.54 0.42 0.47 289  S-cell_type 0.86 0.64 0.73 1463  S-protein 0.64 0.92 0.76 9842  accuracy 0.88 157782  macro avg 0.67 0.69 0.66 157782 weighted avg 0.90 0.88 0.88 157782
2000,0.3125,0.355512,precision recall f1-score support  B-DNA 0.77 0.86 0.81 2618  B-RNA 0.72 0.81 0.76 384  B-cell_line 0.46 0.76 0.58 1342  B-cell_type 0.82 0.62 0.70 3474  B-protein 0.75 0.83 0.79 7198  E-DNA 0.80 0.84 0.82 1108  E-RNA 0.82 0.88 0.85 206  E-cell_line 0.56 0.67 0.61 611  E-cell_type 0.85 0.79 0.82 2480  E-protein 0.77 0.78 0.78 4029  I-DNA 0.75 0.77 0.76 1807  I-RNA 0.63 0.59 0.61 153  I-cell_line 0.30 0.77 0.43 1056  I-cell_type 0.83 0.46 0.59 2697  I-protein 0.75 0.74 0.75 4603  O 0.97 0.93 0.95 111692  S-DNA 0.65 0.37 0.47 669  S-RNA 0.47 0.11 0.18 61  S-cell_line 0.45 0.43 0.44 289  S-cell_type 0.75 0.65 0.70 1463  S-protein 0.66 0.91 0.77 9842  accuracy 0.88 157782  macro avg 0.69 0.69 0.67 157782 weighted avg 0.90 0.88 0.89 157782
2500,0.2703,0.368003,precision recall f1-score support  B-DNA 0.70 0.87 0.77 2618  B-RNA 0.74 0.84 0.79 384  B-cell_line 0.54 0.76 0.63 1342  B-cell_type 0.82 0.69 0.75 3474  B-protein 0.75 0.82 0.78 7198  E-DNA 0.76 0.87 0.81 1108  E-RNA 0.78 0.90 0.84 206  E-cell_line 0.66 0.64 0.65 611  E-cell_type 0.85 0.85 0.85 2480  E-protein 0.75 0.81 0.78 4029  I-DNA 0.72 0.73 0.72 1807  I-RNA 0.65 0.71 0.68 153  I-cell_line 0.58 0.68 0.63 1056  I-cell_type 0.81 0.75 0.78 2697  I-protein 0.81 0.68 0.74 4603  O 0.97 0.94 0.95 111692  S-DNA 0.42 0.41 0.41 669  S-RNA 0.16 0.11 0.13 61  S-cell_line 0.28 0.52 0.37 289  S-cell_type 0.87 0.60 0.71 1463  S-protein 0.69 0.88 0.77 9842  accuracy 0.89 157782  macro avg 0.68 0.72 0.69 157782 weighted avg 0.90 0.89 0.89 157782
3000,0.2176,0.376167,precision recall f1-score support  B-DNA 0.74 0.88 0.80 2618  B-RNA 0.74 0.86 0.80 384  B-cell_line 0.55 0.79 0.65 1342  B-cell_type 0.80 0.69 0.74 3474  B-protein 0.74 0.85 0.79 7198  E-DNA 0.76 0.86 0.81 1108  E-RNA 0.77 0.90 0.83 206  E-cell_line 0.58 0.75 0.65 611  E-cell_type 0.85 0.82 0.83 2480  E-protein 0.74 0.80 0.77 4029  I-DNA 0.74 0.80 0.77 1807  I-RNA 0.65 0.78 0.71 153  I-cell_line 0.50 0.77 0.61 1056  I-cell_type 0.84 0.68 0.75 2697  I-protein 0.76 0.76 0.76 4603  O 0.98 0.93 0.95 111692  S-DNA 0.43 0.42 0.43 669  S-RNA 0.28 0.11 0.16 61  S-cell_line 0.39 0.51 0.44 289  S-cell_type 0.77 0.67 0.72 1463  S-protein 0.68 0.89 0.78 9842  accuracy 0.89 157782  macro avg 0.68 0.74 0.70 157782 weighted avg 0.90 0.89 0.89 157782
3500,0.2143,0.388165,precision recall f1-score support  B-DNA 0.73 0.90 0.81 2618  B-RNA 0.80 0.83 0.81 384  B-cell_line 0.49 0.82 0.61 1342  B-cell_type 0.85 0.62 0.72 3474  B-protein 0.75 0.83 0.79 7198  E-DNA 0.74 0.88 0.80 1108  E-RNA 0.80 0.83 0.81 206  E-cell_line 0.51 0.79 0.62 611  E-cell_type 0.88 0.74 0.81 2480  E-protein 0.73 0.82 0.77 4029  I-DNA 0.75 0.79 0.77 1807  I-RNA 0.68 0.71 0.69 153  I-cell_line 0.42 0.76 0.54 1056  I-cell_type 0.81 0.65 0.72 2697  I-protein 0.74 0.76 0.75 4603  O 0.98 0.93 0.95 111692  S-DNA 0.43 0.43 0.43 669  S-RNA 0.23 0.11 0.15 61  S-cell_line 0.35 0.52 0.42 289  S-cell_type 0.78 0.67 0.72 1463  S-protein 0.70 0.88 0.78 9842  accuracy 0.89 157782  macro avg 0.67 0.73 0.69 157782 weighted avg 0.90 0.89 0.89 157782
4000,0.22,0.343653,precision recall f1-score support  B-DNA 0.78 0.85 0.82 2618  B-RNA 0.80 0.83 0.82 384  B-cell_line 0.67 0.67 0.67 1342  B-cell_type 0.78 0.76 0.77 3474  B-protein 0.75 0.82 0.78 7198  E-DNA 0.80 0.86 0.83 1108  E-RNA 0.82 0.88 0.85 206  E-cell_line 0.71 0.66 0.69 611  E-cell_type 0.86 0.83 0.84 2480  E-protein 0.74 0.81 0.77 4029  I-DNA 0.78 0.76 0.77 1807  I-RNA 0.69 0.67 0.68 153  I-cell_line 0.53 0.73 0.61 1056  I-cell_type 0.80 0.74 0.77 2697  I-protein 0.76 0.76 0.76 4603  O 0.97 0.94 0.95 111692  S-DNA 0.53 0.36 0.43 669  S-RNA 0.39 0.11 0.18 61  S-cell_line 0.41 0.45 0.43 289  S-cell_type 0.72 0.70 0.71 1463  S-protein 0.70 0.89 0.78 9842  accuracy 0.90 157782  macro avg 0.71 0.72 0.71 157782 weighted avg 0.90 0.90 0.90 157782
4500,0.2138,0.353201,precision recall f1-score support  B-DNA 0.76 0.87 0.81 2618  B-RNA 0.83 0.83 0.83 384  B-cell_line 0.58 0.75 0.65 1342  B-cell_type 0.81 0.70 0.75 3474  B-protein 0.75 0.82 0.78 7198  E-DNA 0.79 0.87 0.83 1108  E-RNA 0.83 0.90 0.86 206  E-cell_line 0.70 0.64 0.67 611  E-cell_type 0.85 0.84 0.84 2480  E-protein 0.75 0.81 0.78 4029  I-DNA 0.75 0.79 0.77 1807  I-RNA 0.67 0.75 0.70 153  I-cell_line 0.56 0.69 0.62 1056  I-cell_type 0.79 0.75 0.77 2697  I-protein 0.75 0.77 0.76 4603  O 0.97 0.93 0.95 111692  S-DNA 0.47 0.41 0.44 669  S-RNA 0.23 0.11 0.15 61  S-cell_line 0.45 0.43 0.44 289  S-cell_type 0.78 0.66 0.72 1463  S-protein 0.69 0.89 0.78 9842  accuracy 0.89 157782  macro avg 0.70 0.72 0.71 157782 weighted avg 0.90 0.89 0.90 157782
5000,0.1661,0.401846,precision recall f1-score support  B-DNA 0.75 0.88 0.81 2618  B-RNA 0.76 0.84 0.80 384  B-cell_line 0.54 0.78 0.64 1342  B-cell_type 0.80 0.71 0.75 3474  B-protein 0.74 0.84 0.79 7198  E-DNA 0.77 0.88 0.82 1108  E-RNA 0.79 0.91 0.85 206  E-cell_line 0.61 0.70 0.65 611  E-cell_type 0.86 0.81 0.84 2480  E-protein 0.74 0.80 0.77 4029  I-DNA 0.77 0.80 0.78 1807  I-RNA 0.70 0.69 0.70 153  I-cell_line 0.51 0.73 0.60 1056  I-cell_type 0.82 0.72 0.76 2697  I-protein 0.77 0.73 0.75 4603  O 0.97 0.93 0.95 111692  S-DNA 0.50 0.38 0.43 669  S-RNA 0.18 0.11 0.14 61  S-cell_line 0.47 0.47 0.47 289  S-cell_type 0.77 0.69 0.73 1463  S-protein 0.69 0.90 0.78 9842  accuracy 0.89 157782  macro avg 0.69 0.73 0.70 157782 weighted avg 0.90 0.89 0.90 157782


Trainer is attempting to log a value of "              precision    recall  f1-score   support

       B-DNA       0.69      0.86      0.76      2618
       B-RNA       0.74      0.76      0.75       384
 B-cell_line       0.50      0.75      0.60      1342
 B-cell_type       0.80      0.62      0.70      3474
   B-protein       0.74      0.82      0.78      7198
       E-DNA       0.69      0.87      0.77      1108
       E-RNA       0.72      0.90      0.80       206
 E-cell_line       0.69      0.56      0.62       611
 E-cell_type       0.85      0.82      0.84      2480
   E-protein       0.73      0.79      0.76      4029
       I-DNA       0.68      0.77      0.73      1807
       I-RNA       0.59      0.69      0.64       153
 I-cell_line       0.34      0.78      0.47      1056
 I-cell_type       0.89      0.45      0.60      2697
   I-protein       0.72      0.74      0.73      4603
           O       0.96      0.93      0.95    111692
       S-DNA       0.40      0.30      0

Evaluation Results for JNLPBA:


Trainer is attempting to log a value of "              precision    recall  f1-score   support

       B-DNA       0.78      0.85      0.82      2618
       B-RNA       0.80      0.83      0.82       384
 B-cell_line       0.67      0.67      0.67      1342
 B-cell_type       0.78      0.76      0.77      3474
   B-protein       0.75      0.82      0.78      7198
       E-DNA       0.80      0.86      0.83      1108
       E-RNA       0.82      0.88      0.85       206
 E-cell_line       0.71      0.66      0.69       611
 E-cell_type       0.86      0.83      0.84      2480
   E-protein       0.74      0.81      0.77      4029
       I-DNA       0.78      0.76      0.77      1807
       I-RNA       0.69      0.67      0.68       153
 I-cell_line       0.53      0.73      0.61      1056
 I-cell_type       0.80      0.74      0.77      2697
   I-protein       0.76      0.76      0.76      4603
           O       0.97      0.94      0.95    111692
       S-DNA       0.53      0.36      0

Training Task: NCBI


Step,Training Loss,Validation Loss,Classification Report
500,0.134,0.093527,precision recall f1-score support  B-Disease 0.83 0.90 0.86 1217  E-Disease 0.87 0.96 0.91 1152  I-Disease 0.82 0.89 0.85 976  O 0.99 0.99 0.99 28903  S-Disease 0.93 0.87 0.90 1123  accuracy 0.97 33371  macro avg 0.89 0.92 0.90 33371 weighted avg 0.98 0.97 0.98 33371
1000,0.0441,0.095639,precision recall f1-score support  B-Disease 0.87 0.87 0.87 1217  E-Disease 0.91 0.94 0.92 1152  I-Disease 0.86 0.87 0.86 976  O 0.99 0.99 0.99 28903  S-Disease 0.88 0.91 0.90 1123  accuracy 0.98 33371  macro avg 0.90 0.92 0.91 33371 weighted avg 0.98 0.98 0.98 33371
1500,0.0218,0.109044,precision recall f1-score support  B-Disease 0.88 0.90 0.89 1217  E-Disease 0.90 0.93 0.91 1152  I-Disease 0.84 0.91 0.87 976  O 0.99 0.99 0.99 28903  S-Disease 0.88 0.91 0.89 1123  accuracy 0.98 33371  macro avg 0.90 0.92 0.91 33371 weighted avg 0.98 0.98 0.98 33371
2000,0.0106,0.110503,precision recall f1-score support  B-Disease 0.88 0.90 0.89 1217  E-Disease 0.90 0.95 0.92 1152  I-Disease 0.85 0.90 0.87 976  O 0.99 0.99 0.99 28903  S-Disease 0.91 0.90 0.90 1123  accuracy 0.98 33371  macro avg 0.91 0.93 0.92 33371 weighted avg 0.98 0.98 0.98 33371


Trainer is attempting to log a value of "              precision    recall  f1-score   support

   B-Disease       0.83      0.90      0.86      1217
   E-Disease       0.87      0.96      0.91      1152
   I-Disease       0.82      0.89      0.85       976
           O       0.99      0.99      0.99     28903
   S-Disease       0.93      0.87      0.90      1123

    accuracy                           0.97     33371
   macro avg       0.89      0.92      0.90     33371
weighted avg       0.98      0.97      0.98     33371
" of type <class 'str'> for key "eval/classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "              precision    recall  f1-score   support

   B-Disease       0.87      0.87      0.87      1217
   E-Disease       0.91      0.94      0.92      1152
   I-Disease       0.86      0.87      0.86       976
           O       0.99      0.99      0.99

Evaluation Results for NCBI:


Trainer is attempting to log a value of "              precision    recall  f1-score   support

   B-Disease       0.83      0.90      0.86      1217
   E-Disease       0.87      0.96      0.91      1152
   I-Disease       0.82      0.89      0.85       976
           O       0.99      0.99      0.99     28903
   S-Disease       0.93      0.87      0.90      1123

    accuracy                           0.97     33371
   macro avg       0.89      0.92      0.90     33371
weighted avg       0.98      0.97      0.98     33371
" of type <class 'str'> for key "eval/classification_report" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
