In [1]:
from datasets import load_dataset, Dataset 
import datasets
import torch
import os
import pandas as pd
from tqdm import tqdm

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

In [3]:
data = load_dataset(
    path="../../biomedical/bigbio/hub/hub_repos/symptemist/symptemist.py", 
    name="symptemist_entities_bigbio_kb"
)
data

Found cached dataset symptemist (/home/Ignacio.Rodriguez/.cache/huggingface/datasets/symptemist/symptemist_entities_bigbio_kb/1.0.0/6eca3229d6e85bf4857c66298013fa7ce6ad7a5e9376e32f68dadbbf5cd93ecb)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'document_id', 'passages', 'entities', 'events', 'coreferences', 'relations'],
        num_rows: 744
    })
})

In [4]:
labels = ["O", "SINTOMA"]
labels

['O', 'SINTOMA']

In [5]:
import spacy
nlp = spacy.load("es_core_news_sm")

In [6]:
def bigbio2spanmarker(split: Dataset) -> Dataset:
    output = {
        "document_id": [],
        "sentence_id": [],
        "tokens": [],
        "ner_tags": [],        
    }
    
    for doc_id, row in tqdm(enumerate(split), desc="Document progress:"):
        
        text = row["passages"][0]["text"][0]
        entities = row["entities"]
        doc = nlp(text)
        
        for sentence_id, sentence in enumerate(doc.sents):
            
            tokens = []
            token_positions = []
            
            for token in sentence:
                tokens.append(token.text)
                token_positions.append(token.idx)
        
            ner_tags = [0] * len(tokens)
                
            for i, position in enumerate(token_positions):
                for entity in entities:
                    for offset in entity["offsets"]:
                        if position==offset[0]:
                            ner_tags[i] = 1
                        elif position in range(offset[0],offset[1]):
                            ner_tags[i] = 1
        
            output["document_id"].append(doc_id)
            output["sentence_id"].append(sentence_id)
            output["tokens"].append(tokens)
            output["ner_tags"].append(ner_tags)
        
    return datasets.Dataset.from_dict(output)

symp = bigbio2spanmarker(data["train"])

Document progress:: 744it [00:23, 31.20it/s]


In [7]:
from span_marker import SpanMarkerModel

model_name = "PlanTL-GOB-ES/roberta-base-biomedical-clinical-es"
model = SpanMarkerModel.from_pretrained(model_name, labels=labels, model_max_length=256)

Some weights of RobertaModel were not initialized from the model checkpoint at PlanTL-GOB-ES/roberta-base-biomedical-clinical-es and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
You are resizing the embedding layer without providing a `pad_to_multiple_of` parameter. This means that the new embeding dimension will be 52002. This might induce some performance reduction as *Tensor Cores* will not be available. For more details  about this, or help on choosing the correct value for resizing, refer to this guide: https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc


In [8]:
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir = "../checkpoints",
    per_device_train_batch_size = 32,
    per_device_eval_batch_size = 32,
    fp16 = True,
    save_strategy = "epoch",
    overwrite_output_dir = True,
    evaluation_strategy = "epoch",
    save_total_limit = 2,
    num_train_epochs = 40,
    load_best_model_at_end = True,
    metric_for_best_model = "eval_overall_f1",
    lr_scheduler_type = "linear",
    learning_rate = 0.00005,
    warmup_ratio = 0.0,
    label_smoothing_factor = 0.0,
    weight_decay = 0.0,
)


In [9]:
from span_marker import Trainer

trainer = Trainer(
    model=model,
    args=args,
    train_dataset = symp.select(range(10356)),
    eval_dataset = symp.select(range(10356,12946)),
)

In [10]:
import wandb
with wandb.init(project=f'symptemist_ner',tags=["dev"]):
    trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33millorca[0m. Use [1m`wandb login --relogin`[0m to force relogin


Label normalizing the train dataset:   0%|          | 0/10356 [00:00<?, ? examples/s]

Tokenizing the train dataset:   0%|          | 0/10356 [00:00<?, ? examples/s]

This SpanMarker model will ignore 10.791367% of all annotated entities in the train dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 7367 total entities:
- 194 missed entities with 9 words (2.633365%)
- 139 missed entities with 10 words (1.886792%)
- 85 missed entities with 11 words (1.153794%)
- 88 missed entities with 12 words (1.194516%)
- 64 missed entities with 13 words (0.868739%)
- 47 missed entities with 14 words (0.637980%)
- 35 missed entities with 15 words (0.475092%)
- 27 missed entities with 16 words (0.366499%)
- 15 missed entities with 17 words (0.203611%)
- 26 missed entities with 18 words (0.352925%)
- 13 missed entities with 19 words (0.176463%)
- 9 missed entities with 20 words (0.122166%)
- 11 missed entities with 21 words (0.149315%)
- 8 missed entities with 22 words (0.108592%)
- 6 missed entities with 23 words (0.081444%)
- 3 missed entities with 2

Adding document-level context:   0%|          | 0/10356 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/10356 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Overall Precision,Overall Recall,Overall F1,Overall Accuracy
1,0.0226,0.006742,0.716678,0.674327,0.694858,0.957768
2,0.0058,0.005887,0.769122,0.713066,0.740034,0.963001
3,0.0039,0.006286,0.738651,0.747866,0.74323,0.963754
4,0.0031,0.006078,0.728087,0.747209,0.737524,0.962751
5,0.0022,0.007007,0.736334,0.751806,0.74399,0.961982
6,0.0012,0.008767,0.72271,0.756402,0.739172,0.962567
7,0.0009,0.009193,0.728149,0.743926,0.735953,0.961413
8,0.0008,0.009789,0.780282,0.727511,0.752973,0.964974
9,0.0007,0.011322,0.74328,0.726198,0.73464,0.961647
10,0.0007,0.010011,0.715796,0.755745,0.735228,0.961179


Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

Label normalizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

Tokenizing the evaluation dataset:   0%|          | 0/2590 [00:00<?, ? examples/s]

This SpanMarker model won't be able to predict 10.779145% of all annotated entities in the evaluation dataset. This is caused by the SpanMarkerModel maximum entity length of 8 words.
These are the frequencies of the missed entities due to maximum entity length out of 1707 total entities:
- 42 missed entities with 9 words (2.460457%)
- 30 missed entities with 10 words (1.757469%)
- 23 missed entities with 11 words (1.347393%)
- 17 missed entities with 12 words (0.995899%)
- 14 missed entities with 13 words (0.820152%)
- 13 missed entities with 14 words (0.761570%)
- 10 missed entities with 15 words (0.585823%)
- 11 missed entities with 16 words (0.644405%)
- 5 missed entities with 17 words (0.292912%)
- 4 missed entities with 18 words (0.234329%)
- 2 missed entities with 19 words (0.117165%)
- 1 missed entities with 20 words (0.058582%)
- 3 missed entities with 21 words (0.175747%)
- 5 missed entities with 23 words (0.292912%)
- 1 missed entities with 24 words (0.058582%)
- 1 missed ent

Adding document-level context:   0%|          | 0/2590 [00:00<?, ?it/s]

Spreading data between multiple samples:   0%|          | 0/2590 [00:00<?, ? examples/s]

0,1
eval/loss,▁▁▁▁▂▃▃▃▄▃▃▄▄▅▅▅▄▅▅▄▅▅▆▅▅▆▆▆▆▆▆▇▇█▇█████
eval/overall_accuracy,▁▆▇▆▅▅▄█▅▄▄█▄▃█▄▆▅▆▅▆▆▇▇▅▇▆▆▇▇▇▇▆▇▆▇▆▇▆▇
eval/overall_f1,▁▆▆▆▆▆▆▇▅▅▇█▆▅▇▆▆▆▆▆▆▆▇▇▅▆▇▇█▇██▇▇▇▇▇▇▇▇
eval/overall_precision,▁▅▃▂▃▂▂▆▃▁▂▅▇▁▆▄▅▁▃▃▄▂█▄▄▆▅▅▆▅▄▅▄▅▃▅▄▅▄▅
eval/overall_recall,▁▄▇▇▇▇▆▅▅▇█▇▃▇▄▆▅█▆▆▆█▄▇▅▄▆▅▆▇██▆▆▇▆▇▆▇▆
eval/runtime,▅▁▁▆▃▂▃▁▇▂▄▃▄▂▅▅▆▅▅▆▇▃▆▅▅▅█▆▆▆▇▃█▇▂▅▅█▅▄
eval/samples_per_second,▄██▃▆▇▆█▂▇▅▅▅▇▄▃▃▃▄▃▂▆▃▄▄▄▁▃▃▃▂▆▁▂▇▄▄▁▄▅
eval/steps_per_second,▄██▃▆▇▆█▂▇▅▆▅▇▄▃▃▃▄▃▂▆▃▄▄▄▁▃▃▃▂▆▁▂▇▄▄▁▄▅
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████

0,1
eval/loss,0.01791
eval/overall_accuracy,0.964
eval/overall_f1,0.75025
eval/overall_precision,0.75804
eval/overall_recall,0.74261
eval/runtime,24.0388
eval/samples_per_second,192.439
eval/steps_per_second,6.032
train/epoch,40.0
train/global_step,23560.0
