In [1]:
from datasets import load_dataset

wnut = load_dataset("YurtsAI/named_entity_recognition")

In [2]:
wnut

DatasetDict({
    train: Dataset({
        features: ['document_id', 'sentence_id', 'tokens', 'ner_tags'],
        num_rows: 39999
    })
    test: Dataset({
        features: ['document_id', 'sentence_id', 'tokens', 'ner_tags'],
        num_rows: 5000
    })
    eval: Dataset({
        features: ['document_id', 'sentence_id', 'tokens', 'ner_tags'],
        num_rows: 4999
    })
})

In [3]:
print(wnut["train"]['document_id'][0])
print(wnut["train"]['tokens'][0])
print(wnut["train"]['ner_tags'][0])

0
['Subject', ':', '🎉', 'Welcome', 'to', 'Our', 'Exclusive', 'Loyalty', 'Program', '!', '🎉', 'Hey', 'there', ',', 'We', '’', 're', 'super', 'excited', 'to', 'announce', 'our', 'brand', 'new', 'customer', 'loyalty', 'program', 'at', 'Bella', "'s", 'Bistro', '!', '🍝', 'Starting', 'from', 'October', '15th', ',', '2023', ',', 'you', 'can', 'start', 'earning', 'points', 'every', 'time', 'you', 'dine', 'with', 'us', '.', 'It', "'s", 'our', 'way', 'of', 'saying', 'thanks', 'for', 'being', 'such', 'an', 'awesome', 'customer', '!', 'Here', '’', 's', 'the', 'lowdown', ':', 'For', 'every', '$', '1', 'you', 'spend', ',', 'you', '’', 'll', 'earn', '10', 'points', '.', 'Rack', 'up', '500', 'points', 'and', 'you', '’', 'll', 'get', 'a', '$', '25', 'gift', 'card', 'to', 'use', 'on', 'your', 'next', 'visit', '.', 'Plus', ',', 'we', '’', 've', 'got', 'some', 'sweet', 'perks', 'like', 'birthday', 'freebies', ',', 'early', 'access', 'to', 'special', 'events', ',', 'and', 'members-only', 'discounts', '.', 

In [4]:
label_list = wnut["train"].features[f"ner_tags"].feature.names
print(label_list)
print(len(label_list))

id2label = {}
label2id = {}
for count, label in enumerate(label_list):
    id2label.update({count:label})
    label2id.update({label:count})
print(id2label)
print(label2id)

['O', 'B-GENERAL__product-software', 'I-GENERAL__product-software', 'B-GENERAL__person-scholar', 'I-GENERAL__person-scholar', 'B-GENERAL__event-attack/battle/war/militaryconflict', 'I-GENERAL__event-attack/battle/war/militaryconflict', 'B-DATETIME__range', 'I-DATETIME__range', 'B-GENERAL__other-award', 'I-GENERAL__other-award', 'B-GENERAL__other-disease', 'I-GENERAL__other-disease', 'B-GENERAL__building-sportsfacility', 'I-GENERAL__building-sportsfacility', 'B-DATETIME__authored', 'I-DATETIME__authored', 'B-GENERAL__location-bodiesofwater', 'I-GENERAL__location-bodiesofwater', 'B-GENERAL__art-writtenart', 'I-GENERAL__art-writtenart', 'B-GENERAL__other-god', 'I-GENERAL__other-god', 'B-DATETIME__relative', 'I-DATETIME__relative', 'B-GENERAL__organization-government/governmentagency', 'I-GENERAL__organization-government/governmentagency', 'B-GENERAL__product-weapon', 'I-GENERAL__product-weapon', 'B-GENERAL__other-language', 'I-GENERAL__other-language', 'B-GENERAL__event-other', 'I-GENERAL

The letter that prefixes each `ner_tag` indicates the token position of the entity:

- `B-` indicates the beginning of an entity.
- `I-` indicates a token is contained inside the same entity (for example, the `State` token is a part of an entity like
  `Empire State Building`).
- `0` indicates the token doesn't correspond to any entity.

In [5]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased")

In [6]:
example = wnut["train"][0]
tokenized_input = tokenizer(example["tokens"], is_split_into_words=True)
tokens = tokenizer.convert_ids_to_tokens(tokenized_input["input_ids"])
print(tokens)

['[CLS]', 'subject', ':', '[UNK]', 'welcome', 'to', 'our', 'exclusive', 'loyalty', 'program', '!', '[UNK]', 'hey', 'there', ',', 'we', '’', 're', 'super', 'excited', 'to', 'announce', 'our', 'brand', 'new', 'customer', 'loyalty', 'program', 'at', 'bella', "'", 's', 'bis', '##tro', '!', '[UNK]', 'starting', 'from', 'october', '15th', ',', '202', '##3', ',', 'you', 'can', 'start', 'earning', 'points', 'every', 'time', 'you', 'din', '##e', 'with', 'us', '.', 'it', "'", 's', 'our', 'way', 'of', 'saying', 'thanks', 'for', 'being', 'such', 'an', 'awesome', 'customer', '!', 'here', '’', 's', 'the', 'low', '##down', ':', 'for', 'every', '$', '1', 'you', 'spend', ',', 'you', '’', 'll', 'earn', '10', 'points', '.', 'rack', 'up', '500', 'points', 'and', 'you', '’', 'll', 'get', 'a', '$', '25', 'gift', 'card', 'to', 'use', 'on', 'your', 'next', 'visit', '.', 'plus', ',', 'we', '’', 've', 'got', 'some', 'sweet', 'per', '##ks', 'like', 'birthday', 'free', '##bies', ',', 'early', 'access', 'to', 'spe

In [7]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

    labels = []
    for i, label in enumerate(examples[f"ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:  # Set the special tokens to -100.
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

In [8]:
tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)

In [9]:
from transformers import DataCollatorForTokenClassification

data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)

In [10]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

model = AutoModelForTokenClassification.from_pretrained(
    "google-bert/bert-base-uncased", num_labels=125, id2label=id2label, label2id=label2id
)

Some weights of BertForTokenClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
import evaluate

seqeval = evaluate.load("seqeval")

In [12]:
import numpy as np

labels = [label_list[i] for i in example[f"ner_tags"]]


def compute_metrics(p):
    predictions, labels = p
    predictions = np.argmax(predictions, axis=2)

    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    results = seqeval.compute(predictions=true_predictions, references=true_labels)
    return {
        "precision": results["overall_precision"],
        "recall": results["overall_recall"],
        "f1": results["overall_f1"],
        "accuracy": results["overall_accuracy"],
    }

In [13]:
training_args = TrainingArguments(
    output_dir="my_awesome_wnut_model",
    evaluation_strategy="steps",
        do_eval=True,
        optim="adamw_torch",
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        per_device_eval_batch_size=8,
        log_level="debug",
        save_strategy="epoch",
        logging_steps=10,
        learning_rate=5e-5,
        #fp16 = not torch.cuda.is_bf16_supported(),
        #bf16 = torch.cuda.is_bf16_supported(),
        eval_steps=30,
        num_train_epochs=1,
        warmup_ratio=0.1,
        lr_scheduler_type="inverse_sqrt",
        report_to="wandb",
        seed=42,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_wnut["train"],
    eval_dataset=tokenized_wnut["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    #compute_metrics=compute_metrics,
)



In [14]:
trainer.train()

Currently training with a batch size of: 8
The following columns in the training set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 39,999
  Num Epochs = 1
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 4
  Total optimization steps = 1,250
  Number of trainable parameters = 108,987,773
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdhanishetty[0m ([33mdhanishetty-personalus

  0%|          | 0/1250 [00:00<?, ?it/s]

  attn_output = torch.nn.functional.scaled_dot_product_attention(


{'loss': 4.9177, 'grad_norm': 9.90449047088623, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.01}
{'loss': 4.1744, 'grad_norm': 12.41919231414795, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.02}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 2.2234, 'grad_norm': 10.41997241973877, 'learning_rate': 1.2e-05, 'epoch': 0.02}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.9557464718818665, 'eval_runtime': 88.1607, 'eval_samples_per_second': 56.715, 'eval_steps_per_second': 7.089, 'epoch': 0.02}
{'loss': 0.675, 'grad_norm': 1.0343400239944458, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.03}
{'loss': 0.4893, 'grad_norm': 0.4469161331653595, 'learning_rate': 2e-05, 'epoch': 0.04}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.4601, 'grad_norm': 0.34064096212387085, 'learning_rate': 2.4e-05, 'epoch': 0.05}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.4359284043312073, 'eval_runtime': 90.6478, 'eval_samples_per_second': 55.159, 'eval_steps_per_second': 6.895, 'epoch': 0.05}
{'loss': 0.4374, 'grad_norm': 0.5447105169296265, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.06}
{'loss': 0.3573, 'grad_norm': 0.5032377243041992, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.06}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.3149, 'grad_norm': 0.470638632774353, 'learning_rate': 3.6e-05, 'epoch': 0.07}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.27947095036506653, 'eval_runtime': 92.8036, 'eval_samples_per_second': 53.877, 'eval_steps_per_second': 6.735, 'epoch': 0.07}
{'loss': 0.3011, 'grad_norm': 0.5721376538276672, 'learning_rate': 4e-05, 'epoch': 0.08}
{'loss': 0.2709, 'grad_norm': 0.38807451725006104, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.09}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.2403, 'grad_norm': 0.7627832889556885, 'learning_rate': 4.8e-05, 'epoch': 0.1}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.21418051421642303, 'eval_runtime': 93.4005, 'eval_samples_per_second': 53.533, 'eval_steps_per_second': 6.692, 'epoch': 0.1}
{'loss': 0.2181, 'grad_norm': 0.39021122455596924, 'learning_rate': 4.902903378454601e-05, 'epoch': 0.1}
{'loss': 0.2066, 'grad_norm': 0.6714336276054382, 'learning_rate': 4.72455591261534e-05, 'epoch': 0.11}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.1876, 'grad_norm': 0.5277619361877441, 'learning_rate': 4.564354645876385e-05, 'epoch': 0.12}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.1714152842760086, 'eval_runtime': 93.9757, 'eval_samples_per_second': 53.205, 'eval_steps_per_second': 6.651, 'epoch': 0.12}
{'loss': 0.1822, 'grad_norm': 0.3209989070892334, 'learning_rate': 4.4194173824159226e-05, 'epoch': 0.13}
{'loss': 0.1587, 'grad_norm': 0.36568185687065125, 'learning_rate': 4.2874646285627205e-05, 'epoch': 0.14}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.1691, 'grad_norm': 0.25880080461502075, 'learning_rate': 4.166666666666667e-05, 'epoch': 0.14}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.14884047210216522, 'eval_runtime': 94.5431, 'eval_samples_per_second': 52.886, 'eval_steps_per_second': 6.611, 'epoch': 0.14}
{'loss': 0.1645, 'grad_norm': 0.443061500787735, 'learning_rate': 4.055535528269064e-05, 'epoch': 0.15}
{'loss': 0.1439, 'grad_norm': 0.42140403389930725, 'learning_rate': 3.952847075210474e-05, 'epoch': 0.16}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.1394, 'grad_norm': 0.4118771255016327, 'learning_rate': 3.857583749052298e-05, 'epoch': 0.17}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.12734360992908478, 'eval_runtime': 95.0555, 'eval_samples_per_second': 52.601, 'eval_steps_per_second': 6.575, 'epoch': 0.17}
{'loss': 0.1291, 'grad_norm': 0.3343525230884552, 'learning_rate': 3.7688918072220455e-05, 'epoch': 0.18}
{'loss': 0.1169, 'grad_norm': 0.2932218015193939, 'learning_rate': 3.686048903872428e-05, 'epoch': 0.18}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.1264, 'grad_norm': 0.28592023253440857, 'learning_rate': 3.608439182435161e-05, 'epoch': 0.19}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.11595567315816879, 'eval_runtime': 95.153, 'eval_samples_per_second': 52.547, 'eval_steps_per_second': 6.568, 'epoch': 0.19}
{'loss': 0.1277, 'grad_norm': 0.4229891300201416, 'learning_rate': 3.535533905932738e-05, 'epoch': 0.2}
{'loss': 0.1156, 'grad_norm': 0.27077800035476685, 'learning_rate': 3.466876226407682e-05, 'epoch': 0.21}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.1113, 'grad_norm': 0.23715676367282867, 'learning_rate': 3.402069087198859e-05, 'epoch': 0.22}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.10786109417676926, 'eval_runtime': 95.2943, 'eval_samples_per_second': 52.469, 'eval_steps_per_second': 6.559, 'epoch': 0.22}
{'loss': 0.1123, 'grad_norm': 0.3651641607284546, 'learning_rate': 3.3407655239053046e-05, 'epoch': 0.22}
{'loss': 0.1081, 'grad_norm': 0.29335781931877136, 'learning_rate': 3.282660821493064e-05, 'epoch': 0.23}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.1148, 'grad_norm': 0.32116663455963135, 'learning_rate': 3.2274861218395145e-05, 'epoch': 0.24}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.09916553646326065, 'eval_runtime': 95.2902, 'eval_samples_per_second': 52.471, 'eval_steps_per_second': 6.559, 'epoch': 0.24}
{'loss': 0.1, 'grad_norm': 0.43275970220565796, 'learning_rate': 3.1750031750047625e-05, 'epoch': 0.25}
{'loss': 0.0996, 'grad_norm': 0.5081122517585754, 'learning_rate': 3.125e-05, 'epoch': 0.26}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0995, 'grad_norm': 0.3384336531162262, 'learning_rate': 3.0772872744833186e-05, 'epoch': 0.26}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.09402385354042053, 'eval_runtime': 95.5319, 'eval_samples_per_second': 52.339, 'eval_steps_per_second': 6.542, 'epoch': 0.26}
{'loss': 0.1101, 'grad_norm': 0.384884238243103, 'learning_rate': 3.031695312954162e-05, 'epoch': 0.27}
{'loss': 0.0956, 'grad_norm': 0.2910580337047577, 'learning_rate': 2.988071523335984e-05, 'epoch': 0.28}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.096, 'grad_norm': 0.3370329439640045, 'learning_rate': 2.9462782549439484e-05, 'epoch': 0.29}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.09407379478216171, 'eval_runtime': 95.7259, 'eval_samples_per_second': 52.232, 'eval_steps_per_second': 6.529, 'epoch': 0.29}
{'loss': 0.0993, 'grad_norm': 0.39330679178237915, 'learning_rate': 2.9061909685954824e-05, 'epoch': 0.3}
{'loss': 0.0941, 'grad_norm': 0.24604696035385132, 'learning_rate': 2.8676966733820225e-05, 'epoch': 0.3}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0954, 'grad_norm': 0.41135162115097046, 'learning_rate': 2.8306925853614895e-05, 'epoch': 0.31}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.08541449904441833, 'eval_runtime': 95.7161, 'eval_samples_per_second': 52.238, 'eval_steps_per_second': 6.53, 'epoch': 0.31}
{'loss': 0.0869, 'grad_norm': 0.3805256485939026, 'learning_rate': 2.7950849718747376e-05, 'epoch': 0.32}
{'loss': 0.098, 'grad_norm': 0.4700911343097687, 'learning_rate': 2.7607881518711635e-05, 'epoch': 0.33}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.089, 'grad_norm': 0.31023702025413513, 'learning_rate': 2.7277236279499045e-05, 'epoch': 0.34}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.08985783159732819, 'eval_runtime': 95.7688, 'eval_samples_per_second': 52.209, 'eval_steps_per_second': 6.526, 'epoch': 0.34}
{'loss': 0.0901, 'grad_norm': 0.30388543009757996, 'learning_rate': 2.6958193300859607e-05, 'epoch': 0.34}
{'loss': 0.0838, 'grad_norm': 0.4077324867248535, 'learning_rate': 2.6650089544451305e-05, 'epoch': 0.35}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0826, 'grad_norm': 0.31819066405296326, 'learning_rate': 2.6352313834736496e-05, 'epoch': 0.36}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.08412227779626846, 'eval_runtime': 98.1464, 'eval_samples_per_second': 50.944, 'eval_steps_per_second': 6.368, 'epoch': 0.36}
{'loss': 0.0839, 'grad_norm': 0.31286558508872986, 'learning_rate': 2.6064301757134345e-05, 'epoch': 0.37}
{'loss': 0.0911, 'grad_norm': 0.3003038167953491, 'learning_rate': 2.578553115646984e-05, 'epoch': 0.38}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0872, 'grad_norm': 0.32445192337036133, 'learning_rate': 2.5515518153991443e-05, 'epoch': 0.38}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.08109450340270996, 'eval_runtime': 95.5679, 'eval_samples_per_second': 52.319, 'eval_steps_per_second': 6.54, 'epoch': 0.38}
{'loss': 0.0951, 'grad_norm': 0.3692586421966553, 'learning_rate': 2.5253813613805265e-05, 'epoch': 0.39}
{'loss': 0.0833, 'grad_norm': 0.39296185970306396, 'learning_rate': 2.5e-05, 'epoch': 0.4}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0794, 'grad_norm': 0.2961695194244385, 'learning_rate': 2.4753688574416857e-05, 'epoch': 0.41}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.07591819018125534, 'eval_runtime': 95.4623, 'eval_samples_per_second': 52.377, 'eval_steps_per_second': 6.547, 'epoch': 0.41}
{'loss': 0.0813, 'grad_norm': 0.41920268535614014, 'learning_rate': 2.4514516892273005e-05, 'epoch': 0.42}
{'loss': 0.0767, 'grad_norm': 0.32657432556152344, 'learning_rate': 2.4282146558931605e-05, 'epoch': 0.42}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0756, 'grad_norm': 0.28314897418022156, 'learning_rate': 2.4056261216234405e-05, 'epoch': 0.43}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.07660890370607376, 'eval_runtime': 95.591, 'eval_samples_per_second': 52.306, 'eval_steps_per_second': 6.538, 'epoch': 0.43}
{'loss': 0.0797, 'grad_norm': 0.30566734075546265, 'learning_rate': 2.383656473113981e-05, 'epoch': 0.44}
{'loss': 0.0815, 'grad_norm': 0.44152235984802246, 'learning_rate': 2.36227795630767e-05, 'epoch': 0.45}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0826, 'grad_norm': 0.2717912793159485, 'learning_rate': 2.341464528954235e-05, 'epoch': 0.46}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.07285229861736298, 'eval_runtime': 95.7297, 'eval_samples_per_second': 52.23, 'eval_steps_per_second': 6.529, 'epoch': 0.46}
{'loss': 0.0841, 'grad_norm': 0.3223874270915985, 'learning_rate': 2.3211917272131486e-05, 'epoch': 0.46}
{'loss': 0.0824, 'grad_norm': 0.33856770396232605, 'learning_rate': 2.301436544745809e-05, 'epoch': 0.47}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0841, 'grad_norm': 0.3864303529262543, 'learning_rate': 2.2821773229381925e-05, 'epoch': 0.48}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.07150722295045853, 'eval_runtime': 95.7312, 'eval_samples_per_second': 52.23, 'eval_steps_per_second': 6.529, 'epoch': 0.48}
{'loss': 0.0724, 'grad_norm': 0.2857248783111572, 'learning_rate': 2.2633936510629633e-05, 'epoch': 0.49}
{'loss': 0.0713, 'grad_norm': 0.37248024344444275, 'learning_rate': 2.2450662753346862e-05, 'epoch': 0.5}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.076, 'grad_norm': 0.2778356075286865, 'learning_rate': 2.22717701593687e-05, 'epoch': 0.5}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.0736687108874321, 'eval_runtime': 95.7616, 'eval_samples_per_second': 52.213, 'eval_steps_per_second': 6.527, 'epoch': 0.5}
{'loss': 0.0696, 'grad_norm': 0.25716230273246765, 'learning_rate': 2.2097086912079613e-05, 'epoch': 0.51}
{'loss': 0.0762, 'grad_norm': 0.35245102643966675, 'learning_rate': 2.1926450482675732e-05, 'epoch': 0.52}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0746, 'grad_norm': 0.34155288338661194, 'learning_rate': 2.175970699446223e-05, 'epoch': 0.53}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06906171888113022, 'eval_runtime': 95.7604, 'eval_samples_per_second': 52.214, 'eval_steps_per_second': 6.527, 'epoch': 0.53}
{'loss': 0.0749, 'grad_norm': 0.2651234269142151, 'learning_rate': 2.1596710639534002e-05, 'epoch': 0.54}
{'loss': 0.0776, 'grad_norm': 0.3137856721878052, 'learning_rate': 2.1437323142813602e-05, 'epoch': 0.54}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0719, 'grad_norm': 0.40775513648986816, 'learning_rate': 2.1281413268968714e-05, 'epoch': 0.55}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06967412680387497, 'eval_runtime': 96.1269, 'eval_samples_per_second': 52.015, 'eval_steps_per_second': 6.502, 'epoch': 0.55}
{'loss': 0.0726, 'grad_norm': 0.3233773112297058, 'learning_rate': 2.1128856368212917e-05, 'epoch': 0.56}
{'loss': 0.0705, 'grad_norm': 0.285941481590271, 'learning_rate': 2.097953395741723e-05, 'epoch': 0.57}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0722, 'grad_norm': 0.29153862595558167, 'learning_rate': 2.0833333333333336e-05, 'epoch': 0.58}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06733627617359161, 'eval_runtime': 95.8533, 'eval_samples_per_second': 52.163, 'eval_steps_per_second': 6.52, 'epoch': 0.58}
{'loss': 0.0752, 'grad_norm': 0.3015619218349457, 'learning_rate': 2.06901472150592e-05, 'epoch': 0.58}
{'loss': 0.0708, 'grad_norm': 0.3185776174068451, 'learning_rate': 2.0549873413169665e-05, 'epoch': 0.59}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0713, 'grad_norm': 0.33445772528648376, 'learning_rate': 2.0412414523193156e-05, 'epoch': 0.6}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06563226878643036, 'eval_runtime': 96.1632, 'eval_samples_per_second': 51.995, 'eval_steps_per_second': 6.499, 'epoch': 0.6}
{'loss': 0.0635, 'grad_norm': 0.2805996239185333, 'learning_rate': 2.027767764134532e-05, 'epoch': 0.61}
{'loss': 0.0676, 'grad_norm': 0.25092166662216187, 'learning_rate': 2.0145574100634505e-05, 'epoch': 0.62}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0671, 'grad_norm': 0.41168585419654846, 'learning_rate': 2.0016019225635894e-05, 'epoch': 0.62}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06524062156677246, 'eval_runtime': 95.9363, 'eval_samples_per_second': 52.118, 'eval_steps_per_second': 6.515, 'epoch': 0.62}
{'loss': 0.0717, 'grad_norm': 0.3585234582424164, 'learning_rate': 1.9888932104393253e-05, 'epoch': 0.63}
{'loss': 0.0742, 'grad_norm': 0.3386789560317993, 'learning_rate': 1.976423537605237e-05, 'epoch': 0.64}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0741, 'grad_norm': 0.30379876494407654, 'learning_rate': 1.9641855032959654e-05, 'epoch': 0.65}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06750655174255371, 'eval_runtime': 96.2378, 'eval_samples_per_second': 51.955, 'eval_steps_per_second': 6.494, 'epoch': 0.65}
{'loss': 0.0803, 'grad_norm': 0.38160353899002075, 'learning_rate': 1.9521720236075758e-05, 'epoch': 0.66}
{'loss': 0.0726, 'grad_norm': 0.46411585807800293, 'learning_rate': 1.940376314265832e-05, 'epoch': 0.66}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0723, 'grad_norm': 0.26500216126441956, 'learning_rate': 1.928791874526149e-05, 'epoch': 0.67}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06631357222795486, 'eval_runtime': 96.0793, 'eval_samples_per_second': 52.04, 'eval_steps_per_second': 6.505, 'epoch': 0.67}
{'loss': 0.0666, 'grad_norm': 0.28501251339912415, 'learning_rate': 1.917412472118426e-05, 'epoch': 0.68}
{'loss': 0.0565, 'grad_norm': 0.2570032477378845, 'learning_rate': 1.9062321291575584e-05, 'epoch': 0.69}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0687, 'grad_norm': 0.3425997495651245, 'learning_rate': 1.8952451089472586e-05, 'epoch': 0.7}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06494321674108505, 'eval_runtime': 96.1946, 'eval_samples_per_second': 51.978, 'eval_steps_per_second': 6.497, 'epoch': 0.7}
{'loss': 0.0676, 'grad_norm': 0.3043898940086365, 'learning_rate': 1.8844459036110228e-05, 'epoch': 0.7}
{'loss': 0.0682, 'grad_norm': 0.33488571643829346, 'learning_rate': 1.873829222489654e-05, 'epoch': 0.71}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.067, 'grad_norm': 0.3252585530281067, 'learning_rate': 1.8633899812498247e-05, 'epoch': 0.72}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06366347521543503, 'eval_runtime': 96.264, 'eval_samples_per_second': 51.94, 'eval_steps_per_second': 6.493, 'epoch': 0.72}
{'loss': 0.0641, 'grad_norm': 0.284531831741333, 'learning_rate': 1.853123291652753e-05, 'epoch': 0.73}
{'loss': 0.0671, 'grad_norm': 0.2929619252681732, 'learning_rate': 1.843024451936214e-05, 'epoch': 0.74}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0623, 'grad_norm': 0.31288209557533264, 'learning_rate': 1.833088937766916e-05, 'epoch': 0.74}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06425225734710693, 'eval_runtime': 95.8811, 'eval_samples_per_second': 52.148, 'eval_steps_per_second': 6.518, 'epoch': 0.74}
{'loss': 0.0697, 'grad_norm': 0.3102553188800812, 'learning_rate': 1.823312393723682e-05, 'epoch': 0.75}
{'loss': 0.0621, 'grad_norm': 0.30535757541656494, 'learning_rate': 1.8136906252750293e-05, 'epoch': 0.76}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0599, 'grad_norm': 0.28860846161842346, 'learning_rate': 1.8042195912175806e-05, 'epoch': 0.77}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06426078826189041, 'eval_runtime': 95.742, 'eval_samples_per_second': 52.224, 'eval_steps_per_second': 6.528, 'epoch': 0.77}
{'loss': 0.0676, 'grad_norm': 0.34121981263160706, 'learning_rate': 1.7948953965443456e-05, 'epoch': 0.78}
{'loss': 0.0636, 'grad_norm': 0.36336925625801086, 'learning_rate': 1.785714285714286e-05, 'epoch': 0.78}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0686, 'grad_norm': 0.326299250125885, 'learning_rate': 1.7766726362967538e-05, 'epoch': 0.79}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06235704943537712, 'eval_runtime': 95.6664, 'eval_samples_per_second': 52.265, 'eval_steps_per_second': 6.533, 'epoch': 0.79}
{'loss': 0.0631, 'grad_norm': 0.33014631271362305, 'learning_rate': 1.767766952966369e-05, 'epoch': 0.8}
{'loss': 0.0727, 'grad_norm': 0.3273756802082062, 'learning_rate': 1.75899386182573e-05, 'epoch': 0.81}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0638, 'grad_norm': 0.3181602954864502, 'learning_rate': 1.7503501050350124e-05, 'epoch': 0.82}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06225712224841118, 'eval_runtime': 95.7445, 'eval_samples_per_second': 52.222, 'eval_steps_per_second': 6.528, 'epoch': 0.82}
{'loss': 0.0626, 'grad_norm': 0.4268520474433899, 'learning_rate': 1.741832535729044e-05, 'epoch': 0.82}
{'loss': 0.0677, 'grad_norm': 0.2858629822731018, 'learning_rate': 1.733438113203841e-05, 'epoch': 0.83}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0565, 'grad_norm': 0.31830134987831116, 'learning_rate': 1.7251638983558856e-05, 'epoch': 0.84}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06258691102266312, 'eval_runtime': 95.8511, 'eval_samples_per_second': 52.164, 'eval_steps_per_second': 6.521, 'epoch': 0.84}
{'loss': 0.0623, 'grad_norm': 0.37158316373825073, 'learning_rate': 1.717007049358613e-05, 'epoch': 0.85}
{'loss': 0.0665, 'grad_norm': 0.2898356020450592, 'learning_rate': 1.708964817561658e-05, 'epoch': 0.86}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0614, 'grad_norm': 0.3442445397377014, 'learning_rate': 1.7010345435994294e-05, 'epoch': 0.86}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06150106340646744, 'eval_runtime': 95.8783, 'eval_samples_per_second': 52.149, 'eval_steps_per_second': 6.519, 'epoch': 0.86}
{'loss': 0.0623, 'grad_norm': 0.2743555009365082, 'learning_rate': 1.693213653696491e-05, 'epoch': 0.87}
{'loss': 0.0628, 'grad_norm': 0.33757779002189636, 'learning_rate': 1.685499656158105e-05, 'epoch': 0.88}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.063, 'grad_norm': 0.3253263831138611, 'learning_rate': 1.677890138035061e-05, 'epoch': 0.89}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.059201691299676895, 'eval_runtime': 95.9023, 'eval_samples_per_second': 52.136, 'eval_steps_per_second': 6.517, 'epoch': 0.89}
{'loss': 0.0609, 'grad_norm': 0.35458680987358093, 'learning_rate': 1.6703827619526523e-05, 'epoch': 0.9}
{'loss': 0.0643, 'grad_norm': 0.3138948678970337, 'learning_rate': 1.6629752630943485e-05, 'epoch': 0.9}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0592, 'grad_norm': 0.4406217634677887, 'learning_rate': 1.6556654463313047e-05, 'epoch': 0.91}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.06179441511631012, 'eval_runtime': 96.1634, 'eval_samples_per_second': 51.995, 'eval_steps_per_second': 6.499, 'epoch': 0.91}
{'loss': 0.0681, 'grad_norm': 0.2683200538158417, 'learning_rate': 1.648451183489468e-05, 'epoch': 0.92}
{'loss': 0.0639, 'grad_norm': 0.31618234515190125, 'learning_rate': 1.641330410746532e-05, 'epoch': 0.93}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0687, 'grad_norm': 0.3998905420303345, 'learning_rate': 1.6343011261515337e-05, 'epoch': 0.94}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.061761219054460526, 'eval_runtime': 96.097, 'eval_samples_per_second': 52.031, 'eval_steps_per_second': 6.504, 'epoch': 0.94}
{'loss': 0.061, 'grad_norm': 0.25416696071624756, 'learning_rate': 1.6273613872602983e-05, 'epoch': 0.94}
{'loss': 0.0566, 'grad_norm': 0.25004205107688904, 'learning_rate': 1.620509308880411e-05, 'epoch': 0.95}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0577, 'grad_norm': 0.2766192853450775, 'learning_rate': 1.6137430609197572e-05, 'epoch': 0.96}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.05996313318610191, 'eval_runtime': 95.8707, 'eval_samples_per_second': 52.154, 'eval_steps_per_second': 6.519, 'epoch': 0.96}
{'loss': 0.0643, 'grad_norm': 0.309196799993515, 'learning_rate': 1.6070608663330624e-05, 'epoch': 0.97}
{'loss': 0.0632, 'grad_norm': 0.26700371503829956, 'learning_rate': 1.6004609991612e-05, 'epoch': 0.98}


The following columns in the evaluation set don't have a corresponding argument in `BertForTokenClassification.forward` and have been ignored: sentence_id, ner_tags, tokens, document_id. If sentence_id, ner_tags, tokens, document_id are not expected by `BertForTokenClassification.forward`,  you can safely ignore this message.

***** Running Evaluation *****
  Num examples = 5000
  Batch size = 8


{'loss': 0.0629, 'grad_norm': 0.30616605281829834, 'learning_rate': 1.593941782658346e-05, 'epoch': 0.98}


  0%|          | 0/625 [00:00<?, ?it/s]

{'eval_loss': 0.059648316353559494, 'eval_runtime': 95.6266, 'eval_samples_per_second': 52.287, 'eval_steps_per_second': 6.536, 'epoch': 0.98}
{'loss': 0.0602, 'grad_norm': 0.3527510464191437, 'learning_rate': 1.5875015875023812e-05, 'epoch': 0.99}


Saving model checkpoint to my_awesome_wnut_model\checkpoint-1250
Configuration saved in my_awesome_wnut_model\checkpoint-1250\config.json


{'loss': 0.0629, 'grad_norm': 0.271147221326828, 'learning_rate': 1.5811388300841898e-05, 'epoch': 1.0}


Model weights saved in my_awesome_wnut_model\checkpoint-1250\model.safetensors
tokenizer config file saved in my_awesome_wnut_model\checkpoint-1250\tokenizer_config.json
Special tokens file saved in my_awesome_wnut_model\checkpoint-1250\special_tokens_map.json
Saving model checkpoint to my_awesome_wnut_model\checkpoint-1250
Configuration saved in my_awesome_wnut_model\checkpoint-1250\config.json
Model weights saved in my_awesome_wnut_model\checkpoint-1250\model.safetensors
tokenizer config file saved in my_awesome_wnut_model\checkpoint-1250\tokenizer_config.json
Special tokens file saved in my_awesome_wnut_model\checkpoint-1250\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)




{'train_runtime': 6298.7585, 'train_samples_per_second': 6.35, 'train_steps_per_second': 0.198, 'train_loss': 0.19616772747039796, 'epoch': 1.0}


TrainOutput(global_step=1250, training_loss=0.19616772747039796, metrics={'train_runtime': 6298.7585, 'train_samples_per_second': 6.35, 'train_steps_per_second': 0.198, 'total_flos': 1.0282625919198e+16, 'train_loss': 0.19616772747039796, 'epoch': 1.0})

In [15]:
trainer.push_to_hub("dhanishetty/bert-base-uncased_adapters" , token= "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"
                    )

Saving model checkpoint to my_awesome_wnut_model
Configuration saved in my_awesome_wnut_model\config.json
Model weights saved in my_awesome_wnut_model\model.safetensors
tokenizer config file saved in my_awesome_wnut_model\tokenizer_config.json
Special tokens file saved in my_awesome_wnut_model\special_tokens_map.json
Dropping the following result as it does not have all the necessary fields:
{'task': {'name': 'Token Classification', 'type': 'token-classification'}}


model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.11k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/dhanishetty/my_awesome_wnut_model/commit/4ef537fc57cebe9fb970f0de1084cb822c170e7b', commit_message='dhanishetty/bert-base-uncased_adapters', commit_description='', oid='4ef537fc57cebe9fb970f0de1084cb822c170e7b', pr_url=None, pr_revision=None, pr_num=None)