Reference: https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb

In [1]:
import transformers

In [2]:
transformers.__version__

'4.25.1'

### Token Classification

The most common token classification tasks are:

a. Named-entity recognition (NER)

b. Part-of-speech tagging (POS)

c. Chunking: grammatically classify the tokens and group them into "chunks" that go together

In [3]:
task= "ner"   # should be one of "ner", "pos", or "chunk"
model_checkpoint= "distilbert-base-uncased"
batch_size=16

##### Loading the dataset

We use the Datasets library to download the data and to load the metric we need for evaluation.

If we want to use our own dataset defined in a JSON or CSV file, the names of the columns used below may need some adjustments.
ref: https://huggingface.co/docs/datasets/quickstart

In [4]:
from datasets import load_dataset, load_metric

datasets= load_dataset("conll2003")

Found cached dataset conll2003 (C:/Users/AIXI/.cache/huggingface/datasets/conll2003/conll2003/1.0.0/9a4d16a94f8674ba3466315300359b0acd891b68b6c8743ddf60b9c702adce98)


  0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
# https://aclanthology.org/W03-0419.pdf
datasets

DatasetDict({
    train: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 14041
    })
    validation: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3250
    })
    test: Dataset({
        features: ['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'],
        num_rows: 3453
    })
})

In [6]:
datasets['train'][0]

{'id': '0',
 'tokens': ['EU',
  'rejects',
  'German',
  'call',
  'to',
  'boycott',
  'British',
  'lamb',
  '.'],
 'pos_tags': [22, 42, 16, 21, 35, 37, 16, 21, 7],
 'chunk_tags': [11, 21, 11, 12, 21, 22, 11, 12, 0],
 'ner_tags': [3, 0, 7, 0, 0, 0, 7, 0, 0]}

In [7]:
# Inspect the feature schema of the NER label column (plain string key: no f-string needed).
datasets['train'].features["ner_tags"]

Sequence(feature=ClassLabel(names=['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC'], id=None), length=-1, id=None)

So for the NER tags, 0 corresponds to 'O', 1 to 'B-PER', etc. On top of the 'O' (which means no special entity), there are four entity types for NER here, each prefixed with 'B-' (for beginning) or 'I-' (for inside), which indicates whether or not the token is the first one of the current group with that label:
- 'PER' for person
- 'ORG' for organization
- 'LOC' for location
- 'MISC' for miscellaneous

In [8]:
label_list= datasets['train'].features[f"{task}_tags"].feature.names
label_list

['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-MISC', 'I-MISC']

In [15]:
from datasets import ClassLabel, Sequence
import random
import pandas as pd
from IPython.display import display, HTML

def show_random_elements(dataset, num_examples=10):
    """Display `num_examples` distinct, randomly chosen rows of `dataset` as an HTML table.

    Integer class ids are replaced by their label names, both for plain
    `ClassLabel` columns and for `Sequence` columns whose inner feature is a
    `ClassLabel`, so the table is readable.

    Args:
        dataset: object supporting `len()`, indexing with a list of row indices,
            and a `.features` mapping of column name -> feature type.
        num_examples: number of distinct rows to show; must not exceed `len(dataset)`.

    Raises:
        AssertionError: if `num_examples` is larger than the dataset.
        ValueError: if `num_examples` is negative (raised by `random.sample`).
    """
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."

    # random.sample draws without replacement, so the indices are unique by
    # construction and no retry loop is needed.
    picks = random.sample(range(len(dataset)), num_examples)

    # build a dataframe holding only the selected rows
    df = pd.DataFrame(dataset[picks])

    for column, typ in dataset.features.items():
        if isinstance(typ, ClassLabel):
            # single label per row: map the integer id to its name
            df[column] = df[column].transform(lambda i: typ.names[i])
        elif isinstance(typ, Sequence) and isinstance(typ.feature, ClassLabel):
            # list of labels per row: map every integer id in the list to its name
            df[column] = df[column].transform(lambda x: [typ.feature.names[i] for i in x])
    display(HTML(df.to_html()))

In [14]:
show_random_elements(datasets["train"], num_examples=1)

Unnamed: 0,id,tokens,pos_tags,chunk_tags,ner_tags
0,12641,"[SOCCER, -, DUTCH, FIRST, DIVISION, RESULTS, /, STANDINGS, .]","[NN, :, VB, NNP, NNP, NNS, SYM, NNS, .]","[B-NP, O, B-VP, B-NP, I-NP, I-NP, O, B-NP, O]","[O, O, B-MISC, O, O, O, O, O, O]"


### Preprocessing the data

In [16]:
from transformers import AutoTokenizer

tokenizer= AutoTokenizer.from_pretrained(model_checkpoint)
tokenizer("Hello this is youkesh nepal")

{'input_ids': [101, 7592, 2023, 2003, 2017, 9681, 2232, 8222, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [17]:
# The following assertion ensures that our tokenizer is a fast tokenizer (backed by Rust) from the Tokenizers library.
# Fast tokenizers are available for almost all models, and we will
# need some of the special features they have for our preprocessing.

import transformers
assert isinstance(tokenizer, transformers.PreTrainedTokenizerFast)

In [44]:
# Tokenize the sentence once and print three views of the same encoding:
# the model inputs, the word index each token maps to, and the token strings.
demo_encoding = tokenizer("Hello this is yukesh nepal")
print(demo_encoding)
print(demo_encoding.word_ids())
print(demo_encoding.tokens())

{'input_ids': [101, 7592, 2023, 2003, 9805, 9681, 2232, 8222, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}
[None, 0, 1, 2, 3, 3, 3, 4, None]
['[CLS]', 'hello', 'this', 'is', 'yu', '##kes', '##h', 'nepal', '[SEP]']


Note:

Transformer models are often pretrained with subword tokenizers, meaning that even if our input has already been split into words, each of those words could be split again by the tokenizer.

In [21]:
tokenizer(["Hello", ",", "this", "is", "one", "sentence", "split", "into", "words", "."], is_split_into_words=True)

{'input_ids': [101, 7592, 1010, 2023, 2003, 2028, 6251, 3975, 2046, 2616, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [22]:
example = datasets["train"][4]
print(example["tokens"])

['Germany', "'s", 'representative', 'to', 'the', 'European', 'Union', "'s", 'veterinary', 'committee', 'Werner', 'Zwingmann', 'said', 'on', 'Wednesday', 'consumers', 'should', 'buy', 'sheepmeat', 'from', 'countries', 'other', 'than', 'Britain', 'until', 'the', 'scientific', 'advice', 'was', 'clearer', '.']


In [23]:
tokenized_input= tokenizer(example["tokens"], is_split_into_words=True)
print(tokenized_input)
tokens= tokenizer.convert_ids_to_tokens(tokenized_input["input_ids"])
print(tokens)

{'input_ids': [101, 2762, 1005, 1055, 4387, 2000, 1996, 2647, 2586, 1005, 1055, 15651, 2837, 14121, 1062, 9328, 5804, 2056, 2006, 9317, 10390, 2323, 4965, 8351, 4168, 4017, 2013, 3032, 2060, 2084, 3725, 2127, 1996, 4045, 6040, 2001, 24509, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
['[CLS]', 'germany', "'", 's', 'representative', 'to', 'the', 'european', 'union', "'", 's', 'veterinary', 'committee', 'werner', 'z', '##wing', '##mann', 'said', 'on', 'wednesday', 'consumers', 'should', 'buy', 'sheep', '##me', '##at', 'from', 'countries', 'other', 'than', 'britain', 'until', 'the', 'scientific', 'advice', 'was', 'clearer', '.', '[SEP]']


In [46]:
print(tokenizer.convert_ids_to_tokens(tokenized_input['input_ids'][0]))

[CLS]


In [52]:
print(example.keys())
print(example['ner_tags'])

dict_keys(['id', 'tokens', 'pos_tags', 'chunk_tags', 'ner_tags'])
[5, 0, 0, 0, 0, 3, 4, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0]


In [36]:
example

{'id': '4',
 'tokens': ['Germany',
  "'s",
  'representative',
  'to',
  'the',
  'European',
  'Union',
  "'s",
  'veterinary',
  'committee',
  'Werner',
  'Zwingmann',
  'said',
  'on',
  'Wednesday',
  'consumers',
  'should',
  'buy',
  'sheepmeat',
  'from',
  'countries',
  'other',
  'than',
  'Britain',
  'until',
  'the',
  'scientific',
  'advice',
  'was',
  'clearer',
  '.'],
 'pos_tags': [22,
  27,
  21,
  35,
  12,
  22,
  22,
  27,
  16,
  21,
  22,
  22,
  38,
  15,
  22,
  24,
  20,
  37,
  21,
  15,
  24,
  16,
  15,
  22,
  15,
  12,
  16,
  21,
  38,
  17,
  7],
 'chunk_tags': [11,
  11,
  12,
  13,
  11,
  12,
  12,
  11,
  12,
  12,
  12,
  12,
  21,
  13,
  11,
  12,
  21,
  22,
  11,
  13,
  11,
  1,
  13,
  11,
  17,
  11,
  12,
  12,
  21,
  1,
  0],
 'ner_tags': [5,
  0,
  0,
  0,
  0,
  3,
  4,
  0,
  0,
  0,
  1,
  2,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  5,
  0,
  0,
  0,
  0,
  0,
  0,
  0]}

In [25]:
print(example['ner_tags'])
print(example['tokens'])
print(len(example['ner_tags']))
print(len(example['tokens']))

[5, 0, 0, 0, 0, 3, 4, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0]
['Germany', "'s", 'representative', 'to', 'the', 'European', 'Union', "'s", 'veterinary', 'committee', 'Werner', 'Zwingmann', 'said', 'on', 'Wednesday', 'consumers', 'should', 'buy', 'sheepmeat', 'from', 'countries', 'other', 'than', 'Britain', 'until', 'the', 'scientific', 'advice', 'was', 'clearer', '.']
31
31


In [45]:
'''
The word_ids() method returns the same number of elements as "input_ids". It maps special tokens to None and every other
token to the index of the word it comes from. This way, we can align the labels with the processed input ids.
'''
print(tokenized_input.word_ids())
print("The length of word id index is: ", len(tokenized_input.word_ids()))
print("The length of input_ids is:" ,len(tokenized_input['input_ids']))

[None, 0, 1, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 11, 11, 11, 12, 13, 14, 15, 16, 17, 18, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, None]
The length of word id index is:  39
The length of input_ids is: 39


In [48]:
# here we set the labels of all special tokens to -100(the index that is ignored in pytorch) and the labels of all
# other tokens to the label of the word they come from.

word_ids= tokenized_input.word_ids()
aligned_labels= [-100 if i is None else example[f'{task}_tags'][i] for i in word_ids]
print(len(aligned_labels), len(tokenized_input["input_ids"]))

39 39


In [53]:
label_all_tokens= True

In [54]:
'''Function to preprocess our samples.
a. We feed the examples to the tokenizer with the argument truncation=True (to truncate any text longer than the maximum size allowed
by the model).
b. We set is_split_into_words=True, because the inputs are already split into words.
c. We align the labels with the token ids using the strategy above.
'''

def tokenize_and_align_labels(example):
    """Tokenize pre-split sentences and attach one label id per produced token.

    `example` is indexed with "tokens" and f"{task}_tags"; the tag entry is
    iterated as a batch, one list of word-level labels per sentence. The
    function relies on the notebook-level `tokenizer`, `task` and
    `label_all_tokens`.

    Returns the tokenizer output with an extra 'labels' entry that holds, for
    every sentence, a list of label ids aligned with the produced tokens.
    """
    tokenized_input = tokenizer(example["tokens"], truncation=True, is_split_into_words=True)

    def align(batch_index, word_labels):
        # Build the token-level labels for the sentence at `batch_index`.
        aligned = []
        last_word = None
        for word_idx in tokenized_input.word_ids(batch_index=batch_index):
            if word_idx is None:
                # special tokens map to no word (word id None); they get -100,
                # the label value that is ignored by the loss
                aligned.append(-100)
            elif word_idx == last_word and not label_all_tokens:
                # continuation piece of the same word, and only the first
                # piece of each word should carry the label
                aligned.append(-100)
            else:
                # first piece of a word, or any piece when label_all_tokens is set
                aligned.append(word_labels[word_idx])
            last_word = word_idx
        return aligned

    tokenized_input['labels'] = [
        align(i, word_labels) for i, word_labels in enumerate(example[f"{task}_tags"])
    ]
    return tokenized_input

In [55]:
tokenize_and_align_labels(datasets['train'][:5])

{'input_ids': [[101, 7327, 19164, 2446, 2655, 2000, 17757, 2329, 12559, 1012, 102], [101, 2848, 13934, 102], [101, 9371, 2727, 1011, 5511, 1011, 2570, 102], [101, 1996, 2647, 3222, 2056, 2006, 9432, 2009, 18335, 2007, 2446, 6040, 2000, 10390, 2000, 18454, 2078, 2329, 12559, 2127, 6529, 5646, 3251, 5506, 11190, 4295, 2064, 2022, 11860, 2000, 8351, 1012, 102], [101, 2762, 1005, 1055, 4387, 2000, 1996, 2647, 2586, 1005, 1055, 15651, 2837, 14121, 1062, 9328, 5804, 2056, 2006, 9317, 10390, 2323, 4965, 8351, 4168, 4017, 2013, 3032, 2060, 2084, 3725, 2127, 1996, 4045, 6040, 2001, 24509, 1012, 102]], 'attention_mask': [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], 'labels': [[-100, 3, 0, 7, 0, 0, 0, 7, 0, 0, -100], [-100, 1, 2, -100], [-100, 5, 0, 

In [56]:
tokenized_datasets = datasets.map(tokenize_and_align_labels, batched=True)

Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/3453 [00:00<?, ? examples/s]

### Fine-Tuning the Model

In [57]:
from transformers import AutoModelForTokenClassification, TrainingArguments, Trainer

model= AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels= len(label_list))

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForTokenClassification: ['vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForTokenClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN t

In [60]:
model_name= model_checkpoint.split("/")[-1]
args= TrainingArguments(
    f"{model_name}-finetuned-{task}",
    evaluation_strategy= "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub= False,
)

In [61]:
from transformers import DataCollatorForTokenClassification

data_collator= DataCollatorForTokenClassification(tokenizer)

In [64]:
metric= load_metric("seqeval")

In [65]:
labels = [label_list[i] for i in example[f"{task}_tags"]]
metric.compute(predictions=[labels], references=[labels])

{'LOC': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 2},
 'ORG': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 1},
 'PER': {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'number': 1},
 'overall_precision': 1.0,
 'overall_recall': 1.0,
 'overall_f1': 1.0,
 'overall_accuracy': 1.0}

In [66]:
import numpy as np

def compute_metrics(p):
    """Turn raw model outputs into overall precision / recall / f1 / accuracy.

    `p` unpacks into (predictions, labels). The predicted class id is the
    arg-max of `predictions` along axis 2. Positions whose label is -100 are
    dropped, the remaining ids are mapped to names through the notebook-level
    `label_list`, and the result is scored with the notebook-level `metric`.

    Returns a dict with the keys "precision", "recall", "f1" and "accuracy",
    taken from the metric's "overall_*" entries.
    """
    scores, gold_ids = p
    predicted_ids = np.argmax(scores, axis=2)

    true_predictions = []
    true_labels = []
    for predicted_row, gold_row in zip(predicted_ids, gold_ids):
        # keep only the positions that carry a real label (ignored index is -100)
        kept = [
            (predicted, gold)
            for predicted, gold in zip(predicted_row, gold_row)
            if gold != -100
        ]
        true_predictions.append([label_list[predicted] for predicted, _gold in kept])
        true_labels.append([label_list[gold] for _predicted, gold in kept])

    results = metric.compute(predictions=true_predictions, references=true_labels)

    summary = {}
    summary["precision"] = results["overall_precision"]
    summary["recall"] = results["overall_recall"]
    summary["f1"] = results["overall_f1"]
    summary["accuracy"] = results["overall_accuracy"]
    return summary

In [67]:
trainer = Trainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

In [68]:
trainer.train()

The following columns in the training set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, chunk_tags, pos_tags, ner_tags. If id, tokens, chunk_tags, pos_tags, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 14041
  Num Epochs = 3
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 2634
  Number of trainable parameters = 66369801


  0%|          | 0/2634 [00:00<?, ?it/s]

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Saving model checkpoint to distilbert-base-uncased-finetuned-ner\checkpoint-500
Configuration saved in distilbert-base-uncased-finetuned-ner\checkpoint-500\config.json


{'loss': 0.2382, 'learning_rate': 1.6203492786636296e-05, 'epoch': 0.57}


Model weights saved in distilbert-base-uncased-finetuned-ner\checkpoint-500\pytorch_model.bin
tokenizer config file saved in distilbert-base-uncased-finetuned-ner\checkpoint-500\tokenizer_config.json
Special tokens file saved in distilbert-base-uncased-finetuned-ner\checkpoint-500\special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, chunk_tags, pos_tags, ner_tags. If id, tokens, chunk_tags, pos_tags, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16


  0%|          | 0/204 [00:00<?, ?it/s]

{'eval_loss': 0.06768079847097397, 'eval_precision': 0.9192740229373121, 'eval_recall': 0.9235932430920685, 'eval_f1': 0.9214285714285715, 'eval_accuracy': 0.981635344019572, 'eval_runtime': 6.5808, 'eval_samples_per_second': 493.862, 'eval_steps_per_second': 30.999, 'epoch': 1.0}


Saving model checkpoint to distilbert-base-uncased-finetuned-ner\checkpoint-1000
Configuration saved in distilbert-base-uncased-finetuned-ner\checkpoint-1000\config.json


{'loss': 0.0807, 'learning_rate': 1.240698557327259e-05, 'epoch': 1.14}


Model weights saved in distilbert-base-uncased-finetuned-ner\checkpoint-1000\pytorch_model.bin
tokenizer config file saved in distilbert-base-uncased-finetuned-ner\checkpoint-1000\tokenizer_config.json
Special tokens file saved in distilbert-base-uncased-finetuned-ner\checkpoint-1000\special_tokens_map.json
Saving model checkpoint to distilbert-base-uncased-finetuned-ner\checkpoint-1500
Configuration saved in distilbert-base-uncased-finetuned-ner\checkpoint-1500\config.json


{'loss': 0.0533, 'learning_rate': 8.610478359908885e-06, 'epoch': 1.71}


Model weights saved in distilbert-base-uncased-finetuned-ner\checkpoint-1500\pytorch_model.bin
tokenizer config file saved in distilbert-base-uncased-finetuned-ner\checkpoint-1500\tokenizer_config.json
Special tokens file saved in distilbert-base-uncased-finetuned-ner\checkpoint-1500\special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, chunk_tags, pos_tags, ner_tags. If id, tokens, chunk_tags, pos_tags, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16


  0%|          | 0/204 [00:00<?, ?it/s]

{'eval_loss': 0.0615849532186985, 'eval_precision': 0.9218043454284769, 'eval_recall': 0.9350039154267815, 'eval_f1': 0.9283572142619128, 'eval_accuracy': 0.9833510731250099, 'eval_runtime': 6.8127, 'eval_samples_per_second': 477.049, 'eval_steps_per_second': 29.944, 'epoch': 2.0}


Saving model checkpoint to distilbert-base-uncased-finetuned-ner\checkpoint-2000
Configuration saved in distilbert-base-uncased-finetuned-ner\checkpoint-2000\config.json


{'loss': 0.0377, 'learning_rate': 4.8139711465451785e-06, 'epoch': 2.28}


Model weights saved in distilbert-base-uncased-finetuned-ner\checkpoint-2000\pytorch_model.bin
tokenizer config file saved in distilbert-base-uncased-finetuned-ner\checkpoint-2000\tokenizer_config.json
Special tokens file saved in distilbert-base-uncased-finetuned-ner\checkpoint-2000\special_tokens_map.json
Saving model checkpoint to distilbert-base-uncased-finetuned-ner\checkpoint-2500
Configuration saved in distilbert-base-uncased-finetuned-ner\checkpoint-2500\config.json


{'loss': 0.0293, 'learning_rate': 1.0174639331814731e-06, 'epoch': 2.85}


Model weights saved in distilbert-base-uncased-finetuned-ner\checkpoint-2500\pytorch_model.bin
tokenizer config file saved in distilbert-base-uncased-finetuned-ner\checkpoint-2500\tokenizer_config.json
Special tokens file saved in distilbert-base-uncased-finetuned-ner\checkpoint-2500\special_tokens_map.json
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, chunk_tags, pos_tags, ner_tags. If id, tokens, chunk_tags, pos_tags, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16


  0%|          | 0/204 [00:00<?, ?it/s]



Training completed. Do not forget to share your model on huggingface.co/models =)




{'eval_loss': 0.06097126752138138, 'eval_precision': 0.9284924845269673, 'eval_recall': 0.9398142969012194, 'eval_f1': 0.9341190860065604, 'eval_accuracy': 0.9844472333868175, 'eval_runtime': 6.7006, 'eval_samples_per_second': 485.029, 'eval_steps_per_second': 30.445, 'epoch': 3.0}
{'train_runtime': 304.3041, 'train_samples_per_second': 138.424, 'train_steps_per_second': 8.656, 'train_loss': 0.08503435557777446, 'epoch': 3.0}


TrainOutput(global_step=2634, training_loss=0.08503435557777446, metrics={'train_runtime': 304.3041, 'train_samples_per_second': 138.424, 'train_steps_per_second': 8.656, 'train_loss': 0.08503435557777446, 'epoch': 3.0})

In [69]:
trainer.evaluate()

The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, chunk_tags, pos_tags, ner_tags. If id, tokens, chunk_tags, pos_tags, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 3250
  Batch size = 16


  0%|          | 0/204 [00:00<?, ?it/s]

{'eval_loss': 0.06097126752138138,
 'eval_precision': 0.9284924845269673,
 'eval_recall': 0.9398142969012194,
 'eval_f1': 0.9341190860065604,
 'eval_accuracy': 0.9844472333868175,
 'eval_runtime': 6.4939,
 'eval_samples_per_second': 500.472,
 'eval_steps_per_second': 31.414,
 'epoch': 3.0}

In [70]:
# Run the fine-tuned model on the validation split; the third returned value is not used here.
predictions, labels, _ = trainer.predict(tokenized_datasets["validation"])
# Keep, for every token, the class id with the highest score along axis 2.
predictions = np.argmax(predictions, axis=2)

# Remove ignored index (special tokens): positions labelled -100 are dropped and
# the remaining ids are mapped to label names through label_list.
true_predictions = [
    [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
    for prediction, label in zip(predictions, labels)
]
true_labels = [
    [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
    for prediction, label in zip(predictions, labels)
]

# Score the filtered sequences; the full result (per-entity and overall) is displayed.
results = metric.compute(predictions=true_predictions, references=true_labels)
results

The following columns in the test set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, chunk_tags, pos_tags, ner_tags. If id, tokens, chunk_tags, pos_tags, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 3250
  Batch size = 16


  0%|          | 0/204 [00:00<?, ?it/s]

{'LOC': {'precision': 0.9518434913468774,
  'recall': 0.9663865546218487,
  'f1': 0.9590598938589842,
  'number': 2618},
 'MISC': {'precision': 0.8109177215189873,
  'recall': 0.8326563769293257,
  'f1': 0.8216432865731462,
  'number': 1231},
 'ORG': {'precision': 0.8988009592326139,
  'recall': 0.9114785992217899,
  'f1': 0.9050953875875393,
  'number': 2056},
 'PER': {'precision': 0.9773100953633673,
  'recall': 0.979564930784443,
  'f1': 0.9784362139917695,
  'number': 3034},
 'overall_precision': 0.9284924845269673,
 'overall_recall': 0.9398142969012194,
 'overall_f1': 0.9341190860065604,
 'overall_accuracy': 0.9844472333868175}