# Use case - jurisdiction of contracts:  
It is common for parties to a contract to agree upfront where disputes, if any, will be resolved. How does one extract just such jurisdictions from a repository of contracts?  
One way to do so is to treat jurisdiction as another entity and adapt a Named Entity Recognition (NER) token classifier for this task. This is the approach for the solution below.  
Since a repo of contracts is hard to come by, reviews from a motorocyle hobby magazine have been used here as proxy data. The special tags used are for engine capacity, power and torque.

In [13]:
# !pip install -q datasets transformers evaluate seqeval

In [14]:
import re
import numpy as np
import pandas as pd
import evaluate

from datasets import Dataset, Features, Value, ClassLabel, Sequence
import transformers
transformers.logging.set_verbosity_error()
from transformers import AutoTokenizer, DataCollatorForTokenClassification, AutoModelForTokenClassification, TrainingArguments, Trainer


checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [15]:

def get_tokens_with_entities(train_text: str):
    raw_tokens = re.split(r"\s(?![^\[]*\])", train_text)
    entity_value_pattern = r"\[(?P<value>.+?)\]\((?P<entity>.+?)\)"
    entity_value_pattern_compiled = re.compile(entity_value_pattern, flags=re.I|re.M)

    tokens_with_entities = []

    for raw_token in raw_tokens:
        match = entity_value_pattern_compiled.match(raw_token)
        if match:
            raw_entity_name, raw_entity_value = match.group("entity"), match.group("value")

            for i, raw_entity_token in enumerate(re.split("\s", raw_entity_value)):
                entity_prefix = "B" if i == 0 else "I"
                entity_name = f"{entity_prefix}-{raw_entity_name}"
                tokens_with_entities.append((raw_entity_token, entity_name))
        else:
            tokens_with_entities.append((raw_token, "O"))

    return tokens_with_entities


class NERDataMaker:
    def __init__(self, texts):
        self.unique_entities = []
        self.processed_texts = []

        temp_processed_texts = []
        for text in texts:
            tokens_with_entities = get_tokens_with_entities(text)
            for _, ent in tokens_with_entities:
                if ent not in self.unique_entities:
                    self.unique_entities.append(ent)
            temp_processed_texts.append(tokens_with_entities)

        self.unique_entities.sort(key=lambda ent: ent if ent != "O" else "")

        for tokens_with_entities in temp_processed_texts:
            self.processed_texts.append([(t, self.unique_entities.index(ent)) for t, ent in tokens_with_entities])

    @property
    def id2label(self):
        return dict(enumerate(self.unique_entities))

    @property
    def label2id(self):
        return {v:k for k, v in self.id2label.items()}

    def __len__(self):
        return len(self.processed_texts)

    def __getitem__(self, idx):
        def _process_tokens_for_one_text(id, tokens_with_encoded_entities):
            ner_tags = []
            tokens = []
            for t, ent in tokens_with_encoded_entities:
                ner_tags.append(ent)
                tokens.append(t)

            return {
                "id": id,
                "ner_tags": ner_tags,
                "tokens": tokens
            }

        tokens_with_encoded_entities = self.processed_texts[idx]
        if isinstance(idx, int):
            return _process_tokens_for_one_text(idx, tokens_with_encoded_entities)
        else:
            return [_process_tokens_for_one_text(i+idx.start, tee) for i, tee in enumerate(tokens_with_encoded_entities)]

    def as_hf_dataset(self, tokenizer):
        def tokenize_and_align_labels(examples):
            tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

            labels = []
            for i, label in enumerate(examples[f"ner_tags"]):
                word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.
                previous_word_idx = None
                label_ids = []
                for word_idx in word_ids:  # Set the special tokens to -100.
                    if word_idx is None:
                        label_ids.append(-100)
                    elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                        label_ids.append(label[word_idx])
                    else:
                        label_ids.append(-100)
                    previous_word_idx = word_idx
                labels.append(label_ids)

            tokenized_inputs["labels"] = labels
            return tokenized_inputs

        ids, ner_tags, tokens = [], [], []
        for i, pt in enumerate(self.processed_texts):
            ids.append(i)
            pt_tokens,pt_tags = list(zip(*pt))
            ner_tags.append(pt_tags)
            tokens.append(pt_tokens)
        data = {
            "id": ids,
            "ner_tags": ner_tags,
            "tokens": tokens
        }
        features = Features({
            "tokens": Sequence(Value("string")),
            "ner_tags": Sequence(ClassLabel(names=dm.unique_entities)),
            "id": Value("int32")
        })
        ds = Dataset.from_dict(data, features)
        tokenized_ds = ds.map(tokenize_and_align_labels, batched=True)
        return tokenized_ds


In [16]:
metric = evaluate.load("seqeval")

def compute_metrics(eval_preds):
    logits, labels = eval_preds
    predictions = np.argmax(logits, axis=-1)

    # Remove ignored index (special tokens) and convert to labels
    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]
    prediction_out = [
        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    all_metrics = metric.compute(predictions=prediction_out, references=true_labels)
    return {
        "precision": all_metrics["overall_precision"],
        "recall": all_metrics["overall_recall"],
        "f1": all_metrics["overall_f1"],
        "accuracy": all_metrics["overall_accuracy"],
    }

def postprocess(test_ds):
    logits, labels = trainer.predict(test_ds)[0:2]
    predictions = np.argmax(logits, axis=-1)
    true_labels = [[label_names[l] for l in label if l != -100] for label in labels]
    prediction_out = [
        [label_names[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    return {'tokens': test_ds['tokens']}, {'true_labels': true_labels}, {'prediction_out':prediction_out}

In [17]:
train_text = """
The [286cc](capacity), 4 valve liquid-cooled engine architecture is the same as before, but the engine is now BS6 compliant. Power is now up by [0.6hp](power) for a total of [31.1 horsepower](power), although this is now produced 1,500rpm higher at 9,000rpm.
Being a J-series model, the Hunter 350 gets the same [349cc](capacity), single-cylinder engine that powers the Classic and Meteor 350s. It puts out [20hp](power) and [27Nm](torque), which is adequate for a bike of this size, and comes paired with a 5-speed gearbox"
Powering the TVS Ronin is a [225.9cc](capacity), four-valve, single-cylinder engine that puts out [20.4hp](power) at 7,750rpm and [19.93Nm](torque) of torque at 3,750rpm.
While the power and torque figures are exactly the same ([20hp](power) and [27Nm](torque)), RE says they have mapped the engine slightly differently.
Power has gone up from [150hp](power) to [152hp](power) along with a shift in the redline from 10,000rpm to 11,000rpm. The engine also produces stronger bottom end torque and a wider as well as smoother torque curve.
The in-house developed motor produces similar power figures to the Ola S1 Pro ([4.5kW](power) continuous/[8.5kW](power) peak), but it's the [72Nm](torque) torque figure that is unrivalled in the e-scooter space. 
The Bajaj Pulsar N160 is, essentially, the Pulsar N250 with a smaller displacement engine, a [164.82cc](capacity), air- and oil-cooled single, to be precise. It has a long-stroke configuration, with a simple two-valve head and makes [16hp](power) and [14.7Nm](torque) of peak torque.
With [143Nm](torque) and BMW’s Shiftcam variable valve control system, there’s a huge amount of grunt almost instantly available.
The end result is a power figure of [19.2hp](power), which is considerably higher than the 160’s [17.5hp](power). Torque, though, is actually slightly lower, at [14.2Nm](torque), compared to the 160’s [14.73Nm](torque).
The Jupiter uses the third [125cc](capacity) engine in TVS’ stable, and this is the most basic of the lot, with a two-valve head. Nevertheless, its output figures of [8.2hp](power) and [10.5Nm](torque) are par for the segment.
This mode is nicely usable in the dirt and it will be good for less experienced riders who might find the thought of the KTM’s [43hp](power) and [37Nm](torque) of torque a little intimidating off-road.
third [115cc](capacity) engine [7hp](power) or [5.2kw](power) and [9Nm](torque)
"""

In [18]:
all_txt = NERDataMaker(train_text.split("\n"))

B_capacity_list = []
B_power_list = []
B_torque_list = []
I_power_list = []

label_names = all_txt.unique_entities
print("Here are all the tags and ids mapped both ways:")
print(all_txt.id2label)
print(all_txt.label2id)
for n in range(len(all_txt)):
    [B_capacity_list.append(all_txt[n]['tokens'][all_txt[n]['ner_tags'].index(i)]) for i in all_txt[n]['ner_tags'] if i == 1]
    [B_power_list.append(all_txt[n]['tokens'][all_txt[n]['ner_tags'].index(i)]) for i in all_txt[n]['ner_tags'] if i == 2]
    [B_torque_list.append(all_txt[n]['tokens'][all_txt[n]['ner_tags'].index(i)]) for i in all_txt[n]['ner_tags'] if i == 3]
    [I_power_list.append(all_txt[n]['tokens'][all_txt[n]['ner_tags'].index(i)]) for i in all_txt[n]['ner_tags'] if i == 4]

print('Number of tokens matching the "B capacity" tag: ', len(B_capacity_list))
print('Number of tokens matching the "B power" tag: ', len(B_power_list))
print('Number of tokens matching the "B torque" tag: ', len(B_torque_list))
print('Number of tokens matching the "I power" tag: ', len(I_power_list))

Here are all the tags and ids mapped both ways:
{0: 'O', 1: 'B-capacity', 2: 'B-power', 3: 'B-torque', 4: 'I-power'}
{'O': 0, 'B-capacity': 1, 'B-power': 2, 'B-torque': 3, 'I-power': 4}
Number of tokens matching the "B capacity" tag:  6
Number of tokens matching the "B power" tag:  13
Number of tokens matching the "B torque" tag:  11
Number of tokens matching the "I power" tag:  1


In [37]:
print("Total # of examples in dataset (excluding the two blank lines at the beginning and the end): ", len(all_txt)-2)
print('#'*50)
print("Here is a sample sentence, along with the tags used for training:")
print(train_text.split("\n")[1])
print('#'*50)
print("After pre-processing the same sample sentence, it is converted to a dictionary of NER tags and tokens:")
print(all_txt[1])
print('#'*50)
print("These are the only tokens that have an NER tag, along with their NER tag and label")
[print(all_txt[1]['tokens'][all_txt[1]['ner_tags'].index(i)], i, all_txt.id2label[i]) for i in all_txt[1]['ner_tags'] if i != 0]


Total # of examples in dataset (excluding the two blank lines at the beginning and the end):  12
##################################################
Here is a sample sentence, along with the tags used for training:
The [286cc](capacity), 4 valve liquid-cooled engine architecture is the same as before, but the engine is now BS6 compliant. Power is now up by [0.6hp](power) for a total of [31.1 horsepower](power), although this is now produced 1,500rpm higher at 9,000rpm.
##################################################
These are all the NER tags and tokens in a sample sentence:
{'id': 1, 'ner_tags': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'tokens': ['The', '286cc', '4', 'valve', 'liquid-cooled', 'engine', 'architecture', 'is', 'the', 'same', 'as', 'before,', 'but', 'the', 'engine', 'is', 'now', 'BS6', 'compliant.', 'Power', 'is', 'now', 'up', 'by', '0.6hp', 'for', 'a', 'total', 'of', '31.1', 'horsepower', 

[None, None, None, None]

In [38]:
print("Total # of examples in dataset (excluding the two blank lines at the beginning and the end): ", len(all_txt)-2)
print("Here is a sample sentence, along with the tags used for training:")
print(train_text.split("\n")[1])
print("These are all the NER tags and tokens in a sample sentence:")
print(all_txt[1])
print("These are the only tokens that have an NER tag, along with their NER tag and label")
[print(all_txt[1]['tokens'][all_txt[1]['ner_tags'].index(i)], i, all_txt.id2label[i]) for i in all_txt[1]['ner_tags'] if i != 0]

Total # of examples in dataset (excluding the two blank lines at the beginning and the end):  12
Here is a sample sentence, along with the tags used for training:
The [286cc](capacity), 4 valve liquid-cooled engine architecture is the same as before, but the engine is now BS6 compliant. Power is now up by [0.6hp](power) for a total of [31.1 horsepower](power), although this is now produced 1,500rpm higher at 9,000rpm.
These are all the NER tags and tokens in a sample sentence:
{'id': 1, 'ner_tags': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'tokens': ['The', '286cc', '4', 'valve', 'liquid-cooled', 'engine', 'architecture', 'is', 'the', 'same', 'as', 'before,', 'but', 'the', 'engine', 'is', 'now', 'BS6', 'compliant.', 'Power', 'is', 'now', 'up', 'by', '0.6hp', 'for', 'a', 'total', 'of', '31.1', 'horsepower', 'although', 'this', 'is', 'now', 'produced', '1,500rpm', 'higher', 'at', '9,000rpm.']}
These are the o

[None, None, None, None]

### Credits, sources and references:  
As seen in the sample sentence above, the tags are directly applied on a sentence in the training dataset. This Rasa like approach is much simpler than the error prone effort of directly creating the key value pair 'ner_tags'. Concept and code for this is from   
https://sanjayasubedi.com.np/deeplearning/training-ner-with-huggingface-transformer/  
https://rasa.com/docs/rasa/training-data-format  
For many other ideas, please refer  
https://huggingface.co/course/chapter7/2?fw=pt

In [20]:
dm = NERDataMaker(train_text.split("\n")[:10])
print(f"total examples in train = {len(dm)}")
train_ds = dm.as_hf_dataset(tokenizer=tokenizer)

total examples in train = 10


100%|██████████| 1/1 [00:00<00:00, 73.22ba/s]


In [21]:
dm_val = NERDataMaker(train_text.split("\n")[10:])
print(f"total examples in validation = {len(dm_val)}")
val_ds = dm_val.as_hf_dataset(tokenizer=tokenizer)

total examples in validation = 4


100%|██████████| 1/1 [00:00<00:00, 175.38ba/s]


In [22]:
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
model = AutoModelForTokenClassification.from_pretrained(checkpoint, num_labels=len(dm.unique_entities), id2label=dm.id2label, label2id=dm.label2id)

In [23]:
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=2,
    num_train_epochs=30,
    weight_decay=0.01,
)

train_ds = dm.as_hf_dataset(tokenizer=tokenizer)
val_ds = dm_val.as_hf_dataset(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

trainer.train()

100%|██████████| 1/1 [00:00<00:00, 118.70ba/s]
100%|██████████| 1/1 [00:00<00:00, 138.51ba/s]
The following columns in the training set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 10
  Num Epochs = 30
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 90
You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have

{'eval_loss': 1.3037259578704834, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 0.2236, 'eval_samples_per_second': 17.892, 'eval_steps_per_second': 8.946, 'epoch': 1.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 1.0504786968231201, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 0.1475, 'eval_samples_per_second': 27.125, 'eval_steps_per_second': 13.563, 'epoch': 2.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.9172098636627197, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 0.1594, 'eval_samples_per_second': 25.091, 'eval_steps_per_second': 12.546, 'epoch': 3.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.9042731523513794, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 0.1494, 'eval_samples_per_second': 26.772, 'eval_steps_per_second': 13.386, 'epoch': 4.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.8293474912643433, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 0.1678, 'eval_samples_per_second': 23.837, 'eval_steps_per_second': 11.919, 'epoch': 5.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.6735749244689941, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 0.1723, 'eval_samples_per_second': 23.218, 'eval_steps_per_second': 11.609, 'epoch': 6.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.537849485874176, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_accuracy': 0.8846153846153846, 'eval_runtime': 0.1498, 'eval_samples_per_second': 26.703, 'eval_steps_per_second': 13.352, 'epoch': 7.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.44580450654029846, 'eval_precision': 1.0, 'eval_recall': 0.1111111111111111, 'eval_f1': 0.19999999999999998, 'eval_accuracy': 0.8974358974358975, 'eval_runtime': 0.3156, 'eval_samples_per_second': 12.674, 'eval_steps_per_second': 6.337, 'epoch': 8.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.38221412897109985, 'eval_precision': 1.0, 'eval_recall': 0.4444444444444444, 'eval_f1': 0.6153846153846153, 'eval_accuracy': 0.9358974358974359, 'eval_runtime': 0.1671, 'eval_samples_per_second': 23.942, 'eval_steps_per_second': 11.971, 'epoch': 9.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.33923375606536865, 'eval_precision': 1.0, 'eval_recall': 0.6666666666666666, 'eval_f1': 0.8, 'eval_accuracy': 0.9615384615384616, 'eval_runtime': 0.3177, 'eval_samples_per_second': 12.592, 'eval_steps_per_second': 6.296, 'epoch': 10.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.31548207998275757, 'eval_precision': 0.8571428571428571, 'eval_recall': 0.6666666666666666, 'eval_f1': 0.75, 'eval_accuracy': 0.9615384615384616, 'eval_runtime': 0.2435, 'eval_samples_per_second': 16.424, 'eval_steps_per_second': 8.212, 'epoch': 11.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.29611659049987793, 'eval_precision': 0.75, 'eval_recall': 0.6666666666666666, 'eval_f1': 0.7058823529411765, 'eval_accuracy': 0.9615384615384616, 'eval_runtime': 0.1904, 'eval_samples_per_second': 21.003, 'eval_steps_per_second': 10.502, 'epoch': 12.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.27795135974884033, 'eval_precision': 0.7777777777777778, 'eval_recall': 0.7777777777777778, 'eval_f1': 0.7777777777777778, 'eval_accuracy': 0.9743589743589743, 'eval_runtime': 0.1652, 'eval_samples_per_second': 24.207, 'eval_steps_per_second': 12.104, 'epoch': 13.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.2576477527618408, 'eval_precision': 0.7777777777777778, 'eval_recall': 0.7777777777777778, 'eval_f1': 0.7777777777777778, 'eval_accuracy': 0.9743589743589743, 'eval_runtime': 0.1457, 'eval_samples_per_second': 27.457, 'eval_steps_per_second': 13.729, 'epoch': 14.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.23712733387947083, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1595, 'eval_samples_per_second': 25.078, 'eval_steps_per_second': 12.539, 'epoch': 15.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.2169295847415924, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.2732, 'eval_samples_per_second': 14.641, 'eval_steps_per_second': 7.321, 'epoch': 16.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.2013443112373352, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1925, 'eval_samples_per_second': 20.778, 'eval_steps_per_second': 10.389, 'epoch': 17.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.19037340581417084, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.2265, 'eval_samples_per_second': 17.662, 'eval_steps_per_second': 8.831, 'epoch': 18.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.18220141530036926, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1756, 'eval_samples_per_second': 22.783, 'eval_steps_per_second': 11.392, 'epoch': 19.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.1756155788898468, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1554, 'eval_samples_per_second': 25.737, 'eval_steps_per_second': 12.869, 'epoch': 20.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.1703716516494751, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1547, 'eval_samples_per_second': 25.862, 'eval_steps_per_second': 12.931, 'epoch': 21.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.1666470319032669, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1567, 'eval_samples_per_second': 25.532, 'eval_steps_per_second': 12.766, 'epoch': 22.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.16377149522304535, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.2257, 'eval_samples_per_second': 17.725, 'eval_steps_per_second': 8.863, 'epoch': 23.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.16151002049446106, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.173, 'eval_samples_per_second': 23.12, 'eval_steps_per_second': 11.56, 'epoch': 24.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.15929174423217773, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1724, 'eval_samples_per_second': 23.199, 'eval_steps_per_second': 11.6, 'epoch': 25.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.15766137838363647, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1772, 'eval_samples_per_second': 22.578, 'eval_steps_per_second': 11.289, 'epoch': 26.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.15634822845458984, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.2801, 'eval_samples_per_second': 14.28, 'eval_steps_per_second': 7.14, 'epoch': 27.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.15544500946998596, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.2065, 'eval_samples_per_second': 19.374, 'eval_steps_per_second': 9.687, 'epoch': 28.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


{'eval_loss': 0.15497231483459473, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1682, 'eval_samples_per_second': 23.786, 'eval_steps_per_second': 11.893, 'epoch': 29.0}


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 4
  Batch size = 2


Training completed. Do not forget to share your model on huggingface.co/models =)




{'eval_loss': 0.15478608012199402, 'eval_precision': 0.8888888888888888, 'eval_recall': 0.8888888888888888, 'eval_f1': 0.8888888888888888, 'eval_accuracy': 0.9871794871794872, 'eval_runtime': 0.1653, 'eval_samples_per_second': 24.193, 'eval_steps_per_second': 12.097, 'epoch': 30.0}
{'train_runtime': 127.2085, 'train_samples_per_second': 2.358, 'train_steps_per_second': 0.707, 'train_loss': 0.19146686130099827, 'epoch': 30.0}


TrainOutput(global_step=90, training_loss=0.19146686130099827, metrics={'train_runtime': 127.2085, 'train_samples_per_second': 2.358, 'train_steps_per_second': 0.707, 'train_loss': 0.19146686130099827, 'epoch': 30.0})

### Saving the trained model (neural network architecture and all parameter values) locally

In [25]:
data_dir = '/Users/bala/Documents/Learning'
trainer.save_model("data_dir/customNER20220919.pt")

Saving model checkpoint to data_dir/customNER20220919.pt
Configuration saved in data_dir/customNER20220919.pt/config.json
Model weights saved in data_dir/customNER20220919.pt/pytorch_model.bin
tokenizer config file saved in data_dir/customNER20220919.pt/tokenizer_config.json
Special tokens file saved in data_dir/customNER20220919.pt/special_tokens_map.json


### Running a test with a sample sentence with no entity tags  
(Note the true labels are all default values and that the predictions are exactly what we'd expect to see)

In [24]:
test_text = """
Unsurprisingly, this bike’s displacement is a neat 164.9cc. But this isn’t just a simple boring and stroking job from TVS – there are also significant changes to the cylinder head, with larger valves and ports, and different cam profiles. The piston is also different from the 160, not just in the sense that it’s larger, but also in its shape. The end result is a power figure of 19.2hp, which is considerably higher than the 160’s 17.5hp. Torque, though, is actually slightly lower, at 14.2Nm, compared to the 160’s 14.73Nm. What’s telling is the fact that the RP makes its peak figures a good deal higher up the rev range than the RTR 160 4V.
"""

dm_test = NERDataMaker(test_text.split("\n"))
print(f"total examples in test = {len(dm_test)}")
test_ds = dm_test.as_hf_dataset(tokenizer=tokenizer)

answer = pd.DataFrame()
answer['tokens'] = postprocess(test_ds)[0]['tokens'][1]
answer['true_labels'] = postprocess(test_ds)[1]['true_labels'][1]
answer['predictions'] = postprocess(test_ds)[2]['prediction_out'][1]
answer[answer['predictions'] != 'O']

total examples in test = 3


100%|██████████| 1/1 [00:00<00:00, 97.59ba/s]
The following columns in the test set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 3
  Batch size = 2
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
The following columns in the test set don't have a corresponding argument in `DistilBertForTokenClassification.forward` and have been ignored: id, tokens, ner_tags. If id, tokens, ner_tags are not expected by `DistilBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 3
  Batch size = 2
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
The following column

Unnamed: 0,tokens,true_labels,predictions
7,164.9cc.,O,B-capacity
68,"19.2hp,",O,B-power
76,17.5hp.,O,B-power
84,"14.2Nm,",O,B-torque
89,14.73Nm.,O,B-torque


### Inference using a simple pipeline

In [None]:
from transformers import pipeline

model_checkpoint = "data_dir/customNER20220919.pt" #loading the trained/ fine-tuned model from local
token_classifier = pipeline(
    "token-classification", model=model_checkpoint, aggregation_strategy="simple"
)

In [33]:
token_classifier("As the name suggests, this bike now gets a 4-valve cylinder head for its 199.6cc engine. Power and torque are both up by about 1hp and 1Nm each, without any changes in the rpms they’re produced at. ")

[{'entity_group': 'capacity',
  'score': 0.644535,
  'word': '199',
  'start': 73,
  'end': 76},
 {'entity_group': 'power',
  'score': 0.8060533,
  'word': '1',
  'start': 127,
  'end': 128},
 {'entity_group': 'torque',
  'score': 0.8604862,
  'word': '1',
  'start': 135,
  'end': 136}]