In [None]:
import torch, evaluate
import numpy as np
from datasets import load_dataset
from transformers import (AutoTokenizer, AutoModelForTokenClassification)

In [None]:
wnut = load_dataset("YurtsAI/named_entity_recognition", split="eval[:10]")
model_id = "dhanishetty/Google_bert-base-uncased"

In [None]:
label_list = wnut.features[f"ner_tags"].feature.names
print(label_list)
print(len(label_list))

id2label = {}
label2id = {}
for count, label in enumerate(label_list):
    id2label.update({count:label})
    label2id.update({label:count})

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForTokenClassification.from_pretrained(model_id, num_labels=125, id2label=id2label, label2id=label2id)

In [None]:
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)

    labels = []
    for i, label in enumerate(examples[f"ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)  # Map tokens to their respective word.
        previous_word_idx = None
        label_ids = []
        for word_idx in word_ids:  # Set the special tokens to -100.
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:  # Only label the first token of a given word.
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx
        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

In [132]:
print(wnut)
#print(wnut['document_id'][1])
#print(wnut['sentence_id'][1])
print(wnut['tokens'][1])
print(wnut['ner_tags'][1])

Dataset({
    features: ['document_id', 'sentence_id', 'tokens', 'ner_tags'],
    num_rows: 10
})
['*', '*', 'MEMORANDUM', '*', '*', '*', '*', 'TO', ':', '*', '*', 'Personal', 'Journal', '*', '*', 'FROM', ':', '*', '*', '[', 'Your', 'Name', ']', '*', '*', 'DATE', ':', '*', '*', '1961-05-08', '*', '*', 'SUBJECT', ':', '*', '*', 'Daily', 'Reflections', 'and', 'Observations', '1', '.', '*', '*', 'Re', ':', 'Project', 'Alpha', '*', '*', ':', 'Met', 'w/', 'team', '@', '0900', 'hrs', '.', 'Discussed', 'Q3', 'deliverables', '.', 'Action', 'items', ':', 'finalize', 'SOW', 'by', 'EOD', '10/12', '.', '2', '.', '*', '*', 'Re', ':', 'Budget', 'Review', '*', '*', ':', 'FY24', 'budget', 'mtg', '.', 'w/', 'CFO', '@', '1400', 'hrs', '.', 'Key', 'points', ':', 'reduce', 'OPEX', 'by', '5', '%', ',', 'reallocate', 'CAPEX', 'to', 'R', '&', 'D', '.', '3', '.', '*', '*', 'Re', ':', 'Client', 'Meeting', '*', '*', ':', 'Conf', '.', 'call', 'w/', 'XYZ', 'Corp.', '@', '1100', 'hrs', '.', 'Discussed', 'contract'

In [97]:
tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True)
tokenized_wnut

Dataset({
    features: ['document_id', 'sentence_id', 'tokens', 'ner_tags', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 10
})

In [144]:
print(tokenized_wnut['document_id'][1])
print(tokenized_wnut['sentence_id'][1])
print(tokenized_wnut['tokens'][1])
print(tokenized_wnut['ner_tags'][1])
print(tokenized_wnut['input_ids'][1])
print(tokenized_wnut['token_type_ids'][1])
print(tokenized_wnut['attention_mask'][1])
print(tokenized_wnut['labels'][1])
print(len(tokenized_wnut['labels'][1]))

11
0
['*', '*', 'MEMORANDUM', '*', '*', '*', '*', 'TO', ':', '*', '*', 'Personal', 'Journal', '*', '*', 'FROM', ':', '*', '*', '[', 'Your', 'Name', ']', '*', '*', 'DATE', ':', '*', '*', '1961-05-08', '*', '*', 'SUBJECT', ':', '*', '*', 'Daily', 'Reflections', 'and', 'Observations', '1', '.', '*', '*', 'Re', ':', 'Project', 'Alpha', '*', '*', ':', 'Met', 'w/', 'team', '@', '0900', 'hrs', '.', 'Discussed', 'Q3', 'deliverables', '.', 'Action', 'items', ':', 'finalize', 'SOW', 'by', 'EOD', '10/12', '.', '2', '.', '*', '*', 'Re', ':', 'Budget', 'Review', '*', '*', ':', 'FY24', 'budget', 'mtg', '.', 'w/', 'CFO', '@', '1400', 'hrs', '.', 'Key', 'points', ':', 'reduce', 'OPEX', 'by', '5', '%', ',', 'reallocate', 'CAPEX', 'to', 'R', '&', 'D', '.', '3', '.', '*', '*', 'Re', ':', 'Client', 'Meeting', '*', '*', ':', 'Conf', '.', 'call', 'w/', 'XYZ', 'Corp.', '@', '1100', 'hrs', '.', 'Discussed', 'contract', 'renewal', '.', 'Follow-up', 'req', "'d", 'by', 'COB', '10/15', '.', '4', '.', '*', '*', 'R

In [None]:
print(tokenized_wnut['input_ids'])

In [141]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("dhanishetty/Google_bert-base-uncased")
inputs = tokenizer(wnut['tokens'][1], return_tensors="pt", is_split_into_words=True)
print(inputs)
print((inputs['input_ids'].size(1)))

{'input_ids': tensor([[  101,  1008,  1008, 20336,  1008,  1008,  1008,  1008,  2000,  1024,
          1008,  1008,  3167,  3485,  1008,  1008,  2013,  1024,  1008,  1008,
          1031,  2115,  2171,  1033,  1008,  1008,  3058,  1024,  1008,  1008,
          3777,  1011,  5709,  1011,  5511,  1008,  1008,  3395,  1024,  1008,
          1008,  3679, 16055,  1998,  9420,  1015,  1012,  1008,  1008,  2128,
          1024,  2622,  6541,  1008,  1008,  1024,  2777,  1059,  1013,  2136,
          1030,  5641,  8889, 17850,  2015,  1012,  6936,  1053,  2509,  8116,
          3085,  2015,  1012,  2895,  5167,  1024,  2345,  4697,  2061,  2860,
          2011,  1041,  7716,  2184,  1013,  2260,  1012,  1016,  1012,  1008,
          1008,  2128,  1024,  5166,  3319,  1008,  1008,  1024,  1042,  2100,
         18827,  5166, 11047,  2290,  1012,  1059,  1013, 12935,  2080,  1030,
         20652, 17850,  2015,  1012,  3145,  2685,  1024,  5547,  6728, 10288,
          2011,  1019,  1003,  1010,  

In [142]:
from transformers import AutoModelForTokenClassification
import torch

model = AutoModelForTokenClassification.from_pretrained("dhanishetty/Google_bert-base-uncased")
with torch.no_grad():
    logits = model(**inputs).logits
    print(f"logits are {logits}")

logits are tensor([[[10.6675,  0.1120, -0.2097,  ..., -1.4871, -1.2241, -0.5975],
         [11.9228,  0.0135, -0.5378,  ..., -1.3693, -0.7376, -0.6716],
         [11.8895,  0.1200, -0.6164,  ..., -1.3939, -0.7246, -0.6989],
         ...,
         [11.9703, -0.2312, -0.5486,  ..., -1.4095, -0.9093, -0.6575],
         [11.9416, -0.0595, -0.5946,  ..., -1.4339, -0.7784, -0.7288],
         [11.1518,  0.3179, -0.1241,  ..., -1.7010, -1.1350, -0.7336]]])


In [153]:
predictions = torch.argmax(logits, dim=2)
predictions = predictions.tolist()

predictions = predictions[0]
print((predictions))
print((tokenized_wnut['labels'][1]))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 84, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 24, 24, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 23, 24, 24, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 23, 24, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83, 84, 84, 84, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [155]:
predictions_list = predictions
labels_list = tokenized_wnut['labels'][1]

In [156]:
accuracy_metric = evaluate.load("accuracy")
results = accuracy_metric.compute(references=labels_list, predictions=predictions_list)
print(f"Accuarcy of the model is {results["accuracy"]}")

Accuarcy of the model is 0.8364611260053619


In [157]:
f1_metric = evaluate.load("f1")
#A multiclass example, with different values for the `average` input.
f1_macro = f1_metric.compute(predictions=predictions_list, references=labels_list, average="macro")
print(round(f1_macro['f1'], 2))

f1_micro = f1_metric.compute(predictions=predictions_list, references=labels_list, average="micro")
print(round(f1_micro['f1'], 2))

f1_weighted = f1_metric.compute(predictions=predictions_list, references=labels_list, average="weighted")
print(round(f1_weighted['f1'], 2))

results = f1_metric.compute(predictions=predictions_list, references=labels_list, average=None)
print(results)

0.6
0.84
0.78
{'f1': array([0.        , 0.93779904, 1.        , 0.        , 1.        ,
       0.26666667, 1.        , 0.58823529])}


In [158]:
precision_metric = evaluate.load("precision")
precision_macro = precision_metric.compute(predictions=predictions_list, references=labels_list, average='macro')
print(f"precision_macro score is {precision_macro["precision"]}")

precision_micro = precision_metric.compute(predictions=predictions_list, references=labels_list, average='micro')
print(f"precision_micro score is {precision_micro["precision"]}")

precision_weighted = precision_metric.compute(predictions=predictions_list, references=labels_list, average='weighted')
print(f"precision_weighted score is {precision_weighted["precision"]}")

precision_none = precision_metric.compute(predictions=predictions_list, references=labels_list, average=None)
print(f"precision_weighted score is {precision_none}")

precision_macro score is 0.5566744629244629
precision_micro score is 0.8364611260053619
precision_weighted score is 0.7317924750900622
precision_weighted score is {'precision': array([0.        , 0.88288288, 1.        , 0.        , 1.        ,
       0.15384615, 1.        , 0.41666667])}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [159]:
recall_metric = evaluate.load('recall')
recall_macro = recall_metric.compute(predictions=predictions_list, references=labels_list, average='macro')
print(f"recall_macro score is {recall_macro["recall"]}")

recall_micro = recall_metric.compute(predictions=predictions_list, references=labels_list, average='micro')
print(f"recall_micro score is {recall_micro["recall"]}")

recall_weighted = recall_metric.compute(predictions=predictions_list, references=labels_list, average='weighted')
print(f"recall_weighted score is {recall_weighted["recall"]}")

recall_none = recall_metric.compute(predictions=predictions_list, references=labels_list, average=None)
print(f"recall_None score is {recall_none}")

recall_macro score is 0.75
recall_micro score is 0.8364611260053619
recall_weighted score is 0.8364611260053619
recall_None score is {'recall': array([0., 1., 1., 0., 1., 1., 1., 1.])}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
