In [1]:
pip install torch transformers scikit-learn seqeval

Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/43.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==

In [2]:
import torch
import pickle
import numpy as np
from torch.utils.data import DataLoader
from transformers import BertForTokenClassification, BertTokenizerFast
from sklearn.metrics import classification_report

In [3]:
class NERDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.encodings["input_ids"])

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

In [None]:
with open("./ner_tokenized_data.pkl", "rb") as f:
    _, _, _, _, tokenized_test, aligned_test_labels = pickle.load(f)

test_dataset = NERDataset(tokenized_test, aligned_test_labels)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🚀 Using device: {device}")

🚀 Using device: cuda


In [None]:
model_path = "./NER_HUGGING_FACE_3_EPOCH_BERT"
model = BertForTokenClassification.from_pretrained(model_path)
tokenizer = BertTokenizerFast.from_pretrained(model_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

BertForTokenClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12

In [7]:
all_preds, all_labels = [], []
with torch.no_grad():
    for batch in test_dataloader:
        batch = {k: v.to(device) for k, v in batch.items() if k != "offset_mapping"}
        outputs = model(**batch)

        predictions = torch.argmax(outputs.logits, dim=2).cpu().numpy()
        true_labels = batch["labels"].cpu().numpy()

        for pred, true in zip(predictions, true_labels):
            all_preds.extend([p for p, t in zip(pred, true) if t != -100])
            all_labels.extend([t for t in true if t != -100])

## Sequval entity level f1

In [8]:
from seqeval.metrics import classification_report

id2label = {0: "O", 1: "B-PER", 2: "I-PER", 3: "B-ORG", 4: "I-ORG",
            5: "B-LOC", 6: "I-LOC", 7: "B-MISC", 8: "I-MISC"}

all_preds_text = [id2label[idx] for idx in all_preds]
all_labels_text = [id2label[idx] for idx in all_labels]

def split_into_sentences(flat_list, sentence_lengths):
    sentences = []
    index = 0
    for length in sentence_lengths:
        sentences.append(flat_list[index:index + length])
        index += length
    return sentences

sentence_lengths = [len(sentence) for sentence in test_dataloader.dataset.encodings["input_ids"]]

all_preds_sentences = split_into_sentences(all_preds_text, sentence_lengths)
all_labels_sentences = split_into_sentences(all_labels_text, sentence_lengths)

print("Entity-Level Test Set Evaluation Report:\n")
print(classification_report(all_labels_sentences, all_preds_sentences, digits=4))

Entity-Level Test Set Evaluation Report:

              precision    recall  f1-score   support

         LOC     0.9251    0.9171    0.9211      3003
        MISC     0.7781    0.7530    0.7653      1267
         ORG     0.8967    0.9256    0.9109      3534
         PER     0.9408    0.9471    0.9439      3005

   micro avg     0.9034    0.9090    0.9062     10809
   macro avg     0.8852    0.8857    0.8853     10809
weighted avg     0.9029    0.9090    0.9058     10809

