In [1]:
# imports
import os

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Sanity check: report whether this torch build can see a CUDA device.
cuda_ok = torch.cuda.is_available()
print("PyTorch CUDA available:", cuda_ok)

# Only ask for a device name when CUDA is usable; otherwise report the CPU fallback.
if cuda_ok:
    device_label = torch.cuda.get_device_name(0)
else:
    device_label = "CPU"
print("PyTorch CUDA device:", device_label)

PyTorch CUDA available: True
PyTorch CUDA device: NVIDIA GeForce RTX 5060 Laptop GPU


In [3]:
# load models
# Single source of truth for the project-local Hugging Face cache. The path is
# relative to the notebook's working directory.
CACHE_DIR = "../models/hf_project_cache"

# NOTE(review): HF_HOME is presumably read when the Hugging Face libraries are
# first imported, so setting it here (after `transformers` is already imported)
# should only influence libraries/subprocesses started later. That is why
# `cache_dir` is also passed explicitly below. To make HF_HOME authoritative,
# set it before the import cell runs — confirm against the Hub docs.
os.environ["HF_HOME"] = CACHE_DIR

model_name = "dslim/bert-base-NER"
print(f"Loading model {model_name}...")

# Tokenizer and model must come from the same checkpoint and the same cache.
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=CACHE_DIR)
model = AutoModelForTokenClassification.from_pretrained(model_name, cache_dir=CACHE_DIR)

Loading model dslim/bert-base-NER...


Some weights of the model checkpoint at dslim/bert-base-NER were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
# Wrap the loaded model and tokenizer in a token-classification pipeline.
# Device index convention used here: 0 when CUDA is available, otherwise -1.
device_index = 0 if torch.cuda.is_available() else -1

ner_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    # groups tokens into complete entities
    aggregation_strategy="simple",
    device=device_index,
)

Device set to use cuda:0


In [13]:
# Run the NER pipeline on one example sentence and list what it found.
sample_text = "Barack Obama met Angela Merkel in Berlin yesterday. The President flew in a private jet."
print("\nInput:", sample_text)

# `output` is iterated below, one printed line per item returned by the pipeline.
output = ner_pipeline(sample_text)

print("\nNER Output:")
for entity in output:
    print(entity)


Input: Barack Obama met Angela Merkel in Berlin yesterday. The President flew in a private jet.

NER Output:
{'entity_group': 'PER', 'score': np.float32(0.9995825), 'word': 'Barack Obama', 'start': 0, 'end': 12}
{'entity_group': 'PER', 'score': np.float32(0.99536777), 'word': 'Angela Merkel', 'start': 17, 'end': 30}
{'entity_group': 'LOC', 'score': np.float32(0.99965525), 'word': 'Berlin', 'start': 34, 'end': 40}
