# NER with BERT from Hugging Face

Source: https://huggingface.co/dslim/bert-base-NER. For alternative models, see https://huggingface.co/models?other=bert&sort=downloads&search=ner

In [1]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

2022-05-16 12:17:09.291535: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-16 12:17:09.291602: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Hugging Face Hub checkpoint used for BOTH the tokenizer and the model.
# Keeping it in a single constant guarantees the two are always loaded from
# the same checkpoint when the model id is changed.
BERT_NER_CHECKPOINT = "dslim/bert-base-NER"

tokenizer = AutoTokenizer.from_pretrained(BERT_NER_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(BERT_NER_CHECKPOINT)

In [9]:
# Build a "ner" pipeline from the model and tokenizer loaded in the cell above.
nlp = pipeline("ner", model=model, tokenizer=tokenizer)
# Sample text used as input by both the BERT pipeline and the flair tagger.
# Only the first line ends with a backslash, so only that line break is
# dropped; the remaining newlines and the leading indentation of the
# following lines are part of the string.
# NOTE(review): the saved outputs in this notebook appear to come from earlier
# versions of this text (e.g. a first token "My") — re-run all cells to
# refresh them.
example = """me is Wolfgang, I live in Berlin \
             and I think that Berlin is best place to be in, 
             at the moment at least, even for the moment,
             certainly after experiencing Amsterdam, Barcelona, Copenhagen, Dubrovnik
             yeah!"""

In [6]:
# Run the pipeline on the example text. The saved output shows a list with one
# dict per recognised token, with keys 'word', 'score', 'entity', 'index',
# 'start' and 'end'.
ner_results = nlp(example)
print(ner_results)

[{'word': 'Wolfgang', 'score': 0.9988356828689575, 'entity': 'B-PER', 'index': 4, 'start': 11, 'end': 19}, {'word': 'Berlin', 'score': 0.9997405409812927, 'entity': 'B-LOC', 'index': 9, 'start': 31, 'end': 37}, {'word': 'Berlin', 'score': 0.9997540712356567, 'entity': 'B-LOC', 'index': 14, 'start': 55, 'end': 61}]


## NER with Flair

Source: https://github.com/flairNLP/flair/

In [10]:
from flair.data import Sentence
from flair.models import SequenceTagger

In [11]:
# Wrap the same example text in a flair Sentence so the tagger can be run on it.
sentence = Sentence(example)

In [12]:
# Load flair's tagger registered under the key 'ner'. According to the load
# log, this key maps to 'flair/ner-english' on the HuggingFace ModelHub and the
# model file is cached under ~/.flair/models.
tagger = SequenceTagger.load('ner')

2022-05-16 13:31:46,047 --------------------------------------------------------------------------------
2022-05-16 13:31:46,049 The model key 'ner' now maps to 'https://huggingface.co/flair/ner-english' on the HuggingFace ModelHub
2022-05-16 13:31:46,050  - The most current version of the model is automatically downloaded from there.
2022-05-16 13:31:46,051  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner/en-ner-conll03-v0.4.pt)
2022-05-16 13:31:46,052 --------------------------------------------------------------------------------
2022-05-16 13:31:46,636 loading file /home/erikt/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4


In [13]:
# predict() stores its labels on the sentence object itself (its return value
# is not used), so printing the sentence afterwards shows the tagged tokens.
tagger.predict(sentence)
print(sentence)

Sentence: "me is Wolfgang , I live in Berlin and I think that Berlin is best place to be in , at the moment at least , even for the moment , rea !"   [− Tokens: 33  − Token-Labels: "me is Wolfgang <S-PER> , I live in Berlin <S-LOC> and I think that Berlin <S-LOC> is best place to be in , at the moment at least , even for the moment , rea !"]


In [14]:
# Walk over the spans found on the sentence; for each one, print its Python
# type and then every item obtained by iterating over the span (the saved
# output shows these are Token objects).
for entity in sentence.get_spans():
    print(type(entity))
    for x in entity:
        print(x)

<class 'flair.data.Span'>
Token: 4 Wolfgang
<class 'flair.data.Span'>
Token: 9 Berlin


In [17]:
# Manually attach the value "A" under the "ner" label type to the first token.
# The saved output shows the call returns the token, which is what the cell displays.
# NOTE(review): this modifies `sentence`; cells run afterwards see the changed label.
sentence[0].set_label("ner", "A")

Token: 1 My

In [25]:
# Inspect the "ner" labels of the second token; the saved output shows a
# single label "O" with score 1.0.
sentence[1].get_labels("ner")

[O (1.0)]

## Flair wrapper

In [80]:
from REL.entity_disambiguation import EntityDisambiguation
from flair import cache_root
from flair.models import SequenceTagger
from flair.data import Sentence
import argparse
from http.server import HTTPServer
from REL.server import make_handler


from REL.utils import fetch_model


def load_flair_ner(path_or_url):
    """Load a flair ``SequenceTagger``, falling back to ``fetch_model``.

    The argument is first handed directly to ``SequenceTagger.load``. If that
    raises, the argument is passed to ``fetch_model`` together with flair's
    ``cache_root`` and the tagger is loaded from whatever that call returns.

    Args:
        path_or_url: Value passed to ``SequenceTagger.load`` and, if the
            direct load fails, to ``fetch_model``.

    Returns:
        The object returned by ``SequenceTagger.load``.

    Raises:
        Exception: Whatever the fallback attempt raises. The error from the
            direct attempt is attached as ``__cause__`` so that the reason
            the first attempt failed is not lost.
    """
    try:
        return SequenceTagger.load(path_or_url)
    except Exception as direct_error:
        # Best-effort fallback: the direct load is allowed to fail, but its
        # error is remembered instead of being silently discarded.
        try:
            return SequenceTagger.load(fetch_model(path_or_url, cache_root))
        except Exception as fallback_error:
            # Same exception type as before; only the cause chain is added.
            raise fallback_error from direct_error

In [85]:
# Model key handed to load_flair_ner below. According to the load log, this
# key maps to 'flair/ner-english-fast' on the HuggingFace ModelHub.
NER_MODEL = "ner-fast"

In [86]:
# Load the tagger named by NER_MODEL through the wrapper defined above
# (direct SequenceTagger.load first, fetch_model as fallback).
ner_model = load_flair_ner(NER_MODEL)

2022-05-16 17:52:36,867 --------------------------------------------------------------------------------
2022-05-16 17:52:36,870 The model key 'ner-fast' now maps to 'https://huggingface.co/flair/ner-english-fast' on the HuggingFace ModelHub
2022-05-16 17:52:36,881  - The most current version of the model is automatically downloaded from there.
2022-05-16 17:52:36,886  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner-fast/en-ner-fast-conll03-v0.4.pt)
2022-05-16 17:52:36,889 --------------------------------------------------------------------------------
2022-05-16 17:52:37,593 loading file /home/erikt/.flair/models/ner-english-fast/4c58e7191ff952c030b82db25b3694b58800b0e722ff15427f527e1631ed6142.e13c7c4664ffe2bbfa8f1f5375bd0dced866b8c1dd7ff89a6d705518abf0a611


In [87]:
# Smoke-test the wrapped tagger on a short sentence. predict() labels the
# Sentence object itself; the bare last expression displays it.
tokenized_sentence = Sentence("John went to Microsoft in Seattle")
ner_model.predict(tokenized_sentence)
tokenized_sentence

Sentence: "John went to Microsoft in Seattle"   [− Tokens: 6  − Token-Labels: "John <S-PER> went to Microsoft <S-ORG> in Seattle <S-LOC>"]