# NER with BERT from HuggingFace

Source: https://huggingface.co/dslim/bert-base-NER — for alternative models, see https://huggingface.co/models?other=bert&sort=downloads&search=ner

In [1]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

2022-05-31 17:58:53.246583: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-31 17:58:53.246604: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Single source for the Hugging Face model id, so the tokenizer and the model
# are always loaded from the same checkpoint.
BERT_NER_MODEL = "dslim/bert-base-NER"

tokenizer = AutoTokenizer.from_pretrained(BERT_NER_MODEL)
model = AutoModelForTokenClassification.from_pretrained(BERT_NER_MODEL)

In [5]:
# Token-classification ("ner") pipeline built from the model and tokenizer
# loaded in the previous cell.
nlp = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
)
# Demo text shared by the taggers in this notebook. It is spelled out line by
# line so that the 13-space continuation indents and the trailing blanks after
# "is" and "Amsterdam," stay visible: the character offsets in the recorded
# outputs depend on them.
example = (
    "My name is Wolfgang, I live in Berlin where\n"
    "             I work for Siemens. I think that this city is \n"
    "             best place to be in, at the moment at least,\n"
    "             certainly after experiencing Amsterdam, \n"
    "             Barcelona, Copenhagen, Dubrovnik, yeah!"
)

In [6]:
# Run the "ner" pipeline over the example text and print the raw result.
# In the recorded output each entry carries word/score/entity/index/start/end,
# and "Dubrovnik" comes back as three pieces ('Dub', '##rov', '##nik').
ner_results = nlp(example)
print(ner_results)

[{'word': 'Wolfgang', 'score': 0.9989216327667236, 'entity': 'B-PER', 'index': 4, 'start': 11, 'end': 19}, {'word': 'Berlin', 'score': 0.9997388124465942, 'entity': 'B-LOC', 'index': 9, 'start': 31, 'end': 37}, {'word': 'Siemens', 'score': 0.9976899027824402, 'entity': 'B-ORG', 'index': 14, 'start': 68, 'end': 75}, {'word': 'Amsterdam', 'score': 0.9994820952415466, 'entity': 'B-LOC', 'index': 37, 'start': 204, 'end': 213}, {'word': 'Barcelona', 'score': 0.9987280964851379, 'entity': 'B-LOC', 'index': 39, 'start': 229, 'end': 238}, {'word': 'Copenhagen', 'score': 0.9992740750312805, 'entity': 'B-LOC', 'index': 41, 'start': 240, 'end': 250}, {'word': 'Dub', 'score': 0.9992077946662903, 'entity': 'B-LOC', 'index': 43, 'start': 252, 'end': 255}, {'word': '##rov', 'score': 0.9974023103713989, 'entity': 'I-LOC', 'index': 44, 'start': 255, 'end': 258}, {'word': '##nik', 'score': 0.9861747622489929, 'entity': 'I-LOC', 'index': 45, 'start': 258, 'end': 261}]


## NER with Flair

Source: https://github.com/flairNLP/flair/

In [7]:
from flair.data import Sentence
from flair.models import SequenceTagger

In [8]:
# Wrap the same example text in a Flair Sentence so the Flair tagger can
# annotate it.
sentence = Sentence(example)

In [9]:
# Load Flair's tagger registered under the "ner" model key.
tagger = SequenceTagger.load("ner")

2022-05-31 18:02:55,068 --------------------------------------------------------------------------------
2022-05-31 18:02:55,075 The model key 'ner' now maps to 'https://huggingface.co/flair/ner-english' on the HuggingFace ModelHub
2022-05-31 18:02:55,079  - The most current version of the model is automatically downloaded from there.
2022-05-31 18:02:55,086  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner/en-ner-conll03-v0.4.pt)
2022-05-31 18:02:55,092 --------------------------------------------------------------------------------
2022-05-31 18:02:55,852 loading file /home/erikt/.flair/models/ner-english/4f4cdab26f24cb98b732b389e6cebc646c36f54cfd6e0b7d3b90b25656e4262f.8baa8ae8795f4df80b28e7f7b61d788ecbb057d1dc85aacb316f1bd02837a4a4


In [10]:
# predict() is called for its effect on `sentence` (its return value is not
# used); printing the sentence afterwards shows the labels it received.
tagger.predict(sentence)
print(sentence)

Sentence: "My name is Wolfgang , I live in Berlin where I work for Siemens . I think that this city is best place to be in , at the moment at least , certainly after experiencing Amsterdam , Barcelona , Copenhagen , Dubrovnik , yeah !"   [− Tokens: 46  − Token-Labels: "My name is Wolfgang <S-PER> , I live in Berlin <S-LOC> where I work for Siemens <S-ORG> . I think that this city is best place to be in , at the moment at least , certainly after experiencing Amsterdam <S-LOC> , Barcelona <S-LOC> , Copenhagen <S-LOC> , Dubrovnik <S-LOC> , yeah !"]


In [11]:
# Walk over every span found in the sentence: print the span's type first,
# then each token the span is made of.
for span in sentence.get_spans():
    print(type(span))
    for token in span:
        print(token)

<class 'flair.data.Span'>
Token: 4 Wolfgang
<class 'flair.data.Span'>
Token: 9 Berlin
<class 'flair.data.Span'>
Token: 14 Siemens
<class 'flair.data.Span'>
Token: 37 Amsterdam
<class 'flair.data.Span'>
Token: 39 Barcelona
<class 'flair.data.Span'>
Token: 41 Copenhagen
<class 'flair.data.Span'>
Token: 43 Dubrovnik


In [12]:
# Manually set the "ner" label of the first token to "A". This modifies the
# `sentence` object that the cells above tagged and printed.
sentence[0].set_label("ner", "A")

Token: 1 My

In [13]:
# Read back the "ner" labels of the second token; as the cell's last
# expression, the result is what the notebook displays.
sentence[1].get_labels("ner")

[O (1.0)]

## Flair wrapper

In [14]:
from REL.entity_disambiguation import EntityDisambiguation
from flair import cache_root
from flair.models import SequenceTagger
from flair.data import Sentence
import argparse
from http.server import HTTPServer
from REL.server import make_handler


from REL.utils import fetch_model


def load_flair_ner(path_or_url):
    """Load a Flair ``SequenceTagger``, with a ``fetch_model`` fallback.

    ``path_or_url`` is first handed to ``SequenceTagger.load`` unchanged. If
    that raises, the value is passed through ``fetch_model`` (together with
    Flair's ``cache_root``) and whatever that returns is loaded instead.

    Args:
        path_or_url: Value accepted by ``SequenceTagger.load`` directly or,
            failing that, by ``fetch_model``.

    Returns:
        The loaded ``SequenceTagger``.

    Raises:
        Exception: The error raised by the fallback attempt when both attempts
            fail. The error from the direct attempt is attached to it as
            ``__cause__`` so the first failure is not lost.
    """
    try:
        return SequenceTagger.load(path_or_url)
    except Exception as direct_error:
        # Best-effort fallback. It runs inside the handler so that, if it
        # fails as well, the direct-load error can be chained onto the
        # fallback error instead of being silently discarded.
        try:
            return SequenceTagger.load(fetch_model(path_or_url, cache_root))
        except Exception as fallback_error:
            raise fallback_error from direct_error

In [15]:
# Flair model key handed to load_flair_ner() in the next cell.
NER_MODEL = "ner-fast"

In [16]:
# Load the tagger named by NER_MODEL through the load_flair_ner() wrapper.
ner_model = load_flair_ner(NER_MODEL)

2022-05-31 18:05:18,948 --------------------------------------------------------------------------------
2022-05-31 18:05:18,954 The model key 'ner-fast' now maps to 'https://huggingface.co/flair/ner-english-fast' on the HuggingFace ModelHub
2022-05-31 18:05:18,961  - The most current version of the model is automatically downloaded from there.
2022-05-31 18:05:18,968  - (you can alternatively manually download the original model at https://nlp.informatik.hu-berlin.de/resources/models/ner-fast/en-ner-fast-conll03-v0.4.pt)
2022-05-31 18:05:18,971 --------------------------------------------------------------------------------
2022-05-31 18:05:19,610 loading file /home/erikt/.flair/models/ner-english-fast/4c58e7191ff952c030b82db25b3694b58800b0e722ff15427f527e1631ed6142.e13c7c4664ffe2bbfa8f1f5375bd0dced866b8c1dd7ff89a6d705518abf0a611


In [17]:
# Try the wrapped tagger on a short sentence.
tokenized_sentence = Sentence("John went to Microsoft in Seattle")
ner_model.predict(tokenized_sentence)
# Bare last expression: the notebook displays the sentence after prediction.
tokenized_sentence

Sentence: "John went to Microsoft in Seattle"   [− Tokens: 6  − Token-Labels: "John <S-PER> went to Microsoft <S-ORG> in Seattle <S-LOC>"]

## BERT wrapper

(to do)