In [87]:
# Import necessary Python modules from the Transformers library
from transformers import AutoModelForTokenClassification, AutoTokenizer
from transformers import pipeline

# Hugging Face Hub id of the Impresso NER model used for token classification.
# Model card: https://huggingface.co/impresso-project/ner-stacked-bert-multilingual
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual"

# Load the tokenizer published under the same model id; it is handed to the
# NER pipeline when that pipeline is built.
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [103]:
# Build the NER pipeline on CPU from the model id and the tokenizer loaded above.
# NOTE(review): trust_remote_code=True lets code shipped in the model repository
# run locally -- presumably needed for the custom "generic-ner" task; review that
# code (and consider pinning a revision) before relying on it.
ner_pipeline = pipeline(
    "generic-ner",
    model=MODEL_NAME,
    tokenizer=ner_tokenizer,
    trust_remote_code=True,
    device="cpu",
)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/eboros/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /Users/eboros/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [104]:
# Example input sentence for the NER pipeline.
sentence = "Meanwhile, across the Atlantic, George Washington, the first secretary of the world, was shaping policies."
# NOTE(review): this helper is re-defined verbatim in later cells of this
# notebook; defining it once near the top would be enough.
def print_nicely(data):
    """Print every entry of ``data`` as indented ``Key: value`` lines.

    Args:
        data: Iterable of mappings with string keys (in this notebook, the
            list of result dicts returned by a pipeline call).

    Returns:
        None. Each key is passed through ``str.capitalize`` before printing,
        and every entry is followed by one blank line.
    """
    for entry in data:
        for key, value in entry.items():
            print(f"  {key.capitalize()}: {value}")
        print()  # Blank line between entries

# Run the NER pipeline on the example sentence and print each returned entry.
entities = ner_pipeline(sentence)
print_nicely(entities)

  Type: loc.adm.nat
  Score: 16.29
  Index: (4, 5)
  Surface: Atlantic
  Start: 22
  End: 30
  Loffset: 0
  Roffset: 8

  Type: pers.ind
  Score: 92.82
  Index: (6, 8)
  Surface: George Washington
  Start: 32
  End: 49
  Loffset: 0
  Roffset: 17
  Function: secretary
  Name: George Washington



In [105]:
# Second example input: a person followed by an appositive naming their function.
sentence = "Thomas Jefferson, the nation's Secretary of State, played a key role in drafting policies for the new American government."
# NOTE(review): duplicate of the print_nicely defined in an earlier cell of this
# notebook; kept so this cell still runs on its own.
def print_nicely(data):
    """Print every entry of ``data`` as indented ``Key: value`` lines.

    Args:
        data: Iterable of mappings with string keys (in this notebook, the
            list of result dicts returned by a pipeline call).

    Returns:
        None. Each key is passed through ``str.capitalize`` before printing,
        and every entry is followed by one blank line.
    """
    for entry in data:
        for key, value in entry.items():
            print(f"  {key.capitalize()}: {value}")
        print()  # Blank line between entries

# Run the NER pipeline on the sentence defined in this cell and print the entries.
entities = ner_pipeline(sentence)
print_nicely(entities)

  Type: pers.ind
  Score: 91.86
  Index: (0, 10)
  Surface: Thomas Jefferson, the nation's Secretary
  Start: 0
  End: 49
  Loffset: 0
  Roffset: 40
  Name: Thomas Jefferson
  Function: nation's Secretary of State



In [85]:
# Third example input for the NER pipeline.
sentence = "Meanwhile, across the Atlantic, George Washington, the first President of the United States, was shaping policies."
# NOTE(review): duplicate of the print_nicely defined in earlier cells of this
# notebook; kept so this cell still runs on its own.
def print_nicely(data):
    """Print every entry of ``data`` as indented ``Key: value`` lines.

    Args:
        data: Iterable of mappings with string keys (in this notebook, the
            list of result dicts returned by a pipeline call).

    Returns:
        None. Each key is passed through ``str.capitalize`` before printing,
        and every entry is followed by one blank line.
    """
    for entry in data:
        for key, value in entry.items():
            print(f"  {key.capitalize()}: {value}")
        print()  # Blank line between entries

# Run the NER pipeline on the sentence defined in this cell and print the entries.
# NOTE(review): the saved output of this cell (execution count 85) uses the keys
# "Entity"/"Text", whereas the cells above (104, 105) print "Type"/"Surface" --
# the output looks stale; re-run the notebook top to bottom to refresh it.
entities = ner_pipeline(sentence)
print_nicely(entities)

  Entity: loc.adm.nat
  Score: 16.14
  Index: (4, 5)
  Text: Atlantic
  Start: 22
  End: 30

  Entity: pers.ind
  Score: 80.22
  Index: (6, 8)
  Text: George Washington
  Start: 32
  End: 49
  Name: George Washington
  Function: President of the United States



In [86]:
# Import the necessary modules from the transformers library
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Hugging Face Hub id of the Impresso entity-linking (NEL) model.
# Model card: https://huggingface.co/impresso-project/nel-mgenre-multilingual
NEL_MODEL_NAME = "impresso-project/nel-mgenre-multilingual"

# Load the tokenizer from the same constant the pipeline uses, so the tokenizer
# and the model cannot drift apart if the id is ever changed.
nel_tokenizer = AutoTokenizer.from_pretrained(NEL_MODEL_NAME)

# Build the entity-linking pipeline on CPU.
# NOTE(review): trust_remote_code=True runs code shipped in the model repository
# (the run log shows generic_nel.py being downloaded and suggests pinning a
# revision); review that file and consider passing a fixed `revision`.
nel_pipeline = pipeline(
    "generic-nel",
    model=NEL_MODEL_NAME,
    tokenizer=nel_tokenizer,
    trust_remote_code=True,
    device="cpu",
)

# Example input for entity linking. Presumably the [START] ... [END] markers
# delimit the mention to be linked -- the printed "Surface" is the text between
# them; confirm against the model card.
simple_sentence = "The event was held at the [START] Palace of Versailles [END], a symbol of French monarchy."

# Run the entity-linking pipeline on the marked-up sentence.
linked_entity = nel_pipeline(simple_sentence)

# Print the linking result with print_nicely, which is defined in an earlier cell.
print_nicely(linked_entity)

generic_nel.py:   0%|          | 0.00/5.48k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/impresso-project/nel-mgenre-multilingual:
- generic_nel.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/eboros/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /Users/eboros/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
  return self.fget.__get__(instance, owner)()


  Id: 33:55:Palace of Versailles:nel-mgenre-multilingual
  Surface: Palace of Versailles
  Label: Palace of Versailles
  Wkd_id: Q2946
  Url: https://en.wikipedia.org/wiki/Palace_of_Versailles
  Type: UNK
  Confidence_nel: 0.0
  Loffset: 33
  Roffset: 55

