# NERC using Flair

We used [Flair](https://github.com/flairNLP/flair/) library's SequenceTagger model for NERC. This model was pre-trained on the [OntoNotes 5.0](https://paperswithcode.com/dataset/ontonotes-5-0) corpus.


https://huggingface.co/flair/ner-english-ontonotes-large

@misc{schweter2020flert,
    title={FLERT: Document-Level Features for Named Entity Recognition},
    author={Stefan Schweter and Alan Akbik},
    year={2020},
    eprint={2011.06993},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}


In [1]:
#pip install flair

In [2]:
from flair.data import Sentence
from flair.models import SequenceTagger
from sklearn.metrics import classification_report
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Getting the test data into proper form to sentence level

ner_test_data = pd.read_csv("NER-test.tsv", sep="\t")

test_sentences = []
current_sentence = []
for index, row in ner_test_data.iterrows():
    if row["token id"] == 0:
        if current_sentence:
            s = " ".join(current_sentence)
            s =  s[:-2] + s[-1] # To delete the space from the second to last index that occurs from the previos operation
            test_sentences.append(s)
        current_sentence = []
    current_sentence.append(row["token"])

if current_sentence:
    s = " ".join(current_sentence)
    s =  s[:-2] + s[-1]
    test_sentences.append(s)


print(test_sentences)




["I would n't be caught dead watching the NFL if it were n't for Taylor Swift.", "Chris O'Donnell stated that while filming for this movie , he felt like he was in a Toys '' R '' Us commercial.", 'The whole game was a rollercoaster ride , but Los Angeles Lakers ultimately persevered and won!', 'Zendaya slayed in Dune 2 , as she does in all her movies.', "While my favorite player was playing this match and started off strongggg , it went downhill after Messi 's injyry midgame.", "My uncle 's brother 's neighbor 's cat 's veterinarian David reads the communist manifesto in his spare time.", 'He said that The Great Gatsby is the best novell ever , and I was about to throw hands.', 'I could not look away from this train wrck of a movie , on February 14th of all days.', "The film Everything Everywhere All At Once follows Evelyn Wang , a woman drowning under the stress of her family 's failing laundromat.", 'I just finished reading pride and prejudice which had me HOOOKED from the beginning.

In [4]:
# Convert flair NE tags to IOB format 

def iob_adder_for_flair(sentence):

    spans = sentence.get_spans('ner')
    iob_labels = []
    start_O_idx = 0

    for entity in spans:
        inside = False
        end_O_idx = entity[0].idx - 1

        #print(f"Entity:, {entity}")
        #print(f"Text:, {entity.text}")
        label = entity.tag
        
        for i in range(start_O_idx, end_O_idx):
            iob_labels.append('O')
            print(f"{sentence[i]} -> 'O")  
        
        start_O_idx= entity[-1].idx

        for token in entity:
            current_idx = token.idx - 1

            if inside:
                iob_labels.append('I-' + label)
            else:
                iob_labels.append('B-' + label)
                inside = True
            
            print(f"{sentence[current_idx]} -> {label}")  


            
    sent_length = len(sentence) - 1
    if start_O_idx != sent_length:
        for i in range(start_O_idx, sent_length):
            iob_labels.append('O')
            print(f"{sentence[i]} -> 'O") 


    return iob_labels



In [5]:
seq_tagger = SequenceTagger.load("flair/ner-english-ontonotes-large")
# https://huggingface.co/flair/ner-english-ontonotes-large

predictions = [] 

for sent in test_sentences:
    flair_sentence = Sentence(sent)
    seq_tagger.predict(flair_sentence)
    predictions.extend( iob_adder_for_flair(flair_sentence) )

print(predictions)

2024-03-31 20:43:47,453 SequenceTagger predicts: Dictionary with 76 tags: <unk>, O, B-CARDINAL, E-CARDINAL, S-PERSON, S-CARDINAL, S-PRODUCT, B-PRODUCT, I-PRODUCT, E-PRODUCT, B-WORK_OF_ART, I-WORK_OF_ART, E-WORK_OF_ART, B-PERSON, E-PERSON, S-GPE, B-DATE, I-DATE, E-DATE, S-ORDINAL, S-LANGUAGE, I-PERSON, S-EVENT, S-DATE, B-QUANTITY, E-QUANTITY, S-TIME, B-TIME, I-TIME, E-TIME, B-GPE, E-GPE, S-ORG, I-GPE, S-NORP, B-FAC, I-FAC, E-FAC, B-NORP, E-NORP, S-PERCENT, B-ORG, E-ORG, B-LANGUAGE, E-LANGUAGE, I-CARDINAL, I-ORG, S-WORK_OF_ART, I-QUANTITY, B-MONEY
Token[0]: "I" -> 'O
Token[1]: "would" -> 'O
Token[2]: "n't" -> 'O
Token[3]: "be" -> 'O
Token[4]: "caught" -> 'O
Token[5]: "dead" -> 'O
Token[6]: "watching" -> 'O
Token[7]: "the" -> 'O
Token[8]: "NFL" -> ORG
Token[9]: "if" -> 'O
Token[10]: "it" -> 'O
Token[11]: "were" -> 'O
Token[12]: "n't" -> 'O
Token[13]: "for" -> 'O
Token[14]: "Taylor" -> PERSON
Token[15]: "Swift" -> PERSON
Token[0]: "Chris" -> PERSON
Token[1]: "O'Donnell" -> PERSON
Token[2]:

In [6]:
# Getting the ground truth labels
ground_truth_labels = list(ner_test_data["BIO NER tag"])
print(ground_truth_labels)

['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORG', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'I-PERSON', 'O', 'B-PERSON', 'I-PERSON', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'B-PERSON', 'O', 'O', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'I-WORK_OF_ART', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-DATE', 'I-DATE', 'O', 'O', 'O', 'O', 'O', 'O', 'B-WORK_OF_ART', 'I-WORK_OF_ART', 'I-WORK_OF_ART', 'I-WORK_OF_ART', 'I

In [7]:
# Evaluation

lenp = len(predictions)
leng = len(ground_truth_labels)
for _ in range(leng-lenp):
    predictions.append('O')

report = classification_report(ground_truth_labels, predictions)
print(report)

               precision    recall  f1-score   support

       B-DATE       0.00      0.00      0.00         1
       B-NORP       0.00      0.00      0.00         0
        B-ORG       0.33      0.33      0.33         3
        B-PER       0.00      0.00      0.00         3
     B-PERSON       0.17      0.33      0.22         3
B-WORK_OF_ART       0.00      0.00      0.00         4
       I-DATE       0.25      1.00      0.40         1
        I-ORG       0.50      0.50      0.50         6
        I-PER       0.00      0.00      0.00         1
     I-PERSON       0.33      0.50      0.40         2
I-WORK_OF_ART       0.22      0.22      0.22         9
            O       0.91      0.89      0.90       160

     accuracy                           0.78       193
    macro avg       0.23      0.31      0.25       193
 weighted avg       0.79      0.78      0.79       193



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
