# NER using Flair

- [flair | github](https://github.com/flairNLP/flair)

In [1]:
%pip install datasets
%pip install evaluate
%pip install flair

[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
from utils import *

In [3]:
from flair.data import Sentence
from flair.nn import Classifier
from flair.models import SequenceTagger

In [4]:
# load the 4-class NER tagger
# tagger = SequenceTagger.load("flair/ner-english")
flair_tagger = Classifier.load('ner')

2024-01-29 03:21:14,202 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>


In [18]:
def predict_ner_flair(flair_tagger: SequenceTagger, labeled_dataset: Dataset) -> Tuple[list[list[str]], list[list[str]]]:
    """ Run inference on the tokens using trained BERT model """
    references: list[list[str]] = []
    fl_predictions: list[list[str]] = []

    for row in tqdm(labeled_dataset, desc=str(len(labeled_dataset))):
        # add ground truth labels to references
        references.append([re.sub("^[BI]-", "", tag_names[id]) for id in row['ner_tags']])
        tokens = row["tokens"]
        # make a flair sentence object
        sentence = Sentence(" ".join(tokens))
        # run NER over sentence
        flair_tagger.predict(sentence)
        ner_results = ["O" for _ in range(len(tokens))]
        for entity in sentence.get_spans('ner'):
            tag = entity.tag
            for token in entity:
                idx = token.idx - 1
                # print(f"{entity.text} {idx} / {len(tokens)}")
                try:
                    ner_results[idx] = tag   
                except IndexError as err:
                    break
        # translate numerical index to NER class label
        predicted_tags = ner_results
        fl_predictions.append(predicted_tags)
    return references, fl_predictions


In [19]:
references, fl_predictions = predict_ner_flair(flair_tagger, test)

3453: 100%|██████████| 3453/3453 [00:25<00:00, 137.96it/s]


In [20]:
results = evaluate_results(references, fl_predictions)
results

{'ER': {'precision': 0.9126637554585153,
  'recall': 0.9047619047619048,
  'f1': 0.9086956521739129,
  'number': 1617},
 'ISC': {'precision': 0.7702503681885126,
  'recall': 0.7546897546897547,
  'f1': 0.762390670553936,
  'number': 693},
 'OC': {'precision': 0.865681233933162,
  'recall': 0.8104693140794224,
  'f1': 0.8371659415786201,
  'number': 1662},
 'overall_precision': 0.868421052631579,
 'overall_recall': 0.8391238670694864,
 'overall_f1': 0.8535211267605634,
 'overall_accuracy': 0.9658231937116399}

In [21]:
# Save NER results to disk
flair_results_path = os.path.join(interim_dir, "ner_results_flair.json")

save_ner_results(flair_results_path, references, fl_predictions)

# Load persisted NER results
# references, fl_predictions = load_ner_results(flair_results_path)

Saving NER results to ../data/interim/ner_results_flair.json


In [22]:
flair_evaluation_path = os.path.join(interim_dir, "evaluation_results_flair.json")
save_evaluation_results(flair_evaluation_path, results)


Saving evaluation results to ../data/interim/evaluation_results_flair.json
