In [None]:
!pip install flair

Load the dataset and initialize the embeddings, label dictionary, and sequence tagger.

In [3]:
from flair.data import Corpus
from flair.datasets import ColumnCorpus
from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings
from flair.models import SequenceTagger

# Token representation: GloVe word vectors stacked with the forward and
# backward 'news' Flair embeddings.
embedding_types = [
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward'),
    FlairEmbeddings('news-backward'),
]
embeddings = StackedEmbeddings(embeddings=embedding_types)

# Column layout of the corpus files: column 0 is mapped to 'text' and
# column 1 to the 'ner' annotation.
columns = {0: 'text', 1: 'ner'}
data_folder = 'train/'
corpus: Corpus = ColumnCorpus(data_folder=data_folder, column_format=columns)

# Collect the labels of the annotation layer the tagger is trained to predict.
tag_type = 'ner'
tag_dictionary = corpus.make_label_dictionary(label_type=tag_type)

# Sequence tagger on top of the stacked embeddings, with a hidden size of 256
# and the CRF option enabled.
tagger: SequenceTagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
    use_crf=True,
)



2022-06-01 07:50:22,721 Reading data from train
2022-06-01 07:50:22,724 Train: train/train.txt
2022-06-01 07:50:22,726 Dev: None
2022-06-01 07:50:22,729 Test: None
2022-06-01 07:50:22,743 Computing label dictionary. Progress:


40it [00:00, 12078.63it/s]

2022-06-01 07:50:22,752 Dictionary created for label 'ner' with 4 values: JOB (seen 42 times), ORG (seen 32 times), LOC (seen 10 times)
2022-06-01 07:50:22,753 SequenceTagger predicts: Dictionary with 13 tags: O, S-JOB, B-JOB, E-JOB, I-JOB, S-ORG, B-ORG, E-ORG, I-ORG, S-LOC, B-LOC, E-LOC, I-LOC





Initialize the trainer and start training.

In [None]:
from flair.trainers import ModelTrainer

# Bind the tagger to the corpus it will be trained on.
trainer: ModelTrainer = ModelTrainer(model=tagger, corpus=corpus)

# Training hyperparameters; adjust these to tune the training run.
training_options = {
    'learning_rate': 0.5,
    'mini_batch_size': 2,
    'max_epochs': 150,
}

# Training artifacts are written below this directory. Keep the spelling in
# sync with the path the model is later loaded from.
trainer.train('resoures/tagers/example-ner', **training_options)


Testing the model.

In [69]:
from flair.data import Sentence
from flair.models import SequenceTagger

# Load the best-model.pt checkpoint from the directory passed to
# trainer.train(). The path is relative, exactly like the training output
# path, so the notebook does not depend on the working directory being
# /content.
model = SequenceTagger.load("resoures/tagers/example-ner/best-model.pt")

# Sample input to tag.
se = Sentence("Senior Software Dev Engineer - Job ID: 996246 | Amazon.jobs")

# The predictions are attached to the sentence object and read back below.
model.predict(se)

# Print each predicted span with its label and confidence score.
for entity in se.get_spans('ner'):
    label = entity.get_label("ner")  # look the label up once per span
    print(entity.text, label.value, label.score)

2022-06-01 08:20:13,687 loading file /content/resoures/tagers/example-ner/best-model.pt
2022-06-01 08:20:14,624 SequenceTagger predicts: Dictionary with 15 tags: O, S-JOB, B-JOB, E-JOB, I-JOB, S-ORG, B-ORG, E-ORG, I-ORG, S-LOC, B-LOC, E-LOC, I-LOC, <START>, <STOP>
Senior Software Dev Engineer JOB 0.9553582072257996
