In [None]:
# Adapted from: https://medium.com/thecyphy/training-custom-ner-model-using-flair-df1f9ea9c762

In [None]:
!pip install -qq flair

In [None]:
# Mount Google Drive inside the Colab filesystem at /content/drive.
# The data files read later in this notebook live under that mount point.
from google.colab import drive
# force_remount=True is passed so the mount is requested again even if this cell is re-run.
drive.mount('/content/drive', force_remount=True)

In [None]:
# Load the BIO-tagged dataset from Google Drive into a Flair corpus.
from flair.data import Corpus
from flair.datasets import ColumnCorpus

# Column layout of the data files: column 0 is mapped to 'text', column 1 to 'ner'.
columns = {0: 'text', 1: 'ner'}

# Directory that holds the train / dev / test files.
data_folder = '/content/drive/MyDrive/bio_tagged'

# Build the corpus from the three split files found in data_folder.
corpus: Corpus = ColumnCorpus(
    data_folder,
    columns,
    train_file='bio_tagged_train.txt',
    dev_file='bio_tagged_dev.txt',
    test_file='bio_tagged_test.txt',
)

In [None]:
# Print the corpus object as a quick check after loading.
print(corpus)

In [None]:
# Sanity check on the training split: its size, then the first sentence
# rendered together with its 'ner' tags.
train_split = corpus.train
print(len(train_split))
first_sentence = train_split[0]
print(first_sentence.to_tagged_string('ner'))

In [None]:
# Label type the tagger is trained to predict; matches the 'ner' column name
# used when the corpus was defined.
label_type = 'ner'
# Build the label dictionary for that label type from the corpus.
# It is passed to the SequenceTagger constructor in a later cell.
tag_dictionary = corpus.make_label_dictionary(label_type=label_type)

In [None]:
from typing import List

from flair.embeddings import StackedEmbeddings, TokenEmbeddings, WordEmbeddings

# Token embeddings to stack; at the moment only the 'glove' word embeddings.
embedding_types: List[TokenEmbeddings] = [
    WordEmbeddings('glove'),
    # further embeddings can be listed here
]

# Wrap the list in a single stacked embedding, which the tagger consumes.
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)

In [None]:
from flair.models import SequenceTagger

# Sequence tagger built on the stacked embeddings and the corpus label
# dictionary, with hidden size 256 and the CRF option switched on.
tagger: SequenceTagger = SequenceTagger(
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=label_type,
    hidden_size=256,
    use_crf=True,
)

# Show the resulting model.
print(tagger)

In [None]:
from flair.trainers import ModelTrainer

# Pair the tagger with the corpus it will be trained on.
trainer: ModelTrainer = ModelTrainer(tagger, corpus)

# Run training; the first argument is the output directory (a path relative
# to the current working directory) from which the final model is loaded later.
trainer.train(
    'resources/taggers/example-ner',
    learning_rate=0.1,
    mini_batch_size=32,
    max_epochs=10,
)

In [None]:
from flair.data import Sentence
from flair.models import SequenceTagger

# Load the trained model from the same relative output directory that was
# passed to trainer.train(). The previous absolute '/content/...' path only
# matched when the notebook's working directory happened to be /content.
model = SequenceTagger.load('resources/taggers/example-ner/final-model.pt')

# Example sentence to run through the tagger.
sentence = Sentence('she was acting hella extra yesterday')

# Predict in place: the predicted labels are attached to `sentence`.
model.predict(sentence)

# Show the sentence with its predicted tags, then the raw label objects.
result = sentence.to_tagged_string()
print(result)
print(sentence.labels)

In [None]:
# Recursively archive /content/resources into /content/ner_model.zip.
# NOTE(review): assumes training wrote its output under /content/resources,
# i.e. that the notebook's working directory is /content; confirm.
!zip -r /content/ner_model.zip /content/resources