# Training a Sequence Labeling Model (NER)



#### Google Colab Setup

In [0]:
# MOUNT GOOGLE DRIVE
from google.colab import drive

# The path definitions further down start with this mount point.
gdrive_mount_point = '/gdrive'
drive.mount(gdrive_mount_point, force_remount=True)

In [0]:
# INSTALL FLAIR
!pip install flair==0.4.4 --quiet

In [0]:
# INSTALL ALLENNLP (only necessary when using ELMoEmbeddings)
#!pip install allennlp --quiet

#### Paths

In [0]:
# SETUP PATHS
from pathlib import Path

# Root of all project resources on the mounted drive.
base_path = Path('/gdrive/My Drive/embeddings-comparison/resources')

# Sub-folders for pretrained embeddings, trained taggers and column corpora.
emb_path = base_path.joinpath('models', 'embeddings')
ner_model_path = base_path.joinpath('models', 'taggers')
ner_corpus_path = base_path.joinpath('corpora', 'column_corpora')

#### ColumnCorpus

In [0]:
# PREPARE CORPUS
from flair.datasets import ColumnCorpus

# the annotation layer we want to predict
tag_type = 'ner'

# column index -> annotation name in the corpus files
# (more columns can be declared, e.g. 2: 'pos')
columns = {0: 'text', 1: 'ner'}

# folder in which the train, test and dev files reside
corpus_folder = ner_corpus_path.joinpath('EXAMPLE-CORPUS')

# build the corpus from the column-formatted files and show it
corpus = ColumnCorpus(corpus_folder, columns)
print(corpus)

# derive the tag dictionary for `tag_type` from the corpus
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
# print(tag_dictionary.idx2item)

#### Embeddings

In [0]:
# INITIALIZE EMBEDDINGS
from flair.embeddings import FlairEmbeddings, StackedEmbeddings, WordEmbeddings, BertEmbeddings, ELMoEmbeddings

# Every embedding below is optional. For each one you want to use, uncomment
# BOTH its path definition(s) AND the matching entry in `embedding_list`.
# (Previously this whole cell was wrapped in a string literal, so `embeddings`
# was never defined and the tagger cell failed with a NameError.)

# WordEmbeddings [word2vec, fastText, glove]
# we = str(emb_path/'example.kv')

# FlairEmbeddings
# flair_fwd = emb_path/'FLAIR'/'example-fwd.pt'
# flair_bwd = emb_path/'FLAIR'/'example-bwd.pt'

# ELMoEmbeddings (needs allennlp, see the install cell above)
# elmo_options = emb_path/'ELMO'/'options.json'
# elmo_weights = emb_path/'ELMO'/'weights.hdf5'

# BertEmbeddings
# bert = str(emb_path/'BERT'/'model_folder')

embedding_list = [
    # WordEmbeddings(we),
    # FlairEmbeddings(flair_fwd),
    # FlairEmbeddings(flair_bwd),
    # NOTE(review): files are passed by keyword because the first positional
    # parameter of ELMoEmbeddings appears to be the pretrained model name --
    # confirm against the installed flair version.
    # ELMoEmbeddings(options_file=str(elmo_options), weight_file=str(elmo_weights)),
    # BertEmbeddings(bert),
]

# Fail here with an actionable message instead of a NameError further down.
if not embedding_list:
    raise ValueError(
        "No embeddings selected: uncomment at least one entry in "
        "`embedding_list` (and its path definition) before running this cell."
    )

# StackedEmbeddings
embeddings = StackedEmbeddings(embedding_list)

#### Train Model

In [0]:
# INITIALIZE SEQUENCE TAGGER
from flair.models import SequenceTagger

# Constructor arguments for the tagger; `embeddings`, `tag_dictionary` and
# `tag_type` come from the earlier cells.
tagger_params = {
    'hidden_size': 512,
    'embeddings': embeddings,
    'tag_dictionary': tag_dictionary,
    'tag_type': tag_type,
}
tagger = SequenceTagger(**tagger_params)

In [0]:
# INITIALIZE TRAINER
from flair.trainers import ModelTrainer

# output folder passed to trainer.train(); the checkpoint is also read from here
model_folder = ner_model_path / 'EXAMPLE-MODEL'

# set to True to resume from `checkpoint.pt` in `model_folder`
continue_training = False

if not continue_training:
    # fresh run: train the tagger built above on the corpus
    trainer = ModelTrainer(tagger, corpus)
else:
    # resume: rebuild the trainer from the saved checkpoint
    checkpoint = tagger.load_checkpoint(model_folder / 'checkpoint.pt')
    trainer = ModelTrainer.load_from_checkpoint(checkpoint, corpus)

# Training options, gathered in one place so they are easy to tune
train_options = dict(
    learning_rate=0.5,
    anneal_factor=0.5,
    mini_batch_size=8,
    patience=5,
    max_epochs=50,
    train_with_dev=True,
    monitor_test=True,
    shuffle=True,
    checkpoint=True,
)

# Training
trainer.train(model_folder, **train_options)