# Flair Experiment

This notebook uses the Flair library to train a sequence tagger on the corpus of this project and to annotate that corpus.

In [5]:
from flair.data import Corpus
from flair.datasets import ColumnCorpus

## Load corpus 

The following cell loads the corpus (train, dev, and test splits) from the column-formatted files in `../data/iob/`.

In [None]:
# Column layout of the IOB files: column 0 is mapped to the token text,
# column 2 to the NER tag. Column 1 is not mapped.
column_format = {0: 'text', 2: 'ner'}

# Build the corpus from the three split files found under ../data/iob/.
corpus: Corpus = ColumnCorpus(
    '../data/iob/',
    column_format,
    train_file='train.txt',
    test_file='test.txt',
    dev_file='dev.txt',
    in_memory=False,
)
print(corpus)

# Optional diagnostics, currently disabled:
# print(corpus.obtain_statistics())
# print(corpus.make_tag_dictionary(tag_type='ner'))

2021-06-17 09:48:19,224 Reading data from ../data/iob
2021-06-17 09:48:19,227 Train: ../data/iob/train.txt
2021-06-17 09:48:19,228 Dev: ../data/iob/dev.txt
2021-06-17 09:48:19,229 Test: ../data/iob/test.txt


## Prepare training
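
Based on the corpus, the following cell prepares the training. It stacks `CharacterEmbeddings`, `BytePairEmbeddings` (`de`), and `FlairEmbeddings` (`de-historic-rw-forward` and `de-historic-rw-backward`), and passes them to a `SequenceTagger` with a CRF layer.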

In [None]:
from flair.data import Corpus
from flair.datasets import UD_ENGLISH
from flair.embeddings import WordEmbeddings, StackedEmbeddings, FlairEmbeddings, BytePairEmbeddings, CharacterEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# Label type the tagger predicts; also used to build the tag dictionary
# from the loaded corpus.
tag_type = 'ner'
tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)

# Embeddings to stack, in order. WordEmbeddings('de') is currently disabled.
embedding_stack = [
    CharacterEmbeddings(),
    BytePairEmbeddings('de'),
    FlairEmbeddings('de-historic-rw-forward'),
    FlairEmbeddings('de-historic-rw-backward'),
]
embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_stack)

# Sequence tagger over the stacked embeddings, with a CRF layer enabled.
tagger: SequenceTagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=tag_dictionary,
    tag_type=tag_type,
    use_crf=True,
)

# The trainer ties the tagger to the corpus; the training cell uses it.
trainer: ModelTrainer = ModelTrainer(tagger, corpus)

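## Train model

The following cell trains the tagger. Run the cells above first: `trainer` is defined in the "Prepare training" cell, and running this cell on a fresh kernel raises a `NameError`.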


In [1]:
# Base path handed to trainer.train as its first argument.
model_dir = '../resources/ner_models/ner-experiment_xx'

# Training options, forwarded unchanged as keyword arguments.
train_options = dict(
    learning_rate=0.1,
    mini_batch_size=32,
    embeddings_storage_mode="cpu",
    shuffle=False,
    max_epochs=150,
)

# Requires `trainer` from the preparation cell to exist in the kernel.
trainer.train(model_dir, **train_options)

NameError: name 'trainer' is not defined