# Training a Flair Language Model




#### Setup


In [0]:
# Mount Google Drive at /gdrive (Colab-only: relies on the google.colab package).
# The PATHS cell below builds every corpus/model location under '/gdrive/My Drive/...',
# so this cell has to run first.
from google.colab import drive
drive.mount('/gdrive')

In [0]:
!pip install flair --quiet

In [0]:
# PATHS
from pathlib import Path

# Root of the project resources on the mounted drive
base_path = Path('/gdrive/My Drive/embeddings-comparison/resources')

# Sub-folders for the text corpora and for the embedding models
corpus_path = base_path.joinpath('corpora', 'text_corpora')
model_path = base_path.joinpath('models', 'embeddings')

#### TextCorpus

In [0]:
from flair.data import Dictionary
from flair.models import LanguageModel
from flair.trainers.language_model_trainer import TextCorpus

# Direction of the language model to train (forward or backward)
is_forward_lm = True

# Folder that holds the train splits plus the test and valid data
corpus_folder = corpus_path / 'EXAMPLE-CORPUS'

# Default character dictionary
dictionary = Dictionary.load('chars')

# Build the corpus from the folder, dictionary and direction chosen above
corpus = TextCorpus(
    corpus_folder,
    dictionary,
    is_forward_lm,
    character_level=True,
    random_case_flip=False,
)

#### Training

In [0]:
from flair.trainers.language_model_trainer import LanguageModelTrainer

# Folder handed to the trainer for this model; the resume branch below
# also looks for 'checkpoint.pt' inside it
model_folder = model_path / 'FLAIR' / 'EXAMPLE-MODEL-forward'

# Set to True to resume from the checkpoint instead of starting fresh
continue_training = False

if continue_training:
    # Resume: rebuild the trainer from the checkpoint file in the model folder
    checkpoint = model_folder / 'checkpoint.pt'
    trainer = LanguageModelTrainer.load_from_checkpoint(checkpoint, corpus)
else:
    # Fresh run: create a new language model (hidden size and number of
    # layers are set here) and wrap it in a trainer
    language_model = LanguageModel(
        dictionary,
        is_forward_lm,
        hidden_size=1024,
        nlayers=1,
    )
    trainer = LanguageModelTrainer(language_model, corpus)

# NOTE(review): presumably the number of batches between progress log lines - confirm
trainer.log_interval = 500

trainer.train(
    model_folder,
    sequence_length=250,
    mini_batch_size=32,
    max_epochs=10,
    learning_rate=20.0,
    patience=10,
    checkpoint=True,
    num_workers=2,
)

---
# Fine-Tuning an Existing Language Model


#### Setup


In [0]:
# Mount Google Drive at /gdrive (Colab-only: relies on the google.colab package).
# The PATHS cell below builds every corpus/model location under '/gdrive/My Drive/...',
# so this cell has to run first.
from google.colab import drive
drive.mount('/gdrive')

In [0]:
!pip install flair --quiet

In [0]:
# PATHS
from pathlib import Path

# Root of the project resources on the mounted drive
base_path = Path('/gdrive/My Drive/embeddings-comparison/resources')

# Sub-folders for the text corpora and for the embedding models
corpus_path = base_path.joinpath('corpora', 'text_corpora')
model_path = base_path.joinpath('models', 'embeddings')

#### Fine-tuning

In [0]:
from flair.data import Dictionary
from flair.embeddings import FlairEmbeddings
from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus


# Start from an existing language model: here the one exposed by the
# 'de-forward' FlairEmbeddings
language_model = FlairEmbeddings('de-forward').lm

# Take the dictionary and the direction (forward/backward) from that model
# so the corpus is prepared consistently with it
dictionary: Dictionary = language_model.dictionary
is_forward_lm = language_model.is_forward_lm

# Corpus folder (train splits, test and valid) and the model folder
# passed to the trainer
corpus_folder = corpus_path / 'example-corpus'
model_folder = model_path / 'FLAIR' / 'de-forward-finetuned'

# Build the corpus
corpus = TextCorpus(
    corpus_folder,
    dictionary,
    is_forward_lm,
    character_level=True,
)

# Fine-tune the existing model on the corpus
trainer = LanguageModelTrainer(language_model, corpus)
trainer.train(
    model_folder,
    sequence_length=100,
    mini_batch_size=100,
    learning_rate=20,
    patience=10,
    checkpoint=True,
)