# Training a Text Classification Model



#### Google Colab Setup

In [0]:
# MOUNT GOOGLE DRIVE
from google.colab import drive

# Mount Google Drive at /gdrive; force_remount=True remounts even if it is
# already mounted, so re-running this cell is safe.
drive.mount(mountpoint='/gdrive', force_remount=True)

In [0]:
# INSTALL FLAIR
!pip install flair==0.4.4 --quiet

In [0]:
# INSTALL ALLENNLP (only necessary when using ELMoEmbeddings)
# Disabled by default: uncomment the line below to install it.
#!pip install allennlp --quiet

#### Paths

In [0]:
# SETUP PATHS
from pathlib import Path

# Root resources folder on the mounted Google Drive.
base_path = Path('/gdrive/My Drive/embeddings-comparison/resources')

# Folders derived from the root:
#   emb_path        - embedding files referenced in the Embeddings cell
#   cls_model_path  - output location used when training the classifier
#   cls_corpus_path - parent folder of the classification corpus
emb_path = base_path.joinpath('models', 'embeddings')
cls_model_path = base_path.joinpath('models', 'classifiers')
cls_corpus_path = base_path.joinpath('corpora', 'classification_corpora')

#### ClassificationCorpus

In [0]:

from flair.data import Corpus
from flair.datasets import  ClassificationCorpus #TREC_6

# Directory that holds the train, test and dev files.
corpus_folder = cls_corpus_path.joinpath('EXAMPLE-CORPUS')

# Load the corpus from that directory and print it.
corpus: Corpus = ClassificationCorpus(corpus_folder)
print(corpus)

# Build the label dictionary from the loaded corpus; it is passed to the
# classifier in the training section.
label_dict = corpus.make_label_dictionary()

#### Embeddings

In [0]:
# INITIALIZE EMBEDDINGS
from flair.embeddings import FlairEmbeddings, DocumentRNNEmbeddings, WordEmbeddings, BertEmbeddings, ELMoEmbeddings

# HOW TO USE THIS CELL
# For every embedding type you want to use, uncomment BOTH its path
# definition(s) below AND the matching entry in the `embeddings` list.
# This cell must define `document_embeddings`, because the classifier cell
# in the training section reads it.

# WordEmbeddings [word2vec, fastText, glove]
# we = str(emb_path/'example.kv')

# FlairEmbeddings
# flair_fwd = emb_path/'FLAIR'/'example-fwd.pt'
# flair_bwd = emb_path/'FLAIR'/'example-bwd.pt'

# ELMoEmbeddings (requires allennlp, see the setup section)
# elmo_options = emb_path/'ELMO'/'options.json'
# elmo_weights = emb_path/'ELMO'/'weights.hdf5'

# BertEmbeddings
# bert = str(emb_path/'BERT'/'model_folder')

# List of Embeddings
embeddings = [#WordEmbeddings(we),
              #FlairEmbeddings(flair_fwd),
              #FlairEmbeddings(flair_bwd),
              #ELMoEmbeddings(elmo_options, elmo_weights),
              #BertEmbeddings(bert),
             ]

# Fail early with an actionable message instead of an obscure error further
# down when no embedding has been enabled above.
if not embeddings:
    raise ValueError(
        "No embeddings selected: uncomment at least one entry in `embeddings` "
        "(and its path definition) before running this cell."
    )

# DocumentRNNEmbeddings
# Can choose between RNN types (GRU by default, or LSTM)
document_embeddings = DocumentRNNEmbeddings(embeddings,
                                            hidden_size=512,
                                            reproject_words=True,
                                            reproject_words_dimension=256,
                                           )

#### Train Model

In [0]:
# INITIALIZE TEXT CLASSIFIER
from flair.models import TextClassifier

# Build the text classifier from the document embeddings and the label
# dictionary created from the corpus.
classifier = TextClassifier(
    document_embeddings=document_embeddings,
    label_dictionary=label_dict,
)

In [0]:
from flair.trainers import ModelTrainer

# define output path
model_folder = cls_model_path/'EXAMPLE-MODEL'

# option to continue from checkpoint
# Set to True to resume a previous run from the checkpoint in model_folder.
continue_training = False

if continue_training:
    checkpoint = model_folder/'checkpoint.pt'
    # Resuming only works if an earlier run saved a checkpoint
    # (see checkpoint=True in the train() call below).
    if not checkpoint.exists():
        raise FileNotFoundError(
            f"No checkpoint found at {checkpoint}; run the training once with "
            "continue_training = False first."
        )
    trainer = ModelTrainer.load_checkpoint(checkpoint, corpus)
else:
    trainer = ModelTrainer(classifier, corpus)

# start the training
# checkpoint=True asks the trainer to save a checkpoint while training, so an
# interrupted run can be resumed through the continue_training option above.
trainer.train(model_folder,
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=20,
              checkpoint=True,
              save_final_model=True,
              embeddings_storage_mode='gpu')