# Training a Text Classification Model with Cross-Validation



#### Google Colab Setup

In [0]:
# MOUNT GOOGLE DRIVE
# Makes the user's Google Drive available under /gdrive; the paths configured
# further down in this notebook are rooted at '/gdrive/My Drive/...'.
# This cell only works inside Google Colab (google.colab is Colab-specific).
# force_remount=True presumably re-mounts even if a mount already exists —
# confirm against the Colab documentation.
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

In [0]:
# INSTALL FLAIR
!pip install flair==0.4.4 --quiet

In [0]:
# INSTALL ALLENNLP (only necessary when using ELMoEmbeddings)
#!pip install allennlp --quiet

#### Paths

In [0]:
# SETUP PATHS
from pathlib import Path

# Root folder holding all project resources (located on the Drive mounted at /gdrive).
base_path = Path('/gdrive/My Drive/embeddings-comparison/resources')

# Pre-trained embedding files.
emb_path = base_path.joinpath('models', 'embeddings')
# Output location for trained classifiers.
cls_model_path = base_path.joinpath('models', 'classifiers')
# Input location of the classification corpora.
cls_corpus_path = base_path.joinpath('corpora', 'classification_corpora')

#### ClassificationCorpus

In [0]:

from flair.data import Corpus
from flair.datasets import  ClassificationCorpus #TREC_6

# this is the folder in which train, test and dev files reside
# ('EXAMPLE-CORPUS' is a placeholder; replace it with the name of your corpus folder)
corpus_folder = cls_corpus_path/'EXAMPLE-CORPUS'

# get the corpus
# NOTE: the training cell later overwrites the train/dev/test splits of this
# object for every cross-validation fold.
corpus = ClassificationCorpus(corpus_folder)
print(corpus)

# create the label dictionary
# (passed to TextClassifier as `label_dictionary` in the training cell)
label_dict = corpus.make_label_dictionary()

#### Embeddings

In [0]:
# INITIALIZE EMBEDDINGS
from flair.embeddings import FlairEmbeddings, DocumentRNNEmbeddings, WordEmbeddings, BertEmbeddings, ELMoEmbeddings

# NOTE: everything between the two ''' delimiters below is a disabled template;
# as long as it stays inside the string, nothing in it is executed.
# To use it: remove both ''' delimiters, uncomment the path definitions and the
# matching entries of the `embeddings` list for the embeddings you want.
# The training cell requires `document_embeddings` to be defined.
'''
# WordEmbeddings [word2vec, fastText, glove]
# we = str(emb_path/'example.kv')

# FlairEmbeddings
# flair_fwd = emb_path/'FLAIR'/'example-fwd.pt'
# flair_bwd = emb_path/'FLAIR'/'example-bwd.pt'

# ELMoEmbeddings
# elmo_options = emb_path/'ELMO'/'options.json'
# elmo_weights = emb_path/'ELMO'/'weights.hdf5'

# BertEmbeddings
# bert = str(emb_path/'BERT'/'model_folder')

# List of Embeddings
embeddings = [#WordEmbeddings(we),
              #FlairEmbeddings(flair_fwd),
              #FlairEmbeddings(flair_bwd),
              #ELMoEmbeddings(elmo_options, elmo_weights),
              #BertEmbeddings(bert),
             ]

# DocumentRNNEmbeddings
# Can choose between RNN types (GRU by default, or LSTM)
document_embeddings = DocumentRNNEmbeddings(embeddings,
                                            hidden_size=512,
                                            reproject_words=True,
                                            reproject_words_dimension=256,
                                           )

'''

#### Helper Evaluation Function

In [0]:
import pandas as pd
import numpy as np
import re

def result_summary(result):
    """Turn a flair evaluation result into three pandas DataFrames.

    Parameters
    ----------
    result : object
        Must expose two string attributes:

        * ``detailed_results`` - newline-separated report. The first three
          lines are skipped (presumably a leading blank line plus the
          micro/macro average lines of flair 0.4.4 - confirm). Every remaining
          non-blank line is expected to look like
          ``<tag>  tp: 5 - fp: 2 - fn: 1 - tn: 10 - precision: 0.7 - recall: ...
          - accuracy: ... - f1-score: ...``, i.e. the tag followed by eight
          ``name: value`` pairs.
        * ``log_line`` - three tab-separated floats
          (precision, recall, f1-score).

    Returns
    -------
    (df_tag_tfpn, df_tag_metrics, df_metrics)
        * ``df_tag_tfpn`` - per-tag integer counts, indexed by tag.
        * ``df_tag_metrics`` - per-tag float metrics, indexed by tag.
        * ``df_metrics`` - a single row with the overall metrics from ``log_line``.

    Blank lines in the report are ignored. If the report contains no per-tag
    lines, the two per-tag frames are returned empty (zero rows).
    """
    count_columns = ['true-positive', 'false-positive', 'false-negative', 'true-negative']
    tag_metric_columns = ['precision', 'recall', 'accuracy', 'f1-score']
    overall_columns = ['precision', 'recall', 'f1-score']

    # Separators between fields: ' - ', runs of spaces (tag padding) and ': '.
    # Raw string: the former '\ ' escapes were invalid string escapes.
    field_separator = re.compile(r' - | +|: ')

    rows = [
        field_separator.split(line)
        for line in result.detailed_results.split('\n')[3:]
        if line.strip()  # skip blank lines (e.g. trailing newline) to keep rows rectangular
    ]

    if rows:
        table = np.array(rows)
        tags = table[:, 0].tolist()
        # Column 0 is the tag, odd columns are field names, even columns (from 2) are values.
        values = table[:, 2::2]
        tag_tfpn = values[:, :4].astype(int)
        tag_metrics = values[:, 4:].astype(float)
    else:
        tags = []
        tag_tfpn = np.empty((0, len(count_columns)), dtype=int)
        tag_metrics = np.empty((0, len(tag_metric_columns)), dtype=float)

    metrics = np.array(result.log_line.split('\t')).astype(float).reshape(1, 3)

    df_tag_tfpn = pd.DataFrame(data=tag_tfpn, index=tags, columns=count_columns)
    df_tag_metrics = pd.DataFrame(data=tag_metrics, index=tags, columns=tag_metric_columns)
    df_metrics = pd.DataFrame(data=metrics, index=None, columns=overall_columns)

    return df_tag_tfpn, df_tag_metrics, df_metrics

#### Train Model

In [0]:
from sklearn.model_selection import KFold
from flair.datasets import DataLoader, SentenceDataset
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
import pickle


# Set number of splits
# NOTE(review): KFold is used without shuffling, so the folds follow the order
# of `complete_corpus`. If that order correlates with the labels the folds may
# be unbalanced — consider KFold(n_splits=5, shuffle=True, random_state=<seed>).
kf = KFold(n_splits=5)

# All sentences
complete_corpus = corpus.get_all_sentences()

# Define output path (identical for every fold, so it is set once up front).
# NOTE(review): because all folds train into the same folder, files written by
# the trainer for one fold are presumably overwritten by the next — confirm.
model_folder = cls_model_path/'EXAMPLE-MODEL'

# Per-fold result tables; concatenated once after the loop
# (DataFrame.append is no longer available in pandas >= 2.0).
fold_counts = []
fold_tag_metrics = []
fold_metrics = []

# Cross-Validation
for i, (train_index, test_index) in enumerate(kf.split(complete_corpus), start=1):
    print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    print("Fold:", i)
    # Overwrite the corpus splits for this fold.
    # NOTE(review): dev and test are built from the same indices, so anything
    # the trainer decides based on the dev set is decided on the test data.
    corpus._train = SentenceDataset([complete_corpus[j] for j in train_index])
    corpus._test = SentenceDataset([complete_corpus[j] for j in test_index])
    corpus._dev = SentenceDataset([complete_corpus[j] for j in test_index])
    print(corpus)


    # 5. create the text classifier
    # NOTE(review): the same `document_embeddings` object is handed to every
    # fold's classifier; if it holds trainable weights they would carry over
    # from fold to fold — confirm and re-create it per fold if so.
    classifier = TextClassifier(document_embeddings, label_dictionary=label_dict)

    # Initialize ModelTrainer
    trainer = ModelTrainer(classifier, corpus)

    # Training
    trainer.train(model_folder,
                  learning_rate=0.2,
                  mini_batch_size=32,
                  anneal_factor=0.5,
                  patience=5,
                  max_epochs=50,
                  train_with_dev=False,
                  shuffle=True,
                  save_final_model=True,
                  embeddings_storage_mode='gpu')


    # Evaluation
    result, eval_loss = trainer.model.evaluate(DataLoader(trainer.corpus.test,
                                                          batch_size=8,
                                                          num_workers=4))
    # tag_tfpn, tag_metrics, metrics
    fold_tt, fold_tm, fold_m = result_summary(result)
    fold_counts.append(fold_tt)
    fold_tag_metrics.append(fold_tm)
    fold_metrics.append(fold_m)

# Stack the per-fold tables (`pd` is imported in the helper-function cell).
tt = pd.concat(fold_counts)
tm = pd.concat(fold_tag_metrics)
m = pd.concat(fold_metrics)

# Per-tag counts summed over folds, per-tag metrics averaged over folds,
# and overall metrics averaged over folds.
df = tt.groupby(tt.index).sum()
tag_metrics_avg = tm.groupby(tm.index).mean()
summary = m.mean()

# Metrics recomputed from the summed counts.
# Note: 'accuracy' here is tp / (tp + fp + fn), i.e. true negatives are not counted.
df['precision'] = df['true-positive'] / (df['true-positive'] + df['false-positive'])
df['recall'] = df['true-positive'] / (df['true-positive'] + df['false-negative'])
df['accuracy'] = df['true-positive'] / (df['true-positive'] + df['false-positive'] + df['false-negative'])
df['f1-score'] = 2*df['precision']*df['recall'] / (df['precision'] + df['recall'])

# pickle dump (each file is closed as soon as it has been written)
for filename, obj in (('details.pkl', df),
                      ('tag_metrics_avg.pkl', tag_metrics_avg),
                      ('summary.pkl', summary)):
    with (model_folder/filename).open(mode='wb') as fh:
        pickle.dump(obj, fh)

print(summary)
print('\n-------------------------------------------------------------------\n')
print(df)
print('\n-------------------------------------------------------------------\n')
print(tag_metrics_avg)