# Multi-Label Classifier

In [29]:
from flair.data_fetcher import NLPTaskDataFetcher
from flair.embeddings import WordEmbeddings, FlairEmbeddings, DocumentLSTMEmbeddings, CharacterEmbeddings
from flair.models import TextClassifier
from flair.trainers import ModelTrainer
from pathlib import Path

import os

# Directory holding the preprocessed classification files.
# The original author's location remains the default, so existing runs are
# unchanged; set the COOKING_DATA_DIR environment variable to point the
# notebook at a copy of the data elsewhere (an empty value is ignored).
data_path = (
    os.environ.get('COOKING_DATA_DIR')
    or '/home/wohlg/itmo/misc/cooking_classification/preprocessed'
)

# Load the train / dev / test splits found under data_path.
corpus = NLPTaskDataFetcher.load_classification_corpus(
    Path(data_path),
    train_file='cooking.train',
    dev_file='cooking.valid',
    test_file='cooking.test',
)

# Token-level embeddings: GloVe word vectors combined with the forward and
# backward "fast" Flair embeddings.
word_embeddings = [
    WordEmbeddings('glove'),
    FlairEmbeddings('news-forward-fast'),
    FlairEmbeddings('news-backward-fast'),
]

# Alternative token-embedding choices, kept for reference; uncomment one to
# replace the list above.
#word_embeddings = [CharacterEmbeddings()]
#word_embeddings = [WordEmbeddings('glove')]

# Document-level embedding built on top of the token embeddings, with word
# re-projection enabled (256 dimensions) and an LSTM hidden size of 512.
document_embeddings = DocumentLSTMEmbeddings(
    word_embeddings,
    hidden_size=512,
    reproject_words=True,
    reproject_words_dimension=256,
)

# Blank line to separate the data-loading log from the module summary.
print()
print(document_embeddings)


2019-04-20 23:15:29,173 Reading data from /home/wohlg/itmo/misc/cooking_classification/preprocessed
2019-04-20 23:15:29,174 Train: /home/wohlg/itmo/misc/cooking_classification/preprocessed/cooking.train
2019-04-20 23:15:29,175 Dev: /home/wohlg/itmo/misc/cooking_classification/preprocessed/cooking.valid
2019-04-20 23:15:29,176 Test: /home/wohlg/itmo/misc/cooking_classification/preprocessed/cooking.test

DocumentLSTMEmbeddings(
  (embeddings): StackedEmbeddings(
    (list_embedding_0): WordEmbeddings()
    (list_embedding_1): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.25)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 1024)
        (decoder): Linear(in_features=1024, out_features=275, bias=True)
      )
    )
    (list_embedding_2): FlairEmbeddings(
      (lm): LanguageModel(
        (drop): Dropout(p=0.25)
        (encoder): Embedding(275, 100)
        (rnn): LSTM(100, 1024)
        (decoder): Linear(in_features=1024, out_features=275, bias=



In [30]:
# One-line overview of the corpus (split sizes).
corpus_summary = str(corpus)
print(corpus_summary)

# Per-split statistics, including the number of documents per class.
label_statistics = corpus.obtain_statistics()
print(label_statistics)

TaggedCorpus: 12404 train + 1500 dev + 1500 test sentences
{
    "TRAIN": {
        "dataset": "TRAIN",
        "total_number_of_documents": 12404,
        "number_of_documents_per_class": {
            "sauce": 332,
            "cheese": 235,
            "food-safety": 967,
            "acidity": 33,
            "cast-iron": 111,
            "stove": 55,
            "restaurant": 15,
            "knife-skills": 61,
            "dicing": 2,
            "storage-method": 366,
            "equipment": 666,
            "bread": 575,
            "baking": 1156,
            "substitutions": 724,
            "peanuts": 26,
            "chocolate": 234,
            "oven": 227,
            "convection": 23,
            "storage-lifetime": 256,
            "mayonnaise": 28,
            "tea": 144,
            "baking-powder": 31,
            "baking-soda": 27,
            "leavening": 10,
            "soup": 184,
            "sous-vide": 100,
            "vacuum": 19,
            "syrup": 46,


In [None]:
# Label vocabulary is derived from the corpus itself.
label_dictionary = corpus.make_label_dictionary()

# multi_label=True: configure the classifier for the multi-label setting
# this notebook is about.
classifier = TextClassifier(
    document_embeddings,
    label_dictionary=label_dictionary,
    multi_label=True,
)

trainer = ModelTrainer(classifier, corpus)

# NOTE(review): the captured training log reports f-score 0.0000 / acc 0.0000
# on both DEV and TEST through at least epoch 14 while the loss keeps
# shrinking. Presumably no label is ever being predicted; the cause is not
# visible here — TODO investigate before relying on this model.
# '/tmp' is the first argument to train(), presumably the output directory.
trainer.train('/tmp', max_epochs=20)

2019-04-20 23:16:32,232 ----------------------------------------------------------------------------------------------------
2019-04-20 23:16:32,233 Evaluation method: MICRO_F1_SCORE
2019-04-20 23:16:32,235 ----------------------------------------------------------------------------------------------------
2019-04-20 23:16:33,434 epoch 1 - iter 0/388 - loss 0.02166426
2019-04-20 23:17:14,545 epoch 1 - iter 38/388 - loss 0.02144122
2019-04-20 23:18:05,471 epoch 1 - iter 76/388 - loss 0.02115187
2019-04-20 23:18:57,840 epoch 1 - iter 114/388 - loss 0.02072302
2019-04-20 23:19:38,123 epoch 1 - iter 152/388 - loss 0.02001544
2019-04-20 23:20:35,951 epoch 1 - iter 190/388 - loss 0.01894880
2019-04-20 23:21:30,208 epoch 1 - iter 228/388 - loss 0.01763090
2019-04-20 23:22:18,266 epoch 1 - iter 266/388 - loss 0.01628496
2019-04-20 23:23:05,516 epoch 1 - iter 304/388 - loss 0.01501928
2019-04-20 23:23:55,848 epoch 1 - iter 342/388 - loss 0.01388867
2019-04-20 23:24:47,632 epoch 1 - iter 380/388

2019-04-20 23:35:42,365 EPOCH 7 done: loss 0.0007 - lr 0.1000 - bad epochs 0
2019-04-20 23:35:58,356 DEV  : loss 0.00063264 - f-score 0.0000 - acc 0.0000
2019-04-20 23:36:13,499 TEST : loss 0.00063889 - f-score 0.0000 - acc 0.0000
2019-04-20 23:36:17,289 ----------------------------------------------------------------------------------------------------
2019-04-20 23:36:17,500 epoch 8 - iter 0/388 - loss 0.00071825
2019-04-20 23:36:23,549 epoch 8 - iter 38/388 - loss 0.00072236
2019-04-20 23:36:30,192 epoch 8 - iter 76/388 - loss 0.00071860
2019-04-20 23:36:35,268 epoch 8 - iter 114/388 - loss 0.00072011
2019-04-20 23:36:40,167 epoch 8 - iter 152/388 - loss 0.00071469
2019-04-20 23:36:45,570 epoch 8 - iter 190/388 - loss 0.00071176
2019-04-20 23:36:51,328 epoch 8 - iter 228/388 - loss 0.00070987
2019-04-20 23:36:56,447 epoch 8 - iter 266/388 - loss 0.00071084
2019-04-20 23:37:01,337 epoch 8 - iter 304/388 - loss 0.00071022
2019-04-20 23:37:06,386 epoch 8 - iter 342/388 - loss 0.0007084

2019-04-20 23:45:44,662 epoch 14 - iter 380/388 - loss 0.00064549
2019-04-20 23:45:45,553 ----------------------------------------------------------------------------------------------------
2019-04-20 23:45:45,554 EPOCH 14 done: loss 0.0006 - lr 0.1000 - bad epochs 0
2019-04-20 23:45:58,988 DEV  : loss 0.00058977 - f-score 0.0000 - acc 0.0000
2019-04-20 23:46:12,116 TEST : loss 0.00059683 - f-score 0.0000 - acc 0.0000
2019-04-20 23:46:15,914 ----------------------------------------------------------------------------------------------------
2019-04-20 23:46:16,049 epoch 15 - iter 0/388 - loss 0.00064049
2019-04-20 23:46:20,929 epoch 15 - iter 38/388 - loss 0.00065436
2019-04-20 23:46:25,922 epoch 15 - iter 76/388 - loss 0.00064818
2019-04-20 23:46:30,493 epoch 15 - iter 114/388 - loss 0.00064736
2019-04-20 23:46:35,259 epoch 15 - iter 152/388 - loss 0.00064767
2019-04-20 23:46:40,022 epoch 15 - iter 190/388 - loss 0.00064541
2019-04-20 23:46:45,419 epoch 15 - iter 228/388 - loss 0.000