# Tutorial 8: Model Tuning

### Load corpus

In [20]:
from pathlib import Path

# TaggedCorpus must be imported explicitly: the annotation on the top-level
# `corpus` assignment below is evaluated at runtime, so on a fresh kernel the
# cell would otherwise fail with NameError.
from flair.data import TaggedCorpus
from flair.data_fetcher import NLPTaskDataFetcher

# use your own data path
data_folder = Path('/home/wohlg/itmo/teaching/nlp/flair/testcorpus')

# load corpus containing training, test and dev data;
# downsample(0.1) shrinks the corpus (presumably to about 10% of each split --
# confirm against the flair docs) so the tuning runs below stay short
corpus: TaggedCorpus = NLPTaskDataFetcher.load_classification_corpus(data_folder,
                                                                     test_file='test.txt',
                                                                     dev_file='dev.txt',
                                                                     train_file='train.txt').downsample(0.1)

2019-04-29 20:04:31,049 Reading data from /home/wohlg/itmo/teaching/nlp/flair/testcorpus
2019-04-29 20:04:31,062 Train: /home/wohlg/itmo/teaching/nlp/flair/testcorpus/train.txt
2019-04-29 20:04:31,063 Dev: /home/wohlg/itmo/teaching/nlp/flair/testcorpus/dev.txt
2019-04-29 20:04:31,064 Test: /home/wohlg/itmo/teaching/nlp/flair/testcorpus/test.txt


In [21]:
# show the corpus summary (number of train / dev / test sentences)
print(corpus)

TaggedCorpus: 800 train + 100 dev + 101 test sentences


In [23]:
from hyperopt import hp
from flair.hyperparameter.param_selection import SearchSpace, Parameter
from flair.embeddings import (
    TokenEmbeddings,
    WordEmbeddings,
    StackedEmbeddings,
    CharLMEmbeddings,
    FlairEmbeddings,
)


# define your search space: each add() registers one hyperparameter together
# with the hyperopt sampling function and the values it may draw from
search_space = SearchSpace()

# two candidate embedding setups: plain word embeddings vs. a forward/backward
# pair of contextual string embeddings.
# FlairEmbeddings replaces the deprecated CharLMEmbeddings class, which emits a
# deprecation warning every time it is instantiated.
search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[
    [ WordEmbeddings('en') ],
    [ FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward') ]
])

# discrete choices for the network shape
search_space.add(Parameter.HIDDEN_SIZE, hp.choice, options=[32, 64, 128])
search_space.add(Parameter.RNN_LAYERS, hp.choice, options=[1, 2])

# dropout is sampled uniformly from the interval [0.0, 0.5]
search_space.add(Parameter.DROPOUT, hp.uniform, low=0.0, high=0.5)

# discrete choices for the optimisation settings
search_space.add(Parameter.LEARNING_RATE, hp.choice, options=[0.05, 0.1, 0.15, 0.2])
search_space.add(Parameter.MINI_BATCH_SIZE, hp.choice, options=[8, 16, 32])

  # Remove the CWD from sys.path while we load stuff.
  # Remove the CWD from sys.path while we load stuff.


In [24]:
from flair.hyperparameter.param_selection import TextClassifierParamSelector, OptimizationValue

# keyword settings for the selector, grouped so the training budget is easy to tweak
selector_budget = dict(
    max_epochs=2,  # default 50
    training_runs=3,
    optimization_value=OptimizationValue.DEV_SCORE,
)

# create the parameter selector (positional arguments kept in their original order)
param_selector = TextClassifierParamSelector(corpus, False, 'resources/results', 'lstm', **selector_budget)

# start the optimization over the search space defined above
param_selector.optimize(search_space, max_evals=100)

2019-04-29 20:07:31,761 ----------------------------------------------------------------------------------------------------
2019-04-29 20:07:31,762 Evaluation run: 1
2019-04-29 20:07:31,765 Evaluating parameter combination:
2019-04-29 20:07:31,766 	dropout: 0.47078441765795953
2019-04-29 20:07:31,772 	embeddings: /home/wohlg/.flair/embeddings/en-fasttext-news-300d-1M
2019-04-29 20:07:31,774 	hidden_size: 128
2019-04-29 20:07:31,778 	learning_rate: 0.1
2019-04-29 20:07:31,779 	mini_batch_size: 8
2019-04-29 20:07:31,779 	rnn_layers: 1
2019-04-29 20:07:31,780 ----------------------------------------------------------------------------------------------------
2019-04-29 20:07:31,828 ----------------------------------------------------------------------------------------------------
2019-04-29 20:07:31,830 Training run: 1
2019-04-29 20:07:31,838 ----------------------------------------------------------------------------------------------------
2019-04-29 20:07:31,840 Evaluation method: MI

2019-04-29 20:09:08,846 EPOCH 1 done: loss 0.0909 - lr 0.1000 - bad epochs 0
2019-04-29 20:09:09,347 DEV  : loss 0.08911197 - f-score 0.6300 - acc 0.4599
2019-04-29 20:09:09,348 ----------------------------------------------------------------------------------------------------
2019-04-29 20:09:09,444 epoch 2 - iter 0/100 - loss 0.09045847
2019-04-29 20:09:11,066 epoch 2 - iter 10/100 - loss 0.08689481
2019-04-29 20:09:12,950 epoch 2 - iter 20/100 - loss 0.08756529
2019-04-29 20:09:14,614 epoch 2 - iter 30/100 - loss 0.08808527
2019-04-29 20:09:15,957 epoch 2 - iter 40/100 - loss 0.08899501
2019-04-29 20:09:17,596 epoch 2 - iter 50/100 - loss 0.08895745
2019-04-29 20:09:19,366 epoch 2 - iter 60/100 - loss 0.08903889
2019-04-29 20:09:20,949 epoch 2 - iter 70/100 - loss 0.08879535
2019-04-29 20:09:22,739 epoch 2 - iter 80/100 - loss 0.08902576
2019-04-29 20:09:24,330 epoch 2 - iter 90/100 - loss 0.08865670
2019-04-29 20:09:25,551 ----------------------------------------------------------

KeyboardInterrupt: 