In [1]:
from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials, base
from hyperopt.plotting import *
import numpy as np
import math
import time
import logging
from data.data_loader import Dataset
from misc.preferences import PREFERENCES
from misc.run_configuration import from_hyperopt, OutputLayerType, LearningSchedulerType, OptimizerType
from misc import utils
from misc.hyperopt_space import *

from optimizer import get_optimizer
from criterion import NllLoss, LossCombiner

from models.transformer.encoder import TransformerEncoder
from models.jointAspectTagger import JointAspectTagger
from trainer.train import Trainer
import pprint
from data.organic2019 import organic_dataset as dsl
from data.organic2019 import ORGANIC_TASK_ALL, ORGANIC_TASK_ENTITIES, ORGANIC_TASK_ATTRIBUTES, ORGANIC_TASK_ENTITIES_COMBINE

  assert(config.model_size % config.n_heads == 0, f'number of heads {config.n_heads} is not a valid number of heads for model size {config.model_size}.')


# Loading Functions

These functions will load the dataset and the model. The run configuration will determine the architecture and hyper parameters.

In [2]:
def load_model(dataset, rc, experiment_name):
    """Assemble the model, loss and optimizer for one run and wrap them in a Trainer.

    Args:
        dataset: loaded dataset; its ``class_weights``, ``source_embedding``
            and ``target_names`` attributes are read here.
        rc: run configuration handed to the encoder, the tagger, the
            optimizer factory and the trainer.
        experiment_name: name forwarded to the trainer.

    Returns:
        The configured ``Trainer`` (tensorboard and verbose output disabled).
    """
    # NOTE(review): the literals 4 and 20 are passed through as-is; their
    # meaning is defined by LossCombiner / JointAspectTagger - confirm there.
    criterion = LossCombiner(4, dataset.class_weights, NllLoss)
    encoder = TransformerEncoder(dataset.source_embedding, hyperparameters=rc)
    tagger = JointAspectTagger(encoder, rc, 4, 20, dataset.target_names)

    return Trainer(
        tagger,
        criterion,
        get_optimizer(tagger, rc),
        rc,
        dataset,
        experiment_name,
        enable_tensorboard=False,
        verbose=False,
    )

def load_dataset(rc, logger, task):
    """Create the dataset for ``task`` and load its data.

    File locations come from the global ``PREFERENCES`` object; the data is
    read through the ``dsl`` loader module imported by this notebook.

    Args:
        rc: run configuration forwarded to ``Dataset``.
        logger: logger forwarded to ``Dataset``.
        task: task identifier forwarded to ``Dataset``.

    Returns:
        The ``Dataset`` instance after ``load_data`` has been called on it.
    """
    loader_options = dict(
        source_index=0,
        target_vocab_index=1,
        data_path=PREFERENCES.data_root,
        train_file=PREFERENCES.data_train,
        valid_file=PREFERENCES.data_validation,
        test_file=PREFERENCES.data_test,
        file_format='.tsv',
        init_token=None,
        eos_token=None,
    )
    dataset = Dataset(task, logger, rc, **loader_options)
    dataset.load_data(dsl, verbose=False)
    return dataset

## Preamble - Definitions

In [3]:
# Global defaults for this notebook. load_dataset() reads data_root,
# data_train, data_validation and data_test from PREFERENCES.
# NOTE(review): the file names end in .csv while load_dataset() passes
# file_format='.tsv' to Dataset - confirm the files are tab-separated.
PREFERENCES.defaults(
    data_root='./data/data/organic2019',
    data_train='train.csv',    
    data_validation='validation.csv',
    data_test='test.csv',
    early_stopping='highest_5_F1'
)
# Experiment name used for the logger setup here and again inside objective().
main_experiment_name = 'Organic_HyperOpt'
# Passed to from_hyperopt() for every sampled configuration.
use_cuda = True

# get general logger just for search
experiment_name = utils.create_loggers(experiment_name=main_experiment_name)
logger = logging.getLogger(__name__)
dataset_logger = logging.getLogger('data_loader')
# NOTE(review): the message says "random grid search", but the optimisation
# cell runs fmin with algo=tpe.suggest.
logger.info('Run hyper parameter random grid search for experiment with name ' + main_experiment_name)

# Number of hyperopt evaluations (used as max_evals in the fmin call).
num_optim_iterations = 100
logger.info('num_optim_iterations: ' + str(num_optim_iterations))

Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\Organic_HyperOpt\20190331\0


### Current commit

In [4]:
# Resolve the commit once and reuse it: the original queried the helper three
# times and threw the first result away.
current_commit = utils.get_current_git_commit()
logger.info('Current commit: ' + current_commit)
print('Current commit: ' + current_commit)

Current commit: b'90a21c6'


### Search Spaces

- BatchSize:
    How big should each batch be?
- Num Encoder Blocks
    How many encoder blocks should be replicated?
    AYNIA: 2-8
    
- Pointwise Layer Size
    How big should the layer between attention heads be?
    AYNIA: 1024 - 4096
    This: 64 - 2048
    
    64: Previous experiments have shown that a smaller size can be beneficial because a smaller layer contains fewer parameters.
    2048: This model has about a third of the AYNIA model size (1000 vs. 300). Going too big therefore doesn't make much sense.

- Clip Comments to 
    How long should comments be
    This: 30 - 500
    
- Initial Learning Rate
    What is the initial learning rate
- Optimizer:
    - Noam:
        (FROM: https://github.com/tensorflow/tensor2tensor/issues/280#issuecomment-359477755)
        decreasing the learning rate aka learning rate decay (usually exponential, piecewise-constant or inverse-time) is a standard practice in ML for decades. Increasing the learning rate in the early stages with a warmup (usually linear or exponential growth) is a more recent practice, popular esp. in deep learning on ImageNet, see e.g. He et al. 2016 or Goyal et al. 2017.
        The "noam" scheme is just a particular way how to put the warmup and decay together (linear warmup for a given number of steps followed by exponential decay).

        Learning rate schedules is an active research area. See e.g. papers on cyclical learning rate (corresponding to learning_rate_decay_scheme=cosine available in tensor2tensor) and super-convergence, which provide also more insights into the theory behind the learning rate, batch size, gradient noise etc.
    
        - learning rate factor
        - learning rate warmup (steps)
            AYNIA: 4000
            THIS: 100 - 8000
    - Adam:
        - Beta 1
            AYNIA: 0.9

        - Beta 2
            AYNIA: 0.98


    - ?
- Transformer Dropout Rate
    Dropout rate for the transformer layers.
    AYNIA: 0.1
    THIS: 0.1 - 0.8
- Number of Transformer Heads
    How many attention heads should be used:
    AYNIA: 8
    THIS: [1, 2, 3, 4, 5, 6, 10, 12, 15, 20] (each value has to divide the model size of 300)
    
- Last Layer Dropout Rate
    Dropout rate right before the last layer
    AYNIA: -
    This: 0.0 - 0.8
- Last Layer Types
    - Sum
    - Convolutions:
        - num conv filters
        - kernel size
        - stride
        - padding
        


In [5]:
# Hyperopt search space handed to fmin(). Every sampled point is passed to
# objective(), which converts it into a run configuration via from_hyperopt().
# The first argument of each hp.* call is the hyperopt label of that variable.
search_space = {
    # quniform(..., q=1) samples whole numbers, but they arrive as floats
    # (the recorded output shows e.g. 'batch_size': 26.0).
    'batch_size': hp.quniform('batch_size', 10, 64, 1),
    'num_encoder_blocks': hp.quniform('num_encoder_blocks', 1, 4, 1),
    'pointwise_layer_size': hp.quniform('pointwise_layer_size', 32, 350, 1),
    'clip_comments_to': hp.quniform('clip_comments_to', 45, 180, 1),
    'dropout_rate': hp.uniform('dropout_rate', 0.0, 0.8),
    # NOTE: the hyperopt label ('last_layer_dropout') differs from the dict key.
    'output_dropout_rate': hp.uniform('last_layer_dropout', 0.0, 0.8),
    # Every option divides the model size of 300 that objective() passes on.
    'num_heads': hp.choice('num_heads', [1, 2, 3, 4, 5]),
    # hp_bool comes from the star import of misc.hyperopt_space (presumably a
    # two-way choice between True and False - confirm there).
    'transformer_use_bias': hp_bool('transformer_use_bias'),
    # Output layer: either convolutions (with their own sub-parameters) or a
    # linear sum without further parameters.
    'output_layer': hp.choice('output_layer', [
        {
            'type': OutputLayerType.Convolutions,
            # NOTE(review): kernel size, stride and padding are sampled
            # independently of each other and of the comment length - confirm
            # that every combination yields a valid layer.
            'output_conv_num_filters': hp.quniform('output_conv_num_filters', 10, 400, 1),
            'output_conv_kernel_size': hp.quniform('output_conv_kernel_size', 1, 10, 1),
            'output_conv_stride': hp.quniform('output_conv_stride', 1, 10, 1),
            'output_conv_padding': hp.quniform('output_conv_padding', 0, 5, 1),
        },
        {
            'type': OutputLayerType.LinearSum
        }
    ]),
    # Only the Noam scheduler is searched; warmup steps and factor are tuned.
    'learning_rate_scheduler': hp.choice('learning_rate_scheduler', [
        {
            'type': LearningSchedulerType.Noam,
            'noam_learning_rate_warmup': hp.quniform('noam_learning_rate_warmup', 1000, 9000, 1),
            'noam_learning_rate_factor': hp.uniform('noam_learning_rate_factor', 0.01, 4)
        }
    ]),
    # Only Adam is active; the SGD alternative below is commented out.
    'optimizer': hp.choice('optimizer', [
        {
            'type': OptimizerType.Adam,
            'adam_beta1': hp.uniform('adam_beta1', 0.7, 0.999),
            'adam_beta2': hp.uniform('adam_beta2', 0.7, 0.999),
            # eps is sampled log-uniformly between 1e-10 and 1.
            'adam_eps': hp.loguniform('adam_eps', np.log(1e-10), np.log(1)),
            # Log-normal around 0.01; sigma of the underlying normal is ln(10).
            'learning_rate': hp.lognormal('adam_learning_rate', np.log(0.01), np.log(10)),
            # Weight decay is 10**k for a whole-number exponent k in [-8, -3].
            'adam_weight_decay': 1*10**hp.quniform('adam_weight_decay', -8, -3, 1)
        },
        # Disabled SGD option. NOTE: the closing brace of this dict is missing
        # from the commented-out text and must be added when re-enabling it.
        #{
        #    'type': OptimizerType.SGD,
        #    'sgd_momentum': hp.uniform('sgd_momentum', 0.4, 1),
        #    'sgd_weight_decay': hp.loguniform('sgd_weight_decay', np.log(1e-4), np.log(1)),
        #    'sgd_nesterov': hp_bool('sgd_nesterov'),
        #    'learning_rate': hp.lognormal('sgd_learning_rate', np.log(0.01), np.log(10))
    ]),
    # The task itself is part of the search; objective() reads it back as rc.task.
    'task': hp.choice('task', [
        ORGANIC_TASK_ENTITIES,
        ORGANIC_TASK_ENTITIES_COMBINE
    ]),
    'use_spell_checker': hp_bool('use_spell_checker'),
    'embedding_type': hp.choice('embedding_type', ['fasttext', 'glove'])
}

In [6]:
def _failed_trial(run_time, trainer=None):
    """Build the hyperopt result dict for a trial that could not be completed.

    Args:
        run_time: ``time.time()`` value taken when the trial started.
        trainer: trainer of the trial, if one was created already; its best
            loss and best f1 are then included in the result.

    Returns:
        dict with ``status`` set to ``STATUS_FAIL`` and the elapsed
        ``eval_time`` (plus ``best_loss`` / ``best_f1`` when a trainer exists).
    """
    outcome = {
        'status': STATUS_FAIL,
        'eval_time': time.time() - run_time
    }
    if trainer is not None:
        outcome['best_loss'] = trainer.get_best_loss()
        outcome['best_f1'] = trainer.get_best_f1()
    return outcome


def objective(parameters):
    """Hyperopt objective: train and evaluate one sampled configuration.

    The sampled ``parameters`` are converted into a run configuration, the
    dataset and model are loaded, the model is trained and finally evaluated.
    Any exception in one of these stages is logged and turned into a failed
    trial so that the surrounding search keeps running.

    Args:
        parameters: one point sampled from the hyperopt search space.

    Returns:
        dict understood by hyperopt. On success ``status`` is ``STATUS_OK``
        and ``loss`` is the loss stored under ``results['validation']``; the
        dict also carries timing, best loss/f1, iteration counters, the run
        configuration and the train/validation/test results. On failure
        ``status`` is ``STATUS_FAIL``.
    """
    run_time = time.time()

    # Re-create the loggers for this trial (the recorded output shows a new
    # log path being reported for each trial).
    utils.reset_loggers()
    experiment_name = utils.create_loggers(experiment_name=main_experiment_name)
    logger = logging.getLogger(__name__)
    dataset_logger = logging.getLogger('data_loader')

    # generate hp's from parameters
    try:
        rc = from_hyperopt(parameters, use_cuda, model_size=300, early_stopping=5, num_epochs=35, log_every_xth_iteration=-1, language='en')
    except Exception as err:
        print('Could not convert params: ' + str(err))
        # `parameters` is a dict: concatenating it to the message raised a
        # TypeError inside this handler and aborted the whole search. Let the
        # logging module format it instead.
        logger.exception("Could not load parameters from hyperopt configuration: %s", parameters)
        return _failed_trial(run_time)
    logger.info('New Params:')
    logger.info(rc)
    print('\n\n#########################################################################')
    print(rc)

    logger.debug('Load dataset')
    try:
        dataset = load_dataset(rc, dataset_logger, rc.task)
    except Exception as err:
        print('Could not load dataset: ' + str(err))
        logger.exception("Could not load dataset")
        return _failed_trial(run_time)
    logger.debug('dataset loaded')
    logger.debug('Load model')

    try:
        trainer = load_model(dataset, rc, experiment_name)
    except Exception as err:
        print('Could not load model: ' + str(err))
        logger.exception("Could not load model")
        return _failed_trial(run_time)

    logger.debug('model loaded')

    logger.debug('Begin training')
    try:
        # The value returned by train() is not needed here; all metrics are
        # read from the trainer afterwards.
        trainer.train(use_cuda=rc.use_cuda, perform_evaluation=False)
    except Exception as err:
        print('Exception while training: ' + str(err))
        logger.exception("Could not complete iteration")
        return _failed_trial(run_time, trainer)

    # A NaN loss cannot be ranked by the optimiser, so report the trial as failed.
    if math.isnan(trainer.get_best_loss()):
        print('Loss is nan')
        return _failed_trial(run_time, trainer)

    # perform evaluation and log results
    try:
        result = trainer.perform_final_evaluation(use_test_set=True, verbose=False)
    except Exception as err:
        logger.exception("Could not complete iteration evaluation.")
        print('Could not complete iteration evaluation: ' + str(err))
        return _failed_trial(run_time, trainer)
    print(f'VAL f1\t{trainer.get_best_f1()} - ({result[1][1]})')
    print(f'VAL loss\t{trainer.get_best_loss()}')
    
    print(f"       .---.\n \
         /     \\\n\
          \\.@-@./\n\
          /`\\_/`\\\n\
         //  _  \\\\\tLoss: {trainer.get_best_loss()}\n\
        | \\     )|_\tf1: {trainer.get_best_f1()}\n\
       /`\\_`>  <_/ \\\n\
       \\__/'---'\\__/\n")
    
    # result[i] is indexed as (loss, f1) for i = 0 (train), 1 (validation)
    # and 2 (test); the validation loss is what hyperopt minimises.
    return {
            'loss': result[1][0],
            'status': STATUS_OK,
            'eval_time': time.time() - run_time,
            'best_loss': trainer.get_best_loss(),
            'best_f1': trainer.get_best_f1(),
            'sample_iterations': trainer.get_num_samples_seen(),
            'iterations': trainer.get_num_iterations(),
            'rc': rc,
            'results': {
                'train': {
                    'loss': result[0][0],
                    'f1': result[0][1]
                },
                'validation': {
                    'loss': result[1][0],
                    'f1': result[1][1]
                },
                'test': {
                    'loss': result[2][0],
                    'f1': result[2][1]
                }
            }
        }

In [7]:
def test_objective(params):
    rc = from_hyperopt(params, use_cuda, 300, 4, 35, -1, 'de')
    #print(rc)

    return {
        'loss': params['x'] ** 2,
        'status': STATUS_OK
    }

# Perform optimization

In [8]:
# Run the TPE search over `search_space`. Every evaluation of `objective` is
# recorded in `trials`, which the later cells pickle and plot.
trials = Trials()

best = fmin(
    fn=objective,
    space=search_space,
    algo=tpe.suggest,
    max_evals=num_optim_iterations,
    trials=trials,
)

print(best)

Log path is                                                                                                            
C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\Organic_HyperOpt\20190331\1       
                                                                                                                       

#########################################################################
+----------------------------------------------------------------------------------+                                   
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                       |
+------------------------------+---------------------------------------------------+
|            kwargs            | {'batch_size': 26.0, 'clip_comments_to':[...]lse} |
|          model_siz

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.44		0.30		0.068		0.865		0.53m - 0.5m / 0.0m                                                                     
2	9k	0.35		0.28		0.167		0.835		0.51m - 1.1m / 18.4m                                                                    
3	13k	0.33		0.27		0.213		0.832		0.51m - 1.6m / 17.9m                                                                   
4	17k	0.30		0.27		0.253		0.814		0.51m - 2.2m / 18.0m                                                                   
5	22k	0.29		0.27		0.257		0.810		0.51m - 2.7m / 17.9m                                                                   
6	26k	0.27		0.28		0.236		0.773		0.51m - 3.3m / 18.1m                                                                   
7	30k	0.26		0.27		0.239		0.772		0.51m - 3.8m / 18.0m                                                                   
8	35k	0.25		0.28		0.259		0.784		0.51m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.24		0.17		0.026		0.878		0.15m - 0.1m / 0.0m                                                                     
2	9k	0.20		0.16		0.022		0.883		0.15m - 0.3m / 5.2m                                                                     
3	13k	0.19		0.16		0.000		0.884		0.14m - 0.5m / 5.2m                                                                    
4	17k	0.19		0.16		0.114		0.831		0.15m - 0.7m / 5.2m                                                                    
5	22k	0.18		0.15		0.115		0.866		0.15m - 0.9m / 5.2m                                                                    
6	26k	0.17		0.15		0.186		0.871		0.15m - 1.1m / 5.5m                                                                    
7	30k	0.16		0.14		0.233		0.787		0.15m - 1.3m / 5.3m                                                                    
8	35k	0.15		0.14		0.244		0.832		0.14m - 

+------------------------------+---------------------------------------------------+
  2%|▉                                           | 2/100 [1:47:25<88:42:59, 3258.97s/it, best loss: 0.2340029372037903]

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.60		0.54		0.174		0.841		0.69m - 0.7m / 0.0m                                                                     
2	9k	0.52		0.50		0.167		0.867		0.67m - 1.4m / 24.2m                                                                    
3	13k	0.51		0.53		0.158		0.752		0.68m - 2.1m / 23.7m                                                                   
4	17k	0.49		0.59		0.231		0.751		0.68m - 2.8m / 23.9m                                                                   
5	22k	0.46		0.60		0.219		0.742		0.71m - 3.6m / 23.8m                                                                   
6	26k	0.46		0.55		0.217		0.732		0.67m - 4.3m / 24.9m                                                                   
7	30k	0.46		0.58		0.224		0.750		0.68m - 5.0m / 23.8m                                                                   
8	35k	0.46		0.53		0.228		0.782		0.67m - 

A Jupyter Widget

Exception while training: size mismatch, m1: [12070 x 3], m2: [355 x 4] at c:\a\w\1\s\tmp_conda_3.6_105809\conda\conda-bld\pytorch_1544094150554\work\aten\src\thc\generic/THCTensorMathBlas.cu:266
Log path is                                                                                                            
C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\Organic_HyperOpt\20190401\3       
                                                                                                                       

#########################################################################
+----------------------------------------------------------------------------------+                                   
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                   

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.54		0.45		0.215		0.760		0.57m - 0.6m / 0.0m                                                                     
2	9k	0.48		0.43		0.174		0.771		0.57m - 1.2m / 20.1m                                                                    
3	13k	0.46		0.42		0.213		0.780		0.58m - 1.8m / 20.2m                                                                   
4	17k	0.45		0.41		0.215		0.791		0.57m - 2.4m / 20.3m                                                                   
5	22k	0.44		0.41		0.270		0.824		0.58m - 3.0m / 20.2m                                                                   
6	26k	0.44		0.43		0.224		0.779		0.58m - 3.6m / 20.4m                                                                   
7	30k	0.44		0.41		0.242		0.805		0.58m - 4.3m / 20.4m                                                                   
8	35k	0.43		0.43		0.240		0.820		0.59m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.51		0.40		0.058		0.876		0.37m - 0.4m / 0.0m                                                                     
2	9k	0.42		0.35		0.173		0.845		0.37m - 0.8m / 13.1m                                                                    
3	13k	0.37		0.37		0.213		0.819		0.37m - 1.2m / 13.2m                                                                   
4	17k	0.34		0.41		0.239		0.800		0.37m - 1.6m / 13.1m                                                                   
5	22k	0.30		0.41		0.261		0.814		0.37m - 2.0m / 13.2m                                                                   
6	26k	0.25		0.45		0.230		0.787		0.38m - 2.4m / 13.2m                                                                   
7	30k	0.22		0.47		0.242		0.775		0.37m - 2.8m / 13.4m                                                                   
8	35k	0.19		0.54		0.247		0.774		0.40m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.25		0.21		0.130		0.869		0.28m - 0.3m / 0.0m                                                                     
2	9k	0.22		0.20		0.217		0.780		0.28m - 0.6m / 9.9m                                                                     
3	13k	0.20		0.18		0.223		0.740		0.28m - 0.9m / 9.9m                                                                    
4	17k	0.19		0.18		0.249		0.749		0.28m - 1.2m / 10.0m                                                                   
5	22k	0.19		0.18		0.242		0.746		0.28m - 1.6m / 10.0m                                                                   
6	26k	0.18		0.17		0.256		0.755		0.28m - 1.9m / 9.9m                                                                    
7	30k	0.17		0.18		0.240		0.758		0.28m - 2.2m / 10.0m                                                                   
8	35k	0.16		0.21		0.203		0.704		0.28m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.60		0.43		0.160		0.844		0.51m - 0.5m / 0.0m                                                                     
2	9k	0.50		0.39		0.212		0.862		0.52m - 1.1m / 18.0m                                                                    
3	13k	0.46		0.41		0.227		0.813		0.51m - 1.6m / 18.4m                                                                   
4	17k	0.44		0.44		0.232		0.769		0.52m - 2.2m / 18.0m                                                                   
5	22k	0.43		0.42		0.265		0.797		0.51m - 2.7m / 18.3m                                                                   
6	26k	0.42		0.39		0.279		0.817		0.52m - 3.3m / 18.1m                                                                   
7	30k	0.41		0.39		0.254		0.795		0.51m - 3.8m / 18.3m                                                                   
8	35k	0.41		0.39		0.278		0.831		0.51m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.24		0.22		0.051		0.665		0.15m - 0.2m / 0.0m                                                                     
2	9k	0.21		0.18		0.187		0.827		0.15m - 0.3m / 5.3m                                                                     
3	13k	0.19		0.19		0.231		0.739		0.15m - 0.5m / 5.3m                                                                    
4	17k	0.16		0.22		0.212		0.708		0.15m - 0.7m / 5.4m                                                                    
5	22k	0.13		0.23		0.235		0.745		0.15m - 0.9m / 5.4m                                                                    
6	26k	0.10		0.26		0.233		0.749		0.15m - 1.1m / 5.5m                                                                    
7	30k	0.08		0.28		0.245		0.767		0.15m - 1.3m / 5.5m                                                                    
8	35k	0.07		0.30		0.269		0.792		0.15m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.58		0.40		0.005		0.876		0.48m - 0.5m / 0.0m                                                                     
2	9k	0.46		0.37		0.066		0.865		0.49m - 1.0m / 17.0m                                                                    
3	13k	0.43		0.35		0.195		0.853		0.49m - 1.5m / 17.2m                                                                   
4	17k	0.40		0.33		0.236		0.859		0.49m - 2.1m / 17.3m                                                                   
5	22k	0.37		0.32		0.260		0.842		0.49m - 2.6m / 17.3m                                                                   
6	26k	0.35		0.32		0.276		0.842		0.48m - 3.1m / 17.2m                                                                   
7	30k	0.33		0.34		0.256		0.798		0.49m - 3.7m / 17.1m                                                                   
8	35k	0.31		0.33		0.277		0.820		0.50m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	2.03		0.95		0.012		0.778		1.00m - 1.0m / 0.0m                                                                     
2	9k	1.31		0.64		0.000		0.885		1.01m - 2.0m / 35.1m                                                                    
3	13k	3.16		0.71		0.000		0.885		1.00m - 3.1m / 35.3m                                                                   
4	17k	1.18		0.73		0.000		0.885		1.05m - 4.2m / 35.1m                                                                   
5	22k	2.00		0.70		0.000		0.885		1.03m - 5.2m / 36.7m                                                                   
6	26k	1.10		0.63		0.000		0.885		0.99m - 6.3m / 36.2m                                                                   
VAL f1	0.011747430249632892 - (0.011747430249632892)                                                                   
VAL loss	0.6269693039029924             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.82		0.75		0.119		0.739		0.85m - 0.8m / 0.0m                                                                     
2	9k	0.77		0.72		0.184		0.734		0.85m - 1.7m / 29.7m                                                                    
3	13k	0.80		0.78		0.128		0.730		0.85m - 2.6m / 29.9m                                                                   
4	17k	0.79		0.73		0.126		0.811		0.85m - 3.5m / 30.0m                                                                   
5	22k	0.76		0.74		0.000		0.885		0.85m - 4.4m / 30.0m                                                                   
6	26k	0.76		0.67		0.000		0.885		0.85m - 5.3m / 30.0m                                                                   
7	30k	0.76		0.68		0.000		0.885		0.85m - 6.2m / 30.0m                                                                   
VAL f1	0.1837270341207349 - (0.183727034

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.55		0.43		0.164		0.832		0.54m - 0.5m / 0.0m                                                                     
2	9k	0.47		0.41		0.226		0.779		0.55m - 1.1m / 18.9m                                                                    
3	13k	0.43		0.41		0.255		0.779		0.53m - 1.7m / 19.2m                                                                   
4	17k	0.42		0.43		0.261		0.793		0.54m - 2.3m / 18.8m                                                                   
5	22k	0.41		0.40		0.274		0.811		0.53m - 2.8m / 19.2m                                                                   
6	26k	0.40		0.41		0.276		0.794		0.54m - 3.4m / 18.8m                                                                   
7	30k	0.39		0.39		0.284		0.826		0.53m - 4.0m / 19.3m                                                                   
8	35k	0.39		0.40		0.299		0.851		0.54m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.38		0.31		0.000		0.885		0.48m - 0.5m / 0.0m                                                                     
2	9k	0.34		0.28		0.171		0.853		0.48m - 1.0m / 17.0m                                                                    
3	13k	0.30		0.28		0.171		0.815		0.48m - 1.5m / 16.9m                                                                   
4	17k	0.27		0.27		0.276		0.829		0.48m - 2.0m / 17.1m                                                                   
5	22k	0.24		0.28		0.250		0.840		0.48m - 2.6m / 17.0m                                                                   
6	26k	0.22		0.28		0.221		0.825		0.48m - 3.1m / 17.0m                                                                   
7	30k	0.19		0.28		0.291		0.837		0.48m - 3.6m / 17.0m                                                                   
8	35k	0.16		0.29		0.257		0.829		0.48m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.29		0.20		0.177		0.803		0.16m - 0.2m / 0.0m                                                                     
2	9k	0.21		0.18		0.062		0.872		0.16m - 0.4m / 5.8m                                                                     
3	13k	0.20		0.17		0.088		0.876		0.16m - 0.6m / 5.8m                                                                    
4	17k	0.20		0.16		0.173		0.851		0.16m - 0.8m / 5.8m                                                                    
5	22k	0.19		0.16		0.221		0.835		0.16m - 1.0m / 5.9m                                                                    
6	26k	0.17		0.16		0.255		0.822		0.16m - 1.2m / 6.0m                                                                    
7	30k	0.17		0.15		0.274		0.834		0.16m - 1.4m / 5.9m                                                                    
8	35k	0.16		0.16		0.257		0.803		0.16m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.35		0.32		0.218		0.827		0.26m - 0.3m / 0.0m                                                                     
2	9k	0.29		0.27		0.233		0.822		0.26m - 0.6m / 9.1m                                                                     
3	13k	0.25		0.26		0.260		0.813		0.26m - 0.9m / 9.3m                                                                    
4	17k	0.21		0.27		0.260		0.797		0.27m - 1.2m / 9.1m                                                                    
5	22k	0.18		0.25		0.237		0.794		0.27m - 1.5m / 9.4m                                                                    
6	26k	0.14		0.25		0.249		0.818		0.27m - 1.8m / 9.6m                                                                    
7	30k	0.12		0.26		0.236		0.799		0.26m - 2.1m / 9.7m                                                                    
8	35k	0.11		0.28		0.241		0.821		0.25m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.42		0.42		0.079		0.556		0.47m - 0.5m / 0.0m                                                                     
2	9k	0.38		0.43		0.063		0.517		0.44m - 0.9m / 16.4m                                                                    
3	13k	0.38		0.42		0.096		0.513		0.45m - 1.4m / 15.4m                                                                   
4	17k	0.37		0.39		0.154		0.550		0.44m - 1.9m / 15.8m                                                                   
5	22k	0.36		0.40		0.142		0.577		0.44m - 2.4m / 15.6m                                                                   
6	26k	0.36		0.43		0.136		0.499		0.44m - 2.9m / 15.6m                                                                   
7	30k	0.36		0.51		0.130		0.448		0.44m - 3.3m / 15.7m                                                                   
8	35k	0.37		0.51		0.140		0.504		0.44m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.26		0.22		0.120		0.845		0.28m - 0.3m / 0.0m                                                                     
2	9k	0.24		0.25		0.150		0.637		0.28m - 0.6m / 9.9m                                                                     
3	13k	0.22		0.20		0.224		0.733		0.27m - 0.9m / 9.9m                                                                    
4	17k	0.21		0.20		0.213		0.737		0.27m - 1.2m / 9.6m                                                                    
5	22k	0.22		0.20		0.225		0.726		0.27m - 1.5m / 9.7m                                                                    
6	26k	0.21		0.20		0.194		0.718		0.27m - 1.8m / 9.8m                                                                    
7	30k	0.21		0.19		0.244		0.794		0.27m - 2.1m / 9.7m                                                                    
8	35k	0.21		0.19		0.241		0.763		0.27m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.24		0.20		0.182		0.857		0.43m - 0.4m / 0.0m                                                                     
2	9k	0.20		0.19		0.199		0.688		0.43m - 0.9m / 15.0m                                                                    
3	13k	0.19		0.19		0.217		0.733		0.43m - 1.4m / 15.0m                                                                   
4	17k	0.18		0.20		0.194		0.672		0.43m - 1.8m / 15.0m                                                                   
5	22k	0.16		0.18		0.214		0.731		0.43m - 2.3m / 15.1m                                                                   
6	26k	0.14		0.18		0.250		0.766		0.43m - 2.8m / 15.2m                                                                   
7	30k	0.12		0.18		0.272		0.783		0.43m - 3.2m / 15.2m                                                                   
8	35k	0.11		0.19		0.312		0.828		0.43m - 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	4k	0.32		0.25		0.214		0.842		0.32m - 0.3m / 0.0m                                                                     
2	9k	0.27		0.24		0.221		0.823		0.31m - 0.7m / 11.1m                                                                    
3	13k	0.25		0.24		0.249		0.810		0.31m - 1.0m / 11.1m                                                                   
4	17k	0.24		0.24		0.252		0.805		0.31m - 1.4m / 11.1m                                                                   
5	22k	0.24		0.26		0.214		0.789		0.31m - 1.7m / 11.1m                                                                   
6	26k	0.23		0.28		0.204		0.811		0.31m - 2.1m / 11.0m                                                                   
7	30k	0.22		0.28		0.262		0.809		0.32m - 2.4m / 11.2m                                                                   
8	35k	0.22		0.26		0.240		0.804		0.31m - 

KeyboardInterrupt: 

In [None]:
import os
import pickle

# Persist the trials under a path relative to the working directory instead of
# a hard-coded absolute path that only exists on one machine (and that pointed
# at the log folder of a different run). The folder is created if necessary.
path = os.path.join('logs', main_experiment_name, 'trials.pkl')
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, 'wb') as f:
    pickle.dump(trials, f)

# Plotting

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Display the raw list of trial documents collected during the search.
trials.trials

In [None]:


# Open a large figure, then let hyperopt's plotting helper draw the history
# of the recorded trials.
plt.figure(figsize=(20, 10), dpi=80)
main_plot_history(trials)

In [None]:
# Losses of all trials that finished with STATUS_OK, in the order in which
# they are stored in `trials`.
losses = [
    trial['result']['loss']
    for trial in trials.trials
    if trial['result']['status'] == STATUS_OK
]
range(len(losses))

In [None]:
# Scatter plot of the collected losses against their position in `losses`.
fig = plt.figure(figsize=(20, 10), dpi=80)
fig.suptitle('Loss over time')
plt.scatter(x=range(len(losses)), y=losses)
plt.show()

In [None]:
# hyperopt's built-in histogram plot for the recorded trials.
main_plot_histogram(trials)

In [None]:
# Open an extra-large figure, then let hyperopt's plotting helper draw its
# per-variable plots for the recorded trials.
plt.figure(figsize=(25, 15), dpi=80)
main_plot_vars(trials)