In [1]:
import numpy as np
import math
import os
import time
import logging
from hyperopt.plotting import *
from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials, base, rand
from data.data_loader import Dataset
from misc.preferences import PREFERENCES
from misc.run_configuration import from_hyperopt, OutputLayerType, LearningSchedulerType, OptimizerType, default_params
from misc import utils
from misc.hyperopt_space import *

from optimizer import get_optimizer
from criterion import NllLoss, LossCombiner
from models.transformer.encoder import TransformerEncoder
from models.jointAspectTagger import JointAspectTagger
from trainer.train import Trainer
import pprint
import argparse
import pickle

In [2]:
# Accepted values for `dataset_choice`. The order is significant: the dataset
# setup further below picks its branch by index (0 -> germeval, 1 -> organic,
# anything else falls through to the amazon configuration).
POSSIBLE_DATASET_VALUES = [
    'germeval',
    'organic',
    'amazon',
]


def load_model(dataset, rc, experiment_name):
    """Assemble loss, model and optimizer and wrap them in a ``Trainer``.

    Args:
        dataset: Loaded dataset; its ``class_weights``, ``source_embedding``
            and ``target_names`` attributes are read here.
        rc: Run configuration handed to the encoder, the tagger, the
            optimizer factory and the trainer.
        experiment_name: Name passed through to the ``Trainer``.

    Returns:
        A ``Trainer`` created with tensorboard and verbose output disabled.
    """
    # NOTE(review): 4 and 20 are hard-coded sizes -- presumably the number of
    # output classes and the number of aspect heads. Confirm against
    # LossCombiner / JointAspectTagger before reusing with another dataset.
    criterion = LossCombiner(4, dataset.class_weights, NllLoss)
    encoder = TransformerEncoder(dataset.source_embedding, hyperparameters=rc)
    tagger = JointAspectTagger(encoder, rc, 4, 20, dataset.target_names)
    optim = get_optimizer(tagger, rc)
    return Trainer(tagger, criterion, optim, rc, dataset, experiment_name,
                   enable_tensorboard=False, verbose=False)

def load_dataset(rc, logger, task):
    """Create a ``Dataset`` from the global ``PREFERENCES`` and load its data.

    The loader passed to ``load_data`` is the module-level ``dsl``, which is
    bound by the dataset-selection code elsewhere in this file; it has to be
    defined before this function is called.

    Args:
        rc: Run configuration forwarded to ``Dataset``.
        logger: Logger forwarded to ``Dataset``.
        task: Task identifier forwarded to ``Dataset``.

    Returns:
        The ``Dataset`` instance after ``load_data`` has been called on it.
    """
    # File locations and column indices all come from the shared preferences;
    # no init/eos tokens are configured.
    options = {
        'source_index': PREFERENCES.source_index,
        'target_vocab_index': PREFERENCES.target_vocab_index,
        'data_path': PREFERENCES.data_root,
        'train_file': PREFERENCES.data_train,
        'valid_file': PREFERENCES.data_validation,
        'test_file': PREFERENCES.data_test,
        'file_format': PREFERENCES.file_format,
        'init_token': None,
        'eos_token': None,
    }
    loaded = Dataset(task, logger, rc, **options)
    loaded.load_data(dsl, verbose=False)
    return loaded

def _failed_trial(run_time, trainer=None):
    """Build the hyperopt result dict for a failed trial.

    Args:
        run_time: ``time.time()`` value taken when the trial started.
        trainer: Optional trainer; when given, its best loss and best f1 are
            included in the result.

    Returns:
        A dict with ``status`` set to ``STATUS_FAIL`` and the elapsed time.
    """
    outcome = {
        'status': STATUS_FAIL,
        'eval_time': time.time() - run_time
    }
    if trainer is not None:
        outcome['best_loss'] = trainer.get_best_loss()
        outcome['best_f1'] = trainer.get_best_f1()
    return outcome


def objective(parameters):
    """Run a single hyperopt trial and report its result.

    The trial converts the sampled ``parameters`` into a run configuration,
    loads the dataset and the model, trains, and finally evaluates. Every
    stage is guarded: if it raises, the error is printed and logged and a
    ``STATUS_FAIL`` result is returned, so one broken configuration does not
    abort the whole search.

    Reads the module-level names ``main_experiment_name`` and ``use_cuda``.

    Args:
        parameters: One sample drawn from the hyperopt search space.

    Returns:
        A hyperopt result dict. On success ``status`` is ``STATUS_OK`` and
        ``loss`` is the validation loss of the final evaluation; the dict
        also carries the run configuration, timing/iteration counters and the
        train/validation/test loss and f1. On failure ``status`` is
        ``STATUS_FAIL``.
    """
    run_time = time.time()

    # Every trial gets its own set of log files.
    utils.reset_loggers()
    experiment_name = utils.create_loggers(experiment_name=main_experiment_name)
    logger = logging.getLogger(__name__)
    dataset_logger = logging.getLogger('data_loader')

    # generate hp's from parameters
    try:
        rc = from_hyperopt(parameters, use_cuda, model_size=300, early_stopping=5, num_epochs=35, log_every_xth_iteration=-1, language=PREFERENCES.language)
    except Exception as err:
        print('Could not convert params: ' + str(err))
        # `parameters` is not a string, so it must be passed as a logging
        # argument; concatenating it would raise a TypeError right here in
        # the error handler.
        logger.exception("Could not load parameters from hyperopt configuration: %s", parameters)
        return _failed_trial(run_time)
    logger.info('New Params:')
    logger.info(rc)
    print('\n\n#########################################################################')
    print(rc)

    logger.debug('Load dataset')
    try:
        dataset = load_dataset(rc, dataset_logger, rc.task)
    except Exception as err:
        print('Could not load dataset: ' + str(err))
        logger.exception("Could not load dataset")
        return _failed_trial(run_time)
    logger.debug('dataset loaded')
    logger.debug('Load model')

    try:
        trainer = load_model(dataset, rc, experiment_name)
    except Exception as err:
        print('Could not load model: ' + str(err))
        logger.exception("Could not load model")
        return _failed_trial(run_time)

    logger.debug('model loaded')

    logger.debug('Begin training')
    try:
        result = trainer.train(use_cuda=rc.use_cuda, perform_evaluation=False)
        # The model itself is not used any further; the lookup stays inside
        # the try so that a training result without a 'model' entry is
        # reported as a failed trial.
        model = result['model']
    except Exception as err:
        print('Exception while training: ' + str(err))
        logger.exception("Could not complete iteration")
        return _failed_trial(run_time, trainer)

    # A diverged run cannot be ranked, so it is reported as failed.
    if math.isnan(trainer.get_best_loss()):
        print('Loss is nan')
        return _failed_trial(run_time, trainer)

    # perform evaluation and log results
    try:
        result = trainer.perform_final_evaluation(use_test_set=True, verbose=False)
    except Exception as err:
        logger.exception("Could not complete iteration evaluation.")
        print('Could not complete iteration evaluation: ' + str(err))
        return _failed_trial(run_time, trainer)
    print(f'VAL f1\t{trainer.get_best_f1()} - ({result[1][1]})')
    print(f'VAL loss\t{trainer.get_best_loss()}')

    print(f"       .---.\n \
         /     \\\n\
          \\.@-@./\n\
          /`\\_/`\\\n\
         //  _  \\\\\tLoss: {trainer.get_best_loss()}\n\
        | \\     )|_\tf1: {trainer.get_best_f1()}\n\
       /`\\_`>  <_/ \\\n\
       \\__/'---'\\__/\n")

    # `result` is indexed as [split][metric]: splits are train, validation
    # and test (in that order), metrics are loss and f1. Hyperopt minimises
    # the validation loss.
    return {
            'loss': result[1][0],
            'status': STATUS_OK,
            'eval_time': time.time() - run_time,
            'best_loss': trainer.get_best_loss(),
            'best_f1': trainer.get_best_f1(),
            'sample_iterations': trainer.get_num_samples_seen(),
            'iterations': trainer.get_num_iterations(),
            'rc': rc,
            'results': {
                'train': {
                    'loss': result[0][0],
                    'f1': result[0][1]
                },
                'validation': {
                    'loss': result[1][0],
                    'f1': result[1][1]
                },
                'test': {
                    'loss': result[2][0],
                    'f1': result[2][1]
                }
            }
        }

In [3]:
# Run configuration for this notebook.
dataset_choice = 'germeval'  # must be one of POSSIBLE_DATASET_VALUES
runs = 100  # number of evaluations handed to fmin as max_evals
main_experiment_name = 'GermEvalHyperoptRandom'
use_cuda = True
description = 'GermEval random Hyperopt run'

# Fail early with a readable message instead of configuring the wrong dataset.
if dataset_choice not in POSSIBLE_DATASET_VALUES:
    raise ValueError(
        f'Unknown dataset_choice {dataset_choice!r}; '
        f'expected one of {POSSIBLE_DATASET_VALUES}'
    )

In [4]:
# Dataset-specific setup. Each branch
#   * fills the global PREFERENCES with the file locations of the dataset,
#   * binds `dsl`, the loader that load_dataset() passes to Dataset.load_data,
#   * defines `search_space`, the hyperopt space sampled by fmin.
#
# hp.quniform yields floats (the run output shows e.g. 'batch_size': 66.0);
# presumably from_hyperopt converts them to ints where needed -- TODO confirm.
# 'output_dropout_rate' is registered under the hyperopt label
# 'last_layer_dropout'; the label is kept as is because it is the name under
# which hyperopt records the sampled value.
if dataset_choice == POSSIBLE_DATASET_VALUES[0]:
    PREFERENCES.defaults(
        data_root='./data/data/germeval2017',
        data_train='train_v1.4.tsv',
        data_validation='dev_v1.4.tsv',
        data_test='test_TIMESTAMP1.tsv',
        source_index=0,
        target_vocab_index=2,
        file_format='csv',
        language='de'
    )
    from data.germeval2017 import germeval2017_dataset as dsl

    search_space = {
        'batch_size': hp.quniform('batch_size', 10, 100, 1),
        'num_encoder_blocks': hp.quniform('num_encoder_blocks', 1, 8, 1),
        'pointwise_layer_size': hp.quniform('pointwise_layer_size', 32, 256, 1),
        'clip_comments_to': hp.quniform('clip_comments_to', 10, 250, 1),
        'dropout_rate': hp.uniform('dropout_rate', 0.0, 0.8),
        'output_dropout_rate': hp.uniform('last_layer_dropout', 0.0, 0.8),
        'num_heads': hp.choice('num_heads', [1, 2, 3, 4, 5]),
        'transformer_use_bias': hp_bool('transformer_use_bias'),
        'output_layer': hp.choice('output_layer', [
            {
                'type': OutputLayerType.Convolutions,
                'output_conv_num_filters': hp.quniform('output_conv_num_filters', 1, 400, 1),
                'output_conv_kernel_size': hp.quniform('output_conv_kernel_size', 1, 10, 1),
                'output_conv_stride': hp.quniform('output_conv_stride', 1, 10, 1),
                'output_conv_padding': hp.quniform('output_conv_padding', 0, 5, 1),
            },
            {
                'type': OutputLayerType.LinearSum
            }
        ]),
        'learning_rate_scheduler': hp.choice('learning_rate_scheduler', [
            {
                'type': LearningSchedulerType.Noam,
                'noam_learning_rate_warmup': hp.quniform('noam_learning_rate_warmup', 1000, 9000, 1),
                'noam_learning_rate_factor': hp.uniform('noam_learning_rate_factor', 0.01, 4)
            }
        ]),
        'optimizer': hp.choice('optimizer', [
            {
                'type': OptimizerType.Adam,
                'adam_beta1': hp.uniform('adam_beta1', 0.7, 0.999),
                'adam_beta2': hp.uniform('adam_beta2', 0.7, 0.999),
                'adam_eps': hp.loguniform('adam_eps', np.log(1e-10), np.log(1)),
                'learning_rate': hp.lognormal('adam_learning_rate', np.log(0.01), np.log(10)),
                # weight decay is sampled as a power of ten: 1e-8 ... 1e-3
                'adam_weight_decay': 1*10**hp.quniform('adam_weight_decay', -8, -3, 1)
            },
            # SGD is currently excluded from the search:
            #{
            #    'type': OptimizerType.SGD,
            #    'sgd_momentum': hp.uniform('sgd_momentum', 0.4, 1),
            #    'sgd_weight_decay': hp.loguniform('sgd_weight_decay', np.log(1e-4), np.log(1)),
            #    'sgd_nesterov': hp_bool('sgd_nesterov'),
            #    'learning_rate': hp.lognormal('sgd_learning_rate', np.log(0.01), np.log(10))
            #}
        ]),
        'replace_url_tokens': hp_bool('replace_url_tokens'),
        'harmonize_bahn': hp_bool('harmonize_bahn'),
        # 'embedding_type' used to be listed twice in this dict; the second,
        # identical entry silently replaced the first and has been dropped.
        'embedding_type': hp.choice('embedding_type', ['fasttext', 'glove']),
        'embedding_name': hp.choice('embedding_name', ['6B']),
        'embedding_dim': hp.choice('embedding_dim', [300]),
        'use_stop_words': hp_bool('use_stop_words'),
        'use_spell_checker': hp_bool('use_spell_checker'),
        'task': 'germeval'
    }

elif dataset_choice == POSSIBLE_DATASET_VALUES[1]:
    from data.organic2019 import organic_dataset as dsl
    from data.organic2019 import ORGANIC_TASK_ALL, ORGANIC_TASK_ENTITIES, ORGANIC_TASK_ATTRIBUTES, ORGANIC_TASK_ENTITIES_COMBINE, ORGANIC_TASK_COARSE
    PREFERENCES.defaults(
        data_root='./data/data/organic2019',
        data_train='train.csv',
        data_validation='validation.csv',
        data_test='test.csv',
        source_index=0,
        target_vocab_index=1,
        file_format='csv',
        language='en'
    )

    search_space = {
        'batch_size': hp.quniform('batch_size', 10, 64, 1),
        'num_encoder_blocks': hp.quniform('num_encoder_blocks', 1, 4, 1),
        'pointwise_layer_size': hp.quniform('pointwise_layer_size', 32, 350, 1),
        'clip_comments_to': hp.quniform('clip_comments_to', 45, 180, 1),
        'dropout_rate': hp.uniform('dropout_rate', 0.0, 0.8),
        'output_dropout_rate': hp.uniform('last_layer_dropout', 0.0, 0.8),
        'num_heads': hp.choice('num_heads', [1, 2, 3, 4, 5]),
        'transformer_use_bias': hp_bool('transformer_use_bias'),
        'output_layer': hp.choice('output_layer', [
            {
                'type': OutputLayerType.Convolutions,
                'output_conv_num_filters': hp.quniform('output_conv_num_filters', 10, 400, 1),
                'output_conv_kernel_size': hp.quniform('output_conv_kernel_size', 1, 10, 1),
                'output_conv_stride': hp.quniform('output_conv_stride', 1, 10, 1),
                'output_conv_padding': hp.quniform('output_conv_padding', 0, 5, 1),
            },
            {
                'type': OutputLayerType.LinearSum
            }
        ]),
        'learning_rate_scheduler': hp.choice('learning_rate_scheduler', [
            {
                'type': LearningSchedulerType.Noam,
                'noam_learning_rate_warmup': hp.quniform('noam_learning_rate_warmup', 1000, 9000, 1),
                'noam_learning_rate_factor': hp.uniform('noam_learning_rate_factor', 0.01, 4)
            }
        ]),
        'optimizer': hp.choice('optimizer', [
            {
                'type': OptimizerType.Adam,
                'adam_beta1': hp.uniform('adam_beta1', 0.7, 0.999),
                'adam_beta2': hp.uniform('adam_beta2', 0.7, 0.999),
                'adam_eps': hp.loguniform('adam_eps', np.log(1e-10), np.log(1)),
                'learning_rate': hp.lognormal('adam_learning_rate', np.log(0.01), np.log(10)),
                # weight decay is sampled as a power of ten: 1e-8 ... 1e-3
                'adam_weight_decay': 1*10**hp.quniform('adam_weight_decay', -8, -3, 1)
            },
            # SGD is currently excluded from the search:
            #{
            #    'type': OptimizerType.SGD,
            #    'sgd_momentum': hp.uniform('sgd_momentum', 0.4, 1),
            #    'sgd_weight_decay': hp.loguniform('sgd_weight_decay', np.log(1e-4), np.log(1)),
            #    'sgd_nesterov': hp_bool('sgd_nesterov'),
            #    'learning_rate': hp.lognormal('sgd_learning_rate', np.log(0.01), np.log(10))
            #}
        ]),
        'task': hp.choice('task', [
            ORGANIC_TASK_ENTITIES,
            ORGANIC_TASK_ENTITIES_COMBINE
        ]),
        'use_stop_words': hp_bool('use_stop_words'),
        'use_spell_checker': hp_bool('use_spell_checker'),
        'embedding_type': hp.choice('embedding_type', ['fasttext', 'glove'])
    }
else:
    # NOTE(review): this branch does not define `search_space`. Unless an
    # earlier cell run left one behind, fmin will fail with a NameError for
    # the amazon dataset -- add a search space here before using it.
    PREFERENCES.defaults(
        data_root='./data/data/amazon/splits',
        data_train='train.pkl',
        data_validation='val.pkl',
        data_test='test.pkl',
        source_index=0,
        target_vocab_index=1,
        file_format='pkl',
        language='en'
    )
    from data.amazon import amazon_dataset as dsl

In [5]:
# Create the loggers for the overall run and record what is about to start.
experiment_name = utils.create_loggers(experiment_name=main_experiment_name)
logger = logging.getLogger(__name__)
dataset_logger = logging.getLogger('data_loader')
logger.info(f'Run hyper parameter random grid search for experiment with name {main_experiment_name}')
logger.info(f'num_optim_iterations: {runs}')

Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\GermEvalHyperoptRandom\20190424\0


In [None]:
# Record which commit produced these results. This is purely informational,
# so a failure to determine the commit must not stop the run.
try:
    current_commit = utils.get_current_git_commit()
    logger.info('Current commit: ' + current_commit)
    print('Current commit: ' + current_commit)
except Exception:
    logger.exception('Could not print current commit')

# `trials` collects the result of every evaluation. It is created outside the
# try block so that whatever was gathered can still be saved if fmin aborts.
trials = Trials()
try:
    best = fmin(objective,
                space=search_space,
                algo=rand.suggest,
                max_evals=runs,
                trials=trials)

    print(best)
except Exception:
    logger.exception('Could not complete optimization')
    print('Could not complete optimization. The log file provides more details.')

# Persist the trials. The target directory is created if necessary so that
# the results of a long search are not lost to a missing folder.
log_dir = os.path.join(os.getcwd(), 'logs')
os.makedirs(log_dir, exist_ok=True)
path = os.path.join(log_dir, f'hp_run_{main_experiment_name}.pkl')
with open(path, 'wb') as f:
    pickle.dump(trials, f)

Current commit: b'57a7aff'
Log path is                                                                                                            
C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\GermEvalHyperoptRandom\20190424\1 
                                                                                                                       

#########################################################################
+----------------------------------------------------------------------------------+                                   
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                       |
+------------------------------+---------------------------------------------------+
|            kwargs            | {'batch_size': 66.0, 'learning_rate_sche[...]

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.30		0.22		0.234		0.896		0.84m - 0.8m / 0.0m                                                                    
2	34k	0.23		0.20		0.272		0.912		0.83m - 1.7m / 29.5m                                                                   
3	51k	0.21		0.20		0.211		0.863		0.78m - 2.5m / 29.1m                                                                   
4	68k	0.20		0.19		0.199		0.848		0.84m - 3.4m / 27.4m                                                                   
5	85k	0.18		0.19		0.256		0.894		0.82m - 4.2m / 29.3m                                                                   
6	103k	0.17		0.20		0.217		0.870		0.83m - 5.0m / 28.8m                                                                  
7	120k	0.15		0.22		0.331		0.929		0.80m - 5.8m / 29.1m                                                                  
8	137k	0.14		0.22		0.220		0.869		0.82m -

A Jupyter Widget

Exception while training: size mismatch, m1: [12956 x 3], m2: [164 x 4] at c:\a\w\1\s\tmp_conda_3.6_105809\conda\conda-bld\pytorch_1544094150554\work\aten\src\thc\generic/THCTensorMathBlas.cu:266
Log path is                                                                                                            
C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\GermEvalHyperoptRandom\20190424\3 
                                                                                                                       

#########################################################################
+----------------------------------------------------------------------------------+                                   
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                   

A Jupyter Widget

Exception while training: size mismatch, m1: [23800 x 7], m2: [280 x 4] at c:\a\w\1\s\tmp_conda_3.6_105809\conda\conda-bld\pytorch_1544094150554\work\aten\src\thc\generic/THCTensorMathBlas.cu:266
Log path is                                                                                                            
C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\GermEvalHyperoptRandom\20190424\4 
                                                                                                                       

#########################################################################
+----------------------------------------------------------------------------------+                                   
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                   

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.27		0.25		0.135		0.832		1.40m - 1.4m / 0.0m                                                                    
2	34k	0.25		0.24		0.135		0.800		1.52m - 3.0m / 49.2m                                                                   
3	51k	0.26		0.24		0.169		0.803		1.51m - 4.6m / 53.3m                                                                   
4	68k	0.25		0.23		0.196		0.843		1.39m - 6.0m / 52.8m                                                                   
5	85k	0.25		0.25		0.138		0.752		1.50m - 7.5m / 49.1m                                                                   
6	102k	0.23		0.24		0.193		0.864		1.50m - 9.0m / 52.7m                                                                  
7	119k	0.24		0.27		0.109		0.735		1.53m - 10.6m / 52.5m                                                                 
8	136k	0.24		0.22		0.183		0.854		1.52m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.20		0.15		0.208		0.864		0.69m - 0.7m / 0.0m                                                                    
2	34k	0.16		0.15		0.307		0.925		0.68m - 1.4m / 24.3m                                                                   
3	51k	0.15		0.15		0.233		0.879		0.68m - 2.1m / 24.0m                                                                   
4	69k	0.15		0.16		0.191		0.844		0.69m - 2.8m / 23.8m                                                                   
5	86k	0.15		0.15		0.230		0.880		0.67m - 3.5m / 24.1m                                                                   
6	103k	0.15		0.14		0.225		0.874		0.67m - 4.2m / 23.7m                                                                  
7	120k	0.15		0.15		0.222		0.866		0.64m - 4.8m / 23.5m                                                                  
VAL f1	0.3072042570609906 - (0.307204257

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.45		0.40		0.230		0.893		0.92m - 0.9m / 0.0m                                                                    
2	34k	0.35		0.37		0.205		0.858		0.93m - 1.9m / 32.1m                                                                   
3	51k	0.34		0.59		0.098		0.705		0.93m - 2.8m / 32.7m                                                                   
4	68k	0.32		0.57		0.106		0.684		0.93m - 3.8m / 32.6m                                                                   
5	85k	0.31		0.64		0.100		0.672		0.93m - 4.7m / 32.5m                                                                   
6	102k	0.29		0.59		0.108		0.697		0.94m - 5.6m / 32.6m                                                                  
VAL f1	0.23000824402308326 - (0.23000824402308326)                                                                     
VAL loss	0.3717025903093682             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.28		0.32		0.089		0.592		2.61m - 2.6m / 0.0m                                                                    
2	34k	0.26		0.32		0.076		0.561		2.59m - 5.2m / 91.2m                                                                   
3	51k	0.24		0.29		0.094		0.655		2.58m - 7.8m / 90.7m                                                                   
4	68k	0.23		0.26		0.158		0.811		2.59m - 10.4m / 90.5m                                                                  
5	85k	0.23		0.29		0.101		0.683		2.59m - 13.1m / 90.9m                                                                  
6	103k	0.22		0.33		0.083		0.604		2.58m - 15.7m / 90.7m                                                                 
7	120k	0.23		0.31		0.111		0.714		2.60m - 18.3m / 90.6m                                                                 
8	137k	0.24		0.31		0.098		0.674		2.58m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.18		0.13		0.253		0.890		1.78m - 1.8m / 0.0m                                                                    
2	34k	0.13		0.11		0.289		0.906		1.79m - 3.6m / 62.2m                                                                   
3	51k	0.12		0.11		0.277		0.901		1.79m - 5.4m / 62.7m                                                                   
4	68k	0.11		0.11		0.329		0.925		1.77m - 7.2m / 62.6m                                                                   
5	86k	0.11		0.12		0.228		0.872		1.79m - 9.0m / 62.2m                                                                   
6	103k	0.10		0.13		0.253		0.885		1.79m - 10.9m / 62.8m                                                                 
7	120k	0.09		0.12		0.276		0.906		1.79m - 12.6m / 62.7m                                                                 
8	137k	0.09		0.11		0.317		0.921		1.80m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.26		0.20		0.192		0.868		2.86m - 2.9m / 0.0m                                                                    
2	34k	0.20		0.20		0.382		0.959		2.80m - 5.7m / 100.1m                                                                  
3	51k	0.20		0.19		0.199		0.869		2.78m - 8.5m / 98.1m                                                                   
4	68k	0.19		0.18		0.382		0.959		2.79m - 11.3m / 97.5m                                                                  
5	85k	0.19		0.19		0.140		0.778		2.81m - 14.1m / 97.7m                                                                  
6	102k	0.19		0.19		0.244		0.912		2.85m - 17.0m / 98.5m                                                                 
7	119k	0.19		0.18		0.265		0.916		2.82m - 19.8m / 99.6m                                                                 
VAL f1	0.38205453392517436 - (0.38205453

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.34		0.27		0.206		0.875		1.71m - 1.7m / 0.0m                                                                    
2	34k	0.27		0.25		0.225		0.872		1.71m - 3.4m / 59.8m                                                                   
3	51k	0.26		0.24		0.280		0.917		1.65m - 5.1m / 59.9m                                                                   
4	68k	0.27		0.25		0.179		0.834		1.68m - 6.8m / 58.1m                                                                   
5	85k	0.27		0.28		0.164		0.807		1.69m - 8.5m / 59.1m                                                                   
6	103k	0.30		0.32		0.318		0.943		1.69m - 10.2m / 59.1m                                                                 
7	120k	0.32		0.31		0.252		0.910		1.65m - 11.9m / 59.4m                                                                 
8	137k	0.32		0.30		0.382		0.959		1.61m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.46		0.37		0.263		0.908		1.34m - 1.3m / 0.0m                                                                    
2	34k	0.48		0.40		0.256		0.919		1.34m - 2.7m / 47.1m                                                                   
3	51k	0.51		0.44		0.279		0.909		1.35m - 4.1m / 46.8m                                                                   
4	68k	0.55		0.53		0.308		0.928		1.32m - 5.4m / 47.4m                                                                   
5	85k	0.64		0.48		0.263		0.903		1.32m - 6.8m / 46.3m                                                                   
6	102k	0.65		0.50		0.167		0.821		1.33m - 8.1m / 46.3m                                                                  
7	119k	0.56		0.48		0.120		0.699		1.31m - 9.4m / 46.6m                                                                  
8	136k	0.41		0.35		0.239		0.884		1.28m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.25		0.27		0.143		0.803		2.42m - 2.4m / 0.0m                                                                    
2	34k	0.22		0.25		0.162		0.827		2.44m - 4.9m / 84.6m                                                                   
3	51k	0.21		0.25		0.161		0.813		2.45m - 7.4m / 85.4m                                                                   
4	68k	0.21		0.26		0.174		0.816		2.46m - 9.8m / 85.9m                                                                   
5	85k	0.21		0.26		0.247		0.900		2.47m - 12.3m / 86.0m                                                                  
6	102k	0.21		0.27		0.299		0.930		2.48m - 14.9m / 86.5m                                                                 
7	119k	0.21		0.27		0.223		0.889		2.46m - 17.4m / 86.7m                                                                 
8	137k	0.23		0.29		0.267		0.917		2.46m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.34		0.51		0.054		0.293		2.44m - 2.4m / 0.0m                                                                    
2	34k	0.31		0.62		0.056		0.283		2.42m - 4.9m / 85.3m                                                                   
3	51k	0.32		0.64		0.023		0.309		2.40m - 7.3m / 84.7m                                                                   
4	68k	0.33		0.41		0.065		0.612		2.40m - 9.7m / 84.3m                                                                   
5	85k	0.33		0.36		0.075		0.677		2.24m - 12.0m / 84.2m                                                                  
6	102k	0.33		0.34		0.098		0.715		2.20m - 14.2m / 79.2m                                                                 
7	119k	0.33		0.39		0.048		0.563		2.27m - 16.7m / 78.2m                                                                 
8	136k	0.36		0.40		0.067		0.561		2.40m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.27		0.20		0.328		0.936		1.02m - 1.0m / 0.0m                                                                    
2	34k	0.23		0.21		0.266		0.904		1.03m - 2.1m / 35.8m                                                                   
3	51k	0.23		0.24		0.320		0.932		1.03m - 3.1m / 36.0m                                                                   
4	68k	0.24		0.23		0.286		0.922		1.10m - 4.2m / 36.0m                                                                   
5	85k	0.24		0.24		0.218		0.865		1.03m - 5.3m / 38.3m                                                                   
6	102k	0.25		0.26		0.243		0.891		1.04m - 6.3m / 36.3m                                                                  
VAL f1	0.32824948782153424 - (0.32824948782153424)                                                                     
VAL loss	0.19624762345547878            

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.51		2.08		0.017		0.069		3.87m - 3.9m / 0.0m                                                                    
2	34k	0.56		2.98		0.015		0.065		4.01m - 8.0m / 135.5m                                                                  
3	51k	0.65		1.27		0.015		0.221		4.28m - 12.3m / 140.2m                                                                 
4	68k	0.71		1.33		0.017		0.410		4.18m - 16.5m / 149.4m                                                                 
5	85k	0.77		3.33		0.018		0.410		3.96m - 20.5m / 146.2m                                                                 
6	102k	0.88		5.25		0.010		0.159		4.04m - 24.5m / 139.2m                                                                
7	119k	0.94		1.06		0.025		0.366		4.03m - 28.6m / 141.8m                                                                
8	137k	1.09		1.39		0.020		0.263		3.96m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.50		0.35		0.307		0.934		1.78m - 1.8m / 0.0m                                                                    
2	34k	0.37		0.32		0.272		0.906		1.77m - 3.6m / 62.5m                                                                   
3	51k	0.32		0.28		0.278		0.906		1.83m - 5.4m / 62.1m                                                                   
4	68k	0.29		0.29		0.192		0.853		1.79m - 7.2m / 64.0m                                                                   
5	85k	0.26		0.24		0.304		0.915		1.81m - 9.1m / 62.9m                                                                   
6	102k	0.25		0.29		0.207		0.863		1.79m - 10.9m / 63.2m                                                                 
VAL f1	0.30671296296296297 - (0.30671296296296297)                                                                     
VAL loss	0.2389094498486275             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.19		0.24		0.045		0.513		2.37m - 2.4m / 0.0m                                                                    
2	34k	0.18		0.20		0.105		0.680		2.39m - 4.8m / 82.9m                                                                   
3	51k	0.18		0.21		0.108		0.664		2.46m - 7.3m / 83.7m                                                                   
4	68k	0.18		0.19		0.184		0.847		2.48m - 9.8m / 86.2m                                                                   
5	86k	0.18		0.21		0.154		0.794		2.47m - 12.3m / 86.6m                                                                  
6	103k	0.18		0.24		0.081		0.558		2.48m - 14.8m / 86.5m                                                                 
7	120k	0.19		0.21		0.099		0.643		2.49m - 17.3m / 86.7m                                                                 
8	137k	0.20		0.25		0.074		0.570		2.49m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.31		0.21		0.241		0.903		1.60m - 1.6m / 0.0m                                                                    
2	34k	0.22		0.21		0.210		0.871		1.59m - 3.2m / 55.9m                                                                   
3	51k	0.21		0.18		0.240		0.887		1.60m - 4.8m / 55.8m                                                                   
4	68k	0.18		0.21		0.136		0.779		1.61m - 6.5m / 56.0m                                                                   
5	86k	0.17		0.28		0.098		0.683		1.57m - 8.1m / 56.4m                                                                   
6	103k	0.16		0.24		0.121		0.731		1.57m - 9.6m / 55.3m                                                                  
VAL f1	0.2414651493598862 - (0.2414651493598862)                                                                       
VAL loss	0.18423130744982252            

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.19		0.16		0.205		0.851		0.67m - 0.7m / 0.0m                                                                    
2	34k	0.16		0.16		0.149		0.797		0.67m - 1.4m / 23.6m                                                                   
3	51k	0.15		0.15		0.270		0.899		0.65m - 2.0m / 23.4m                                                                   
4	68k	0.15		0.16		0.261		0.896		0.65m - 2.8m / 22.9m                                                                   
5	86k	0.15		0.14		0.296		0.912		0.65m - 3.5m / 23.0m                                                                   
6	103k	0.15		0.14		0.233		0.876		0.65m - 4.1m / 23.0m                                                                  
7	120k	0.15		0.16		0.246		0.887		0.65m - 4.8m / 22.9m                                                                  
8	137k	0.15		0.15		0.324		0.927		0.65m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.30		0.24		0.109		0.720		1.37m - 1.4m / 0.0m                                                                    
2	34k	0.22		0.17		0.189		0.870		1.38m - 2.8m / 48.1m                                                                   
3	51k	0.19		0.17		0.182		0.860		1.38m - 4.2m / 48.2m                                                                   
4	68k	0.18		0.17		0.219		0.889		1.39m - 5.6m / 48.3m                                                                   
5	86k	0.18		0.17		0.188		0.860		1.40m - 7.0m / 48.6m                                                                   
6	103k	0.18		0.17		0.176		0.841		1.40m - 8.4m / 49.1m                                                                  
7	120k	0.17		0.19		0.109		0.701		1.43m - 9.9m / 49.0m                                                                  
8	137k	0.17		0.21		0.092		0.694		1.38m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.20		0.16		0.263		0.905		1.08m - 1.1m / 0.0m                                                                    
2	34k	0.15		0.15		0.270		0.903		1.09m - 2.2m / 37.7m                                                                   
3	51k	0.12		0.15		0.265		0.901		1.06m - 3.3m / 38.3m                                                                   
4	68k	0.09		0.15		0.262		0.899		1.07m - 4.4m / 37.3m                                                                   
5	85k	0.06		0.17		0.277		0.915		1.06m - 5.5m / 37.4m                                                                   
6	102k	0.04		0.18		0.243		0.903		1.09m - 6.6m / 37.4m                                                                  
7	119k	0.03		0.23		0.326		0.933		1.10m - 7.7m / 38.3m                                                                  
8	136k	0.03		0.28		0.382		0.950		1.06m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.44		0.30		0.332		0.940		0.85m - 0.9m / 0.0m                                                                    
2	34k	0.31		0.26		0.318		0.923		0.87m - 1.7m / 29.9m                                                                   
3	51k	0.26		0.25		0.345		0.932		0.89m - 2.6m / 30.3m                                                                   
4	68k	0.23		0.26		0.239		0.877		0.84m - 3.5m / 31.1m                                                                   
5	85k	0.20		0.25		0.297		0.908		0.85m - 4.4m / 29.6m                                                                   
6	102k	0.18		0.26		0.292		0.906		0.84m - 5.2m / 29.9m                                                                  
7	119k	0.15		0.26		0.394		0.942		0.85m - 6.1m / 29.7m                                                                  
8	136k	0.13		0.29		0.358		0.933		0.86m -

+------------------------------+---------------------------------------------------+
 22%|█████████▏                                | 22/100 [6:43:51<27:28:55, 1268.40s/it, best loss: 0.10995660924978375]

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.21		0.16		0.240		0.899		1.40m - 1.4m / 0.0m                                                                    
2	34k	0.16		0.13		0.211		0.868		1.39m - 2.8m / 48.9m                                                                   
3	51k	0.13		0.14		0.180		0.821		1.39m - 4.2m / 48.8m                                                                   
4	68k	0.12		0.14		0.181		0.840		1.39m - 5.6m / 48.8m                                                                   
5	85k	0.12		0.14		0.201		0.842		1.40m - 7.0m / 48.7m                                                                   
6	103k	0.11		0.13		0.230		0.862		1.42m - 8.5m / 49.1m                                                                  
VAL f1	0.24014336917562723 - (0.24014336917562723)                                                                     
VAL loss	0.13256469067562834            

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.30		0.25		0.104		0.702		1.15m - 1.1m / 0.0m                                                                    
2	34k	0.25		0.24		0.114		0.723		1.15m - 2.3m / 40.2m                                                                   
3	51k	0.24		0.23		0.129		0.777		1.12m - 3.5m / 40.3m                                                                   
4	68k	0.23		0.22		0.132		0.781		1.14m - 4.6m / 39.5m                                                                   
5	85k	0.22		0.22		0.193		0.859		1.12m - 5.8m / 39.9m                                                                   
6	103k	0.21		0.20		0.217		0.873		1.14m - 6.9m / 39.5m                                                                  
7	120k	0.20		0.20		0.226		0.884		1.11m - 8.1m / 40.0m                                                                  
8	137k	0.20		0.19		0.239		0.896		1.13m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.55		0.37		0.247		0.888		4.51m - 4.5m / 0.0m                                                                    
2	34k	0.38		0.36		0.232		0.879		4.55m - 9.1m / 158.0m                                                                  
3	51k	0.36		0.37		0.217		0.869		4.58m - 13.7m / 159.4m                                                                 
4	68k	0.35		0.37		0.182		0.865		4.55m - 18.3m / 160.2m                                                                 
5	85k	0.35		0.34		0.294		0.910		4.54m - 22.8m / 159.4m                                                                 
6	102k	0.35		0.35		0.341		0.930		4.58m - 27.4m / 159.1m                                                                
7	119k	0.35		0.34		0.334		0.932		4.55m - 32.0m / 160.3m                                                                
8	136k	0.36		0.33		0.311		0.925		4.53m -

+------------------------------+---------------------------------------------------+
 25%|██████████▌                               | 25/100 [9:23:22<64:49:21, 3111.49s/it, best loss: 0.10995660924978375]

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.37		0.25		0.133		0.787		1.04m - 1.0m / 0.0m                                                                    
2	34k	0.24		0.22		0.318		0.941		1.06m - 2.1m / 36.4m                                                                   
3	51k	0.22		0.21		0.296		0.934		1.03m - 3.2m / 37.2m                                                                   
4	68k	0.21		0.21		0.286		0.926		1.03m - 4.2m / 36.3m                                                                   
5	86k	0.21		0.20		0.234		0.883		1.03m - 5.3m / 36.2m                                                                   
6	103k	0.20		0.20		0.188		0.835		1.06m - 6.4m / 36.3m                                                                  
7	120k	0.19		0.19		0.204		0.848		1.03m - 7.4m / 37.0m                                                                  
VAL f1	0.31843434343434346 - (0.31843434

A Jupyter Widget

Exception while training: size mismatch, m1: [15920 x 2], m2: [199 x 4] at c:\a\w\1\s\tmp_conda_3.6_105809\conda\conda-bld\pytorch_1544094150554\work\aten\src\thc\generic/THCTensorMathBlas.cu:266
Log path is                                                                                                            
C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\GermEvalHyperoptRandom\20190424\28
                                                                                                                       

#########################################################################
+----------------------------------------------------------------------------------+                                   
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                   

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.34		0.26		0.214		0.861		1.37m - 1.4m / 0.0m                                                                    
2	34k	0.25		0.22		0.246		0.884		1.39m - 2.8m / 48.1m                                                                   
3	51k	0.24		0.23		0.392		0.950		1.36m - 4.2m / 48.6m                                                                   
4	68k	0.24		0.22		0.267		0.895		1.37m - 5.6m / 47.9m                                                                   
5	85k	0.23		0.23		0.280		0.904		1.37m - 7.0m / 48.2m                                                                   
6	102k	0.22		0.24		0.230		0.883		1.37m - 8.3m / 48.2m                                                                  
7	119k	0.21		0.24		0.290		0.909		1.37m - 9.7m / 48.2m                                                                  
8	137k	0.20		0.22		0.347		0.932		1.37m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.96		0.95		0.382		0.959		5.52m - 5.5m / 0.0m                                                                    
2	34k	1.00		0.89		0.265		0.917		5.67m - 11.2m / 193.4m                                                                 
3	51k	0.92		0.86		0.382		0.959		5.72m - 17.0m / 198.3m                                                                 
4	68k	0.88		0.82		0.382		0.959		5.63m - 22.6m / 200.1m                                                                 
5	85k	0.87		0.86		0.382		0.959		5.62m - 28.2m / 197.0m                                                                 
6	102k	0.93		0.87		0.382		0.959		5.57m - 33.8m / 196.8m                                                                
VAL f1	0.3821757056771329 - (0.3821757056771329)                                                                       
VAL loss	0.8244816759690882             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.28		0.22		0.319		0.942		1.13m - 1.1m / 0.0m                                                                    
2	34k	0.21		0.24		0.218		0.888		1.11m - 2.3m / 39.6m                                                                   
3	51k	0.21		0.32		0.120		0.758		1.11m - 3.4m / 39.0m                                                                   
4	68k	0.20		0.24		0.310		0.939		1.10m - 4.5m / 39.0m                                                                   
5	85k	0.20		0.25		0.203		0.875		1.13m - 5.7m / 38.7m                                                                   
6	103k	0.20		0.29		0.118		0.743		1.07m - 6.7m / 39.5m                                                                  
VAL f1	0.31937984496124033 - (0.31937984496124033)                                                                     
VAL loss	0.22277622729783567            

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.32		0.20		0.270		0.922		0.97m - 1.0m / 0.0m                                                                    
2	34k	0.21		0.19		0.283		0.925		0.97m - 2.0m / 34.0m                                                                   
3	51k	0.20		0.19		0.325		0.943		0.97m - 3.0m / 34.0m                                                                   
4	68k	0.20		0.19		0.286		0.926		0.97m - 4.0m / 34.1m                                                                   
5	85k	0.20		0.19		0.305		0.934		0.97m - 5.0m / 34.0m                                                                   
6	102k	0.20		0.18		0.287		0.927		0.97m - 5.9m / 34.1m                                                                  
7	119k	0.20		0.18		0.304		0.933		0.99m - 6.9m / 34.1m                                                                  
8	137k	0.20		0.18		0.285		0.923		1.00m -

A Jupyter Widget

Exception while training: size mismatch, m1: [21983 x 3], m2: [247 x 4] at c:\a\w\1\s\tmp_conda_3.6_105809\conda\conda-bld\pytorch_1544094150554\work\aten\src\thc\generic/THCTensorMathBlas.cu:266
Log path is                                                                                                            
C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\GermEvalHyperoptRandom\20190424\33
                                                                                                                       

#########################################################################
+----------------------------------------------------------------------------------+                                   
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                   

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.35		0.34		0.148		0.809		2.41m - 2.4m / 0.0m                                                                    
2	34k	0.35		0.32		0.157		0.788		2.57m - 5.0m / 84.2m                                                                   
3	51k	0.34		0.36		0.209		0.869		2.39m - 7.4m / 89.8m                                                                   
4	68k	0.34		0.34		0.172		0.818		2.57m - 10.0m / 83.9m                                                                  
5	85k	0.36		0.34		0.197		0.874		2.61m - 12.6m / 89.7m                                                                  
6	102k	0.36		0.35		0.265		0.916		2.63m - 15.3m / 90.9m                                                                 
7	120k	0.39		0.35		0.265		0.916		2.49m - 17.8m / 91.6m                                                                 
8	137k	0.37		0.36		0.265		0.916		2.47m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.34		0.25		0.244		0.885		0.96m - 1.0m / 0.0m                                                                    
2	34k	0.33		0.32		0.191		0.841		0.95m - 1.9m / 33.6m                                                                   
3	51k	0.35		0.28		0.143		0.754		0.96m - 2.9m / 33.3m                                                                   
4	68k	0.35		0.27		0.151		0.776		0.96m - 3.9m / 33.6m                                                                   
5	85k	0.34		0.28		0.130		0.723		0.95m - 4.8m / 33.5m                                                                   
6	102k	0.32		0.27		0.141		0.746		0.95m - 5.8m / 33.5m                                                                  
VAL f1	0.24414266527383974 - (0.24414266527383974)                                                                     
VAL loss	0.2501223031540364             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.32		0.21		0.234		0.895		0.97m - 1.0m / 0.0m                                                                    
2	34k	0.23		0.21		0.206		0.860		0.98m - 2.0m / 34.1m                                                                   
3	51k	0.21		0.20		0.205		0.859		0.98m - 3.0m / 34.2m                                                                   
4	68k	0.18		0.21		0.250		0.885		0.98m - 4.0m / 34.2m                                                                   
5	85k	0.14		0.23		0.191		0.842		0.97m - 4.9m / 34.3m                                                                   
6	103k	0.11		0.27		0.292		0.908		0.98m - 5.9m / 34.1m                                                                  
7	120k	0.10		0.26		0.281		0.904		1.01m - 7.0m / 34.3m                                                                  
8	137k	0.09		0.24		0.286		0.909		0.98m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.20		0.17		0.155		0.809		1.53m - 1.5m / 0.0m                                                                    
2	34k	0.18		0.16		0.192		0.854		1.52m - 3.1m / 53.4m                                                                   
3	51k	0.17		0.16		0.228		0.886		1.53m - 4.6m / 53.3m                                                                   
4	69k	0.16		0.20		0.220		0.883		1.59m - 6.2m / 53.5m                                                                   
5	86k	0.15		0.18		0.221		0.882		1.57m - 7.8m / 55.7m                                                                   
6	103k	0.15		0.19		0.209		0.881		1.53m - 9.4m / 54.8m                                                                  
7	120k	0.19		0.21		0.382		0.959		1.53m - 10.9m / 53.6m                                                                 
8	137k	0.21		0.20		0.237		0.910		1.52m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.40		0.37		0.227		0.904		2.61m - 2.6m / 0.0m                                                                    
2	34k	0.42		0.39		0.197		0.869		2.61m - 5.2m / 91.3m                                                                   
3	51k	0.41		0.40		0.212		0.874		2.59m - 7.8m / 91.3m                                                                   
4	68k	0.40		0.37		0.382		0.959		2.59m - 10.5m / 90.8m                                                                  
5	85k	0.38		0.36		0.382		0.959		2.60m - 13.1m / 90.8m                                                                  
6	102k	0.37		0.35		0.382		0.959		2.60m - 15.7m / 91.0m                                                                 
7	119k	0.36		0.33		0.265		0.916		2.60m - 18.3m / 91.0m                                                                 
8	136k	0.36		0.33		0.382		0.959		2.60m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.29		0.38		0.073		0.583		4.26m - 4.3m / 0.0m                                                                    
2	34k	0.28		0.39		0.044		0.544		4.33m - 8.6m / 149.1m                                                                  
3	51k	0.27		0.38		0.111		0.703		4.31m - 12.9m / 151.6m                                                                 
4	68k	0.27		0.36		0.178		0.836		4.29m - 17.3m / 151.0m                                                                 
5	85k	0.26		0.39		0.267		0.911		4.29m - 21.6m / 150.2m                                                                 
6	102k	0.25		0.34		0.216		0.866		4.28m - 25.9m / 150.4m                                                                
7	119k	0.25		0.37		0.213		0.869		4.28m - 30.2m / 150.0m                                                                
8	136k	0.24		0.31		0.302		0.925		4.28m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.61		0.49		0.199		0.874		2.80m - 2.8m / 0.0m                                                                    
2	34k	0.43		1.35		0.044		0.494		2.87m - 5.7m / 98.1m                                                                   
3	51k	0.39		0.83		0.106		0.686		2.88m - 8.6m / 100.6m                                                                  
4	68k	0.39		0.50		0.199		0.863		2.78m - 11.4m / 100.9m                                                                 
5	85k	0.40		0.86		0.091		0.689		2.77m - 14.2m / 97.8m                                                                  
6	102k	0.41		0.61		0.205		0.853		2.78m - 17.0m / 97.5m                                                                 
7	119k	0.41		0.54		0.212		0.863		2.78m - 19.8m / 97.5m                                                                 
8	136k	0.41		0.60		0.178		0.818		2.77m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.23		0.16		0.309		0.935		1.21m - 1.2m / 0.0m                                                                    
2	34k	0.16		0.14		0.196		0.846		1.28m - 2.5m / 42.4m                                                                   
3	51k	0.13		0.12		0.258		0.887		1.22m - 3.8m / 44.8m                                                                   
4	68k	0.12		0.11		0.266		0.893		1.23m - 5.0m / 42.7m                                                                   
5	86k	0.10		0.12		0.240		0.877		1.21m - 6.2m / 43.2m                                                                   
6	103k	0.09		0.12		0.235		0.872		1.21m - 7.4m / 42.4m                                                                  
VAL f1	0.30908248378127895 - (0.30908248378127895)                                                                     
VAL loss	0.11227432948247147            

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.30		0.23		0.218		0.877		1.68m - 1.7m / 0.0m                                                                    
2	34k	0.22		0.28		0.110		0.756		1.69m - 3.4m / 58.9m                                                                   
3	51k	0.19		0.22		0.155		0.802		1.68m - 5.1m / 59.0m                                                                   
4	68k	0.18		0.23		0.147		0.814		1.68m - 6.8m / 58.9m                                                                   
5	85k	0.18		0.25		0.140		0.753		1.68m - 8.5m / 58.9m                                                                   
6	102k	0.17		0.21		0.179		0.841		1.70m - 10.2m / 58.9m                                                                 
VAL f1	0.2181765640803352 - (0.2181765640803352)                                                                       
VAL loss	0.2105425222733985             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.23		0.18		0.205		0.875		0.84m - 0.8m / 0.0m                                                                    
2	34k	0.18		0.18		0.208		0.878		0.84m - 1.7m / 29.5m                                                                   
3	51k	0.17		0.20		0.117		0.715		0.84m - 2.6m / 29.5m                                                                   
4	68k	0.17		0.22		0.094		0.612		0.85m - 3.4m / 29.6m                                                                   
5	86k	0.17		0.23		0.105		0.662		0.85m - 4.3m / 29.7m                                                                   
6	103k	0.16		0.43		0.046		0.371		0.85m - 5.2m / 29.8m                                                                  
7	120k	0.16		0.30		0.078		0.576		0.85m - 6.0m / 29.8m                                                                  
VAL f1	0.20841356836024294 - (0.20841356

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.25		0.18		0.381		0.959		0.73m - 0.7m / 0.0m                                                                    
2	34k	0.19		0.18		0.263		0.909		0.75m - 1.5m / 25.7m                                                                   
3	51k	0.18		0.17		0.193		0.850		0.75m - 2.3m / 26.2m                                                                   
4	68k	0.18		0.18		0.205		0.859		0.75m - 3.0m / 26.3m                                                                   
5	85k	0.17		0.17		0.197		0.851		0.75m - 3.8m / 26.2m                                                                   
6	103k	0.16		0.16		0.242		0.889		0.75m - 4.5m / 26.2m                                                                  
VAL f1	0.3813291139240506 - (0.3813291139240506)                                                                       
VAL loss	0.16315554837146437            

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.78		0.92		0.258		0.915		6.37m - 6.4m / 0.0m                                                                    
2	34k	0.81		0.64		0.382		0.959		6.37m - 12.8m / 223.0m                                                                 
3	51k	0.75		0.67		0.382		0.959		6.32m - 19.1m / 223.0m                                                                 
4	68k	0.74		0.59		0.382		0.959		6.33m - 25.5m / 221.4m                                                                 
5	85k	0.73		0.63		0.382		0.959		6.34m - 31.8m / 221.7m                                                                 
6	102k	0.72		0.61		0.382		0.959		6.32m - 38.2m / 222.0m                                                                
7	119k	0.72		0.57		0.382		0.959		6.33m - 44.5m / 221.5m                                                                
VAL f1	0.38205453392517436 - (0.38205453

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.35		0.27		0.227		0.893		2.75m - 2.7m / 0.0m                                                                    
2	34k	0.28		0.26		0.218		0.869		2.75m - 5.5m / 96.2m                                                                   
3	51k	0.26		0.26		0.231		0.879		2.75m - 8.3m / 96.3m                                                                   
4	68k	0.25		0.23		0.257		0.898		2.74m - 11.1m / 96.3m                                                                  
5	85k	0.24		0.26		0.160		0.812		2.75m - 13.8m / 96.1m                                                                  
6	103k	0.24		0.28		0.165		0.811		2.82m - 16.7m / 96.3m                                                                 
7	120k	0.23		0.28		0.159		0.807		2.75m - 19.4m / 98.5m                                                                 
8	137k	0.23		0.24		0.210		0.866		2.75m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.32		0.23		0.177		0.858		1.03m - 1.0m / 0.0m                                                                    
2	34k	0.22		0.23		0.376		0.958		1.06m - 2.1m / 36.2m                                                                   
3	51k	0.21		0.23		0.199		0.885		1.10m - 3.2m / 37.2m                                                                   
4	68k	0.21		0.22		0.333		0.946		1.03m - 4.3m / 38.4m                                                                   
5	85k	0.21		0.23		0.373		0.956		1.03m - 5.3m / 36.1m                                                                   
6	103k	0.21		0.21		0.307		0.939		1.02m - 6.4m / 36.1m                                                                  
7	120k	0.21		0.22		0.382		0.959		1.03m - 7.4m / 35.9m                                                                  
8	137k	0.21		0.22		0.365		0.955		1.03m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.62		0.48		0.301		0.928		4.24m - 4.2m / 0.0m                                                                    
2	34k	0.54		0.51		0.304		0.925		4.27m - 8.6m / 148.6m                                                                  
3	51k	0.53		0.57		0.262		0.906		4.30m - 12.9m / 149.7m                                                                 
4	68k	0.53		0.54		0.304		0.927		4.23m - 17.2m / 150.5m                                                                 
5	85k	0.52		0.57		0.263		0.908		4.25m - 21.4m / 148.4m                                                                 
6	102k	0.51		0.57		0.316		0.935		4.28m - 25.7m / 148.9m                                                                
7	119k	0.52		0.58		0.256		0.910		4.25m - 30.0m / 150.0m                                                                
8	136k	0.54		0.60		0.265		0.917		4.32m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.23		0.30		0.064		0.419		2.81m - 2.8m / 0.0m                                                                    
2	34k	0.21		0.55		0.043		0.092		2.81m - 5.7m / 98.4m                                                                   
3	51k	0.22		0.53		0.047		0.142		2.81m - 8.5m / 98.4m                                                                   
4	68k	0.22		0.47		0.047		0.190		2.80m - 11.3m / 98.3m                                                                  
5	85k	0.23		0.67		0.040		0.088		2.80m - 14.1m / 98.1m                                                                  
6	103k	0.24		0.74		0.007		0.057		2.80m - 16.9m / 98.0m                                                                 
VAL f1	0.06381586544012932 - (0.06381586544012932)                                                                     
VAL loss	0.2973193164957996             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.22		0.20		0.206		0.863		1.26m - 1.3m / 0.0m                                                                    
2	34k	0.19		0.21		0.160		0.807		1.31m - 2.6m / 44.0m                                                                   
3	51k	0.18		0.19		0.190		0.845		1.31m - 3.9m / 45.8m                                                                   
4	68k	0.18		0.18		0.229		0.889		1.31m - 5.2m / 45.8m                                                                   
5	85k	0.17		0.18		0.225		0.869		1.30m - 6.6m / 45.8m                                                                   
6	103k	0.16		0.16		0.256		0.895		1.34m - 7.9m / 45.6m                                                                  
7	120k	0.15		0.17		0.197		0.858		1.30m - 9.3m / 46.9m                                                                  
8	137k	0.15		0.18		0.240		0.882		1.31m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.68		0.49		0.315		0.928		2.47m - 2.5m / 0.0m                                                                    
2	34k	0.56		0.53		0.316		0.928		2.49m - 5.0m / 86.6m                                                                   
3	51k	0.54		0.58		0.399		0.953		2.48m - 7.5m / 87.1m                                                                   
4	68k	0.51		0.53		0.376		0.946		2.53m - 10.1m / 86.8m                                                                  
5	85k	0.49		0.48		0.341		0.932		2.49m - 12.6m / 88.4m                                                                  
6	102k	0.46		0.52		0.363		0.939		2.39m - 15.0m / 87.2m                                                                 
7	119k	0.44		0.51		0.282		0.908		2.51m - 17.5m / 84.3m                                                                 
8	136k	0.42		0.46		0.364		0.938		2.51m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.23		0.16		0.200		0.873		2.34m - 2.3m / 0.0m                                                                    
2	34k	0.17		0.16		0.221		0.884		2.36m - 4.7m / 82.1m                                                                   
3	51k	0.17		0.16		0.226		0.893		2.36m - 7.1m / 82.5m                                                                   
4	68k	0.17		0.16		0.241		0.903		2.36m - 9.5m / 82.6m                                                                   
5	86k	0.17		0.17		0.149		0.796		2.36m - 11.9m / 82.8m                                                                  
6	103k	0.16		0.16		0.151		0.785		2.36m - 14.3m / 82.7m                                                                 
7	120k	0.15		0.17		0.117		0.726		2.39m - 16.7m / 82.7m                                                                 
8	137k	0.15		0.17		0.125		0.732		2.36m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.19		0.17		0.179		0.830		1.83m - 1.8m / 0.0m                                                                    
2	34k	0.17		0.16		0.197		0.846		1.86m - 3.7m / 64.1m                                                                   
3	51k	0.16		0.15		0.223		0.873		1.82m - 5.6m / 65.1m                                                                   
4	68k	0.15		0.16		0.232		0.884		1.86m - 7.5m / 63.8m                                                                   
5	85k	0.14		0.16		0.212		0.862		1.82m - 9.3m / 65.0m                                                                   
6	103k	0.12		0.16		0.205		0.854		1.82m - 11.1m / 63.8m                                                                 
7	120k	0.09		0.16		0.253		0.890		1.82m - 13.0m / 63.8m                                                                 
8	137k	0.08		0.17		0.233		0.882		1.85m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.22		0.27		0.058		0.575		2.26m - 2.3m / 0.0m                                                                    
2	34k	0.19		0.19		0.154		0.773		2.29m - 4.6m / 79.2m                                                                   
3	51k	0.17		0.19		0.134		0.758		2.26m - 6.9m / 80.2m                                                                   
4	68k	0.16		0.20		0.134		0.778		2.26m - 9.2m / 79.2m                                                                   
5	85k	0.16		0.18		0.162		0.796		2.27m - 11.4m / 79.4m                                                                  
6	102k	0.15		0.19		0.155		0.787		2.29m - 13.8m / 79.5m                                                                 
7	120k	0.15		0.19		0.175		0.823		2.29m - 16.1m / 80.3m                                                                 
8	137k	0.15		0.20		0.156		0.786		2.28m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.25		0.18		0.369		0.955		0.80m - 0.8m / 0.0m                                                                    
2	34k	0.20		0.18		0.306		0.932		0.83m - 1.7m / 28.0m                                                                   
3	51k	0.19		0.16		0.264		0.903		0.82m - 2.5m / 28.9m                                                                   
4	68k	0.17		0.15		0.230		0.871		0.80m - 3.3m / 28.9m                                                                   
5	85k	0.16		0.15		0.200		0.846		0.80m - 4.1m / 28.0m                                                                   
6	102k	0.14		0.17		0.144		0.774		0.80m - 4.9m / 28.1m                                                                  
VAL f1	0.3688573120189461 - (0.3688573120189461)                                                                       
VAL loss	0.14649001213899704            

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.26		0.21		0.193		0.872		2.43m - 2.4m / 0.0m                                                                    
2	34k	0.22		0.20		0.185		0.852		2.44m - 4.9m / 84.9m                                                                   
3	51k	0.21		0.19		0.226		0.886		2.43m - 7.3m / 85.4m                                                                   
4	68k	0.21		0.20		0.181		0.836		2.42m - 9.8m / 85.1m                                                                   
5	85k	0.21		0.21		0.151		0.781		2.43m - 12.2m / 85.0m                                                                  
6	102k	0.20		0.20		0.172		0.828		2.42m - 14.7m / 85.0m                                                                 
7	120k	0.20		0.21		0.156		0.796		2.43m - 17.1m / 84.9m                                                                 
8	137k	0.19		0.20		0.188		0.845		2.44m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.33		0.28		0.200		0.865		1.83m - 1.8m / 0.0m                                                                    
2	34k	0.26		0.31		0.135		0.750		1.87m - 3.7m / 64.2m                                                                   
3	51k	0.22		0.27		0.134		0.771		1.86m - 5.6m / 65.3m                                                                   
4	68k	0.20		0.33		0.090		0.671		1.84m - 7.5m / 65.1m                                                                   
5	85k	0.20		0.24		0.171		0.814		1.84m - 9.3m / 64.5m                                                                   
6	102k	0.19		0.22		0.175		0.835		1.84m - 11.2m / 64.5m                                                                 
VAL f1	0.20024519820187986 - (0.20024519820187986)                                                                     
VAL loss	0.2223320584180878             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.25		0.22		0.193		0.853		2.39m - 2.4m / 0.0m                                                                    
2	34k	0.21		0.21		0.165		0.821		2.42m - 4.8m / 83.8m                                                                   
3	51k	0.20		0.21		0.154		0.808		2.42m - 7.3m / 84.8m                                                                   
4	68k	0.19		0.19		0.217		0.869		2.42m - 9.7m / 84.6m                                                                   
5	85k	0.17		0.19		0.249		0.894		2.38m - 12.1m / 84.6m                                                                  
6	102k	0.17		0.21		0.155		0.797		2.42m - 14.6m / 83.6m                                                                 
7	119k	0.15		0.19		0.216		0.865		2.41m - 17.0m / 84.6m                                                                 
8	137k	0.14		0.20		0.197		0.858		2.41m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.53		0.46		0.222		0.878		2.45m - 2.4m / 0.0m                                                                    
2	34k	0.50		0.44		0.212		0.876		2.45m - 4.9m / 85.7m                                                                   
3	51k	0.46		0.43		0.225		0.879		2.45m - 7.4m / 85.8m                                                                   
4	68k	0.42		0.37		0.305		0.923		2.43m - 9.8m / 85.9m                                                                   
5	85k	0.39		0.37		0.260		0.903		2.44m - 12.3m / 85.3m                                                                  
6	102k	0.36		0.36		0.285		0.921		2.44m - 14.8m / 85.4m                                                                 
7	119k	0.35		0.35		0.255		0.904		2.45m - 17.2m / 85.5m                                                                 
8	136k	0.33		0.37		0.255		0.901		2.46m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.18		0.18		0.117		0.731		2.01m - 2.0m / 0.0m                                                                    
2	34k	0.16		0.16		0.110		0.722		2.01m - 4.1m / 70.5m                                                                   
3	51k	0.15		0.13		0.205		0.851		2.00m - 6.1m / 70.4m                                                                   
4	68k	0.14		0.18		0.093		0.649		2.00m - 8.1m / 70.0m                                                                   
5	85k	0.14		0.16		0.134		0.764		2.00m - 10.1m / 70.1m                                                                  
6	103k	0.13		0.16		0.190		0.835		1.99m - 12.1m / 70.0m                                                                 
7	120k	0.13		0.15		0.150		0.791		2.00m - 14.1m / 69.9m                                                                 
8	137k	0.13		0.14		0.184		0.827		2.00m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.51		0.48		0.374		0.957		2.96m - 3.0m / 0.0m                                                                    
2	34k	0.42		0.74		0.087		0.673		2.97m - 6.0m / 103.6m                                                                  
3	51k	0.40		0.64		0.124		0.737		2.97m - 8.9m / 104.1m                                                                  
4	68k	0.40		0.57		0.150		0.829		2.97m - 11.9m / 104.0m                                                                 
5	85k	0.38		0.53		0.178		0.848		2.97m - 14.9m / 104.0m                                                                 
6	102k	0.37		0.53		0.196		0.860		2.98m - 17.9m / 104.1m                                                                
VAL f1	0.3740831295843521 - (0.3740831295843521)                                                                       
VAL loss	0.4761229908078431             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.26		0.19		0.245		0.905		1.36m - 1.4m / 0.0m                                                                    
2	34k	0.20		0.20		0.244		0.902		1.35m - 2.7m / 47.5m                                                                   
3	51k	0.20		0.20		0.192		0.850		1.35m - 4.1m / 47.3m                                                                   
4	68k	0.19		0.20		0.182		0.844		1.36m - 5.5m / 47.4m                                                                   
5	85k	0.18		0.21		0.124		0.753		1.35m - 6.8m / 47.6m                                                                   
6	103k	0.17		0.21		0.140		0.763		1.35m - 8.2m / 47.4m                                                                  
VAL f1	0.24545944973925554 - (0.24545944973925554)                                                                     
VAL loss	0.1948348704067889             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.22		0.22		0.230		0.893		1.32m - 1.3m / 0.0m                                                                    
2	34k	0.19		0.23		0.125		0.738		1.33m - 2.7m / 46.4m                                                                   
3	51k	0.18		0.21		0.146		0.798		1.33m - 4.0m / 46.5m                                                                   
4	68k	0.17		0.20		0.209		0.865		1.33m - 5.4m / 46.5m                                                                   
5	85k	0.16		0.20		0.181		0.835		1.33m - 6.7m / 46.7m                                                                   
6	102k	0.16		0.18		0.211		0.861		1.33m - 8.0m / 46.6m                                                                  
VAL f1	0.2295675053445157 - (0.2295675053445157)                                                                       
VAL loss	0.18424444291649794            

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.24		0.17		0.244		0.900		0.46m - 0.5m / 0.0m                                                                    
2	34k	0.18		0.17		0.200		0.854		0.47m - 1.0m / 16.2m                                                                   
3	51k	0.17		0.17		0.250		0.890		0.46m - 1.4m / 16.4m                                                                   
4	68k	0.15		0.16		0.229		0.871		0.47m - 1.9m / 16.3m                                                                   
5	86k	0.13		0.17		0.173		0.820		0.47m - 2.4m / 16.6m                                                                   
6	103k	0.12		0.17		0.233		0.879		0.47m - 2.9m / 16.6m                                                                  
7	120k	0.11		0.19		0.283		0.904		0.47m - 3.4m / 16.6m                                                                  
8	137k	0.10		0.20		0.235		0.881		0.47m -

+------------------------------+---------------------------------------------------+
 63%|███████████████████████████                | 63/100 [23:15:38<8:59:06, 874.23s/it, best loss: 0.10995660924978375]

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.37		0.34		0.202		0.862		1.87m - 1.9m / 0.0m                                                                    
2	34k	0.32		0.34		0.211		0.870		1.89m - 3.8m / 65.4m                                                                   
3	51k	0.32		0.34		0.139		0.804		1.83m - 5.6m / 66.1m                                                                   
4	68k	0.32		0.33		0.190		0.861		1.82m - 7.5m / 64.1m                                                                   
5	85k	0.32		0.37		0.244		0.892		1.83m - 9.3m / 63.9m                                                                   
6	102k	0.32		0.34		0.216		0.866		1.87m - 11.2m / 64.1m                                                                 
7	119k	0.31		0.39		0.198		0.846		1.87m - 13.1m / 65.4m                                                                 
8	136k	0.33		0.40		0.223		0.874		1.88m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.29		0.27		0.263		0.916		1.98m - 2.0m / 0.0m                                                                    
2	34k	0.27		0.23		0.290		0.927		2.01m - 4.0m / 69.3m                                                                   
3	51k	0.24		0.24		0.210		0.867		1.96m - 6.0m / 70.3m                                                                   
4	68k	0.24		0.22		0.228		0.893		1.95m - 8.0m / 68.7m                                                                   
5	85k	0.24		0.22		0.368		0.956		1.94m - 9.9m / 68.6m                                                                   
6	102k	0.24		0.25		0.377		0.958		1.96m - 11.9m / 68.3m                                                                 
7	119k	0.25		0.25		0.265		0.916		1.94m - 13.9m / 68.9m                                                                 
8	136k	0.25		0.26		0.265		0.916		1.96m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.27		0.28		0.238		0.899		0.80m - 0.8m / 0.0m                                                                    
2	34k	0.23		0.30		0.162		0.812		0.80m - 1.6m / 27.9m                                                                   
3	51k	0.22		0.38		0.106		0.675		0.80m - 2.4m / 27.9m                                                                   
4	68k	0.21		0.38		0.084		0.703		0.80m - 3.2m / 28.1m                                                                   
5	85k	0.20		0.36		0.118		0.744		0.80m - 4.1m / 28.1m                                                                   
6	103k	0.20		0.35		0.180		0.826		0.80m - 4.9m / 28.0m                                                                  
VAL f1	0.23825098984334653 - (0.23825098984334653)                                                                     
VAL loss	0.2782446909131426             

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.48		0.42		0.142		0.794		3.30m - 3.3m / 0.0m                                                                    
2	34k	0.39		0.39		0.159		0.814		3.32m - 6.7m / 115.6m                                                                  
3	51k	0.38		0.37		0.230		0.884		3.30m - 10.0m / 116.3m                                                                 
4	68k	0.38		0.37		0.195		0.856		3.33m - 13.3m / 115.7m                                                                 
5	85k	0.38		0.37		0.243		0.898		3.31m - 16.7m / 116.5m                                                                 
6	102k	0.39		0.42		0.188		0.846		3.32m - 20.0m / 115.9m                                                                
7	119k	0.42		0.42		0.176		0.829		3.32m - 23.4m / 116.3m                                                                
8	136k	0.42		0.46		0.265		0.916		3.31m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.52		0.45		0.344		0.949		3.69m - 3.7m / 0.0m                                                                    
2	34k	0.46		0.48		0.199		0.856		3.77m - 7.6m / 129.1m                                                                  
3	51k	0.45		0.47		0.331		0.943		3.70m - 11.3m / 132.0m                                                                 
4	68k	0.45		0.44		0.285		0.925		3.72m - 15.0m / 129.7m                                                                 
5	85k	0.46		0.49		0.382		0.959		3.74m - 18.8m / 130.4m                                                                 
6	102k	0.47		0.53		0.265		0.916		3.79m - 22.6m / 130.9m                                                                
7	119k	0.47		0.50		0.238		0.911		3.69m - 26.3m / 132.6m                                                                
8	137k	0.47		0.48		0.238		0.910		3.69m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.25		0.17		0.281		0.928		0.84m - 0.8m / 0.0m                                                                    
2	34k	0.18		0.17		0.228		0.896		0.84m - 1.7m / 29.3m                                                                   
3	51k	0.18		0.18		0.181		0.847		0.84m - 2.6m / 29.4m                                                                   
4	68k	0.17		0.19		0.174		0.835		0.84m - 3.4m / 29.4m                                                                   
5	86k	0.16		0.21		0.099		0.718		0.84m - 4.3m / 29.4m                                                                   
6	103k	0.16		0.24		0.070		0.641		0.84m - 5.1m / 29.5m                                                                  
VAL f1	0.28066248880931066 - (0.28066248880931066)                                                                     
VAL loss	0.16834350116011026            

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.29		0.27		0.179		0.841		1.19m - 1.2m / 0.0m                                                                    
2	34k	0.28		0.28		0.283		0.910		1.19m - 2.4m / 41.6m                                                                   
3	51k	0.29		0.33		0.161		0.792		1.19m - 3.6m / 41.7m                                                                   
4	68k	0.29		0.35		0.119		0.682		1.19m - 4.8m / 41.6m                                                                   
5	85k	0.31		0.32		0.124		0.698		1.22m - 6.1m / 41.8m                                                                   
6	102k	0.32		0.31		0.127		0.712		1.21m - 7.3m / 42.7m                                                                  
7	119k	0.27		0.29		0.149		0.758		1.20m - 8.5m / 42.3m                                                                  
VAL f1	0.2834797891036907 - (0.283479789

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.18		0.13		0.233		0.876		1.02m - 1.0m / 0.0m                                                                    
2	34k	0.13		0.11		0.291		0.906		1.01m - 2.1m / 35.6m                                                                   
3	51k	0.12		0.11		0.288		0.910		1.01m - 3.1m / 35.4m                                                                   
4	69k	0.11		0.12		0.204		0.860		1.00m - 4.1m / 35.4m                                                                   
5	86k	0.10		0.11		0.358		0.933		1.00m - 5.1m / 35.2m                                                                   
6	103k	0.10		0.11		0.208		0.856		1.00m - 6.1m / 35.2m                                                                  
7	120k	0.10		0.12		0.280		0.904		1.00m - 7.2m / 35.2m                                                                  
8	137k	0.10		0.13		0.316		0.918		1.00m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.19		0.17		0.195		0.850		0.75m - 0.8m / 0.0m                                                                    
2	34k	0.17		0.17		0.212		0.861		0.76m - 1.6m / 26.4m                                                                   
3	51k	0.15		0.14		0.289		0.914		0.74m - 2.3m / 26.6m                                                                   
4	68k	0.14		0.14		0.267		0.899		0.75m - 3.1m / 26.0m                                                                   
5	85k	0.13		0.15		0.243		0.884		0.75m - 3.9m / 26.3m                                                                   
6	103k	0.12		0.15		0.186		0.850		0.75m - 4.6m / 26.3m                                                                  
7	120k	0.11		0.17		0.168		0.842		0.75m - 5.4m / 26.4m                                                                  
8	137k	0.11		0.18		0.117		0.750		0.75m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.34		0.20		0.201		0.877		0.43m - 0.4m / 0.0m                                                                    
2	34k	0.20		0.17		0.289		0.921		0.43m - 0.9m / 15.1m                                                                   
3	51k	0.18		0.16		0.277		0.910		0.43m - 1.3m / 15.2m                                                                   
4	68k	0.17		0.15		0.291		0.913		0.43m - 1.8m / 15.1m                                                                   
5	85k	0.15		0.14		0.320		0.924		0.43m - 2.2m / 15.1m                                                                   
6	103k	0.14		0.14		0.299		0.913		0.43m - 2.7m / 15.1m                                                                  
7	120k	0.13		0.13		0.321		0.922		0.43m - 3.1m / 15.1m                                                                  
8	137k	0.12		0.13		0.305		0.916		0.43m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.23		0.17		0.159		0.819		1.65m - 1.7m / 0.0m                                                                    
2	34k	0.18		0.18		0.139		0.776		1.66m - 3.3m / 57.8m                                                                   
3	51k	0.17		0.17		0.146		0.785		1.66m - 5.0m / 58.0m                                                                   
4	68k	0.17		0.16		0.150		0.794		1.65m - 6.7m / 58.0m                                                                   
5	86k	0.16		0.16		0.162		0.828		1.65m - 8.3m / 57.9m                                                                   
6	103k	0.16		0.17		0.158		0.817		1.65m - 10.0m / 57.9m                                                                 
7	120k	0.15		0.18		0.108		0.703		1.67m - 11.7m / 58.0m                                                                 
8	137k	0.15		0.19		0.115		0.712		1.66m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.55		0.35		0.117		0.734		1.70m - 1.7m / 0.0m                                                                    
2	34k	0.32		0.26		0.249		0.907		1.69m - 3.4m / 59.5m                                                                   
3	51k	0.28		0.26		0.252		0.909		1.69m - 5.1m / 59.3m                                                                   
4	68k	0.28		0.25		0.315		0.939		1.69m - 6.9m / 59.3m                                                                   
5	85k	0.27		0.25		0.281		0.924		1.71m - 8.6m / 59.1m                                                                   
6	102k	0.27		0.25		0.274		0.917		1.73m - 10.3m / 60.0m                                                                 
7	119k	0.26		0.24		0.283		0.922		1.68m - 12.0m / 60.6m                                                                 
8	136k	0.26		0.24		0.292		0.925		1.69m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.82		0.79		0.265		0.916		3.80m - 3.8m / 0.0m                                                                    
2	34k	0.84		0.74		0.382		0.959		3.82m - 7.6m / 132.9m                                                                  
3	51k	0.71		0.66		0.382		0.959		3.79m - 11.5m / 133.8m                                                                 
4	68k	0.67		0.58		0.382		0.959		3.83m - 15.3m / 132.8m                                                                 
5	85k	0.66		0.59		0.382		0.959		3.84m - 19.2m / 134.0m                                                                 
6	102k	0.64		0.56		0.382		0.959		3.88m - 23.1m / 134.3m                                                                
7	119k	0.65		0.58		0.382		0.959		3.82m - 26.9m / 135.6m                                                                
VAL f1	0.38205453392517436 - (0.38205453

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.80		0.77		0.158		0.826		2.39m - 2.4m / 0.0m                                                                    
2	34k	0.63		0.84		0.280		0.913		2.44m - 4.9m / 83.5m                                                                   
3	51k	0.63		0.88		0.281		0.917		2.41m - 7.3m / 85.5m                                                                   
4	68k	0.64		1.17		0.288		0.942		2.49m - 9.8m / 84.6m                                                                   
5	85k	0.64		1.12		0.322		0.933		2.40m - 12.2m / 87.1m                                                                  
6	102k	0.65		1.22		0.339		0.948		2.42m - 14.7m / 84.1m                                                                 
7	119k	0.63		1.66		0.346		0.952		2.38m - 17.1m / 84.9m                                                                 
8	136k	0.62		2.05		0.388		0.959		2.40m -

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.45		0.33		0.023		0.852		2.65m - 2.6m / 0.0m                                                                    
2	34k	0.29		0.29		0.382		0.959		2.63m - 5.3m / 92.7m                                                                   
3	51k	0.26		0.30		0.265		0.916		2.64m - 8.0m / 92.2m                                                                   
4	68k	0.26		0.29		0.265		0.916		2.75m - 10.7m / 92.5m                                                                  
5	85k	0.26		0.29		0.265		0.916		2.68m - 13.4m / 96.0m                                                                  
6	102k	0.25		0.29		0.265		0.916		2.65m - 16.1m / 93.8m                                                                 
7	119k	0.26		0.29		0.265		0.916		2.65m - 18.8m / 92.9m                                                                 
VAL f1	0.38205453392517436 - (0.38205453

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time                                                             
1	17k	0.29		0.28		0.246		0.912		2.54m - 2.5m / 0.0m                                                                    
2	34k	0.30		0.29		0.212		0.874		2.63m - 5.2m / 88.8m                                                                   
3	51k	0.30		0.28		0.265		0.916		2.60m - 7.8m / 91.8m                                                                   
4	68k	0.28		0.26		0.265		0.916		2.88m - 10.7m / 91.1m                                                                  
 78%|████████████████████████████████▊         | 78/100 [28:34:28<8:18:47, 1360.35s/it, best loss: 0.10995660924978375]