In [1]:
import numpy as np
import math
import time
import logging
from data.data_loader import Dataset
from misc.preferences import PREFERENCES
from misc.run_configuration import get_default_params, OutputLayerType, LearningSchedulerType, OptimizerType, good_organic_hp_params
from misc import utils

from optimizer import get_optimizer
from criterion import NllLoss, LossCombiner

from models.transformer.encoder import TransformerEncoder
from models.jointAspectTagger import JointAspectTagger
from trainer.train import Trainer
import pprint
from data.organic2019 import organic_dataset as dsl
from data.organic2019 import ORGANIC_TASK_ALL, ORGANIC_TASK_ENTITIES, ORGANIC_TASK_ATTRIBUTES, ORGANIC_TASK_ENTITIES_COMBINE

# Loading Functions

These functions load the dataset and build the model together with its trainer. The run configuration (`rc`) determines the architecture and the hyperparameters.

In [2]:
def load_model(dataset, rc, experiment_name):
    """Assemble loss, encoder, tagger and optimizer and wrap them in a Trainer.

    Args:
        dataset: Loaded dataset; its ``class_weights``, ``source_embedding``
            and ``target_names`` attributes are read here.
        rc: Run configuration, handed to the encoder, the tagger, the
            optimizer factory and the trainer.
        experiment_name: Name passed through to the trainer.

    Returns:
        A ``Trainer`` created with tensorboard and verbose output disabled.
    """
    # NOTE(review): the literals 4 and 20 are passed through unexplained;
    # presumably label / aspect counts -- confirm against LossCombiner and
    # JointAspectTagger before changing either.
    criterion = LossCombiner(4, dataset.class_weights, NllLoss)
    encoder = TransformerEncoder(dataset.source_embedding, hyperparameters=rc)
    tagger = JointAspectTagger(encoder, rc, 4, 20, dataset.target_names)
    return Trainer(
        tagger,
        criterion,
        get_optimizer(tagger, rc),
        rc,
        dataset,
        experiment_name,
        enable_tensorboard=False,
        verbose=False,
    )

def load_dataset(rc, logger, task):
    """Create the ``Dataset`` for ``task`` and load its data.

    Args:
        rc: Run configuration forwarded to ``Dataset``.
        logger: Logger forwarded to ``Dataset``.
        task: Task identifier forwarded to ``Dataset``.

    Returns:
        The ``Dataset`` instance after ``load_data`` has been called on it.
    """
    # File locations come from the global PREFERENCES object.
    # NOTE(review): the preamble's preferences name '*.csv' files while
    # file_format is '.tsv' -- confirm how Dataset combines the two.
    options = {
        'source_index': 0,
        'target_vocab_index': 1,
        'data_path': PREFERENCES.data_root,
        'train_file': PREFERENCES.data_train,
        'valid_file': PREFERENCES.data_validation,
        'test_file': PREFERENCES.data_test,
        'file_format': '.tsv',
        'init_token': None,
        'eos_token': None,
    }
    corpus = Dataset(task, logger, rc, **options)
    corpus.load_data(dsl, verbose=False)
    return corpus

## Preamble - Definitions

In [3]:
# Status markers stored under the 'status' key of an objective() result.
STATUS_OK = 'ok'
STATUS_FAIL = 'fail'

# Name handed to utils.create_loggers() for every run in this notebook.
main_experiment_name = 'Organic19_Experiments'

# CUDA toggle for the experiments.
use_cuda = True

# Data location and file names read by load_dataset() via PREFERENCES.
PREFERENCES.defaults(
    data_root='./data/data/organic2019',
    data_train='train.csv',
    data_validation='validation.csv',
    data_test='test.csv',
    early_stopping='highest_5_F1',
)

 # Definition of experiments
 
 - SpellChecker On
 - Fasttext
 - Single Sentence
 - Combined Sentence
 - Stop Words

In [4]:
# Baseline hyperparameters that every experiment starts from. This is an
# alias of good_organic_hp_params, not a copy.
baseline = good_organic_hp_params
pprint.pprint(baseline, indent=2)

{ 'att_d_k': 100,
  'att_d_v': 100,
  'batch_size': 20,
  'clip_comments_to': 195,
  'dropout_rate': 0.392996573831,
  'early_stopping': 5,
  'embedding_dim': 300,
  'embedding_name': '6B',
  'embedding_type': 'glove',
  'language': 'en',
  'learning_rate_scheduler': { 'noam_learning_rate_factor': 3.3368149482,
                               'noam_learning_rate_warmup': 4631},
  'learning_rate_scheduler_type': <LearningSchedulerType.Noam: 1>,
  'log_every_xth_iteration': -1,
  'model_size': 300,
  'num_encoder_blocks': 2,
  'num_epochs': 35,
  'num_heads': 3,
  'optimizer': { 'adam_beta1': 0.89178641984,
                 'adam_beta2': 0.83491754824,
                 'adam_eps': 8.734158747166484e-09,
                 'adam_weight_decay': 1e-08,
                 'learning_rate': 0.001},
  'optimizer_type': <OptimizerType.Adam: 1>,
  'output_dropout_rate': 0.7608194889605,
  'output_layer_type': <OutputLayerType.LinearSum: 1>,
  'pointwise_layer_size': 195,
  'task': 'entities',
  'use_s

In [5]:
# One entry per experiment. 'rc' holds the overrides applied on top of the
# baseline run configuration; an empty dict means "baseline unchanged".
# NOTE(review): 'loss' and 'f1' look like placeholder start values; nothing
# in the visible cells reads or updates them -- confirm.
experiments = [
    dict(
        name='Baseline',
        description='Baseline. Uses good_organic_hp_params without any changes',
        loss=1000,
        f1=-1,
        rc={},
    ),
]

### Current commit

In [6]:
# Record which code revision produced the results below. The helper is
# called once; a bare call whose value is discarded would display nothing
# because it is not the last expression of the cell.
print('Current commit: ' + utils.get_current_git_commit())

Current commit: b'50e2eba'


In [None]:
def _failure_result(run_time, trainer=None):
    """Build the result dict returned by ``objective`` for a failed run.

    Args:
        run_time: ``time.time()`` value taken when the run started.
        trainer: Trainer whose best loss / F1 should be reported, or ``None``
            when the failure happened before a trainer existed.

    Returns:
        dict with 'status' (``STATUS_FAIL``) and 'eval_time', plus
        'best_loss' and 'best_f1' when a trainer is given.
    """
    failure = {
        'status': STATUS_FAIL,
        'eval_time': time.time() - run_time
    }
    if trainer is not None:
        failure['best_loss'] = trainer.get_best_loss()
        failure['best_f1'] = trainer.get_best_f1()
    return failure


def objective(rc, experiment):
    """Run one experiment end to end: load data, build model, train, evaluate.

    Every stage is guarded: an exception is printed, logged with its
    traceback, and turned into a ``STATUS_FAIL`` result instead of
    propagating, so one broken experiment does not abort the caller's loop.

    Args:
        rc: Run configuration. ``rc.task`` selects the dataset task and
            ``rc.use_cuda`` is forwarded to ``trainer.train``.
        experiment: dict with at least the keys 'name' and 'description'.

    Returns:
        dict. On failure: 'status' == ``STATUS_FAIL`` and 'eval_time', plus
        'best_loss' / 'best_f1' once a trainer exists. On success:
        'status' == ``STATUS_OK``, 'loss' (validation loss of the final
        evaluation), 'eval_time', 'best_loss', 'best_f1',
        'sample_iterations', 'iterations', 'rc' and 'results' with
        loss / f1 for 'train', 'validation' and 'test'.
    """
    run_time = time.time()

    # reset loggers so every run writes into its own log location
    utils.reset_loggers()
    experiment_name = utils.create_loggers(experiment_name=main_experiment_name)
    logger = logging.getLogger(__name__)
    dataset_logger = logging.getLogger('data_loader')

    logger.info('Experiment: ' + experiment['name'])
    logger.info('Description: ' + experiment['description'])

    logger.info('Parameters')
    logger.info(rc)
    print('\n\n#########################################################################')
    print('Name: ' + experiment['name'])
    print('Description: ' + experiment['description'])
    print('#########################################################################\n\n')
    print(rc)

    logger.debug('Load dataset')
    try:
        dataset = load_dataset(rc, dataset_logger, rc.task)
    except Exception as err:
        # Message previously read "Could load dataset" (missing "not").
        print('Could not load dataset: ' + str(err))
        logger.exception("Could not load dataset")
        return _failure_result(run_time)
    logger.debug('dataset loaded')
    logger.debug('Load model')

    try:
        trainer = load_model(dataset, rc, experiment_name)
    except Exception as err:
        print('Could not load model: ' + str(err))
        logger.exception("Could not load model")
        return _failure_result(run_time)

    logger.debug('model loaded')

    logger.debug('Begin training')
    try:
        # Evaluation is deferred to perform_final_evaluation() below.
        trainer.train(use_cuda=rc.use_cuda, perform_evaluation=False)
    except Exception as err:
        print('Exception while training: ' + str(err))
        logger.exception("Could not complete iteration")
        return _failure_result(run_time, trainer)

    if math.isnan(trainer.get_best_loss()):
        # A NaN loss counts as a failed run; log it like the other failures.
        print('Loss is nan')
        logger.error('Loss is nan')
        return _failure_result(run_time, trainer)

    # perform evaluation and log results
    try:
        result = trainer.perform_final_evaluation(use_test_set=True, verbose=False)
    except Exception as err:
        logger.exception("Could not complete iteration evaluation.")
        print('Could not complete iteration evaluation: ' + str(err))
        return _failure_result(run_time, trainer)

    # result is indexed as [split][metric]: split 0/1/2 is used for
    # train/validation/test, metric 0/1 for loss/f1.
    print(f'VAL f1\t{trainer.get_best_f1()} - ({result[1][1]})')
    print(f'VAL loss\t{trainer.get_best_loss()}')
    return {
            'loss': result[1][0],
            'status': STATUS_OK,
            'eval_time': time.time() - run_time,
            'best_loss': trainer.get_best_loss(),
            'best_f1': trainer.get_best_f1(),
            'sample_iterations': trainer.get_num_samples_seen(),
            'iterations': trainer.get_num_iterations(),
            'rc': rc,
            'results': {
                'train': {
                    'loss': result[0][0],
                    'f1': result[0][1]
                },
                'validation': {
                    'loss': result[1][0],
                    'f1': result[1][1]
                },
                'test': {
                    'loss': result[2][0],
                    'f1': result[2][1]
                }
            }
        }

# Perform experiments

In [None]:
# ASCII-art rows of the result banner, shared by the OK and FAIL variants so
# the two cannot drift apart. (The former FAIL variant had one backslash too
# many on the fifth row, which printed a literal "\n" instead of a newline.)
OWL_ROWS = (
    "       .---.",
    "          /     \\",
    "          \\.@-@./",
    "          /`\\_/`\\",
    "         //  _  \\\\",
    "        | \\     )|_",
    "       /`\\_`>  <_/ \\",
    "       \\__/'---'\\__/",
)

for e in experiments:
    name = e['name']
    print(f'#########################################################################\n\nExperiment Name: {name}\n')
    print('#########################################################################\n\n')

    # generate rc: baseline parameters plus this experiment's overrides;
    # the CUDA flag comes from the preamble instead of a hard-coded True
    rc = get_default_params(use_cuda=use_cuda, overwrite=e['rc'], from_default=baseline)
    result = objective(rc, e)

    # Text appended to individual banner rows, keyed by row index.
    if result['status'] == STATUS_OK:
        row_notes = {
            2: f"\tExperiment: {name}",
            3: f"\tStatus: {result['status']}",
            4: f"\tLoss: {result['best_loss']}",
            5: f"\tf1: {result['best_f1']}",
        }
    else:
        row_notes = {2: f"\tExperiment: {name} (FAIL)"}

    print("\n".join(row + row_notes.get(idx, "") for idx, row in enumerate(OWL_ROWS)) + "\n")
    





#########################################################################

Experiment Name: Baseline

#########################################################################


Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\Organic19_Experiments\20190331\14


#########################################################################
Name: Baseline
Description: Baseline. Uses good_organic_hp_params without any changes
#########################################################################


+----------------------------------------------------------------------------------+
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                       |
+------------------------------+---------------------------------------------------+
|            kwargs        

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.49		0.38		0.053		0.872		0.51m - 0.5m / 0.0m
2	9k	0.42		0.34		0.181		0.816		0.48m - 1.0m / 17.9m
3	13k	0.39		0.36		0.230		0.798		0.44m - 1.5m / 17.0m
4	17k	0.38		0.37		0.214		0.800		0.44m - 2.0m / 15.7m
5	22k	0.36		0.34		0.266		0.814		0.44m - 2.5m / 15.7m
6	26k	0.35		0.38		0.206		0.763		0.43m - 3.0m / 15.7m
7	30k	0.33		0.38		0.247		0.788		0.44m - 3.4m / 15.5m
8	35k	0.32		0.35		0.246		0.799		0.43m - 3.9m / 15.7m
9	39k	0.31		0.43		0.224		0.782		0.43m - 4.4m / 15.6m
10	43k	0.30		0.38		0.243		0.793		0.43m - 4.8m / 15.6m
VAL f1	0.2657952069716776 - (0.2657952069716776)
VAL loss	0.33774394708521227
       .---.
          /     \
          \.@-@./	Experiment: Baseline
          /`\_/`\	Status: ok
         //  _  \\	Loss: 0.33774394708521227
        | \     )|_	f1: 0.2657952069716776
       /`\_`>  <_/ \
       \__/'---'\__/

#########################################################################

Experiment Name: SpellChecker 

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.48		0.40		0.180		0.842		0.45m - 0.5m / 0.0m
2	9k	0.43		0.37		0.176		0.786		0.44m - 0.9m / 15.8m
3	13k	0.39		0.37		0.225		0.798		0.44m - 1.4m / 15.6m
4	17k	0.38		0.38		0.244		0.805		0.44m - 1.9m / 15.7m
5	22k	0.36		0.34		0.284		0.820		0.45m - 2.4m / 15.7m
6	26k	0.35		0.36		0.213		0.757		0.46m - 2.9m / 15.9m
7	30k	0.33		0.38		0.263		0.798		0.42m - 3.4m / 16.2m
8	35k	0.32		0.33		0.270		0.817		0.43m - 3.8m / 15.1m
9	39k	0.30		0.36		0.218		0.805		0.42m - 4.3m / 15.4m
10	43k	0.29		0.36		0.275		0.830		0.42m - 4.7m / 15.3m
VAL f1	0.2838283828382838 - (0.2838283828382838)
VAL loss	0.3339613486738766
       .---.
          /     \
          \.@-@./	Experiment: SpellChecker On
          /`\_/`\	Status: ok
         //  _  \\	Loss: 0.3339613486738766
        | \     )|_	f1: 0.2838283828382838
       /`\_`>  <_/ \
       \__/'---'\__/

#########################################################################

Experiment Name: Fasttext

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.50		0.39		0.006		0.885		0.42m - 0.4m / 0.0m
2	9k	0.42		0.35		0.204		0.815		0.43m - 0.9m / 14.7m
3	13k	0.37		0.35		0.240		0.799		0.42m - 1.3m / 15.1m
4	17k	0.36		0.37		0.225		0.795		0.43m - 1.8m / 14.8m
5	22k	0.34		0.33		0.292		0.830		0.43m - 2.3m / 15.1m
6	26k	0.32		0.37		0.269		0.780		0.42m - 2.7m / 15.2m
7	30k	0.30		0.40		0.249		0.771		0.42m - 3.2m / 15.0m
8	35k	0.28		0.39		0.253		0.773		0.42m - 3.7m / 15.1m
9	39k	0.26		0.45		0.264		0.800		0.43m - 4.1m / 15.0m
10	43k	0.24		0.43		0.268		0.809		0.42m - 4.6m / 15.3m
VAL f1	0.29161882893226176 - (0.29161882893226176)
VAL loss	0.3286756620687597
       .---.
          /     \
          \.@-@./	Experiment: Fasttext
          /`\_/`\	Status: ok
         //  _  \\	Loss: 0.3286756620687597
        | \     )|_	f1: 0.29161882893226176
       /`\_`>  <_/ \
       \__/'---'\__/

#########################################################################

Experiment Name: Rolling Sent

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.51		0.38		0.065		0.875		0.42m - 0.4m / 0.0m
2	9k	0.42		0.36		0.146		0.813		0.42m - 0.9m / 14.9m
3	13k	0.39		0.37		0.235		0.790		0.42m - 1.4m / 14.9m
4	17k	0.38		0.40		0.197		0.770		0.42m - 1.8m / 14.9m
5	22k	0.37		0.35		0.238		0.809		0.43m - 2.3m / 15.0m
6	26k	0.35		0.43		0.182		0.716		0.42m - 2.7m / 15.1m
7	30k	0.34		0.38		0.241		0.790		0.42m - 3.2m / 15.0m
8	35k	0.33		0.41		0.217		0.780		0.43m - 3.7m / 15.1m
9	39k	0.31		0.43		0.213		0.784		0.42m - 4.1m / 15.2m
10	43k	0.30		0.44		0.220		0.809		0.43m - 4.6m / 15.2m
11	48k	0.28		0.46		0.247		0.793		0.43m - 5.1m / 15.3m
12	52k	0.27		0.47		0.236		0.778		0.42m - 5.5m / 15.3m
13	56k	0.26		0.48		0.165		0.761		0.42m - 6.0m / 15.2m
14	60k	0.25		0.49		0.251		0.799		0.42m - 6.5m / 15.3m
15	65k	0.25		0.47		0.198		0.781		0.43m - 6.9m / 15.4m
16	69k	0.23		0.58		0.206		0.789		0.42m - 7.4m / 15.5m
17	73k	0.24		0.61		0.143		0.701		0.43m - 7.8m / 15.4m
18	78k	0.23		0.55		0.226		0.798		0.

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.50		0.37		0.210		0.870		0.41m - 0.4m / 0.0m
2	9k	0.40		0.35		0.201		0.793		0.42m - 0.9m / 14.4m
3	13k	0.37		0.36		0.262		0.791		0.42m - 1.3m / 14.9m
4	17k	0.37		0.37		0.254		0.813		0.42m - 1.8m / 14.9m
5	22k	0.35		0.36		0.265		0.806		0.42m - 2.3m / 14.9m
6	26k	0.33		0.41		0.221		0.757		0.43m - 2.7m / 15.0m
7	30k	0.32		0.39		0.268		0.798		0.43m - 3.2m / 15.1m
8	35k	0.31		0.37		0.281		0.806		0.42m - 3.7m / 15.2m
9	39k	0.29		0.39		0.273		0.824		0.43m - 4.1m / 15.1m
10	43k	0.27		0.44		0.247		0.808		0.43m - 4.6m / 15.3m
11	48k	0.26		0.42		0.265		0.807		0.42m - 5.1m / 15.3m
12	52k	0.24		0.48		0.244		0.803		0.42m - 5.5m / 15.1m
13	56k	0.23		0.47		0.255		0.801		0.42m - 6.0m / 15.1m
VAL f1	0.281282316442606 - (0.281282316442606)
VAL loss	0.34873716550714823
       .---.
          /     \
          \.@-@./	Experiment: Stop Words
          /`\_/`\	Status: ok
         //  _  \\	Loss: 0.34873716550714823
        | \     )|_	f1: 0.281

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.49		0.39		0.048		0.871		0.40m - 0.4m / 0.0m
2	9k	0.42		0.35		0.180		0.819		0.41m - 0.9m / 14.1m
3	13k	0.39		0.37		0.212		0.795		0.41m - 1.3m / 14.4m
4	17k	0.39		0.37		0.224		0.801		0.41m - 1.7m / 14.4m
5	22k	0.38		0.33		0.250		0.819		0.41m - 2.2m / 14.5m
6	26k	0.38		0.39		0.186		0.745		0.40m - 2.6m / 14.4m
7	30k	0.38		0.35		0.212		0.802		0.40m - 3.1m / 14.3m
8	35k	0.38		0.36		0.224		0.805		0.40m - 3.5m / 14.3m
9	39k	0.37		0.36		0.214		0.805		0.40m - 4.0m / 14.4m
10	43k	0.38		0.37		0.219		0.803		0.40m - 4.4m / 14.4m
VAL f1	0.24971363115693013 - (0.24971363115693013)
VAL loss	0.3311501334695255
       .---.
          /     \
          \.@-@./	Experiment: Embedding finetuning Off
          /`\_/`\	Status: ok
         //  _  \\	Loss: 0.3311501334695255
        | \     )|_	f1: 0.24971363115693013
       /`\_`>  <_/ \
       \__/'---'\__/

#########################################################################

Experiment Na

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.50		0.38		0.110		0.871		0.41m - 0.4m / 0.0m
2	9k	0.42		0.34		0.183		0.819		0.43m - 0.9m / 14.5m
3	13k	0.39		0.35		0.230		0.811		0.43m - 1.3m / 15.0m
4	17k	0.38		0.38		0.231		0.805		0.42m - 1.8m / 15.0m
5	22k	0.36		0.35		0.262		0.803		0.41m - 2.3m / 14.8m
6	26k	0.35		0.37		0.240		0.768		0.42m - 2.7m / 14.7m
7	30k	0.34		0.35		0.269		0.821		0.42m - 3.2m / 15.0m
8	35k	0.32		0.34		0.235		0.804		0.41m - 3.6m / 14.9m
9	39k	0.30		0.36		0.248		0.799		0.42m - 4.1m / 14.9m
10	43k	0.30		0.35		0.273		0.835		0.42m - 4.6m / 14.9m
11	48k	0.28		0.38		0.265		0.833		0.41m - 5.0m / 15.1m
12	52k	0.28		0.39		0.238		0.806		0.42m - 5.5m / 15.0m
13	56k	0.26		0.42		0.254		0.801		0.41m - 5.9m / 15.1m
14	60k	0.26		0.45		0.242		0.799		0.42m - 6.4m / 15.0m
15	65k	0.24		0.40		0.254		0.827		0.41m - 6.8m / 15.1m
VAL f1	0.2730627306273063 - (0.2730627306273063)
VAL loss	0.34360285716898303
       .---.
          /     \
          \.@-@./	Experiment: Orga