In [1]:
import numpy as np
import math
import time
import logging
from data.data_loader import Dataset
from misc.preferences import PREFERENCES
from misc.run_configuration import get_default_params, OutputLayerType, LearningSchedulerType, OptimizerType, good_organic_hp_params
from misc import utils

from optimizer import get_optimizer
from criterion import NllLoss, LossCombiner

from models.transformer.encoder import TransformerEncoder
from models.jointAspectTagger import JointAspectTagger
from trainer.train import Trainer
import pprint
from data.organic2019 import organic_dataset as dsl
from data.organic2019 import ORGANIC_TASK_ALL, ORGANIC_TASK_ENTITIES, ORGANIC_TASK_ATTRIBUTES, ORGANIC_TASK_ENTITIES_COMBINE

# Loading Functions

These functions load the dataset and build the model together with its trainer. The run configuration (`rc`) determines the architecture and the hyperparameters.

In [2]:
def load_model(dataset, rc, experiment_name):
    """Assemble loss, encoder, tagger and optimizer and wrap them in a Trainer.

    Args:
        dataset: loaded dataset; its ``class_weights``, ``source_embedding``
            and ``target_names`` attributes are read here.
        rc: run configuration handed to the encoder, the tagger, the
            optimizer factory and the trainer.
        experiment_name: name passed through to the trainer.

    Returns:
        A ``Trainer`` created with tensorboard and verbose output disabled.
    """
    # NOTE(review): the literals 4 and 20 are hard-coded; their meaning is not
    # visible in this notebook -- confirm against LossCombiner/JointAspectTagger.
    criterion = LossCombiner(4, dataset.class_weights, NllLoss)
    encoder = TransformerEncoder(dataset.source_embedding, hyperparameters=rc)
    tagger = JointAspectTagger(encoder, rc, 4, 20, dataset.target_names)

    return Trainer(
        tagger,
        criterion,
        get_optimizer(tagger, rc),
        rc,
        dataset,
        experiment_name,
        enable_tensorboard=False,
        verbose=False,
    )

def load_dataset(rc, logger, task):
    """Create the ``Dataset`` for ``task`` and load its data.

    Args:
        rc: run configuration forwarded to ``Dataset``.
        logger: logger forwarded to ``Dataset``.
        task: task identifier forwarded to ``Dataset``.

    Returns:
        The ``Dataset`` after ``load_data`` has been called on it.
    """
    # File locations come from the notebook-wide PREFERENCES object.
    # NOTE(review): the preamble configures '*.csv' file names while
    # file_format below is '.tsv' -- confirm the loader expects this.
    file_locations = dict(
        data_path=PREFERENCES.data_root,
        train_file=PREFERENCES.data_train,
        valid_file=PREFERENCES.data_validation,
        test_file=PREFERENCES.data_test,
    )

    organic_data = Dataset(
        task, logger, rc,
        source_index=0,
        target_vocab_index=1,
        **file_locations,
        file_format='.tsv',
        init_token=None,
        eos_token=None
    )
    organic_data.load_data(dsl, verbose=False)
    return organic_data

## Preamble - Definitions

In [3]:
# Status markers stored under the 'status' key of every experiment result.
STATUS_OK = 'ok'
STATUS_FAIL = 'fail'

# Name handed to the logger setup for every run of this notebook.
main_experiment_name = 'Organic19_Experiments'
use_cuda = True

# Dataset location, split file names and early-stopping setting.
PREFERENCES.defaults(
    data_root='./data/data/organic2019',
    data_train='train.csv',
    data_validation='validation.csv',
    data_test='test.csv',
    early_stopping='highest_5_F1',
)

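# Definition of experiments

Each experiment overrides part of the baseline configuration:

- SpellChecker On
- Fasttext
- Single Sentence
- Combined Sentence
- Stop Words
- Embedding finetuning Off
- Organic Text Cleaning
- Full Text Cleaning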

In [4]:
# `baseline` is another name for the imported parameter dict (no copy is made);
# every experiment below starts from it.
baseline = good_organic_hp_params
pprint.pprint(baseline, indent=2)

{ 'att_d_k': 100,
  'att_d_v': 100,
  'batch_size': 20,
  'clip_comments_to': 195,
  'dropout_rate': 0.392996573831,
  'early_stopping': 5,
  'embedding_dim': 300,
  'embedding_name': '6B',
  'embedding_type': 'glove',
  'language': 'en',
  'learning_rate_scheduler': { 'noam_learning_rate_factor': 3.3368149482,
                               'noam_learning_rate_warmup': 4631},
  'learning_rate_scheduler_type': <LearningSchedulerType.Noam: 1>,
  'log_every_xth_iteration': -1,
  'model_size': 300,
  'num_encoder_blocks': 2,
  'num_epochs': 35,
  'num_heads': 3,
  'optimizer': { 'adam_beta1': 0.89178641984,
                 'adam_beta2': 0.83491754824,
                 'adam_eps': 8.734158747166484e-09,
                 'adam_weight_decay': 1e-08,
                 'learning_rate': 0.001},
  'optimizer_type': <OptimizerType.Adam: 1>,
  'output_dropout_rate': 0.7608194889605,
  'output_layer_type': <OutputLayerType.LinearSum: 1>,
  'pointwise_layer_size': 195,
  'task': 'entities',
  'use_s

In [5]:
# One (name, description, rc overrides) triple per experiment. The overrides
# are the only run-configuration entries that differ from the baseline.
_experiment_specs = (
    ('Baseline',
     'Baseline. Uses good_organic_hp_params without any changes',
     {}),
    ('SpellChecker On',
     'Uses the baseline parameters but with the spellchecker enabled',
     {'use_spell_checkers': True}),
    ('Fasttext',
     'Uses english Fasttext embeddings',
     {'embedding_type': 'fasttext'}),
    ('Rolling Sentences',
     'Uses a combination of the last and the current sentence instead of just the sentence alone',
     {'task': 'entities_combine'}),
    ('Stop Words',
     'Uses stop words removal',
     {'use_stop_words': True}),
    ('Embedding finetuning Off',
     'This experiment prevents finetuning of the embedding layer',
     {'finetune_embedding': False}),
    ('Organic Text Cleaning',
     'Remove certain words and replace them with their correct counterparts',
     {'organic_text_cleaning': True}),
    ('Full Text Cleaning',
     'Features all text cleaning techniques including organic text cleaning, spell checkers, contraction removal, url token replacement, stop words',
     {
         'organic_text_cleaning': True,
         'use_stop_words': True,
         'use_spell_checkers': True,
         'contraction_removal': True,
         'replace_url_tokens': True
     }),
)

# Every entry starts with the same placeholder scores ('loss': 1000, 'f1': -1).
experiments = [
    {
        'name': spec_name,
        'description': spec_description,
        'loss': 1000,
        'f1': -1,
        'rc': dict(spec_overrides)
    }
    for spec_name, spec_description, spec_overrides in _experiment_specs
]

### Current commit

In [6]:
# Record which revision of the code produced the results below. The commit is
# looked up once; a separate call whose result is discarded adds nothing.
# NOTE(review): the recorded output shows a bytes repr (b'...') -- consider
# decoding inside utils.get_current_git_commit.
print('Current commit: ' + utils.get_current_git_commit())

Current commit: b'af92f28'


In [7]:
def _failed_run(run_time, trainer=None):
    """Build the result dict reported for a run that did not succeed.

    Args:
        run_time: ``time.time()`` value taken when the run started.
        trainer: optional trainer; when given, its best loss and best f1 are
            included under ``'best_loss'`` and ``'best_f1'``.

    Returns:
        dict with ``'status'`` set to ``STATUS_FAIL`` and the elapsed time
        under ``'eval_time'``, plus the trainer metrics if available.
    """
    outcome = {
        'status': STATUS_FAIL,
        'eval_time': time.time() - run_time
    }
    if trainer is not None:
        outcome['best_loss'] = trainer.get_best_loss()
        outcome['best_f1'] = trainer.get_best_f1()
    return outcome


def objective(rc, experiment):
    """Run one experiment: load data, build the model, train and evaluate.

    Any exception raised while loading the dataset, building the model,
    training or evaluating is logged and printed, and reported through the
    returned dict instead of being propagated.

    Args:
        rc: run configuration; ``rc.task`` and ``rc.use_cuda`` are read here
            and the object is forwarded to the loading functions.
        experiment: dict with at least the keys ``'name'`` and
            ``'description'`` (used for logging and printing only).

    Returns:
        dict whose ``'status'`` is ``STATUS_OK`` or ``STATUS_FAIL`` and whose
        ``'eval_time'`` is the elapsed wall-clock time in seconds. Failures
        after the trainer exists also carry ``'best_loss'`` and ``'best_f1'``.
        A successful run additionally carries ``'loss'`` (validation loss of
        the final evaluation), ``'sample_iterations'``, ``'iterations'``,
        ``'rc'`` and ``'results'`` with loss/f1 for train, validation and test.
    """
    run_time = time.time()

    # reset loggers
    utils.reset_loggers()
    experiment_name = utils.create_loggers(experiment_name=main_experiment_name)
    logger = logging.getLogger(__name__)
    dataset_logger = logging.getLogger('data_loader')

    logger.info('Experiment: ' + experiment['name'])
    logger.info('Description: ' + experiment['description'])

    logger.info('Parameters')
    logger.info(rc)
    print('\n\n#########################################################################')
    print('Name: ' + experiment['name'])
    print('Description: ' + experiment['description'])
    print('#########################################################################\n\n')
    print(rc)

    logger.debug('Load dataset')
    try:
        dataset = load_dataset(rc, dataset_logger, rc.task)
    except Exception as err:
        # The message used to read "Could load dataset", i.e. the opposite.
        print('Could not load dataset: ' + str(err))
        logger.exception("Could not load dataset")
        return _failed_run(run_time)
    logger.debug('dataset loaded')
    logger.debug('Load model')

    try:
        trainer = load_model(dataset, rc, experiment_name)
    except Exception as err:
        print('Could not load model: ' + str(err))
        logger.exception("Could not load model")
        return _failed_run(run_time)

    logger.debug('model loaded')

    logger.debug('Begin training')
    try:
        result = trainer.train(use_cuda=rc.use_cuda, perform_evaluation=False)
        # The model itself is not used afterwards; the lookup stays inside the
        # try block so a result without a 'model' entry counts as a failed run.
        model = result['model']
    except Exception as err:
        print('Exception while training: ' + str(err))
        logger.exception("Could not complete iteration")
        return _failed_run(run_time, trainer)

    # A nan best loss is treated as a failed run.
    if math.isnan(trainer.get_best_loss()):
        print('Loss is nan')
        return _failed_run(run_time, trainer)

    # perform evaluation and log results
    try:
        result = trainer.perform_final_evaluation(use_test_set=True, verbose=False)
    except Exception as err:
        logger.exception("Could not complete iteration evaluation.")
        print('Could not complete iteration evaluation: ' + str(err))
        return _failed_run(run_time, trainer)

    # `result` is indexed as [split][metric]: split 0/1/2 is used as
    # train/validation/test and metric 0/1 as loss/f1.
    print(f'VAL f1\t{trainer.get_best_f1()} - ({result[1][1]})')
    print(f'VAL loss\t{trainer.get_best_loss()}')
    return {
            'loss': result[1][0],
            'status': STATUS_OK,
            'eval_time': time.time() - run_time,
            'best_loss': trainer.get_best_loss(),
            'best_f1': trainer.get_best_f1(),
            'sample_iterations': trainer.get_num_samples_seen(),
            'iterations': trainer.get_num_iterations(),
            'rc': rc,
            'results': {
                'train': {
                    'loss': result[0][0],
                    'f1': result[0][1]
                },
                'validation': {
                    'loss': result[1][0],
                    'f1': result[1][1]
                },
                'test': {
                    'loss': result[2][0],
                    'f1': result[2][1]
                }
            }
        }

# Perform experiments

In [8]:
def _owl_report(annotations):
    """Return the ASCII-art owl with ``annotations`` attached to its rows.

    The first annotation is appended to the row with the owl's eyes (third
    row) and each further one to the following row, separated from the art
    by a tab. The returned text ends with a newline.
    """
    owl_rows = [
        "       .---.",
        "          /     \\",
        "          \\.@-@./",
        "          /`\\_/`\\",
        "         //  _  \\\\",
        "        | \\     )|_",
        "       /`\\_`>  <_/ \\",
        "       \\__/'---'\\__/",
    ]
    first_annotated_row = 2
    for offset, text in enumerate(annotations):
        owl_rows[first_annotated_row + offset] += '\t' + text
    return '\n'.join(owl_rows) + '\n'


for e in experiments:
    name = e['name']
    print(f'#########################################################################\n\nExperiment Name: {name}\n')
    print('#########################################################################\n\n')

    # generate rc: the experiment's overrides on top of the baseline; the
    # CUDA switch comes from the preamble cell instead of a second literal
    rc = get_default_params(use_cuda=use_cuda, overwrite=e['rc'], from_default=baseline)
    result = objective(rc, e)

    if result['status'] == STATUS_OK:
        annotations = [
            f"Experiment: {e['name']}",
            f"Status: {result['status']}",
            f"Loss: {result['best_loss']}",
            f"f1: {result['best_f1']}",
        ]
    else:
        annotations = [f"Experiment: {e['name']} (FAIL)"]

    # Both outcomes share one owl template. The hand-written FAIL variant had
    # one backslash too many, so it printed a literal "\n" and fused two rows.
    print(_owl_report(annotations))
    





#########################################################################

Experiment Name: Baseline

#########################################################################


Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\Organic19_Experiments\20190331\5


#########################################################################
Name: Baseline
Description: Baseline. Uses good_organic_hp_params without any changes
#########################################################################


+----------------------------------------------------------------------------------+
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                       |
+------------------------------+---------------------------------------------------+
|            kwargs         

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.49		0.38		0.038		0.882		0.43m - 0.4m / 0.0m
2	9k	0.42		0.35		0.184		0.806		0.42m - 0.9m / 15.2m
3	13k	0.39		0.37		0.215		0.802		0.44m - 1.4m / 14.9m
4	17k	0.38		0.38		0.222		0.791		0.44m - 1.9m / 15.5m
5	22k	0.36		0.32		0.271		0.823		0.42m - 2.3m / 15.4m
6	26k	0.34		0.39		0.227		0.753		0.43m - 2.8m / 15.1m
7	30k	0.33		0.37		0.270		0.789		0.44m - 3.3m / 15.4m
8	35k	0.32		0.36		0.256		0.801		0.44m - 3.8m / 15.5m
9	39k	0.30		0.35		0.259		0.823		0.43m - 4.2m / 15.6m
10	43k	0.29		0.38		0.252		0.812		0.43m - 4.7m / 15.4m
VAL f1	0.27149321266968324 - (0.27149321266968324)
VAL loss	0.32484197897069594
       .---.
          /     \
          \.@-@./	Experiment: Baseline
          /`\_/`\	Status: ok
         //  _  \\	Loss: 0.32484197897069594
        | \     )|_	f1: 0.27149321266968324
       /`\_`>  <_/ \
       \__/'---'\__/

#########################################################################

Experiment Name: SpellCheck

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.51		0.40		0.109		0.871		0.44m - 0.4m / 0.0m
2	9k	0.42		0.36		0.162		0.794		0.45m - 0.9m / 15.4m
3	13k	0.39		0.36		0.238		0.815		0.43m - 1.4m / 15.7m
4	17k	0.38		0.39		0.229		0.794		0.44m - 1.9m / 15.3m
5	22k	0.36		0.33		0.271		0.818		0.43m - 2.3m / 15.4m
6	26k	0.35		0.36		0.228		0.766		0.43m - 2.8m / 15.3m
7	30k	0.33		0.36		0.261		0.801		0.42m - 3.3m / 15.4m
8	35k	0.32		0.36		0.241		0.784		0.42m - 3.7m / 15.1m
9	39k	0.31		0.39		0.227		0.804		0.42m - 4.2m / 15.1m
10	43k	0.29		0.36		0.256		0.803		0.42m - 4.7m / 15.3m
VAL f1	0.270996640537514 - (0.270996640537514)
VAL loss	0.3259992901016684
       .---.
          /     \
          \.@-@./	Experiment: SpellChecker On
          /`\_/`\	Status: ok
         //  _  \\	Loss: 0.3259992901016684
        | \     )|_	f1: 0.270996640537514
       /`\_`>  <_/ \
       \__/'---'\__/

#########################################################################

Experiment Name: Fasttext

#

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.49		0.39		0.093		0.875		0.43m - 0.4m / 0.0m
2	9k	0.41		0.36		0.200		0.798		0.43m - 0.9m / 15.2m
3	13k	0.37		0.37		0.238		0.782		0.43m - 1.4m / 15.3m
4	17k	0.36		0.38		0.227		0.782		0.44m - 1.9m / 15.3m
5	22k	0.34		0.34		0.291		0.834		0.44m - 2.3m / 15.4m
6	26k	0.32		0.38		0.260		0.777		0.46m - 2.8m / 15.7m
7	30k	0.30		0.42		0.243		0.758		0.46m - 3.3m / 16.2m
8	35k	0.28		0.40		0.269		0.790		0.47m - 3.9m / 16.2m
9	39k	0.25		0.45		0.295		0.825		0.44m - 4.3m / 16.5m
10	43k	0.23		0.45		0.266		0.804		0.42m - 4.8m / 15.8m
11	48k	0.22		0.49		0.280		0.807		0.43m - 5.3m / 15.4m
12	52k	0.20		0.49		0.299		0.826		0.42m - 5.7m / 15.5m
13	56k	0.19		0.52		0.291		0.820		0.43m - 6.2m / 15.5m
14	60k	0.19		0.56		0.271		0.818		0.45m - 6.7m / 15.7m
15	65k	0.18		0.58		0.264		0.816		0.45m - 7.2m / 16.1m
16	69k	0.16		0.64		0.261		0.800		0.44m - 7.7m / 16.2m
17	73k	0.17		0.65		0.208		0.800		0.43m - 8.1m / 16.0m
VAL f1	0.29854096520763185 - (0.298

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.49		0.38		0.042		0.880		0.43m - 0.4m / 0.0m
2	9k	0.42		0.36		0.140		0.805		0.43m - 0.9m / 15.0m
3	13k	0.39		0.37		0.220		0.782		0.44m - 1.4m / 15.2m
4	17k	0.38		0.39		0.222		0.789		0.44m - 1.9m / 15.4m
5	22k	0.36		0.33		0.255		0.826		0.45m - 2.3m / 15.6m
6	26k	0.35		0.39		0.201		0.741		0.44m - 2.8m / 15.8m
7	30k	0.34		0.38		0.238		0.791		0.44m - 3.3m / 15.5m
8	35k	0.33		0.42		0.234		0.772		0.44m - 3.8m / 15.6m
9	39k	0.31		0.43		0.219		0.782		0.42m - 4.3m / 15.8m
10	43k	0.29		0.41		0.262		0.814		0.43m - 4.7m / 15.3m
11	48k	0.28		0.42		0.246		0.832		0.43m - 5.2m / 15.6m
12	52k	0.27		0.44		0.263		0.779		0.44m - 5.7m / 15.6m
13	56k	0.26		0.47		0.238		0.801		0.44m - 6.2m / 15.7m
14	60k	0.25		0.48		0.239		0.816		0.43m - 6.6m / 15.8m
15	65k	0.25		0.43		0.233		0.802		0.43m - 7.1m / 15.7m
16	69k	0.23		0.54		0.223		0.805		0.43m - 7.6m / 15.7m
17	73k	0.23		0.51		0.218		0.793		0.43m - 8.0m / 15.7m
VAL f1	0.26285714285714284 - (0.262

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	4k	0.50		0.37		0.194		0.859		0.42m - 0.4m / 0.0m
2	9k	0.41		0.34		0.211		0.810		0.43m - 0.9m / 14.8m
3	13k	0.38		0.35		0.262		0.809		0.44m - 1.4m / 15.3m
4	17k	0.37		0.36		0.251		0.802		0.44m - 1.9m / 15.4m
5	22k	0.34		0.35		0.257		0.802		0.44m - 2.3m / 15.6m
6	26k	0.33		0.41		0.232		0.768		0.43m - 2.8m / 15.7m
7	30k	0.31		0.39		0.259		0.797		0.44m - 3.3m / 15.4m
8	35k	0.30		0.37		0.267		0.803		0.47m - 3.8m / 15.5m
9	39k	0.28		0.41		0.242		0.803		0.46m - 4.3m / 16.6m
10	43k	0.27		0.44		0.224		0.773		0.42m - 4.8m / 16.2m
11	48k	0.25		0.40		0.304		0.834		0.42m - 5.2m / 15.4m
12	52k	0.24		0.47		0.254		0.805		0.43m - 5.7m / 15.4m
13	56k	0.22		0.57		0.225		0.786		0.42m - 6.2m / 15.6m
14	60k	0.22		0.58		0.242		0.794		0.42m - 6.6m / 15.5m
15	65k	0.21		0.57		0.210		0.778		0.42m - 7.1m / 15.6m
16	69k	0.20		0.62		0.185		0.772		0.42m - 7.6m / 15.6m
VAL f1	0.3042973286875726 - (0.3042973286875726)
VAL loss	0.34349424137788664
       .---