In [1]:
from hyperopt import fmin, tpe, hp, STATUS_OK, STATUS_FAIL, Trials
import numpy as np
import math
import time
import logging
from data.data_loader import Dataset
from data.germeval2017 import germeval2017_dataset
from misc.preferences import PREFERENCES
from misc.run_configuration import get_default_params, randomize_params, OutputLayerType, hyperOpt_goodParams
from misc import utils
from misc.hyperopt_space import *

from optimizer import get_optimizer
from criterion import NllLoss, LossCombiner

from models.transformer.encoder import TransformerEncoder
from models.jointAspectTagger import JointAspectTagger
from trainer.train import Trainer
import pprint

# Loading Functions

These functions load the dataset and build the trainer (model, loss and optimizer). The run configuration determines the architecture and the hyperparameters.

In [2]:
def load_model(dataset, rc, experiment_name):
    """Assemble loss, encoder, tagger and optimizer and wrap them in a ``Trainer``.

    Args:
        dataset: Loaded dataset; its ``class_weights``, ``source_embedding`` and
            ``target_names`` attributes are read here.
        rc: Run configuration forwarded to the encoder, the tagger, the
            optimizer factory and the trainer.
        experiment_name: Experiment name handed to the trainer.

    Returns:
        The constructed ``Trainer`` (tensorboard disabled, verbose enabled).
    """
    # NOTE(review): 4 and 20 are passed positionally; presumably the number of
    # sentiment classes and the number of aspects -- confirm against
    # LossCombiner / JointAspectTagger.
    criterion = LossCombiner(4, dataset.class_weights, NllLoss)
    encoder = TransformerEncoder(dataset.source_embedding, hyperparameters=rc)
    tagger = JointAspectTagger(encoder, rc, 4, 20, dataset.target_names)
    optim = get_optimizer(tagger, rc)

    trainer_options = {'enable_tensorboard': False, 'verbose': True}
    return Trainer(tagger, criterion, optim, rc, dataset, experiment_name,
                   **trainer_options)

def load_dataset(rc, logger):
    """Create the 'germeval' ``Dataset`` from ``PREFERENCES`` and load its data.

    Args:
        rc: Run configuration passed through to ``Dataset``.
        logger: Logger passed through to ``Dataset``.

    Returns:
        The ``Dataset`` instance after ``load_data`` has been called on it.
    """
    # Location and file names all come from the shared preferences object.
    file_settings = dict(
        data_path=PREFERENCES.data_root,
        train_file=PREFERENCES.data_train,
        valid_file=PREFERENCES.data_validation,
        test_file=PREFERENCES.data_test,
        file_format='.tsv',
    )
    # No init/eos tokens are configured for this dataset.
    token_settings = dict(init_token=None, eos_token=None)

    germeval = Dataset('germeval', logger, rc,
                       source_index=0,
                       target_vocab_index=2,
                       **file_settings,
                       **token_settings)
    germeval.load_data(germeval2017_dataset, verbose=False)
    return germeval

## Preamble - Definitions

In [3]:
# Register the default preferences used by load_dataset: dataset directory,
# the train/validation/test file names, and the early-stopping setting.
PREFERENCES.defaults(
    data_root='./data/germeval2017',
    data_train='train_v1.4.tsv',    
    data_validation='dev_v1.4.tsv',
    data_test='test_TIMESTAMP1.tsv',
    early_stopping='highest_5_F1'
)
experiment_name = 'HyperParameterTest'
# Passed to get_default_params inside objective().
use_cuda = True

# get general logger just for search
# NOTE: experiment_name is rebound to whatever create_loggers returns; that
# returned value is what the log message below and load_model receive.
experiment_name = utils.create_loggers(experiment_name=experiment_name)
logger = logging.getLogger(__name__)
# Separate logger handed to the Dataset in load_dataset.
dataset_logger = logging.getLogger('data_loader')
logger.info('Run hyper parameter random grid search for experiment with name ' + experiment_name)

# NOTE(review): in the cells visible here this value is only logged, never
# used to drive a search loop.
num_optim_iterations = 100
logger.info('num_optim_iterations: ' + str(num_optim_iterations))

Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\HyperParameterTest\20190317\1


### Current commit

In [4]:
# Resolve the commit once and reuse it, so the logged and the printed value
# are guaranteed to be the same and the helper is not invoked repeatedly.
current_commit = utils.get_current_git_commit()
logger.info('Current commit: ' + current_commit)
print('Current commit: ' + current_commit)

Current commit: b'a25c482'


In [5]:
def objective(checkpoint_path='C:\\Users\\felix\\OneDrive\\Studium\\Studium\\6. Semester\\MA\\Project\\ABSA-Transformer\\logs\\test'):
    """Load a saved checkpoint and run the final evaluation on it.

    Builds the run configuration from ``hyperOpt_goodParams``, loads the
    dataset and the trainer, restores the trainer from ``checkpoint_path`` and
    calls ``perform_final_evaluation`` including the test set.

    Args:
        checkpoint_path: Path handed to ``trainer.load_model`` as
            ``custom_path``. Defaults to the location this notebook was
            originally run against, so ``objective()`` behaves as before.

    Returns:
        dict: Always contains ``'status'`` and ``'eval_time'`` (seconds since
        the call started). With ``STATUS_OK`` it additionally contains
        ``'loss'`` (the validation loss), ``'best_loss'``, ``'best_f1'``,
        ``'results'`` (loss/f1 for train, validation and test) and the
        ``'trainer'``, ``'model'`` and ``'optimizer'`` objects. With
        ``STATUS_FAIL`` only the keys that could be determined are present.
    """
    run_time = time.time()

    def _failure(**extra):
        """Build the STATUS_FAIL record, stamped with the elapsed time."""
        report = {
            'status': STATUS_FAIL,
            'eval_time': time.time() - run_time
        }
        report.update(extra)
        return report

    # generate hp's from parameters
    rc = get_default_params(use_cuda, hyperOpt_goodParams)
    rc.num_epochs = 25
    rc.log_every_xth_iteration = -1
    print(rc)

    logger.debug('Load dataset')
    try:
        dataset = load_dataset(rc, dataset_logger)
    except Exception as err:
        print('Could not load dataset: ' + str(err))
        logger.exception("Could not load dataset")
        return _failure()
    logger.debug('dataset loaded')
    logger.debug('Load model')

    try:
        trainer = load_model(dataset, rc, experiment_name)
    except Exception as err:
        print('Could not load model: ' + str(err))
        logger.exception("Could not load model")
        return _failure()

    logger.debug('model loaded')
    print('Loading model')

    # Restoring the checkpoint can fail just like the steps above (missing or
    # incompatible file), so it is reported the same way.
    try:
        trainer.load_model(custom_path=checkpoint_path)
    except Exception as err:
        print('Could not load checkpoint: ' + str(err))
        logger.exception("Could not load checkpoint")
        return _failure()
    print('Loading finished')
    # Follow the same flag the run configuration was built with.
    trainer.set_cuda(use_cuda)

    # perform evaluation and log results
    try:
        result = trainer.perform_final_evaluation(use_test_set=True, verbose=False)
    except Exception as err:
        logger.exception("Could not complete iteration evaluation.")
        print('Could not complete iteration evaluation: ' + str(err))
        return _failure(best_loss=trainer.get_best_loss(),
                        best_f1=trainer.get_best_f1())

    print(f'Best f1 {trainer.get_best_f1()}')
    # result is indexed as [train, validation, test], each entry as [loss, f1].
    return {
            'loss': result[1][0],
            'status': STATUS_OK,
            'eval_time': time.time() - run_time,
            'best_loss': trainer.get_best_loss(),
            'best_f1': trainer.get_best_f1(),
            'results': {
                'train': {
                    'loss': result[0][0],
                    'f1': result[0][1]
                },
                'validation': {
                    'loss': result[1][0],
                    'f1': result[1][1]
                },
                'test': {
                    'loss': result[2][0],
                    'f1': result[2][1]
                }
            },
            'trainer': trainer,
            'model': trainer.model,
            'optimizer': trainer.optimizer.optimizer
        }

# Run the objective (single evaluation of the saved checkpoint)

In [6]:
# Run the objective once; the returned dict is what the inspection cells below read.
result = objective()

+----------------------------------------------------------------------------------+
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|          Parameter           |                       Value                       |
+------------------------------+---------------------------------------------------+
|            kwargs            | {'batch_size': 12, 'learning_rate_schedu[...]rue} |
|          model_size          |                        300                        |
|        early_stopping        |                         5                         |
|           use_cuda           |                        True                       |
|          batch_size          |                         12                        |
| learning_rate_scheduler_type |             LearningSchedulerType.Noam            |
|      output_layer_type       |             OutputLayerType.Line


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Embedding-1             [-1, 113, 300]      20,583,300
           Dropout-2             [-1, 113, 300]               0
PositionalEncoding2-3             [-1, 113, 300]               0
            Linear-4             [-1, 113, 300]          90,000
            Linear-5             [-1, 113, 300]          90,000
            Linear-6             [-1, 113, 300]          90,000
           Dropout-7             [-1, 113, 113]               0
ScaledDotProductAttentionLayer-8             [-1, 113, 300]               0
            Linear-9             [-1, 113, 300]          90,000
          Dropout-10             [-1, 113, 300]               0
        LayerNorm-11             [-1, 113, 300]               0
MultiHeadedSelfAttentionLayer-12             [-1, 113, 300]               0
           Linear-13             [-1, 113, 405]         121,500
           Li

+------------------------------+---------------------------------------------------+
pre_training - INFO - Hyperparameters are compatible!
Loading finished
pre_training - DEBUG - train with cuda support
pre_training - DEBUG - --- Valid Scores ---
Best f1 0.3213599383691035


In [None]:
# Display the `state` attribute of the optimizer wrapped by the trainer's optimizer object.
result['trainer'].optimizer.optimizer.state

In [7]:
# Display the complete dict returned by objective().
result

{'best_f1': 0.3213599383691035,
 'best_loss': 1000.0,
 'eval_time': 205.22377395629883,
 'loss': 11.043194846103066,
 'model': JointAspectTagger(
   (encoder): TransformerEncoder(
     (src_embeddings): Embedding(68611, 300)
     (positional_encoding): PositionalEncoding2(
       (dropout): Dropout(p=0.302424)
     )
     (encoder_blocks): ModuleList(
       (0): EncoderBlock(
         (self_attention_layer): MultiHeadedSelfAttentionLayer(
           (query_projections): Linear(in_features=300, out_features=300, bias=False)
           (key_projections): Linear(in_features=300, out_features=300, bias=False)
           (value_projections): Linear(in_features=300, out_features=300, bias=False)
           (attention_layer): ScaledDotProductAttentionLayer(
             (dropout): Dropout(p=0.302424)
           )
           (layer_norm): LayerNorm()
           (w_0): Linear(in_features=300, out_features=300, bias=False)
           (dropout): Dropout(p=0.302424)
         )
         (feed_forw

In [None]:
import torch

# NOTE: machine-specific absolute path to one checkpoint file; adjust when
# running this notebook elsewhere.
path = 'C:\\Users\\felix\\OneDrive\\Studium\\Studium\\6. Semester\\MA\\Project\\ABSA-Transformer\\logs\\test\\checkpoint_5684.data'

# Without CUDA, remap stored tensors to the CPU so a checkpoint that was saved
# from a GPU can still be loaded; with CUDA the default behaviour is kept.
checkpoint = torch.load(path, map_location=None if torch.cuda.is_available() else 'cpu')

# Restore optimizer and model weights from the checkpoint entries.
result['trainer'].optimizer.optimizer.load_state_dict(checkpoint['optimizer'])
result['trainer'].model.load_state_dict(checkpoint['state_dict'])
# Register the loaded checkpoint as the early-stopping "best" checkpoint.
result['trainer'].early_stopping.best_model_checkpoint = checkpoint

# Move every tensor held in the optimizer state to the GPU when one is available.
if torch.cuda.is_available():
    for state in result['trainer'].optimizer.optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.cuda()

In [None]:
# Display the 'state_dict' entry stored in the loaded checkpoint.
checkpoint['state_dict']

In [None]:
# Display the state dict of the model object returned by objective().
result['model'].state_dict()

In [None]:
# Display the state dict of the optimizer returned by objective() (trainer.optimizer.optimizer).
result['optimizer'].state_dict()

In [None]:
# Set the `train` flag on the training iterator before it is evaluated.
result['trainer'].train_iterator.train = True

# Run the trainer's evaluator over the training iterator with a progress display;
# the call stays the last expression so its return value is shown as cell output.
result['trainer'].evaluator.evaluate(
    result['trainer'].train_iterator,
    show_progress=True,
    progress_label="Evaluating TRAIN",
)

In [15]:
# NOTE(review): star import; create_padding_masks used below is presumably
# provided by it -- confirm and prefer an explicit import.
from trainer.utils import *
# Short aliases for the trainer returned by objective() and its dataset.
t = result['trainer']
ds = t.dataset
# Field used to turn the 'comments' tensor back into text.
field = ds.fields['comments']
t.train_iterator.init_epoch()
# Only the first training batch is inspected (see the `break` at the end).
for b in t.train_iterator:
    # general_sentiments (`_`) and aspect_sentiments (`y`) are unpacked but not used below.
    x, _, padding, y = b.comments, b.general_sentiments, b.padding, b.aspect_sentiments
    # Print the decoded text of the first sample in the batch.
    print(field.reverse(x[0].unsqueeze(0)))
    
    # Build the padding mask for the batch and let the model predict.
    source_mask = create_padding_masks(padding, 1)
    prediction = t.model.predict(x, source_mask)
    # Convert the predictions back to labels via the aspect_sentiments field.
    aspect_sentiment = ds.fields['aspect_sentiments'].reverse(prediction, detokenize=False)
    
    # Print one "<target name>: <label>" line per target for the first sample.
    for s, name in zip(aspect_sentiment[0], ds.target_names):
        print(f'{name}: {s}')
    break

['sommerhitze und keine klimaanlage  endstation für züge rheinpfalzde sommerhitze und keine klimaanlage  endstation für züge berlin dpa  am bislang heißesten tag des jahres haben die klimaanlagen mehrerer züge schlapp gemacht im fernverkehr seien rund ein dutzend alte ics betroffen gewesen sagte ein sprecher']
Allgemein: n/a
Atmosphäre: negative
Auslastung_und_Platzangebot: n/a
Barrierefreiheit: n/a
Connectivity: n/a
DB_App_und_Website: n/a
Design: n/a
Gastronomisches_Angebot: n/a
Gepäck: n/a
Image: n/a
Informationen: n/a
Komfort_und_Ausstattung: n/a
QR-Code: n/a
Reisen_mit_Kindern: n/a
Service_und_Kundenbetreuung: n/a
Sicherheit: n/a
Sonstige_Unregelmässigkeiten: n/a
Ticketkauf: n/a
Toiletten: n/a
Zugfahrt: n/a
