In [1]:
import matplotlib
import copy
import logging

from data.data_loader import Dataset
from data.germeval2017 import germeval2017_dataset

from misc.preferences import PREFERENCES
from misc.run_configuration import get_default_params, randomize_params, LearningSchedulerType
from misc import utils

from optimizer import get_default_optimizer
from criterion import NllLoss, LossCombiner

from models.transformer.encoder import TransformerEncoder
from models.jointAspectTagger import JointAspectTagger
from models.transformer.train import Trainer
import pprint

  assert(config.model_size % config.n_heads == 0, f"number of heads {config.n_heads} is not a valid number of heads for model size {config.model_size}.")


In [2]:
# Register the defaults for this notebook: the early-stopping criterion and
# the GermEval 2017 file locations that `load` reads back from PREFERENCES.
PREFERENCES.defaults(
    early_stopping='highest_5_F1',
    data_root='./data/germeval2017',
    data_train='train_v1.4.tsv',
    data_validation='dev_v1.4.tsv',
    data_test='test_TIMESTAMP1.tsv',
)
def load(hp, logger):
    """Create the GermEval 2017 ``Dataset`` and load its data.

    File locations are read from ``PREFERENCES`` (``data_root``,
    ``data_train``, ``data_validation`` and ``data_test``).

    Args:
        hp: hyperparameter object, forwarded unchanged to ``Dataset``.
        logger: logger, forwarded unchanged to ``Dataset``.

    Returns:
        The ``Dataset`` instance after ``load_data`` has been called on it
        with ``germeval2017_dataset`` and ``verbose=True``.
    """
    # Column selection inside each row of the data files.
    column_options = {
        'source_index': 0,
        'target_vocab_index': 2,
    }
    # Where the splits live and how they are stored.
    file_options = {
        'data_path': PREFERENCES.data_root,
        'train_file': PREFERENCES.data_train,
        'valid_file': PREFERENCES.data_validation,
        'test_file': PREFERENCES.data_test,
        'file_format': '.tsv',
    }
    # No start / end-of-sequence tokens are configured.
    token_options = {
        'init_token': None,
        'eos_token': None,
    }

    germeval = Dataset(
        'germeval', logger, hp,
        **column_options, **file_options, **token_options
    )
    germeval.load_data(germeval2017_dataset, verbose=True)
    return germeval

In [3]:
def load_model(dataset, hp, experiment_name):
    """Assemble loss, model and optimizer and wrap them in a ``Trainer``.

    Args:
        dataset: loaded dataset; its ``class_weights``, ``source_embedding``
            and ``target_names`` attributes are read here, and the object
            itself is handed to the ``Trainer``.
        hp: hyperparameter object shared by encoder, tagger, optimizer and
            trainer.
        experiment_name: name forwarded to the ``Trainer``.

    Returns:
        A ``Trainer`` created with tensorboard disabled and ``verbose=True``.
    """
    # NOTE(review): the literals 4 and 20 are passed through as-is; presumably
    # the number of sentiment classes and the number of aspect heads --
    # TODO confirm against LossCombiner / JointAspectTagger.
    criterion = LossCombiner(4, dataset.class_weights, NllLoss)

    encoder = TransformerEncoder(dataset.source_embedding, hyperparameters=hp)
    tagger = JointAspectTagger(encoder, hp, 4, 20, dataset.target_names)
    tagger_optimizer = get_default_optimizer(tagger, hp)

    return Trainer(
        tagger, criterion, tagger_optimizer, hp, dataset, experiment_name,
        enable_tensorboard=False,
        verbose=True,
    )

In [4]:
# Base name of this experiment; passed to utils.create_loggers in the next cell.
experiment_name = 'AdaBound'
# Forwarded to get_default_params when the hyperparameters are built.
# NOTE(review): hard-coded to True -- presumably this needs a CUDA-capable
# machine; confirm before running elsewhere.
use_cuda = True

In [5]:
# get general logger just for search
# NOTE(review): experiment_name is rebound to the return value of
# create_loggers, so re-running this cell on its own feeds that return value
# back in as the experiment name. Re-run the previous cell first.
experiment_name = utils.create_loggers(experiment_name=experiment_name)
# Notebook-level logger, named after this module (__name__).
logger = logging.getLogger(__name__)

Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\AdaBound\20190226\0


In [6]:
# Record which commit produced this run. The hash is resolved once; the
# previous bare call on its own line was not the cell's last expression, so
# its return value was neither displayed nor used.
current_commit = utils.get_current_git_commit()
# Lazy %-formatting: a non-string return value (e.g. None) is logged as text
# instead of raising TypeError as string concatenation would.
logger.info('Current commit: %s', current_commit)

In [7]:
# Start from the project defaults, then override selected fields for this run.
hp = get_default_params(use_cuda)
hp.num_epochs = 35
hp.batch_size = 11
# Disabled overrides, kept for reference; while they stay commented out the
# defaults apply.
# NOTE(review): optim_adam_beta1 appears twice below -- the second entry was
# presumably meant to be optim_adam_beta2; confirm before re-enabling.
#hp.learning_rate = 0.00275
#hp.learning_rate_warmup = 7628.240
#hp.learning_rate_factor = 0.893619
#hp.optim_adam_beta1 = 0.8315827
#hp.optim_adam_beta1 = 0.937167988175
hp.n_enc_blocks = 2
# n_heads * d_k = 10 * 30 = 300, which matches the model_size shown in the
# printed hyperparameter table.
hp.n_heads = 10
hp.d_k = 30
hp.d_v = 30
hp.dropout_rate = 0.069
hp.pointwise_layer_size = 134
hp.clip_comments_to = 391
hp.learning_rate_type = LearningSchedulerType.AdaBound
# NOTE(review): the recorded run reports nan train/val loss in every epoch
# while learning_rate is still 1 according to the printed table -- presumably
# too high for AdaBound; confirm how the optimizer consumes hp.learning_rate.
# The parameters are both written to the log and printed in the notebook.
logger.info(hp)
print(hp)

+----------------------------------------------------------+
|                     Hyperparameters                      |
+-------------------------+--------------------------------+
|        Parameter        |             Value              |
+-------------------------+--------------------------------+
|        batch_size       |               11               |
|        model_size       |              300               |
|    learning_rate_type   | LearningSchedulerType.AdaBound |
|      learning_rate      |               1                |
|   learning_rate_warmup  |              4800              |
|   learning_rate_factor  |               2                |
|     optim_adam_beta1    |              0.9               |
|     optim_adam_beta2    |              0.98              |
|      early_stopping     |               5                |
|         use_cuda        |              True              |
|       n_enc_blocks      |               2                |
|         n_heads       

In [8]:
# Separate logger handed to the data-loading code.
dataset_logger = logging.getLogger('data_loader')

logger.debug('Load dataset')
dataset = load(hp, dataset_logger)
logger.debug('dataset loaded')

logger.debug('Load model')
trainer = load_model(dataset, hp, experiment_name)
logger.debug('model loaded')

logger.debug('Begin training')
model = None  # stays None if training raises before a model is returned
# Keep the training output under its own name so it is not overwritten by the
# evaluation result below (previously both were bound to `result`).
training_result = trainer.train(use_cuda=hp.use_cuda, perform_evaluation=False)
model = training_result['model']

# perform evaluation and log results
result = trainer.perform_final_evaluation(use_test_set=False, verbose=False)
# Last expression of the cell: display the evaluation result.
result

                                          

+-------------------------+
|  GERM EVAL 2017 DATASET |
+---------------+---------+
|     Split     |   Size  |
+---------------+---------+
|     train     |  17043  |
|   validation  |   2049  |
|      test     |   2095  |
+---------------+---------+
+--------------------------------------+
|           Vocabulary Stats           |
+------------------------------+-------+
|          Vocabulary          |  Size |
+------------------------------+-------+
|           comments           | 84428 |
|      general_sentiments      |   3   |
|      aspect_sentiments       |   4   |
|           padding            | 29470 |
|            Image             |   4   |
|          Toiletten           |   4   |
|            Design            |   4   |
|       Barrierefreiheit       |   4   |
|   Komfort_und_Ausstattung    |   4   |
|      Reisen_mit_Kindern      |   4   |
|          Ticketkauf          |   4   |
|           Zugfahrt           |   4   |
| Service_und_Kundenbetreuung  |   4   |
|         


pre_training - DEBUG - 20 initialized
pre_training - DEBUG - Initilize parameters with nn.init.xavier_uniform_
pre_training - DEBUG - Tagger initialized
pre_training - INFO - JointAspectTagger (
  (encoder): TransformerEncoder(
    (src_embeddings): Embedding(84428, 300)
    (positional_encoding): PositionalEncoding2(
      (dropout): Dropout(p=0.1)
    )
    (encoder_blocks): ModuleList(
      (0): EncoderBlock(
        (self_attention_layer): MultiHeadedSelfAttentionLayer(
          (query_projections): Linear(in_features=300, out_features=300, bias=False)
          (key_projections): Linear(in_features=300, out_features=300, bias=False)
          (value_projections): Linear(in_features=300, out_features=300, bias=False)
          (attention_layer): ScaledDotProductAttentionLayer(
            (dropout): Dropout(p=0.1)
          )
          (layer_norm): LayerNorm()
          (w_0): Linear(in_features=300, out_features=300, bias=False)
          (dropout): Dropout(p=0.069)
        )


           Linear-39               [-1, 391, 4]           1,204
CommentWiseSumLogSoftmax-40                    [-1, 4]               0
           Linear-41               [-1, 391, 4]           1,204
CommentWiseSumLogSoftmax-42                    [-1, 4]               0
           Linear-43               [-1, 391, 4]           1,204
CommentWiseSumLogSoftmax-44                    [-1, 4]               0
           Linear-45               [-1, 391, 4]           1,204
CommentWiseSumLogSoftmax-46                    [-1, 4]               0
           Linear-47               [-1, 391, 4]           1,204
CommentWiseSumLogSoftmax-48                    [-1, 4]               0
           Linear-49               [-1, 391, 4]           1,204
CommentWiseSumLogSoftmax-50                    [-1, 4]               0
           Linear-51               [-1, 391, 4]           1,204
CommentWiseSumLogSoftmax-52                    [-1, 4]               0
           Linear-53               [-1, 391, 4]        

pre_training - INFO - 1550 Iterations per epoch with batch size of 11
pre_training - INFO - Total iterations: 54250
pre_training - INFO - START training.


                                                                                                                       

# EP	# IT	tr loss		val loss	f1		acc		duration / total time


                                                                                                                       

1	1550	nan		nan		0.240		0.944		6.40m - 6.4m / 0.0m


                                                                                                                       

2	3100	nan		nan		0.240		0.944		6.35m - 12.8m / 224.0m


                                                                                                                       

3	4650	nan		nan		0.240		0.944		6.43m - 19.3m / 222.6m


                                                                                                                       

4	6200	nan		nan		0.240		0.944		6.31m - 25.6m / 225.0m


                                                                                                                       

5	7750	nan		nan		0.240		0.944		6.25m - 31.8m / 221.2m


                                                                                                                       

6	9300	nan		nan		0.240		0.944		6.19m - 38.0m / 219.3m


Epoch 6: 100%|█████████████████████████████████████████████████████████████████████| 1550/1550 [38:01<00:00,  4.23it/s]


pre_training - DEBUG - --- Valid Scores ---


((nan, 0.2403811203710183, None),
 (nan, 0.24035293348721692, None),
 (-1, -1, array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])))