In [1]:
import matplotlib
import copy
import logging

from data.data_loader import Dataset
from data.germeval2017 import germeval2017_dataset

from misc.preferences import PREFERENCES
from misc.run_configuration import get_default_params, randomize_params
from misc import utils

from optimizer import get_default_optimizer
from criterion import NllLoss, LossCombiner

from models.transformer.encoder import TransformerEncoder
from models.softmax_output import SoftmaxOutputLayerWithCommentWiseClass
from models.transformer_tagger import TransformerTagger
from models.jointAspectTagger import JointAspectTagger
from models.transformer.train import Trainer
import pprint

In [2]:
# Register notebook-wide defaults on the shared PREFERENCES object.
# The data_* entries are read back by `load` (PREFERENCES.data_root,
# .data_train, .data_validation, .data_test) to locate the dataset files.
# NOTE(review): 'highest_5_F1' presumably selects the early-stopping
# criterion used by the trainer -- confirm in misc.preferences.
PREFERENCES.defaults(
    data_root='./data/germeval2017',
    data_train='train_v1.4.tsv',    
    data_validation='dev_v1.4.tsv',
    data_test='test_TIMESTAMP1.tsv',
    early_stopping='highest_5_F1'
)
def load(hp, logger):
    """Create the GermEval 2017 ``Dataset`` wrapper and load its splits.

    Args:
        hp: hyperparameter object, passed through to ``Dataset``.
        logger: logger instance, passed through to ``Dataset``.

    Returns:
        The ``Dataset`` instance after ``load_data`` has been called on it.
    """
    # File locations are taken from the notebook-wide PREFERENCES object.
    file_locations = dict(
        data_path=PREFERENCES.data_root,
        train_file=PREFERENCES.data_train,
        valid_file=PREFERENCES.data_validation,
        test_file=PREFERENCES.data_test,
        file_format='.tsv',
    )
    germeval = Dataset(
        'germeval', logger, hp,
        source_index=0,
        target_vocab_index=2,
        **file_locations,
        init_token=None,
        eos_token=None
    )
    germeval.load_data(germeval2017_dataset, verbose=True)
    return germeval

In [3]:
def load_model(dataset, hp, experiment_name):
    """Assemble loss, model and optimizer and wrap them in a ``Trainer``.

    Args:
        dataset: loaded dataset; its ``class_weights``, ``source_embedding``
            and ``target_names`` attributes are read here.
        hp: hyperparameter object handed to the encoder, the tagger, the
            optimizer factory and the trainer.
        experiment_name: passed through to the ``Trainer``.

    Returns:
        The constructed ``Trainer`` (tensorboard disabled, verbose enabled).
    """
    # NOTE(review): 4 and 20 are hard-coded. They presumably stand for the
    # number of sentiment classes and the number of aspect heads -- confirm
    # against the LossCombiner / JointAspectTagger signatures.
    criterion = LossCombiner(4, dataset.class_weights, NllLoss)
    encoder = TransformerEncoder(dataset.source_embedding, hyperparameters=hp)
    tagger = JointAspectTagger(encoder, hp, 4, 20, dataset.target_names)
    optim = get_default_optimizer(tagger, hp)

    return Trainer(
        tagger,
        criterion,
        optim,
        hp,
        dataset,
        experiment_name,
        enable_tensorboard=False,
        verbose=True,
    )

In [4]:
# Label of this experiment; handed to utils.create_loggers when logging is set up.
experiment_name = 'Conv2dLayerTest'
# Forwarded to get_default_params when the hyperparameters are created.
use_cuda = True

In [5]:
# Set up logging for this experiment and fetch the notebook-level logger.
# NOTE(review): `experiment_name` is rebound to whatever create_loggers
# returns, so re-running this cell feeds that returned value back in instead
# of the original label -- re-run the cell that defines `experiment_name` first.
experiment_name = utils.create_loggers(experiment_name=experiment_name)
logger = logging.getLogger(__name__)

Log path is  /data/home/felix/ABSA-Transformer/logs/Conv2dLayerTest/20190224/1


In [6]:
# Record which code version produced this run. The commit is looked up once
# (the original called the helper twice and discarded the first result) and
# logged with lazy %-formatting, so a non-string return value cannot make the
# log call itself raise.
current_commit = utils.get_current_git_commit()
logger.info('Current commit: %s', current_commit)

In [7]:
# Start from the project's default hyperparameters; `use_cuda` is the flag
# defined earlier in the notebook.
hp = get_default_params(use_cuda)
# Override the number of training epochs for this run.
hp.num_epochs = 15

# Write the configuration to the log and also show it in the cell output.
logger.info(hp)
print(hp)

+------------------------------------+
|          Hyperparameters           |
+-------------------------+----------+
|        Parameter        |  Value   |
+-------------------------+----------+
|        batch_size       |    12    |
|        model_size       |   300    |
|    learning_rate_type   |   noam   |
|      learning_rate      |    0     |
|   learning_rate_warmup  |   4800   |
|   learning_rate_factor  |    2     |
|     optim_adam_beta1    |   0.9    |
|     optim_adam_beta2    |   0.98   |
|      early_stopping     |    5     |
|         use_cuda        |   True   |
|       n_enc_blocks      |    3     |
|         n_heads         |    6     |
|           d_k           |    50    |
|           d_v           |    50    |
|       dropout_rate      |   0.1    |
|   pointwise_layer_size  |   2048   |
|    output_layer_type    |   conv   |
| output_conv_num_filters |   300    |
| output_conv_kernel_size |    5     |
|    output_conv_stride   |    1     |
|   output_conv_padding  

In [8]:
# Dedicated logger for the data-loading code.
dataset_logger = logging.getLogger('data_loader')

# --- Build dataset and trainer -------------------------------------------
logger.debug('Load dataset')
dataset = load(hp, dataset_logger)
logger.debug('dataset loaded')

logger.debug('Load model')
trainer = load_model(dataset, hp, experiment_name)
logger.debug('model loaded')

# --- Training --------------------------------------------------------------
# Best effort: a failure is logged (with traceback) and printed, and the
# notebook continues, leaving `model` / `train_result` as None.
logger.debug('Begin training')
model = None
train_result = None  # kept separately so the evaluation below cannot clobber it
try:
    train_result = trainer.train(use_cuda=hp.use_cuda, perform_evaluation=True)
    model = train_result['model']
except Exception as err:
    logger.exception('Could not complete iteration because of %s', err)
    print(f'Could not complete iteration because of {err}')

# --- Final evaluation on train/validation data ----------------------------
# `result` holds the evaluation outcome, or None if the evaluation failed.
# NOTE(review): the recorded output shows "Perform final model evaluation"
# twice, so train(perform_evaluation=True) may already run this step --
# TODO confirm whether the explicit call is redundant.
result = None
try:
    result = trainer.perform_final_evaluation(use_test_set=False, verbose=True)
except Exception as err:
    logger.exception('Could not complete iteration evaluation because of %s', err)
    print(f'Could not complete iteration evaluation because of {err}')


                                           

+-------------------------+
|  GERM EVAL 2017 DATASET |
+---------------+---------+
|     Split     |   Size  |
+---------------+---------+
|     train     |  17043  |
|   validation  |   2049  |
|      test     |   2095  |
+---------------+---------+
+--------------------------------------+
|           Vocabulary Stats           |
+------------------------------+-------+
|          Vocabulary          |  Size |
+------------------------------+-------+
|           comments           | 67041 |
|      general_sentiments      |   3   |
|      aspect_sentiments       |   4   |
|           padding            | 23197 |
|          Sicherheit          |   4   |
| Sonstige_Unregelmässigkeiten |   4   |
|          Atmosphäre          |   4   |
|      Reisen_mit_Kindern      |   4   |
|            Image             |   4   |
|           QR-Code            |   3   |
| Service_und_Kundenbetreuung  |   4   |
|            Gepäck            |   4   |
|          Toiletten           |   4   |
|       Ba

pre_training - DEBUG - 20 initialized
pre_training - DEBUG - Initilize parameters with nn.init.xavier_uniform_
pre_training - DEBUG - Tagger initialized
pre_training - INFO - JointAspectTagger (
  (encoder): TransformerEncoder(
    (src_embeddings): Embedding(67041, 300)
    (positional_encoding): PositionalEncoding2(
      (dropout): Dropout(p=0.1)
    )
    (encoder_blocks): ModuleList(
      (0): EncoderBlock(
        (self_attention_layer): MultiHeadedSelfAttentionLayer(
          (query_projections): Linear(in_features=300, out_features=300, bias=False)
          (key_projections): Linear(in_features=300, out_features=300, bias=False)
          (value_projections): Linear(in_features=300, out_features=300, bias=False)
          (attention_layer): ScaledDotProductAttentionLayer(
            (dropout): Dropout(p=0.1)
          )
          (layer_norm): LayerNorm()
          (w_0): Linear(in_features=300, out_features=300, bias=False)
          (dropout): Dropout(p=0.1)
        )
   

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
         Embedding-1             [-1, 100, 300]      20,112,300
           Dropout-2             [-1, 100, 300]               0
PositionalEncoding2-3             [-1, 100, 300]               0
            Linear-4             [-1, 100, 300]          90,000
            Linear-5             [-1, 100, 300]          90,000
            Linear-6             [-1, 100, 300]          90,000
           Dropout-7             [-1, 100, 100]               0
ScaledDotProductAttentionLayer-8              [-1, 100, 50]               0
            Linear-9             [-1, 100, 300]          90,000
          Dropout-10             [-1, 100, 300]               0
        LayerNorm-11             [-1, 100, 300]               0
MultiHeadedSelfAttentionLayer-12             [-1, 100, 300]               0
           Linear-13            [-1, 100, 2048]         616,448
           Lin

pre_training - INFO - Classes: ['n/a', 'neutral', 'negative', 'positive']
pre_training - DEBUG - train with cuda support
pre_training - INFO - 1421 Iterations per epoch with batch size of 12
pre_training - INFO - Total iterations: 21315
pre_training - INFO - START training.


Epoch 1: 100%|██████████| 1421/1421 [04:17<00:00,  5.88it/s]

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	1421	10.23		[32m9.55		[32m0.244[0m		0.959		4.29m - 4.3m / 0.0m


Epoch 3:   0%|          | 0/1421 [08:34<30:31:38, 77.34s/it]

2	2842	10.24		[37m10.70		[37m0.244[0m		0.959		4.28m - 8.6m / 64.3m


Epoch 4:   0%|          | 0/1421 [12:51<30:31:38, 77.34s/it]  

3	4263	10.25		[32m9.78		[37m0.240[0m		0.932		4.28m - 12.9m / 64.2m


Epoch 4: 100%|██████████| 1421/1421 [17:07<00:00, 77.34s/it]  

4	5684	10.14		[32m9.42		[32m0.244[0m		0.959		4.28m - 17.1m / 64.2m


Epoch 6:   0%|          | 0/1421 [21:25<30:31:38, 77.34s/it]  

5	7105	9.90		[32m9.61		[37m0.244[0m		0.959		4.28m - 21.4m / 64.2m


Epoch 7:   0%|          | 0/1421 [25:41<30:31:38, 77.34s/it]  

6	8526	9.85		[37m10.06		[37m0.244[0m		0.959		4.28m - 25.7m / 64.2m


Epoch 8:   0%|          | 0/1421 [29:58<30:31:38, 77.34s/it]  

7	9947	9.84		[32m9.12		[37m0.244[0m		0.959		4.28m - 30.0m / 64.2m


Epoch 9:   0%|          | 0/1421 [34:15<30:31:38, 77.34s/it]  

8	11368	9.86		[32m9.26		[37m0.244[0m		0.959		4.28m - 34.3m / 64.2m


Epoch 9: 100%|██████████| 1421/1421 [38:31<00:00, 77.34s/it]  


9	12789	9.78		[32m8.97		[37m0.244[0m		0.959		4.28m - 38.5m / 64.2m
pre_training - INFO - Perform final model evaluation
pre_training - DEBUG - --- Train Scores ---


                                                                     

pre_training - INFO - TRAIN loss:	9.547267971703237
pre_training - INFO - TRAIN f1-s:	0.24403683939570603
pre_training - INFO - TRAIN accuracy:	0.958325998943848
pre_training - DEBUG - --- Valid Scores ---


                                                               

pre_training - INFO - Perform final model evaluation
pre_training - DEBUG - --- Train Scores ---


                                                                     

pre_training - INFO - TRAIN loss:	9.56523290626773
pre_training - INFO - TRAIN f1-s:	0.24403683939570603
pre_training - INFO - TRAIN accuracy:	0.958325998943848
pre_training - DEBUG - --- Valid Scores ---


                                                               

Could not complete iterationevaluation because of Dimension out of range (expected to be in range of [-1, 0], but got 1)


