In [1]:
import matplotlib
import copy
import logging

from data.data_loader import Dataset
from data.germeval2017 import germeval2017_dataset

from misc.preferences import PREFERENCES
from misc.run_configuration import get_default_params, randomize_params, LearningSchedulerType
from misc import utils

from optimizer import get_default_optimizer
from criterion import NllLoss, LossCombiner

from models.transformer.encoder import TransformerEncoder
from models.jointAspectTagger import JointAspectTagger
from models.transformer.train import Trainer
import pprint

In [2]:
PREFERENCES.defaults(
    data_root='./data/germeval2017',
    data_train='train_v1.4.tsv',    
    data_validation='dev_v1.4.tsv',
    data_test='test_TIMESTAMP1.tsv',
    early_stopping='highest_5_F1'
)
def load(hp, logger):
    dataset = Dataset(
        'germeval',
        logger,
        hp,
        source_index=0,
        target_vocab_index=2,
        data_path=PREFERENCES.data_root,
        train_file=PREFERENCES.data_train,
        valid_file=PREFERENCES.data_validation,
        test_file=PREFERENCES.data_test,
        file_format='.tsv',
        init_token=None,
        eos_token=None
    )
    dataset.load_data(germeval2017_dataset, verbose=True)
    return dataset

In [3]:
def load_model(dataset, hp, experiment_name):
    loss = LossCombiner(4, dataset.class_weights, NllLoss)
    transformer = TransformerEncoder(dataset.source_embedding,
                                     hyperparameters=hp)
    model = JointAspectTagger(transformer, hp, 4, 20, dataset.target_names)
    optimizer = get_default_optimizer(model, hp)
    trainer = Trainer(
                        model,
                        loss,
                        optimizer,
                        hp,
                        dataset,
                        experiment_name,
                        enable_tensorboard=False,
                        verbose=False)
    return trainer

In [4]:
experiment_name = 'DropoutLastLayer_WithWebReplacement'
use_cuda = True

In [5]:
# get general logger just for search
experiment_name = utils.create_loggers(experiment_name=experiment_name)
logger = logging.getLogger(__name__)

Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\DropoutLastLayer_WithWebReplacement\20190305\2


In [6]:
utils.get_current_git_commit()
logger.info('Current commit: ' + utils.get_current_git_commit())

In [7]:
hp = get_default_params(use_cuda)
hp.num_epochs = 35
hp.batch_size = 11
hp.learning_rate = 0.00275
hp.learning_rate_warmup = 7628.240
hp.learning_rate_factor = 0.893619
hp.optim_adam_beta1 = 0.8315827
hp.optim_adam_beta1 = 0.937167988175
hp.n_enc_blocks = 2
hp.n_heads = 10
hp.d_k = 30
hp.d_v = 30
hp.dropout_rate = 0.069
hp.pointwise_layer_size = 134
hp.clip_comments_to = 391
hp.learning_rate_type = LearningSchedulerType.Noam
logger.info(hp)
print(hp)

+------------------------------------------------------+
|                   Hyperparameters                    |
+-------------------------+----------------------------+
|        Parameter        |           Value            |
+-------------------------+----------------------------+
|        batch_size       |             11             |
|        model_size       |            300             |
|    learning_rate_type   | LearningSchedulerType.Noam |
|      learning_rate      |          0.00275           |
|   learning_rate_warmup  |          7628.24           |
|   learning_rate_factor  |          0.893619          |
|     optim_adam_beta1    |       0.937167988175       |
|     optim_adam_beta2    |            0.98            |
|      early_stopping     |             5              |
|         use_cuda        |            True            |
|       n_enc_blocks      |             2              |
|         n_heads         |             10             |
|           d_k           |    

In [8]:
dataset_logger = logging.getLogger('data_loader')
    
logger.debug('Load dataset')
dataset = load(hp, dataset_logger)
logger.debug('dataset loaded')
logger.debug('Load model')
trainer = load_model(dataset, hp, experiment_name)
logger.debug('model loaded')

logger.debug('Begin training')
model = None
result = trainer.train(use_cuda=hp.use_cuda, perform_evaluation=False)
model = result['model']

                                           

+-------------------------+
|  GERM EVAL 2017 DATASET |
+---------------+---------+
|     Split     |   Size  |
+---------------+---------+
|     train     |  17043  |
|   validation  |   2049  |
|      test     |   2095  |
+---------------+---------+
+--------------------------------------+
|           Vocabulary Stats           |
+------------------------------+-------+
|          Vocabulary          |  Size |
+------------------------------+-------+
|              id              | 21187 |
|           comments           | 84428 |
|      general_sentiments      |   3   |
|      aspect_sentiments       |   4   |
|           padding            | 29470 |
|          Allgemein           |   4   |
|         Connectivity         |   4   |
|           Zugfahrt           |   4   |
|          Ticketkauf          |   4   |
| Service_und_Kundenbetreuung  |   4   |
| Auslastung_und_Platzangebot  |   4   |
|           QR-Code            |   3   |
|            Image             |   4   |
|   Gastro


pre_training - DEBUG - 20 initialized
pre_training - DEBUG - Initilize parameters with nn.init.xavier_uniform_
pre_training - DEBUG - Tagger initialized


                                                                                                                       

# EP	# IT	tr loss		val loss	f1		acc		duration / total time


                                                                                                                       

1	1550	582.26		254.88		0.246		0.921		6.04m - 6.0m / 0.0m


                                                                                                                       

2	3100	232.69		187.83		0.278		0.888		6.04m - 12.1m / 211.5m


                                                                                                                       

3	4650	229.59		267.77		0.270		0.870		5.87m - 18.0m / 211.5m


                                                                                                                       

4	6200	219.27		293.18		0.279		0.859		5.92m - 23.9m / 205.8m


                                                                                                                       

5	7750	208.15		343.63		0.281		0.908		5.94m - 30.1m / 207.6m


                                                                                                                       

6	9300	154.75		260.18		0.323		0.940		5.93m - 36.0m / 208.2m


                                                                                                                       

7	10850	104.69		255.49		0.323		0.929		5.97m - 42.0m / 207.9m


                                                                                                                       

8	12400	77.51		245.05		0.325		0.919		5.89m - 48.2m / 209.5m


                                                                                                                       

9	13950	59.74		262.07		0.343		0.940		5.91m - 54.2m / 207.4m


                                                                                                                       

10	15500	47.40		258.45		0.347		0.932		5.94m - 60.2m / 207.9m


                                                                                                                       

11	17050	40.92		305.70		0.353		0.947		5.78m - 66.0m / 208.8m


                                                                                                                       

12	18600	32.68		266.06		0.342		0.945		5.75m - 71.8m / 204.8m


                                                                                                                       

13	20150	28.54		355.14		0.357		0.957		5.70m - 77.5m / 204.0m


                                                                                                                       

14	21700	25.51		281.20		0.343		0.951		5.80m - 83.4m / 202.9m


                                                                                                                       

15	23250	21.87		323.13		0.356		0.956		5.80m - 89.2m / 205.2m


                                                                                                                       

16	24800	20.31		312.36		0.350		0.957		5.65m - 94.8m / 205.1m


                                                                                                                       

17	26350	16.84		321.35		0.346		0.953		5.65m - 100.5m / 202.2m


                                                                                                                       

18	27900	16.81		291.08		0.344		0.953		5.69m - 106.1m / 202.2m


Epoch 18: 100%|██████████████████████████████████████████████████████████████████| 1550/1550 [1:46:08<00:00,  4.66it/s]


In [13]:
trainer._restore_best_model()

In [11]:
# perform evaluation and log results
result = trainer.perform_final_evaluation(use_test_set=True, verbose=False)

In [12]:
result

((7.0496636361715534, 0.8583085295743327, None),
 (291.08214411398467, 0.34404920451694737, None),
 (322.6862192868562, 0.32946999958912493, None))