In [1]:
from misc.preferences import PREFERENCES
from misc.run_configuration import good_organic_hp_params, default_params
from data.conll import conll2003_dataset as dsl
from misc.run_configuration import conll_params
from misc.experimental_environment import Experiment
import time
import pprint
from misc import utils
import pandas as pd
import os

# Status markers for an experiment outcome. They are not referenced by the
# cells visible in this notebook export.
STATUS_FAIL, STATUS_OK = 'fail', 'ok'

In [2]:
# Run-level settings.
# `main_experiment_name` is handed to Experiment(...) in the run cell.
main_experiment_name = 'CoNLL2003-FinalExperiment'
# NOTE(review): `use_cuda` is not read by any cell visible here; presumably
# consumed elsewhere -- confirm before removing.
use_cuda = True

# Register the CoNLL-2003 file locations and format options as project-wide
# preference defaults. The exact meaning of each option is defined by
# misc.preferences (not shown here); the index options presumably select
# columns of the dataset files -- TODO confirm.
PREFERENCES.defaults(
    data_root='./data/data/conll2003',
    data_train='eng.train.txt',
    data_validation='eng.testa.txt',
    data_test='eng.testb.txt',
    source_index=0,
    target_vocab_index=1,
    file_format='txt',
    language='en',
)

In [3]:
# Use the predefined CoNLL parameter set as the baseline configuration and
# print it so the hyperparameters are recorded next to the results.
# `baseline` is an alias of `conll_params`, not a copy.
baseline = conll_params
pprint.pprint(baseline, indent=2)

{ 'att_d_k': 150,
  'att_d_v': 150,
  'clip_comments_to': 200,
  'dropout_rate': 0.302424,
  'embedding_type': 'fasttext',
  'learning_rate_scheduler': { 'noam_learning_rate_factor': 0.5,
                               'noam_learning_rate_warmup': 3500},
  'model_size': 300,
  'num_encoder_blocks': 2,
  'num_heads': 2,
  'optimizer': { 'adam_beta1': 0.9,
                 'adam_beta2': 0.98,
                 'adam_eps': 1e-08,
                 'adam_weight_decay': 1e-06,
                 'learning_rate': 7.2e-05},
  'optimizer_type': <OptimizerType.Adam: 1>,
  'pointwise_layer_size': 287,
  'task': 'ner'}


In [4]:
# Experiments to run. Each entry provides:
#   name        -- label printed in the banner of the run cell
#   description -- passed to Experiment(...)
#   loss / f1   -- initial values; not read by the cells visible here
#                  (presumably placeholders for results -- TODO confirm)
#   rc          -- run-configuration overrides merged on top of the baseline
#                  parameters in the run cell (empty = plain baseline)
experiments = [
    dict(
        name='Baseline',
        description='Classification of the CoNLL-2003 NER task',
        loss=1000,
        f1=-1,
        rc={},
    ),
]

In [5]:
# Record the code revision this run was produced with.
# The helper is called exactly once: a bare expression that is not the last
# line of a cell is never displayed, so an extra call would only repeat the
# lookup and discard the result.
# NOTE(review): the recorded output shows "b'95a112a'", which suggests the
# helper returns the textual repr of a bytes object -- consider decoding
# inside utils.get_current_git_commit().
print('Current commit: ' + utils.get_current_git_commit())

Current commit: b'95a112a'


In [6]:
# Run every configured experiment.
#
# The loop variable is deliberately NOT called `e`: the body binds `e` to the
# Experiment object, and reusing the name for both the config dict and the
# Experiment made the body order-dependent and easy to break when edited.
# `e`, `df` and `e_path` are still bound at notebook level after the loop
# (later cells read `df` and `e_path`).
#
# NOTE(review): `df` and `e_path` are overwritten on every iteration, so only
# the last experiment's results survive the loop. This is fine while
# `experiments` holds a single entry.
for experiment_cfg in experiments:
    name = experiment_cfg['name']
    print(f'#########################################################################\n\nExperiment Name: {name}\n')
    print('#########################################################################\n\n')

    # Parameter precedence (later entries win): baseline < fixed settings for
    # this notebook < per-experiment overrides from 'rc'.
    test_params = {
        **baseline,
        'num_epochs': 35,
        'language': 'en',
        **experiment_cfg['rc'],
    }

    e = Experiment(main_experiment_name, experiment_cfg['description'], default_params, test_params, dsl, runs=5)
    # run() yields a pair: an object with `.to_pickle` (used as a dataframe by
    # the save cell) and a path used as the directory for the saved results.
    df, e_path = e.run()

#########################################################################

Experiment Name: Baseline

#########################################################################


Experiment CoNLL2003-FinalExperiment initialized
Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\CoNLL2003-FinalExperiment\20190430\0
Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\CoNLL2003-FinalExperiment\20190430\1


#########################################################################
Name: CoNLL2003-FinalExperiment
Description: Classification of the CoNLL-2003 NER task
#########################################################################


+----------------------------------------------------------------------------------+
|                                 Hyperparameters                                  |
+------------------------------+---------------------------------------------------+
|        


pre_training - DEBUG - train with cuda support
pre_training - INFO - 1171 Iterations per epoch with batch size of 12
pre_training - INFO - Total iterations: 40985
pre_training - INFO - Total number of samples: 491820
pre_training - INFO - START training.


A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	14k	0.07		0.71		0.835		0.831		2.07m - 2.1m / 0.0m
2	28k	0.02		0.48		0.898		0.896		2.08m - 4.2m / 72.5m
3	42k	0.02		0.44		0.911		0.909		2.16m - 6.4m / 72.9m
4	56k	0.01		0.45		0.904		0.903		2.16m - 8.5m / 75.7m
5	70k	0.01		0.44		0.935		0.934		2.13m - 10.7m / 75.6m
6	84k	0.01		0.42		0.928		0.926		2.12m - 12.9m / 74.6m
7	98k	0.01		0.37		0.936		0.934		2.10m - 15.0m / 74.2m
8	112k	0.01		0.41		0.932		0.930		2.05m - 17.0m / 73.9m
9	126k	0.01		0.41		0.944		0.941		2.15m - 19.2m / 72.4m
10	141k	0.01		0.50		0.941		0.939		2.10m - 21.3m / 75.1m
11	155k	0.01		0.44		0.922		0.919		2.11m - 23.4m / 73.9m
12	169k	0.01		0.40		0.940		0.938		2.01m - 25.5m / 74.1m
13	183k	0.01		0.45		0.934		0.931		2.08m - 27.6m / 71.7m
14	197k	0.01		0.42		0.939		0.935		2.10m - 29.7m / 73.3m
Training duration was 1782.5369415283203
pre_training - DEBUG - --- Valid Scores ---
pre_training - INFO - TEST MACRO mean f1: 0.9758064516129032
VAL f1	0.9437711029139099 - (0.9


pre_training - DEBUG - train with cuda support
pre_training - INFO - 1171 Iterations per epoch with batch size of 12
pre_training - INFO - Total iterations: 40985
pre_training - INFO - Total number of samples: 491820
pre_training - INFO - START training.


A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	14k	0.07		1.11		0.738		0.731		2.13m - 2.1m / 0.0m
2	28k	0.02		0.71		0.889		0.884		2.17m - 4.4m / 74.5m
3	42k	0.02		0.50		0.891		0.890		2.03m - 6.5m / 76.2m
4	56k	0.01		0.42		0.910		0.908		2.04m - 8.5m / 71.4m
5	70k	0.01		0.39		0.935		0.932		2.12m - 10.7m / 71.9m
6	84k	0.01		0.48		0.928		0.924		2.15m - 12.8m / 74.2m
7	98k	0.01		0.39		0.938		0.935		2.09m - 14.9m / 75.3m
8	112k	0.01		0.42		0.928		0.925		2.01m - 17.0m / 73.4m
9	126k	0.01		0.46		0.927		0.923		2.03m - 19.0m / 71.4m
10	141k	0.01		0.45		0.938		0.935		2.02m - 21.0m / 71.7m
11	155k	0.01		0.42		0.929		0.926		2.14m - 23.2m / 71.6m
12	169k	0.01		0.47		0.932		0.930		2.11m - 25.3m / 74.5m
13	183k	0.01		0.46		0.932		0.929		2.08m - 27.4m / 73.9m
14	197k	0.01		0.41		0.940		0.938		2.05m - 29.5m / 73.2m
15	211k	0.01		0.43		0.931		0.928		2.03m - 31.5m / 72.6m
16	225k	0.01		0.38		0.940		0.938		2.03m - 33.6m / 72.0m
17	239k	0.01		0.49		0.925		0.922		2.02m - 35.6m / 72.1m
18	253k	0.



dataset loaded. Duration: 4.873996257781982
pre_training - INFO - Classes: ['O', 'PER', 'ORG', 'LOC', 'MISC']
pre_training - INFO - TransformerTagger (
  (encoder): TransformerEncoder(
    (positional_encoding): PositionalEncoding2(
      (dropout): Dropout(p=0.302424)
    )
    (encoder_blocks): ModuleList(
      (0): EncoderBlock(
        (self_attention_layer): MultiHeadedSelfAttentionLayer(
          (query_projections): Linear(in_features=300, out_features=300, bias=False)
          (key_projections): Linear(in_features=300, out_features=300, bias=False)
          (value_projections): Linear(in_features=300, out_features=300, bias=False)
          (attention_layer): ScaledDotProductAttentionLayer(
            (dropout): Dropout(p=0.302424)
          )
          (w_0): Linear(in_features=300, out_features=300, bias=False)
          (dropout): Dropout(p=0.302424)
        )
        (feed_forward_layer): PointWiseFCLayer(
          (w_1): Linear(in_features=300, out_features=287, bi

A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	14k	0.06		1.26		0.716		0.702		2.01m - 2.0m / 0.0m
2	28k	0.02		0.66		0.879		0.873		2.05m - 4.1m / 70.2m
3	42k	0.02		0.63		0.881		0.879		1.97m - 6.1m / 71.6m
4	56k	0.01		0.40		0.908		0.906		1.97m - 8.0m / 69.1m
5	70k	0.01		0.48		0.903		0.901		1.98m - 10.0m / 69.2m
6	84k	0.01		0.51		0.902		0.899		1.97m - 12.0m / 69.4m
7	98k	0.01		0.50		0.910		0.908		1.98m - 14.0m / 69.0m
8	112k	0.01		0.51		0.914		0.910		1.97m - 16.0m / 69.4m
9	126k	0.01		0.42		0.930		0.927		1.97m - 18.0m / 69.3m
10	141k	0.01		0.54		0.908		0.905		1.99m - 20.0m / 69.3m
11	155k	0.01		0.51		0.900		0.896		2.01m - 22.0m / 69.8m
12	169k	0.01		0.46		0.914		0.911		2.00m - 24.0m / 70.2m
13	183k	0.01		0.46		0.908		0.906		1.98m - 26.0m / 70.0m
14	197k	0.01		0.49		0.917		0.914		1.98m - 28.0m / 69.7m
Training duration was 1683.1723942756653
pre_training - DEBUG - --- Valid Scores ---
pre_training - INFO - TEST MACRO mean f1: 0.9758064516129032
VAL f1	0.9299016163035839 - (0.9


pre_training - DEBUG - train with cuda support
pre_training - INFO - 1171 Iterations per epoch with batch size of 12
pre_training - INFO - Total iterations: 40985
pre_training - INFO - Total number of samples: 491820
pre_training - INFO - START training.


A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	14k	0.06		1.34		0.719		0.702		2.00m - 2.0m / 0.0m
2	28k	0.02		0.80		0.877		0.869		2.15m - 4.2m / 70.0m
3	42k	0.02		0.64		0.863		0.860		2.17m - 6.4m / 75.2m
4	56k	0.01		0.41		0.923		0.921		1.96m - 8.3m / 75.8m
5	70k	0.01		0.41		0.923		0.921		1.99m - 10.3m / 69.2m
6	84k	0.01		0.61		0.888		0.885		1.98m - 12.3m / 69.9m
7	98k	0.01		0.52		0.902		0.899		1.99m - 14.3m / 69.8m
8	112k	0.01		0.54		0.903		0.898		1.96m - 16.3m / 70.0m
9	126k	0.01		0.43		0.926		0.923		1.99m - 18.3m / 69.3m
10	141k	0.01		0.51		0.923		0.919		1.98m - 20.3m / 70.0m
11	155k	0.01		0.45		0.924		0.920		1.99m - 22.3m / 69.8m
12	169k	0.01		0.50		0.915		0.913		1.99m - 24.3m / 70.0m
13	183k	0.01		0.47		0.919		0.916		2.00m - 26.3m / 70.0m
14	197k	0.01		0.49		0.936		0.934		1.99m - 28.3m / 70.4m
15	211k	0.01		0.47		0.914		0.911		1.99m - 30.3m / 70.1m
16	225k	0.01		0.42		0.931		0.929		1.94m - 32.3m / 70.1m
17	239k	0.01		0.56		0.917		0.913		1.97m - 34.3m / 69.1m
18	253k	0.


pre_training - DEBUG - train with cuda support
pre_training - INFO - 1171 Iterations per epoch with batch size of 12
pre_training - INFO - Total iterations: 40985
pre_training - INFO - Total number of samples: 491820
pre_training - INFO - START training.


A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	14k	0.06		0.86		0.793		0.787		2.00m - 2.0m / 0.0m
2	28k	0.02		0.61		0.878		0.874		2.02m - 4.0m / 70.1m
3	42k	0.02		0.44		0.908		0.906		2.22m - 6.3m / 70.9m
4	56k	0.01		0.40		0.919		0.918		2.12m - 8.4m / 77.3m
5	70k	0.01		0.41		0.928		0.926		2.23m - 10.7m / 74.0m
6	84k	0.01		0.48		0.928		0.925		2.13m - 12.8m / 77.4m
7	98k	0.01		0.41		0.930		0.927		2.16m - 15.0m / 74.6m
8	112k	0.01		0.36		0.934		0.933		2.15m - 17.2m / 75.5m
9	126k	0.01		0.41		0.936		0.934		2.15m - 19.3m / 75.1m
10	141k	0.01		0.44		0.939		0.938		2.11m - 21.4m / 75.2m
11	155k	0.01		0.39		0.930		0.926		2.12m - 23.6m / 74.2m
12	169k	0.01		0.43		0.938		0.936		2.11m - 25.7m / 74.4m
13	183k	0.01		0.42		0.925		0.923		2.13m - 27.8m / 74.2m
14	197k	0.01		0.42		0.943		0.940		2.13m - 30.0m / 74.7m
15	211k	0.01		0.39		0.931		0.929		2.05m - 32.1m / 74.8m
16	225k	0.01		0.43		0.939		0.936		2.03m - 34.1m / 73.1m
17	239k	0.01		0.54		0.906		0.902		2.05m - 36.2m / 72.7m
18	253k	0.

In [8]:
# Persist the result dataframe of the last executed experiment inside that
# experiment's directory (`e_path`), using pickle format.
p = os.path.join(e_path, 'exp_df.pkl')
print(f'Save dataframe of experiment to {p}')
df.to_pickle(p)

Save dataframe of experiment to C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\CoNLL2003-FinalExperiment\20190430\0\exp_df.pkl
