In [1]:
import matplotlib

import logging
import torch

from data.data_loader import Dataset
from data.germeval2017 import germeval2017_dataset

from misc.preferences import PREFERENCES
from misc.visualizer import *
from misc.run_configuration import get_default_params
from misc import utils

from optimizer import get_default_optimizer
from criterion import NllLoss

from models.transformer.encoder import TransformerEncoder
from models.softmax_output import SoftmaxOutputLayer, OutputLayer, SoftmaxOutputLayerWithCommentWiseClass
from models.transformer_tagger import TransformerTagger
from models.transformer.train import Trainer



In [2]:
# Name of this run; it is handed to utils.create_loggers in the next cell.
experiment_name = 'generalSentimentStdModel25Ep'
# Request CUDA only when a GPU is actually available, so the notebook can
# also be executed on CPU-only machines instead of asking for CUDA
# unconditionally.
use_cuda = torch.cuda.is_available()

In [3]:
# Register default preference values: the GermEval 2017 data directory, the
# train / validation / test file names and the early-stopping setting.
PREFERENCES.defaults(
    data_root='./data/germeval2017',
    data_train='train_v1.4.tsv',    
    data_validation='dev_v1.4.tsv',
    data_test='test_TIMESTAMP1.tsv',
    early_stopping='highest_5_F1'
)

# Start from the project's default hyperparameters and override the values
# that are specific to this experiment.
hyperparameters = get_default_params()
hyperparameters.model_size = 300
hyperparameters.batch_size = 12
# NOTE(review): presumably -1 disables early stopping — confirm against the trainer.
hyperparameters.early_stopping = -1
hyperparameters.use_cuda = use_cuda
hyperparameters.language = 'de'
hyperparameters.num_epochs = 25
# create_loggers returns the experiment name used from here on. This rebinds
# experiment_name, so re-running the cell feeds the returned value back in.
experiment_name = utils.create_loggers(experiment_name=experiment_name)

Log path is  C:\Users\felix\OneDrive\Studium\Studium\6. Semester\MA\Project\ABSA-Transformer\logs\generalSentimentStdModel25Ep


In [4]:
# Build the project's Dataset wrapper for the GermEval 2017 files registered
# in PREFERENCES, then load the data with the germeval2017 loader.
dataset = Dataset(
    'germeval',
    logging.getLogger('pre_training'),
    hyperparameters,
    # Location and format of the three splits.
    data_path=PREFERENCES.data_root,
    file_format='.tsv',
    train_file=PREFERENCES.data_train,
    valid_file=PREFERENCES.data_validation,
    test_file=PREFERENCES.data_test,
    # Presumably the positions of the text and label fields — TODO confirm.
    source_index=0,
    target_vocab_index=1,
    # No explicit start / end token is passed.
    init_token=None,
    eos_token=None,
)
dataset.load_data(germeval2017_dataset)

pre_training - INFO - Getting glove with dimension 300
pre_training - INFO - Word vectors successfully loaded.
pre_training - DEBUG - Start loading dataset


A Jupyter Widget




A Jupyter Widget




A Jupyter Widget


pre_training - INFO - +-------------------------------------------------------------------------------------+
|                                     Data Loader                                     |
+---------------------------------+---------------------------------------------------+
|            Parameter            |                       Value                       |
+---------------------------------+---------------------------------------------------+
|            embedding            |           (Embedding(95161, 300), None)           |
|            train_iter           | <torchtext.data.iterator.BucketIterator [...]C50> |
|            valid_iter           | <torchtext.data.iterator.BucketIterator [...]860> |
|            test_iter            | <torchtext.data.iterator.BucketIterator [...]080> |
|            word_field           |                        None                       |
|               name              |                      germeval                     |
|        

In [5]:
# Loss sized by the number of target classes reported by the dataset.
loss = NllLoss(dataset.target_size)

# Encoder built on the dataset's source embedding, plus the
# SoftmaxOutputLayerWithCommentWiseClass output layer; TransformerTagger
# combines the two into the model that is trained below.
transformer = TransformerEncoder(
    dataset.source_embedding,
    hyperparameters=hyperparameters,
)
tagging_softmax = SoftmaxOutputLayerWithCommentWiseClass(
    hyperparameters.model_size,
    dataset.target_size,
)
model = TransformerTagger(transformer, tagging_softmax)

In [7]:
# Default optimizer for the model, then the Trainer that ties together
# model, loss, optimizer, hyperparameters and dataset for this experiment.
optimizer = get_default_optimizer(model, hyperparameters)
trainer = Trainer(
    model,
    loss,
    optimizer,
    hyperparameters,
    dataset,
    experiment_name,
    enable_tensorboard=True,
)

pre_training - INFO - TransformerTagger (
  (encoder): TransformerEncoder(
    (src_embeddings): Embedding(95161, 300)
    (positional_encoding): PositionalEncoding2(
      (dropout): Dropout(p=0.1)
    )
    (encoder_blocks): ModuleList(
      (0): EncoderBlock(
        (self_attention_layer): MultiHeadedSelfAttentionLayer(
          (query_projections): Linear(in_features=300, out_features=300, bias=False)
          (key_projections): Linear(in_features=300, out_features=300, bias=False)
          (value_projections): Linear(in_features=300, out_features=300, bias=False)
          (attention_layer): ScaledDotProductAttentionLayer(
            (dropout): Dropout(p=0.1)
          )
          (layer_norm): LayerNorm()
          (w_0): Linear(in_features=300, out_features=300, bias=False)
          (dropout): Dropout(p=0.1)
        )
        (feed_forward_layer): PointWiseFCLayer(
          (layer_norm): LayerNorm()
          (w_1): Linear(in_features=300, out_features=2048, bias=True)
 


pre_training - INFO - Classes: [' UNK ', 'neutral', 'negative', 'positive']


In [None]:
# Train for the configured number of epochs. perform_evaluation=False is
# passed here; the final evaluation is triggered explicitly in a later cell
# via trainer.perform_final_evaluation().
result = trainer.train(
    hyperparameters.num_epochs,
    use_cuda=hyperparameters.use_cuda,
    perform_evaluation=False,
)

pre_training - DEBUG - train with cuda support
pre_training - INFO - 1421 Iterations per epoch with batch size of 12
pre_training - INFO - START training.





A Jupyter Widget



A Jupyter Widget

# EP	# IT	tr loss		val loss	f1		acc		duration / total time
1	200	123.442		25.632		0.620		0.620		0.00m - 0.4m / 0.0m
1	400	117.662		23.960		0.288		0.288		0.00m - 1.7m / 0.0m
1	600	137.199		16.322		0.311		0.311		0.00m - 3.0m / 0.0m
1	800	69.068		49.160		0.301		0.301		0.00m - 4.3m / 0.0m
1	1000	56.157		2.437		0.618		0.618		0.00m - 5.5m / 0.0m
1	1200	64.892		3.757		0.623		0.623		0.00m - 6.8m / 0.0m
1	1400	36.405		6.926		0.624		0.624		0.00m - 8.1m / 0.0m
1	1421	85.839		3.336		0.630		0.630		9.76m - 9.8m / 0.0m


A Jupyter Widget



A Jupyter Widget

2	1600	41.635		8.276		0.301		0.301		9.76m - 10.2m / 244.1m
2	1800	26.139		2.438		0.627		0.627		9.76m - 11.4m / 244.1m
2	2000	29.028		1.630		0.677		0.677		9.76m - 12.6m / 244.1m
2	2200	17.640		1.423		0.612		0.612		9.76m - 13.8m / 244.1m
2	2400	13.518		5.375		0.301		0.301		9.76m - 15.0m / 244.1m
2	2600	13.359		2.297		0.205		0.205		9.76m - 16.4m / 244.1m
2	2800	8.509		3.650		0.623		0.623		9.76m - 17.7m / 244.1m
2	2842	20.804		1.730		0.647		0.647		9.77m - 19.6m / 244.1m


A Jupyter Widget



A Jupyter Widget

3	3000	9.639		1.022		0.641		0.641		9.77m - 20.0m / 244.2m
3	3200	8.738		0.960		0.675		0.675		9.77m - 21.3m / 244.2m
3	3400	6.824		1.259		0.625		0.625		9.77m - 22.6m / 244.2m
3	3600	6.199		2.024		0.439		0.439		9.77m - 23.9m / 244.2m
3	3800	5.518		1.837		0.301		0.301		9.77m - 25.1m / 244.2m
3	4000	4.715		1.961		0.301		0.301		9.77m - 26.4m / 244.2m
3	4200	5.678		1.733		0.611		0.611		9.77m - 27.7m / 244.2m
3	4263	6.372		1.699		0.614		0.614		9.91m - 29.5m / 244.2m


A Jupyter Widget



A Jupyter Widget

4	4400	5.663		1.778		0.301		0.301		9.91m - 29.8m / 247.6m
4	4600	4.886		1.979		0.079		0.079		9.91m - 31.1m / 247.6m
4	4800	4.693		0.989		0.672		0.672		9.91m - 32.3m / 247.6m
4	5000	3.019		1.174		0.624		0.624		9.91m - 33.5m / 247.6m
4	5200	2.728		1.039		0.644		0.644		9.91m - 34.7m / 247.6m
4	5400	2.295		1.012		0.629		0.629		9.91m - 36.0m / 247.6m
4	5600	1.768		1.115		0.622		0.622		9.91m - 37.2m / 247.6m
4	5684	3.475		1.007		0.642		0.642		9.55m - 39.1m / 247.6m


A Jupyter Widget



A Jupyter Widget

5	5800	1.355		0.990		0.616		0.616		9.55m - 39.3m / 239.6m
5	6000	2.772		1.027		0.623		0.623		9.55m - 40.6m / 239.6m
5	6200	1.365		1.004		0.673		0.673		9.55m - 41.8m / 239.6m
5	6400	1.443		1.001		0.658		0.658		9.55m - 43.1m / 239.6m
5	6600	1.468		1.002		0.654		0.654		9.55m - 44.3m / 239.6m
5	6800	1.198		1.036		0.622		0.622		9.55m - 45.6m / 239.6m
5	7000	1.352		1.086		0.428		0.428		9.55m - 46.9m / 239.6m
5	7105	1.523		1.018		0.665		0.665		9.58m - 48.7m / 239.6m


A Jupyter Widget



A Jupyter Widget

6	7200	1.030		0.999		0.658		0.658		9.58m - 48.9m / 240.4m
6	7400	0.969		1.075		0.543		0.543		9.58m - 50.1m / 240.4m
6	7600	1.262		1.092		0.433		0.433		9.58m - 51.3m / 240.4m
6	7800	0.916		1.023		0.623		0.623		9.58m - 52.5m / 240.4m
6	8000	1.105		1.020		0.635		0.635		9.58m - 53.7m / 240.4m
6	8200	1.037		1.012		0.673		0.673		9.58m - 54.9m / 240.4m
6	8400	1.242		1.034		0.628		0.628		9.58m - 56.1m / 240.4m
6	8526	1.080		1.032		0.653		0.653		9.24m - 57.9m / 240.4m


A Jupyter Widget



A Jupyter Widget

7	8600	1.055		0.961		0.662		0.662		9.24m - 58.1m / 233.5m
7	8800	1.062		1.041		0.660		0.660		9.24m - 59.5m / 233.5m
7	9000	0.986		1.024		0.657		0.657		9.24m - 60.7m / 233.5m
7	9200	1.039		0.997		0.635		0.635		9.24m - 62.0m / 233.5m
7	9400	0.980		0.988		0.626		0.626		9.24m - 63.3m / 233.5m
7	9600	0.910		0.999		0.662		0.662		9.24m - 64.5m / 233.5m
7	9800	0.847		1.002		0.649		0.649		9.24m - 65.8m / 233.5m
7	9947	0.981		1.049		0.611		0.611		9.82m - 67.8m / 233.5m


A Jupyter Widget



A Jupyter Widget

8	10000	0.994		1.016		0.636		0.636		9.82m - 68.1m / 244.7m
8	10200	1.297		1.028		0.651		0.651		9.82m - 69.4m / 244.7m
8	10400	0.897		0.985		0.639		0.639		9.82m - 70.7m / 244.7m
8	10600	0.998		1.057		0.651		0.651		9.82m - 72.0m / 244.7m
8	10800	0.930		1.093		0.530		0.530		9.82m - 73.2m / 244.7m
8	11000	0.924		1.001		0.673		0.673		9.82m - 74.5m / 244.7m
8	11200	0.910		1.014		0.637		0.637		9.82m - 75.8m / 244.7m
8	11368	0.986		1.026		0.643		0.643		9.83m - 77.8m / 244.7m


A Jupyter Widget

In [None]:
# Default size for every figure drawn below.
plt.rcParams.update({'figure.figsize': [10, 10]})

In [None]:
# Rebind `model` to the entry stored under 'model' in the training result
# (presumably the trained network — confirm what trainer.train returns).
model = result['model']

In [None]:
# Classify a single raw sentence through the trainer and print the labels.
# NOTE(review): the example sentence is English while this run is configured
# with language 'de' on GermEval data — confirm this is intentional.
result_labels = trainer.classify_sentence('I was born in 1993 in Stuttgart')



print(result_labels)

In [None]:
# Shorthands used by the following cells: entry 1 of the trainer's
# text_reverser and the trainer's label_reverser.
tr, lr = trainer.text_reverser[1], trainer.label_reverser

# Example token list shown as this cell's output; the next cell rebinds
# test_sentence before it is used.
test_sentence = ['china', 'controlled', 'most', 'of', 'the', 'match']
test_sentence

In [None]:
# Run the German example through tr.preprocess, trim surrounding spaces from
# every resulting token and wrap the token list in an outer list, so that
# test_sentence is a list containing one token list.
test_sentence = [
    [token.strip(' ') for token in tr.preprocess('Die Bahn ist nicht gut')]
]
test_sentence

In [None]:

# Convert the prepared sentence into a tensor, show it (raw, size and
# reversed back to text), run the model on it and map the prediction back
# to label strings.
x = tr.process(test_sentence)

print("X TENSOR ",x)
print('X Size', x.size())
print("Reversed X", tr.reverse(x))

# Move the input to the GPU only when the run is configured for CUDA; an
# unconditional .cuda() fails on machines without a GPU.
if hyperparameters.use_cuda:
    x = x.cuda()
y_hat = model.predict(x)
y_hat_label = lr.reverse(y_hat)
print(y_hat_label)



In [None]:
# Clear the trainer's TensorBoard writer and flag, then run the final
# evaluation and keep its return value for the cells below.
trainer.tb_writer = None
trainer.enable_tensorboard = False
evaluation_results = trainer.perform_final_evaluation()

In [None]:
# Pick element [1][2] of the evaluation results — presumably a confusion
# matrix, given the name and how it is plotted below; TODO confirm the
# layout of what perform_final_evaluation returns.
c_matrix = evaluation_results[1][2]
c_matrix

In [None]:
# Plot the confusion matrix with plot_confusion_matrix's default settings.
# NOTE(review): class_labels is not assigned in any visible cell — it must
# come from the star import or be defined before this cell runs; confirm.
fig = plot_confusion_matrix(c_matrix, class_labels)
plt.show()

In [None]:
# Plot the same confusion matrix again, this time passing normalize=True.
# NOTE(review): class_labels is not assigned in any visible cell — it must
# come from the star import or be defined before this cell runs; confirm.
fig = plot_confusion_matrix(c_matrix, class_labels, normalize=True)
plt.show()

In [None]:
# predict now to see model in final state
from IPython.display import display, HTML

# `conll2003` is never defined in this notebook, so the original call raised
# NameError on a fresh kernel. Use the iterator held by `dataset` instead.
# NOTE(review): assumes index 2 of the old 'iters' list was the test split
# and that Dataset exposes it as `test_iter` (as listed in the loader log) —
# confirm.
df = predict_some_examples_to_df(model, dataset.test_iter, num_samples=800)
display(HTML(df.to_html()))

In [None]:
# Same prediction table for the validation data. `conll2003` is never defined
# in this notebook, so the iterator is taken from `dataset` instead.
# NOTE(review): assumes index 1 of the old 'iters' list was the validation
# split and that Dataset exposes it as `valid_iter` (as listed in the loader
# log) — confirm.
df = predict_some_examples_to_df(model, dataset.valid_iter, num_samples=800)
display(HTML(df.to_html()))

In [None]:
# Same prediction table for the training data. `conll2003` is never defined
# in this notebook, so the iterator is taken from `dataset` instead.
# NOTE(review): assumes index 0 of the old 'iters' list was the training
# split and that Dataset exposes it as `train_iter` (as listed in the loader
# log) — confirm.
df = predict_some_examples_to_df(model, dataset.train_iter, num_samples=800)
display(HTML(df.to_html()))

In [None]:
import numpy as np

# Two 2x2 integer matrices stacked along the first axis (shape (2, 2, 2)),
# written as a single nested literal.
a = np.array([
    [[1, 1], [1, 1]],
    [[2, 2], [-2, -3]],
])
print(a)

In [None]:
# Element-wise sum of the stacked matrices along the first axis.
np.sum(a, axis=0)

In [None]:
#df = predict_some_examples_to_df(model, test_sample_iter)
#print(df)

In [None]:
#print(result)

In [None]:
# Unpack the two values stored under 'result_train' in the training result.
tr_loss, tr_f1 = result['result_train']
