In [2]:
from data.conll import conll2003_dataset, extract_samples, iterate_with_sample_data
from misc.preferences import PREFERENCES
from misc.visualizer import *
from misc.hyperparameters import get_default_params
from optimizer import get_default_optimizer
from misc import utils
from models.transformer.encoder import TransformerEncoder
from models.softmax_output import SoftmaxOutputLayer, OutputLayer
from models.transformer_tagger import TransformerTagger
from models.transformer.train import Trainer
from criterion import NllLoss

In [3]:
# Base name of this run; it is handed to utils.create_loggers and later to the Trainer.
experiment_name = 'JUST_TESTING'

In [4]:
# Register preference values for the dataset location, the three split files
# and the early-stopping setting. The data_* entries are read back when the
# dataset is built.
PREFERENCES.defaults(
    data_root='./data/conll2003',
    data_train='eng.train.txt',
    data_validation='eng.testa.txt',
    data_test='eng.testb.txt',
    early_stopping='highest_5_F1'
)

# Start from the project's default hyper-parameters and override a few of them
# for this experiment.
hyper_parameters = get_default_params()
hyper_parameters.model_size = 300
hyper_parameters.batch_size = 80
# NOTE(review): PREFERENCES.early_stopping is 'highest_5_F1' while this is -1
# (presumably "disabled") -- confirm which of the two the Trainer actually reads.
hyper_parameters.early_stopping = -1
# experiment_name is rebound to whatever create_loggers returns, so re-running
# this cell feeds the returned value back in.
# NOTE(review): verify create_loggers returns a stable name, otherwise this cell is not idempotent.
experiment_name = utils.create_loggers(experiment_name=experiment_name)

Log path is  /Users/felix/Documents/Repositories/TUM/ABSA-Transformer/logs/JUST_TESTING


In [5]:
# Build the CoNLL-2003 'ner' dataset bundle on CPU, using the paths registered
# in PREFERENCES and the configured batch size. Later cells index the result by
# 'examples', 'vocabs', 'embeddings', 'iters' and 'dummy_input'.
conll2003 = conll2003_dataset(
    'ner',
    hyper_parameters.batch_size,
    root=PREFERENCES.data_root,
    train_file=PREFERENCES.data_train,
    validation_file=PREFERENCES.data_validation,
    test_file=PREFERENCES.data_test,
    use_cuda=False,
)



In [6]:
# Pull a handful of examples out of the dataset and print them as
# "token - tag" lines to sanity-check the loaded data.
samples = extract_samples(conll2003['examples'])
print_samples(samples)

-docstart- - O

#######################

eu - I-ORG
rejects - O
german - I-MISC
call - O
to - O
boycott - O
british - I-MISC
lamb - O
. - O

#######################

peter - I-PER
blackburn - I-PER

#######################

-docstart- - O

#######################

cricket - O
- - O
leicestershire - I-ORG
take - O
over - O
at - O
top - O
after - O
innings - O
victory - O
. - O

#######################

london - I-LOC
1996-08-30 - O

#######################

-docstart- - O

#######################

soccer - O
- - O
japan - I-LOC
get - O
lucky - O
win - O
, - O
china - I-PER
in - O
surprise - O
defeat - O
. - O

#######################

nadim - I-PER
ladki - I-PER

#######################



In [7]:
# Take the first vocabulary of the dataset bundle as the target (label)
# vocabulary; its size is the number of output classes used for the loss and
# the softmax layer.
# NOTE(review): assumes index 0 of conll2003['vocabs'] is the tag vocabulary -- confirm.
target_vocab = conll2003['vocabs'][0]
target_size = len(target_vocab)

In [8]:
# Loss is parameterised by the number of target labels.
loss = NllLoss(target_size)

# Encoder over the first embedding of the dataset bundle.
# With model_size = 300 and n_head = 3 the hard-coded d_k / d_v of 100 equal
# model_size / n_head.
# NOTE(review): presumably d_k and d_v must be changed together with
# model_size or n_head -- confirm against TransformerEncoder.
transformer = TransformerEncoder(
    conll2003['embeddings'][0],
    n_enc_blocks=2,
    n_head=3,
    d_model=hyper_parameters.model_size,
    d_k=100,
    d_v=100,
)

# Output layer maps model_size features to target_size labels; the tagger
# combines encoder and output layer into the final model.
tagging_softmax = SoftmaxOutputLayer(hyper_parameters.model_size, target_size)
model = TransformerTagger(transformer, tagging_softmax)


In [11]:
# Predict with the freshly initialised (untrained) model to get a baseline view
# of its output before training.
# NOTE(review): the iterator is named "test" but is built from conll2003['iters'][1];
# if the iterators are ordered (train, validation, test) this is the validation
# split -- confirm. The meaning of the literal 200 is defined by
# iterate_with_sample_data and is not visible here.
test_sample_iter = iterate_with_sample_data(conll2003['iters'][1], 200)
df = predict_some_examples_to_df(model, test_sample_iter, num_samples=50)
print(df)

HBox(children=(IntProgress(value=1, bar_style='info', description='Predicting', max=1, style=ProgressStyle(des…


       Sentence Targets Predictions  # Matches
0    -docstart-       O           ?          0
1    -docstart-       O           ?          0
2    -docstart-       O           ?          0
3           may       O           ?          0
4          june       O           ?          0
5          july       O           ?          0
6        august       O           ?          0
7    -docstart-       O           ?          0
8    -docstart-       O           ?          0
9    -docstart-       O           ?          0
10   -docstart-       O           ?          0
11   -docstart-       O           ?          0
12   -docstart-       O           ?          0
13   -docstart-       O           ?          0
14   -docstart-       O           ?          0
15   -docstart-       O           ?          0
16            :       O           ?          0
17          6-2       O           ?          0
18            :       O           ?          0
19   -docstart-       O           ?          0
20   -docsta

In [None]:
# Optimizer is created by the project's default factory from the model and the
# hyper-parameters.
optimizer = get_default_optimizer(model, hyper_parameters)

# Bundle model, loss, optimizer, hyper-parameters and data iterators into the
# Trainer. It logs every 50th iteration, has TensorBoard enabled, and receives
# the dataset's dummy input.
trainer = Trainer(
    model,
    loss,
    optimizer,
    hyper_parameters,
    conll2003['iters'],
    experiment_name,
    log_every_xth_iteration=50,
    enable_tensorboard=True,
    dummy_input=conll2003['dummy_input'],
)

In [1]:
# Run training and keep the returned result for the cells below.
# Requires the Trainer cell above to have been executed in the current kernel:
# the recorded NameError comes from running this cell first after a restart.
# NOTE(review): the argument 10 is presumably the number of epochs -- confirm against Trainer.train.
result = trainer.train(10)

NameError: name 'trainer' is not defined

In [None]:
# Predict again after training to compare against the initial predictions.
# NOTE(review): test_sample_iter was already iterated once by the initial
# prediction cell; if iterate_with_sample_data returns a one-shot generator it
# may be exhausted here -- confirm, and re-create it if so. This call also
# relies on the default num_samples rather than the 50 used earlier.
df = predict_some_examples_to_df(model, test_sample_iter)
print(df)

In [None]:
# Show the raw training result returned by trainer.train.
print(result)

In [None]:
# Unpack the 'result_train' entry of the training result into its two
# components (named loss and F1 here).
tr_loss, tr_f1 = result['result_train']
