In [1]:
import transformers
from transformers import TFBertForTokenClassification
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint
from conllu import parse
import numpy as np
from data_preparation_pos import ABSATokenizer, convert_examples_to_tf_dataset

In [2]:
def _read_text(path):
    """Return the whole UTF-8 content of `path`, closing the file afterwards."""
    # A context manager guarantees the handle is released even if read() fails;
    # a bare open(...).read() leaves closing to the garbage collector.
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()


# Raw CoNLL-U text of the three splits.
en_pos_train = _read_text("../data/ud/en/en_gum-ud-train.conllu")
en_pos_dev = _read_text("../data/ud/en/en_gum-ud-dev.conllu")
en_pos_test = _read_text("../data/ud/en/en_gum-ud-test.conllu")

# Parse each split into sentences with conllu.parse.
train_sentences = parse(en_pos_train)
dev_sentences = parse(en_pos_dev)
test_sentences = parse(en_pos_test)

# Label inventory: "O" plus 17 part-of-speech tags (18 labels in total).
# NOTE(review): presumably a tag's position in this list is the integer label id
# produced by convert_examples_to_tf_dataset, with "O" (index 0) used for
# positions that carry no tag -- confirm against data_preparation_pos.
tagset = ["O", "ADJ", "ADP", "ADV", "AUX", "CCONJ", "DET", "INTJ", "NOUN", "NUM", 
          "PART", "PRON", "PROPN", "PUNCT", "SCONJ", "SYM", "VERB", "X"]
num_labels = len(tagset)

# Tokenizer, config and model all come from the same pretrained checkpoint;
# num_labels is passed through the config so the token-classification head
# has one output per entry in `tagset`.
tokenizer = ABSATokenizer.from_pretrained('bert-base-multilingual-cased')
config = transformers.BertConfig.from_pretrained('bert-base-multilingual-cased', num_labels=num_labels)
model = TFBertForTokenClassification.from_pretrained('bert-base-multilingual-cased',
                                                     config=config)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing TFBertForTokenClassification: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of TFBertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier', 'dropout_37']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [3]:
batch_size = 4
epochs = 20


def _build_dataset(sentences, shuffle=False):
    """Convert parsed sentences into a batched tf.data pipeline.

    Args:
        sentences: parsed sentences for one split (train, dev or test).
        shuffle: when True, shuffle examples with a 10000-element buffer
            before batching. Only the training split needs this.

    Returns:
        The dataset from convert_examples_to_tf_dataset (max_length=512),
        optionally shuffled, grouped into batches of `batch_size`.
    """
    dataset = convert_examples_to_tf_dataset(examples=sentences, tokenizer=tokenizer,
                                             tagset=tagset, max_length=512)
    if shuffle:
        dataset = dataset.shuffle(10000)
    return dataset.batch(batch_size)


# Training data is shuffled and repeated once per epoch so that model.fit can
# draw `epochs * steps_per_epoch` batches from a single pipeline.
train_dataset = _build_dataset(train_sentences, shuffle=True).repeat(epochs)

# Evaluation splits are consumed exactly once per pass. Shuffling them does not
# affect aggregated metrics, so it is skipped (it only cost time and memory and
# made the batch order non-deterministic); the former .repeat(1) was a no-op.
dev_dataset = _build_dataset(dev_sentences)
test_dataset = _build_dataset(test_sentences)

In [4]:
# Peek at one training batch to sanity-check the features and labels.
# The builtin next() works on any iterator, whereas a `.next()` method is a
# Python 2 idiom that iterators are not required to provide.
next(iter(train_dataset))

({'input_ids': <tf.Tensor: shape=(4, 512), dtype=int32, numpy=
  array([[15785, 23902,   117, ...,     0,     0,     0],
         [11065,   112, 11231, ...,     0,     0,     0],
         [10167, 10105, 35472, ...,     0,     0,     0],
         [  100,   146,   100, ...,     0,     0,     0]])>,
  'attention_mask': <tf.Tensor: shape=(4, 512), dtype=int32, numpy=
  array([[1, 1, 1, ..., 0, 0, 0],
         [1, 1, 1, ..., 0, 0, 0],
         [1, 1, 1, ..., 0, 0, 0],
         [1, 1, 1, ..., 0, 0, 0]])>,
  'token_type_ids': <tf.Tensor: shape=(4, 512), dtype=int32, numpy=
  array([[0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0],
         [0, 0, 0, ..., 0, 0, 0]])>},
 <tf.Tensor: shape=(4, 512), dtype=int64, numpy=
 array([[14,  1, 13, ...,  0,  0,  0],
        [11,  4,  4, ...,  0,  0,  0],
        [ 2,  6,  1, ...,  0,  0,  0],
        [13, 11,  4, ...,  0,  0,  0]], dtype=int64)>)

In [5]:
# Write weights only (not the full model) to a single file, and overwrite it
# only when validation accuracy reaches a new maximum.
checkpoint = ModelCheckpoint(
    filepath='../checkpoints/multibert_pos_checkpoint.hdf5',
    monitor='val_sparse_categorical_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

In [6]:
# Labels are integer ids and the model emits raw logits, hence the sparse
# cross-entropy with from_logits=True and the matching sparse accuracy metric.
# NOTE(review): no mask or sample weights are supplied here, so padded
# positions presumably count toward both loss and accuracy, which would
# inflate the reported accuracy -- confirm and consider masking.
metric = tf.keras.metrics.SparseCategoricalAccuracy()
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)
model.compile(
    optimizer=optimizer,
    loss=loss,
    metrics=[metric],
)

In [7]:
# Number of batches needed to cover each split once. np.ceil returns a float,
# but step counts are integers, so cast explicitly instead of relying on Keras
# to accept a float.
train_steps = int(np.ceil(len(train_sentences) / batch_size))
dev_steps = int(np.ceil(len(dev_sentences) / batch_size))

# Fine-tune; the checkpoint callback saves the weights whenever validation
# accuracy improves.
model.fit(train_dataset, epochs=epochs, steps_per_epoch=train_steps,
          validation_data=dev_dataset, validation_steps=dev_steps,
          callbacks=[checkpoint])

Epoch 1/20
Epoch 00001: val_sparse_categorical_accuracy improved from -inf to 0.99704, saving model to ../checkpoints/multibert_pos_checkpoint.hdf5
Epoch 2/20
Epoch 00002: val_sparse_categorical_accuracy improved from 0.99704 to 0.99741, saving model to ../checkpoints/multibert_pos_checkpoint.hdf5
Epoch 3/20
Epoch 00003: val_sparse_categorical_accuracy improved from 0.99741 to 0.99768, saving model to ../checkpoints/multibert_pos_checkpoint.hdf5
Epoch 4/20
Epoch 00004: val_sparse_categorical_accuracy improved from 0.99768 to 0.99784, saving model to ../checkpoints/multibert_pos_checkpoint.hdf5
Epoch 5/20
Epoch 00005: val_sparse_categorical_accuracy improved from 0.99784 to 0.99787, saving model to ../checkpoints/multibert_pos_checkpoint.hdf5
Epoch 6/20
Epoch 00006: val_sparse_categorical_accuracy improved from 0.99787 to 0.99790, saving model to ../checkpoints/multibert_pos_checkpoint.hdf5
Epoch 7/20
Epoch 00007: val_sparse_categorical_accuracy improved from 0.99790 to 0.99791, saving 

KeyboardInterrupt: 

In [None]:
# Restore the best weights written by the ModelCheckpoint callback so that the
# test score reflects the best validation epoch rather than whatever state the
# model was in when training stopped or was interrupted.
model.load_weights('../checkpoints/multibert_pos_checkpoint.hdf5')

# One full pass over the test split; the step count is cast to int because
# np.ceil returns a float.
model.evaluate(test_dataset, steps=int(np.ceil(len(test_sentences) / batch_size)))