In [3]:
import tensorflow_hub as hub
import tensorflow_text as text
import tensorflow as tf
import numpy as np
from keras.utils.np_utils import to_categorical
from keras.utils.vis_utils import plot_model
import keras
from bert.tokenization import FullTokenizer
import re

In [4]:
! pip install bert-tensorflow
! pip install tensorflow_text



In [11]:
# Some constants
index_to_tag = {0: '-PAD-', 1: 'ADJ', 2: 'ADP', 3: 'ADV', 4: 'CONJ', 5: 'DET', 6: 'NOUN', 7: 'NUM', 8: 'PRON', 9: 'PRT', 10: 'PUNCT', 11: 'VERB', 12: 'X'}
max_seq_length = 72
checkpoint_path = 'drive/MyDrive/my_best_model_8.hdf5'
n_tags = 13

In [7]:
input_word_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,
                                       name="input_word_ids")
input_mask = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,
                                   name="input_mask")
segment_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32,
                                    name="segment_ids")

In [8]:
bert_model = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1",
                            trainable=True)

In [9]:
bert_inputs = [input_word_ids, input_mask, segment_ids]
bert_output = bert_model(bert_inputs)[1]
outputs = keras.layers.Dense(n_tags, activation=keras.activations.softmax)(bert_output)
model = keras.models.Model(inputs=bert_inputs, outputs=outputs)
model.summary(200)

Model: "model"
________________________________________________________________________________________________________________________________________________________________________________________________________
 Layer (type)                                                     Output Shape                                Param #                 Connected to                                                      
 input_word_ids (InputLayer)                                      [(None, 72)]                                0                       []                                                                
                                                                                                                                                                                                        
 input_mask (InputLayer)                                          [(None, 72)]                                0                       []                                             

In [12]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
model.load_weights(checkpoint_path)

# Testing model with validation data

In [14]:
! git clone https://github.com/Vishisht-rao/POS-Tagger-Using-Transformers.git

Cloning into 'POS-Tagger-Using-Transformers'...
remote: Enumerating objects: 47, done.[K
remote: Counting objects: 100% (47/47), done.[K
remote: Compressing objects: 100% (43/43), done.[K
remote: Total 47 (delta 9), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (47/47), done.


In [15]:
val_input_ids = np.load('POS-Tagger-Using-Transformers/preprocessing_outputs_val/val_input_ids.npz')
val_input_masks = np.load('POS-Tagger-Using-Transformers/preprocessing_outputs_val/val_input_masks.npz')
val_type_ids = np.load('POS-Tagger-Using-Transformers/preprocessing_outputs_val/val_type_ids.npz')
val_tags = np.load('POS-Tagger-Using-Transformers/preprocessing_outputs_val/val_tags.npz')

In [16]:
val_input_ids = val_input_ids['arr_0']
val_input_masks = val_input_masks['arr_0']
val_type_ids = val_type_ids['arr_0']
val_tags = val_tags['arr_0']

In [24]:
val_tags_pred = model.predict([val_input_ids, val_input_masks, val_type_ids], batch_size=16)

In [25]:
val_tags_pred_argmax = val_tags_pred.argmax(-1)
val_tags_argmax = val_tags.argmax(-1)

In [26]:
val_tags_pred_argmax[0]

array([ 0,  8, 11,  5,  6,  2,  5,  6,  4,  5,  1,  6,  8, 11,  8,  2,  5,
        6,  2,  6, 11,  2,  5,  6, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0])

In [27]:
val_tags_argmax[0]

array([ 0,  8, 11,  5,  6,  2,  5,  6,  4,  5,  1,  6,  8, 11,  8,  2,  5,
        6,  2,  6, 11,  2,  5,  6, 10,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0])

In [29]:
sum = 0
incorrect = 0
for i in range(val_tags_argmax.shape[0]):
  for j in range(val_tags_argmax.shape[1]):
    if val_tags_argmax[i][j] != val_tags_pred_argmax[i][j]:
      incorrect += 1
  sum += (val_tags_argmax.shape[1]-incorrect)/val_tags_argmax.shape[1]

  incorrect = 0
  

sum = sum/val_tags_argmax.shape[0]

print('Accuracy with unseen validation data:')
print(sum)

Accuracy with unseen validation data:
0.9949425287356447
