In [1]:
import numpy as np 
import pandas as pd 
import tensorflow as tf


**Importing the dataset and the external test file for the named entity recognition model**

In [2]:
# Load the main dataset and the external test set.
# Blank cells are forward-filled from the row above; this applies to every column of
# each frame, not only "Sentence #".
# NOTE(review): the absolute Windows paths tie this cell to one machine — consider a
# configurable data directory.
data = pd.read_csv(r"E:\New Practical\Data Files\patterns sentences final tokenized v1.csv", encoding="utf8").ffill()
# `encoding` is not a parameter of read_excel in recent pandas releases (passing it
# raises TypeError there), so it is omitted for the Excel file.
test_data = pd.read_excel(r"E:\New Practical\Data Files\ner external test tokenized.xlsx").ffill()

In [3]:
# Load the saved Keras model from the .h5 file; the path is relative to the notebook's
# working directory.
model=tf.keras.models.load_model('lab_bilstm_model_adam_30p_tokenized_100.h5')

> **Create a list of lists of (word, tag) tuples, one inner list per sentence, so that the sentences are kept separate from each other**

In [4]:
class SentenceGetter(object):
    """Group a token-level DataFrame into sentences of (word, tag) pairs.

    The frame must provide the columns "Sentence #", "Word" and "Tag"; rows that
    share a "Sentence #" value form one sentence.

    Attributes:
        n_sent: 1-based counter of the next sentence returned by get_next().
        dataset: the DataFrame passed to the constructor.
        empty: flag initialised to False; it is never read or updated by this class.
        grouped: result of the group-by, indexed by "Sentence #", each entry being
            the list of (word, tag) tuples of that sentence.
        sentences: the entries of ``grouped`` as a plain list, in group order.
    """

    def __init__(self, dataset):
        self.n_sent = 1
        self.dataset = dataset
        self.empty = False
        self.grouped = self.dataset.groupby("Sentence #").apply(self._pair_words_with_tags)
        self.sentences = [s for s in self.grouped]

    @staticmethod
    def _pair_words_with_tags(group):
        """Return the (word, tag) tuples for the rows of one sentence group."""
        return list(zip(group["Word"].values.tolist(), group["Tag"].values.tolist()))

    def get_next(self):
        """Return the next sentence, or None when its key is not present.

        Sentences are looked up by the key "sentence: <n_sent>"; the counter only
        advances when the lookup succeeds.
        """
        try:
            s = self.grouped["sentence: {}".format(self.n_sent)]
        except KeyError:
            # Only a missing key means "no more sentences"; any other error is a real
            # problem and is allowed to propagate instead of being silently swallowed.
            return None
        self.n_sent += 1
        return s

In [5]:
# Build the per-sentence lists of (word, tag) tuples from the main dataset.
getter = SentenceGetter(data)
sentences = getter.sentences

In [6]:
# Vocabulary: the distinct values of the "Word" column plus a trailing "ENDPAD" entry,
# which therefore sits at index n_words - 1. The tag list is built the same way.
# NOTE(review): list(set(...)) has no guaranteed order for strings across interpreter
# runs (string hash randomisation), yet these positions become the model's input ids
# and are used to decode its predictions (tags[pred]). Presumably they must match the
# order used when the loaded model was trained — confirm, and consider saving the
# word/tag lists next to the model instead of rebuilding them here.
words = list(set(data["Word"].values))
words.append("ENDPAD")
n_words = len(words)
tags = list(set(data["Tag"].values))
n_tags = len(tags)
print(tags)

['I-value', 'O', 'B-unit', 'B-space', 'B-flag', 'B-left', 'I-unit', 'B-line', 'I-test', 'B-center', 'B-ref', 'B-value', 'B-test', 'I-ref']


In [7]:
# Lookup tables mapping each word / tag to its position in `words` / `tags`.
word2idx = dict(zip(words, range(len(words))))
tag2idx = dict(zip(tags, range(len(tags))))

In [8]:
# Length, in tokens, of the longest sentence in the main dataset.
maxlen = max(map(len, sentences))
print ('Maximum sequence length:', maxlen)

Maximum sequence length: 100


In [10]:
# Reverse lookup: index -> tag.
idx2tag = dict((index, tag) for tag, index in tag2idx.items())

# Columns of the external test set that the following cells iterate over.
df_words, df_tags, df_sent = test_data['Word'], test_data['Tag'], test_data['Sentence #']


In [11]:
sentences_list = []
current_sentence = []
for i, w in enumerate (df_words):
    if i == 0 or (i > 0 and df_sent[i] == df_sent[i-1]):
        current_sentence.append(str(w))
    else:
        sentences_list.append(current_sentence)
        current_sentence = []
        current_sentence.append(str(w))



In [12]:
tags_list = []
current_tag = []
for i, t in enumerate (df_tags):
    if i == 0 or (i > 0 and df_sent[i] == df_sent[i-1]):
        current_tag.append(t)
    else:
        tags_list.append(current_tag)
        current_tag = []
        current_tag.append(t)
# print(tags_list)

In [13]:
def text_process(text, max_len=100, vocab=None, pad_idx=None):
    """Convert a whitespace-tokenised sentence into a padded array of word indices.

    Args:
        text: sentence whose tokens are separated by whitespace.
        max_len: length the index sequence is padded to. Longer inputs are returned
            as they are — they are neither padded nor truncated.
        vocab: mapping from word to index; defaults to the module-level ``word2idx``.
        pad_idx: index used both for padding and for words missing from ``vocab``;
            defaults to ``n_words - 1``.

    Returns:
        A tuple ``(seq, sz, words_list)``: the integer NumPy array of indices, the
        number of tokens before padding, and the list of tokens.
    """
    if vocab is None:
        vocab = word2idx
    if pad_idx is None:
        pad_idx = n_words - 1

    words_list = text.split()
    # Out-of-vocabulary words share the padding index.
    seq = [vocab.get(w, pad_idx) for w in words_list]
    sz = len(seq)

    # A non-positive repeat count yields an empty list, so long inputs are left as is.
    seq.extend([pad_idx] * (max_len - sz))
    return np.array(seq, dtype=int), sz, words_list

def get_ner(text):
    """Predict one tag per whitespace-separated token of ``text``.

    The sentence is encoded with text_process(), passed through the module-level
    ``model`` and the arg-max class of each position is decoded through ``tags``.
    The tokens '<left>', '<line>' and '<space>' always receive their fixed tag,
    whatever the model predicts for them.

    Args:
        text: sentence whose tokens are separated by whitespace.

    Returns:
        The list of predicted tags, one per token (limited to the number of
        positions the model outputs).

    Side effects:
        If a module-level list ``current_pred`` exists, the tags are also appended
        to it; callers that collect results through that list keep working.
    """
    # Tokens that are labelled directly instead of using the model's prediction.
    fixed_tags = {'<left>': 'B-left', '<line>': 'B-line', '<space>': 'B-space'}

    # NOTE(review): this reseeds NumPy's global RNG on every call; nothing in this
    # function draws random numbers — confirm it can be dropped.
    np.random.seed(0)
    seq, _, words_list = text_process(text)
    p = model.predict(np.array([seq]))
    p = np.argmax(p, axis=-1)

    # zip() stops after the last real token, so padded positions are ignored.
    predicted = [
        fixed_tags[w] if w in fixed_tags else tags[pred]
        for w, pred in zip(words_list, p[0])
    ]

    collector = globals().get('current_pred')
    if collector is not None:
        collector.extend(predicted)
    return predicted
 

In [14]:
# Tag every test sentence. get_ner() appends its tags to the module-level
# `current_pred`, so a fresh list is bound before each call and collected afterwards;
# the value assigned to `prediction` is not used in this cell.
# NOTE(review): tokens are joined with a space here and split on whitespace again inside
# text_process(), so a token that itself contains whitespace would presumably become
# several tokens and shift the predictions against tags_list — confirm the data has none.
pred_list = []
for sent in sentences_list:
    current_pred = []
    full_sent = ' '.join(sent)
    prediction = get_ner(full_sent)
    pred_list.append(current_pred)

In [15]:
# Names used by the metric cells: predicted tags and gold tags, one list per sentence.
pred_labels, test_labels = pred_list, tags_list

**Evaluating the predicted tags against the labels of the external test set**

In [18]:
from seqeval.metrics import precision_score, recall_score, f1_score, classification_report

In [19]:
# Overall precision / recall / F1 from seqeval.metrics, computed on the per-sentence
# lists of gold and predicted tags and printed as percentages with two decimals.
print("Precision: {:.2%}".format(precision_score(test_labels, pred_labels)))
print("Recall: {:.2%}".format(recall_score(test_labels, pred_labels)))
print("F1-score: {:.2%}".format(f1_score(test_labels, pred_labels)))

Precision: 97.81%
Recall: 97.76%
F1-score: 97.79%


In [20]:
# Per-tag report from sklearn_crfsuite over the same gold and predicted label lists.
# NOTE(review): the printed report contains an "O" row with support 0, i.e. "O" does not
# occur in test_labels but was presumably predicted at least once. That zero-support row
# is likely what triggers the _warn_prf warning shown below, and it lowers the macro average.
from  sklearn_crfsuite.metrics import flat_classification_report  
report = flat_classification_report(y_pred=pred_labels, y_true=test_labels, digits=4)
print(report)

              precision    recall  f1-score   support

      B-flag     0.8824    0.7143    0.7895        21
      B-left     1.0000    1.0000    1.0000       931
      B-line     1.0000    1.0000    1.0000      1040
       B-ref     0.9926    0.8289    0.9034       970
     B-space     1.0000    1.0000    1.0000      2589
      B-test     0.9957    0.9968    0.9963       936
      B-unit     0.8802    0.9904    0.9320       727
     B-value     0.9830    0.9707    0.9768       955
       I-ref     0.9691    0.9680    0.9686      1752
      I-test     0.9961    0.9820    0.9890       776
      I-unit     0.8545    0.9792    0.9126        48
     I-value     0.8182    0.8471    0.8324        85
           O     0.0000    0.0000    0.0000         0

    accuracy                         0.9729     10830
   macro avg     0.8748    0.8675    0.8693     10830
weighted avg     0.9819    0.9729    0.9764     10830



  _warn_prf(average, modifier, msg_start, len(result))
