In [None]:
import nltk
# Fetch the NLTK resources used by the rest of the notebook.
# 'brown' is the tagged corpus read later through nltk.corpus.brown.
nltk.download('brown')
# 'universal_tagset' is needed for brown.tagged_sents(tagset="universal").
nltk.download('universal_tagset')
# 'tagsets' is unpacked under help/ (see the download log) — presumably tag documentation; verify it is still needed.
nltk.download('tagsets')

[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.
[nltk_data] Downloading package universal_tagset to /root/nltk_data...
[nltk_data]   Unzipping taggers/universal_tagset.zip.
[nltk_data] Downloading package tagsets to /root/nltk_data...
[nltk_data]   Unzipping help/tagsets.zip.


True

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from nltk.corpus import brown #Brown Corpus 
from collections import defaultdict 
from sklearn.metrics import confusion_matrix,plot_confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import MultiLabelBinarizer
from keras.preprocessing.sequence import pad_sequences
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, LSTM, InputLayer, Bidirectional, TimeDistributed, Embedding, Activation
from keras.optimizers import Adam
from nltk.metrics import ConfusionMatrix
from itertools import chain 

In [None]:
'''
Constant declarations
'''
# The 12 tag names of the universal tagset (the tagger adds its own 'PAD' id on top of these).
UNIVERSAL_TAGSET =['NOUN', 'DET', 'ADJ', 'ADP', '.', 'VERB', 'CONJ', 'NUM', 'ADV', 'PRT', 'PRON', 'X']
# Number of (word, tag) tokens in the whole Brown corpus; used below as the
# denominator when deriving true negatives / per-tag accuracy.
TOTAL_TAGGED_WORDS = len(brown.tagged_words())
# Brown sentences as sequences of (word, tag) pairs, tags mapped to the universal tagset.
tagged_sentences = brown.tagged_sents(tagset = "universal")

In [None]:
'''
Pre-processing
'''

def separate_tags(tagged_sentences):
  '''
  Split tagged sentences into parallel lists of word arrays and tag arrays.

  Parameters
  ----------
  tagged_sentences : iterable of sequences of (word, tag) pairs.

  Returns
  -------
  (sentences, sentence_tags) : two lists of equal length. Element i of each
  list is a 1-D numpy array holding the words / the tags of sentence i.
  An empty sentence is kept (as a pair of empty string arrays) so both lists
  stay aligned with the input.
  '''
  sentences, sentence_tags = [], []
  for sentence_plus_tag in tagged_sentences:
    pairs = list(sentence_plus_tag)
    if not pairs:
      # zip(*[]) yields nothing, so the two-name unpacking below would raise
      # ValueError; represent the empty sentence explicitly instead.
      sentences.append(np.array([], dtype=str))
      sentence_tags.append(np.array([], dtype=str))
      continue
    sentence, tags = zip(*pairs)
    sentences.append(np.array(sentence))
    sentence_tags.append(np.array(tags))
  return sentences, sentence_tags
  

In [None]:
class BiLSTM_POS():
  '''
  Bidirectional-LSTM part-of-speech tagger for a single train/test split.

  Typical use: construct with the split, call generate(), then read
  tag_metrics / test_pred_dataY / test_dataY.

  Attributes set by the pipeline:
    word_index, tag_index : dicts mapping lower-cased word / tag -> integer id
                            (id 0 is 'PAD'; word id 1 is 'OOV').
    train_dataX/Y, test_dataX/Y : integer-encoded (then padded) sequences.
    MAX_LENGTH : length of the longest encoded training sentence.
    test_pred_dataY : predicted tag ids for the test sentences.
    tag_metrics : per-tag Precision / Recall / F1_score / Accuracy.
  '''

  def __init__(self, train_sentences, test_sentences, train_tags, test_tags):
    '''
    Store the split; nothing is encoded or trained until generate() is called.

    train_sentences / test_sentences : sequences of word sequences.
    train_tags / test_tags           : tag sequences aligned with the sentences.
    '''
    self.train_sentences = train_sentences
    self.train_tags = train_tags
    self.test_sentences = test_sentences
    self.test_tags = test_tags
    self.word_index = {}
    self.tag_index = {}
    self.train_dataX = []
    self.train_dataY = []
    self.test_dataX = []
    self.test_dataY = []
    self.MAX_LENGTH = 0
    self.tag_metrics = defaultdict(lambda: defaultdict(lambda: 0))
    self.test_pred_dataY = []
    self.model = Sequential()

  def word_to_dict(self):
    '''
    Build the word -> id and tag -> id dictionaries from the TRAINING data of
    this instance (not from notebook globals).

    Words are lower-cased. Ids 0 and 1 are reserved for 'PAD' and 'OOV' in
    word_index; id 0 is reserved for 'PAD' in tag_index.
    '''
    unique_words, unique_tags = set(), set()

    for sent in self.train_sentences:
      for w in sent:
        unique_words.add(w.lower())

    for tags in self.train_tags:
      for tag in tags:
        unique_tags.add(tag)

    # Start from fresh dictionaries so the method can safely be re-run.
    self.word_index = {'PAD': 0, 'OOV': 1}
    self.tag_index = {'PAD': 0}

    for i, w in enumerate(unique_words, start=2):
      self.word_index[w] = i

    # Tag ids follow set iteration order. Code that maps ids back to tag names
    # should therefore use this instance's tag_index rather than rebuild one.
    for i, t in enumerate(unique_tags, start=1):
      self.tag_index[t] = i

  def _encode_words(self, sentences):
    '''Map every word to its id; words unseen in training map to 'OOV'.'''
    oov = self.word_index['OOV']
    return [[self.word_index.get(w.lower(), oov) for w in sent] for sent in sentences]

  def _encode_tags(self, tag_sequences):
    '''Map every tag to its id (KeyError for a tag unseen in training).'''
    return [[self.tag_index[t] for t in tags] for tags in tag_sequences]

  def word_to_vec(self):
    '''
    Convert the stored sentences and tags to lists of integer ids.

    The four data attributes are rebuilt from scratch (not appended to), so
    calling this more than once does not duplicate data.
    '''
    self.train_dataX = self._encode_words(self.train_sentences)
    self.test_dataX = self._encode_words(self.test_sentences)
    self.train_dataY = self._encode_tags(self.train_tags)
    self.test_dataY = self._encode_tags(self.test_tags)

  def max_length(self):
    '''
    Record the length of the longest encoded training sentence (0 if there
    are no training sentences).
    '''
    self.MAX_LENGTH = max((len(seq) for seq in self.train_dataX), default=0)

  def padding_sequences(self):
    '''
    Pad (post) all encoded sequences to MAX_LENGTH so they share one length.

    MAX_LENGTH comes from the training data only; the same maxlen is applied
    to test inputs and test tags.
    '''
    self.train_dataX = pad_sequences(self.train_dataX, maxlen=self.MAX_LENGTH, padding='post')
    self.test_dataX = pad_sequences(self.test_dataX, maxlen=self.MAX_LENGTH, padding='post')
    self.train_dataY = pad_sequences(self.train_dataY, maxlen=self.MAX_LENGTH, padding='post')
    self.test_dataY = pad_sequences(self.test_dataY, maxlen=self.MAX_LENGTH, padding='post')

  def generate(self, batch_size=128, epochs=4, validation_split=0.1):
    '''
    Run the full pipeline: build vocabularies, encode, pad, build and train
    the model, predict on the test split and compute per-tag metrics.

    batch_size, epochs, validation_split : forwarded to model.fit; the
    defaults reproduce the original hard-coded training setup.
    '''
    self.word_to_dict()
    self.word_to_vec()
    self.max_length()
    self.padding_sequences()
    self.Model()
    self.model.fit(
        self.train_dataX,
        self.to_categorical(self.train_dataY, len(self.tag_index)),
        batch_size=batch_size,
        epochs=epochs,
        validation_split=validation_split)
    # predict_classes is deprecated (see the warning captured in this notebook);
    # the argmax over the softmax axis is its documented replacement.
    self.test_pred_dataY = np.argmax(self.model.predict(self.test_dataX), axis=-1)
    self.calc_tag_metrics()

  def conversion_to_tag(self):
    '''
    Replace the integer ids in test_pred_dataY and test_dataY by tag names
    (id 0 -> 'PAD'). Both attributes become lists of lists of strings.
    '''
    index_to_tag = {index: tag for tag, index in self.tag_index.items()}
    index_to_tag[0] = 'PAD'

    predicted_tags = [[index_to_tag[index] for index in row] for row in self.test_pred_dataY]
    actual_tags = [[index_to_tag[index] for index in row] for row in self.test_dataY]

    self.test_pred_dataY = predicted_tags
    self.test_dataY = actual_tags

  def Model(self):
    '''
    Build and compile the Bi-LSTM model and print its summary.
    '''
    self.model = Sequential()
    self.model.add(InputLayer(input_shape=(self.MAX_LENGTH, )))       # one integer id per time step
    self.model.add(Embedding(len(self.word_index), 128))              # word id -> 128-d vector
    self.model.add(Bidirectional(LSTM(256, return_sequences=True)))   # one output per time step
    self.model.add(TimeDistributed(Dense(len(self.tag_index))))       # per-step scores over tag ids
    self.model.add(Activation('softmax'))                             # per-step tag distribution

    self.model.compile(loss='categorical_crossentropy', optimizer=Adam(0.001), metrics=['accuracy'])

    self.model.summary()

  def to_categorical(self, sequences, categories):
    '''
    One-hot encode equally long sequences of integer ids.

    Returns a float array of shape (len(sequences), sequence_length, categories).
    '''
    ids = np.asarray(sequences, dtype=int)
    # Row k of the identity matrix is the one-hot vector of id k.
    return np.eye(categories)[ids]

  @staticmethod
  def _safe_div(numerator, denominator):
    '''Return numerator/denominator, or 0.0 when the denominator is 0.'''
    return numerator / denominator if denominator else 0.0

  def calc_tag_metrics(self):
    '''
    Compute per-tag Precision, Recall, F1_score and Accuracy on the test split
    and store them in self.tag_metrics.

    Every compared position counts, padded positions included, so the padding
    id gets its own entry. True negatives are derived from the number of
    positions actually compared. A metric whose denominator is zero (e.g.
    precision of a tag that was never predicted) is reported as 0.0.
    '''
    counter_dict = defaultdict(lambda: defaultdict(lambda: 0))
    total = 0

    for actual_row, predicted_row in zip(self.test_dataY, self.test_pred_dataY):
      for actual, predicted in zip(actual_row, predicted_row):
        total += 1
        if actual == predicted:
          counter_dict[actual]['TP'] += 1
        else:
          counter_dict[actual]['FN'] += 1
          counter_dict[predicted]['FP'] += 1

    for tag in list(counter_dict.keys()):
      tp = counter_dict[tag]['TP']
      fn = counter_dict[tag]['FN']
      fp = counter_dict[tag]['FP']
      tn = total - tp - fn - fp
      counter_dict[tag]['TN'] = tn

      precision = self._safe_div(tp, tp + fp)
      recall = self._safe_div(tp, tp + fn)
      self.tag_metrics[tag]['Precision'] = precision
      self.tag_metrics[tag]['Recall'] = recall
      self.tag_metrics[tag]['F1_score'] = self._safe_div(2 * (precision * recall), precision + recall)
      self.tag_metrics[tag]['Accuracy'] = self._safe_div(tp + tn, total)

  def generate_confusion_matrix(self):
    '''
    Print the confusion matrix (actual vs. predicted) for this fold.
    '''
    CM = ConfusionMatrix(list(chain.from_iterable(self.test_dataY)), list(chain.from_iterable(self.test_pred_dataY)))
    print(CM)

  def print_sample(self):
    '''
    Print up to 5 actual / predicted tag sequences for reference.
    '''
    for i in range(min(5, len(self.test_dataY))):
      print("Actual :", self.test_dataY[i])
      print("Predicted :", self.test_pred_dataY[i])

  def get_tag_metrics(self):
    '''
    Print the per-tag precision, recall, F1 score and accuracy.
    '''
    row = "{:<10} {:<10} {:<10} {:<10} {:<10}"
    print(row.format('TAG', 'PRECISION', 'RECALL', 'F1_SCORE', 'ACCURACY'))

    for key in self.tag_metrics.keys():
      precision = str(round(self.tag_metrics[key]['Precision'], 2))
      recall    = str(round(self.tag_metrics[key]['Recall'], 2))
      F1_score  = str(round(self.tag_metrics[key]['F1_score'], 2))
      accuracy  = str(round(self.tag_metrics[key]['Accuracy'], 2))
      print(row.format(key, precision, recall, F1_score, accuracy))

  


In [None]:
'''
Calculate average of scores obtained using 5-fold cross validation
'''

def avg(dict_list,folds):
    '''
    Print per-tag precision, recall and F1 score averaged over the folds.

    NOTE: a later cell of this notebook defines another `avg` that takes an
    extra leading `tag_indexes` argument; once that cell has run, this
    definition is shadowed.

    Parameters
    ----------
    dict_list : list of per-fold metric dicts, each shaped
                {tag: {'Precision': p, 'Recall': r, 'F1_score': f, ...}}.
    folds     : number by which every per-fold score is divided. A tag that is
                missing from a fold therefore contributes 0 for that fold.

    Returns None; the table is written to stdout with the tag key as label.
    '''
    reported = ('Precision', 'Recall', 'F1_score')
    avg_dict = defaultdict(lambda: defaultdict(lambda:0))
    for fold_metrics in dict_list:
        for tag in fold_metrics.keys():
            for metric in reported:
                avg_dict[tag][metric] += fold_metrics[tag][metric]/folds

    row = "{:<10} {:<10} {:<10} {:<10} "
    print (row.format('TAG', 'PRECISION', 'RECALL','F1_SCORE'))

    for key in avg_dict.keys():
        precision, recall, F1_score = (str(round(avg_dict[key][metric], 2)) for metric in reported)
        print (row.format(key, precision,recall,F1_score))

In [None]:
'''
Finding accuracies via 5 fold cross validation
'''

sentences, sentence_tags = separate_tags(tagged_sentences)

# Object arrays so that a fold's index array can select whole sentences.
sent = np.array(sentences,dtype=object)
sent_tags = np.array(sentence_tags,dtype=object)
# Fixed random_state: the same sentences land in the same folds on every run.
# (Model weight initialisation is not seeded here, so scores can still vary slightly.)
kfold = KFold(n_splits=5,shuffle=True,random_state=42)
tag_metric_list = []       # one per-tag metrics dict per fold
predicted_tags_list = []   # flattened predicted tag ids over all folds
actual_tags_list = []      # flattened gold tag ids over all folds

for train, test in kfold.split(sent):
    # Keep these exact variable names: other cells may read them as notebook globals.
    train_sentences = list(sent[train])
    test_sentences = list(sent[test])
    train_tags = list(sent_tags[train])
    test_tags = list(sent_tags[test])
    lstm = BiLSTM_POS(train_sentences,test_sentences,train_tags, test_tags)
    lstm.generate()
    tag_metric_list.append(lstm.tag_metrics)
    predicted_tags_list.extend(list(chain.from_iterable(lstm.test_pred_dataY)))
    actual_tags_list.extend(list(chain.from_iterable(lstm.test_dataY)))
    


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 180, 128)          5758976   
_________________________________________________________________
bidirectional (Bidirectional (None, 180, 512)          788480    
_________________________________________________________________
time_distributed (TimeDistri (None, 180, 13)           6669      
_________________________________________________________________
activation (Activation)      (None, 180, 13)           0         
Total params: 6,554,125
Trainable params: 6,554,125
Non-trainable params: 0
_________________________________________________________________
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.pr

In [None]:
'''
Calculate average of scores obtained using 5-fold cross validation
'''

def avg(tag_indexes,dict_list,folds):
    '''
    Print per-tag precision, recall and F1 score averaged over the folds.

    Parameters
    ----------
    tag_indexes : mapping from the keys used in the metric dicts (tag ids)
                  to the label printed in the TAG column.
    dict_list   : list of per-fold metric dicts, each shaped
                  {tag: {'Precision': p, 'Recall': r, 'F1_score': f, ...}}.
    folds       : number by which every per-fold score is divided. A tag that
                  is missing from a fold therefore contributes 0 for that fold.

    Returns None; the table is written to stdout.
    '''
    reported = ('Precision', 'Recall', 'F1_score')
    avg_dict = defaultdict(lambda: defaultdict(lambda:0))
    for fold_metrics in dict_list:
        for tag in fold_metrics.keys():
            for metric in reported:
                avg_dict[tag][metric] += fold_metrics[tag][metric]/folds

    row = "{:<10} {:<10} {:<10} {:<10} "
    print (row.format('TAG', 'PRECISION', 'RECALL','F1_SCORE'))

    for key in avg_dict.keys():
        precision, recall, F1_score = (str(round(avg_dict[key][metric], 2)) for metric in reported)
        print (row.format(tag_indexes[key], precision,recall,F1_score))

In [None]:
def conversion_to_tag(tag_index, predicted_tags_list, actual_tags_list):
    '''
    Translate flat lists of integer tag ids back to tag names.

    tag_index maps tag name -> id; it is inverted here and id 0 is always
    labelled 'PAD'. Returns (id -> name dict, predicted names, actual names).
    '''
    # Invert the mapping, then force the padding id to read 'PAD'.
    tag_indexes = {number: name for name, number in tag_index.items()}
    tag_indexes[0] = 'PAD'

    predicted_tags = [tag_indexes[number] for number in predicted_tags_list]
    actual_tags = [tag_indexes[number] for number in actual_tags_list]

    return tag_indexes, predicted_tags, actual_tags

In [None]:
# Reporting precision, recall and F1 score averaged over the folds.
# Reuse the tag -> id mapping built by the tagger itself: re-deriving it from a
# fresh set relies on set iteration order, which is not guaranteed to
# reproduce the ids the model was trained with.
# NOTE(review): this still assumes every fold assigned the same id to each tag — confirm.
tag_index = dict(lstm.tag_index)
tag_index['PAD'] = 0

tag_indexes, predicted, actual = conversion_to_tag(tag_index, predicted_tags_list, actual_tags_list)

avg(tag_indexes, tag_metric_list,5)

# Printing final confusion matrix
# NOTE : This is the confusion matrix computed over the total of all
# predictions obtained from the FOLDS=5 folds.
# Divide by FOLDS in case an average is required.
# nltk's ConfusionMatrix expects (reference, test): gold tags first, then the
# predictions, so that rows are the actual tags and columns the predicted ones.
print(ConfusionMatrix(actual,predicted))

# Printing overall accuracy 

#print("OVERALL ACCURACY :",round(overall_accuracy,4)*100,"%")




TAG        PRECISION  RECALL     F1_SCORE   
DET        0.99       0.99       0.99       
NOUN       0.96       0.98       0.97       
ADJ        0.94       0.91       0.92       
VERB       0.97       0.97       0.97       
ADP        0.97       0.98       0.98       
.          1.0        1.0        1.0        
PAD        1.0        1.0        1.0        
PRT        0.94       0.91       0.93       
NUM        0.97       0.92       0.95       
ADV        0.94       0.92       0.93       
PRON       0.99       0.98       0.99       
CONJ       0.99       1.0        0.99       
X          0.86       0.23       0.36       
     |                                       C               N                       P               V         |
     |               A       A       A       O       D       O       N       P       R       P       E         |
     |               D       D       D       N       E       U       U       A       O       R       R         |
     |       .       J       P 

In [None]:
'''
To calculate the overall accuracy for the data
'''
class overall_accuracy:
    '''
    Per-tag metrics and overall tagging accuracy for two aligned, flat lists
    of tag labels (actual vs. predicted).
    '''

    def __init__(self, test_actual_tags, test_predicted_tags):
        '''
        The test_actual_tags contains the actual tags of the test sentences.
        The test_predicted_tags contains the predicted tags for the test data.
        Both are flat sequences; position i of one corresponds to position i of the other.
        '''
        self.counter_dict = defaultdict(lambda: defaultdict(lambda:0))
        self.test_actual_tags = test_actual_tags
        self.test_predicted_tags = test_predicted_tags
        self.tag_metrics = defaultdict(lambda: defaultdict(lambda:0))

    @staticmethod
    def _ratio(numerator, denominator):
        '''Return numerator/denominator, or 0.0 when the denominator is 0.'''
        return numerator/denominator if denominator else 0.0

    def calc_tag_metrics(self):
        '''
        Calculate per-tag Precision, Recall, F1_score and Accuracy.

        Results are stored in self.tag_metrics and the raw TP/FN/FP/TN counts
        in self.counter_dict. True negatives are derived from the number of
        positions in test_actual_tags. A metric whose denominator is zero is
        stored as 0.0 instead of being skipped.
        '''
        counter_dict = defaultdict(lambda: defaultdict(lambda:0))

        for i, actual_tag in enumerate(self.test_actual_tags):
            predicted_tag = self.test_predicted_tags[i]
            if actual_tag == predicted_tag:
                counter_dict[actual_tag]['TP'] += 1
            else:
                counter_dict[actual_tag]['FN'] += 1
                counter_dict[predicted_tag]['FP'] += 1

        total = len(self.test_actual_tags)

        for tag in list(counter_dict.keys()):
            tp = counter_dict[tag]['TP']
            fn = counter_dict[tag]['FN']
            fp = counter_dict[tag]['FP']
            tn = total - tp - fn - fp
            counter_dict[tag]['TN'] = tn

            precision = self._ratio(tp, tp + fp)
            recall = self._ratio(tp, tp + fn)
            self.tag_metrics[tag]['Precision'] = precision
            self.tag_metrics[tag]['Recall'] = recall
            self.tag_metrics[tag]['F1_score'] = self._ratio(2*(precision*recall), precision + recall)
            self.tag_metrics[tag]['Accuracy'] = self._ratio(tp + tn, total)

        self.counter_dict = counter_dict

    def accuracy(self):
        '''
        Return the fraction of correctly tagged positions over all positions
        whose tag key is not 'PAD' (0.0 when there is no such position).
        '''
        self.calc_tag_metrics()
        TP = 0
        FN = 0
        for tag in self.counter_dict.keys():
            # Padding is not a real token, so it must not inflate the score.
            if(tag != 'PAD'):
                TP += self.counter_dict[tag]['TP']
                FN += self.counter_dict[tag]['FN']

        return self._ratio(TP, FN + TP)

In [None]:
'''
Printing the overall accuracy of the model
'''
LSTM_acc= overall_accuracy(actual, predicted)
overall_acc = LSTM_acc.accuracy()
# Scale to percent BEFORE rounding: round(x,4)*100 re-introduces binary
# floating-point noise (e.g. 97.07000000000001).
print("OVERALL ACCURACY :",round(overall_acc*100,2),"%")

OVERALL ACCURACY : 97.07000000000001 %
