In [425]:
import keras.utils
from keras.callbacks import TensorBoard, CSVLogger
from keras.preprocessing.text import text_to_word_sequence
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Embedding
from keras.layers import Dense,Flatten,LSTM,Conv1D,GlobalMaxPool1D,Dropout,Bidirectional
from keras import optimizers
from keras.layers import Input
from keras.models import Model
from keras.utils.vis_utils import plot_model
from keras.utils.vis_utils import model_to_dot
from keras.utils import pad_sequences
from keras.models import load_model

import pandas as pd
import os.path
import pickle 
import numpy as np
from IPython.display import SVG
import time
import re
import glob
import gensim
from gensim.models import Word2Vec

import nltk
from nltk.stem.porter import PorterStemmer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.tokenize import TreebankWordTokenizer
from nltk.stem import WordNetLemmatizer
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')
nltk.download('averaged_perceptron_tagger')

import spacy
!python -m spacy download en
nlp = spacy.load("en_core_web_sm")

from sklearn import preprocessing
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
label_encoder = preprocessing.LabelEncoder()
import operator

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


[38;5;3m⚠ As of spaCy v3.0, shortcuts like 'en' are deprecated. Please use the
full pipeline package name 'en_core_web_sm' instead.[0m
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting en-core-web-sm==3.5.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m77.5 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
!unzip '/content/drive/MyDrive/Datasets Sem-6/IR Datasets/Project/liar_dataset.zip'

Archive:  /content/drive/MyDrive/Datasets Sem-6/IR Datasets/Project/liar_dataset.zip
  inflating: README                  
  inflating: test.tsv                
  inflating: train.tsv               
  inflating: valid.tsv               


In [433]:
train_df = pd.read_csv('/content/train.tsv',sep='\t', header = None)
test_df = pd.read_csv('/content/test.tsv',sep='\t', header = None)
val_df = pd.read_csv('/content/valid.tsv',sep='\t', header = None)

In [434]:
train_df = train_df.drop([0, 8, 9, 10, 11, 12], axis = 1)
test_df = test_df.drop([0, 8, 9, 10, 11, 12], axis = 1)
val_df = val_df.drop([0, 8, 9, 10, 11, 12], axis = 1)

train_df.columns = ['label', 'statement', 'subject', 'speaker', 'speaker job title', 'state info', 'party affiliation', 'location of statement']
test_df.columns = ['label', 'statement', 'subject', 'speaker', 'speaker job title', 'state info', 'party affiliation', 'location of statement']
val_df.columns = ['label', 'statement', 'subject', 'speaker', 'speaker job title', 'state info', 'party affiliation', 'location of statement']

In [435]:
train_df = train_df.dropna()
train_df = train_df.reset_index(drop=True)
test_df = test_df.dropna()
test_df = test_df.reset_index(drop=True)
val_df = val_df.dropna()
val_df = val_df.reset_index(drop=True)

In [436]:
train_df = pd.concat([train_df, val_df])
train_df = train_df.reset_index()
train_df = train_df.drop(['index'], axis = 1)

In [437]:
labels_dict = {'mostly-true':4,'barely-true':2,'half-true':3,'false':1, 'true':5,'pants-fire':0}
train_df['label'] = train_df['label'].apply(lambda x: labels_dict[x])
test_df['label'] = test_df['label'].apply(lambda x: labels_dict[x])

In [438]:
train_df['speaker_enc'] = label_encoder.fit_transform(train_df['speaker'])
train_df['state info_enc'] = label_encoder.fit_transform(train_df['state info'])
train_df['speaker job title_enc'] = label_encoder.fit_transform(train_df['speaker job title'])
train_df['party affiliation_enc'] = label_encoder.fit_transform(train_df['party affiliation'])
train_df['subject_enc'] = label_encoder.fit_transform(train_df['subject'])

test_df['speaker_enc'] = label_encoder.fit_transform(test_df['speaker'])
test_df['state info_enc'] = label_encoder.fit_transform(test_df['state info'])
test_df['speaker job title_enc'] = label_encoder.fit_transform(test_df['speaker job title'])
test_df['party affiliation_enc'] = label_encoder.fit_transform(test_df['party affiliation'])
test_df['subject_enc'] = label_encoder.fit_transform(test_df['subject'])

In [439]:
def preprocess(text):
  text = text.lower() # lower - casing the text
  text = re.sub('<[^>]*>', ' ', text)
  text = re.sub('[\W]+', ' ', text)
  tokenizer = TreebankWordTokenizer()
  words = tokenizer.tokenize(text)
  text = ' '.join(words)
  stop_words = set(stopwords.words('english'))
  word_tokens = word_tokenize(text)
  filtered_sentence = [w for w in word_tokens if not w in stop_words] # removal of stopwords
  text = ' '.join(filtered_sentence)
  text = text_to_word_sequence(text)
  val = [0] * 10
  val = [vocab_dict[t] for t in text if t in vocab_dict]
  return val

def get_vocab_dict(train_data):
  vocab_dict = {}
  if not os.path.exists('vocabulary.p'):
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(train_data['statement'])
    vocab_dict = tokenizer.word_index
    pickle.dump(vocab_dict, open( "vocabulary.p", "wb" ))
  else:
    vocab_dict = pickle.load(open("vocabulary.p", "rb" ))
  return vocab_dict

In [440]:
vocab_dict = get_vocab_dict(train_df)

In [441]:
train_df['statement_id'] = train_df['statement'].apply(preprocess)
test_df['statement_id'] = test_df['statement'].apply(preprocess)

In [442]:
pos_tags = {'ADJ': 'adjective', 'ADP': 'adposition', 'ADV': 'adverb', 
            'AUX': 'auxiliary verb', 'CONJ': 'coordinating conjunction', 
            'DET': 'determiner', 'INTJ': 'interjection', 'NOUN': 'noun', 
            'NUM': 'numeral', 'PART': 'particle', 'PRON': 'pronoun', 
            'PROPN': 'proper noun', 'PUNCT': 'punctuation', 'X': 'other', 
            'SCONJ': 'subord conjunction', 'SYM': 'symbol', 'VERB': 'verb'}

pos_dict = {'NOUN' : 0, 'VERB' : 1, 'ADP' : 2, 'PROPN' : 3, 'PUNCT' : 4, 
            'DET' : 5, 'ADJ' : 6, 'NUM' : 7, 'ADV' : 8, 'PRON' : 9, 'X' : 9, 
            'PART' : 9, 'SYM' : 9, 'INTJ' : 9 }

def get_pos(statement):
  doc = nlp(statement)
  taglist = []
  for token in doc:
    taglist.append(pos_dict.get(token.pos_,max(pos_dict.values())))
  return taglist

In [443]:
train_df['pos_id'] = train_df['statement'].apply(get_pos)
test_df['pos_id'] = test_df['statement'].apply(get_pos)

In [444]:
# Creating a dictionary for storing the glove embeddings in the dictionary
glove_path = '/content/drive/MyDrive/glove.6B.100d.txt'
embeddings = {}
file1 = open(glove_path)
for i in file1:
  line = i.split()
  vector = np.asarray(line[1:], dtype = 'float32')
  embeddings[line[0]] = vector
file1.close()

emb_dimension = 100 # Embedding dimensions obtained from glove dictionary


# Creating Embedding matrix for the given vocabluary
num_words = len(vocab_dict) + 1
emb_matrix = np.zeros((num_words, emb_dimension))
for word, i in vocab_dict.items():
    emb_vector = embeddings.get(word)
    if emb_vector is not None:
        emb_matrix[i] = emb_vector

# Creating POS embedding matrix of size having maximum pos embeddings in a statement
emb_index = None
pos_embeddings = np.identity(max(pos_dict.values()), dtype=int)

In [445]:
X_train = train_df['statement_id']
y_train = train_df['label']
y_train = keras.utils.to_categorical(y_train, num_classes=6)
X_test = test_df['statement_id']
y_test = test_df['label']
y_test = keras.utils.to_categorical(y_test, num_classes=6)
X_train_pos = train_df['pos_id']
X_test_pos = test_df['pos_id']

In [446]:
vocab_length = len(vocab_dict.keys())
hidden_size = emb_dimension 
lstm_size = 100
num_steps = 15
num_epochs = 30
batch_size = 40
kernel_sizes = [3,3]
filter_size = 128

In [447]:
X_train = pad_sequences(X_train, maxlen=num_steps, padding='post',truncating='post')
X_test = pad_sequences(X_test, maxlen=num_steps, padding='post',truncating='post')
X_train_pos = pad_sequences(X_train_pos, maxlen=num_steps, padding='post',truncating='post')
X_test_pos = pad_sequences(X_test_pos, maxlen=num_steps, padding='post',truncating='post')

**Training on statement with and without POS taggings data**

In [448]:
use_pos=False
def train(model, name, use_pos=False):
  sgd = optimizers.SGD(lr=0.25, clipvalue=0.4, nesterov=True)
  adam = optimizers.Adam(lr=0.00075, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
  model.compile(optimizer=sgd,loss='categorical_crossentropy',metrics=['categorical_accuracy'],)
  tb = TensorBoard()
         
  if use_pos:
    model.fit({'main_input': X_train, 'pos_input': X_train_pos}, {'main_output': y_train}, epochs = num_epochs, batch_size = batch_size)
  else:
    model.fit({'main_input': X_train}, {'main_output': y_train}, epochs = num_epochs, batch_size = batch_size)
  predictions = predict_test(model,name,use_pos)
  return predictions
      
def predict_test(model, name, use_pos=False):   
  preds = []
  if use_pos:
    preds = model.predict([X_test], batch_size=batch_size, verbose=1)

  else:
    preds = model.predict([X_test], batch_size=batch_size, verbose=1)

  predictions = np.array([np.argmax(pred) for pred in preds])
  return predictions


def predict_train(model, name, use_pos=False):
  preds = []
  if use_pos:
    preds = model.predict([X_train], batch_size=batch_size, verbose=1)

  else:
    preds = model.predict([X_train], batch_size=batch_size, verbose=1)  
  
  predictions = np.array([np.argmax(pred) for pred in preds])
  return predictions


**CNN Model**

In [471]:
filter_without_pos = []
filter_with_pos = []
statement_input = Input(shape=(num_steps,), dtype='int32', name='main_input')
x_stmt = Embedding(vocab_length+1,emb_dimension,weights=[emb_matrix],input_length=num_steps,trainable=False)(statement_input)
pos_input = Input(shape=(num_steps,), dtype='int32', name='pos_input')
x_pos = Embedding(max(pos_dict.values()), max(pos_dict.values()), weights=[pos_embeddings], input_length=num_steps, trainable=False)(pos_input)

In [472]:
for kernel in kernel_sizes:
  x_1 = Conv1D(filters=filter_size,kernel_size=kernel)(x_stmt)
  x_1 = GlobalMaxPool1D()(x_1)
  filter_without_pos.append(x_1)
    
  x_2 = Conv1D(filters=filter_size,kernel_size=kernel)(x_pos)
  x_2 = GlobalMaxPool1D()(x_2)
  filter_with_pos.append(x_2)

conv_in1 = keras.layers.concatenate(filter_without_pos)
conv_in1 = Dropout(0.2)(conv_in1)
conv_in1 = Dense(128, activation='relu')(conv_in1)
conv_in2 = keras.layers.concatenate(filter_with_pos)
conv_in2 = Dropout(0.2)(conv_in2)
conv_in2 = Dense(128, activation='relu')(conv_in2)

x = conv_in1
if use_pos:
  x = keras.layers.concatenate([conv_in1, conv_in2])
else:
  x = conv_in1

output = Dense(6, activation='softmax', name='main_output')(x)

if use_pos:
  model_cnn = Model(inputs=[statement_input, pos_input], outputs=[output])
else:
  model_cnn = Model(inputs=[statement_input], outputs=[output])

In [451]:
predictions = train(model_cnn,'cnn',use_pos=False)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [452]:
predictions = keras.utils.to_categorical(predictions, num_classes=6)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.19      0.12      0.14        43
           1       0.25      0.25      0.25       167
           2       0.22      0.17      0.19       139
           3       0.21      0.23      0.22       182
           4       0.24      0.36      0.29       173
           5       0.20      0.13      0.16       149

   micro avg       0.23      0.23      0.23       853
   macro avg       0.22      0.21      0.21       853
weighted avg       0.22      0.23      0.22       853
 samples avg       0.23      0.23      0.23       853



In [473]:
for kernel in kernel_sizes:
  x_1 = Conv1D(filters=filter_size,kernel_size=kernel)(x_stmt)
  x_1 = GlobalMaxPool1D()(x_1)
  filter_without_pos.append(x_1)
    
  x_2 = Conv1D(filters=filter_size,kernel_size=kernel)(x_pos)
  x_2 = GlobalMaxPool1D()(x_2)
  filter_with_pos.append(x_2)

conv_in1 = keras.layers.concatenate(filter_without_pos)
conv_in1 = Dropout(0.2)(conv_in1)
conv_in1 = Dense(128, activation='relu')(conv_in1)
conv_in2 = keras.layers.concatenate(filter_with_pos)
conv_in2 = Dropout(0.2)(conv_in2)
conv_in2 = Dense(128, activation='relu')(conv_in2)

x = conv_in1
if use_pos:
  x = keras.layers.concatenate([conv_in1, conv_in2])
else:
  x = conv_in1

output = Dense(6, activation='softmax', name='main_output')(x)

if use_pos:
  model_cnn = Model(inputs=[statement_input, pos_input], outputs=[output])
else:
  model_cnn = Model(inputs=[statement_input], outputs=[output])

In [474]:
predictions = train(model_cnn,'cnn',use_pos=True)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [475]:
predictions = keras.utils.to_categorical(predictions, num_classes=6)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        43
           1       0.24      0.20      0.22       167
           2       0.22      0.13      0.16       139
           3       0.23      0.16      0.19       182
           4       0.23      0.60      0.34       173
           5       0.25      0.06      0.10       149

   micro avg       0.23      0.23      0.23       853
   macro avg       0.20      0.19      0.17       853
weighted avg       0.22      0.23      0.20       853
 samples avg       0.23      0.23      0.23       853



**LSTM Model**

In [456]:
statement_input = Input(shape=(num_steps,), dtype='int32', name='main_input')
x = Embedding(vocab_length+1,emb_dimension,weights=[emb_matrix],input_length=num_steps,trainable=False)(statement_input) 
lstm_in = LSTM(lstm_size,dropout=0.2)(x)
pos_input = Input(shape=(num_steps,), dtype='int32', name='pos_input')
x2 = Embedding(max(pos_dict.values()), max(pos_dict.values()), weights=[pos_embeddings], input_length=num_steps, trainable=False)(pos_input)
lstm_in2 = LSTM(lstm_size, dropout=0.2)(x2)

In [457]:
# LSTM model
model_lstm = Sequential()
model_lstm.add(Embedding(vocab_length+1, hidden_size, input_length=num_steps))
model_lstm.add(Dense(6, activation='softmax'))



if use_pos :
  x = keras.layers.concatenate([lstm_in, lstm_in2])
else:
  x = lstm_in

main_output = Dense(6, activation='softmax', name='main_output')(x)

if use_pos:
  model_lstm = Model(inputs=[statement_input, pos_input], outputs=[main_output])
else:
  model_lstm = Model(inputs=[statement_input], outputs=[main_output])

In [458]:
predictions = train(model_lstm,'lstm',use_pos=False)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [459]:
predictions = keras.utils.to_categorical(predictions, num_classes=6)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.08      0.19      0.11        43
           1       0.29      0.23      0.26       167
           2       0.27      0.12      0.17       139
           3       0.23      0.29      0.26       182
           4       0.20      0.17      0.18       173
           5       0.23      0.28      0.25       149

   micro avg       0.22      0.22      0.22       853
   macro avg       0.22      0.21      0.20       853
weighted avg       0.23      0.22      0.22       853
 samples avg       0.22      0.22      0.22       853



In [460]:
# LSTM model
model_lstm = Sequential()
model_lstm.add(Embedding(vocab_length+1, hidden_size, input_length=num_steps))
model_lstm.add(Dense(6, activation='softmax'))



if use_pos :
  x = keras.layers.concatenate([lstm_in, lstm_in2])
else:
  x = lstm_in

main_output = Dense(6, activation='softmax', name='main_output')(x)

if use_pos:
  model_lstm = Model(inputs=[statement_input, pos_input], outputs=[main_output])
else:
  model_lstm = Model(inputs=[statement_input], outputs=[main_output])

In [461]:
predictions = train(model_lstm,'lstm',use_pos=True)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [462]:
predictions = keras.utils.to_categorical(predictions, num_classes=6)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.15      0.12      0.13        43
           1       0.23      0.27      0.25       167
           2       0.19      0.21      0.20       139
           3       0.24      0.18      0.20       182
           4       0.26      0.27      0.26       173
           5       0.20      0.21      0.21       149

   micro avg       0.22      0.22      0.22       853
   macro avg       0.21      0.21      0.21       853
weighted avg       0.22      0.22      0.22       853
 samples avg       0.22      0.22      0.22       853



In [463]:
statement_input = Input(shape=(num_steps,), dtype='int32', name='main_input')
x = Embedding(vocab_length+1,emb_dimension,weights=[emb_matrix],input_length=num_steps,trainable=False)(statement_input) 
lstm_in = LSTM(lstm_size,dropout=0.2)(x)
pos_input = Input(shape=(num_steps,), dtype='int32', name='pos_input')
x2 = Embedding(max(pos_dict.values()), max(pos_dict.values()), weights=[pos_embeddings], input_length=num_steps, trainable=False)(pos_input)
lstm_in2 = LSTM(lstm_size, dropout=0.2)(x2)

In [464]:
# BiLSTM model
model_bilstm = Sequential()
model_bilstm.add(Embedding(vocab_length+1, hidden_size, input_length=num_steps))
model_bilstm.add(Bidirectional(LSTM(hidden_size)))
model_bilstm.add(Dense(6, activation='softmax'))



if use_pos :
  x = keras.layers.concatenate([lstm_in, lstm_in2])
else:
  x = lstm_in

main_output = Dense(6, activation='softmax', name='main_output')(x)

if use_pos:
  model_bilstm = Model(inputs=[statement_input, pos_input], outputs=[main_output])
else:
  model_bilstm = Model(inputs=[statement_input], outputs=[main_output])

In [465]:
predictions = train(model_bilstm,'BiLSTM',use_pos=False)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [466]:
predictions = keras.utils.to_categorical(predictions, num_classes=6)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.22      0.05      0.08        43
           1       0.26      0.35      0.30       167
           2       0.26      0.10      0.15       139
           3       0.26      0.24      0.25       182
           4       0.24      0.39      0.30       173
           5       0.24      0.19      0.22       149

   micro avg       0.25      0.25      0.25       853
   macro avg       0.25      0.22      0.21       853
weighted avg       0.25      0.25      0.24       853
 samples avg       0.25      0.25      0.25       853



In [467]:
# BiLSTM model
model_bilstm = Sequential()
model_bilstm.add(Embedding(vocab_length+1, hidden_size, input_length=num_steps))
model_bilstm.add(Bidirectional(LSTM(hidden_size)))
model_bilstm.add(Dense(6, activation='softmax'))



if use_pos :
  x = keras.layers.concatenate([lstm_in, lstm_in2])
else:
  x = lstm_in

main_output = Dense(6, activation='softmax', name='main_output')(x)

if use_pos:
  model_bilstm = Model(inputs=[statement_input, pos_input], outputs=[main_output])
else:
  model_bilstm = Model(inputs=[statement_input], outputs=[main_output])

In [468]:
predictions = train(model_bilstm,'BiLSTM',use_pos=True)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [469]:
predictions = keras.utils.to_categorical(predictions, num_classes=6)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.14      0.14      0.14        43
           1       0.26      0.25      0.25       167
           2       0.23      0.18      0.20       139
           3       0.22      0.26      0.24       182
           4       0.18      0.26      0.22       173
           5       0.26      0.15      0.19       149

   micro avg       0.22      0.22      0.22       853
   macro avg       0.22      0.21      0.21       853
weighted avg       0.22      0.22      0.22       853
 samples avg       0.22      0.22      0.22       853

