In [35]:
# Importing libraries
import os
import pickle
import warnings
import keras

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score)
from sklearn.model_selection import (GridSearchCV, KFold, RandomizedSearchCV,
                                     learning_curve)
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.text import text_to_word_sequence
from keras.preprocessing import sequence
from keras.utils import to_categorical
from keras.layers import Input
from keras.layers import Dense,Flatten,LSTM,Conv1D,GlobalMaxPool1D,Dropout,Bidirectional
from keras.layers.embeddings import Embedding
from keras.models import Model
from keras import optimizers
from keras.callbacks import TensorBoard, CSVLogger
from keras.models import Sequential

%matplotlib inline
warnings.filterwarnings('ignore')

In [36]:
np.random.seed(42)

In [37]:
#Helper function for data preprocessing
def read_tsv(file_path):
    dataset = pd.read_csv(file_path, sep='\t', header=None)
    return dataset

def preprocess_dataset(dataset):
    columns_to_select = [1, 2]
    dataset = dataset.iloc[:, columns_to_select]
    dataset.columns = ['label', 'news']

    # Converting the multiclass labels to binary labels
    labels_map = {
        'true': 'True',
        'mostly-true': 'True',
        'half-true': 'True',
        'false': 'False',
        'barely-true': 'False',
        'pants-fire': 'False'
    }
    dataset['label'] = dataset['label'].map(labels_map)

    return dataset

def select_columns(dataset):
    columns_to_select = [1, 2]
    dataset = dataset.iloc[:, columns_to_select]
    dataset.columns = ['label', 'news']
    return dataset    

In [38]:
# Helper function to display the evaluation metrics of the different models
def show_eval_scores(model, test_set, model_name, avg = 'binary'):
    y_pred = model.predict(test_set['news'])
    y_true = test_set['label']
    f1 = f1_score(y_true, y_pred, average = avg)
    precision = precision_score(y_true, y_pred, average = avg)
    recall = recall_score(y_true, y_pred, average = avg)
    accuracy = accuracy_score(y_true, y_pred)
    
    print('Report for ---> {}'.format(model_name))
    print('Accuracy is: {}'.format(accuracy))
    print('F1 score is: {}'.format(f1))
    print('Precision score is: {}'.format(precision))
    print('Recall score is: {}'.format(recall))

In [None]:
# Importing the datasets
#train_np_data = read_tsv('/content/train.tsv')
#valid_np_data = read_tsv('/content/valid.tsv')
#test_np_data = read_tsv('/content/test.tsv')
train_np_data = pd.read_table('/content/train.tsv', names = ["id", "label", "news", "subject", "speaker", "job", "state", "party",
                                            "barely-true", "false", "half-true", "mostly-true", "pants-fire", "venue"])
valid_np_data = pd.read_table('/content/valid.tsv', names = ["id", "label", "news", "subject", "speaker", "job", "state", "party",
                                            "barely-true", "false", "half-true", "mostly-true", "pants-fire", "venue"])
test_np_data = pd.read_table('/content/test.tsv', names = ["id", "label", "news", "subject", "speaker", "job", "state", "party",
                                            "barely-true", "false", "half-true", "mostly-true", "pants-fire", "venue"])
train_np_data = select_columns(train_np_data)
valid_np_data = select_columns(valid_np_data)
test_np_data = select_columns(test_np_data)
train_data = pd.read_csv('/content/train.csv')
valid_data = pd.read_csv('/content/valid.csv')
test_data = pd.read_csv('/content/test.csv')
#train_data = preprocess_dataset(train_np_data)
#valid_data = preprocess_dataset(valid_np_data)
#test_data = preprocess_dataset(test_np_data)
print('Train dataset size: {}'.format(train_data.shape))
print('Valid dataset size: {}'.format(valid_data.shape))
print('Test dataset size: {}'.format(test_data.shape))
print('Train non-processed dataset size: {}'.format(train_np_data.shape))
print('Valid non-processed dataset size: {}'.format(valid_np_data.shape))
print('Test non-processed dataset size: {}'.format(test_np_data.shape))
train_np_data.sample(5)
#train_np_data['label'].unique()
#test_data['label'].unique()

Train dataset size: (10240, 2)
Valid dataset size: (1284, 2)
Test dataset size: (1267, 2)
Train non-processed dataset size: (10240, 2)
Valid non-processed dataset size: (1284, 2)
Test non-processed dataset size: (1267, 2)


Unnamed: 0,label,news
3842,true,Polling shows that nearly 74 percent of Nation...
6480,barely-true,I left the city with $43 million in the bank.
4521,false,Says she couldn't take stimulus money because ...
4026,mostly-true,The United States is the only industrialized c...
10111,barely-true,The Health Care and Education Reconciliation A...


In [None]:
training_np_set = pd.concat([train_np_data, valid_np_data], ignore_index=True)
print('Training non-processed set size: {}'.format(training_np_set.shape))

training_set = pd.concat([train_data, valid_data], ignore_index=True)
print('Training set size: {}'.format(training_set.shape))

Training non-processed set size: (11524, 2)
Training set size: (11524, 2)


In [None]:
countV = CountVectorizer()
train_count = countV.fit_transform(training_set['news'].values)
len(countV.get_feature_names())

12872

In [None]:
# Creating a list of stopwords
nltk.download('stopwords')
stopwords_list = list(stopwords.words('english'))
stopwords_list

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

In [None]:
lr_pipeline = Pipeline([
    ('lrCV', CountVectorizer(stop_words=stopwords_list)),
    ('lr_clf', LogisticRegression(random_state=42, n_jobs=-1))
])
param_grid = [
    {
        'lrCV__lowercase': [True, False],
        'lrCV__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
        'lr_clf__C': [0.0001, 0.00005, 0.00001]
    }
]

lr_gs = GridSearchCV(lr_pipeline, param_grid, scoring='f1', n_jobs=-1, cv=5, verbose=1)
lr_gs.fit(training_set['news'], training_set['label'])
print(lr_gs.best_params_)
print(lr_gs.best_score_)

Fitting 5 folds for each of 24 candidates, totalling 120 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:   31.7s
[Parallel(n_jobs=-1)]: Done 120 out of 120 | elapsed:  1.6min finished


{'lrCV__lowercase': True, 'lrCV__ngram_range': (1, 1), 'lr_clf__C': 0.0001}
0.7155595273927859


In [None]:
#Multiclass Logistic Regression
lr_pipeline = Pipeline([
    ('lrCV', CountVectorizer(stop_words=stopwords_list, lowercase=True, ngram_range=(1, 1))),
    ('lr_clf', LogisticRegression(solver = 'lbfgs'))
])
lr_pipeline.fit(training_np_set['news'], training_np_set['label'])
show_eval_scores(lr_pipeline, test_np_data, 'Logistic Regression Count Vectorizer', 'micro')

Report for ---> Logistic Regression Count Vectorizer
Accuracy is: 0.23599052880820837
F1 score is: 0.23599052880820837
Precision score is: 0.23599052880820837
Recall score is: 0.23599052880820837


In [None]:
#Binary Logistic Regression
lr_pipeline1 = Pipeline([
    ('lrCV', CountVectorizer(stop_words=stopwords_list, lowercase=True, ngram_range=(1, 1))),
    ('lr_clf', LogisticRegression(C=0.0001,random_state=42, n_jobs=-1))
])
lr_pipeline1.fit(training_set['news'], training_set['label'])
show_eval_scores(lr_pipeline1, test_data, 'Logistic Regression Count Vectorizer')

Report for ---> Logistic Regression Count Vectorizer
Accuracy is: 0.6108918705603789
F1 score is: 0.6662153012863913
Precision score is: 0.6448230668414154
Recall score is: 0.6890756302521008


In [39]:
 svm_pipeline = Pipeline([
     ('svm_CV', CountVectorizer(stop_words=stopwords_list)),
     ('svm_clf', SVC(random_state=42))
 ])
 param_grid = [
     {
         'svm_CV__lowercase': [True, False],
         'svm_CV__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
         'svm_clf__kernel': ['poly'],
         'svm_clf__degree': [1, 2, 3],
         'svm_clf__C' : [0.1, 1, 10]
     },
     {
         'svm_CV__lowercase': [True, False],
         'svm_CV__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
         'svm_clf__kernel': ['rbf'],
        'svm_clf__gamma': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0],
        'svm_clf__C' : [0.1, 1, 10]
    }
]
svm_gs = GridSearchCV(svm_pipeline, param_grid, scoring='f1', n_jobs=-1, cv=5, verbose=1)
svm_gs.fit(training_set['news'], training_set['label'])
print(svm_gs.best_params_)
print(svm_gs.best_score_)

In [None]:
#Multiclass SVM
svm_pipeline = Pipeline([
    ('svm_CV', CountVectorizer(stop_words=stopwords_list, lowercase=False, ngram_range=(1, 1))),
    ('svm_clf', SVC(decision_function_shape= 'ovr', probability=True))
])
svm_pipeline.fit(training_np_set['news'], training_np_set['label'])
show_eval_scores(svm_pipeline, test_np_data, 'SVM Classifier Count Vectorizer', 'micro')

Report for ---> SVM Classifier Count Vectorizer
Accuracy is: 0.2573007103393844
F1 score is: 0.2573007103393844
Precision score is: 0.2573007103393844
Recall score is: 0.2573007103393844


In [None]:
#Binary SVM
svm_pipeline1 = Pipeline([
    ('svm_CV', CountVectorizer(stop_words=stopwords_list, lowercase=False, ngram_range=(1, 1))),
    ('svm_clf', SVC(random_state=42, gamma=1.0, kernel='rbf', probability=True))
])
svm_pipeline1.fit(training_set['news'], training_set['label'])
show_eval_scores(svm_pipeline1, test_data, 'SVM Classifier Count Vectorizer')

Report for ---> SVM Classifier Count Vectorizer
Accuracy is: 0.6243093922651933
F1 score is: 0.6952624839948783
Precision score is: 0.6403301886792453
Recall score is: 0.7605042016806722


In [None]:
rf_pipeline = Pipeline([
    ('rf_CV', CountVectorizer(stop_words=stopwords_list)),
    ('rf_clf', RandomForestClassifier(n_jobs=-1, random_state=42))
])
param_grid = {
    'rf_CV__lowercase': [True, False],
    'rf_CV__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4), (1, 5)],
    'rf_clf__n_estimators': [200, 300, 400, 500],
    'rf_clf__max_depth': [i for i in range(8, 13)],
    'rf_clf__max_features': ['auto', 'sqrt', 'log2']
}
rf_gs = GridSearchCV(rf_pipeline, param_grid, scoring='f1', cv=5, verbose=1, n_jobs=-1)
rf_gs.fit(training_set['news'], training_set['label'])
print(rf_gs.best_params_)
print(rf_gs.best_score_)

In [None]:
#Multiclass Random Forest
rf_pipeline = Pipeline([
    ('rf_CV', CountVectorizer(stop_words=stopwords_list, lowercase=False, ngram_range=(1, 1))),
    ('rf_clf', RandomForestClassifier(max_depth=12, n_estimators=300, n_jobs=-1, random_state=42))
])
rf_pipeline.fit(training_np_set['news'], training_np_set['label'])
show_eval_scores(rf_pipeline, test_np_data, 'Random Forest Classifier Count Vectorizer', 'micro')

Report for ---> Random Forest Classifier Count Vectorizer
Accuracy is: 0.2123125493291239
F1 score is: 0.2123125493291239
Precision score is: 0.2123125493291239
Recall score is: 0.2123125493291239


In [None]:
#Binary Random Forest
rf_pipeline1 = Pipeline([
    ('rf_CV', CountVectorizer(stop_words=stopwords_list, lowercase=False, ngram_range=(1, 1))),
    ('rf_clf', RandomForestClassifier(max_depth=12, n_estimators=300, n_jobs=-1, random_state=42))
])
rf_pipeline1.fit(training_set['news'], training_set['label'])
show_eval_scores(rf_pipeline1, test_data, 'Random Forest Classifier Count Vectorizer')

Report for ---> Random Forest Classifier Count Vectorizer
Accuracy is: 0.5651144435674822
F1 score is: 0.7215765538150581
Precision score is: 0.5644268774703557
Recall score is: 1.0


In [None]:
#Voting Classifier for Multiclass Classification
voting_classifier = VotingClassifier(estimators=[
    ('lr', lr_pipeline), ('svm', svm_pipeline), ('rf', rf_pipeline)], voting='soft', n_jobs=-1)
voting_classifier.fit(training_np_set['news'], training_np_set['label'])
show_eval_scores(voting_classifier, test_np_data, 'Voting Classifier(soft) Count Vectorizer', 'micro')

Report for ---> Voting Classifier(soft) Count Vectorizer
Accuracy is: 0.2462509865824783
F1 score is: 0.2462509865824783
Precision score is: 0.2462509865824783
Recall score is: 0.2462509865824783


In [None]:
#Voting Classifier for  Binary Classification
voting_classifier1 = VotingClassifier(estimators=[
    ('lr', lr_pipeline1), ('svm', svm_pipeline1), ('rf', rf_pipeline1)], voting='soft', n_jobs=-1)
voting_classifier1.fit(training_set['news'], training_set['label'])
show_eval_scores(voting_classifier1, test_data, 'Voting Classifier(soft) Count Vectorizer')

Report for ---> Voting Classifier(soft) Count Vectorizer
Accuracy is: 0.5666929755327546
F1 score is: 0.7211782630777045
Precision score is: 0.5657370517928287
Recall score is: 0.9943977591036415


In [None]:
y_label_dict = {"pants-fire" : 0, "false" : 1, "barely-true" : 2, "half-true" : 3, "mostly-true" : 4, "true" : 5}
print (y_label_dict)

train_np_data['output'] = train_np_data['label'].apply(lambda x: y_label_dict[x])
valid_np_data['output'] = valid_np_data['label'].apply(lambda x: y_label_dict[x])
test_np_data['output'] = test_np_data['label'].apply(lambda x: y_label_dict[x])

{'pants-fire': 0, 'false': 1, 'barely-true': 2, 'half-true': 3, 'mostly-true': 4, 'true': 5}


In [None]:

def load_statement_vocab_dict(train_np_data):
  vocabulary_dict = {}
  tokenizer = Tokenizer()
  tokenizer.fit_on_texts(train_np_data['news'])
  vocabulary_dict = tokenizer.word_index
  print(len(vocabulary_dict))
  return vocabulary_dict


def preprocess_statement(statement):
  statement = [w for w in statement.split(' ') if w not in stopwords.words('english')]
  statement = ' '.join(statement)
  text = text_to_word_sequence(statement)
  val = [0] * 10
  val = [vocabulary_dict[t] for t in text if t in vocabulary_dict] 
  return val

vocabulary_dict = load_statement_vocab_dict(train_np_data)
train_np_data['word_id'] = train_np_data['news'].apply(preprocess_statement)
valid_np_data['word_id'] = valid_np_data['news'].apply(preprocess_statement)
test_np_data['word_id'] = test_np_data['news'].apply(preprocess_statement)


train_np_data.head()


12408


Unnamed: 0,label,news,output,word_id
0,false,Says the Annies List political group supports ...,1,"[7, 6968, 1141, 520, 621, 385, 444, 5119, 585,..."
1,half-true,When did the decline of coal start? It started...,3,"[63, 2091, 964, 866, 23, 602, 1142, 315, 180, ..."
2,mostly-true,"Hillary Clinton agrees with John McCain ""by vo...",4,"[127, 101, 3546, 191, 254, 20, 329, 343, 310, ..."
3,false,Health care reform legislation is likely to ma...,1,"[32, 43, 266, 298, 666, 667, 404, 467, 417, 4148]"
4,half-true,The economic turnaround started at the end of ...,3,"[1, 325, 4149, 602, 408, 505]"


In [None]:
train_np_data.columns

Index(['label', 'news', 'output', 'word_id'], dtype='object')

In [None]:
num_steps = 15
X_train = train_np_data['word_id']
X_val = valid_np_data['word_id']
X_test = test_np_data['word_id']

Y_train = train_np_data['output']
Y_train = to_categorical(Y_train, num_classes=6)

Y_val = valid_np_data['output']
Y_val = to_categorical(Y_val, num_classes=6)

X_train = sequence.pad_sequences(X_train, maxlen=num_steps, padding='post',truncating='post')
X_val = sequence.pad_sequences(X_val, maxlen=num_steps, padding='post',truncating='post')
X_test = sequence.pad_sequences(X_test, maxlen=num_steps, padding='post',truncating='post')

In [None]:
num_epochs = 30
batch_size = 64

def train(model, name):
  sgd = optimizers.SGD(lr=0.025, momentum = 0.0001, clipvalue=0.5, nesterov=True)
  model.compile(optimizer=sgd,loss='categorical_crossentropy',metrics=['categorical_accuracy'])
  tb = TensorBoard()
  csv_logger = keras.callbacks.CSVLogger('training.log')
  filepath= name+"_weights_best.hdf5"
  checkpoint = keras.callbacks.ModelCheckpoint(filepath, monitor='val_categorical_accuracy', 
                                             verbose=1, save_best_only=True, mode='max')
  model.fit(
        {'main_input': X_train},
        {'main_output': Y_train}, epochs = num_epochs, batch_size = batch_size,
        validation_data = (
            {'main_input': X_val},
            {'main_output': Y_val}
        ), callbacks=[tb,csv_logger,checkpoint])
  preds = model.predict([X_test], batch_size=batch_size, verbose=1)
  Y_test_compare = list(test_np_data['output'])
  predictions = np.array([np.argmax(pred) for pred in preds])
  correct = np.sum(predictions == Y_test_compare)
  print("Correctly Predicted : ", correct,"/",len(Y_test_compare))
  print("Accuracy : ", correct*100.0/len(Y_test_compare))

In [None]:
from google.colab import drive
drive.mount('/content/drive/')
!ls "/content/drive/My Drive/glove_6B_100D"


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/
32801_42887_compressed_glove.6B.100d.txt.zip  glove.6B.100d.txt


In [None]:
#!wget http://nlp.stanford.edu/data/glove.6B.zip
#!unzip glove*.zip
#!ls
embeddings = {}
with open("/content/drive/My Drive/glove_6B_100D/glove.6B.100d.txt") as file_object:
  for line in file_object:
    word_embed = line.split()
    word = word_embed[0]
    embed = np.array(word_embed[1:], dtype="float32")
    embeddings[word.lower()]= embed

EMBED_DIM = 100
print(len(embeddings), " : Word Embeddings Found")
print(len(embeddings[word]), " : Embedding Dimension")


num_words = len(vocabulary_dict) + 1
embedding_matrix = np.zeros((num_words, EMBED_DIM))
for word, i in vocabulary_dict.items():
    embedding_vector = embeddings.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

400000  : Word Embeddings Found
100  : Embedding Dimension


In [40]:
#CNN
kernel_sizes = [2,3,4]
filter_size = 128
vocab_length = len(vocabulary_dict.keys())
num_steps = 15
num_epochs = 10
batch_size = 64

kernel_stmt = []
statement_input = Input(shape=(num_steps,), dtype='int32', name='main_input')
x_stmt = Embedding(vocab_length+1,EMBED_DIM,weights=[embedding_matrix],input_length=num_steps,trainable=False)(statement_input) 

for kernel in kernel_sizes:
    x = Conv1D(filters=filter_size,kernel_size=kernel)(x_stmt)
    x = GlobalMaxPool1D()(x)
    kernel_stmt.append(x)

conv_in = keras.layers.concatenate(kernel_stmt)
conv_in = Dropout(0.8)(conv_in)
conv_in = Dense(128, activation='relu')(conv_in)

main_output = Dense(6, activation='softmax', name='main_output')(conv_in)
model_cnn = Model(inputs=[statement_input], outputs=[main_output])
print(model_cnn.summary)
train(model_cnn, 'cnn')

<bound method Network.summary of <keras.engine.training.Model object at 0x7f28a6cdcfd0>>
Train on 10240 samples, validate on 1284 samples
Epoch 1/10

Epoch 00001: val_categorical_accuracy improved from -inf to 0.21184, saving model to cnn_weights_best.hdf5
Epoch 2/10

Epoch 00002: val_categorical_accuracy did not improve from 0.21184
Epoch 3/10

Epoch 00003: val_categorical_accuracy improved from 0.21184 to 0.21651, saving model to cnn_weights_best.hdf5
Epoch 4/10

Epoch 00004: val_categorical_accuracy did not improve from 0.21651
Epoch 5/10

Epoch 00005: val_categorical_accuracy improved from 0.21651 to 0.22040, saving model to cnn_weights_best.hdf5
Epoch 6/10

Epoch 00006: val_categorical_accuracy did not improve from 0.22040
Epoch 7/10

Epoch 00007: val_categorical_accuracy improved from 0.22040 to 0.23287, saving model to cnn_weights_best.hdf5
Epoch 8/10

Epoch 00008: val_categorical_accuracy improved from 0.23287 to 0.24299, saving model to cnn_weights_best.hdf5
Epoch 9/10

Epoch 

In [None]:
# LSTM
vocab_length = len(vocabulary_dict.keys())
hidden_size = EMBED_DIM #Has to be same as EMBED_DIM
lstm_size = 100
num_steps = 15
num_epochs = 30
batch_size = 40

model_lstm = Sequential()
model_lstm.add(Embedding(vocab_length+1, hidden_size, input_length=num_steps))
model_lstm.add(Bidirectional(LSTM(hidden_size)))
model_lstm.add(Dense(6, activation='softmax'))

statement_input = Input(shape=(num_steps,), dtype='int32', name='main_input')
x = Embedding(vocab_length+1,EMBED_DIM,weights=[embedding_matrix],input_length=num_steps,trainable=False)(statement_input) 

lstm_in = LSTM(lstm_size,dropout=0.2)(x)
main_output = Dense(6, activation='softmax', name='main_output')(lstm_in)
model_lstm = Model(inputs=[statement_input], outputs=[main_output])
print(model_lstm.summary)
train(model_lstm, 'lstm')

<bound method Network.summary of <keras.engine.training.Model object at 0x7f28a6570438>>
Train on 10240 samples, validate on 1284 samples
Epoch 1/30

Epoch 00001: val_categorical_accuracy improved from -inf to 0.21184, saving model to lstm_weights_best.hdf5
Epoch 2/30

Epoch 00002: val_categorical_accuracy improved from 0.21184 to 0.22118, saving model to lstm_weights_best.hdf5
Epoch 3/30

Epoch 00003: val_categorical_accuracy improved from 0.22118 to 0.23988, saving model to lstm_weights_best.hdf5
Epoch 4/30