In [1]:
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras import backend as K

class utility:
    """Helper routines shared by the experiment cells.

    Groups CSV I/O, text tokenisation/padding, scikit-learn based evaluation
    metrics, pre-trained embedding loading and Keras-backend metric functions.
    The class keeps no instance state; every method only works on its arguments.
    """

    def read_CSV(self, filename):
        """Read ``filename`` with ``pandas.read_csv`` and return the DataFrame."""
        return pd.read_csv(filename)

    def get_text_label(self, df):
        """Extract the ``sentence`` and ``label`` columns of ``df`` as lists.

        Every sentence that is not already a ``str`` (e.g. a NaN read from an
        empty CSV cell, or a purely numeric sentence) is converted with
        ``str()`` so that the downstream ``split()``/tokenizer calls always
        receive text.

        Returns:
            tuple: ``(texts, labels)`` - two lists in row order.
        """
        texts = [
            sentence if isinstance(sentence, str) else str(sentence)
            for sentence in df['sentence'].tolist()
        ]
        labels = df['label'].tolist()
        return texts, labels

    def tokenize_texts(self, texts, num_words=10000):
        """Fit a Keras ``Tokenizer`` on ``texts`` and return it.

        Args:
            texts: iterable of strings to build the vocabulary from.
            num_words: value forwarded to ``Tokenizer(num_words=...)``;
                defaults to the previously hard-coded 10000.
        """
        tokenizer = Tokenizer(num_words=num_words)
        tokenizer.fit_on_texts(texts)
        return tokenizer

    def padding_texts(self, texts, maxlen):
        """Pad the integer sequences in ``texts`` to ``maxlen`` (``padding='post'``)."""
        return pad_sequences(texts, padding='post', maxlen=maxlen)

    def get_metric(self, y_true, y_pred):
        """Compute accuracy, precision, recall and F1 with scikit-learn.

        Precision, recall and F1 use ``average='binary'``: only the class
        selected by ``pos_label`` is reported, which is applicable only when
        the targets are binary.

        Returns:
            tuple: ``(accuracy, precision, recall, f1)``.
        """
        accuracyScore = accuracy_score(y_true, y_pred)
        precisionScoreBinary = precision_score(y_true, y_pred, average='binary')
        recallScoreBinary = recall_score(y_true, y_pred, average='binary')
        f1ScoreBinary = f1_score(y_true, y_pred, average='binary')

        return accuracyScore, precisionScoreBinary, recallScoreBinary, f1ScoreBinary

    def print_metric(self, accuracyScore, precisionScoreBinary, recallScoreBinary, f1ScoreBinary):
        """Print the four scores, one labelled line each, then one CSV-style line."""
        print("Accuracy: {}".format(str(accuracyScore)))
        print("Precision: {}".format(str(precisionScoreBinary)))
        print("Recall: {}".format(str(recallScoreBinary)))
        print("F1-Score: {}".format(str(f1ScoreBinary)))
        print("{},{},{},{}".format(str(accuracyScore), str(precisionScoreBinary), str(recallScoreBinary), str(f1ScoreBinary)))

    def get_testing_metric(self, y_test, y_pred):
        """Return ``get_metric(y_test, y_pred)``; kept as a separate entry point for callers."""
        return self.get_metric(y_test, y_pred)

    def write_df_csv(self, df, out_path):
        """Write ``df`` to ``out_path`` as CSV without the index column."""
        df.to_csv(out_path, index=False)

    def create_embedding_matrix(self, filepath, word_index, embedding_dim):
        """Build an embedding matrix from a whitespace-separated vector file.

        Each non-blank line of ``filepath`` is expected to hold a word followed
        by its vector components. Rows for words missing from the file stay
        all-zero, as does row 0.

        Args:
            filepath: path of the UTF-8 encoded embedding file.
            word_index: mapping ``word -> row index``.
            embedding_dim: number of leading vector components to keep.

        Returns:
            numpy.ndarray: array of shape ``(len(word_index) + 1, embedding_dim)``.
        """
        vocab_size = len(word_index) + 1  # +1 because index 0 is reserved
        embedding_matrix = np.zeros((vocab_size, embedding_dim))

        with open(filepath, encoding="utf8") as f:
            for line in f:
                parts = line.split()
                if not parts:
                    # Blank lines (e.g. a trailing newline) carry no vector;
                    # unpacking them used to raise ValueError.
                    continue
                word, vector = parts[0], parts[1:]
                if word in word_index:
                    idx = word_index[word]
                    embedding_matrix[idx] = np.array(
                        vector, dtype=np.float32)[:embedding_dim]

        return embedding_matrix

    def get_max_length_of_sentences(self, texts):
        """Return the largest whitespace-token count among ``texts`` (0 if empty)."""
        return max((len(text.split()) for text in texts), default=0)

    def get_training_trial_data(self, textsTraining, labelsTraining, textsTrial, labelsTrial, glovePath,
                                embedding_dims=(50, 100, 200, 300)):
        """Prepare padded sequences, labels and embedding matrices.

        The tokenizer and the padding length are derived from the training
        texts only and then applied to the trial texts.

        Args:
            textsTraining, labelsTraining: training sentences and labels.
            textsTrial, labelsTrial: trial (validation) sentences and labels.
            glovePath: indexable collection of embedding file paths;
                ``glovePath[i]`` is loaded with dimension ``embedding_dims[i]``.
            embedding_dims: embedding sizes to load; defaults to the
                previously hard-coded ``(50, 100, 200, 300)``.

        Returns:
            tuple: ``(X_train, X_val, y_train, y_val, vocab_size, maxlen,
            embedding_matrix)`` where ``embedding_matrix`` is a list with one
            matrix per entry of ``embedding_dims``.
        """
        textsTraining, textsTesting = np.asarray(textsTraining), np.asarray(textsTrial)
        y_train, y_val = np.asarray(labelsTraining), np.asarray(labelsTrial)

        # Tokenize words (vocabulary is fitted on the training texts only)
        tokenizer = self.tokenize_texts(textsTraining)
        X_train = tokenizer.texts_to_sequences(textsTraining)
        X_val = tokenizer.texts_to_sequences(textsTesting)

        # Adding 1 because of reserved 0 index
        vocab_size = len(tokenizer.word_index) + 1

        # Longest training sentence determines the padded length
        maxlen = self.get_max_length_of_sentences(textsTraining)

        # Pad sequences with zeros
        X_train = self.padding_texts(X_train, maxlen)
        X_val = self.padding_texts(X_val, maxlen)

        # Index glovePath explicitly so a too-short path list still fails loudly.
        embedding_matrix = [
            self.create_embedding_matrix(glovePath[position], tokenizer.word_index, dim)
            for position, dim in enumerate(embedding_dims)
        ]

        return X_train, X_val, y_train, y_val, vocab_size, maxlen, embedding_matrix

    def Average(self, list):
        """Return the arithmetic mean of ``list``.

        The parameter name shadows the built-in ``list``; it is kept so that
        keyword callers keep working. An empty input raises
        ``ZeroDivisionError``.
        """
        return sum(list) / len(list)

    def recall_m(self, y_true, y_pred):
        """Keras-backend recall: rounded true positives over rounded positives in ``y_true``.

        ``K.epsilon()`` is added to the denominator to avoid division by zero.
        Assumes ``y_true`` holds 0/1 labels and ``y_pred`` values in [0, 1] -
        TODO confirm against the model output.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision_m(self, y_true, y_pred):
        """Keras-backend precision: rounded true positives over rounded positives in ``y_pred``.

        ``K.epsilon()`` is added to the denominator to avoid division by zero.
        """
        true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
        predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision

    def f1_m(self, y_true, y_pred):
        """Keras-backend F1: harmonic mean of ``precision_m`` and ``recall_m``."""
        precision = self.precision_m(y_true, y_pred)
        recall = self.recall_m(y_true, y_pred)
        return 2*((precision*recall)/(precision+recall+K.epsilon()))

Using TensorFlow backend.


In [2]:
# CSV file names; later cells append them to datasetPath before reading.
dataTraining, dataTesting = 'TrainingTrialData.csv', 'EvaluationData.csv'

# Every directory used by the notebook is derived from this single root.
root_path = '/lab/dbms/fatyanosa'

# Dataset folder, results folder and architecture/checkpoint folder, in that order.
datasetPath, resultsPath, archPath = (
    template.format(root_path)
    for template in (
        '{}/Dataset/Suggestion Mining/',
        '{}/Server1/Suggestion Mining/Results/',
        '{}/Server1/Suggestion Mining/Architecture/',
    )
)

In [None]:
from __future__ import print_function

from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM, Bidirectional
from tensorflow.keras.layers import Conv1D, MaxPooling1D
from tensorflow.keras.datasets import imdb

import tensorflow as tf
import pandas as pd
import numpy as np

import time

# Shared helper instance; the experiment cells below call its I/O,
# tokenisation and metric methods.
util = utility()
# Number of times each experiment cell rebuilds, trains and evaluates its model.
n_run = 30

# LSTM

In [4]:
# Input dimension of the Embedding layer. It has to exceed the largest token
# id produced by the tokenizer (which keeps at most its `num_words` most
# frequent words).
max_features = 20000
batch_size = 32

print('Loading data...')
# Read data
dfTraining = util.read_CSV(datasetPath + dataTraining)
dfTesting = util.read_CSV(datasetPath + dataTesting)

# get texts and labels
textsTraining, y_train = util.get_text_label(dfTraining)
textsTesting, y_test = util.get_text_label(dfTesting)

# Tokenize words (vocabulary is fitted on the training texts only)
tokenizer = util.tokenize_texts(textsTraining)
x_train = tokenizer.texts_to_sequences(textsTraining)
x_test = tokenizer.texts_to_sequences(textsTesting)

print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Adding 1 because of reserved 0 index
vocab_size = len(tokenizer.word_index) + 1

# Pad every sequence to the length of the longest training sentence
maxlen = util.get_max_length_of_sentences(textsTraining)

# Pad sequences with zeros
x_train = util.padding_texts(x_train, maxlen)
x_test = util.padding_texts(x_test, maxlen)

print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

# Labels are converted once, outside the run loop (the conversion is loop-invariant).
y_train = np.uint8(y_train)
y_test = np.uint8(y_test)

testing_name = "LSTM"
results_file = resultsPath + testing_name + ".csv"
history_file = archPath + testing_name + ".csv"
model_file = archPath + testing_name + ".h5"

# Weight applied to each class during training.
class_weight = {0: 0.25,
                1: 0.75}

# Needed to deserialise the checkpoint, which references the custom metric by name.
dependencies = {
    'f1_m': util.f1_m
}

# Create Testing Results (truncate any previous file and write the header)
with open(results_file, "w+") as f:
    f.write("i,accuracy,precision,recall,f1Score,time\n")

for i in range(0, n_run):
    then = time.time()

    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, 128))
    model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='sigmoid'))

    # try using different optimizers and different optimizer configs
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=[util.f1_m])

    print('Train...')
    callbacks = [
        # save history to a file (rewritten on every run)
        tf.keras.callbacks.CSVLogger(str(history_file)),
        # early stopping
        tf.keras.callbacks.EarlyStopping(monitor='val_f1_m', mode='max', verbose=1, patience=10),
        # save the best model
        tf.keras.callbacks.ModelCheckpoint(model_file, monitor='val_f1_m', mode='max', verbose=1, save_best_only=True),
    ]

    # NOTE(review): the evaluation split doubles as validation data, so the
    # checkpoint is selected on the same data the final metrics are computed
    # on - the reported scores are therefore optimistic.
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=15,
              validation_data=(x_test, y_test),
              callbacks=callbacks,
              class_weight=class_weight)

    # load the saved (best) model
    saved_model = tf.keras.models.load_model(model_file, custom_objects=dependencies)
    # `Sequential.predict_classes` was removed from later TensorFlow releases;
    # thresholding the sigmoid output at 0.5 is its documented equivalent.
    y_pred = (saved_model.predict(x_test) > 0.5).astype("int32")

    # LSTM metrics on the evaluation split
    accuracyScore, precisionScoreBinary, recallScoreBinary, f1ScoreBinary = util.get_testing_metric(y_test, y_pred)

    now = time.time()
    diff = now - then
    print(diff)
    print(f1ScoreBinary)

    # save testing data (one CSV row per run)
    with open(results_file, 'a') as f:
        f.write(str(i + 1)
                + ',' + str(accuracyScore)
                + ',' + str(precisionScoreBinary)
                + ',' + str(recallScoreBinary)
                + ',' + str(f1ScoreBinary)
                + ',' + str(diff) + '\n')

Loading data...
9900 train sequences
1657 test sequences
x_train shape: (9900, 183)
x_test shape: (1657, 183)
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
620.9188227653503
0.4

Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
649.3281900882721
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_

Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
623.9814763069153
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_

Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
622.2800266742706
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1

Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
621.8490405082703
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_

Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
600.3656523227692
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
60

Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
624.1448194980621
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_

Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
620.5717704296112
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Serv

Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
619.1944615840912
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not i

Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
617.6947708129883
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_

Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
649.5211544036865
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1

Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m did not improve from 0.30977
Epoch 6/15
Epoch 00006: val_f1_m did not improve from 0.30977
Epoch 7/15
Epoch 00007: val_f1_m did not improve from 0.30977
Epoch 8/15
Epoch 00008: val_f1_m did not improve from 0.30977
Epoch 9/15
Epoch 00009: val_f1_m did not improve from 0.30977
Epoch 10/15
Epoch 00010: val_f1_m did not improve from 0.30977
Epoch 11/15
Epoch 00011: val_f1_m did not improve from 0.30977
Epoch 00011: early stopping
639.33349776268
0.41586998087954113
Build model...
Train...
Train on 9900 samples, validate on 1657 samples
Epoch 1/15
Epoch 00001: val_f1_m improved from -inf to 0.30977, saving model to /lab/dbms/fatyanosa/Server1/Suggestion Mining/Architecture/LSTM.h5
Epoch 2/15
Epoch 00002: val_f1_m did not improve from 0.30977
Epoch 3/15
Epoch 00003: val_f1_m did not improve from 0.30977
Epoch 4/15
Epoch 00004: val_f1_m did not improve from 0.30977
Epoch 5/15
Epoch 00005: val_f1_m 

# Bidirectional LSTM

In [None]:
from __future__ import print_function
import numpy as np

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional
from keras.datasets import imdb


# Input dimension of the Embedding layer. It has to exceed the largest token
# id produced by the tokenizer (which keeps at most its `num_words` most
# frequent words).
max_features = 20000
batch_size = 32

print('Loading data...')
# Read data
dfTraining = util.read_CSV(datasetPath + dataTraining)
dfTesting = util.read_CSV(datasetPath + dataTesting)

# get texts and labels
textsTraining, y_train = util.get_text_label(dfTraining)
textsTesting, y_test = util.get_text_label(dfTesting)

# Tokenize words (vocabulary is fitted on the training texts only)
tokenizer = util.tokenize_texts(textsTraining)
x_train = tokenizer.texts_to_sequences(textsTraining)
x_test = tokenizer.texts_to_sequences(textsTesting)

print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Adding 1 because of reserved 0 index
vocab_size = len(tokenizer.word_index) + 1

# Pad every sequence to the length of the longest training sentence
maxlen = util.get_max_length_of_sentences(textsTraining)

# Pad sequences with zeros
x_train = util.padding_texts(x_train, maxlen)
x_test = util.padding_texts(x_test, maxlen)

print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

y_train = np.array(y_train)
y_test = np.array(y_test)

testing_name = "BidirectionalLSTM"
results_file = resultsPath + testing_name + ".csv"
history_file = archPath + testing_name + ".csv"
model_file = archPath + testing_name + ".h5"

# Weight applied to each class during training.
class_weight = {0: 0.25,
                1: 0.75}

# Needed to deserialise the checkpoint, which references the custom metric by name.
dependencies = {
    'f1_m': util.f1_m
}

# Create Testing Results (truncate any previous file and write the header)
with open(results_file, "w+") as f:
    f.write("i,accuracy,precision,recall,f1Score,time\n")

for i in range(0, n_run):
    then = time.time()

    # The model is built from tf.keras explicitly: the callbacks and
    # load_model below are tf.keras objects, and this cell's own imports
    # rebind the bare layer names to the standalone keras package.
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Embedding(max_features, 128, input_length=maxlen))
    model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # try using different optimizers and different optimizer configs
    # f1_m must be a compiled metric: EarlyStopping and ModelCheckpoint
    # monitor 'val_f1_m', which does not exist if only 'accuracy' is tracked
    # (no checkpoint would be written and load_model below would fail).
    model.compile('adam', 'binary_crossentropy', metrics=['accuracy', util.f1_m])

    print('Train...')
    callbacks = [
        # save history to a file (rewritten on every run)
        tf.keras.callbacks.CSVLogger(str(history_file)),
        # early stopping (with epochs=4 and patience=10 it cannot trigger)
        tf.keras.callbacks.EarlyStopping(monitor='val_f1_m', mode='max', verbose=1, patience=10),
        # save the best model
        tf.keras.callbacks.ModelCheckpoint(model_file, monitor='val_f1_m', mode='max', verbose=1, save_best_only=True),
    ]

    # NOTE(review): the evaluation split doubles as validation data, so the
    # checkpoint is selected on the same data the final metrics are computed
    # on - the reported scores are therefore optimistic.
    # validation_data is passed as a tuple; some Keras versions treat a list
    # as a multi-input `x` rather than an (x, y) pair.
    model.fit(x_train, y_train,
              batch_size=batch_size,
              epochs=4,
              validation_data=(x_test, y_test),
              callbacks=callbacks,
              class_weight=class_weight)

    # load the saved (best) model
    saved_model = tf.keras.models.load_model(model_file, custom_objects=dependencies)
    # `Sequential.predict_classes` was removed from later TensorFlow releases;
    # thresholding the sigmoid output at 0.5 is its documented equivalent.
    y_pred = (saved_model.predict(x_test) > 0.5).astype("int32")

    # Bidirectional LSTM metrics on the evaluation split
    accuracyScore, precisionScoreBinary, recallScoreBinary, f1ScoreBinary = util.get_testing_metric(y_test, y_pred)

    now = time.time()
    diff = now - then
    print(diff)
    print(f1ScoreBinary)

    # save testing data (one CSV row per run)
    with open(results_file, 'a') as f:
        f.write(str(i + 1)
                + ',' + str(accuracyScore)
                + ',' + str(precisionScoreBinary)
                + ',' + str(recallScoreBinary)
                + ',' + str(f1ScoreBinary)
                + ',' + str(diff) + '\n')