In [1]:
import torch
import os
import argparse
from torch.utils.data import Dataset, DataLoader
import torchtext
from collections import Counter
import numpy as np
import pandas as pd
import pickle

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
nltk.download('stopwords')
nltk.download('punkt')
sw = stopwords.words('english') 


class Tokenizer:

    def __init__(self, file, threshold=5):
        self.file = file
        self.data = pd.read_csv(file)
        self.threshold = threshold

    def preprocess(self):
        tokenizer = torchtext.data.utils.get_tokenizer('spacy', language='en')
        tokens = []
        sentence_list=[]
        for text in self.data['text'].tolist():
            tokens.append(tokenizer(text))
            sentence_list.append(text.split('.'))

        self.data['sentences_list'] = sentence_list
        counter = Counter()
        for line in tokens:
            for word in line:
                counter[word] += 1
        # print(len(counter.items()), len(counter.most_common()))

        # remove all words that have frequency less than threshold
        # counter_threshold = {k:v for k,v in counter.items() if v >= self.threshold}

        # create mappings
        # mapper = {word:idx+1 for idx,word in enumerate(counter_threshold.keys())}
        # inverse_mapper = {idx+1:word for idx,word in enumerate(counter_threshold.keys())}

        # sos_idx = len(counter_threshold.keys())
        # eos_idx = len(counter_threshold.keys()) + 1
        # other_idx = len(counter_threshold.keys()) + 2

        # mapped_tokens = []

        # for line in tokens:
        #     mapped_line = [sos_idx]
        #     for word in line:
        #       # map words to their mappings and to other otherwise
        #         mapped_line.append(mapper.get(word, other_idx))
        #     mapped_line.append(eos_idx)
        #     mapped_tokens.append(mapped_line)

        # inverse_mapper[other_idx] = "__OTHER__"
        # inverse_mapper[sos_idx] = "__SOS__"
        # inverse_mapper[eos_idx] = "__EOS__"
        # inverse_mapper[0] = "__PADDING__"

        mapper = {word[0]: idx+1 for idx,
                  word in enumerate(counter.most_common())}
        inverse_mapper = {idx+1: word[0] for idx,
                          word in enumerate(counter.most_common())}

        # sos_idx = len(counter_threshold.keys())
        # eos_idx = len(counter_threshold.keys()) + 1
        other_idx = len(counter.keys())

        mapped_tokens = []

        for line in tokens:
            mapped_line = []
            for word in line:
              # map words to their mappings and to other otherwise
                mapped_line.append(mapper.get(word, other_idx))
            mapped_tokens.append(mapped_line)

        # inverse_mapper[other_idx] = "__OTHER__"
        # inverse_mapper[sos_idx] = "__SOS__"
        # inverse_mapper[eos_idx] = "__EOS__"
        # inverse_mapper[0] = "__PADDING__"

        return mapped_tokens, inverse_mapper




def similarity_paragraph(data):
    # data = self.data
    sim_list = []
    for para in data['sentences_list'].tolist():
      sim = 2000
      start = para[0]
      para = para[1:]
      for sent in para:            
        # tokenization
        X_list = word_tokenize(start) 
        Y_list = word_tokenize(sent)
          
        # sw contains the list of stopwords
        l1 =[];l2 =[]
          
        # remove stop words from the string
        X_set = {w for w in X_list if not w in sw} 
        Y_set = {w for w in Y_list if not w in sw}
          
        # form a set containing keywords of both strings 
        rvector = X_set.union(Y_set) 
        for w in rvector:
            if w in X_set: l1.append(1) # create a vector
            else: l1.append(0)
            if w in Y_set: l2.append(1)
            else: l2.append(0)
        c = 0
          
        # cosine formula 
        for i in range(len(rvector)):
            c+= l1[i]*l2[i]
        try:
          cosine = c / float((sum(l1)*sum(l2))**0.5)
          if sim > cosine:
            sim=cosine
          # sim += cosine
        except:
          sim += 0
          
        start = sent
      
      # sim = sim/(len(para)+1)
      sim_list.append(sim)
    
    data['similarity'] = sim_list

    return data



          
          # print("similarity: ", cosine)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [4]:
# training data
data1 = pd.read_csv('/content/gdrive/MyDrive/NLP-Project/Clinton_train.csv')
data2 = pd.read_csv('/content/gdrive/MyDrive/NLP-Project/Yahoo_train.csv')
data3 = pd.read_csv('/content/gdrive/MyDrive/NLP-Project/Yelp_train.csv')
data4 = pd.read_csv('/content/gdrive/MyDrive/NLP-Project/Enron_train.csv')

data5 = pd.read_csv('/content/gdrive/MyDrive/NLP-Project/Yahoo_test.csv')
data6 = pd.read_csv('/content/gdrive/MyDrive/NLP-Project/Yelp_test.csv')
data7 = pd.read_csv('/content/gdrive/MyDrive/NLP-Project/Enron_test.csv')


In [5]:
data = pd.concat([data1, data2, data3, data4, data5, data6, data7])

In [6]:
data.to_csv('/content/gdrive/MyDrive/NLP-Project/new_train.csv')

In [7]:
# train = Tokenizer("/content/gdrive/MyDrive/GCDC_rerelease/train.csv")
# test = Tokenizer("/content/gdrive/MyDrive/GCDC_rerelease/Yahoo_test.csv")

In [8]:
train = Tokenizer("/content/gdrive/MyDrive/NLP-Project/new_train.csv")
test = Tokenizer("/content/gdrive/MyDrive/NLP-Project/Clinton_test.csv")

In [9]:
len(train.data)
# len(test.data)

4600

In [10]:
# lst=[]
# for i in range(1000):
#   if train.data['labelA'][i]==3:
#     lst.append([0,0,1])
#   elif train.data['labelA'][i]==2:
#     lst.append([0,1,0])
#   elif train.data['labelA'][i]==1:
#     lst.append([1,0,0])

# train.data['h_e']=lst

from numpy import array
from numpy import argmax
from tensorflow.keras.utils import to_categorical

lst = array(train.data['labelA'])
encoded = to_categorical(lst)
print(encoded)
# inverted = argmax(encoded[0])
# print(inverted)

# train.data['h_e'] = encoded

[[0. 0. 1. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 ...
 [0. 0. 0. 1.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]]


In [11]:
# lst=[]
# for i in range(200):
#   if test.data['labelA'][i]==3:
#     lst.append([0,0,1])
#   elif test.data['labelA'][i]==2:
#     lst.append([0,1,0])
#   elif test.data['labelA'][i]==1:
#     lst.append([1,0,0])

# test.data['h_e']=lst

lst = array(test.data['labelA'])
t_encoded = to_categorical(lst)
# test.data['h_e'] = encoded

In [12]:
train_mapping, inv_train_mapping = train.preprocess()
test_mapping, inv_test_mapping = test.preprocess()

# train.data = similarity_paragraph(train.data)
# test.data = similarity_paragraph(test.data)

In [13]:
# new_data = open('/content/drive/MyDrive/GCDC_rerelease/mapped_tokens_Yelp_train.csv.pkl','rb')
# new_t_data = open('/content/drive/MyDrive/GCDC_rerelease/mapped_tokens_Yelp_test.csv.pkl','rb')
# train_mapping = pickle.load(new_data)
# test_mapping = pickle.load(new_t_data)

len(train_mapping)
# train_mapping
train.data['encoding'] = train_mapping
test.data['encoding'] = test_mapping
train.data

Unnamed: 0.1,Unnamed: 0,text_id,subject,text,ratingA1,ratingA2,ratingA3,labelA,ratingM1,ratingM2,ratingM3,ratingM4,ratingM5,labelM,question_title,question,sentences_list,encoding
0,0,C05796441_2,,Cheryl:\n\nAre we in a good place to begin pap...,3,2,1,2,2,2,3,1,2,2,,,[Cheryl:\n\nAre we in a good place to begin pa...,"[527, 104, 15, 1437, 22, 10, 7, 75, 97, 4, 869..."
1,1,C05786430_1,Department of State,"Our friend, General Joe Ballard owns The Raven...",2,1,3,2,3,2,3,1,3,3,,,"[Our friend, General Joe Ballard owns The Rave...","[398, 369, 3, 1247, 2174, 9564, 3567, 29, 9565..."
2,2,C05780653_3,,Outstanding news! Miki Rakic called about 10 m...,2,3,3,3,2,2,3,2,3,3,,,[Outstanding news! Miki Rakic called about 10 ...,"[14037, 626, 37, 14038, 19227, 253, 54, 371, 3..."
3,3,C05782181_1,Libyan CG Pol Dirs mtg @ Istanbul @ 14:00 Thur...,Responding to separate emails from Uzra + Jeff...,1,2,1,1,2,2,1,3,1,1,,,[Responding to separate emails from Uzra + Jef...,"[19240, 4, 1518, 3176, 43, 14041, 1133, 581, 1..."
4,4,C05785147_0,Mexico,Guy from Mexico is in NY and is cooperating. D...,2,1,1,1,1,1,2,3,1,1,,,"[Guy from Mexico is in NY and is cooperating, ...","[6124, 43, 1011, 12, 10, 1566, 5, 12, 6699, 1,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4595,195,1353855,Comments of Wolak,"Wolak makes some good points. In ERCOT, Enron...",2,3,2,3,2,2,2,2,2,2,,,"[Wolak makes some good points, In ERCOT, Enr...","[36597, 433, 72, 75, 555, 1, 9, 166, 2846, 3, ..."
4596,196,1131834,NBC,The reason NBC will not take cash is the prefe...,2,2,1,1,2,2,2,2,2,2,,,[The reason NBC will not take cash is the pref...,"[29, 404, 4401, 28, 26, 134, 642, 12, 2, 4060,..."
4597,197,725369,Gallup Peak,All GC's\n\nAfter utilizing the Gallup Peak Av...,3,3,3,3,3,2,2,3,2,3,,,[All GC's\n\nAfter utilizing the Gallup Peak A...,"[359, 13652, 25, 15, 444, 18327, 2, 9773, 9547..."
4598,198,766379,New TW Contract System,"Lindy,\n\nJust wanted to let you know that we ...",3,2,1,2,2,3,2,2,2,2,,,"[Lindy,\n\nJust wanted to let you know that we...","[8271, 3, 15, 389, 202, 4, 179, 13, 68, 11, 22..."


In [14]:
train.data

Unnamed: 0.1,Unnamed: 0,text_id,subject,text,ratingA1,ratingA2,ratingA3,labelA,ratingM1,ratingM2,ratingM3,ratingM4,ratingM5,labelM,question_title,question,sentences_list,encoding
0,0,C05796441_2,,Cheryl:\n\nAre we in a good place to begin pap...,3,2,1,2,2,2,3,1,2,2,,,[Cheryl:\n\nAre we in a good place to begin pa...,"[527, 104, 15, 1437, 22, 10, 7, 75, 97, 4, 869..."
1,1,C05786430_1,Department of State,"Our friend, General Joe Ballard owns The Raven...",2,1,3,2,3,2,3,1,3,3,,,"[Our friend, General Joe Ballard owns The Rave...","[398, 369, 3, 1247, 2174, 9564, 3567, 29, 9565..."
2,2,C05780653_3,,Outstanding news! Miki Rakic called about 10 m...,2,3,3,3,2,2,3,2,3,3,,,[Outstanding news! Miki Rakic called about 10 ...,"[14037, 626, 37, 14038, 19227, 253, 54, 371, 3..."
3,3,C05782181_1,Libyan CG Pol Dirs mtg @ Istanbul @ 14:00 Thur...,Responding to separate emails from Uzra + Jeff...,1,2,1,1,2,2,1,3,1,1,,,[Responding to separate emails from Uzra + Jef...,"[19240, 4, 1518, 3176, 43, 14041, 1133, 581, 1..."
4,4,C05785147_0,Mexico,Guy from Mexico is in NY and is cooperating. D...,2,1,1,1,1,1,2,3,1,1,,,"[Guy from Mexico is in NY and is cooperating, ...","[6124, 43, 1011, 12, 10, 1566, 5, 12, 6699, 1,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4595,195,1353855,Comments of Wolak,"Wolak makes some good points. In ERCOT, Enron...",2,3,2,3,2,2,2,2,2,2,,,"[Wolak makes some good points, In ERCOT, Enr...","[36597, 433, 72, 75, 555, 1, 9, 166, 2846, 3, ..."
4596,196,1131834,NBC,The reason NBC will not take cash is the prefe...,2,2,1,1,2,2,2,2,2,2,,,[The reason NBC will not take cash is the pref...,"[29, 404, 4401, 28, 26, 134, 642, 12, 2, 4060,..."
4597,197,725369,Gallup Peak,All GC's\n\nAfter utilizing the Gallup Peak Av...,3,3,3,3,3,2,2,3,2,3,,,[All GC's\n\nAfter utilizing the Gallup Peak A...,"[359, 13652, 25, 15, 444, 18327, 2, 9773, 9547..."
4598,198,766379,New TW Contract System,"Lindy,\n\nJust wanted to let you know that we ...",3,2,1,2,2,3,2,2,2,2,,,"[Lindy,\n\nJust wanted to let you know that we...","[8271, 3, 15, 389, 202, 4, 179, 13, 68, 11, 22..."


In [15]:
test.data

Unnamed: 0,text_id,subject,text,ratingA1,ratingA2,ratingA3,labelA,ratingM1,ratingM2,ratingM3,ratingM4,ratingM5,labelM,sentences_list,encoding
0,C05760125_1,Hilda Solis Tom and Craig--,Madame Secretary:\n\nThank you for reaching ou...,3,3,3,3,2,3,2,2,2,2,[Madame Secretary:\n\nThank you for reaching o...,"[1045, 44, 36, 7, 320, 16, 14, 2849, 77, 4, 44..."
1,C05768263_2,,"Cheryl, Jake,\n\nI received a call from Masood...",3,3,3,3,2,2,2,3,2,2,"[Cheryl, Jake,\n\nI received a call from Masoo...","[199, 3, 183, 3, 7, 11, 559, 9, 123, 33, 2878,..."
2,C05771873_1,Framing Statement - State Draft,We anticipate the release of what are claimed ...,3,3,3,3,3,2,3,2,1,2,[We anticipate the release of what are claimed...,"[43, 2884, 1, 1060, 6, 78, 26, 1365, 4, 19, 40..."
3,C05768528_2,,Spoke to Ed Levine today to follow up on Frida...,3,3,3,3,3,2,2,1,3,2,[Spoke to Ed Levine today to follow up on Frid...,"[2905, 4, 495, 2906, 82, 4, 445, 63, 13, 490, ..."
4,C05775052_1,The matter I raised on my end of the converati...,Purely to update: Tom had me in for lunch at t...,2,1,3,2,2,2,3,1,2,2,[Purely to update: Tom had me in for lunch at ...,"[2924, 4, 498, 36, 396, 49, 48, 8, 14, 726, 29..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,C05782457_0,"arming the rebels, women, and small arms","Kavita Ramdas, until recently the head of the ...",3,3,2,3,1,1,2,2,2,1,"[Kavita Ramdas, until recently the head of the...","[6298, 6299, 3, 463, 790, 1, 613, 6, 1, 741, 1..."
196,C05739879_1,,I called PM el-Keib this morning to get his ta...,2,3,2,3,1,1,2,2,1,1,[I called PM el-Keib this morning to get his t...,"[11, 207, 318, 6323, 20, 6324, 17, 223, 4, 55,..."
197,C05765100_1,,Department of State Ranks High as Employer for...,2,3,2,3,2,3,3,2,3,3,[Department of State Ranks High as Employer fo...,"[127, 6, 71, 6345, 6346, 25, 6347, 14, 557, 63..."
198,C05773055_1,,Dear Hillary Wanted to take a minute to thank ...,2,3,2,3,2,2,2,2,3,2,[Dear Hillary Wanted to take a minute to thank...,"[187, 285, 6367, 4, 115, 9, 2824, 4, 324, 16, ..."


# GRU model 

In [16]:
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import GRU
from keras.layers import SimpleRNN
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence

In [17]:
np.random.seed(7)
X_train = sequence.pad_sequences(train.data['encoding'],maxlen = 500)
y_train = encoded#train.data['h_e']
X_test = sequence.pad_sequences(test.data['encoding'],maxlen=500)
y_test = t_encoded#test.data['h_e']





In [18]:
embedding_vector_length = 32
model_RNN = Sequential()
model_RNN.add(Embedding(40000,embedding_vector_length,input_length = 500))
model_RNN.add(SimpleRNN(32,dropout=0.2, return_sequences = True ))
model_RNN.add(SimpleRNN(32))
model_RNN.add(Dense(4,activation = 'softmax'))
model_RNN.compile(loss ='categorical_crossentropy', optimizer = 'adam', metrics=['accuracy'])
print(model_RNN.summary())
model_RNN.fit(X_train,y_train, epochs = 20, batch_size=23)

scores = model_RNN.evaluate(X_test, y_test, verbose =0)
print("Accuracy: ",(scores[1]*100))

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 32)           1280000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 500, 32)           2080      
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 32)                2080      
                                                                 
 dense (Dense)               (None, 4)                 132       
                                                                 
Total params: 1,284,292
Trainable params: 1,284,292
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Ep

In [19]:
model_RNN.save("/content/gdrive/MyDrive/NLP-Project")

INFO:tensorflow:Assets written to: /content/gdrive/MyDrive/NLP-Project/assets


In [20]:
# import pickle
# filename = 'model_1.sav'
# pickle.dump(model,open(filename,'wb'))



In [21]:
# type(X_train)

# X_train = np.append(train.data['similarity'][:,np.newaxis], X_train, axis=1)
# X_test = np.append(test.data['similarity'][:,np.newaxis],X_test, axis=1)

In [22]:
coh_bin = []
for i in range(4600):
  if train.data['labelA'].tolist()[i] >=2:
    coh_bin.append(1)
  else:
    coh_bin.append(0)
train.data['bin_coh']= coh_bin


coh_bin=[]
for i in range(200):
  if test.data['labelA'].tolist()[i] >=2:
    coh_bin.append(1)
  else:
    coh_bin.append(0)

test.data['bin_coh']=coh_bin

In [23]:
lst = array(train.data['bin_coh'])
encoded = to_categorical(lst)

lst = array(test.data['bin_coh'])
t_encoded = to_categorical(lst)



In [24]:
np.random.seed(7)
X_train = sequence.pad_sequences(train.data['encoding'],maxlen = 500)
y_train = encoded#train.data['h_e']
X_test = sequence.pad_sequences(test.data['encoding'],maxlen=500)
y_test = t_encoded#test.data['h_e']

# X_train = np.append(train.data['similarity'][:,np.newaxis], X_train, axis=1)
# X_test = np.append(test.data['similarity'][:,np.newaxis],X_test, axis=1)



In [25]:
embedding_vector_length = 32
model_RNN_B = Sequential()
model_RNN_B.add(Embedding(40000,embedding_vector_length,input_length = 500))
model_RNN_B.add(SimpleRNN(32,dropout=0.2, return_sequences = True ))
model_RNN_B.add(SimpleRNN(32))
model_RNN_B.add(Dense(2,activation = 'softmax'))
model_RNN_B.compile(loss ='binary_crossentropy', optimizer = 'adam', metrics=['accuracy'])
print(model_RNN_B.summary())
model_RNN_B.fit(X_train,y_train, epochs = 15 , batch_size=23)

scores = model_RNN_B.evaluate(X_test, y_test, verbose =0)
print("Accuracy: ",(scores[1]*100))


# embedding_vector_length = 32
# model_RNN = Sequential()
# model_RNN.add(Embedding(40000,embedding_vector_length,input_length = 500))
# model_RNN.add(SimpleRNN(32,dropout=0.2, return_sequences = True ))
# model_RNN.add(SimpleRNN(32))
# model_RNN.add(Dense(4,activation = 'softmax'))
# model_RNN.compile(loss ='categorical_crossentropy', optimizer = 'adam', metrics=['accuracy'])
# print(model_RNN.summary())
# model_RNN.fit(X_train,y_train, epochs = 20, batch_size=23)

# scores = model_RNN.evaluate(X_test, y_test, verbose =0)
# print("Accuracy: ",(scores[1]*100))

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 500, 32)           1280000   
                                                                 
 simple_rnn_2 (SimpleRNN)    (None, 500, 32)           2080      
                                                                 
 simple_rnn_3 (SimpleRNN)    (None, 32)                2080      
                                                                 
 dense_1 (Dense)             (None, 2)                 66        
                                                                 
Total params: 1,284,226
Trainable params: 1,284,226
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [26]:
model_RNN_B.save("/content/gdrive/MyDrive/NLP-Project")

INFO:tensorflow:Assets written to: /content/gdrive/MyDrive/NLP-Project/assets


In [27]:
train.data = similarity_paragraph(train.data)
test.data = similarity_paragraph(test.data)

len(train_mapping)
# train_mapping
train.data['encoding'] = train_mapping
test.data['encoding'] = test_mapping

In [28]:
np.random.seed(7)
X_train = sequence.pad_sequences(train.data['encoding'],maxlen = 500)
y_train = encoded#train.data['h_e']
X_test = sequence.pad_sequences(test.data['encoding'],maxlen=500)
y_test = t_encoded#test.data['h_e']

X_train = np.append(train.data['similarity'][:,np.newaxis], X_train, axis=1)
X_test = np.append(test.data['similarity'][:,np.newaxis],X_test, axis=1)

  import sys
  


In [32]:
embedding_vector_length = 32
model_RNN_C = Sequential()
model_RNN_C.add(Embedding(40000,embedding_vector_length,input_length = 501))
model_RNN_C.add(SimpleRNN(32,dropout=0.2, return_sequences = True ))
model_RNN_C.add(SimpleRNN(32))
model_RNN_C.add(Dense(2,activation = 'softmax'))
model_RNN_C.compile(loss ='binary_crossentropy', optimizer = 'adam', metrics=['accuracy'])
print(model_RNN_C.summary())
model_RNN_C.fit(X_train,y_train, epochs = 15 , batch_size=23)

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 501, 32)           1280000   
                                                                 
 simple_rnn_6 (SimpleRNN)    (None, 501, 32)           2080      
                                                                 
 simple_rnn_7 (SimpleRNN)    (None, 32)                2080      
                                                                 
 dense_4 (Dense)             (None, 2)                 66        
                                                                 
Total params: 1,284,226
Trainable params: 1,284,226
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f2f86f47d10>

In [33]:
scores = model_RNN_C.evaluate(X_test, y_test, verbose =0)
print("Accuracy: ",(scores[1]*100))

Accuracy:  53.50000262260437


In [35]:
model_RNN_C.save("/content/gdrive/MyDrive/NLP-Project")

INFO:tensorflow:Assets written to: /content/gdrive/MyDrive/NLP-Project/assets
