In [1]:
import pandas as pd
import numpy as np
import nltk
import re
from keras.datasets import imdb 
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.models import load_model

Using TensorFlow backend.


In [2]:
# save np.load
np_load_old = np.load
# modify the default parameters of np.load
np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k)
# call load_data with allow_pickle implicitly set to true
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=5000)
# restore np.load for future normal usage
np.load = np_load_old

print("Loaded dataset with {} training samples, {} test samples".format(len(X_train), len(X_test)))

Loaded dataset with 25000 training samples, 25000 test samples


In [3]:
X_train.shape

(25000,)

In [4]:
type(X_train)

numpy.ndarray

In [5]:
len(X_train[0])

218

In [6]:
type(X_train[0])

list

In [7]:
# Inspect a sample review and its label
print("--- Review ---")
print(X_train[0])
print("--- Label ---")
print(y_train[0])

--- Review ---
[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 2, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 2, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 2, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 2, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 2, 19, 178, 32]
--- Label ---
1


In [8]:
# Map word IDs back to words
word2id = imdb.get_word_index()
id2word = {i: word for word, i in word2id.items()}
print("--- Review (with words) ---")
print([id2word.get(i, " ") for i in X_train[0]])
print("--- Label ---")
print(y_train[0])

--- Review (with words) ---
['the', 'as', 'you', 'with', 'out', 'themselves', 'powerful', 'lets', 'loves', 'their', 'becomes', 'reaching', 'had', 'journalist', 'of', 'lot', 'from', 'anyone', 'to', 'have', 'after', 'out', 'atmosphere', 'never', 'more', 'room', 'and', 'it', 'so', 'heart', 'shows', 'to', 'years', 'of', 'every', 'never', 'going', 'and', 'help', 'moments', 'or', 'of', 'every', 'chest', 'visual', 'movie', 'except', 'her', 'was', 'several', 'of', 'enough', 'more', 'with', 'is', 'now', 'current', 'film', 'as', 'you', 'of', 'mine', 'potentially', 'unfortunately', 'of', 'you', 'than', 'him', 'that', 'with', 'out', 'themselves', 'her', 'get', 'for', 'was', 'camp', 'of', 'you', 'movie', 'sometimes', 'movie', 'that', 'with', 'scary', 'but', 'and', 'to', 'story', 'wonderful', 'that', 'in', 'seeing', 'in', 'character', 'to', 'of', '70s', 'and', 'with', 'heart', 'had', 'shadows', 'they', 'of', 'here', 'that', 'with', 'her', 'serious', 'to', 'have', 'does', 'when', 'from', 'why', 'what

In [9]:
max_words = 500
X_train = sequence.pad_sequences(X_train, maxlen=max_words)
X_test = sequence.pad_sequences(X_test, maxlen=max_words)

In [10]:
X_train.shape

(25000, 500)

In [11]:
type(X_train)

numpy.ndarray

In [12]:
type(X_train[0])

numpy.ndarray

In [13]:
X_train[0].shape

(500,)

In [14]:
X_train[0]

array([   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
          0,    0,    0,    0,    0,    0,    0,   

In [16]:
vocabulary_size=5000
embedding_size = 32
model = Sequential()
model.add(Embedding(vocabulary_size, embedding_size, input_length=max_words))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))

print(model.summary())




Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 500, 32)           160000    
_________________________________________________________________
lstm_1 (LSTM)                (None, 128)               82432     
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 242,561
Trainable params: 242,561
Non-trainable params: 0
_________________________________________________________________
None


In [17]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [None]:
batch_size = 64
num_epochs = 5

X_valid, y_valid = X_train[:batch_size], y_train[:batch_size]  
X_train2, y_train2 = X_train[batch_size:], y_train[batch_size:] 

model.fit(X_train2, y_train2,validation_data=(X_valid, y_valid),batch_size=batch_size, epochs=num_epochs)

Train on 24936 samples, validate on 64 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5

In [19]:
scores = model.evaluate(X_test, y_test, verbose=0) 
print("Test accuracy:", scores[1]) 

Test accuracy: 0.87392


In [27]:
X_test[1].dtype

dtype('int32')

In [117]:
strr = input('Enter your Review :- ')

Enter your Review :- Nice Movie Great excellent work by whole team . Good casting , very good scenes , action comic , romance all work brilliant


In [118]:
strr

'Nice Movie Great excellent work by whole team . Good casting , very good scenes , action comic , romance all work brilliant'

In [119]:
filee = open("input_review.txt","w")
filee.write(strr)
filee.close()

In [120]:
import pandas as pd

In [121]:
rev=[]

In [122]:
def readReview():
     # open the file as read only
    file = open("input_review.txt", mode='rt')
    # read all text
    text = file.read()
    # close the file
    file.close()
    return text

In [123]:
text = readReview()

In [124]:
text

'Nice Movie Great excellent work by whole team . Good casting , very good scenes , action comic , romance all work brilliant'

In [125]:
pattern = re.compile(r'<.*?>')  # tags look like <...>
print(pattern.sub(' ', text))  # replace them with blank

Nice Movie Great excellent work by whole team . Good casting , very good scenes , action comic , romance all work brilliant


In [126]:
text = text.lower()
print(text)

nice movie great excellent work by whole team . good casting , very good scenes , action comic , romance all work brilliant


In [127]:
text = re.sub(r"[^a-zA-Z0-9]", " ", text)
print(text)

nice movie great excellent work by whole team   good casting   very good scenes   action comic   romance all work brilliant


In [128]:
words = text.split()
print(words)

['nice', 'movie', 'great', 'excellent', 'work', 'by', 'whole', 'team', 'good', 'casting', 'very', 'good', 'scenes', 'action', 'comic', 'romance', 'all', 'work', 'brilliant']


In [129]:
test_review = [word2id.get(i, " ") for i in words]

In [130]:
test_review

[324,
 17,
 84,
 318,
 154,
 31,
 223,
 765,
 49,
 970,
 52,
 49,
 136,
 203,
 697,
 880,
 29,
 154,
 527]

In [131]:
leng = len(test_review)

In [132]:
for i in range(1,501-leng):
    test_review.append(0)

In [133]:
len(test_review)

500

In [134]:
type(test_review)

list

In [135]:
from numpy import array

In [136]:
arr = array(test_review)

In [137]:
arr.shape

(500,)

In [138]:
arr = arr.reshape((1,500))

In [139]:
arr.shape

(1, 500)

In [71]:
# Save your model, so that you can quickly load it in future (and perhaps resume training)
model_file = "rnn_model.h5"  # HDF5 file
#model.save(os.path.join("V:/Study Material/Jupyter Notebook/Sentiment Analysis", model_file))

# Later you can load it using keras.models.load_model()

model = load_model(model_file)







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [140]:
pred = model.predict(arr)

In [141]:
pred.shape

(1, 1)

In [142]:
pred[0]

array([0.92174107], dtype=float32)