In [8]:
# Smoke test: confirm the kernel is alive and executing cells
print("Hello")

Hello


IMPORTING LIBRARIES AND THE IMDB DATASET LOADER

In [9]:
import pandas as pd
import numpy as np
import tensorflow as tf


In [10]:
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

TRAIN-TEST SPLIT

In [11]:
# Load the pre-split IMDB reviews, keeping a vocabulary of 10,000 word indices;
# anything outside that vocabulary is replaced by the loader's out-of-vocabulary index.
(
    (X_train, y_train),
    (X_test, y_test),
) = imdb.load_data(num_words=10000)

In [12]:
# SAMPLE CHECK
# Each review is stored as a list of integer word indices (not one-hot vectors).
first_review = X_train[0]
len(first_review)  # the first review holds 218 tokens

218

In [13]:
# Label that belongs to the first training review
label_review = y_train[0]
label_review  # 1 -> positive review

np.int64(1)

PADDING / TRUNCATING EVERY REVIEW TO A FIXED LENGTH OF 500 TOKENS

In [14]:
# Fixed sequence length fed to the network
max_len = 500

# With Keras' defaults, shorter reviews are left-padded with 0 and longer ones
# are truncated from the front, so every row ends up with exactly max_len entries.
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)

X_train[0].shape  # every review now has length 500

(500,)

TRAINING SIMPLE RNN WITH EMBEDDING LAYER

In [15]:
# Architecture: Embedding -> SimpleRNN -> sigmoid classifier.
#  * Embedding(10000, 128): looks up a trainable 128-dimensional vector for each of
#    the 10,000 vocabulary indices, so a padded review of max_len tokens becomes a
#    (max_len, 128) sequence. The vectors are learned with the rest of the model;
#    no pre-trained or averaged Word2Vec is involved.
#  * SimpleRNN(128): reads that sequence and emits its final 128-unit state.
#  * Dense(1, sigmoid): probability that the review is positive.
# NOTE(review): relu is unbounded inside the recurrence; the very large first-epoch
# training loss in the log further down may be a symptom. Consider the default tanh.
model = Sequential([
    Embedding(10000, 128, input_length=max_len),
    SimpleRNN(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])



In [16]:
# COMPILING THE MODEL
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked for reporting.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [17]:
# SETTING UP EARLY STOPPING
from tensorflow.keras.callbacks import EarlyStopping

# Halt training once the validation loss has gone 5 epochs without improving,
# then roll the model back to the weights of the best epoch seen.
es = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [18]:
# TRAINING THE MODEL
# NOTE(review): the test split is passed as validation data, so early stopping and the
# restored "best" weights are selected on the same data used as the test set.
# Consider holding out part of the training set (e.g. validation_split) instead.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[es],
)

Epoch 1/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 208s 263ms/step - accuracy: 0.6408 - loss: 12959.5332 - val_accuracy: 0.7020 - val_loss: 0.5741
Epoch 2/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 207s 265ms/step - accuracy: 0.7982 - loss: 0.4526 - val_accuracy: 0.8131 - val_loss: 0.4142
Epoch 3/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 203s 260ms/step - accuracy: 0.8795 - loss: 0.2816 - val_accuracy: 0.8310 - val_loss: 0.3855
Epoch 4/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 221s 283ms/step - accuracy: 0.9204 - loss: 0.2015 - val_accuracy: 0.8264 - val_loss: 0.4244
Epoch 5/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 218s 278ms/step - accuracy: 0.9403 - loss: 0.1539 - val_accuracy: 0.8432 - val_loss: 0.4790
Epoch 6/10
782/782 ━━━━━━━━━━━━━━━━━━━━ 238s 305ms/step - accuracy: 0.9513 - loss: 0.1299 - val_accuracy: 0.8350 - val_loss: 0.4899


In [19]:
# SAVING THE MODEL
# Writes the trained model to disk; the .h5 extension selects the HDF5 format.
model.save(filepath='model_rnn.h5')



In [20]:
# word -> integer index mapping shipped with the IMDB dataset ...
word_index = imdb.get_word_index()
# ... and its inverse (integer index -> word), used to turn encoded reviews back into text
reverse_word_index = {idx: word for word, idx in word_index.items()}

In [21]:
#DEFINING HELPER FUNCTIONS
#1. DECODING REVIEWS
def decode_review(encoded_review):
  """Turn a sequence of encoded word indices back into a readable string.

  Each index is shifted down by 3 before being looked up in
  ``reverse_word_index``; any index with no entry after the shift is
  rendered as '?'.

  Args:
    encoded_review: iterable of integer word indices.

  Returns:
    The decoded words joined by single spaces.
  """
  decoded_words = []
  for token in encoded_review:
    decoded_words.append(reverse_word_index.get(token - 3, '?'))
  return ' '.join(decoded_words)

#2. PRE_PROCESS TEXT
def pre_process(text, vocab_size=10000):
  """Encode raw review text into a padded index sequence for the model.

  The text is lower-cased and split on whitespace, then each word is mapped
  the same way ``imdb.load_data(num_words=vocab_size)`` encodes its reviews:

  * a known word becomes ``word_index[word] + 3`` (indices 0-2 are reserved
    by the loader for padding, start-of-sequence and out-of-vocabulary);
  * a word missing from ``word_index`` becomes the out-of-vocabulary index 2;
  * a word whose shifted index is ``>= vocab_size`` also becomes 2, because
    the loader replaced such words with 2 in the training data and the
    Embedding layer has no row for them.

  Tokenisation is plain whitespace splitting, so punctuation attached to a
  word (e.g. "good.") makes that word out-of-vocabulary.

  Args:
    text: raw review string.
    vocab_size: vocabulary size the model was trained with; must match the
      ``num_words`` used when loading the data (10000 by default).

  Returns:
    The result of ``pad_sequences`` on the single encoded review, i.e. one
    row of length ``max_len``.
  """
  oov_index = 2     # index the IMDB loader uses for out-of-vocabulary words
  index_offset = 3  # real words start after the three reserved indices

  encoded_review = []
  for word in text.lower().split():
    rank = word_index.get(word)
    if rank is None:
      # Unknown word: use the OOV index directly. Shifting it by the offset
      # would turn it into index 5, which is a real vocabulary word.
      token = oov_index
    else:
      token = rank + index_offset
      if token >= vocab_size:
        # Outside the trained vocabulary: treat exactly like the loader does.
        token = oov_index
    encoded_review.append(token)

  padded_review = pad_sequences([encoded_review], maxlen=max_len)
  return padded_review


In [22]:
#FUNCTION TO PREDICT SENTIMENT
def predict_sentiment(review):
  """Classify a raw review string with the trained model.

  The review is encoded with ``pre_process`` and passed to ``model.predict``;
  the first value of the first prediction row is used as the score.

  Args:
    review: raw review text.

  Returns:
    A ``(sentiment, score)`` tuple where sentiment is 'Positive' when the
    score is strictly greater than 0.5 and 'Negative' otherwise.
  """
  model_input = pre_process(review)
  score = model.predict(model_input)[0][0]
  if score > 0.5:
    return 'Positive', score
  return 'Negative', score


In [24]:
# Quick end-to-end check on a short hand-written review
example_review = "movie was good"
predict_sentiment(example_review)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 119ms/step


('Positive', np.float32(0.539804))