In [1]:
# Select TensorFlow 2.x (a Colab-only line magic), then display the installed version.
%tensorflow_version 2.x
import tensorflow
tensorflow.__version__


'2.3.0'

In [2]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): none of the visible cells read from or write to the mounted
# drive (the IMDB data is downloaded by Keras) - confirm the mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load the Keras IMDB review dataset, keeping only the 10000 most frequent
# words, and unpack it into train and test features/labels.

from tensorflow.keras.datasets import imdb

train_split, test_split = imdb.load_data(num_words=10000)
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [4]:
# Pad (or truncate) every encoded review to the same length of 300 tokens.
# `sequence` is imported from tensorflow.keras so that preprocessing uses the
# same Keras implementation as the dataset and model cells, rather than the
# separately installed standalone `keras` package.

from tensorflow.keras.preprocessing import sequence

MAX_REVIEW_LENGTH = 300  # tokens kept per review

X_train = sequence.pad_sequences(X_train, maxlen=MAX_REVIEW_LENGTH)
X_test = sequence.pad_sequences(X_test, maxlen=MAX_REVIEW_LENGTH)

In [5]:
# Print the shapes of the training features and the training labels

for split_name, split_array in (('X_train', X_train), ('y_train', y_train)):
    print(split_name, split_array.shape)

X_train (25000, 300)
y_train (25000,)


In [6]:
# Print the shapes of the test features and the test labels

for split_name, split_array in (('X_test', X_test), ('y_test', y_test)):
    print(split_name, split_array.shape)

X_test (25000, 300)
y_test (25000,)


In [7]:
# Print the first training review as it is stored after padding:
# a fixed-length vector of integer word indices, padded with zeros at the front

print(X_train[0])

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    1   14
   22   16   43  530  973 1622 1385   65  458 4468   66 3941    4  173
   36  256    5   25  100   43  838  112   50  670    2    9   35  480
  284    5  150    4  172  112  167    2  336  385   39    4  172 4536
 1111   17  546   38   13  447    4  192   50   16    6  147 2025   19
   14   22    4 1920 4613  469    4   22   71   87   12   16   43  530
   38   76   15   13 1247    4   22   17  515   17   12   16  626   18
    2    5   62  386   12    8  316    8  106    5    4 2223 5244   16
  480   66 3785   33    4  130   12   16   38  619    5   25  124   51
   36 

In [8]:
# Print a second encoded training review for comparison
print(X_train[1])

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    1
  194 1153  194 8255   78  228    5    6 1463 4369 5012  134   26    4
  715    8  118 1634   14  394   20   13  119  954  189  102    5  207
  110 3103   21   14   69  188    8   30   23    7    4  249  126   93
    4  114    9 2300 1523    5  647    4  116    9   35 8163    4  229
    9  340 1322    4  118    9    4  130 4901   19    4 1002    5   89
   29  952   46   37    4  455    9   45   43   38 1543 1905  398    4
 1649 

In [9]:
# Print the label of one training review
# output '0' indicates 'NEGATIVE' sentiment
print(y_train[50])

0


In [10]:
# Print the label of one test review
# output '1' indicates 'POSITIVE' sentiment
print(y_test[100])

1


In [11]:
# Turn an encoded training review back into readable text

word_index = imdb.get_word_index()

# Invert the word -> index mapping so that indices can be looked up
reverse_word_index = {index: word for word, index in word_index.items()}

# Stored token ids are shifted by 3 relative to word_index; any id without a
# matching word after the shift is rendered as "#"
decoded_words = [reverse_word_index.get(token - 3, "#") for token in X_train[91]]
decoded = " ".join(decoded_words)
print(decoded)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
the strange set of brother's sisters who are just downright unlikeable so far removed from reality that any tension or mystery that the simplistic # story could have achieved is sorely missing then there's the awful twist ending that you can guess within the first 10 minutes it's boring to watch it's poorly paced it's just a chore to even think about it please someone save me as this is really bad stuff i could go on all day about how bad blood legacy is i really could br br director # was either working with a none existent budget or judging by this he shouldn't have even been directing traffic the entire film looks ugly it's poorly photographed there is no atmosphere or scares the blood gore is tame there's an axe in a head a decapitated head a scene when someone is # to death by # the best murder when someone's face is eaten by # however there are question marks over this scene so 

In [12]:
# Label of the training review decoded above (0 = negative sentiment)
print(y_train[91])

0


In [13]:
# Import necessary libraries

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Flatten, TimeDistributed, Bidirectional

In [14]:
# Build the sentiment classifier as an ordered stack of layers

model = Sequential([
    # Look up a 100-dimensional vector for each of the 10000 word indices;
    # every review is 300 tokens long
    Embedding(input_dim=10000, output_dim=100, input_length=300),
    # Read each review in both directions and keep the output of every time step
    Bidirectional(LSTM(units=100, return_sequences=True)),
    # Apply the same 100-unit ReLU layer to each time step
    TimeDistributed(Dense(100, activation='relu')),
    # Collapse the (time step, feature) grid into one vector per review
    Flatten(),
    # Single sigmoid unit producing a score between 0 and 1
    Dense(1, activation='sigmoid'),
])



In [15]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric

model.compile(
    optimizer='Adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# Print the model summary: one row per layer with its output shape and
# parameter count, followed by the parameter totals

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 300, 100)          1000000   
_________________________________________________________________
bidirectional (Bidirectional (None, 300, 200)          160800    
_________________________________________________________________
time_distributed (TimeDistri (None, 300, 100)          20100     
_________________________________________________________________
flatten (Flatten)            (None, 30000)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 30001     
Total params: 1,210,901
Trainable params: 1,210,901
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Fit the model
#
# Per-epoch validation uses a slice held out from the training data
# (validation_split) instead of (X_test, y_test). The test set is scored
# separately with model.evaluate, so keeping it out of the training loop
# leaves that final score an unbiased estimate.
# NOTE: Keras takes the validation slice from the END of the arrays before
# shuffling; this assumes the IMDB training data is not ordered by label
# (imdb.load_data shuffles by default - confirm if load arguments change).

history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    verbose=1,
    validation_split=0.2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
# Evaluate the model on the test set.
# `result` is not displayed here; with a single 'accuracy' metric configured it
# should hold the test loss followed by the test accuracy.

result = model.evaluate(X_test, y_test)



In [19]:
# Predict the sentiment of two sample reviews with model.predict
#
# NOTE: both samples are rows of X_train, i.e. data the model was fitted on,
# so these predictions are a sanity check and not a measure of generalization.

NEGATIVE_SAMPLE_IDX = 55  # presumably labelled negative - confirm with y_train[55]
POSITIVE_SAMPLE_IDX = 10  # presumably labelled positive - confirm with y_train[10]

text_neg = X_train[NEGATIVE_SAMPLE_IDX]
text_pos = X_train[POSITIVE_SAMPLE_IDX]
texts = (text_neg, text_pos)

# X_train rows are already padded to length 300, so padding them again would
# leave them unchanged; select the two rows directly as a (2, 300) batch.
padded_texts = X_train[[NEGATIVE_SAMPLE_IDX, POSITIVE_SAMPLE_IDX]]

# Generate predictions (one sigmoid score per review)
predictions = model.predict(padded_texts)
print(predictions)

[[2.6428271e-08]
 [1.0000000e+00]]


In [20]:
# Decode training review 55 (the first sample passed to model.predict).
# Reuses the reverse_word_index mapping built in the earlier decoding cell
# instead of fetching and inverting the word index a second time.
# Stored token ids are shifted by 3 relative to the word index; ids without a
# matching word after the shift are rendered as "#".
decoded = " ".join(reverse_word_index.get(token - 3, "#") for token in X_train[55])
print(decoded)

to be exact physical # their past # br br maybe kubrick didn't care about his storyline maybe only wanted to evoke a mood of horror whatever the case the film tries to # its narrative # to have it both ways rational and supernatural as a result the story is a mess this movie hasn't improved with age and it certainly doesn't improve with repeated viewings br br i don't deny that a few moments of fear # and general creepiness are scattered throughout this long long film but those # # # blood seen repeatedly in little # visions are absurd and laughable and jack # infamous tag lines wendy i'm home and # johnny merely # the movie's dramatic tension and # its narrative energy i know i sat in the theater and heard the audience laugh in comic relief # glad we don't have to take this stuff seriously finally kubrick is completely at sea or else utterly cynical during those scenes in which wendy wanders around the empty hotel while her husband tries to # their son a # full of # guests all sitting

In [21]:
# Decode training review 10 (the second sample passed to model.predict).
# Reuses the reverse_word_index mapping built in the earlier decoding cell
# instead of fetching and inverting the word index again.
# Stored token ids are shifted by 3 relative to the word index; ids without a
# matching word after the shift are rendered as "#".
decoded = " ".join(reverse_word_index.get(token - 3, "#") for token in X_train[10])
print(decoded)

a short while in the cell together they stumble upon a hiding place in the wall that contains an old # after # part of it they soon realise its magical powers and realise they may be able to use it to break through the prison walls br br black magic is a very interesting topic and i'm actually quite surprised that there aren't more films based on it as there's so much scope for things to do with it it's fair to say that # makes the best of it's # as despite it's # the film never actually feels restrained and manages to flow well throughout director eric # provides a great atmosphere for the film the fact that most of it takes place inside the central prison cell # that the film feels very claustrophobic and this immensely benefits the central idea of the prisoners wanting to use magic to break out of the cell it's very easy to get behind them it's often said that the unknown is the thing that really # people and this film proves that as the director # that we can never really be sure o

In [22]:
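# The model predicted the expected sentiment for both sample reviews; this can be checked against the decoded text above.
# Note that both samples come from the training set, so this is a sanity check rather than evidence of generalization.
# A review is classified as negative if the predicted value is between 0 and 0.5.
# A review is classified as positive if the predicted value is between 0.5 and 1.
# The model reached a maximum accuracy of 99% with a loss of 0.01 (presumably on the training data; loss is a unitless value, not a percentage - compare with the test metrics before drawing conclusions).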