In [9]:
import tensorflow as td
from tensorflow import keras
import numpy as np

In [58]:
# Handle to Keras's IMDB dataset module; reused below for get_word_index().
data = keras.datasets.imdb

# load_data() returns a (train, test) pair, each of which is (samples, labels).
train_split, test_split = data.load_data()
train_data, train_labels = train_split
test_data, test_labels = test_split

In [16]:
# Length of the first entry in train_data (shown as the cell's output).
len(train_data[0])

218

In [59]:
# Word -> integer id mapping supplied by the Keras IMDB dataset module.
word_index = data.get_word_index()

In [79]:
# Number of entries currently in word_index (shown as the cell's output).
# NOTE(review): the value depends on whether the special-token cell has
# already run in this kernel — execution counts here are out of order.
len(word_index)

88588

In [60]:
# Shift every word id up by 3 so that ids 0-3 are free for the special tokens
# below (this matches the default index_from=3 offset that Keras applies in
# imdb.load_data(); see the Keras docs).
#
# The mapping is rebuilt from data.get_word_index() rather than from the
# current `word_index`, so this cell is idempotent: re-running it can no
# longer add a second (or third) +3 to every id and silently corrupt
# decode_review / review_encode.
word_index = {word: rank + 3 for word, rank in data.get_word_index().items()}
word_index['<PAD>'] = 0
word_index['<START>'] = 1
word_index['<UNK>'] = 2
word_index['<UNUSED>'] = 3

In [61]:
# Inverse lookup (integer id -> word), used by decode_review.
reverse_word_index = {value: key for key, value in word_index.items()}


In [62]:
def decode_review(text):
    """Turn a sequence of integer ids back into a space-separated string.

    Each id is looked up in the module-level ``reverse_word_index``; ids with
    no entry are rendered as "?".
    """
    words = [reverse_word_index.get(token_id, "?") for token_id in text]
    return " ".join(words)

In [35]:
# Decode the first test sample back into words and print it.
print(decode_review(test_data[0]))

<START> murder both in have ? easily of of camilla shattered ? ? boring the ? again marries understand dead ? over a odd odd odd of of br how where first lead spiral make you cross in have movie not convict are role dark and where in true director and old just ? not last i lot illustrations an he film spiral based both in ? easily


In [23]:
# Decode the second test sample; as the cell's last expression, its repr is displayed.
decode_review(test_data[1])

"<START> in with cardboard ? part the greg other is alice movie terrific ? watch directing ? many a good score ? films the ? scene however set movie ? find looking to rambling impulse ? henchman greater watch industry for ? with slave and ? awesome movie's of of ? movies present all imagine cagney's was wait public ? humorous impressionistic or good isn't it novel husband's eyes in members ? early ? australian the not also characters better ? yard husband's symbolism early a hanks end ? has br any if which funny in with old a ? horrible last one henchman greater or perfect have the so sticks movies for up watched is an mess ? pants it or released lead destroyed know ? some three this ? deputy world your werewolf husband's soft lead lights about ? wonderful going ? abhishek stereotype looking satisfying ? share the ? would good can to used in as a ? casting lewis the knocked wait abigail ? exotic his sexuality yawns more ? imagine ? create four and ? sleep playing whole of of br crimina

In [63]:
from keras.preprocessing.sequence import pad_sequences

# Standardize every review to a length of 250 ids: shorter reviews are filled
# at the end ('post') with the <PAD> id so all samples share one shape.
# The same settings are applied to both the training and the test split.
pad_options = dict(
    value=word_index['<PAD>'],
    padding='post',
    maxlen=250,
)

train_data = pad_sequences(train_data, **pad_options)
test_data = pad_sequences(test_data, **pad_options)

In [83]:
# Architecture of the network.
model = keras.Sequential([
    # 88588 input ids (matches the len(word_index) output recorded above),
    # each mapped to a 16-dimensional vector.
    keras.layers.Embedding(88588, 16),
    # Average the word vectors across the sequence dimension.
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation='relu'),
    # Single sigmoid unit: a score between 0 and 1 for the bad/good review label.
    keras.layers.Dense(1, activation='sigmoid'),
])

In [84]:
# Binary classification setup: cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [85]:
# Hold out the first 10,000 training samples for validation; train on the rest.
x_val, x_train = train_data[:10000], train_data[10000:]
y_val, y_train = train_labels[:10000], train_labels[10000:]

# Fit for 40 epochs in batches of 512, reporting validation metrics each epoch.
fitModel = model.fit(
    x_train,
    y_train,
    epochs=40,
    batch_size=512,
    validation_data=(x_val, y_val),
    verbose=1,
)

# Evaluate on the test split; the result is stored in `results` but not displayed here.
results = model.evaluate(test_data, test_labels)

Train on 15000 samples, validate on 10000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [86]:
# Persist the trained model to 'model.h5' in the current working directory.
model.save('model.h5')

In [87]:
# Reload the model that was just saved to 'model.h5'.
# NOTE(review): `model_load` is not referenced in the visible cells below —
# the prediction loop calls `model.predict`, not `model_load.predict`.
model_load = keras.models.load_model('model.h5')

In [90]:
def review_encode(s):
    """Convert an iterable of word strings into a list of integer ids.

    The result always begins with 1 (the <START> id). Each word is lower-cased
    and looked up in the module-level ``word_index``; words that are not
    present are encoded as 2 (the <UNK> id).
    """
    start_id = 1
    unknown_id = 2
    return [start_id] + [word_index.get(token.lower(), unknown_id) for token in s]

In [101]:
# Punctuation handling for raw review text: a hyphen becomes a space (so
# "live-action" yields two words) and the remaining marks are deleted.
_PUNCTUATION_TABLE = str.maketrans("-", " ", '?!.,():"')

with open('review_test.txt', encoding = 'utf-8') as f:
    # Iterate the file lazily, one review per line, instead of readlines().
    for line in f:
        # split() with no argument collapses runs of whitespace, so stripped
        # punctuation or double spaces no longer produce empty-string tokens
        # (which review_encode would otherwise encode as <UNK>).
        nline = line.translate(_PUNCTUATION_TABLE).split()
        if not nline:
            # Blank (or punctuation-only) line: nothing to classify.
            continue

        # Convert the words to integer ids.
        encoded = review_encode(nline)
        # Pad exactly as the training data was padded so the input shape matches.
        encode = pad_sequences([encoded],
                               value = word_index['<PAD>'],
                               padding = 'post',
                               maxlen = 250)
        predict = model.predict(encode)

        print(line)
        print('')
        print('nline', nline)
        print(encode)
        # predict has one row per input sequence; only one was passed in.
        print(predict[0])

This movie is Amazing!!! The imagery and colors are remarkable. It brought back so many memories from when I was Young. Let's face it, nothing will ever top the Original. People are complaining about the animals not having emotion line the original?? Are you serious? They try making this movie as real as it gets, you guys want to see a Lion smiling?? Seriously!!! Music was nostalgic, everything was perfect. Best Disney "live-action" movie of all.

nline ['This', 'movie', 'is', 'Amazing', 'The', 'imagery', 'and', 'colors', 'are', 'remarkable', 'It', 'brought', 'back', 'so', 'many', 'memories', 'from', 'when', 'I', 'was', 'Young', "Let's", 'face', 'it', 'nothing', 'will', 'ever', 'top', 'the', 'Original', 'People', 'are', 'complaining', 'about', 'the', 'animals', 'not', 'having', 'emotion', 'line', 'the', 'original', 'Are', 'you', 'serious', 'They', 'try', 'making', 'this', 'movie', 'as', 'real', 'as', 'it', 'gets', 'you', 'guys', 'want', 'to', 'see', 'a', 'Lion', 'smiling', 'Seriously',