In [12]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

# Handle to the IMDB movie-review dataset bundled with Keras.
data = keras.datasets.imdb

# Load the train/test splits. num_words=10000 asks the loader to keep only the
# 10,000 most frequent words; each split is a (sequences, labels) pair.
(train_data, train_labels), (test_data, test_labels) = data.load_data(
    num_words=10000
)

In [13]:
# Word -> integer lookup for the dataset, with every index shifted up by 3 so
# that indices 0-3 stay free for the special marker tokens.
word_index = {
    word: index + 3
    for word, index in data.get_word_index().items()
}

In [15]:
# Register the special marker tokens on the reserved low indices.
word_index.update({"<PAD>": 0, "<START>": 1, "<UNK>": 2, "<UNUSED>": 3})

# Inverse lookup (integer -> word), used to turn encoded reviews back into text.
reverse_word_index = {index: word for word, index in word_index.items()}

# Bring every review to a fixed length of 250 tokens; shorter reviews are
# filled at the end ("post") with the <PAD> index.
train_data = keras.preprocessing.sequence.pad_sequences(
    train_data,
    maxlen=250,
    padding="post",
    value=word_index["<PAD>"],
)
test_data = keras.preprocessing.sequence.pad_sequences(
    test_data,
    maxlen=250,
    padding="post",
    value=word_index["<PAD>"],
)

def decode_review(text):
    """Turn a sequence of word indices back into a readable string.

    Each index is looked up in ``reverse_word_index``; indices that are not
    present there are rendered as "?". The words are joined with single spaces.
    """
    words = (reverse_word_index.get(token, "?") for token in text)
    return " ".join(words)

# Network: embedding -> average over the sequence axis -> hidden dense layer
# -> single sigmoid unit.
model = keras.Sequential([
    keras.layers.Embedding(10000, 16),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation="relu"),
    keras.layers.Dense(1, activation="sigmoid"),
])

model.summary()

model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Hold out the first 10,000 training examples for validation and train on
# the remainder.
x_val, x_train = train_data[:10000], train_data[10000:]
y_val, y_train = train_labels[:10000], train_labels[10000:]

fitModel = model.fit(
    x_train,
    y_train,
    epochs=40,
    batch_size=512,
    validation_data=(x_val, y_val),
    verbose=1,
)

# Score the trained model on the test split and show the result.
results = model.evaluate(test_data, test_labels)

print(results)


 

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 16)          160000    
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 16)                272       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 160,289
Trainable params: 160,289
Non-trainable params: 0
_________________________________________________________________
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 15000 samples, validate on 10000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch

In [16]:
# Pick one padded review from the test split to inspect by hand.
test_review = test_data[44]

# model.predict works on batches, so give the single review a leading batch
# axis: (250,) -> (1, 250). Passing the bare vector inside a list does not
# create that axis.
predict = model.predict(np.expand_dims(test_review, axis=0))

print("Review: ")
print(decode_review(test_review))
# The batch holds exactly one review, so its score is row 0 of the output;
# 44 is the review's position in test_data, not a position in `predict`.
print("Prediction: " + str(predict[0]))
print("Actual: " + str(test_labels[44]))

Review: 
<UNK> the second circle a young man returns to the russian countryside to bury his deceased father in the shack where the old man once lived everything is covered with dust he <UNK> the few old clothes scattered around and places the body of his father in a coffin then he <UNK> goodbye and <UNK> it the rest of the film is a prolonged reflection on the collapse of the soviet system the loneliness and <UNK> many were left dealing with br br in modern love somewhat ironically love is nowhere to be seen on the contrary it is pain loneliness and <UNK> with <UNK> that suddenly <UNK> john's life unlike the second circle however here the main protagonist has the opportunity to re embrace his modern life his wife and son <UNK> him yet he walks away slowly but surely the present begins to <UNK> under the weight of a somewhat confusing past br br i doubt intended for modern love to be so strikingly similar to what <UNK> did in the second circle yet the pacing and in particular the puzzli

In [17]:
# Write the trained model to the file "model.h5" so it can be loaded back later.
model.save("model.h5")

In [19]:
# Rebind `model` to the network read back from the file "model.h5".
model = keras.models.load_model("model.h5")

In [20]:

def review_encode(s, vocab=None, num_words=10000):
    """Encode a tokenised review as the list of integers the model consumes.

    Args:
        s: Iterable of word strings (one review, already split into tokens).
        vocab: Mapping from lower-case word to integer index. Defaults to the
            notebook-level ``word_index``.
        num_words: Size of the vocabulary the model was built with. Any index
            at or above this value is outside the embedding table, so the
            word is encoded as unknown instead.

    Returns:
        A list starting with 1 (the <START> marker) followed by one index per
        word. Words that are missing from ``vocab``, or whose index is
        ``>= num_words``, are encoded as 2 (the <UNK> marker).
    """
    if vocab is None:
        vocab = word_index

    encoded = [1]
    for word in s:
        # Look up the lower-cased form: the vocabulary is checked in lower
        # case, so the raw word must not be used as the key.
        index = vocab.get(word.lower(), 2)
        encoded.append(index if index < num_words else 2)
    return encoded

# Translation table that deletes the punctuation stripped from each line
# before tokenising: , . ( ) : "
punctuation_table = str.maketrans("", "", ",.():\"")

with open("sw8.txt", encoding="utf-8") as f:
    # Iterate the file lazily instead of reading every line into memory first.
    for line in f:
        # split() with no argument splits on any run of whitespace, so
        # repeated spaces or tabs do not produce empty tokens (which would
        # each be encoded as <UNK>).
        nline = line.translate(punctuation_table).split()
        if not nline:
            # Blank line: there is nothing to classify.
            continue
        encode = review_encode(nline)
        # Pad to the same fixed length (250) that the training data uses.
        encode = keras.preprocessing.sequence.pad_sequences(
            [encode], value=word_index["<PAD>"], padding="post", maxlen=250
        )
        predict = model.predict(encode)
        print(line)
        print(encode)
        print(predict[0])