In [1]:
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras

# Helper libraries
import numpy as np

In [2]:
data = keras.datasets.imdb

(train_data, train_labels), (test_data, test_labels) = data.load_data(num_words=10000)  # Vocabulary

print(train_data[0])

[1, 14, 22, 16, 43, 530, 973, 1622, 1385, 65, 458, 4468, 66, 3941, 4, 173, 36, 256, 5, 25, 100, 43, 838, 112, 50, 670, 2, 9, 35, 480, 284, 5, 150, 4, 172, 112, 167, 2, 336, 385, 39, 4, 172, 4536, 1111, 17, 546, 38, 13, 447, 4, 192, 50, 16, 6, 147, 2025, 19, 14, 22, 4, 1920, 4613, 469, 4, 22, 71, 87, 12, 16, 43, 530, 38, 76, 15, 13, 1247, 4, 22, 17, 515, 17, 12, 16, 626, 18, 2, 5, 62, 386, 12, 8, 316, 8, 106, 5, 4, 2223, 5244, 16, 480, 66, 3785, 33, 4, 130, 12, 16, 38, 619, 5, 25, 124, 51, 36, 135, 48, 25, 1415, 33, 6, 22, 12, 215, 28, 77, 52, 5, 14, 407, 16, 82, 2, 8, 4, 107, 117, 5952, 15, 256, 4, 2, 7, 3766, 5, 723, 36, 71, 43, 530, 476, 26, 400, 317, 46, 7, 4, 2, 1029, 13, 104, 88, 4, 381, 15, 297, 98, 32, 2071, 56, 26, 141, 6, 194, 7486, 18, 4, 226, 22, 21, 134, 476, 26, 480, 5, 144, 30, 5535, 18, 51, 36, 28, 224, 92, 25, 104, 4, 226, 65, 16, 38, 1334, 88, 12, 16, 283, 5, 16, 4472, 113, 103, 32, 15, 16, 5345, 19, 178, 32]


In [3]:
word_index = data.get_word_index()

word_index = {k: (v + 3) for k, v in word_index.items()}  # Sort the wards as a dict
# "(v+3)" Because of the three special characters
word_index["<PAD>"] = 0  # Space
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # UNK == Unknown
word_index["<UNUSED>"] = 3

In [4]:
reverse_word_index = dict([(val, key) for (key, val) in word_index.items()])  # swipe the dictionary

train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                        value=word_index["<PAD>"], padding="post", maxlen=250)
test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                       value=word_index["<PAD>"], padding="post", maxlen=250)


def decode_to_view(text):
    """
    decode to text
    :param text:
    """
    return " ".join([reverse_word_index.get(i, "?") for i in text])

In [5]:
# To load the model
"""
model = keras.Sequential()
model.add(keras.layers.Embedding(10000, 16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16, activation="relu"))  # number of Neurons
model.add(keras.layers.Dense(1, activation="sigmoid"))

model.summary()

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

x_val = train_data[:10000]
x_train = train_data[10000:]

y_val = train_labels[:10000]
y_train = train_labels[10000:]

fit_model = model.fit(x_train, y_train, epochs=30, batch_size=512, validation_data=(x_val, y_val), verbose=1)

results = model.evaluate(test_data, test_labels)
print(results)

model.save("Model.h5")
"""

'\nmodel = keras.Sequential()\nmodel.add(keras.layers.Embedding(10000, 16))\nmodel.add(keras.layers.GlobalAveragePooling1D())\nmodel.add(keras.layers.Dense(16, activation="relu"))  # number of Neurons\nmodel.add(keras.layers.Dense(1, activation="sigmoid"))\n\nmodel.summary()\n\nmodel.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])\n\nx_val = train_data[:10000]\nx_train = train_data[10000:]\n\ny_val = train_labels[:10000]\ny_train = train_labels[10000:]\n\nfit_model = model.fit(x_train, y_train, epochs=30, batch_size=512, validation_data=(x_val, y_val), verbose=1)\n\nresults = model.evaluate(test_data, test_labels)\nprint(results)\n\nmodel.save("Model.h5")\n'

In [6]:
def review_encode(str):
    encoded = [1]  # START

    for word in str:
        if word.lower() in word_index:
            encoded.append(word_index[word.lower()])
        else:
            encoded.append(2)  # UNK
    return encoded


model = keras.models.load_model("Model.h5")

In [7]:
with open("External_review", encoding="utf-8") as f:
    for line in f.readlines():
        n_line = line.replace(",", "").replace(".", "").replace("(", "").replace(")", "") \
            .replace(":", "").replace("\"", "").replace("'", "").strip().split(" ")  # Unsigned characters
        encode = review_encode(n_line)
        encode = keras.preprocessing.sequence.pad_sequences([encode],
                                                            value=word_index["<PAD>"], padding="post", maxlen=250)
        
        predict = model.predict(encode)
        print(line)
        print("-----------------------------------------------------------------------")
        print(encode)
        print("-----------------------------------------------------------------------")
        print(f"This review is {float(predict[0])*100:.2f}% Positive")

There is no way that I could describe my emotions for this movie. I'm totally speechless. Even I laughed (even cried) this much in Marvel movie or even in any movie. I'm fully on my emotion, there are so many tears of joy and loss. Amazing story But I iron man missing now, the acting is outstanding, epic action, great CGI, the best storytelling ever told in a superhero movie, amazing performance. But there is one thing I really hate hate hate the part that Tony Stark a.k.a. iron man died, It's really Make me sad, I must say that I am disappointed. I love it more than 3000. Happiness, sadness, pure joy, excitement... I'm gonna miss this moment in my whole life because let's face it's been awhile movies can bring such a big enthusiasm like this. It is such an experience you'll gonna remember it forever. People are laughing, crying, hate, full of a state emotion. It's 3 hours long but it went by like a finger, and now thinking I'm actually in Quantum Realm because it felt like 5 seconds. 