In [1]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb

In [2]:
# Load the IMDB review splits with the vocabulary capped via num_words=10_000.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10_000)

# word -> index lookup for the dataset, plus its inverse for decoding reviews.
word_index = imdb.get_word_index()
index_to_word = {idx: word for word, idx in word_index.items()}

In [3]:
def vectorize_reviews(reviews, dims=10000):
    """Multi-hot encode integer-index sequences into a 2-D array.

    Args:
        reviews: Sequence of reviews, each an iterable of integer indices.
        dims: Width of every output row.

    Returns:
        A ``(len(reviews), dims)`` array of zeros in which entry ``[r, j]``
        is 1 whenever index ``j`` occurs in ``reviews[r]``.
    """
    encoded = np.zeros((len(reviews), dims))
    for row, tokens in enumerate(reviews):
        # One fancy-indexed write per review sets every listed column at once;
        # repeated indices simply rewrite the same 1.
        encoded[row, list(tokens)] = 1
    return encoded

In [4]:
# Multi-hot encode both splits so the dense network receives fixed-width rows.
x_train, x_test = vectorize_reviews(train_data), vectorize_reviews(test_data)

# Cast the labels to float32 to pair with the sigmoid / binary cross-entropy setup.
y_train, y_test = train_labels.astype("float32"), test_labels.astype("float32")

# Optional hold-out split, left disabled: the fit call uses the full training set.
# x_val = x_train[:10_000]
# y_val = y_train[:10_000]

# partial_x_train = x_train[10_000:]
# partial_y_train = y_train[10_000:]

In [5]:
# Two 16-unit ReLU hidden layers feeding a single sigmoid output unit.
model = keras.Sequential()
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(1, activation="sigmoid"))

# Binary classification setup: RMSprop optimizer, cross-entropy loss, accuracy metric.
model.compile(optimizer="rmsprop", loss="binary_crossentropy", metrics=["accuracy"])

In [6]:
# Train on the full training set: 4 epochs in mini-batches of 512 samples.
history = model.fit(x_train, y_train, batch_size=512, epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [7]:
# Score the trained model on the test split.
results = model.evaluate(x=x_test, y=y_test)



In [8]:
def decode_review(encoded):
    """Turn a sequence of encoded indices back into a space-separated string.

    Each index is shifted down by 3 before the ``index_to_word`` lookup;
    any index without an entry is rendered as ``"?"``.
    """
    words = []
    for token in encoded:
        words.append(index_to_word.get(token - 3, "?"))
    return " ".join(words)

def encode_review(review, num_words=10_000):
    """Encode a whitespace-separated review as an array of integer indices.

    The sequence starts with the marker index 1, and every word's
    ``word_index`` entry is shifted up by 3. Words that are missing from
    ``word_index``, or whose shifted index is ``num_words`` or larger, are
    encoded as 2 so that every emitted index stays below ``num_words``.

    Args:
        review: Review text; split on whitespace.
        num_words: Exclusive upper bound on emitted indices. The default
            matches the ``num_words`` used when loading the data and the
            default width of ``vectorize_reviews``. Pass ``None`` to disable
            the cap.

    Returns:
        1-D NumPy integer array of the form ``[1, idx_1, idx_2, ...]``.
    """
    start_index, oov_index, offset = 1, 2, 3
    encoded = [start_index]
    for word in review.split():
        index = word_index.get(word, oov_index - offset) + offset
        if num_words is not None and index >= num_words:
            # Outside the model's vocabulary: treat it like an unknown word
            # instead of emitting an index the vectorizer cannot place.
            index = oov_index
        encoded.append(index)
    return np.array(encoded)

In [9]:
# Hand-written reviews used as a quick sanity check of the trained model.
my_reviews = vectorize_reviews([
    encode_review(text)
    for text in (
        "this movie sucks",
        "i have never seen such a crap",
        "i have never seen such a crap in my entire life",
        "i have never watched such a masterpiece",
        "i hate this movie",
        "it's terrible",
        "it's a really great movie",
        "it was very boring movie totally not worth of time",
        "very slow pace movie but everyone should watch it once",
        "noice",
    )
])

# Flatten the model output to one score per review; 0.5 is the decision threshold.
predictions = model.predict(my_reviews).ravel()
for i, pred in enumerate(predictions, start=1):
    print(f"{i:2d}) Score: {pred:.6f}, Conclusion: {'+ve' if pred >= 0.5 else '-ve'}")

 1) Score: 0.360646, Conclusion: -ve
 2) Score: 0.402339, Conclusion: -ve
 3) Score: 0.384908, Conclusion: -ve
 4) Score: 0.581699, Conclusion: +ve
 5) Score: 0.489245, Conclusion: -ve
 6) Score: 0.364143, Conclusion: -ve
 7) Score: 0.659893, Conclusion: +ve
 8) Score: 0.325038, Conclusion: -ve
 9) Score: 0.612199, Conclusion: +ve
10) Score: 0.469605, Conclusion: -ve
