In [1]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import imdb

In [2]:
# Load the IMDB reviews, keeping only the 10,000 most frequent words.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10_000)

# word -> index vocabulary, plus its inverse (index -> word) for decoding reviews.
word_index = imdb.get_word_index()
index_to_word = {index: word for word, index in word_index.items()}

In [3]:
def vectorize_reviews(reviews, dims=10_000):
    """Multi-hot encode integer-encoded reviews.

    Args:
        reviews: Sequence of reviews; each review is a sequence (list or
            1-D array) of integer word indices.
        dims: Width of each output vector, i.e. the number of distinct
            word indices that can be represented.

    Returns:
        A float64 array of shape ``(len(reviews), dims)`` where
        ``results[i, j]`` is 1.0 if index ``j`` occurs at least once in
        review ``i`` and 0.0 otherwise. Word order and repetition counts
        are not preserved.

    Raises:
        IndexError: If a review contains an index ``>= dims``. Negative
            indices follow NumPy semantics and wrap around.
    """
    results = np.zeros((len(reviews), dims))
    for row, review in enumerate(reviews):
        # One fancy-index assignment per review instead of a Python-level
        # loop over every word. The explicit integer dtype keeps empty
        # reviews valid as an index array.
        results[row, np.asarray(review, dtype=np.intp)] = 1

    return results

In [4]:
# Multi-hot encode the reviews and cast the labels to float32.
x_train, x_test = vectorize_reviews(train_data), vectorize_reviews(test_data)
y_train, y_test = (
    labels.astype("float32") for labels in (train_labels, test_labels)
)

# Hold out the first 10,000 training samples for validation; the model is
# fitted on the remainder.
n_val = 10_000
x_val, partial_x_train = x_train[:n_val], x_train[n_val:]
y_val, partial_y_train = y_train[:n_val], y_train[n_val:]

In [5]:
# Exploratory run: two 16-unit ReLU layers followed by a single sigmoid
# output, trained with RMSprop while monitoring the held-out validation set.
model = keras.Sequential()
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(1, activation="sigmoid"))

model.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])

history = model.fit(
    x=partial_x_train,
    y=partial_y_train,
    validation_data=(x_val, y_val),
    batch_size=512,
    epochs=10,
    verbose=2,
)

Epoch 1/10
30/30 - 2s - loss: 0.5296 - accuracy: 0.7821 - val_loss: 0.4115 - val_accuracy: 0.8600 - 2s/epoch - 82ms/step
Epoch 2/10
30/30 - 1s - loss: 0.3250 - accuracy: 0.9009 - val_loss: 0.3217 - val_accuracy: 0.8803 - 1s/epoch - 39ms/step
Epoch 3/10
30/30 - 1s - loss: 0.2358 - accuracy: 0.9262 - val_loss: 0.2837 - val_accuracy: 0.8916 - 1s/epoch - 43ms/step
Epoch 4/10
30/30 - 1s - loss: 0.1849 - accuracy: 0.9411 - val_loss: 0.2776 - val_accuracy: 0.8898 - 1s/epoch - 42ms/step
Epoch 5/10
30/30 - 1s - loss: 0.1519 - accuracy: 0.9507 - val_loss: 0.2828 - val_accuracy: 0.8872 - 1s/epoch - 41ms/step
Epoch 6/10
30/30 - 1s - loss: 0.1228 - accuracy: 0.9642 - val_loss: 0.3267 - val_accuracy: 0.8760 - 1s/epoch - 37ms/step
Epoch 7/10
30/30 - 1s - loss: 0.1052 - accuracy: 0.9679 - val_loss: 0.3016 - val_accuracy: 0.8846 - 978ms/epoch - 33ms/step
Epoch 8/10
30/30 - 1s - loss: 0.0846 - accuracy: 0.9764 - val_loss: 0.3208 - val_accuracy: 0.8826 - 1s/epoch - 37ms/step
Epoch 9/10
30/30 - 1s - loss:

In [6]:
# Rebuild the same 16-16-1 network from scratch (fresh weights), this time
# compiled with the Adam optimizer.
model = keras.Sequential()
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(1, activation="sigmoid"))

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [7]:
# Final training run on the full training set (no validation split).
# NOTE(review): 4 epochs was presumably chosen because validation loss in the
# earlier run stopped improving around epoch 4 -- confirm.
history = model.fit(x=x_train, y=y_train, batch_size=512, epochs=4, verbose=2)

Epoch 1/4
49/49 - 2s - loss: 0.4885 - accuracy: 0.8046 - 2s/epoch - 47ms/step
Epoch 2/4
49/49 - 2s - loss: 0.2455 - accuracy: 0.9108 - 2s/epoch - 34ms/step
Epoch 3/4
49/49 - 1s - loss: 0.1801 - accuracy: 0.9366 - 1s/epoch - 27ms/step
Epoch 4/4
49/49 - 1s - loss: 0.1442 - accuracy: 0.9518 - 1s/epoch - 26ms/step


In [8]:
# Score the trained model on the test set; with the compile settings used in
# this notebook, `results` holds the loss followed by the accuracy.
results = model.evaluate(x=x_test, y=y_test)



In [9]:
def decode_review(encoded):
    """Turn a sequence of integer word indices back into readable text.

    Each index is shifted down by 3 before being looked up in
    ``index_to_word``; any index without a matching word is rendered as "?".
    The decoded words are joined with single spaces.
    """
    words = []
    for token in encoded:
        words.append(index_to_word.get(token - 3, "?"))
    return " ".join(words)

def encode_review(review, num_words=10_000, vocab=None):
    """Encode raw review text as an array of integer word indices.

    The text is split on whitespace. The result starts with the
    start-of-sequence marker 1, and each word maps to its vocabulary index
    plus 3. Words missing from the vocabulary map to 2, the unknown-word
    index.

    Words whose shifted index is ``>= num_words`` also map to 2. The
    notebook loads the data with ``num_words=10_000`` and
    ``vectorize_reviews`` builds 10,000-wide vectors, so without this cap a
    known but rare word would raise IndexError during vectorisation.

    Args:
        review: Review text. It is used as-is (no lowercasing or punctuation
            stripping), so it should already match the vocabulary's format.
        num_words: Exclusive upper bound on the indices produced.
        vocab: Mapping from word to index. Defaults to the module-level
            ``word_index`` dictionary.

    Returns:
        A 1-D integer NumPy array: ``[1, idx_1, idx_2, ...]``.
    """
    if vocab is None:
        vocab = word_index

    start_index, unknown_index, offset = 1, 2, 3

    encoded = [start_index]
    for word in review.split():
        # A missing word gives -1 + offset == unknown_index.
        index = vocab.get(word, -1) + offset
        encoded.append(index if index < num_words else unknown_index)

    return np.array(encoded)

In [11]:
# Hand-written reviews used to probe the trained model.
sample_texts = [
    "this movie sucks",
    "movie sucks this",
    "movie sucks this movie sucks",
    "i have never seen such a crap",
    "i have never seen such a crap in my entire life",
    "i have never watched such a masterpiece",
    "i hate this movie",
    "it's terrible",
    "it's a really great movie",
    "it was very boring movie totally not worth of time",
    "very slow pace movie but everyone should watch it once",
    "noice",
]
my_reviews = vectorize_reviews([encode_review(text) for text in sample_texts])

# Flatten the (n, 1) model output into one score per review; scores of 0.5
# and above are reported as positive.
predictions = model.predict(my_reviews).reshape(-1)
for i, pred in enumerate(predictions, start=1):
    conclusion = "+ve" if pred >= 0.5 else "-ve"
    print(f"{i:2d}) Score: {pred:.6f}, Conclusion: {conclusion}")

 1) Score: 0.365016, Conclusion: -ve
 2) Score: 0.365016, Conclusion: -ve
 3) Score: 0.365016, Conclusion: -ve
 4) Score: 0.477219, Conclusion: -ve
 5) Score: 0.465910, Conclusion: -ve
 6) Score: 0.653365, Conclusion: +ve
 7) Score: 0.512592, Conclusion: +ve
 8) Score: 0.384642, Conclusion: -ve
 9) Score: 0.696161, Conclusion: +ve
10) Score: 0.358113, Conclusion: -ve
11) Score: 0.700438, Conclusion: +ve
12) Score: 0.514511, Conclusion: +ve


Look at the first three examples in `my_reviews` and their predictions. The predictions are exactly the same because our simple fully-connected neural network takes into account neither the order of the words nor how many times a word is repeated. There is no notion of context here. This follows directly from the input encoding: each review is fed to the network as a vector of all zeros except for ones at the indices (from the `word_index` dictionary) of the words it contains.