In [1]:
%tensorflow_version 2.x
from keras.datasets import imdb
from keras.preprocessing import sequence
import tensorflow as tf
import os
import numpy as np

# Vocabulary size: passed to load_data as num_words and used as the
# Embedding layer's input dimension.
VOCAB_SIZE = 88584
# Every review is padded/truncated to this many tokens before training.
MAX_LEN = 250
# NOTE(review): not referenced by any cell shown here (model.fit is called
# without batch_size) -- confirm whether it should be passed to fit.
BATCH_SIZE = 64

# Reviews arrive already encoded as lists of integer word ids.
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(
  num_words=VOCAB_SIZE
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [2]:
# Reviews vary in length, but the model needs fixed-size input, so every
# review is padded (or truncated) to exactly MAX_LEN tokens.
train_data = sequence.pad_sequences(train_data, maxlen=MAX_LEN)
test_data = sequence.pad_sequences(test_data, maxlen=MAX_LEN)

In [4]:
# Sentiment classifier: word embedding -> LSTM -> single sigmoid unit.
model = tf.keras.Sequential()
# Map each word id (0 .. VOCAB_SIZE - 1) to a 32-dimensional vector.
model.add(tf.keras.layers.Embedding(input_dim=VOCAB_SIZE, output_dim=32))
model.add(tf.keras.layers.LSTM(units=32))
# Sigmoid squashes the output into (0, 1), matching the binary labels.
model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(
  optimizer='rmsprop',
  loss='binary_crossentropy',
  metrics=['acc'],
)

# Hold back 20% of the training data for validation after each epoch.
history = model.fit(
  x=train_data,
  y=train_labels,
  epochs=10,
  validation_split=0.2,
)

In [None]:
# Loss and accuracy ('acc' metric from compile) on the held-out test split.
results = model.evaluate(x=test_data, y=test_labels)

# Word -> integer index mapping shipped with the IMDB dataset; needed to
# encode raw text for predictions.
word_index = imdb.get_word_index()
def encode_text(text):
  """Convert a raw review string into a padded sequence of IMDB word ids.

  The text is split into lower-cased words by Keras, each word is mapped into
  the id space produced by imdb.load_data, and the result is padded (or
  truncated) to MAX_LEN.

  Args:
    text: review text as a plain string.

  Returns:
    1-D integer array of length MAX_LEN.
  """
  # imdb.load_data (called with its defaults) shifts every raw word_index
  # value by 3 and reserves 0 = padding, 1 = start marker, 2 = out-of-vocab.
  # The raw values from get_word_index() must be shifted the same way, or
  # every word is looked up in the wrong embedding row.
  index_from = 3
  oov_id = 2

  def to_id(word):
    rank = word_index.get(word)
    if rank is None:
      return oov_id
    shifted = rank + index_from
    # ids >= VOCAB_SIZE were replaced by the OOV id in the training data and
    # would also fall outside the Embedding layer's input range.
    return shifted if shifted < VOCAB_SIZE else oov_id

  # The Keras helper is text_to_word_sequence (singular); the plural name
  # does not exist and raised AttributeError.
  words = tf.keras.preprocessing.text.text_to_word_sequence(text)
  tokens = [to_id(word) for word in words]
  return sequence.pad_sequences([tokens], MAX_LEN)[0]

# Quick check of the encoder on a short sample review.
# NOTE(review): `encoded` is not referenced by any later cell shown here.
text = "Great Movie!"
encoded = encode_text(text)

In [None]:
def generate_prediction(text):
  """Encode a raw review, run it through the model and print the output.

  Args:
    text: review text as a plain string; encoded with encode_text.

  Prints the first row of model.predict's output; with the sigmoid output
  layer this is a value between 0 and 1. Returns None.
  """
  encoded_text = encode_text(text)
  # np.zeros takes the shape as a single tuple; np.zeros(1, 250) passed 250
  # as the dtype argument and raised TypeError. The batch has one row whose
  # width follows the encoded review instead of a hard-coded 250.
  pred = np.zeros((1, len(encoded_text)))
  pred[0] = encoded_text
  result = model.predict(pred)
  print(result[0])

# Two hand-written reviews: one clearly positive, one clearly negative.
preview = "Movie was awesome so much fun"
nreview = "bad movie and not fun"

# Print the model's score for each sample.
generate_prediction(preview)
generate_prediction(nreview)