In [0]:
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import keras

In [0]:
# Grab the IMDB sentiment dataset bundled with Keras. load_data() returns
# two (data, labels) pairs: one for training and one for testing.
imdb = keras.datasets.imdb
train_split, test_split = imdb.load_data()
train_data, train_labels = train_split
test_data, test_labels = test_split

In [0]:
# Vocabulary lookup: word -> integer id. Every id coming from the dataset is
# shifted up by 3 so that ids 0..3 stay free for the special tokens below.
word_index = {word: rank + 3 for word, rank in imdb.get_word_index().items()}

# Reserved ids for the special tokens.
word_index.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,  # unknown
    "<UNUSED>": 3,
})

# Inverse lookup: integer id -> word.
reverse_word_index = {idx: word for word, idx in word_index.items()}

def encode_review(text):
    """Convert a review into a list of integer word ids.

    Args:
        text: Either a whitespace-separated string (for example the output of
            ``decode_review``) or an iterable of word tokens.

    Returns:
        A list with one id per token. Tokens that are not in ``word_index``
        are mapped to the id of ``<UNK>``.
    """
    # Iterating over a bare string yields single characters, not words, so a
    # string is split into whitespace-separated tokens first.
    if isinstance(text, str):
        text = text.split()
    unknown_id = word_index['<UNK>']
    return [word_index.get(word, unknown_id) for word in text]

def decode_review(text):
    """Turn a sequence of integer word ids back into a readable string.

    Each id is looked up in ``reverse_word_index``; ids with no entry are
    rendered as '?'. The resulting words are joined with single spaces.
    """
    words = (reverse_word_index.get(token_id, '?') for token_id in text)
    return ' '.join(words)

In [0]:
# Map every integer-encoded review in both splits back to its text form.
train_data_decoded = list(map(decode_review, train_data))
test_data_decoded = list(map(decode_review, test_data))

In [0]:
# Arrange the decoded reviews as object arrays of shape (n_samples, 1), i.e.
# one string per row.
# reshape(-1, 1) is used instead of indexing with np.newaxis so that this cell
# is idempotent: running it again leaves an (n, 1) array unchanged, whereas
# np.newaxis would add one more axis on every re-run.
train_data_decoded = np.asarray(train_data_decoded, dtype=object).reshape(-1, 1)
test_data_decoded = np.asarray(test_data_decoded, dtype=object).reshape(-1, 1)

In [0]:
from keras import backend as K

# Initialize session
# Create one explicit TensorFlow session and register it with the Keras
# backend; the same `sess` object is used further down to run the variable
# and table initializers.
sess = tf.Session()
K.set_session(sess)

In [0]:
# Load the Universal Sentence Encoder (version 2) from TF Hub.
# trainable=False: the module is loaded as non-trainable.
dan_module = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/2", trainable=False)

# Run the global-variable and lookup-table initializers in the session
# created earlier. This is done after hub.Module(...) so that the module's
# variables and tables already exist when the initializer ops are built.
sess.run([tf.global_variables_initializer(), tf.tables_initializer()])

In [0]:
def DANEmbedding(x):
    """Embed a batch of sentences with the TF Hub module in ``dan_module``.

    Args:
        x: Tensor of shape (batch, 1) holding one sentence per row, as
            produced by the model's ``Input(shape=(1,), dtype=tf.string)``.

    Returns:
        The "default" output of the module for the batch of sentences.
    """
    # Squeeze only axis 1. A bare tf.squeeze() removes every size-1 dimension,
    # so a batch containing a single sentence would collapse to a scalar and
    # lose its batch axis.
    sentences = tf.squeeze(tf.cast(x, tf.string), axis=1)
    return dan_module(sentences, signature="default", as_dict=True)["default"]

In [0]:
# --- Hyperparameters ---------------------------------------------------------
EMBEDDING_SIZE = 512   # output width declared for the embedding Lambda layer
HIDDEN_UNITS = 256
EPOCHS = 5
BATCH_SIZE = 32

# --- Architecture: raw text -> sentence embedding -> dense -> sigmoid --------
input_text = keras.layers.Input(shape=(1,), dtype=tf.string)
embedding = keras.layers.Lambda(
    DANEmbedding,
    output_shape=(EMBEDDING_SIZE,),
)(input_text)
dense = keras.layers.Dense(HIDDEN_UNITS, activation='relu')(embedding)
pred = keras.layers.Dense(1, activation='sigmoid')(dense)

model = keras.models.Model(inputs=[input_text], outputs=pred)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# --- Training ----------------------------------------------------------------
# The test split is passed as validation data, so validation metrics are
# reported on it after every epoch.
model.fit(
    x=train_data_decoded,
    y=train_labels,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(test_data_decoded, test_labels),
)

Task: try to achieve higher accuracy.

Possible things to try:

1.   Use another module: ELMo or universal-sentence-encoder-large (switch to a GPU runtime)
2.   Set trainable=True for the TF Hub module
3.   Modify the network architecture (more layers, dropout, etc.)
4.   Adjust the learning rate or choose another optimizer
5.   Use an ensemble of modules

