In [0]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [0]:
# Handle to the IMDB review dataset bundled with Keras; later cells reuse
# `imdb` to fetch the word index.
imdb = keras.datasets.imdb

# num_words=10000 limits the vocabulary size (see the
# keras.datasets.imdb.load_data docs for the exact semantics).
# load_data returns a (train, test) pair, each split being (inputs, labels).
train_split, test_split = imdb.load_data(num_words=10000)
x_train, y_train = train_split
x_test, y_test = test_split

In [0]:
# Word -> integer id lookup. Every id from the dataset's own index is shifted
# up by 3 so that ids 0..3 are free for the special tokens registered below.
word_index = {token: rank + 3 for token, rank in imdb.get_word_index().items()}

# Special tokens occupy the reserved low ids (assigned after the shift, so
# they win over any identically-named entry in the raw index).
word_index.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,  # unknown
    "<UNUSED>": 3,
})

# Inverse lookup (integer id -> word), used to turn encoded reviews back
# into text.
reverse_word_index = {idx: token for token, idx in word_index.items()}

def decode_review(text, index_to_word=None):
    """Turn a sequence of integer word ids back into a readable string.

    Args:
        text: Iterable of integer word ids (e.g. one encoded review).
        index_to_word: Optional mapping from integer id to word. When left as
            None, the notebook-level ``reverse_word_index`` is used, which
            keeps existing ``decode_review(ids)`` calls working unchanged.

    Returns:
        The decoded words joined by single spaces. Ids that are missing from
        the mapping are rendered as '?'. An empty input yields ''.
    """
    if index_to_word is None:
        # Resolved at call time so the function always sees the current
        # notebook-level lookup table.
        index_to_word = reverse_word_index
    return ' '.join(index_to_word.get(i, '?') for i in text)

In [0]:
# Sanity check: decode the first training review back into words.
first_review = x_train[0]
decode_review(first_review)

In [0]:
# Bring every review to exactly 256 ids: shorter reviews are filled at the
# end ("post") with the <PAD> id. Reviews longer than 256 are cut using
# pad_sequences' default truncation mode (not overridden here; the Keras docs
# list it as 'pre' — confirm if which end is dropped matters).
pad_options = dict(value=word_index["<PAD>"], padding="post", maxlen=256)

# Train and test share identical settings so the model sees one input format.
x_train = keras.preprocessing.sequence.pad_sequences(x_train, **pad_options)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, **pad_options)

In [7]:
# Number of rows in the embedding table (one per word id the model accepts).
vocSize = 10000

# model architecture: embed each word id into 16 dimensions, average the
# embeddings over the sequence axis, then classify with a small dense head
# whose single sigmoid unit outputs a value between 0 and 1.
model = keras.Sequential([
    keras.layers.Embedding(vocSize, 16),
    keras.layers.GlobalAveragePooling1D(),
    keras.layers.Dense(16, activation=tf.nn.relu),
    keras.layers.Dense(1, activation=tf.nn.sigmoid),
])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 16)          160000    
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 16)                272       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 17        
Total params: 160,289
Trainable params: 160,289
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked as an additional metric.
# The optimizer is given as the Keras identifier "adam" rather than
# tf.train.AdamOptimizer(): the tf.train optimizers were removed in
# TensorFlow 2.x, while the string form is accepted by tf.keras on 1.x and 2.x.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [16]:
# Hold out the first 10000 training examples for validation; the model is
# fitted on the remainder only.
x_val, part_x = x_train[:10000], x_train[10000:]
y_val, part_y = y_train[:10000], y_train[10000:]

# 40 passes over the partial training set in mini-batches of 512, reporting
# validation loss/accuracy after every epoch. `history` keeps the fit record.
history = model.fit(
    x=part_x,
    y=part_y,
    batch_size=512,
    epochs=40,
    verbose=1,
    validation_data=(x_val, y_val),
)

Train on 15000 samples, validate on 10000 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [18]:
# Score the trained model on the held-out test set. The result is indexed as
# [loss, accuracy], matching the loss and the single metric given at compile.
hit = model.evaluate(x_test, y_test)

test_loss, test_accuracy = hit[0], hit[1]
print(test_loss)  # loss
print(test_accuracy)  # accuracy

0.3298557037162781
0.87252
