In [None]:
!pip install tensorflow==2.0.0-alpha # for colab

In [None]:

from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing import sequence

# Vocabulary cap handed to imdb.load_data(num_words=...).
max_features = 10000
# Length every review is padded/truncated to by pad_sequences below.
maxlen = 400

print('Loading data...')
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=max_features)
print(len(train_data), 'train sequences')
print(len(test_data), 'test sequences')

# Pad once: the sequences are already `maxlen` long after this step, so a
# second identical pad_sequences call would be a no-op.
print('Pad sequences (samples x time)')
train_data = sequence.pad_sequences(train_data, maxlen=maxlen)
test_data = sequence.pad_sequences(test_data, maxlen=maxlen)
print('train_data shape:', train_data.shape)
print('test_data shape:', test_data.shape)

# Show one padded example from each split.
print(train_data[0])
print(test_data[0])


In [None]:

# Vocabulary shipped with the dataset: maps each word to an integer index.
word_index = imdb.get_word_index()

# Shift every word index up by 3, then claim indices 0-3 for the special tokens.
word_index = {word: idx + 3 for word, idx in word_index.items()}
word_index.update({
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,  # unknown
    "<UNUSED>": 3,
})

# Inverse lookup: integer index -> word.
reverse_word_index = {idx: word for word, idx in word_index.items()}

def decode_review(text, index_to_word=None):
    """Turn a sequence of integer word indices back into a readable string.

    Args:
        text: Iterable of integer word indices (one encoded review).
        index_to_word: Optional mapping from integer index to word. When
            omitted, the notebook-level ``reverse_word_index`` is used, so
            existing single-argument calls behave exactly as before.

    Returns:
        The decoded words joined by single spaces. Indices missing from the
        mapping are rendered as ``'?'``; an empty sequence yields ``''``.
    """
    if index_to_word is None:
        # Fall back to the mapping built from imdb.get_word_index().
        index_to_word = reverse_word_index
    return ' '.join(index_to_word.get(i, '?') for i in text)

# Display the first training sequence decoded back into words.
# NOTE(review): train_data was passed through pad_sequences earlier, so the
# decoded text presumably contains <PAD> tokens — confirm this is acceptable.
decode_review(train_data[0])

In [None]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

print("Training entries: {}, labels: {}".format(len(train_data), len(train_labels)))

# Bring both splits to a fixed length of 256, using the <PAD> index as filler.
# NOTE(review): both arrays were already padded to maxlen=400 in the loading
# cell, so every row is longer than 256 here. This call therefore presumably
# only truncates (Keras' default truncating='pre' keeps the trailing entries)
# and padding='post' never adds anything — confirm whether padding twice with
# different settings is intended.
train_data = pad_sequences(
    train_data,
    maxlen=256,
    padding='post',
    value=word_index["<PAD>"],
)
test_data = pad_sequences(
    test_data,
    maxlen=256,
    padding='post',
    value=word_index["<PAD>"],
)

# Two different reviews should now report the same length.
print(len(train_data[0]), len(train_data[1]))


In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, \
    Conv1D, GlobalMaxPooling1D, GlobalAveragePooling1D, Embedding
from tensorflow.nn import relu, sigmoid
from tensorflow.keras.backend import binary_crossentropy


# Hyperparameters of the alternative Conv1D architecture described below.
# The active model does not read them; they are kept so the variant can be
# restored without hunting for values.
embedding_dims = 50
filters = 250
kernel_size = 3
hidden_dims = 250

# The Embedding layer needs one row per possible token id. Tie it to the cap
# used when loading the data (max_features) so the two cannot drift apart.
vocab_size = max_features

# Active model: embedding -> average over the sequence axis -> small dense
# classifier with a single sigmoid output.
#
# Alternative that was tried (currently disabled):
#   Embedding(max_features, embedding_dims, input_length=maxlen) -> Dropout(0.2)
#   -> Conv1D(filters, kernel_size, padding='valid', activation=relu, strides=1)
#   -> GlobalMaxPooling1D() -> Dense(250, activation=relu) -> Dropout(0.2)
model = Sequential()
model.add(Embedding(vocab_size, 16))
model.add(GlobalAveragePooling1D())
model.add(Dense(16, activation=relu))
model.add(Dense(1, activation=sigmoid))

model.summary()

# Use the Keras loss identifier rather than the low-level
# keras.backend.binary_crossentropy op: the registered loss handles label
# casting and reduction the way Model.fit expects.
# The metric name 'acc' is kept because later cells read history['acc'].
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['acc'])


In [None]:
# Hold out the first 10000 training examples for validation; train on the rest.
partial_validation_data, partial_train_data = train_data[:10000], train_data[10000:]
partial_validation_labels, partial_train_labels = train_labels[:10000], train_labels[10000:]

In [None]:
# Training schedule (the trailing comments record the alternative values noted
# by the author).
batch_size = 512  # 32
epochs = 40  # 2

# Train on the reduced training split and track the held-out validation split.
history = model.fit(
    x=partial_train_data,
    y=partial_train_labels,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(partial_validation_data, partial_validation_labels),
)

# Final check on the test split; the result is shown as the cell output.
model.evaluate(x=test_data, y=test_labels)

In [None]:
# Values recorded by model.fit, keyed by metric name (read by the plot cells).
history_dict = history.history
# Show which metric names are available.
history_dict.keys()

In [None]:
import matplotlib.pyplot as plt

# Pull the four recorded curves out of the training history.
acc, val_acc, loss, val_loss = (
    history_dict[key] for key in ('acc', 'val_acc', 'loss', 'val_loss')
)

# x axis: 1-based epoch numbers, one per recorded value.
epochs = range(1, len(acc) + 1)

plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.plot(epochs, loss, 'bo', label='Training loss')       # 'bo' = blue dots
plt.plot(epochs, val_loss, 'b', label='Validation loss')  # 'b' = solid blue line
plt.legend()

plt.show()

In [None]:
plt.clf()   # start from an empty figure

# Same layout as the loss plot, this time for accuracy.
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.legend()

plt.show()