In [1]:
# Deep Learning with Python Ch7: IMDb example
# load and preprocess IMDb data
import numpy as np
from keras.datasets import imdb
from keras.preprocessing import sequence

# number of most common words to keep in the vocabulary
max_features = 2000
# every review is padded or cut to exactly this many tokens; pad_sequences is
# called with its defaults, which per the Keras docs trim a long review from
# the front (truncating='pre') -- NOTE(review): confirm for the installed version
max_len      = 500

# load data
# imdb.load_data may call np.load without allow_pickle, which recent NumPy
# releases refuse for pickled object arrays, so np.load is wrapped temporarily.
# NOTE: allow_pickle=True can execute arbitrary code stored in the loaded file;
# only do this for a dataset file you trust.
np_load_old = np.load


def _np_load_allow_pickle(*args, **kwargs):
    """Call the saved np.load with allow_pickle forced to True."""
    # writing into kwargs (rather than passing a second keyword) avoids a
    # "multiple values for keyword argument" TypeError when the caller
    # already supplies allow_pickle itself
    kwargs['allow_pickle'] = True
    return np_load_old(*args, **kwargs)


np.load = _np_load_allow_pickle
try:
    print('load data')
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
finally:
    # restore np.load even when loading fails, so the patch never leaks
    # into the rest of the kernel session
    np.load = np_load_old

# preprocess data
print('pad sequences')
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test  = sequence.pad_sequences(x_test, maxlen=max_len)
print('x_train shape', x_train.shape)
print('x_est shape', x_test.shape)

Using TensorFlow backend.


load data
pad sequences
x_train shape (25000, 500)
x_est shape (25000, 500)


In [3]:
# build and compile a 1D convnet (training happens in a later cell)
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Embedding, GlobalMaxPooling1D, MaxPooling1D

# assemble the 1D convnet by handing the layer stack to Sequential in one go
model = Sequential([
    # one 128-dim vector per word index, for sequences of max_len tokens
    Embedding(max_features, 128, input_length=max_len, name='embed'),
    Conv1D(32, 7, activation='relu'),
    MaxPooling1D(5),
    Conv1D(32, 7, activation='relu'),
    GlobalMaxPooling1D(),
    # single sigmoid output unit
    Dense(1, activation='sigmoid'),
])

# print the layer table
model.summary()

# set optimizer, loss and the tracked metric
model.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embed (Embedding)            (None, 500, 128)          256000    
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 494, 32)           28704     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 98, 32)            0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 92, 32)            7200      
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 291,937
Trainable params: 291,937
Non-trainable params: 0
_________________________________________________________________


In [4]:
# train network with TensorBoard callback
import keras

# TensorBoard logging into 'my_log_dir': histograms and embedding data are
# written every epoch (histogram_freq=1, embeddings_freq=1)
# only the first 100 training samples are passed as embedding data, because
# of a ResourceExhaustedError
tensorboard_cb = keras.callbacks.TensorBoard(
    log_dir='my_log_dir',
    histogram_freq=1,
    embeddings_freq=1,
    embeddings_data=x_train[:100],
)
callbacks = [tensorboard_cb]

# fit silently (verbose=0) with validation_split=0.2
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=20,
    verbose=0,
    validation_split=0.2,
    callbacks=callbacks,
)

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Instructions for updating:
Use standard file APIs to delete files with this prefix.


In [6]:
# plot model as graphs of layers in PNG image 
from keras.utils import plot_model

# first rendering: layer graph only, written to model.png
plot_model(model=model, to_file='model.png')
# second rendering: same graph with shape info, written to model_shape_info.png
plot_model(model=model, to_file='model_shape_info.png', show_shapes=True)