In [13]:
import keras
from keras import layers
from keras.datasets import imdb
from keras.preprocessing import sequence

# ---- Data preprocessing ----
# Vocabulary size: imdb.load_data(num_words=...) keeps only the most frequent
# words, so every word index in the loaded reviews is below max_features.
max_features = 2000
# Fixed review length, in tokens, after padding/truncation.
# This is a sequence-length limit, not a word-frequency cutoff.
max_len = 500

# IMDB provides 25,000 training and 25,000 test reviews, each a variable-length
# list of integer word indices.
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
# pad_sequences turns every review into exactly max_len integers: shorter
# reviews are zero-padded and longer ones are truncated (with the default
# settings both happen at the front, so the end of the review is kept).
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)

# ---- Network architecture ----
model = keras.models.Sequential()
# Embedding maps each integer word index to a dense 128-dimensional vector:
# (batch, 500) -> (batch, 500, 128).
model.add(layers.Embedding(max_features,            # input_dim: vocabulary size (2000)
                           128,                     # output_dim: embedding vector size
                           input_length=max_len,    # number of tokens per review (500)
                           name='embed'))
model.add(layers.Conv1D(32, 7, activation='relu'))  # 32 filters of width 7 -> (batch, 494, 32)
model.add(layers.MaxPooling1D(5))                   # pool size 5          -> (batch, 98, 32)
model.add(layers.Conv1D(32, 7, activation='relu'))  #                      -> (batch, 92, 32)
model.add(layers.GlobalMaxPooling1D())              # max over time axis   -> (batch, 32)
# Sigmoid output: binary_crossentropy (default from_logits=False) expects a
# probability in [0, 1]; a linear Dense(1) would feed it unbounded values.
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()

# ---- Training configuration ----
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embed (Embedding)            (None, 500, 128)          256000    
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 494, 32)           28704     
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 98, 32)            0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 92, 32)            7200      
_________________________________________________________________
global_max_pooling1d_2 (Glob (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 291,937
Trainable params: 291,937
Non-trainable params: 0
________________________________________________

In [14]:
# Sanity check: after pad_sequences the training data should be a 2-D array of
# (number of reviews, max_len), i.e. (25000, 500).
x_train.shape

(25000, 500)

In [16]:
#create a directory for TensorBoard log files
import os
# Resolve the log folder against the notebook's working directory instead of
# hard-coding a user-specific absolute path; the folder name matches the
# relative log_dir ('my_log_dir') passed to the TensorBoard callback.
base_dir = os.path.abspath('my_log_dir')
# exist_ok=True keeps this cell safe to re-run (os.mkdir raises if the folder
# already exists, which is why the call had to be commented out before).
os.makedirs(base_dir, exist_ok=True)

In [20]:
# Callbacks used during training: a single TensorBoard logger that writes its
# event files to the relative folder 'my_log_dir'.
callbacks = [keras.callbacks.TensorBoard(log_dir='my_log_dir')]

# Fit for 20 epochs in mini-batches of 128 reviews. validation_split=0.2 holds
# out 5,000 of the 25,000 training reviews for validation (20,000 are trained on).
history = model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=20,
    validation_split=0.2,
    callbacks=callbacks,
)


Train on 20000 samples, validate on 5000 samples
Epoch 1/20

Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
