CNN for NLP
------

Create a CNN for text classification.

In [1]:
# Start from a clean namespace (-f: skip the confirmation prompt, -s: soft reset).
# The explicit % prefix keeps this working regardless of IPython's automagic setting
# and even if a variable named `reset` exists.
%reset -fs

In [2]:
from keras.datasets import imdb

Using TensorFlow backend.


In [11]:
# Vocabulary cap handed to imdb.load_data as num_words.
max_features = 5000

print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)

# Report how many reviews landed in each split.
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

Loading data...
25000 train sequences
25000 test sequences


In [24]:
# Inspect the shape of the training inputs.
# NOTE(review): the stored (25000, 400) output has execution count 24, so it was produced
# after the padding cell (count 16) had run; on a fresh top-to-bottom run x_train is
# still unpadded at this point and the result will differ.
x_train.shape

(25000, 400)

In [31]:
# Shape of a single review.
# NOTE(review): the stored (400,) output reflects the padded array; before padding the rows
# are presumably plain Python lists without a .shape attribute — confirm, and run this
# after the padding cell.
x_train[0].shape

(400,)

In [32]:
# Preview the word-index matrix; the stored output shows each row starting with zeros
# and the display abbreviated with ellipses.
x_train

array([[   0,    0,    0, ...,   19,  178,   32],
       [   0,    0,    0, ...,   16,  145,   95],
       [   0,    0,    0, ...,    7,  129,  113],
       ..., 
       [   0,    0,    0, ...,    4, 3586,    2],
       [   0,    0,    0, ...,   12,    9,   23],
       [   0,    0,    0, ...,  204,  131,    9]], dtype=int32)

In [15]:
from keras.preprocessing import sequence

In [16]:
# Pad/truncate every review to the same length so each split becomes a 2-D (samples x time) array.
maxlen_ = 400
print('Pad sequences (samples x time)')
x_train, x_test = (
    sequence.pad_sequences(reviews, maxlen=maxlen_)
    for reviews in (x_train, x_test)
)

Pad sequences (samples x time)


In [10]:
# Peek at the training labels; the stored output shows 0/1 values.
y_train

array([1, 0, 0, ..., 0, 1, 0])

In [19]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D

In [33]:
print('Build model...')

# Layer sizes for the first CNN, kept together so they are tuned in one place.
embedding_dims = 50   # length of each learned word vector
filters_ = 250        # number of convolution filters
kernel_size_ = 3      # consecutive words covered by each filter
hidden_dims = 250     # units in the fully connected hidden layer

model = Sequential()

# Embedding layer: maps vocabulary indices to dense vectors of embedding_dims dimensions.
# input_dim reuses max_features so it stays in sync with the num_words cap used when loading the data.
model.add(Embedding(input_dim=max_features, output_dim=embedding_dims))

# 1-D convolution that learns filters over groups of kernel_size_ consecutive words.
# NOTE(review): no activation is passed, so the convolution output is linear — confirm this is intended.
model.add(Conv1D(filters=filters_, kernel_size=kernel_size_))

# Max pooling over the whole sequence: one value per filter (see the (None, 250) summary row).
model.add(GlobalMaxPooling1D())

# Vanilla hidden layer, sized by hidden_dims instead of a repeated literal.
model.add(Dense(hidden_dims, activation='relu'))

# Project onto a single output unit and squash it with a sigmoid.
model.add(Dense(1, activation='sigmoid'))
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, None, 50)          250000    
_________________________________________________________________
conv1d_3 (Conv1D)            (None, None, 250)         37750     
_________________________________________________________________
global_max_pooling1d_3 (Glob (None, 250)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 250)               62750     
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 251       
Total params: 350,751.0
Trainable params: 350,751.0
Non-trainable params: 0.0
_________________________________________________________________


TODO: How many parameters does this model have compared to an LSTM on the same dataset?

In [35]:
print("Training model...")
# Binary cross-entropy matches the single sigmoid output; this first run uses plain SGD.
model.compile(loss='binary_crossentropy',
              optimizer='SGD',
              metrics=['accuracy'])
batch_size = 32
epochs = 1
# Keep the returned History object (per-epoch loss/metrics) instead of discarding it,
# the same way the later Adam training cell does. validation_split=0.1 holds out 10%
# of the training data for validation. verbose=1 is the integer progress-bar level.
history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1)

Training model...
Train on 22500 samples, validate on 2500 samples
Epoch 1/1


<keras.callbacks.History at 0x11387ae80>

In [9]:
# Evaluate on the test split; the result unpacks into the loss and the compiled accuracy metric.
score, accuracy = model.evaluate(
    x_test, y_test, batch_size=batch_size, verbose=True
)



In [10]:
# Report test loss and accuracy, each formatted to three significant digits, one per line.
print('Test score: {:.3}'.format(score),
      'Test accuracy: {:.3}'.format(accuracy),
      sep='\n')

Test score: 0.693
Test accuracy: 0.5


__TODO__: How does accuracy compare to an LSTM on the same dataset?

In [None]:
# The accuracy for the baseline LSTM was ~50%. Here, we are also at ~50%.
##### LSTM accuracy is 50%; CNN accuracy is 50% for the 1st epoch

__TODO__: How does training speed compare to an LSTM on the same dataset?

In [None]:
# Training should be faster for the CNN than for the LSTM. This is because the CNN has fewer parameters, while
# the LSTM I trained had about 1.5 million parameters.

In [None]:
##### LSTM took 382s to train, CNN took 272s for the 1st epoch
##### LSTM has more trainable parameters, and hence takes longer to train

# Switch to adam
- change kernel size to 8
- change embedding dimension to 100

In [40]:
print('Build model...')

# Layer sizes for the second CNN: wider embedding (100) and longer kernel (8) than the first model.
embedding_dims = 100  # length of each learned word vector
filters_ = 250        # number of convolution filters
kernel_size_ = 8      # consecutive words covered by each filter
hidden_dims = 250     # units in the fully connected hidden layer

model = Sequential([
    # Embedding layer: maps vocabulary indices to embedding_dims-dimensional vectors.
    # input_dim reuses max_features so it stays in sync with the num_words cap used when loading the data.
    Embedding(input_dim=max_features, output_dim=embedding_dims),
    # 1-D convolution over groups of kernel_size_ consecutive words.
    # NOTE(review): no activation is passed, so the convolution output is linear — confirm this is intended.
    Conv1D(filters=filters_, kernel_size=kernel_size_),
    # Max pooling over the whole sequence: one value per filter.
    GlobalMaxPooling1D(),
    # Vanilla hidden layer, sized by hidden_dims instead of a repeated literal.
    Dense(hidden_dims, activation='relu'),
    # Single sigmoid unit for the binary sentiment prediction.
    Dense(1, activation='sigmoid'),
])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_6 (Embedding)      (None, None, 100)         500000    
_________________________________________________________________
conv1d_6 (Conv1D)            (None, None, 250)         200250    
_________________________________________________________________
global_max_pooling1d_6 (Glob (None, 250)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 250)               62750     
_________________________________________________________________
dense_12 (Dense)             (None, 1)                 251       
Total params: 763,251.0
Trainable params: 763,251.0
Non-trainable params: 0.0
_________________________________________________________________


In [41]:
print("Training model...")

# Training settings for the Adam run.
batch_size = 32
epochs = 1

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Hold out 10% of the training data for validation and keep the History object for inspection.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
    verbose=True,
)

Training model...
Train on 22500 samples, validate on 2500 samples
Epoch 1/1


In [None]:
# Evaluate the Adam-trained model on the test split (loss first, then accuracy).
score, accuracy = model.evaluate(
    x_test,
    y_test,
    verbose=True,
    batch_size=batch_size,
)

In [None]:
# Report test loss and accuracy, each formatted to three significant digits, one per line.
print('Test score: {:.3}'.format(score),
      'Test accuracy: {:.3}'.format(accuracy),
      sep='\n')

In [None]:
### The CNN on text data reached an accuracy of ~87%, but it took longer to train than the LSTM.

<br>
<br> 
<br>

----