# 4. CNN for Text Categorization (Johnson and Zhang 2014)
- This is a Keras implementation of seq-CNN for text categorization (Johnson, R., & Zhang, T. (2014). Effective use of word order for text categorization with convolutional neural networks. arXiv preprint arXiv:1412.1058.)
    - "Instead of using low-dimensional word vectors as input as is often done, we directly apply CNN to high-dimensional text data, which leads to directly learning embedding of small text regions for use in classification"
    
<br>
<img src="https://ai2-s2-public.s3.amazonaws.com/figures/2017-08-08/364da079f91a6cb385997be990af06e9ddf6e888/5-Figure4-1.png" style="width: 600px"/>

In [1]:
import numpy as np 
from keras.preprocessing import sequence
from keras.models import *
from keras.layers import *
from keras.callbacks import *
from keras.datasets import imdb

Using TensorFlow backend.


In [2]:
# Notebook-wide settings:
#   num_words - vocabulary size passed to imdb.load_data and width of one one-hot slot
#   max_len   - number of tokens every review is padded/truncated to
num_words, max_len = 300, 50

In [13]:
# Load the IMDB reviews, restricted to the `num_words` vocabulary.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)

# Keep only the first 5000 test reviews.
X_test, y_test = X_test[:5000], y_test[:5000]

# Bring every review to exactly `max_len` token indices.
X_train, X_test = (
    sequence.pad_sequences(reviews, maxlen=max_len)
    for reviews in (X_train, X_test)
)

print(*(arr.shape for arr in (X_train, X_test, y_train, y_test)))

(25000, 50) (5000, 50) (25000,) (5000,)


## seq-CNN for text
- "As in the convolution layer for image, we represent each region (which each computation unit responds to) by a concatenation of the pixels, which makes p|V|-dimensional region vectors where p is the region size fixed in advance"
    - "The rest is the same as image; the text region vectors are converted to feature vectors, i.e., the convolution layer learns to embed text regions into low dimensional vector space"

In [14]:
# define a function to encode sentences into one-hot
def one_hot(sentences, vocab_size = None, seq_len = None):
    """Encode token-index sequences as flat, concatenated one-hot vectors.

    Position ``j`` of a sentence owns the slice
    ``[j * vocab_size, (j + 1) * vocab_size)`` of its output row; the entry
    at offset ``token`` inside that slice is set to 1. Sentences shorter than
    ``seq_len`` leave their trailing slots all-zero.

    Parameters
    ----------
    sentences : sequence of integer sequences
        One sequence of token indices per sample. Each sequence may hold at
        most ``seq_len`` tokens and every token must lie in
        ``[0, vocab_size)``.
    vocab_size : int, optional
        Width of one one-hot slot. Defaults to the notebook-level
        ``num_words``.
    seq_len : int, optional
        Number of slots per row. Defaults to the notebook-level ``max_len``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(sentences), vocab_size * seq_len)`` created with
        ``np.zeros`` (default dtype) and filled with 1 at the encoded
        positions.

    Raises
    ------
    IndexError
        If a sentence is longer than ``seq_len`` or contains a token outside
        ``[0, vocab_size)``. Without this check such a token would silently
        set a bit in a neighbouring position's slot (or wrap around for
        negative values) instead of failing.
    """
    # Fall back to the notebook globals only when the caller gave no override.
    if vocab_size is None:
        vocab_size = num_words
    if seq_len is None:
        seq_len = max_len

    result = np.zeros((len(sentences), vocab_size * seq_len))
    for row, sentence in enumerate(sentences):
        tokens = np.asarray(sentence, dtype = np.intp)
        if tokens.size == 0:
            # nothing to encode; the row stays all-zero
            continue
        if tokens.size > seq_len:
            raise IndexError(
                "sentence %d has %d tokens but seq_len is %d"
                % (row, tokens.size, seq_len)
            )
        if tokens.min() < 0 or tokens.max() >= vocab_size:
            raise IndexError(
                "sentence %d contains a token outside [0, %d)"
                % (row, vocab_size)
            )
        # Start offset of each position's slot, then one fancy-index write per row.
        slot_starts = np.arange(tokens.size) * vocab_size
        result[row, tokens + slot_starts] = 1
    return result

In [15]:
# One-hot encode both splits and append a trailing axis of size 1,
# giving the 3-D (samples, features, 1) tensors printed below.
X_train_one_hot = np.expand_dims(one_hot(X_train), axis = -1)
X_test_one_hot = np.expand_dims(one_hot(X_test), axis = -1)

print(X_train_one_hot.shape, X_test_one_hot.shape)

(25000, 15000, 1) (5000, 15000, 1)


In [19]:
# function to create seq-CNN model
# number of filters are diversified for each convolutional operation
def seq_cnn(filters = (64, 32, 16), kernels = 300, strides = 10, pool_size = 10, input_shape = None):
    """Build and compile the multi-branch seq-CNN binary classifier.

    Every entry of ``filters`` creates one parallel ``Conv1D`` branch on the
    shared input. Branches at even positions (0, 2, ...) are followed by max
    pooling, branches at odd positions by average pooling. The pooled branches
    are concatenated along the channel axis, flattened, and fed to a single
    sigmoid unit.

    Parameters
    ----------
    filters : iterable of int
        Number of convolution filters for each branch.
    kernels : int
        Kernel size shared by all ``Conv1D`` branches.
    strides : int
        Stride shared by all ``Conv1D`` branches (default 10, the value that
        was previously hard-coded).
    pool_size : int
        Pool size for both pooling layer types (default 10, previously
        hard-coded).
    input_shape : tuple, optional
        Shape of one input sample. Defaults to ``X_train_one_hot.shape[1:]``
        from the notebook namespace.

    Returns
    -------
    A compiled Keras ``Model`` (adam optimizer, binary cross-entropy loss,
    ``'acc'`` metric).

    Raises
    ------
    ValueError
        If ``filters`` is empty.
    """
    # NOTE(review): one_hot() lays tokens out in slots of `num_words` entries,
    # so a stride equal to the vocabulary size would slide the window exactly
    # one word at a time. The defaults are kept as-is for backward
    # compatibility - confirm whether stride 10 is intended.
    filters = tuple(filters)
    if not filters:
        raise ValueError("filters must contain at least one entry")

    if input_shape is None:
        # Preserve the original behaviour of sizing the model from the
        # notebook-level training tensor.
        input_shape = X_train_one_hot.shape[1:]

    inputs = Input(shape = input_shape, name= "input")
    branches = []
    for branch_idx, n_filters in enumerate(filters):
        x = Conv1D(n_filters, kernels, strides = strides, padding = "valid", activation = 'relu')(inputs)
        # alternate pooling type: max for even branches, average for odd ones
        pooling_layer = MaxPooling1D if branch_idx % 2 == 0 else AveragePooling1D
        branches.append(pooling_layer(pool_size)(x))

    # A single branch needs no merge; skip concatenate in that case.
    merged = concatenate(branches, axis = 2) if len(branches) > 1 else branches[0]
    flattened = Flatten()(merged)
    outputs = Dense(1, activation = 'sigmoid')(flattened)
    m = Model(inputs = inputs, outputs = outputs)
    m.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['acc'])
    return m

In [20]:
model = seq_cnn()
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (None, 15000, 1)     0                                            
__________________________________________________________________________________________________
conv1d_17 (Conv1D)              (None, 1471, 64)     19264       input[0][0]                      
__________________________________________________________________________________________________
conv1d_18 (Conv1D)              (None, 1471, 32)     9632        input[0][0]                      
__________________________________________________________________________________________________
conv1d_19 (Conv1D)              (None, 1471, 16)     4816        input[0][0]                      
__________________________________________________________________________________________________
max_poolin

In [None]:
# Checkpoint the model to 'best_model.hdf5' whenever validation accuracy improves.
callbacks = [
    ModelCheckpoint(
        filepath = 'best_model.hdf5',
        monitor = 'val_acc',
        mode = 'max',
        save_best_only = True,
        verbose = 1,
    )
]

# Train with the last 10% of the training data held out for validation.
history = model.fit(
    X_train_one_hot,
    y_train,
    batch_size = 64,
    epochs = 10,
    validation_split = 0.1,
    callbacks = callbacks,
)

In [16]:
# Rebuild the architecture and restore the checkpointed weights.
model = seq_cnn()
model.load_weights('best_model.hdf5')

# Compile with the accuracy metric, then score on the held-out test split.
model.compile(
    loss = 'binary_crossentropy',
    optimizer = 'adam',
    metrics = ['accuracy'],
)
results = model.evaluate(X_test_one_hot, y_test)

# evaluate() returns [loss, accuracy]; report the accuracy entry.
print('Test accuracy: ', results[1])

