# 1-D convolution for text analytics
- It is commonly considered that RNNs are for text analysis and CNNs are for image analysis. However, it could be the other way around as well.
- With the help of features such as local connectivity, sliding filters, weight sharing, etc., CNNs can be attractive for text analysis as well.
- Keras has ```Conv1D``` layer, which is similar to ```Conv2D```
    - ```Conv1D``` is a widely-used substitute for recurrent models in learning context-based data

In [1]:
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt

## ```Conv1D``` layer
- Temporal convolution - "creates a convolution kernel that is convolved with the layer input over a single spatial (or temporal) dimension to produce a tensor of outputs"
    - input: 3D tensor of shape ```(batch_size, steps, input_dim)```
    - output: 3D tensor of shape ```(batch_size, new_steps, filters)```

In [2]:
# Conv1D with 5 filters and a kernel of width 3, no padding ('valid').
# 10 input timesteps and kernel_size = 3 => 10 - 3 + 1 = 8 new steps.
conv1d = Conv1D(filters = 5, kernel_size = 3, padding = 'valid')(
    Input(shape = (10, 30))
)
print(conv1d.shape)

(None, 8, 5)


In [3]:
# With padding = 'same' the number of steps is preserved (10 in, 10 out);
# only the last dimension changes to the number of filters.
conv1d = Conv1D(filters = 5, kernel_size = 3, padding = 'same')(
    Input(shape = (10, 30))
)
print(conv1d.shape)

(None, 10, 5)


In [4]:
# For comparison: Conv2D slides a 3x3 kernel over two spatial dimensions.
# 'valid' shrinks both of them by kernel_size - 1 ...
conv2d = Conv2D(filters = 5, kernel_size = (3, 3), padding = 'valid')(
    Input(shape = (5, 4, 3))
)
print(conv2d.shape)
# ... while 'same' keeps the spatial dimensions unchanged.
conv2d = Conv2D(filters = 5, kernel_size = (3, 3), padding = 'same')(
    Input(shape = (5, 4, 3))
)
print(conv2d.shape)

(None, 3, 2, 5)
(None, 5, 4, 5)


## Using ```Conv1D``` layer in Network

In [5]:
# helper to load and preprocess the Reuters dataset
def get_reuters(num_words = 3000, maxlen = 50):
    """Load the Reuters dataset as fixed-length sequences with one-hot labels.

    Args:
        num_words: forwarded to ``reuters.load_data`` as ``num_words``.
        maxlen: forwarded to ``reuters.load_data`` as ``maxlen`` and also used
            as the target length when padding the sequences.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The ``X`` arrays are
        post-padded to ``maxlen``; the ``y`` arrays are one-hot encoded over
        46 classes. The shape of each array is printed before returning.
    """
    (train_seqs, train_labels), (test_seqs, test_labels) = reuters.load_data(
        num_words = num_words, maxlen = maxlen
    )

    def _pad(seqs):
        # Pad at the end of each sequence so every sample has length maxlen.
        return sequence.pad_sequences(seqs, maxlen = maxlen, padding = 'post')

    def _one_hot(labels):
        # Labels are encoded over a fixed set of 46 classes.
        return to_categorical(labels, num_classes = 46)

    X_train, X_test = _pad(train_seqs), _pad(test_seqs)
    y_train, y_test = _one_hot(train_labels), _one_hot(test_labels)

    # Report shapes in the order: X_train, X_test, y_train, y_test.
    for array in (X_train, X_test, y_train, y_test):
        print(array.shape)

    return X_train, X_test, y_train, y_test

In [6]:
# Dataset / embedding settings shared by the cells below:
#   num_words  - value passed as the vocabulary size
#   max_len    - length every sequence is padded to
#   embed_size - dimensionality of the embedding vectors
num_words, max_len, embed_size = 3000, 50, 100

In [7]:
# Load the padded sequences and one-hot labels using the settings above.
X_train, X_test, y_train, y_test = get_reuters(num_words = num_words, maxlen = max_len)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
(1595, 50)
(399, 50)
(1595, 46)
(399, 46)


In [8]:
def one_dim_convolution_model(num_words, embed_size, input_length):
    """Build and compile a small Conv1D text classifier.

    Architecture: Embedding -> Conv1D(50 filters, kernel 10, ReLU)
    -> MaxPooling1D(10) -> GlobalMaxPooling1D -> Dense(46, softmax).

    Args:
        num_words: vocabulary size (input dimension of the embedding).
        embed_size: dimensionality of the embedding vectors.
        input_length: length of each input sequence.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy reported under the name ``'acc'``).
    """
    model = Sequential()
    # Use the input_length argument; the original read the notebook-level
    # max_len global, silently ignoring whatever the caller passed in.
    model.add(Embedding(num_words, embed_size, input_length = input_length))
    model.add(Conv1D(50, 10, activation = 'relu'))
    model.add(MaxPooling1D(10))
    # Collapse the remaining steps into one feature vector per sample.
    model.add(GlobalMaxPooling1D())
    model.add(Dense(46, activation = 'softmax'))
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc'])
    return model

In [9]:
# Build the single-kernel Conv1D classifier and print its layer summary.
model = one_dim_convolution_model(
    num_words = num_words,
    embed_size = embed_size,
    input_length = max_len,
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 50, 100)           300000    
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 41, 50)            50050     
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 4, 50)             0         
_________________________________________________________________
global_max_pooling1d (Global (None, 50)                0         
_________________________________________________________________
dense (Dense)                (None, 46)                2346      
Total params: 352,396
Trainable params: 352,396
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Train on the training split: 10 epochs, 256 samples per batch.
history = model.fit(x = X_train, y = y_train, batch_size = 256, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [13]:
# Evaluate the trained model on the held-out test split.
result = model.evaluate(x = X_test, y = y_test)



In [14]:
# result holds the values returned by model.evaluate; index 1 is reported as
# the accuracy (presumably [loss, acc], since the model is compiled with
# metrics = ['acc'] -- confirm against the Keras evaluate documentation).
print('Test Accuracy: ', result[1])

Test Accuracy:  0.8822054862976074


## Diversifying the Size of Kernels
- In the previous example, the kernel size was kept constant. However, it is also possible to perform convolution operations with kernels of different sizes in parallel, and merge their outputs afterwards
    - This could be done using ```Functional API``` when creating model

In [15]:
# Same dataset / embedding settings as in the single-kernel example ...
num_words, max_len, embed_size = 3000, 50, 100
# ... plus the kernel widths of the parallel convolution branches.
kernel_sizes = (5, 10, 15)

# Reload the padded sequences and one-hot labels for the multi-kernel model.
X_train, X_test, y_train, y_test = get_reuters(num_words = num_words, maxlen = max_len)

(1595, 50)
(399, 50)
(1595, 46)
(399, 46)


In [16]:
def one_dim_convolution_model_with_diff_kernels(num_words, embed_size, input_length, kernel_sizes):
    """Build and compile a Conv1D classifier with parallel kernel sizes.

    A shared embedding feeds one Conv1D branch per entry in ``kernel_sizes``.
    Each branch is max-pooled over all of its output steps, the branches are
    concatenated, and a softmax layer classifies the merged features.

    Args:
        num_words: vocabulary size (input dimension of the embedding).
        embed_size: dimensionality of the embedding vectors.
        input_length: length of each input sequence.
        kernel_sizes: iterable of kernel widths, one convolution branch each.

    Returns:
        A compiled functional ``Model`` (Adam optimizer, categorical
        cross-entropy loss, accuracy reported under the name ``'acc'``).
    """
    # Derive every shape from input_length; the original read the notebook
    # globals X_train and max_len, so the parameter was silently ignored and
    # the function could not run without those globals being defined.
    inputs = Input(shape = (input_length,))
    embedded = Embedding(output_dim = embed_size, input_dim = num_words, input_length = input_length)(inputs)
    conv_results = []
    for kernel_size in kernel_sizes:
        x = Conv1D(50, kernel_size, activation = 'relu')(embedded)
        # An unpadded convolution leaves input_length - kernel_size + 1 steps;
        # pooling over all of them reduces each branch to a single step so
        # branches with different kernel sizes can be concatenated.
        x = MaxPooling1D(pool_size = input_length - kernel_size + 1)(x)
        conv_results.append(x)
    conv_result = concatenate(conv_results)
    # Drop the remaining length-1 step axis before the classifier.
    x = GlobalMaxPooling1D()(conv_result)
    outputs = Dense(46, activation = 'softmax')(x)
    model = Model(inputs = inputs, outputs = outputs)
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['acc'])
    return model

In [17]:
# Build the multi-kernel Conv1D classifier and print its layer summary.
model = one_dim_convolution_model_with_diff_kernels(
    num_words = num_words,
    embed_size = embed_size,
    input_length = max_len,
    kernel_sizes = kernel_sizes,
)
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            [(None, 50)]         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 50, 100)      300000      input_5[0][0]                    
__________________________________________________________________________________________________
conv1d_3 (Conv1D)               (None, 46, 50)       25050       embedding_1[0][0]                
__________________________________________________________________________________________________
conv1d_4 (Conv1D)               (None, 41, 50)       50050       embedding_1[0][0]                
_______________________________________________________________________________________

In [18]:
# Train the multi-kernel model: 10 epochs, 256 samples per batch.
history = model.fit(x = X_train, y = y_train, batch_size = 256, epochs = 10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# Evaluate the multi-kernel model on the held-out test split.
result = model.evaluate(x = X_test, y = y_test)



In [20]:
# result holds the values returned by model.evaluate; index 1 is reported as
# the accuracy (presumably [loss, acc], since the model is compiled with
# metrics = ['acc'] -- confirm against the Keras evaluate documentation).
print('Test Accuracy: ', result[1])

Test Accuracy:  0.8822054862976074
