In [1]:
%matplotlib inline

import pandas as pd
import seaborn as sns



In [2]:
# Load the raw dataset from a CSV file; the path is relative to the notebook's
# working directory.
df = pd.read_csv('data/data_uniqcontent.csv')

In [3]:
# Keep only the rows that have text to tokenize. dropna(subset=...) filters by
# row position rather than by index label, so it cannot accidentally remove
# non-null rows that share a label with a null one, and it avoids mutating the
# frame in place.
df = df.dropna(subset=['sentences_1000_str'])

In [4]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
import numpy as np

Using TensorFlow backend.


In [5]:
# Hyper-parameters shared by the preprocessing and the model cells.
MAX_SEQUENCE_LENGTH = 1000  # length passed to pad_sequences and to the model input
EMBEDDING_DIM = 200         # size of each word vector (same value as the Embedding layer)

# Fit the vocabulary on the text column, then encode every document as a list
# of integer word ids.
corpus = df['sentences_1000_str']
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
sequences = tokenizer.texts_to_sequences(corpus)


In [6]:
# Vocabulary mapping built by the fitted tokenizer; its length is used further
# down to size the Embedding layer.
word_index = tokenizer.word_index

In [7]:
# Bring every encoded document to exactly MAX_SEQUENCE_LENGTH ids.
all_data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)

# One-hot encode the integer class ids (one column per class).
labels = to_categorical(df['class_no'].values)

print('Shape of data tensor:', all_data.shape)
print('Shape of label tensor:', labels.shape)

Shape of data tensor: (81649, 1000)
Shape of label tensor: (81649, 14)


In [8]:
from sklearn.model_selection import train_test_split

# Fixed seed so the stratified 70/30 split -- and therefore every metric
# reported below -- is reproducible after a kernel restart.
SPLIT_SEED = 42

x_train, x_val, y_train, y_val = train_test_split(
    all_data,
    labels,
    test_size=0.3,
    stratify=labels,
    random_state=SPLIT_SEED,
)

In [9]:
# Sanity check: features and labels of the training split must have the same row count.
(x_train.shape, y_train.shape)

((57154, 1000), (57154, 14))

In [10]:
from keras.layers import Embedding
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding, Dropout, LSTM, GRU, Bidirectional
from keras.models import Model

from keras import backend as K
from keras.engine.topology import Layer, InputSpec
from keras import initializers

In [11]:
from keras import initializers, regularizers, constraints
class Attention(Layer):
    """Attention pooling over the time axis of a sequence.

    Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756]:
    every time step receives a scalar score ``tanh(x_t . W + b_t)``; the
    exponentiated scores are normalised across the steps and used as the
    weights of a sum over the input steps.

    Put it on top of an RNN layer (GRU/LSTM/SimpleRNN) created with
    ``return_sequences=True``. The feature dimension is inferred from the
    input; the number of steps must be passed explicitly.

    # Arguments
        step_dim: number of time steps of the input (its second dimension).
        W_regularizer: optional regularizer for the scoring vector ``W``.
        b_regularizer: optional regularizer for the per-step bias ``b``.
        W_constraint: optional constraint for ``W``.
        b_constraint: optional constraint for ``b``.
        bias: whether to add a learned bias to each time step's score.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.

    # Output shape
        2D tensor with shape: `(samples, features)`.

    # Example
        model.add(LSTM(64, return_sequences=True))
        model.add(Attention(step_dim))
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        # Run the base constructor first: it initialises the layer's
        # bookkeeping attributes (including supports_masking), so anything
        # assigned before it could be reset.
        super(Attention, self).__init__(**kwargs)
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0  # filled in by build() once the input shape is known

    def build(self, input_shape):
        """Create the scoring vector ``W`` (features,) and optional bias ``b`` (steps,)."""
        assert len(input_shape) == 3
        if input_shape[1] is not None and input_shape[1] != self.step_dim:
            # A mismatch would otherwise only surface later as an opaque
            # reshape/broadcast error inside call().
            raise ValueError(
                'Attention was created with step_dim={} but its input has {} '
                'time steps.'.format(self.step_dim, input_shape[1]))

        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        """Return no mask: the time axis is collapsed by this layer."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over its time axis."""
        features_dim = self.features_dim
        step_dim = self.step_dim

        # The original author notes that K.dot(x, self.W) is not supported by
        # the TF backend, so the product is done on a 2D view of x and the
        # result is folded back to (samples, steps).
        flat_x = K.reshape(x, (-1, features_dim))
        kernel = K.reshape(self.W, (features_dim, 1))
        eij = K.reshape(K.dot(flat_x, kernel), (-1, step_dim))

        if self.bias:
            eij += self.b

        eij = K.tanh(eij)
        a = K.exp(eij)

        # Masked steps get zero weight before normalisation.
        if mask is not None:
            a *= K.cast(mask, K.floatx())

        # In some cases, especially early in training, the sum may be almost
        # zero; epsilon keeps the division finite.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        """Output keeps the batch and feature dimensions and drops the steps."""
        return input_shape[0], input_shape[-1]

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer."""
        config = {
            'step_dim': self.step_dim,
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
        }
        base_config = super(Attention, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))



In [12]:
# Number of output units = width of the one-hot label matrix.
num_classes = labels.shape[1]

# The network consumes integer word ids, so the input is declared as int32
# rather than a floating-point type.
inputs = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')

# Vocabulary size is len(word_index) + 1; the vector size comes from the
# EMBEDDING_DIM constant instead of a repeated literal.
embed = Embedding(len(word_index) + 1,
                  EMBEDDING_DIM,
                  input_length=MAX_SEQUENCE_LENGTH)(inputs)

# Bidirectional GRU that returns the full sequence so the attention layer can
# pool over all time steps.
gru = Bidirectional(GRU(100, dropout=0.2, return_sequences=True))(embed)
attention = Attention(MAX_SEQUENCE_LENGTH)(gru)
output = Dense(num_classes, activation='softmax')(attention)

model = Model(inputs, output)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

In [14]:
# Print each layer's output shape and parameter count to sanity-check the architecture.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1000)              0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 1000, 200)         139376400 
_________________________________________________________________
bidirectional_1 (Bidirection (None, 1000, 200)         180600    
_________________________________________________________________
attention_1 (Attention)      (None, 200)               1200      
_________________________________________________________________
dense_1 (Dense)              (None, 14)                2814      
Total params: 139,561,014
Trainable params: 139,561,014
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train on the training split and evaluate on the held-out validation split
# after every epoch.
model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=5,
    validation_data=(x_val, y_val),
)

  num_elements)
  num_elements)


Train on 57154 samples, validate on 24495 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

In [31]:
# Persist the trained model to a file in the current working directory.
# NOTE(review): the model contains the custom Attention layer, so reloading it
# presumably requires passing custom_objects={'Attention': Attention} to
# load_model -- confirm against the Keras docs.
model.save('rnntextv2_model.h5')