In [None]:
import utils

In [None]:
# Load the training frame and the test frame (test set read from
# ./data/test_b.csv) through the project helper.
df_train, df_test = utils.read_data(
    test_data='./data/test_b.csv',
)

In [None]:
# Split the training frame into train / validation parts.
# NOTE(review): test_size=0.2 presumably holds out 20% of the rows for
# validation and random_state=1 fixes the shuffle — confirm in utils.
x_train, x_val, y_train, y_val = utils.split_train_val(
    df_train,
    test_size=0.2,
    random_state=1,
)

In [None]:
# NOTE(review): presumably selects/configures the compute device (e.g. GPU)
# used by TensorFlow for the rest of the notebook — confirm against
# utils.assign_device.
utils.assign_device()

In [None]:
from tensorflow.keras import backend as K
#from tensorflow.python.keras import backend as K
from tensorflow.keras import initializers, regularizers, constraints
from tensorflow.keras.layers import Layer
#from keras.engine.topology import Layer

class Attention(Layer):
    """Attention-weighted pooling over the step axis of a sequence.

    Follows the work of Raffel et al. [https://arxiv.org/abs/1512.08756].
    For every step a scalar score ``tanh(x_t . W + b_t)`` is computed, the
    scores are exponentiated and normalised over the step axis, and the
    output is the resulting weighted sum of the step vectors.
    Supports masking: masked steps get a zero weight and the mask is not
    propagated to the following layers.

    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.

    Put it on top of an RNN layer (GRU/LSTM/SimpleRNN) created with
    ``return_sequences=True``; ``step_dim`` must equal the number of steps
    that layer outputs.

    Example:
        # 1
        model.add(LSTM(64, return_sequences=True))
        model.add(Attention(step_dim))
        # next add a Dense layer (for classification/regression) or whatever...
        # 2
        hidden = LSTM(64, return_sequences=True)(words)
        sentence = Attention(step_dim)(hidden)
        # next add a Dense layer (for classification/regression) or whatever...
    """

    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        """Configure the layer.

        :param step_dim: number of steps (axis 1) of the input sequence.
        :param W_regularizer: regularizer (or identifier) for the weight vector.
        :param b_regularizer: regularizer (or identifier) for the bias vector.
        :param W_constraint: constraint (or identifier) for the weight vector.
        :param b_constraint: constraint (or identifier) for the bias vector.
        :param bias: whether a per-step bias is added to the scores.
        :param kwargs: forwarded unchanged to ``Layer.__init__``.
        """
        # Initialise the base Layer first so that nothing assigned below can
        # be reset by the base-class constructor (e.g. ``supports_masking``).
        super(Attention, self).__init__(**kwargs)

        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.step_dim = step_dim
        # Filled in by build() once the input shape is known.
        self.features_dim = 0

    def build(self, input_shape):
        """Create ``W`` (one weight per feature) and, if enabled, ``b`` (one bias per step).

        :raises ValueError: if the static number of input steps differs from
            ``step_dim``.
        """
        assert len(input_shape) == 3, \
            'Attention expects a 3D input (samples, steps, features)'

        steps = input_shape[1]
        # call() reshapes the scores to (-1, step_dim); a mismatch would
        # silently scramble the batch axis, so fail early with a clear message.
        if steps is not None and steps != self.step_dim:
            raise ValueError(
                'Attention was created with step_dim={} but its input has {} '
                'steps'.format(self.step_dim, steps))

        self.W = self.add_weight(shape=(input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]

        if self.bias:
            self.b = self.add_weight(shape=(input_shape[1],),
                                     initializer='zeros',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None

        self.built = True

    def compute_mask(self, input, input_mask=None):
        """Return ``None``: the mask is consumed here and not passed on."""
        return None

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over the step axis.

        :param x: 3D tensor `(samples, steps, features)`.
        :param mask: optional mask; it is cast to float and multiplied into
            the `(samples, steps)` attention weights.
        :return: 2D tensor `(samples, features)`.
        """
        features_dim = self.features_dim
        step_dim = self.step_dim

        # Score every step: flatten to (samples*steps, features), project onto
        # W, then fold back to (samples, steps).  Equivalent to K.dot(x, W).
        flat_steps = K.reshape(x, (-1, features_dim))
        w_column = K.reshape(self.W, (features_dim, 1))
        e = K.reshape(K.dot(flat_steps, w_column), (-1, step_dim))
        if self.bias:
            e += self.b
        e = K.tanh(e)

        a = K.exp(e)
        # Apply the mask after the exp; the weights are re-normalised next.
        if mask is not None:
            # Cast the mask to floatX so the product keeps the float dtype.
            a *= K.cast(mask, K.floatx())
        # Early in training the sum may be almost zero, which would produce
        # NaNs; a very small positive epsilon is added to the denominator.
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        a = K.expand_dims(a)

        return K.sum(a * x, axis=1)

    def compute_output_shape(self, input_shape):
        """Return `(samples, features)` derived from the given input shape."""
        # Read the feature size from input_shape so the result is also right
        # before build() has run (features_dim is 0 until then).
        return input_shape[0], input_shape[-1]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created.

        The dict extends the base ``Layer`` config and round-trips through
        ``Attention(**config)``, which is what Keras uses when a saved model
        containing this layer is loaded with ``custom_objects``.
        """
        config = super(Attention, self).get_config()
        config.update({
            'step_dim': self.step_dim,
            'W_regularizer': regularizers.serialize(self.W_regularizer),
            'b_regularizer': regularizers.serialize(self.b_regularizer),
            'W_constraint': constraints.serialize(self.W_constraint),
            'b_constraint': constraints.serialize(self.b_constraint),
            'bias': self.bias,
        })
        return config

In [None]:
# --- Model / data hyper-parameters -----------------------------------------
# Total number of distinct characters / words / features.
max_features = 8000
# Maximum number of sentences kept per document.
maxlen_text = 20
# Maximum number of words kept per sentence.
maxlen_sentence = 25
# Total number of target classes.
n_classes = 14
# Dimensionality of the word embedding.
embedding_dims = 30

# Convolution settings.
filters = 32
kernel_size = 7

# --- Training schedule -----------------------------------------------------
batch_size = 200
epochs = 100

# --- Batch generators (train / validation / test) --------------------------
train_generator = utils.DataGeneratorHAN(
    x_train, y_train, n_classes,
    batch_size=batch_size,
    maxlen_text=maxlen_text,
    maxlen_sentence=maxlen_sentence,
)

val_generator = utils.DataGeneratorHAN(
    x_val, y_val, n_classes,
    batch_size=batch_size,
    maxlen_text=maxlen_text,
    maxlen_sentence=maxlen_sentence,
)

# The test generator is built from the raw texts only (no labels / class
# count are passed) and uses its own batch size of 100.
test_generator = utils.DataGeneratorHAN(
    df_test.text.values.tolist(),
    batch_size=100,
    maxlen_text=maxlen_text,
    maxlen_sentence=maxlen_sentence,
)

In [None]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Embedding, Dense, Dropout, Conv1D
from tensorflow.keras.layers import Bidirectional, LSTM
from tensorflow.keras.layers import TimeDistributed

class HAN():
    """Builder for a hierarchical attention network (words -> sentences).

    The hyper-parameters are read from the notebook-level configuration
    (``max_features``, ``embedding_dims``, ``maxlen_sentence``,
    ``maxlen_text``, ``kernel_size``, ``n_classes``) when ``get_model`` runs.
    """

    def __init__(self):
        pass

    def get_model(self):
        """Build and return the (uncompiled) Keras model.

        A word-level encoder (Embedding -> Conv1D -> BiLSTM -> Attention) is
        applied to every sentence through ``TimeDistributed``; the resulting
        sentence vectors go through a second BiLSTM -> Attention stage and a
        softmax ``Dense`` layer with ``n_classes`` outputs.

        :return: ``Model`` mapping `(maxlen_text, maxlen_sentence)` inputs to
            `n_classes` probabilities.
        :raises ValueError: if ``kernel_size`` is larger than
            ``maxlen_sentence`` (the convolution would produce no steps).
        """
        # Conv1D below uses the default 'valid' padding with stride 1, so it
        # shortens the sequence to maxlen_sentence - kernel_size + 1 steps.
        # The word-level Attention must be told exactly that length.
        conv_steps = maxlen_sentence - kernel_size + 1
        if conv_steps < 1:
            raise ValueError(
                'kernel_size ({}) must not exceed maxlen_sentence ({})'
                .format(kernel_size, maxlen_sentence))

        # Word part
        input_words = Input(shape=(maxlen_sentence,))
        x_words = Embedding(max_features, embedding_dims,
                            input_length=maxlen_sentence)(input_words)
        # NOTE(review): the number of filters is maxlen_sentence, not the
        # notebook's `filters` setting — kept as-is to leave the architecture
        # unchanged; confirm whether `filters` was intended.
        x_words = Conv1D(maxlen_sentence, kernel_size,
                         activation='relu')(x_words)
        x_words = Bidirectional(LSTM(256, return_sequences=True,
                                     activation='tanh',
                                     recurrent_activation='sigmoid',
                                     dropout=0.1,
                                     recurrent_dropout=0))(x_words)
        x_words = Attention(conv_steps)(x_words)
        model_words = Model(input_words, x_words)

        # Sentence part
        input_sentences = Input(shape=(maxlen_text, maxlen_sentence))
        x_sentence = TimeDistributed(model_words)(input_sentences)
        x_sentence = Bidirectional(LSTM(256, return_sequences=True,
                                        activation='tanh',
                                        recurrent_activation='sigmoid',
                                        dropout=0.1,
                                        recurrent_dropout=0))(x_sentence)
        x_sentence = Attention(maxlen_text)(x_sentence)
        output = Dense(n_classes, activation='softmax')(x_sentence)
        model = Model(inputs=input_sentences, outputs=output)

        return model

    def get_config(self):
        """Return a placeholder configuration dict.

        NOTE(review): the value carries no real configuration; it is kept
        unchanged for compatibility — confirm whether anything relies on it.
        """
        return {'a':3}

In [None]:
from tensorflow import keras
# All three callbacks watch the validation F1 score, which has to be
# maximised, so each of them sets mode='max' explicitly.
# NOTE(review): 'val_f1_score' assumes utils.F1_score() reports its metric
# under the name 'f1_score' — confirm.
callbacks_list = [
    # Multiply the learning rate by 0.1 after 3 epochs without improvement,
    # then wait 3 epochs (cooldown) before monitoring again.
    keras.callbacks.ReduceLROnPlateau(
        monitor='val_f1_score',
        factor=0.1,
        patience=3,
        mode='max',
        cooldown=3
        ),
    # Stop training after 10 epochs without improvement.
    keras.callbacks.EarlyStopping(
        monitor='val_f1_score',
        patience=10,
        mode='max'
    ),
    # Keep only the best checkpoint on disk.
    keras.callbacks.ModelCheckpoint(
        filepath='han_weights.h5',
        monitor='val_f1_score',
        save_best_only=True,
        # Without an explicit mode Keras uses 'auto', which infers the
        # direction from the monitored name and may treat an F1 metric as
        # "lower is better" — that would keep the worst epoch instead.
        mode='max',
    )]

In [None]:
# Build the network via the HAN builder defined above.
han = HAN().get_model()

# Softmax output + categorical cross-entropy; the F1 metric comes from the
# project helpers.
han.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=[utils.F1_score()],
)

In [None]:
# Train on the batch generators and keep the per-epoch history.
# `batch_size` is intentionally NOT passed to fit(): the generators were
# created with their own batch_size and already yield complete batches, and
# Keras documents that batch_size must not be specified for generator /
# Sequence inputs (some TensorFlow versions raise a ValueError if it is).
# NOTE(review): assumes utils.DataGeneratorHAN is a generator / Sequence —
# confirm.
history = han.fit(train_generator,
                  epochs=epochs,
                  validation_data=val_generator,
                  validation_freq=1,
                  callbacks=callbacks_list,
                  )

In [None]:
# Run the trained model over the test generator; the project helper is given
# ./data/submit_han_b.csv as its output path argument.
utils.do_predict(
    han,
    test_generator,
    './data/submit_han_b.csv',
)