In [12]:
import keras
import pandas as pd
import numpy as np

In [13]:
from keras.layers import Conv1D,Dense
from keras.models import Sequential


In [14]:
x_train = np.load('x_train.npy')
x_test = np.load('x_test.npy')
test_val = np.load('test_val.npy')
y_test = np.load('y_test.npy')
y_train = np.load('y_train.npy')
y_val = np.load('y_val.npy')

In [15]:
y_train.shape

(37065, 20)

In [16]:

len(x_train[3])

1011

In [17]:
import tensorflow as tf
from keras.layers import Dense, Input, Embedding, Lambda, Dropout, Activation, SpatialDropout1D, Reshape, GlobalAveragePooling1D, merge, Flatten, Bidirectional, CuDNNGRU, add, Conv1D, GlobalMaxPooling1D
from keras.layers.merge import concatenate
from keras.models import Model
from keras.layers.normalization import BatchNormalization
from keras import optimizers
from keras import initializers
from keras.engine import InputSpec, Layer
from keras import backend as K

class AttentionWeightedAverage(Layer):

    def __init__(self, return_attention=False, **kwargs):
        self.init = initializers.get('uniform')
        self.supports_masking = True
        self.return_attention = return_attention
        super(AttentionWeightedAverage, self).__init__(** kwargs)

    def build(self, input_shape):
        self.input_spec = [InputSpec(ndim=3)]
        assert len(input_shape) == 3

        self.W = self.add_weight(shape=(input_shape[2], 1),
                                 name='{}_W'.format(self.name),
                                 initializer=self.init)
        self.trainable_weights = [self.W]
        super(AttentionWeightedAverage, self).build(input_shape)

    def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result

    def get_output_shape_for(self, input_shape):
        return self.compute_output_shape(input_shape)

    def compute_output_shape(self, input_shape):
        output_len = input_shape[2]
        if self.return_attention:
            return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
        return (input_shape[0], output_len)

    def compute_mask(self, input, input_mask=None):
        if isinstance(input_mask, list):
            return [None] * len(input_mask)
        else:
            return None

In [18]:
def Hamming_loss(y_true, y_pred):
    tmp = K.abs(y_true-y_pred)
    return K.mean(K.cast(K.greater(tmp,0.5),dtype=float))

In [19]:
class squash_function(Layer):
    def squash(s, axis=-1, epsilon=1e-7, name=None):
        with tf.name_scope(name, default_name="squash"):
            squared_norm = tf.reduce_sum(tf.square(s), axis=axis,
                                         keep_dims=True)
            safe_norm = tf.sqrt(squared_norm + epsilon)
            squash_factor = squared_norm / (1. + squared_norm)
            unit_vector = s / safe_norm
            return squash_factor * unit_vector

In [20]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding, Flatten, GlobalMaxPooling1D, Dropout, Conv1D,GlobalAveragePooling1D,Bidirectional,GRU,concatenate,Input
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from keras.losses import binary_crossentropy
from keras.optimizers import Adam
from keras_self_attention import SeqSelfAttention

filter_length = 128
num_classes = 20
max_words = 111396
maxlen = 1011

input1 = Input(shape=(maxlen,))
 
x = Embedding(max_words, 20, input_length=maxlen)(input1)


gru1 = GRU(32, return_sequences=True, dropout=0.1,
                                                      recurrent_dropout=0.1)(x)
gru2 = GRU(32, return_sequences=True, dropout=0.1,
                                                      recurrent_dropout=0.1)(x)
x = concatenate([gru1,gru2])

x = squash_function()(x)
 
conv_64 = Conv1D(64, kernel_size=3, padding="valid", kernel_initializer="glorot_uniform")(x)
 
conv1 = Conv1D(32, kernel_size=3, padding="valid", kernel_initializer="glorot_uniform")(x)
conv2 = Conv1D(32, kernel_size=3, padding="valid", kernel_initializer="glorot_uniform")(x)
conv3 = Conv1D(32, kernel_size=3, padding="valid", kernel_initializer="glorot_uniform")(x)
cat = concatenate([conv2,conv1])
x = concatenate([cat,conv3])

maxpool = GlobalMaxPooling1D()(x)
attn = AttentionWeightedAverage()(x)
avg = GlobalAveragePooling1D()(x)

l = concatenate([maxpool,attn])
x = concatenate([l,avg])
 
preds = Dense(num_classes, activation="sigmoid")(x)
 
model = keras.Model(input1, preds)
 
model.summary()
 

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[Hamming_loss])


# callbacks = [
#     ReduceLROnPlateau(), 
#     EarlyStopping(patience=4)
# ]

# history = model.fit(x_train, y_train,
#                     epochs=5,
#                     batch_size=128,
#                     validation_split=0.1,
#                     callbacks=callbacks)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 1011)         0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 1011, 20)     2227920     input_2[0][0]                    
__________________________________________________________________________________________________
gru_3 (GRU)                     (None, 1011, 32)     5088        embedding_2[0][0]                
__________________________________________________________________________________________________
gru_4 (GRU)                     (None, 1011, 32)     5088        embedding_2[0][0]                
__________________________________________________________________________________________________
concatenat

In [21]:
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss

def _train_model(model, batch_size, train_x, train_y, test_val, y_val):
    num_labels = train_y.shape[1]
    patience = 7
    best_loss = -1
    best_weights = None
    best_epoch = 0
    
    current_epoch = 0
    
    while True:
        model.fit(x_train, y_train, batch_size=batch_size, epochs=1)
        y_pred = model.predict(test_val, batch_size=batch_size)

        total_loss = 0
        for j in range(num_labels):
            loss = log_loss(y_val[:, j], y_pred[:, j])
            total_loss += loss

        total_loss /= num_labels

        print("Epoch {0} loss {1} best_loss {2}".format(current_epoch, total_loss, best_loss))

        current_epoch += 1
        if total_loss < best_loss or best_loss == -1:
            best_loss = total_loss
            best_weights = model.get_weights()
            best_epoch = current_epoch
        else:
            if current_epoch - best_epoch == patience:
                break

    model.set_weights(best_weights)
    return model

In [None]:
batch_size = 256
history = _train_model(model,batch_size,x_train,y_train,test_val,y_val)

Epoch 1/1
Epoch 0 loss 0.4361130736413029 best_loss -1
Epoch 1/1
Epoch 1 loss 0.41322811947662635 best_loss 0.4361130736413029
Epoch 1/1
Epoch 2 loss 0.4040020778238452 best_loss 0.41322811947662635
Epoch 1/1
Epoch 3 loss 0.39399984504532376 best_loss 0.4040020778238452
Epoch 1/1
Epoch 4 loss 0.38983792474951806 best_loss 0.39399984504532376
Epoch 1/1

In [None]:
y_pred = history.predict(x_test, batch_size=batch_size)

In [None]:
y_val

In [None]:
y_test.shape

In [None]:
print(y_pred[35],y_val[35])

In [None]:
(7.8654468e-01+           
5.0963247e-01+
9.6992075e-01+
5.8361495e-01+
3.5057139e-01+
5.7766867e-01)/6

In [None]:
y_pred_round = np.where(y_pred>0.5,1,0)

###### y_train.shape

In [None]:
from sklearn.metrics import jaccard_score,roc_auc_score,confusion_matrix,hamming_loss
print(jaccard_score(y_test,y_pred_round,average='micro'))
print(jaccard_score(y_test,y_pred_round,average='macro'))
print(roc_auc_score(y_test,y_pred_round))
print(hamming_loss(y_test,y_pred_round))

In [None]:
from keras.models import load_model
model.save('conv_n_gru.h5')