In [3]:
import keras
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing, model_selection, metrics
import warnings
warnings.filterwarnings('ignore')
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.optimizers import Adam

import os
from keras.layers import Dense,Input,LSTM,Bidirectional,Activation,Conv1D,GRU
from keras.callbacks import Callback
from keras.layers import Dropout,Embedding,GlobalMaxPooling1D, MaxPooling1D, Add, Flatten
from keras.preprocessing import text, sequence
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, SpatialDropout1D
from keras import initializers, regularizers, constraints, optimizers, layers, callbacks
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.models import Model
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

import seaborn as sns
color = sns.color_palette()
sns.set_style("dark")

%matplotlib inline

Using TensorFlow backend.


In [0]:
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers, regularizers, constraints

EMBEDDING_FILE='/content/datalab/glove.840B.300d.txt'

MAX_SEQUENCE_LENGTH = 150
MAX_NB_WORDS = 100000
EMBEDDING_DIM = 300
VALIDATION_SPLIT = 0.1

num_lstm = 300
num_dense = 256
rate_drop_lstm = 0.2
rate_drop_dense = 0.2

act = 'relu'

In [27]:
print('Indexing word vectors')

count = 0
embeddings_index = {}
f = open(EMBEDDING_FILE)
for line in f:
    values = line.split()
    word = ' '.join(values[:-300])
    coefs = np.asarray(values[-300:], dtype='float32')
    embeddings_index[word] = coefs.reshape(-1)
    coef = embeddings_index[word]
f.close()

print('Found %d word vectors of glove.' % len(embeddings_index))
emb_mean,emb_std = coef.mean(), coef.std()
print(emb_mean,emb_std)

print('Total %s word vectors.' % len(embeddings_index))

Indexing word vectors
Found 2195895 word vectors of glove.
-0.01444638 0.47249147
Total 2195895 word vectors.


Processing text dataset
Found 292462 unique tokens
Shape of data tensor: (159571, 150)
Shape of label tensor: (159571, 6)
Shape of test_data tensor: (153164, 150)


Shape of data tensor: (159571, 150)
Shape of label tensor: (159571, 6)
Shape of test_data tensor: (153164, 150)


In [31]:
print('Preparing embedding matrix')
nb_words = min(MAX_NB_WORDS, len(word_index))
embedding_matrix = np.zeros((nb_words, EMBEDDING_DIM))
for word, i in word_index.items():
    if i >= MAX_NB_WORDS:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

print('Null word embeddings: %d' % np.sum(np.sum(embedding_matrix, axis=1) == 0))

Preparing embedding matrix
Null word embeddings: 21603


In [0]:
max_features=100000
maxlen=150
embed_size=300

In [0]:
class RocAucEvaluation(Callback):
    def __init__(self, validation_data=(), interval=1):
        super(Callback, self).__init__()

        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            print("\n ROC-AUC - epoch: {:d} - score: {:.6f}".format(epoch+1, score))

In [0]:
from keras.layers import K, Activation
from keras.engine import Layer
from keras.layers import Dense, Input, Embedding, Dropout, Bidirectional, GRU, Flatten, SpatialDropout1D
gru_len = 128
Routings = 5
Num_capsule = 10
Dim_capsule = 16
dropout_p = 0.25
rate_drop_dense = 0.28

def squash(x, axis=-1):
    # s_squared_norm is really small
    # s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    # scale = K.sqrt(s_squared_norm)/ (0.5 + s_squared_norm)
    # return scale * x
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True)
    scale = K.sqrt(s_squared_norm + K.epsilon())
    return x / scale


# A Capsule Implement with Pure Keras
class Capsule(Layer):
    def __init__(self, num_capsule, dim_capsule, routings=3, kernel_size=(9, 1), share_weights=True,
                 activation='default', **kwargs):
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_size = kernel_size
        self.share_weights = share_weights
        if activation == 'default':
            self.activation = squash
        else:
            self.activation = Activation(activation)

    def build(self, input_shape):
        super(Capsule, self).build(input_shape)
        input_dim_capsule = input_shape[-1]
        if self.share_weights:
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(1, input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     # shape=self.kernel_size,
                                     initializer='glorot_uniform',
                                     trainable=True)
#             print("W: "+ str(self.W.shape) +"\n")
        else:
            input_num_capsule = input_shape[-2]
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(input_num_capsule,
                                            input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
#             print("u_vec: "+ str(u_vecs) +"\n")
#             print("u_hat_vecs: "+ str(u_hat_vecs) +"\n")
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
#         print("u_hat_vecs: after reshape 1: "+ str(u_hat_vecs) +"\n")
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
#         print("u_hat_vecs: after reshape 2: "+ str(u_hat_vecs) +"\n")
#         final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
        print(b)
        for i in range(self.routings):
            b = K.permute_dimensions(b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
#             print("b: "+ str(b) +"\n")
#             print("c: "+ str(c) +"\n")
#             print("outputs: "+ str(outputs) +"\n")
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs

    def compute_output_shape(self, input_shape):
        return (None, self.num_capsule, self.dim_capsule)



In [0]:
def get_model():
    input1_pre = Input(shape=(maxlen,))
    embed_layer1_pre = Embedding(max_features,
                            embed_size,
                            input_length=maxlen,
                            weights=[embedding_matrix],
                            trainable=False)(input1_pre)
    embed_layer1_pre = SpatialDropout1D(0.4)(embed_layer1_pre)
    
    x_pre = Bidirectional(CuDNNGRU(128, return_sequences=True))(embed_layer1_pre)
    capsule_pre = Capsule(num_capsule=10, dim_capsule=16, routings=5,share_weights=True)(x_pre)
#     capsule_pre = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule_pre)
#     capsule_pre = GlobalMaxPooling1D()(capsule_pre)
    capsule_pre = Flatten()(capsule_pre)
    capsule_pre = Dropout(0.25)(capsule_pre)
    
    input1_post = Input(shape=(maxlen,))
    embed_layer1_post = Embedding(max_features,
                            embed_size,
                            input_length=maxlen,
                            weights=[embedding_matrix],
                            trainable=False)(input1_post)
    embed_layer1_post = SpatialDropout1D(0.4)(embed_layer1_post)
    
    x_post = Bidirectional(CuDNNGRU(128, return_sequences=True))(embed_layer1_post)
    capsule_post = Capsule(num_capsule=10, dim_capsule=16, routings=5,share_weights=True)(x_post)
#     capsule_post = Lambda(lambda x: K.sqrt(K.sum(K.square(x), 2)))(capsule_post)
#     capsule_post = GlobalMaxPooling1D()(capsule_post)
    capsule_post = Flatten()(capsule_post)
    capsule_post = Dropout(0.25)(capsule_post)
    
    concat = concatenate([capsule_pre,capsule_post])
    output = Dense(6, activation='sigmoid')(concat)
    
    model = Model(inputs=[input1_pre,input1_post], outputs=output)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(lr=1e-3,decay=0),
        metrics=['accuracy'])
#     model.summary()
    return model

In [0]:
# file_path="capsule_first_fold.h5"
# model = get_model()
# checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
# early = EarlyStopping(monitor="val_loss", mode="min", patience=3)
# RocAuc = RocAucEvaluation(validation_data=([val_x, val_xp], val_y), interval=1)
# callbacks_list = [checkpoint, early,RocAuc] 
# hist = model.fit([train_x, train_xp], train_y, epochs=15, batch_size=128, shuffle=True, validation_data=([val_x, val_xp], val_y), callbacks = callbacks_list, verbose=1)
# model.load_weights(file_path)
# best_score = min(hist.history['val_loss'])

In [0]:
from sklearn.metrics import log_loss
import numpy as np

test_predicts_list = []

def train_folds(data,data_post, y,fold_count=10):
    print("Starting to train models...")
    fold_size = len(data) // fold_count
    models = []
    for fold_id in range(0, fold_count):
        fold_start = fold_size * fold_id
        fold_end = fold_start + fold_size

        if fold_id == fold_size - 1:
            fold_end = len(data)

        print("Fold {0}".format(fold_id))
        
        train_x = np.concatenate([data[:fold_start], data[fold_end:]])
        train_xp = np.concatenate([data_post[:fold_start], data_post[fold_end:]])
        train_y = np.concatenate([y[:fold_start], y[fold_end:]])

        val_x = data[fold_start:fold_end]
        val_xp = data_post[fold_start:fold_end]
        val_y = y[fold_start:fold_end]
        
        file_path="capsule_fold{0}.h5".format(fold_id)
        model = get_model()
        checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        early = EarlyStopping(monitor="val_loss", mode="min", patience=3)
        RocAuc = RocAucEvaluation(validation_data=([val_x, val_xp], val_y), interval=1)
        callbacks_list = [checkpoint, early,RocAuc] 

        hist = model.fit([train_x, train_xp], train_y, epochs=15, batch_size=128, shuffle=True, 
                         validation_data=([val_x, val_xp], val_y), callbacks = callbacks_list, verbose=1)
        model.load_weights(file_path)
        best_score = min(hist.history['val_loss'])
        
        print("Fold {0} loss {1}".format(fold_id, best_score))
        print("Predicting validation...")
        val_predicts_path = "capsule_val_predicts{0}.npy".format(fold_id)
        val_predicts = model.predict([val_x, val_xp], batch_size=1024, verbose=1)
        np.save(val_predicts_path, val_predicts)
        
        print("Predicting results...")
        test_predicts_path = "capsule_test_predicts{0}.npy".format(fold_id)
        test_predicts = model.predict([test_data, test_data_post], batch_size=1024, verbose=1)
        test_predicts_list.append(test_predicts)
        np.save(test_predicts_path, test_predicts)

In [38]:
train_folds(data, data_post, y)

Starting to train models...
Fold 0
Tensor("capsule_1/zeros_like:0", shape=(?, 10, ?), dtype=float32)
Tensor("capsule_2/zeros_like:0", shape=(?, 10, ?), dtype=float32)
Train on 143614 samples, validate on 15957 samples
Epoch 1/15

Epoch 00001: val_loss improved from inf to 0.04414, saving model to capsule_fold0.h5

 ROC-AUC - epoch: 1 - score: 0.982143
Epoch 2/15

Epoch 00002: val_loss improved from 0.04414 to 0.04181, saving model to capsule_fold0.h5

 ROC-AUC - epoch: 2 - score: 0.987948
Epoch 3/15

Epoch 00003: val_loss improved from 0.04181 to 0.04026, saving model to capsule_fold0.h5

 ROC-AUC - epoch: 3 - score: 0.988843
Epoch 4/15

Epoch 00004: val_loss improved from 0.04026 to 0.03922, saving model to capsule_fold0.h5

 ROC-AUC - epoch: 4 - score: 0.989323
Epoch 5/15

Epoch 00005: val_loss improved from 0.03922 to 0.03888, saving model to capsule_fold0.h5

 ROC-AUC - epoch: 5 - score: 0.989421
Epoch 6/15

Epoch 00006: val_loss did not improve from 0.03888

 ROC-AUC - epoch: 6 - 

KeyboardInterrupt: ignored

In [0]:
CLASSES = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

test_predicts_am = np.zeros(test_predicts_list[0].shape)

for fold_predict in test_predicts_list:
    test_predicts_am += fold_predict

test_predicts_am = (test_predicts_am / len(test_predicts_list))

test_ids = test_df["id"].values
test_ids = test_ids.reshape((len(test_ids), 1))

test_predicts_am = pd.DataFrame(data=test_predicts_am, columns=CLASSES)
test_predicts_am["id"] = test_ids
test_predicts_am = test_predicts_am[["id"] + CLASSES]
test_predicts_am.to_csv("10fold_capsule_am.csv", index=False)

In [0]:
from google.colab import files
files.download('10fold_capsule_am.csv')