In [1]:
import numpy as np
import pandas as pd
import pickle 
import gc

from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score

import tensorflow as tf
from keras.layers import Dense,Input,LSTM,Bidirectional,Activation,Conv1D,GRU, CuDNNGRU
from keras.callbacks import Callback
from keras.layers import Dropout,Embedding,GlobalMaxPooling1D, MaxPooling1D, Add, Flatten
from keras.preprocessing import text, sequence
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, SpatialDropout1D
from keras import initializers, regularizers, constraints, optimizers, layers, callbacks
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.models import Model
from keras.optimizers import Adam
from keras import backend as K

import warnings
warnings.filterwarnings('ignore')


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def np_rank(array):
    """Replace every value of a 2-D array by its rank within its own column.

    Ranks are zero-based: the smallest value of a column becomes 0 and the
    largest becomes ``len(array) - 1``. The result has the same shape and
    dtype as ``array``. Tied values receive distinct ranks in whatever order
    ``argsort`` returns them.
    """
    n_rows, n_cols = array.shape[0], array.shape[1]
    positions = np.arange(n_rows)
    ranked = np.empty_like(array)
    for col in range(n_cols):
        # argsort lists row indices from smallest to largest value, so
        # scattering 0..n_rows-1 through it yields each row's rank.
        order = np.argsort(array[:, col])
        ranked[order, col] = positions
    return ranked

def save_oof(train_oof, test_oof, name, sample_submission, train_ids=None):
    """Write out-of-fold train predictions and test predictions to CSV.

    Two files are produced, both using the column names of
    ``sample_submission``:

    * ``../output/test/<name>.csv``  - id column + ``test_oof``
    * ``../output/train/<name>.csv`` - id column + ``train_oof``

    Parameters
    ----------
    train_oof : array-like, one row per training sample.
    test_oof : array-like, one row per row of ``sample_submission``.
    name : str, file stem used for both CSV files.
    sample_submission : DataFrame whose first column holds the ids for the
        test predictions and whose column names label both outputs.
    train_ids : optional sequence of ids, one per row of ``train_oof``.
        When omitted, the first column of ``sample_submission`` is reused
        (the historical behaviour). That labels training rows with the
        submission ids and leaves NaN ids wherever ``train_oof`` has more
        rows, so pass the real training ids whenever they are available.
    """
    import os

    def _labelled(ids, predictions):
        # Put the id column in front of the predictions and apply the
        # submission header.
        frame = pd.concat([ids, pd.DataFrame(predictions)], axis=1)
        frame.columns = sample_submission.columns
        return frame

    test_path = "../output/test/{}.csv".format(name)
    train_path = "../output/train/{}.csv".format(name)

    # Create the target folders up front so that a missing directory cannot
    # throw away the result of a long training run.
    for path in (test_path, train_path):
        os.makedirs(os.path.dirname(path), exist_ok=True)

    # oof test
    _labelled(sample_submission.iloc[:, 0], test_oof).to_csv(test_path, index=False)

    # oof train
    if train_ids is None:
        train_id_column = sample_submission.iloc[:, 0]
    else:
        # Drop any index carried by train_ids so the ids line up with the
        # predictions by position.
        train_id_column = pd.Series(np.asarray(train_ids))
    _labelled(train_id_column, train_oof).to_csv(train_path, index=False)
    
def oof(X_train, X_test, y, num_folds, seed):
    """Train one model per K-fold split and collect out-of-fold predictions.

    For every fold a fresh model from ``get_model()`` is fitted on the
    training part and used to predict the held-out part and the full test
    set. Relies on the notebook-level globals ``batch_size`` and ``epochs``.

    Returns
    -------
    (scores, train_predict, test_predict)
        scores : list with the ROC-AUC of each fold on its held-out rows.
        train_predict : array (len(X_train), 6) of held-out probabilities.
        test_predict : array (len(X_test), 6) holding the column-wise ranks
            of the test predictions summed over folds and divided by
            ``num_folds * len(X_test)``.
    """
    n_outputs = 6
    fold_scores = []
    oof_train = np.zeros((X_train.shape[0], n_outputs))
    oof_test = np.zeros((X_test.shape[0], n_outputs))

    splitter = KFold(n_splits=num_folds, shuffle=True, random_state=seed)

    for fit_rows, holdout_rows in splitter.split(X_train):
        x_fit, y_fit = X_train[fit_rows], y[fit_rows]
        x_holdout, y_holdout = X_train[holdout_rows], y[holdout_rows]

        # Fresh network per fold; the callback prints the hold-out AUC
        # after each epoch.
        net = get_model()
        auc_logger = RocAucEvaluation(validation_data=(x_holdout, y_holdout), interval=1)
        net.fit(x_fit, y_fit,
                batch_size=batch_size,
                epochs=epochs,
                validation_data=(x_holdout, y_holdout),
                callbacks=[auc_logger],
                verbose=2)

        # Raw probabilities for the held-out rows, ranks for the test set.
        oof_train[holdout_rows] = net.predict(x_holdout, batch_size=batch_size)
        oof_test += np_rank(net.predict(X_test, batch_size=batch_size))

        fold_scores.append(roc_auc_score(y_holdout, oof_train[holdout_rows]))

        # Drop the model and reset the backend session before the next fold.
        del net
        gc.collect()
        K.clear_session()

    # Average the ranks over folds and divide by the number of test rows.
    oof_test /= (num_folds * oof_test.shape[0])
    return fold_scores, oof_train, oof_test

In [15]:
# Input locations and raw data frames.
EMBEDDING_FILE = '../input/glove.840B.300d.txt'
train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')
sample_submission = pd.read_csv("../input/sample_submission.csv")

# Series.fillna returns a new Series, so the result has to be assigned back;
# otherwise missing comments stay NaN and survive the .str.lower() below.
train["comment_text"] = train["comment_text"].fillna("fillna")
test["comment_text"] = test["comment_text"].fillna("fillna")

# Lower-cased comment texts and the six-column label matrix.
X_train = train["comment_text"].str.lower()
y_train = train[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].values
X_test = test["comment_text"].str.lower()

In [4]:
# Text / embedding settings shared by the (commented-out) preprocessing cell
# and by get_model().
max_features=100000  # vocabulary cap: Tokenizer num_words and Embedding input size
maxlen=150  # padded sequence length fed to the network's Input layer
embed_size=300  # width of one embedding vector

In [5]:
# tok=text.Tokenizer(num_words=max_features,lower=True)
# tok.fit_on_texts(list(X_train)+list(X_test))
# X_train=tok.texts_to_sequences(X_train)
# X_test=tok.texts_to_sequences(X_test)
# x_train=sequence.pad_sequences(X_train,maxlen=maxlen)
# x_test=sequence.pad_sequences(X_test,maxlen=maxlen)

# embeddings_index = {}
# with open(EMBEDDING_FILE,encoding='utf8') as f:
#     for line in f:
#         values = line.rstrip().rsplit(' ')
#         word = values[0]
#         coefs = np.asarray(values[1:], dtype='float32')
#         embeddings_index[word] = coefs

# word_index = tok.word_index
# #prepare embedding matrix
# num_words = min(max_features, len(word_index) + 1)
# embedding_matrix = np.zeros((num_words, embed_size))
# for word, i in word_index.items():
#     if i >= max_features:
#         continue
#     embedding_vector = embeddings_index.get(word)
#     if embedding_vector is not None:
#         # words not found in embedding index will be all-zeros.
#         embedding_matrix[i] = embedding_vector

In [6]:
# with open("../dumps/nn_capsule.pkl", "wb") as f:
#     pickle.dump(obj=(x_train, x_test, y_train, embedding_matrix), file=f)
    
# Restore the padded sequences, labels and embedding matrix from the dump
# written by the commented-out lines above. This rebinds y_train, replacing
# the label matrix built from train.csv in the data-loading cell.
# pickle can execute arbitrary code on load: only read dumps you created.
with open("../dumps/nn_capsule.pkl", "rb") as f:
    (x_train,
     x_test,
     y_train,
     embedding_matrix) = pickle.load(f)

In [7]:
class RocAucEvaluation(Callback):
    """Keras callback that prints the ROC-AUC on a validation set.

    Parameters
    ----------
    validation_data : tuple ``(X_val, y_val)`` scored after an epoch.
    interval : int, score only when the zero-based epoch index is a
        multiple of this value (1 scores every epoch).
    """

    def __init__(self, validation_data=(), interval=1):
        # Initialise the Callback base class itself. The previous
        # super(Callback, self) call started the lookup *after* Callback
        # and therefore skipped Callback.__init__.
        super(RocAucEvaluation, self).__init__()

        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs=None):
        # logs is accepted for API compatibility only and is not used; None
        # replaces the former shared mutable {} default.
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            # epoch is zero-based; print it one-based like Keras' own log.
            print("\n ROC-AUC - epoch: {:d} - score: {:.6f}".format(epoch+1, score))

In [8]:
from keras.layers import K, Activation
from keras.engine import Layer
from keras.layers import Dense, Input, Embedding, Dropout, Bidirectional, GRU, Flatten, SpatialDropout1D
# Hyper-parameters read by get_model().
gru_len = 128  # units per direction of the bidirectional CuDNNGRU
Routings = 5  # routing iterations passed to Capsule(routings=...)
Num_capsule = 10  # number of output capsules
Dim_capsule = 16  # length of each output capsule vector
dropout_p = 0.25  # rate of the plain Dropout applied to the embeddings
rate_drop_dense = 0.28  # rate of the SpatialDropout1D applied to the embeddings

def squash(x, axis=-1):
    """Rescale ``x`` to (approximately) unit L2 norm along ``axis``.

    This replaces an earlier variant that scaled by
    sqrt(|x|^2) / (0.5 + |x|^2); that one was dropped because the squared
    norm was found to be very small. K.epsilon() is added inside the square
    root so an all-zero vector does not cause a division by zero.
    """
    squared_norm = K.sum(K.square(x), axis, keepdims=True)
    return x / K.sqrt(squared_norm + K.epsilon())


# A Capsule Implement with Pure Keras
class Capsule(Layer):
    """Capsule layer with iterative routing, built on Keras backend ops.

    Input:  tensor of shape [batch, input_num_capsule, input_dim_capsule].
    Output: tensor of shape [batch, num_capsule, dim_capsule].

    Parameters
    ----------
    num_capsule : int, number of output capsules.
    dim_capsule : int, length of each output capsule vector.
    routings : int >= 1, number of routing iterations.
    kernel_size : stored and serialised, but not used by the computation.
    share_weights : bool. True uses one projection matrix for every input
        capsule (a width-1 conv1d); False uses one matrix per input
        position (local_conv1d).
    activation : 'default' applies ``squash``; any other value is handed to
        ``keras.layers.Activation``.
    """

    def __init__(self, num_capsule, dim_capsule, routings=3, kernel_size=(9, 1), share_weights=True,
                 activation='default', **kwargs):
        super(Capsule, self).__init__(**kwargs)
        if routings < 1:
            # With zero iterations call() would never assign its result.
            raise ValueError("routings must be >= 1, got {}".format(routings))
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.kernel_size = kernel_size
        self.share_weights = share_weights
        # Keep the raw argument so get_config() can round-trip it.
        self._activation_arg = activation
        if activation == 'default':
            self.activation = squash
        else:
            self.activation = Activation(activation)

    def build(self, input_shape):
        super(Capsule, self).build(input_shape)
        input_dim_capsule = input_shape[-1]
        if self.share_weights:
            # One projection shared by every input capsule.
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(1, input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)
        else:
            # A separate projection for each input capsule position.
            input_num_capsule = input_shape[-2]
            self.W = self.add_weight(name='capsule_kernel',
                                     shape=(input_num_capsule,
                                            input_dim_capsule,
                                            self.num_capsule * self.dim_capsule),
                                     initializer='glorot_uniform',
                                     trainable=True)

    def call(self, u_vecs):
        # Project every input capsule to num_capsule * dim_capsule values.
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs, (batch_size, input_num_capsule,
                                            self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        # Routing logits start at zero.
        b = K.zeros_like(u_hat_vecs[:, :, :, 0])  # shape = [None, num_capsule, input_num_capsule]
        for i in range(self.routings):
            # Softmax works on the last axis, so move num_capsule there,
            # normalise, and move it back. b itself keeps its layout.
            c = K.softmax(K.permute_dimensions(b, (0, 2, 1)))
            c = K.permute_dimensions(c, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                # New logits: agreement between outputs and projections.
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs

    def compute_output_shape(self, input_shape):
        return (None, self.num_capsule, self.dim_capsule)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = {
            'num_capsule': self.num_capsule,
            'dim_capsule': self.dim_capsule,
            'routings': self.routings,
            'kernel_size': self.kernel_size,
            'share_weights': self.share_weights,
            'activation': self._activation_arg,
        }
        base_config = super(Capsule, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [10]:
def get_model():
    """Build and compile the BiGRU + Capsule classifier.

    Architecture: frozen pretrained embeddings -> SpatialDropout1D ->
    Dropout -> bidirectional CuDNNGRU -> Capsule -> Flatten -> Dropout ->
    Dense(6, sigmoid). Reads the notebook globals ``maxlen``,
    ``embedding_matrix``, ``rate_drop_dense``, ``dropout_p``, ``gru_len``,
    ``Num_capsule``, ``Dim_capsule`` and ``Routings``.

    Returns the compiled Keras model and prints its summary as a side
    effect.
    """
    # Size the Embedding from the matrix itself. The matrix can have fewer
    # rows than max_features when the vocabulary is small, and a hard-coded
    # size would then fail when the weights are loaded.
    vocab_size, embedding_dim = embedding_matrix.shape

    input1 = Input(shape=(maxlen,))
    embed_layer = Embedding(vocab_size,
                            embedding_dim,
                            input_length=maxlen,
                            weights=[embedding_matrix],
                            trainable=False)(input1)
    embed_layer = SpatialDropout1D(rate_drop_dense)(embed_layer)
    embed_layer = Dropout(dropout_p)(embed_layer)
    x = Bidirectional(CuDNNGRU(gru_len, return_sequences=True))(embed_layer)
    capsule = Capsule(num_capsule=Num_capsule, dim_capsule=Dim_capsule, routings=Routings,
                      share_weights=True)(x)
    # Feed the flattened capsule vectors to the classifier head.
    capsule = Flatten()(capsule)
    capsule = Dropout(0.15)(capsule)
    output = Dense(6, activation='sigmoid')(capsule)
    model = Model(inputs=input1, outputs=output)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'])
    model.summary()
    return model


In [11]:
# Cross-validation settings. batch_size and epochs are read as globals
# inside oof().
seed = 42
num_folds = 10
batch_size = 128
epochs = 3

# Only NumPy's global generator is seeded here; the same seed also drives
# the KFold shuffle inside oof().
np.random.seed(seed)

scores, train_oof, test_oof = oof(x_train, x_test, y_train, num_folds, seed)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150)               0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 150, 300)          30000000  
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 150, 300)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 150, 300)          0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 150, 256)          330240    
_________________________________________________________________
capsule_1 (Capsule)          (None, 10, 16)            40960     
_________________________________________________________________
flatten_1 (Flatten)          (None, 160)               0         
__________

Train on 143614 samples, validate on 15957 samples
Epoch 1/3
 - 109s - loss: 0.0655 - acc: 0.9787 - val_loss: 0.0444 - val_acc: 0.9833

 ROC-AUC - epoch: 1 - score: 0.974257
Epoch 2/3
 - 108s - loss: 0.0475 - acc: 0.9822 - val_loss: 0.0424 - val_acc: 0.9838

 ROC-AUC - epoch: 2 - score: 0.981647
Epoch 3/3
 - 104s - loss: 0.0441 - acc: 0.9832 - val_loss: 0.0417 - val_acc: 0.9839

 ROC-AUC - epoch: 3 - score: 0.985689
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150)               0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 150, 300)          30000000  
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 150, 300)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 150, 300)       


 ROC-AUC - epoch: 3 - score: 0.986330
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 150)               0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 150, 300)          30000000  
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 150, 300)          0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 150, 300)          0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 150, 256)          330240    
_________________________________________________________________
capsule_1 (Capsule)          (None, 10, 16)            40960     
_________________________________________________________________
flatten_1 (Flatten)          (None, 1

In [13]:
# Display the per-fold validation AUCs together with the AUC of the combined
# out-of-fold predictions over the whole training set.
scores, roc_auc_score(y_train, train_oof)

([0.9867666598493461,
  0.9864524717597147,
  0.982806029281791,
  0.9880073728994524,
  0.985689375485412,
  0.9885150276785449,
  0.9874741332684961,
  0.9876190567169946,
  0.9863294996976336,
  0.981750265598548],
 0.9851536977565648)

In [17]:
# Write the out-of-fold train predictions and the rank-averaged test
# predictions to ../output/train/cnn_capsule.csv and
# ../output/test/cnn_capsule.csv.
# NOTE(review): save_oof labels the train rows with sample_submission's id
# column, which presumably holds test ids - confirm that downstream code
# matches train rows by position rather than by id.
save_oof(train_oof, test_oof, "cnn_capsule", sample_submission)

In [None]:
# #### callbacks
# # filepath="../input/best-model/best.hdf5"
# filepath="weights_base.best.hdf5"
# checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
# early = EarlyStopping(monitor="val_acc", mode="max", patience=5)
# ra_val = RocAucEvaluation(validation_data=(X_val, y_val), interval = 1)
# callbacks_list = [ra_val,checkpoint, early]

# model.fit(X_tra, y_tra, batch_size=batch_size, epochs=epochs, validation_data=(X_val, y_val),callbacks = callbacks_list,verbose=1)
# #Loading model weights
# model.load_weights(filepath)
# print('Predicting....')
# y_pred = model.predict(x_test,batch_size=1024,verbose=1)