In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
from keras.layers import Dense,Input,LSTM,Bidirectional,Activation,Conv1D,GRU
from keras.callbacks import Callback
from keras.layers import Dropout,Embedding,GlobalMaxPooling1D, MaxPooling1D, Add, Flatten
from keras.preprocessing import text, sequence
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, concatenate, SpatialDropout1D
from keras import initializers, regularizers, constraints, optimizers, layers, callbacks
from keras.callbacks import EarlyStopping,ModelCheckpoint
from keras.models import Model
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

Using TensorFlow backend.
  from ._conv import register_converters as _register_converters


In [19]:
EMBEDDING_FILE = '../data/glove.840B.300d.txt'
train = pd.read_csv('../data/train.csv')
test = pd.read_csv('../data/test.csv')
train["comment_text"].fillna("fillna")
test["comment_text"].fillna("fillna")
X_train = train["comment_text"].str.lower()
y_train = train[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]].values

X_test = test["comment_text"].str.lower()

In [109]:
y1 = train['toxic'].values
y2 = train['severe_toxic'].values
y3 = train['obscene'].values
y4 = train['threat'].values
y5 = train['insult'].values
y6 = train['identity_hate'].values

In [20]:
max_features=100000
maxlen=150
embed_size=300

In [21]:
tok=text.Tokenizer(num_words=max_features,lower=True)
tok.fit_on_texts(list(X_train)+list(X_test))
X_train=tok.texts_to_sequences(X_train)
X_test=tok.texts_to_sequences(X_test)
x_train=sequence.pad_sequences(X_train,maxlen=maxlen)
x_test=sequence.pad_sequences(X_test,maxlen=maxlen)

In [22]:
embeddings_index = {}
with open(EMBEDDING_FILE,encoding='utf8') as f:
    for line in f:
        values = line.rstrip().rsplit(' ')
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

In [23]:
word_index = tok.word_index
#prepare embedding matrix
num_words = min(max_features, len(word_index) + 1)
embedding_matrix = np.zeros((num_words, embed_size))
for word, i in word_index.items():
    if i >= max_features:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

In [24]:
class RocAucEvaluation(Callback):
    def __init__(self, validation_data=(), interval=1):
        super(Callback, self).__init__()

        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            print("\n ROC-AUC - epoch: {:d} - score: {:.6f}".format(epoch+1, score))

In [25]:
from Attention import *

def build_model_nn1(num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_conv=64, kernel_size=3, 
                    num_sp_dr=0.2, optim=Adam(lr=1e-3)):
    sequence_input = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],trainable = False)(sequence_input)
    x = SpatialDropout1D(num_sp_dr)(x)
    x = Bidirectional(GRU(num_lstm_gru_units, return_sequences=True,dropout=dr_lstm,recurrent_dropout=dr_rec))(x)
    x = Conv1D(num_conv, kernel_size = kernel_size, padding = "valid", kernel_initializer = "glorot_uniform")(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    att = Attention()(x)
    x = concatenate([avg_pool, max_pool, att]) 
    # x = Dense(128, activation='relu')(x)
    # x = Dropout(0.1)(x)
    preds = Dense(6, activation="sigmoid")(x)
    model = Model(sequence_input, preds)
    model.compile(loss='binary_crossentropy',optimizer=optim,metrics=['accuracy'])

    return model

In [26]:
from sklearn.base import BaseEstimator

In [27]:

class NN1(BaseEstimator):
    def __init__(self,num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_conv=64, kernel_size=3, 
                 num_sp_dr=0.2, batch_size=128, optim=Adam(lr=1e-3)):
        self.num_lstm_gru_units = num_lstm_gru_units
        self.dr_lstm = dr_lstm
        self.dr_rec = dr_rec
        self.num_conv = num_conv
        self.kernel_size = kernel_size
        self.num_sp_dr = num_sp_dr
        self.batch_size = batch_size
        self.optim = optim
    def fit(self,X,y):
        X_tra, X_val, y_tra, y_val = train_test_split(x_train, y_train, train_size=0.9, random_state=233)
        self.filepath="../cache/1_18_NN1_weights_base.best.hdf5"
        checkpoint = ModelCheckpoint(self.filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        early = EarlyStopping(monitor="val_acc", mode="max", patience=5)
        ra_val = RocAucEvaluation(validation_data=(X_val, y_val), interval = 1)
        callbacks_list = [ra_val,checkpoint, early]
        model = build_model_nn1(num_lstm_gru_units=self.num_lstm_gru_units, dr_lstm=self.dr_lstm, 
                                dr_rec=self.dr_rec, 
                                num_conv=self.num_conv, num_sp_dr=self.num_sp_dr, optim=self.optim)
        model.fit(X_tra, y_tra, batch_size=self.batch_size, epochs=1, validation_data=(X_val, y_val),
          callbacks = callbacks_list,verbose=1)
        self.model=model
    def predict_proba(self,X):
        #Loading model weights
        self.model.load_weights(self.filepath)
        print('Predicting....')
        y_pred = self.model.predict(X,batch_size=1024,verbose=1)
        print(y_pred.shape)
        return y_pred

In [28]:
from AttentionWithContext import *

def build_model_nn2(num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_conv=64, kernel_size=3, 
                    num_sp_dr=0.2, optim=Adam(lr=1e-3)):
    sequence_input = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],trainable = False)(sequence_input)
    x = SpatialDropout1D(num_sp_dr)(x)
    x = Bidirectional(GRU(num_lstm_gru_units, return_sequences=True,dropout=dr_lstm,recurrent_dropout=dr_rec))(x)
    x = Conv1D(num_conv, kernel_size = kernel_size, padding = "valid", kernel_initializer = "glorot_uniform")(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    att = AttentionWithContext()(x)
    x = concatenate([avg_pool, max_pool, att]) 
    # x = Dense(128, activation='relu')(x)
    # x = Dropout(0.1)(x)
    preds = Dense(6, activation="sigmoid")(x)
    model = Model(sequence_input, preds)
    model.compile(loss='binary_crossentropy',optimizer=optim,metrics=['accuracy'])

    return model

In [104]:
class NN2(BaseEstimator):
    def __init__(self,num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_conv=64, kernel_size=3, 
                 num_sp_dr=0.2, batch_size=128, optim=Adam(lr=1e-3)):
        self.num_lstm_gru_units = num_lstm_gru_units
        self.dr_lstm = dr_lstm
        self.dr_rec = dr_rec
        self.num_conv = num_conv
        self.kernel_size = kernel_size
        self.num_sp_dr = num_sp_dr
        self.batch_size = batch_size
        self.optim = optim
    def fit(self,X,y):
        X_tra, X_val, y_tra, y_val = train_test_split(x_train, y_train, train_size=0.9, random_state=233)
        self.filepath="../cache/1_18_NN2_weights_base.best.hdf5"
        checkpoint = ModelCheckpoint(self.filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        early = EarlyStopping(monitor="val_acc", mode="max", patience=5)
        ra_val = RocAucEvaluation(validation_data=(X_val, y_val), interval = 1)
        callbacks_list = [ra_val,checkpoint, early]
        model = build_model_nn2(num_lstm_gru_units=self.num_lstm_gru_units, dr_lstm=self.dr_lstm, 
                                dr_rec=self.dr_rec, 
                                num_conv=self.num_conv, num_sp_dr=self.num_sp_dr, optim=self.optim)
        model.fit(X_tra, y_tra, batch_size=self.batch_size, epochs=1, validation_data=(X_val, y_val),
          callbacks = callbacks_list,verbose=1)
        self.model=model
    def predict_proba(self,X):
        #Loading model weights
        self.model.load_weights(self.filepath)
        print('Predicting....')
        y_pred = self.model.predict(X,batch_size=1024,verbose=1)
        print(y_pred.shape)
        return y_pred

In [30]:
from AttentionWithContext import *
from Attention import *

def build_model_nn3(num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_conv=64, kernel_size=3, 
                    num_sp_dr=0.2, optim=Adam(lr=1e-3)):
    sequence_input = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],trainable = False)(sequence_input)
    x = SpatialDropout1D(num_sp_dr)(x)
    x = Bidirectional(GRU(num_lstm_gru_units, return_sequences=True,dropout=dr_lstm,recurrent_dropout=dr_rec))(x)
    x = Conv1D(num_conv, kernel_size = kernel_size, padding = "valid", kernel_initializer = "glorot_uniform")(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    att1 = AttentionWithContext()(x)
    att2 = Attention()(x)
    x = concatenate([avg_pool, max_pool, att1, att2]) 
    preds = Dense(6, activation="sigmoid")(x)
    model = Model(sequence_input, preds)
    model.compile(loss='binary_crossentropy',optimizer=optim,metrics=['accuracy'])

    return model

In [31]:
class NN3(BaseEstimator):
    def __init__(self,num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_conv=64, kernel_size=3, 
                 num_sp_dr=0.2, batch_size=128, optim=Adam(lr=1e-3)):
        self.num_lstm_gru_units = num_lstm_gru_units
        self.dr_lstm = dr_lstm
        self.dr_rec = dr_rec
        self.num_conv = num_conv
        self.num_sp_dr = num_sp_dr
        self.batch_size=batch_size
        self.optim=optim
    def fit(self,X,y):
        X_tra, X_val, y_tra, y_val = train_test_split(x_train, y_train, train_size=0.9, random_state=233)
        self.filepath="../cache/1_18_NN3_weights_base.best.hdf5"
        checkpoint = ModelCheckpoint(self.filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        early = EarlyStopping(monitor="val_acc", mode="max", patience=5)
        ra_val = RocAucEvaluation(validation_data=(X_val, y_val), interval = 1)
        callbacks_list = [ra_val,checkpoint, early]
        model = build_model_nn3(num_lstm_gru_units=self.num_lstm_gru_units, dr_lstm=self.dr_lstm, 
                                dr_rec=self.dr_rec, 
                                num_conv=self.num_conv, num_sp_dr=self.num_sp_dr, optim=self.optim)
        model.fit(X_tra, y_tra, batch_size=self.batch_size, epochs=5, validation_data=(X_val, y_val),
          callbacks = callbacks_list,verbose=1)
        self.model=model
    def predict_proba(self,X):
        #Loading model weights
        self.model.load_weights(self.filepath)
        print('Predicting....')
        y_pred = self.model.predict(X,batch_size=1024,verbose=1)
        print(y_pred.shape)
        return y_pred

In [32]:
from AttentionWithContext import *
from Attention import *

def build_model_nn4(num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_conv=64, kernel_size=3, 
                    num_sp_dr=0.2, optim=Adam(lr=1e-3)):
    sequence_input = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],trainable = False)(sequence_input)
    x = SpatialDropout1D(num_sp_dr)(x)
    x = Bidirectional(GRU(num_lstm_gru_units, return_sequences=True,dropout=dr_lstm,recurrent_dropout=dr_rec))(x)
    x = Conv1D(num_conv, kernel_size = kernel_size, padding = "valid", kernel_initializer = "glorot_uniform")(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    x = concatenate([avg_pool, max_pool]) 
    preds = Dense(6, activation="sigmoid")(x)
    model = Model(sequence_input, preds)
    model.compile(loss='binary_crossentropy',optimizer=optim,metrics=['accuracy'])

    return model

In [33]:
class NN4(BaseEstimator):
    def __init__(self,num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_conv=64, kernel_size=3, 
                 num_sp_dr=0.2, batch_size=128, optim=Adam(lr=1e-3)):
        self.num_lstm_gru_units = num_lstm_gru_units
        self.dr_lstm = dr_lstm
        self.dr_rec = dr_rec
        self.num_conv = num_conv
        self.num_sp_dr = num_sp_dr
        self.batch_size=batch_size
        self.optim=optim
    def fit(self,X,y):
        X_tra, X_val, y_tra, y_val = train_test_split(x_train, y_train, train_size=0.9, random_state=233)
        self.filepath="../cache/1_18_NN4_weights_base.best.hdf5"
        checkpoint = ModelCheckpoint(self.filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        early = EarlyStopping(monitor="val_acc", mode="max", patience=5)
        ra_val = RocAucEvaluation(validation_data=(X_val, y_val), interval = 1)
        callbacks_list = [ra_val,checkpoint, early]
        model = build_model_nn4(num_lstm_gru_units=self.num_lstm_gru_units, dr_lstm=self.dr_lstm, 
                                dr_rec=self.dr_rec, 
                                num_conv=self.num_conv, num_sp_dr=self.num_sp_dr, optim=self.optim)
        model.fit(X_tra, y_tra, batch_size=self.batch_size, epochs=5, validation_data=(X_val, y_val),
          callbacks = callbacks_list,verbose=1)
        self.model=model
    def predict_proba(self,X):
        #Loading model weights
        self.model.load_weights(self.filepath)
        print('Predicting....')
        y_pred = self.model.predict(X,batch_size=1024,verbose=1)
        print(y_pred.shape)
        return y_pred

In [34]:
def build_model_nn5(num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_sp_dr=0.2, 
                    optim=Adam(lr=0.001)):
    inp = Input(shape=(maxlen, ))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
    x = SpatialDropout1D(num_sp_dr)(x)

    x = Bidirectional(GRU(num_lstm_gru_units, return_sequences=True,dropout=dr_lstm,recurrent_dropout=dr_rec))(x)
    avg_pool = GlobalAveragePooling1D()(x)
    max_pool = GlobalMaxPooling1D()(x)
    conc = concatenate([avg_pool, max_pool])
    outp = Dense(6, activation="sigmoid")(conc)
    
    model = Model(inputs=inp, outputs=outp)

    model.compile(loss='binary_crossentropy',
                  optimizer=optim,
                  metrics=['accuracy'])
    return model

In [35]:
class NN5(BaseEstimator):
    def __init__(self,num_lstm_gru_units=128, dr_lstm=0.1, dr_rec=0.1, num_sp_dr=0.2,
                 batch_size=128, optim=Adam(lr=1e-3)):
        self.num_lstm_gru_units = num_lstm_gru_units
        self.dr_lstm = dr_lstm
        self.dr_rec = dr_rec
        
        self.num_sp_dr = num_sp_dr
        self.batch_size=batch_size
        self.optim=optim
    def fit(self,X,y):
        X_tra, X_val, y_tra, y_val = train_test_split(x_train, y_train, train_size=0.9, random_state=233)
        self.filepath="../cache/1_18_NN5_weights_base.best.hdf5"
        checkpoint = ModelCheckpoint(self.filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
        early = EarlyStopping(monitor="val_acc", mode="max", patience=5)
        ra_val = RocAucEvaluation(validation_data=(X_val, y_val), interval = 1)
        callbacks_list = [ra_val,checkpoint, early]
        model = build_model_nn5(num_lstm_gru_units=self.num_lstm_gru_units, dr_lstm=self.dr_lstm, 
                                dr_rec=self.dr_rec, 
                                num_sp_dr=self.num_sp_dr, optim=self.optim)
        model.fit(X_tra, y_tra, batch_size=self.batch_size, epochs=5, validation_data=(X_val, y_val),
          callbacks = callbacks_list,verbose=1)
        self.model=model
    def predict_proba(self,X):
        #Loading model weights
        self.model.load_weights(self.filepath)
        print('Predicting....')
        y_pred = self.model.predict(X,batch_size=1024,verbose=1)
        print(y_pred.shape)
        return y_pred

In [36]:
a = np.random.random((10,6))
c = np.random.random((10,6))

In [37]:
a.shape

(10, 6)

In [38]:
b = np.zeros((10, 6, 3))

In [39]:
b[:,:,0] = a
b[:,:,1] = c

In [40]:
b.mean(axis=2)

array([[0.17727482, 0.45456644, 0.49212607, 0.50721638, 0.50686121,
        0.161148  ],
       [0.20563254, 0.64988956, 0.32744716, 0.17360135, 0.35707627,
        0.48103542],
       [0.44689288, 0.19128629, 0.53710537, 0.41709411, 0.37870518,
        0.49350407],
       [0.31327662, 0.16441258, 0.38323538, 0.21965772, 0.29665035,
        0.52717279],
       [0.40057884, 0.22551249, 0.48584782, 0.37296569, 0.19768885,
        0.57517161],
       [0.43608877, 0.34935421, 0.22674936, 0.61298084, 0.02550516,
        0.25646376],
       [0.34728608, 0.28166745, 0.25873408, 0.46843744, 0.28097777,
        0.28730284],
       [0.34534859, 0.30243579, 0.34651773, 0.37009074, 0.27523783,
        0.2359247 ],
       [0.18054381, 0.51721435, 0.22347868, 0.22697491, 0.38112764,
        0.18070692],
       [0.47845215, 0.44294616, 0.19088375, 0.28891556, 0.27543041,
        0.41902096]])

In [113]:
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.model_selection import cross_val_score

final_score = 0.0
n_classes = 6
class Ensemble1(object):    
    def __init__(self, mode, n_splits, stacker_2, stacker_1, base_models):
        self.mode = mode
        self.n_splits = n_splits
        self.stacker_2 = stacker_2
        self.stacker_1 = stacker_1
        self.base_models = base_models

    def fit_predict(self, X, y, T):
        X = np.array(X)
        y = np.array(y)
        T = np.array(T)


#         folds = list(StratifiedKFold(n_splits=self.n_splits, shuffle=True, 
#                                                              random_state=2016).split(X, y))
        
        folds = list(KFold(n_splits=self.n_splits, shuffle=True, 
                                                             random_state=2016).split(X, y))
        
        kf = KFold(n_splits=self.n_splits, shuffle=True, random_state=233)
        OOF_columns = []

#         S_train = np.zeros((X.shape[0], len(self.base_models)) * n_classes)
#         S_test = np.zeros((T.shape[0], len(self.base_models)) * n_classes)
        
        S_train = np.zeros((X.shape[0], n_classes, len(self.base_models)))
        S_test = np.zeros((T.shape[0], n_classes, len(self.base_models)))
        
#         S_train = []
#         S_test = []
#         (159571, 6)
#         (159571, 2)
#         print(y.shape)
        print(S_train.shape)
        for i, clf in enumerate(self.base_models):

            S_test_i = np.zeros((T.shape[0], n_classes, self.n_splits))

            for j, (train_idx, test_idx) in enumerate(folds):                
                X_train = X[train_idx]
                y_train = y[train_idx]
                X_holdout = X[test_idx]
                print(test_idx.shape)
                print(train_idx.shape)
                print(test_idx.shape[0])
                print(train_idx.shape[0])
                print ("Fit %s_%d fold %d" % (str(clf).split("(")[0], i+1, j+1))
                clf.fit(X_train, y_train)

#                 S_train[0:test_idx[0], n_classes, i] = clf.predict_proba(X_holdout)[:,1]  
                S_train[j*test_idx.shape[0]:(j+1)*test_idx.shape[0], :, i] = clf.predict_proba(X_holdout)
                S_test_i[:, :, j] = clf.predict_proba(T)                
            S_test[:, :, i] = S_test_i.mean(axis=2)
            
            print("  Base model_%d score: %.5f\n" % (i+1, roc_auc_score(y, S_train[:,:,i])))
#             print("  Base model_%d accuracy score: %.5f\n" % (i+1, accuracy_score(y, S_train[:,:,i])))
        
            OOF_columns.append('Base model_'+str(i+1))
        OOF_S_train = pd.DataFrame(S_train, columns = OOF_columns)
#         OOF_S_train = pd.DataFrame(np.array(S_train), columns = OOF_columns)
        print('\n')
        print('Correlation between out-of-fold predictions from Base models:')
        print('\n')
        print(OOF_S_train.corr())
        print('\n')
            
        
        if self.mode==1:
            
            folds_2 = list(StratifiedKFold(n_splits=self.n_splits, shuffle=True,
                                                                   random_state=2016).split(S_train, y))
            
            OOF_columns = []

            S_train_2 = np.zeros((S_train.shape[0], len(self.stacker_1)))
            S_test_2 = np.zeros((S_test.shape[0], len(self.stacker_1)))
            
            for i, clf in enumerate(self.stacker_1):
            
                S_test_i_2 = np.zeros((S_test.shape[0], self.n_splits))

                for j, (train_idx, test_idx) in enumerate(folds_2):
                    X_train_2 = S_train[train_idx]
                    y_train_2 = y[train_idx]
                    X_holdout_2 = S_train[test_idx]

                    print ("Fit %s_%d fold %d" % (str(clf).split("(")[0], i+1, j+1))
                    clf.fit(X_train_2, y_train_2)
                                 
                    S_train_2[test_idx, i] = clf.predict_proba(X_holdout_2)[:,1] 
                    S_test_i_2[:, j] = clf.predict_proba(S_test)[:,1]
                S_test_2[:, i] = S_test_i_2.mean(axis=1)
                
                print("  1st level model_%d score: %.5f\n"%(i+1,
                                                            roc_auc_score(y, S_train_2.mean(axis=1))))
                
#                 print("  1st level model_%d accuracy score: %.5f\n"%(i+1,
#                                                             accuracy_score(y, S_train_2.mean(axis=1))))
                
                OOF_columns.append('1st level model_'+str(i+1))
            OOF_S_train = pd.DataFrame(S_train_2, columns = OOF_columns)
            print('\n')
            print('Correlation between out-of-fold predictions from 1st level models:')
            print('\n')
            print(OOF_S_train.corr())
            print('\n')


        if self.mode==2:
            
            WOC_columns = []
        
            S_train_2 = np.zeros((S_train.shape[0], len(self.stacker_1)))
            S_test_2 = np.zeros((S_test.shape[0], len(self.stacker_1)))
               
            for i, clf in enumerate(self.stacker_1):
            
                S_train_i_2= np.zeros((S_train.shape[0], S_train.shape[1]))
                S_test_i_2 = np.zeros((S_test.shape[0], S_train.shape[1]))
                                       
                for j in range(S_train.shape[1]):
                                
                    S_tr = S_train[:,np.arange(S_train.shape[1])!=j]
                    S_te = S_test[:,np.arange(S_test.shape[1])!=j]
                                               
                    print ("Fit %s_%d subset %d" % (str(clf).split("(")[0], i+1, j+1))
                    clf.fit(S_tr, y)

                    S_train_i_2[:, j] = clf.predict_proba(S_tr)[:,1]                
                    S_test_i_2[:, j] = clf.predict_proba(S_te)[:,1]
                S_train_2[:, i] = S_train_i_2.mean(axis=1)    
                S_test_2[:, i] = S_test_i_2.mean(axis=1)
            
                print("  1st level model_%d score: %.5f\n"%(i+1,roc_auc_score(y, S_train_2.mean(axis=1))))
#                 print("  1st level model_%d accuracy score: %.5f\n"%(i+1,accuracy_score(y, S_train_2.mean(axis=1))))
                
                
                WOC_columns.append('1st level model_'+str(i+1))
            WOC_S_train = pd.DataFrame(S_train_2, columns = WOC_columns)
            print('\n')
            print('Correlation between without-one-column predictions from 1st level models:')
            print('\n')
            print(WOC_S_train.corr())
            print('\n')
            
            
        try:
            num_models = len(self.stacker_2)
            if self.stacker_2==(et_model):
                num_models=1
        except TypeError:
            num_models = len([self.stacker_2])
            
        if num_models==1:
                
            print ("Fit %s for final\n" % (str(self.stacker_2).split("(")[0]))
            self.stacker_2.fit(S_train_2, y)
            
            stack_res = self.stacker_2.predict_proba(S_test_2)[:,1]
        
            stack_score = self.stacker_2.predict_proba(S_train_2)[:,1]
            print("2nd level model final score: %.5f" % (roc_auc_score(y, stack_score)))
#             print("2nd level model final accuracy score: %.5f" % (accuracy_score(y, stack_score)))
            final_score = roc_auc_score(y, stack_score.mean(axis=1))    
        else:
            
            F_columns = []
            
            stack_score = np.zeros((S_train_2.shape[0], len(self.stacker_2)))
            res = np.zeros((S_test_2.shape[0], len(self.stacker_2)))
            
            for i, clf in enumerate(self.stacker_2):
                
                print ("Fit %s_%d" % (str(clf).split("(")[0], i+1))
                clf.fit(S_train_2, y)
                
                stack_score[:, i] = clf.predict_proba(S_train_2)[:,1]
                print("  2nd level model_%d score: %.5f\n"%(i+1,roc_auc_score(y, stack_score[:, i])))
                
                res[:, i] = clf.predict_proba(S_test_2)[:,1]
                
                F_columns.append('2nd level model_'+str(i+1))
            F_S_train = pd.DataFrame(stack_score, columns = F_columns)
            print('\n')
            print('Correlation between final predictions from 2nd level models:')
            print('\n')
            print(F_S_train.corr())
            print('\n')
        
            stack_res = res.mean(axis=1)            
            print("2nd level models final score: %.5f" % (roc_auc_score(y, stack_score.mean(axis=1))))
#             print("2nd level models final accuracy score: %.5f" % (accuracy_score(y, stack_score.mean(axis=1))))
            final_score = accuracy_score(y, stack_score.mean(axis=1))
        return stack_res

In [122]:
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.model_selection import cross_val_score

final_score = 0.0
class Ensemble(object):    
    def __init__(self, mode, n_splits, stacker_2, stacker_1, base_models):
        self.mode = mode
        self.n_splits = n_splits
        self.stacker_2 = stacker_2
        self.stacker_1 = stacker_1
        self.base_models = base_models

    def fit_predict(self, X, y, T):
        X = np.array(X)
        y = np.array(y)
        T = np.array(T)


        folds = list(StratifiedKFold(n_splits=self.n_splits, shuffle=True, 
                                                             random_state=2016).split(X, y))
        
        OOF_columns = []

        S_train = np.zeros((X.shape[0], len(self.base_models)))
        S_test = np.zeros((T.shape[0], len(self.base_models)))
        
        for i, clf in enumerate(self.base_models):

            S_test_i = np.zeros((T.shape[0], self.n_splits))

            for j, (train_idx, test_idx) in enumerate(folds):                
                X_train = X[train_idx]
                y_train = y[train_idx]
                X_holdout = X[test_idx]

                print ("Fit %s_%d fold %d" % (str(clf).split("(")[0], i+1, j+1))
                clf.fit(X_train, y_train)

                S_train[test_idx, i] = clf.predict_proba(X_holdout)[:,1]  
                S_test_i[:, j] = clf.predict_proba(T)[:,1]                
            S_test[:, i] = S_test_i.mean(axis=1)
            
            print("  Base model_%d score: %.5f\n" % (i+1, roc_auc_score(y, S_train[:,i])))
#             print("  Base model_%d accuracy score: %.5f\n" % (i+1, accuracy_score(y, S_train[:,i])))
        
            OOF_columns.append('Base model_'+str(i+1))
        OOF_S_train = pd.DataFrame(S_train, columns = OOF_columns)
        print('\n')
        print('Correlation between out-of-fold predictions from Base models:')
        print('\n')
        print(OOF_S_train.corr())
        print('\n')
            
        
        if self.mode==1:
            
            folds_2 = list(StratifiedKFold(n_splits=self.n_splits, shuffle=True,
                                                                   random_state=2016).split(S_train, y))
            
            OOF_columns = []

            S_train_2 = np.zeros((S_train.shape[0], len(self.stacker_1)))
            S_test_2 = np.zeros((S_test.shape[0], len(self.stacker_1)))
            
            for i, clf in enumerate(self.stacker_1):
            
                S_test_i_2 = np.zeros((S_test.shape[0], self.n_splits))

                for j, (train_idx, test_idx) in enumerate(folds_2):
                    X_train_2 = S_train[train_idx]
                    y_train_2 = y[train_idx]
                    X_holdout_2 = S_train[test_idx]

                    print ("Fit %s_%d fold %d" % (str(clf).split("(")[0], i+1, j+1))
                    clf.fit(X_train_2, y_train_2)
                                 
                    S_train_2[test_idx, i] = clf.predict_proba(X_holdout_2)[:,1] 
                    S_test_i_2[:, j] = clf.predict_proba(S_test)[:,1]
                S_test_2[:, i] = S_test_i_2.mean(axis=1)
                
                print("  1st level model_%d score: %.5f\n"%(i+1,
                                                            roc_auc_score(y, S_train_2.mean(axis=1))))
                
#                 print("  1st level model_%d accuracy score: %.5f\n"%(i+1,
#                                                             accuracy_score(y, S_train_2.mean(axis=1))))
                
                OOF_columns.append('1st level model_'+str(i+1))
            OOF_S_train = pd.DataFrame(S_train_2, columns = OOF_columns)
            print('\n')
            print('Correlation between out-of-fold predictions from 1st level models:')
            print('\n')
            print(OOF_S_train.corr())
            print('\n')


        if self.mode==2:
            
            WOC_columns = []
        
            S_train_2 = np.zeros((S_train.shape[0], len(self.stacker_1)))
            S_test_2 = np.zeros((S_test.shape[0], len(self.stacker_1)))
               
            for i, clf in enumerate(self.stacker_1):
            
                S_train_i_2= np.zeros((S_train.shape[0], S_train.shape[1]))
                S_test_i_2 = np.zeros((S_test.shape[0], S_train.shape[1]))
                                       
                for j in range(S_train.shape[1]):
                                
                    S_tr = S_train[:,np.arange(S_train.shape[1])!=j]
                    S_te = S_test[:,np.arange(S_test.shape[1])!=j]
                                               
                    print ("Fit %s_%d subset %d" % (str(clf).split("(")[0], i+1, j+1))
                    clf.fit(S_tr, y)

                    S_train_i_2[:, j] = clf.predict_proba(S_tr)[:,1]                
                    S_test_i_2[:, j] = clf.predict_proba(S_te)[:,1]
                S_train_2[:, i] = S_train_i_2.mean(axis=1)    
                S_test_2[:, i] = S_test_i_2.mean(axis=1)
            
                print("  1st level model_%d score: %.5f\n"%(i+1,roc_auc_score(y, S_train_2.mean(axis=1))))
#                 print("  1st level model_%d accuracy score: %.5f\n"%(i+1,accuracy_score(y, S_train_2.mean(axis=1))))
                
                
                WOC_columns.append('1st level model_'+str(i+1))
            WOC_S_train = pd.DataFrame(S_train_2, columns = WOC_columns)
            print('\n')
            print('Correlation between without-one-column predictions from 1st level models:')
            print('\n')
            print(WOC_S_train.corr())
            print('\n')
            
            
        try:
            num_models = len(self.stacker_2)
            if self.stacker_2==(et_model):
                num_models=1
        except TypeError:
            num_models = len([self.stacker_2])
            
        if num_models==1:
                
            print ("Fit %s for final\n" % (str(self.stacker_2).split("(")[0]))
            self.stacker_2.fit(S_train_2, y)
            
            stack_res = self.stacker_2.predict_proba(S_test_2)[:,1]
        
            stack_score = self.stacker_2.predict_proba(S_train_2)[:,1]
            print("2nd level model final score: %.5f" % (roc_auc_score(y, stack_score)))
#             print("2nd level model final accuracy score: %.5f" % (accuracy_score(y, stack_score)))
            final_score = roc_auc_score(y, stack_score.mean(axis=1))    
        else:
            
            F_columns = []
            
            stack_score = np.zeros((S_train_2.shape[0], len(self.stacker_2)))
            res = np.zeros((S_test_2.shape[0], len(self.stacker_2)))
            
            for i, clf in enumerate(self.stacker_2):
                
                print ("Fit %s_%d" % (str(clf).split("(")[0], i+1))
                clf.fit(S_train_2, y)
                
                stack_score[:, i] = clf.predict_proba(S_train_2)[:,1]
                print("  2nd level model_%d score: %.5f\n"%(i+1,roc_auc_score(y, stack_score[:, i])))
                
                res[:, i] = clf.predict_proba(S_test_2)[:,1]
                
                F_columns.append('2nd level model_'+str(i+1))
            F_S_train = pd.DataFrame(stack_score, columns = F_columns)
            print('\n')
            print('Correlation between final predictions from 2nd level models:')
            print('\n')
            print(F_S_train.corr())
            print('\n')
        
            stack_res = res.mean(axis=1)            
            print("2nd level models final score: %.5f" % (roc_auc_score(y, stack_score.mean(axis=1))))
#             print("2nd level models final accuracy score: %.5f" % (accuracy_score(y, stack_score.mean(axis=1))))
            final_score = roc_auc_score(y, stack_score.mean(axis=1))
        return stack_res

In [126]:
nn1 = NN1()
nn2 = NN2()
nn3 = NN3()
nn4 = NN4()
nn5 = NN5()

In [127]:
# Stacker models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
log_model = LogisticRegression()

et_model = ExtraTreesClassifier(n_estimators=200, max_depth=6, min_samples_split=10, random_state=10)

mlp_model = MLPClassifier(max_iter=20, random_state=42)

In [128]:
# Mode 2 run
stack = Ensemble(mode=2,
        n_splits=2,
        stacker_2 = (log_model, et_model),         
        stacker_1 = (log_model, et_model, mlp_model),
        base_models = (
            nn3,nn4,nn5
            
        ))       
        
y_pred1 = stack.fit_predict(x_train, y1, x_test)
# y_pred2 = stack.fit_predict(x_train, y2, x_test)
# y_pred3 = stack.fit_predict(x_train, y3, x_test)
# y_pred4 = stack.fit_predict(x_train, y4, x_test)
# y_pred5 = stack.fit_predict(x_train, y5, x_test)
# y_pred6 = stack.fit_predict(x_train, y6, x_test)

Fit NN3_1 fold 1




Train on 143613 samples, validate on 15958 samples
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.986334
Epoch 00000: val_acc improved from -inf to 0.98161, saving model to ../cache/1_18_NN3_weights_base.best.hdf5
Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.987396
Epoch 00001: val_acc improved from 0.98161 to 0.98316, saving model to ../cache/1_18_NN3_weights_base.best.hdf5
Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.988717
Epoch 00002: val_acc improved from 0.98316 to 0.98377, saving model to ../cache/1_18_NN3_weights_base.best.hdf5
Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.988814
Epoch 00003: val_acc did not improve
Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989330
Epoch 00004: val_acc improved from 0.98377 to 0.98404, saving model to ../cache/1_18_NN3_weights_base.best.hdf5
Predicting....
(79786, 6)
Predicting....
(153164, 6)
Fit NN3_1 fold 2




Train on 143613 samples, validate on 15958 samples
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.986018
Epoch 00000: val_acc improved from -inf to 0.98255, saving model to ../cache/1_18_NN3_weights_base.best.hdf5
Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.988258
Epoch 00001: val_acc improved from 0.98255 to 0.98348, saving model to ../cache/1_18_NN3_weights_base.best.hdf5
Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.988349
Epoch 00002: val_acc did not improve
Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.988196
Epoch 00003: val_acc did not improve
Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.988574
Epoch 00004: val_acc improved from 0.98348 to 0.98368, saving model to ../cache/1_18_NN3_weights_base.best.hdf5
Predicting....
(79785, 6)
Predicting....
(153164, 6)
  Base model_1 score: 0.97975

Fit NN4_2 fold 1




Train on 143613 samples, validate on 15958 samples
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.986189
Epoch 00000: val_acc improved from -inf to 0.98294, saving model to ../cache/1_18_NN4_weights_base.best.hdf5
Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.988254
Epoch 00001: val_acc did not improve
Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989187
Epoch 00002: val_acc improved from 0.98294 to 0.98396, saving model to ../cache/1_18_NN4_weights_base.best.hdf5
Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.988858
Epoch 00003: val_acc improved from 0.98396 to 0.98407, saving model to ../cache/1_18_NN4_weights_base.best.hdf5
Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989063
Epoch 00004: val_acc did not improve
Predicting....
(79786, 6)
Predicting....
(153164, 6)
Fit NN4_2 fold 2




Train on 143613 samples, validate on 15958 samples
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.987693
Epoch 00000: val_acc improved from -inf to 0.98379, saving model to ../cache/1_18_NN4_weights_base.best.hdf5
Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.988605
Epoch 00001: val_acc did not improve
Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989411
Epoch 00002: val_acc improved from 0.98379 to 0.98397, saving model to ../cache/1_18_NN4_weights_base.best.hdf5
Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989092
Epoch 00003: val_acc improved from 0.98397 to 0.98412, saving model to ../cache/1_18_NN4_weights_base.best.hdf5
Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.988856
Epoch 00004: val_acc did not improve
Predicting....
(79785, 6)
Predicting....
(153164, 6)
  Base model_2 score: 0.97372

Fit NN5_3 fold 1




Train on 143613 samples, validate on 15958 samples
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.985985
Epoch 00000: val_acc improved from -inf to 0.98354, saving model to ../cache/1_18_NN5_weights_base.best.hdf5
Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.988699
Epoch 00001: val_acc improved from 0.98354 to 0.98391, saving model to ../cache/1_18_NN5_weights_base.best.hdf5
Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.988090
Epoch 00002: val_acc did not improve
Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.986875
Epoch 00003: val_acc did not improve
Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.985654
Epoch 00004: val_acc did not improve
Predicting....
(79786, 6)
Predicting....
(153164, 6)
Fit NN5_3 fold 2




Train on 143613 samples, validate on 15958 samples
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.986356
Epoch 00000: val_acc improved from -inf to 0.98311, saving model to ../cache/1_18_NN5_weights_base.best.hdf5
Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.988112
Epoch 00001: val_acc improved from 0.98311 to 0.98359, saving model to ../cache/1_18_NN5_weights_base.best.hdf5
Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.987886
Epoch 00002: val_acc did not improve
Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.986190
Epoch 00003: val_acc did not improve
Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.985204
Epoch 00004: val_acc did not improve
Predicting....
(79785, 6)
Predicting....
(153164, 6)
  Base model_3 score: 0.98830



Correlation between out-of-fold predictions from Base models:


              Base model_1  Base model_2  Base model_3
Base model_1      1.000000      0.920163      0.919484
Base model_2      0.920163      1.000000      0.897421
Base model_3      0.919484      0.897421      1.000000


Fit L



Fit MLPClassifier_3 subset 2




Fit MLPClassifier_3 subset 3




  1st level model_3 score: 0.98631



Correlation between without-one-column predictions from 1st level models:


                   1st level model_1  1st level model_2  1st level model_3
1st level model_1           1.000000           0.979033           0.857521
1st level model_2           0.979033           1.000000           0.926970
1st level model_3           0.857521           0.926970           1.000000


Fit LogisticRegression_1
  2nd level model_1 score: 0.98666

Fit ExtraTreesClassifier_2
  2nd level model_2 score: 0.98654



Correlation between final predictions from 2nd level models:


                   2nd level model_1  2nd level model_2
2nd level model_1           1.000000           0.988861
2nd level model_2           0.988861           1.000000


2nd level models final score: 0.98658


In [129]:
final_score = '0.98658'

In [130]:
# Submission from mode 2
sample_submission = pd.read_csv('../data/sample_submission.csv')
list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
sample_submission[list_classes] = y_pred
sample_submission.to_csv('../submission/sub_1_18.csv', index=False)

NameError: name 'y_pred' is not defined

In [None]:
print(x_train.shape)

In [None]:
print(x_test.shape)

In [None]:
S_train = np.zeros((x_train.shape[0], x_train.shape[1], 2))
# S_test = np.zeros((T.shape[0], T.shape[1], len(self.base_models)))

In [None]:
S_train.shape

In [None]:
x1 = np.zeros((79786, 6))

In [None]:
x1.shape

In [None]:
S_train[x1.shape[0],x1.shape[1],0] = x1