In [None]:
import os
from os.path import join as path_join
import re
import gc
import pickle  
import random
import sys
import glob

import numpy as np
from numpy.random import seed
import pandas as pd

import torch
import transformers
from tensorflow.keras.preprocessing.sequence import pad_sequences

import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras import backend as K
from tensorflow.keras import initializers, regularizers, constraints, optimizers, layers, models, callbacks
from tensorflow.keras.losses import *
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.initializers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import *

from tensorflow.keras.utils import get_custom_objects

import math
from scipy.stats import spearmanr, rankdata
from urllib.parse import urlparse
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import KFold, GroupKFold, train_test_split
from ml_stratifiers import MultilabelStratifiedShuffleSplit, MultilabelStratifiedKFold

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(font_scale=1.56)

import warnings
warnings.filterwarnings('ignore')

from tqdm import tqdm_notebook
tqdm_notebook().pandas()

from tqdm import tqdm
tqdm.pandas(desc="progress-bar")

In [None]:
def seed_everything(seed=0):
    """Seed every random number generator this notebook imports.

    Args:
        seed: integer seed applied to Python's ``random``, NumPy,
            TensorFlow and PyTorch.

    Note:
        ``PYTHONHASHSEED`` is only read at interpreter start-up, so setting
        it here affects child processes, not the running kernel.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The original left the stdlib and torch generators unseeded even though
    # both modules are imported and used by the notebook.
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    torch.manual_seed(seed)

# Single seed reused by seed_everything(), the Keras initializers and the
# cross-validation splitter further down.
SEED = 69
seed_everything(SEED)

In [None]:
def chunks(l, n):
    """Lazily split ``l`` into consecutive slices holding at most ``n`` items."""
    yield from (l[start:start + n] for start in range(0, len(l), n))
        
def fetch_vectors(string_list, batch_size=64):
    """Encode each string with DistilBERT and return its first-token vector.

    Args:
        string_list: sequence of strings (list or 1-D array) to encode.
        batch_size: number of strings sent through the model per forward pass.

    Returns:
        2-D NumPy array with one row per input string, holding the hidden
        state of the first token of the model's first output. An empty input
        yields an array with zero rows instead of raising.

    Note:
        The tokenizer and model are loaded from disk on every call.
    """
    # inspired by https://jalammar.github.io/a-visual-guide-to-using-bert-for-the-first-time/
    # Fall back to CPU so the function also works on machines without a GPU.
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = transformers.DistilBertTokenizer.from_pretrained("../input/distilbertbaseuncased/")
    model = transformers.DistilBertModel.from_pretrained("../input/distilbertbaseuncased/")
    model.to(DEVICE)
    model.eval()  # make inference mode explicit (no dropout)

    max_len = 512
    fin_features = []
    for data in chunks(string_list, batch_size):
        tokenized = []
        for x in data:
            # Keep at most the first 300 whitespace-separated words.
            x = " ".join(x.strip().split()[:300])
            tok = tokenizer.encode(x, add_special_tokens=True)
            # NOTE(review): a hard cut at max_len can drop the closing special token.
            tokenized.append(tok[:max_len])

        # Right-pad with id 0; the mask below treats id 0 as padding
        # (assumes the tokenizer's pad id is 0 -- TODO confirm).
        padded = np.array([ids + [0] * (max_len - len(ids)) for ids in tokenized])
        attention_mask = np.where(padded != 0, 1, 0)
        input_ids = torch.tensor(padded).to(DEVICE)
        attention_mask = torch.tensor(attention_mask).to(DEVICE)

        with torch.no_grad():
            last_hidden_states = model(input_ids, attention_mask=attention_mask)

        features = last_hidden_states[0][:, 0, :].cpu().numpy()
        fin_features.append(features)

    if not fin_features:
        # np.vstack([]) raises; return an empty matrix with the model width.
        return np.empty((0, model.config.dim), dtype=np.float32)
    return np.vstack(fin_features)

In [None]:
# Lower-case contractions / common misspellings and their expansions.
# Fixes relative to the previous table:
#   * "we'll" expanded to " will" (the pronoun was lost) -> "we will"
#   * "i'd" was listed twice ("I would" / "I had"); the later entry silently
#     won. One entry is kept, matching the "... would" form used by the other
#     "'d" contractions.
#   * "didn't" was listed twice with the same value.
mispell_dict = {"aren't" : "are not",          "can't" : "cannot",
                "couldn't" : "could not",      "couldnt" : "could not",
                "didn't" : "did not",          "doesn't" : "does not",
                "doesnt" : "does not",         "don't" : "do not",
                "hadn't" : "had not",          "hasn't" : "has not",
                "haven't" : "have not",        "havent" : "have not",
                "he'd" : "he would",           "he'll" : "he will",
                "he's" : "he is",              "i'd" : "I would",
                "i'll" : "I will",             "i'm" : "I am",
                "isn't" : "is not",            "it's" : "it is",
                "it'll" : "it will",           "i've" : "I have",
                "let's" : "let us",            "mightn't" : "might not",
                "mustn't" : "must not",        "shan't" : "shall not",
                "she'd" : "she would",         "she'll" : "she will",
                "she's" : "she is",            "shouldn't" : "should not",
                "shouldnt" : "should not",     "that's" : "that is",
                "thats" : "that is",           "there's" : "there is",
                "theres" : "there is",         "they'd" : "they would",
                "they'll" : "they will",       "they're" : "they are",
                "theyre" : "they are",         "they've" : "they have",
                "we'd" : "we would",           "we're" : "we are",
                "weren't" : "were not",        "we've" : "we have",
                "what'll" : "what will",       "what're" : "what are",
                "what's" : "what is",          "what've" : "what have",
                "where's" : "where is",        "who'd" : "who would",
                "who'll" : "who will",         "who're" : "who are",
                "who's" : "who is",            "who've" : "who have",
                "won't" : "will not",          "wouldn't" : "would not",
                "you'd" : "you would",         "you'll" : "you will",
                "you're" : "you are",          "you've" : "you have",
                "'re" : " are",                "wasn't" : "was not",
                "we'll" : "we will",           "tryin'" : "trying"}

def _get_mispell(mispell_dict):
    mispell_re = re.compile('(%s)' % '|'.join(mispell_dict.keys()))
    return mispell_dict, mispell_re


def replace_typical_misspell(text):
    """Return ``text`` with every occurrence of a ``mispell_dict`` key expanded."""
    lookup, pattern = _get_mispell(mispell_dict)
    # Each match is, by construction of the pattern, a key of ``lookup``.
    return pattern.sub(lambda found: lookup[found.group(0)], text)


# Characters that clean_text() surrounds with a space on each side.
# NOTE(review): besides punctuation and symbols the list contains accented
# letters (e.g. 'é', 'à', 'ï'), so words containing them are split apart --
# confirm this is intended.
puncts = [',', '.', '"', ':', ')', '(', '-', '!', '?', '|', ';', "'", '$', '&',
          '/', '[', ']', '>', '%', '=', '#', '*', '+', '\\', '•',  '~', '@', '£',
          '·', '_', '{', '}', '©', '^', '®', '`',  '<', '→', '°', '€', '™', '›',
          '♥', '←', '×', '§', '″', '′', 'Â', '█', '½', 'à', '…', '\xa0', '\t',
          '“', '★', '”', '–', '●', 'â', '►', '−', '¢', '²', '¬', '░', '¶', '↑',
          '±', '¿', '▾', '═', '¦', '║', '―', '¥', '▓', '—', '‹', '─', '\u3000', '\u202f',
          '▒', '：', '¼', '⊕', '▼', '▪', '†', '■', '’', '▀', '¨', '▄', '♫',
          '☆', 'é', '¯', '♦', '¤', '▲', 'è', '¸', '¾', 'Ã', '⋅', '‘', '∞', '«',
          '∙', '）', '↓', '、', '│', '（', '»', '，', '♪', '╩', '╚', '³', '・',
          '╦', '╣', '╔', '╗', '▬', '❤', 'ï', 'Ø', '¹', '≤', '‡', '√', ]

def clean_text(x):
    """Turn newlines into spaces and pad every character in ``puncts`` with spaces.

    Args:
        x: value to clean; it is converted with ``str()`` first.

    Returns:
        The cleaned string.
    """
    # Newlines become a space rather than being deleted: deleting them glued
    # the last word of one line to the first word of the next.
    x = str(x).replace("\n", " ")
    for punct in puncts:
        x = x.replace(punct, f' {punct} ')
    return x

def preprocess(x):
    """Lower-case ``x``, expand contractions, then space out punctuation.

    Contractions must be expanded before clean_text() runs: clean_text() pads
    the apostrophe with spaces ("don't" -> "don ' t"), after which none of the
    apostrophe-containing keys of ``mispell_dict`` can match.

    Args:
        x: raw text; converted with ``str()`` so non-string cells do not raise.

    Returns:
        The normalised string.
    """
    x = str(x).lower()
    x = replace_typical_misspell(x)
    x = clean_text(x)
    return x

In [None]:
# Load the competition files; missing cells are replaced by the string "none".
data_path = '../input/google-quest-challenge/'
df_train = pd.read_csv(path_join(data_path, 'train.csv')).fillna("none")
df_test = pd.read_csv(path_join(data_path, 'test.csv')).fillna("none")
sample = pd.read_csv(path_join(data_path, 'sample_submission.csv'))
target_cols = list(sample.drop("qa_id", axis=1).columns)

# Normalise the three free-text columns in place (train first, then test).
for frame in (df_train, df_test):
    for text_col in ('question_body', 'answer', 'question_title'):
        frame[text_col] = frame[text_col].progress_map(preprocess)

# DistilBERT first-token vectors for every text column; the prints mark progress.
train_question_title_dense = fetch_vectors(df_train.question_title.values)
print('train_question_title_dense')
train_question_body_dense = fetch_vectors(df_train.question_body.values)
print('train_question_body_dense')
train_answer_dense = fetch_vectors(df_train.answer.values)
print('train_answer_dense')

test_question_title_dense = fetch_vectors(df_test.question_title.values)
print('test_question_title_dense')
test_question_body_dense = fetch_vectors(df_test.question_body.values)
print('test_question_body_dense')
test_answer_dense = fetch_vectors(df_test.answer.values)
print('test_answer_dense')

# df_train / df_test are NOT deleted here: the netloc/category encoding and
# the sentence-embedding cells below still read them, so deleting them made
# the notebook fail on a fresh "Restart & Run All".
del sample
gc.collect()

In [None]:
# The 30 label columns, question-level labels first and answer-level labels
# after them; the order defines the column order of y_train and predictions.
_question_targets = [
    'question_asker_intent_understanding',
    'question_body_critical',
    'question_conversational',
    'question_expect_short_answer',
    'question_fact_seeking',
    'question_has_commonly_accepted_answer',
    'question_interestingness_others',
    'question_interestingness_self',
    'question_multi_intent',
    'question_not_really_a_question',
    'question_opinion_seeking',
    'question_type_choice',
    'question_type_compare',
    'question_type_consequence',
    'question_type_definition',
    'question_type_entity',
    'question_type_instructions',
    'question_type_procedure',
    'question_type_reason_explanation',
    'question_type_spelling',
    'question_well_written',
]
_answer_targets = [
    'answer_helpful',
    'answer_level_of_information',
    'answer_plausible',
    'answer_relevance',
    'answer_satisfaction',
    'answer_type_instructions',
    'answer_type_procedure',
    'answer_type_reason_explanation',
    'answer_well_written',
]
targets = _question_targets + _answer_targets

# Free-text columns that get sentence embeddings.
input_columns = ['question_title', 'question_body', 'answer']

In [None]:
# Matches everything before the first "." of a host name (possibly empty).
find = re.compile(r"^[^.]*")


def _first_host_label(url):
    """Return the part of the URL's network location that precedes the first dot."""
    return find.match(urlparse(url).netloc).group(0)


df_train['netloc'] = df_train['url'].apply(_first_host_label)
df_test['netloc'] = df_test['url'].apply(_first_host_label)

features = ['netloc', 'category'] #
# Fit on train and test together so every category seen at prediction time
# has a column in the encoder.
merged = pd.concat([df_train[features], df_test[features]])
ohe = OneHotEncoder().fit(merged)

features_train = ohe.transform(df_train[features]).toarray()
features_test = ohe.transform(df_test[features]).toarray()

In [None]:
# Load the TF-Hub module stored at this local path (a Universal Sentence
# Encoder, going by the directory name). `embed` is later called on lists of
# strings and its result converted with .numpy().
module_url = "../input/universal-sentence-encoder-l5/universal_sentence_encoder_large5/"
embed = hub.load(module_url)

In [None]:
def _embed_in_batches(encoder, texts, batch_size=4):
    """Run ``encoder`` over ``texts`` in slices of ``batch_size`` and stack the results.

    Args:
        encoder: callable taking a list of strings and returning an object
            with a ``.numpy()`` method.
        texts: list of strings.
        batch_size: number of strings per encoder call.

    Returns:
        2-D array with one row per input string.
    """
    batches = [encoder(texts[start:start + batch_size]).numpy()
               for start in range(0, len(texts), batch_size)]
    return np.vstack(batches)


embeddings_train = {}
embeddings_test = {}
for text in input_columns:
    print(text)
    # regex=False: "?" is meant literally. Without it the result depends on
    # the pandas version's default for `regex`.
    train_text = (df_train[text]
                  .str.replace('?', '.', regex=False)
                  .str.replace('!', '.', regex=False)
                  .tolist())
    test_text = (df_test[text]
                 .str.replace('?', '.', regex=False)
                 .str.replace('!', '.', regex=False)
                 .tolist())

    embeddings_train[text + '_embedding'] = _embed_in_batches(embed, train_text)
    embeddings_test[text + '_embedding'] = _embed_in_batches(embed, test_text)

# Release the encoder before the Keras model is built.
del embed
K.clear_session()
gc.collect()

In [None]:
def l2_dist(x, y):
    """Row-wise squared Euclidean distance between two equally shaped 2-D arrays."""
    return np.power(x - y, 2).sum(axis=1)


def cos_dist(x, y):
    """Row-wise dot product.

    NOTE(review): despite the name no normalisation is applied, so this equals
    cosine similarity only when the rows have unit length.
    """
    return (x * y).sum(axis=1)


def abs_dist(x, y):
    """Row-wise L1 (sum of absolute differences) distance."""
    return np.abs(x - y).sum(axis=1)


def sum_dist(x, y):
    """Row-wise sum of the element-wise sum of ``x`` and ``y``."""
    return (x + y).sum(axis=1)


# Metric order defines the column order of every distance-feature table.
_DIST_METRICS = (l2_dist, cos_dist, abs_dist, sum_dist)


def _pairwise_dist_features(pairs):
    """Build a (n_rows, len(_DIST_METRICS) * len(pairs)) feature table.

    Columns are metric-major: all pairs for l2_dist, then all pairs for
    cos_dist, and so on -- the same layout the hand-written tables used.
    """
    return np.array([metric(a, b) for metric in _DIST_METRICS for a, b in pairs]).T


def _use_pairs(embeddings):
    """(title, answer), (body, answer), (body, title) pairs of sentence embeddings."""
    title = embeddings['question_title_embedding']
    body = embeddings['question_body_embedding']
    answer = embeddings['answer_embedding']
    return [(title, answer), (body, answer), (body, title)]


dist_features_train = _pairwise_dist_features(_use_pairs(embeddings_train))
dist_features_test = _pairwise_dist_features(_use_pairs(embeddings_test))

# DistilBERT vectors use the pair order (body, answer), (body, title), (answer, title).
dist_features_train_dense = _pairwise_dist_features([
    (train_question_body_dense, train_answer_dense),
    (train_question_body_dense, train_question_title_dense),
    (train_answer_dense, train_question_title_dense),
])

dist_features_test_dense = _pairwise_dist_features([
    (test_question_body_dense, test_answer_dense),
    (test_question_body_dense, test_question_title_dense),
    (test_answer_dense, test_question_title_dense),
])

In [None]:
# Concatenate the per-column sentence embeddings side by side, in the
# insertion order of the embedding dictionaries.
X_train = np.hstack(list(embeddings_train.values()))
X_test = np.hstack(list(embeddings_test.values()))

In [None]:
# Append the DistilBERT first-token vectors (body, answer, title) as extra columns.
dense_train_blocks = (train_question_body_dense, train_answer_dense, train_question_title_dense)
dense_test_blocks = (test_question_body_dense, test_answer_dense, test_question_title_dense)
X_train = np.hstack((X_train,) + dense_train_blocks)
X_test = np.hstack((X_test,) + dense_test_blocks)

In [None]:
# Append the one-hot site/category features and both distance-feature tables.
X_train = np.hstack((X_train, features_train, dist_features_train, dist_features_train_dense))
X_test = np.hstack((X_test, features_test, dist_features_test, dist_features_test_dense))
# The labels were read from an undefined name `train`. Read the label columns
# straight from train.csv so this cell does not depend on hidden kernel state
# (row order is the file's own, matching the features built from the same file).
y_train = pd.read_csv(path_join(data_path, 'train.csv'))[targets].values

In [None]:
class AttentionWeightedAverage(Layer):
    """
    Computes a weighted average of the different channels across timesteps.
    Uses 1 parameter pr. channel to compute the attention value for a single timestep.

    Input:  3-D tensor (batch, timesteps, channels).
    Output: 2-D tensor (batch, channels); when ``return_attention`` is True a
            list ``[output, attention_weights]`` with weights of shape
            (batch, timesteps).
    """

    def __init__(self, return_attention=False, **kwargs):
        # Run the base initialiser first so that attributes assigned below
        # (notably ``supports_masking``) cannot be overwritten by it.
        super(AttentionWeightedAverage, self).__init__(**kwargs)
        self.init = initializers.RandomUniform(seed=10000)
        self.supports_masking = True
        self.return_attention = return_attention

    def build(self, input_shape):
        """Create the (channels, 1) attention weight vector."""
        self.input_spec = [InputSpec(ndim=3)]
        assert len(input_shape) == 3

        self.W = self.add_weight(shape=(input_shape[2], 1),
                                 name='{}_W'.format(self.name),
                                 initializer=self.init)
        # Kept for backward compatibility with code that may read this attribute.
        self.trainable_W = [self.W]
        super(AttentionWeightedAverage, self).build(input_shape)

    def call(self, x, mask=None):
        """Return the attention-weighted average of ``x`` over the timestep axis."""
        # computes a probability distribution over the timesteps
        # uses 'max trick' for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        # epsilon guards against division by zero when every timestep is masked
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result

    def get_output_shape_for(self, input_shape):
        """Legacy alias that forwards to compute_output_shape()."""
        return self.compute_output_shape(input_shape)

    def compute_output_shape(self, input_shape):
        """Shape of the pooled output (and of the attention weights if returned)."""
        output_len = input_shape[2]
        if self.return_attention:
            return [(input_shape[0], output_len), (input_shape[0], input_shape[1])]
        return (input_shape[0], output_len)

    def compute_mask(self, input, input_mask=None):
        """The timestep axis is reduced away, so no mask is propagated."""
        if isinstance(input_mask, list):
            return [None] * len(input_mask)
        else:
            return None

    def get_config(self):
        """Serialisable configuration: the base config plus ``return_attention``."""
        config = super(AttentionWeightedAverage, self).get_config()
        config['return_attention'] = self.return_attention
        return config

In [None]:
class SpearmanRhoCallback(Callback):
    """Track mean column-wise Spearman correlation on validation data.

    After every epoch the validation set is predicted, Spearman's rho is
    computed per target column and averaged. The weights are saved to
    ``model_name`` whenever the score is at least as good as the best so far,
    and training stops after ``patience`` consecutive epochs without such an
    improvement.

    Args:
        training_data: ``(x, y)`` tuple; stored but not used for scoring.
        validation_data: ``(x_val, y_val)`` tuple used for scoring.
        patience: number of consecutive non-improving epochs tolerated.
        model_name: path the best weights are written to.
    """

    def __init__(self, training_data, validation_data, patience, model_name):
        # Initialise the base class so the attributes Keras expects on a
        # callback exist.
        super().__init__()
        self.x = training_data[0]
        self.y = training_data[1]
        self.x_val = validation_data[0]
        self.y_val = validation_data[1]

        self.patience = patience
        self.value = -1
        self.bad_epochs = 0
        self.model_name = model_name

    # The remaining hooks (train/epoch begin, batch begin/end, train end) are
    # inherited from Callback instead of being re-declared as no-ops.

    def on_epoch_end(self, epoch, logs=None):
        """Score the validation set, checkpoint on improvement, maybe stop training."""
        y_pred_val = self.model.predict(self.x_val)
        n_rows, n_targets = y_pred_val.shape
        rhos = []
        for ind in range(n_targets):
            # Tiny Gaussian jitter breaks ties in the predictions so the
            # correlation is defined even for a constant prediction column.
            jitter = np.random.normal(0, 1e-7, n_rows)
            rhos.append(spearmanr(self.y_val[:, ind], y_pred_val[:, ind] + jitter).correlation)
        rho_val = np.mean(rhos)

        if rho_val >= self.value:
            self.value = rho_val
            # Reset the counter: patience counts consecutive bad epochs.
            # It was never reset before, so isolated bad epochs accumulated
            # over the whole run.
            self.bad_epochs = 0
            self.model.save_weights(self.model_name)
        else:
            self.bad_epochs += 1

        if self.bad_epochs >= self.patience:
            print("Epoch %05d: early stopping Threshold" % epoch)
            self.model.stop_training = True

        print('\rval_spearman-rho: %s' % (str(round(rho_val, 4))), end=100*' '+'\n')
        return rho_val

def bce(t,p):
    """Binary cross-entropy of predictions ``p`` against targets ``t``.

    Thin named wrapper so the value shows up as ``bce`` in the Keras metrics.
    """
    loss = binary_crossentropy(t, p)
    return loss

def custom_loss(true,pred):
    """Training loss: binary cross-entropy plus log-cosh of the same inputs."""
    cross_entropy_term = binary_crossentropy(true, pred)
    log_cosh_term = logcosh(true, pred)
    return cross_entropy_term + log_cosh_term

In [None]:
# Sanity check: shape of the stacked feature matrix before the timestep axis is added.
X_train.shape

In [None]:
# Insert a length-1 axis so the matrices become (n_samples, 1, n_features),
# the 3-D layout the Conv1D/GRU layers take. The ndim guard makes the cell
# idempotent: re-running it used to raise because the arrays were already 3-D.
if X_train.ndim == 2:
    X_train = X_train[:, np.newaxis, :]
if X_test.ndim == 2:
    X_test = X_test[:, np.newaxis, :]

In [None]:
def create_RNN_CNN_model(input_shape=None, n_outputs=30):
    """Build and compile the two-branch (Conv1D + bidirectional GRU) model.

    Args:
        input_shape: ``(timesteps, features)`` of one sample. Defaults to the
            shape of the global ``X_test`` (its last two axes), which is what
            the function always used before.
        n_outputs: width of the sigmoid output layer (default 30).

    Returns:
        A compiled Keras ``Model``; its summary is printed as a side effect.

    Note:
        The notebook feeds inputs with a single timestep, so the pooling
        layers each reduce over one step.
    """
    if input_shape is None:
        input_shape = (X_test.shape[1], X_test.shape[2])

    inp = Input(shape=input_shape)

    # CNN branch: parallel convolutions with different kernel sizes, each
    # max-pooled over time. (A Flatten layer whose output was never used has
    # been removed.)
    pooled = []
    for kernel_size in (1, 3, 6, 9):
        conv = Conv1D(filters=256, kernel_size=kernel_size,
                      kernel_initializer=lecun_normal(seed=SEED),
                      activation='elu', padding='same')(inp)
        pooled.append(GlobalMaxPool1D()(conv))
    concatenate1 = concatenate(pooled, axis=1)
    x1 = Dense(512, activation='elu', kernel_initializer=lecun_normal(seed=SEED))(concatenate1)
    x1 = Dropout(0.2)(x1)

    # RNN branch: bidirectional GRU summarised by attention, average and max pooling.
    x_gru = Bidirectional(GRU(256, return_sequences = True,
                              dropout=0.2,
                              recurrent_dropout=0.2,
                              kernel_initializer=glorot_uniform(seed=SEED),
                              recurrent_initializer=Orthogonal(gain=1.0, seed=SEED)))(inp)
    maxpoolGRU = GlobalMaxPooling1D()(x_gru)
    avgpoolGRU = GlobalAveragePooling1D()(x_gru)
    atnwavgGRU = AttentionWeightedAverage()(x_gru)
    concatenate2 = concatenate([atnwavgGRU, avgpoolGRU, maxpoolGRU])
    x2 = Dense(512, activation='elu', kernel_initializer=lecun_normal(seed=SEED))(concatenate2)
    x2 = Dropout(0.2)(x2)

    # Merge both branches and predict all targets with sigmoid outputs.
    concatenate3 = concatenate([x1, x2],axis=1)
    x3 = Dense(512, activation='elu', kernel_initializer=lecun_normal(seed=SEED))(concatenate3)
    x3 = Dropout(0.2)(x3)
    outp = Dense(n_outputs, activation="sigmoid")(x3)

    model = Model(inputs=inp, outputs=outp)
    model.compile(loss=custom_loss,
                  # `learning_rate` replaces the deprecated `lr` keyword.
                  optimizer=Adam(learning_rate=1e-4),
                  metrics=[bce,logcosh])
    model.summary()
    return model

In [None]:
# 5-fold multilabel-stratified CV. shuffle=True is required for random_state
# to have any effect (assuming ml_stratifiers mirrors scikit-learn's KFold
# semantics, where random_state without shuffle is ignored or rejected).
kf = MultilabelStratifiedKFold(n_splits = 5, shuffle = True, random_state = SEED)
all_predictions = []
best_weights_path = u'best_USE_model_batch.h5'

for ind, (tr, val) in enumerate(kf.split(X_train,y_train)):
    X_tr, y_tr = X_train[tr], y_train[tr]
    X_vl, y_vl = X_train[val], y_train[val]

    model = create_RNN_CNN_model()

    filepath = "USE+DisstilBERT+LSTM_fold " + str(ind+1) + " bestmodel.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0,
                                 save_best_only=True, save_weights_only=False, mode='auto')
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                  patience=5, min_lr=1e-7, verbose=1)
    early_stop = EarlyStopping(monitor='val_loss',min_delta=0,
                               patience=15,mode='auto')

    model.fit(
        X_tr, y_tr, epochs=100, batch_size=4, validation_data=(X_vl, y_vl), verbose=True,
        callbacks=[SpearmanRhoCallback(training_data=(X_tr, y_tr), validation_data=(X_vl, y_vl),
                                       patience=10, model_name=best_weights_path),
                   reduce_lr,early_stop,checkpoint]
    )
    # Predict the test set with the weights that scored best on Spearman's rho.
    model.load_weights(best_weights_path)
    all_predictions.append(model.predict(X_test))

    os.remove(best_weights_path)

    # Free this fold's model and graph state before building the next one,
    # so memory does not grow with every fold.
    del model
    K.clear_session()
    gc.collect()

In [None]:
# Rank-transform every target column of every fold's predictions, then
# average the ranks across folds.
ranked_folds = []
for fold_pred in all_predictions:
    column_ranks = [rankdata(fold_pred[:, col]) for col in range(fold_pred.shape[1])]
    ranked_folds.append(np.array(column_ranks).T)
USE_test_preds = np.array(ranked_folds).mean(axis=0)

In [None]:
# Scale the averaged ranks by (largest value + 1) and add a tiny offset;
# the ordering within each column is unchanged.
max_val = 1 + USE_test_preds.max()
USE_test_preds = 1e-12 + USE_test_preds / max_val

In [None]:
# Use the scaled rank-averaged predictions as the final predictions and display them.
Final_test_preds = USE_test_preds
Final_test_preds

In [None]:
# `submission` is not defined by any earlier cell shown in this notebook;
# load the template so this cell works on a fresh kernel.
# Assumes sample_submission.csv lists rows in the same order as test.csv -- TODO confirm.
submission = pd.read_csv(path_join(data_path, 'sample_submission.csv'))
assert len(submission) == len(Final_test_preds), "prediction/template row count mismatch"
submission[targets] = Final_test_preds
submission.head()

In [None]:
# Write the predictions to submission.csv without the DataFrame index column.
submission.to_csv("submission.csv", index = False)