# Simple GRU network with pretrained vectors for initialization

In [148]:
import sys, os, re, csv, codecs, gc, numpy as np, pandas as pd
import tensorflow as tf
#from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, Permute, GRU, Conv1D, LSTM, Embedding, Dropout, Activation, CuDNNLSTM, CuDNNGRU, concatenate, Flatten
from keras.layers import Bidirectional, GlobalMaxPool1D, GlobalAveragePooling1D, BatchNormalization, SpatialDropout1D, Dot
from keras.optimizers import Adam, RMSprop, Nadam
from keras.models import Model
from keras import initializers, regularizers, constraints, optimizers, layers
from keras_tqdm import TQDMNotebookCallback
import keras.backend as K
from keras.callbacks import LearningRateScheduler
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from functools import reduce
from keras.layers import Layer, PReLU, SpatialDropout1D, TimeDistributed, Subtract
from keras import initializers
from sklearn.model_selection import cross_val_predict

from nltk.tokenize import word_tokenize, wordpunct_tokenize, TweetTokenizer, MWETokenizer, ToktokTokenizer
from nltk.corpus import stopwords

import unicodedata
from collections import Counter
import itertools

np.random.seed(786)

from Tokenizer import Tokenizer
from ZeroMaskedLayer import ZeroMaskedLayer
from AttentionLayer import AttentionLayer

In [3]:
path = '../input/'
utility_path = '../utility/'
comp = 'jigsaw-toxic-comment-classification-challenge/'
EMBEDDING_FILE=f'{utility_path}crawl-300d-2M.vec'
TRAIN_DATA_FILE=f'{path}train.csv'
TEST_DATA_FILE=f'{path}test.csv'

In [4]:
from sklearn.metrics import roc_auc_score
from keras.callbacks import Callback
class RocAucEvaluation(Callback):
    def __init__(self, validation_data=(), interval=1):
        super(Callback, self).__init__()

        self.interval = interval
        self.X_val, self.y_val = validation_data

    def on_epoch_end(self, epoch, logs={}):
        if epoch % self.interval == 0:
            y_pred = self.model.predict(self.X_val, verbose=0)
            score = roc_auc_score(self.y_val, y_pred)
            print("\n ROC-AUC - epoch: %d - score: %.6f \n" % (epoch+1, score))

In [27]:
def unicodeToAscii(series):
    return series.apply(lambda s: unicodedata.normalize('NFKC', str(s)))


def multiple_replace(text, adict):
    rx = re.compile('|'.join(map(re.escape, adict)))

    def one_xlat(match):
        return adict[match.group(0)]

    return rx.sub(one_xlat, text)

STOP_WORDS = set(stopwords.words( 'english' ))
# Lowercase, trim, and remove non-letter characters
def normalizeString(series):
    series = unicodeToAscii(series)
    series = series.str.lower()
    series = series.str.replace(r"(\n){1,}", " ")
    series = series.str.replace(r"\'", "")
    series = series.str.replace(r"\-", "")
    series = series.str.replace(r"[^0-9a-z]+", " ")
    series = series.str.replace("([a-z0-9]{2,}\.){2,}[a-z]{2,}", " ") 
    series = series.str.replace(" \d ", "")
    return series


In [28]:
train = pd.read_csv(TRAIN_DATA_FILE)
test = pd.read_csv(TEST_DATA_FILE)

print(train.shape, test.shape)

list_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
y = train[list_classes].values

#Get validation folds
train['target_str'] = reduce(lambda x,y: x+y, [train[col].astype(str) for col in list_classes])
train['target_str'] = train['target_str'].replace('110101', '000000').replace('110110','000000')
cvlist1 = list(StratifiedKFold(n_splits=10, random_state=786).split(train, train['target_str'].astype('category')))
cvlist2 = list(StratifiedShuffleSplit(n_splits=5, test_size=0.05, random_state=786).split(train, train['target_str'].astype('category')))

(159571, 8) (153164, 2)




In [29]:
for df in train, test:
    df["comment_text"] = normalizeString(df["comment_text"])

In [30]:
train.comment_text.sample(1).values[0]

'ha ha im on episodeso fuck u now let me spoil it basicly jack dad kill theguys drive to his brothers house interrogate him brother tells him that he ist he one behind all of the last season and then his dad kills jacks bro the end'

In [31]:
MAX_FEATURES = 200000
MAX_LEN = 200

tok = Tokenizer(max_features=MAX_FEATURES, max_len=MAX_LEN, tokenizer=wordpunct_tokenize)
X = tok.fit_transform(pd.concat([train["comment_text"].astype(str), test["comment_text"].astype(str)]))
X_train = X[:len(train), :]
X_test = X[len(train):, :]

print(X_train.shape, X_test.shape)

(159571, 200) (153164, 200)


In [32]:
#del train, testlen()
len(tok.doc_freq)

384733

In [33]:
EMBED_SIZE = 300
oov_list= []
def get_coefs(word, *arr): return word, np.asarray(arr, dtype='float32')

def initialize_embeddings(filename, tokenizer):
    embeddings_index = dict(get_coefs(*o.rstrip().rsplit(' ')) for o in open(filename))

    word_index = tokenizer.vocab_idx
    nb_words = min(MAX_FEATURES, len(word_index))
    embedding_matrix = np.zeros((nb_words, EMBED_SIZE))
    for word, i in word_index.items():
        if i > MAX_FEATURES: continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
        else:
            oov_list.append(word)
    return  embedding_matrix, oov_list

In [34]:
embedding_matrix, oov_list = initialize_embeddings(EMBEDDING_FILE, tok)
print(embedding_matrix.shape)
print(np.mean(embedding_matrix), np.std(embedding_matrix))

(200000, 300)
0.0029242610409603276 0.2315748370561002


In [36]:
len(oov_list)

92089

In [37]:
tok.doc_freq.most_common(200000)[-100:]

[('nihh', 1),
 ('pletnikov', 1),
 ('mhdisc1', 1),
 ('disruptedinschizophrenia1', 1),
 ('transgenicmouseoffersawindowongeneenvironmentinterplayprenatalinfectionaltersbehavioringeneticallyvulnerable',
  1),
 ('strategicplanningreports', 1),
 ('breakinggroundbreakingthroughthestrategicplanformooddisordersresearch', 1),
 ('monastries', 1),
 ('casecomments', 1),
 ('onlyitems', 1),
 ('pkt7wfyollo', 1),
 ('letitgo', 1),
 ('dynampic', 1),
 ('noveseminarys', 1),
 ('nietrality', 1),
 ('resumelike', 1),
 ('khud', 1),
 ('jaata', 1),
 ('dusor', 1),
 ('hamesha', 1),
 ('taiyaar', 1),
 ('rehte', 1),
 ('kaheen', 1),
 ('khush', 1),
 ('aake', 1),
 ('dunga', 1),
 ('chuda', 1),
 ('bhosdke', 1),
 ('kiya', 1),
 ('xebat', 1),
 ('archivesarchivearchivearchivearchiveso', 1),
 ('highfashion', 1),
 ('japhethic', 1),
 ('philistia', 1),
 ('fylfot', 1),
 ('fractionary', 1),
 ('topnotable', 1),
 ('geographics', 1),
 ('ab1axxvarhy', 1),
 ('porcom', 1),
 ('porkom', 1),
 ('protoceltic', 1),
 ('refuring', 1),
 ('notifyon

In [150]:
from sklearn.base import BaseEstimator, ClassifierMixin
class GRUClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, gru_dim=150, dense_dim=256, batch_size=128, epochs=2, bidirectional=False, 
                 pool_type='all', initial_weights=None, optimizer='adam' ,verbose=1, out_dim=6, callbacks=None,
                spatial_drop=0.0, dropout=0.0, mask_zero=True, 
                gru_kernel_regularization = 0.0001,
                gru_recurrent_regularization = 0.0001,
                gru_bias_regularization = 0.0001,
                embeddings_regularization = 0.0,
                ):
        
        self.gru_dim = gru_dim
        self.dense_dim = dense_dim
        self.batch_size = batch_size
        self.epochs= epochs
        self.bidirectional = bidirectional
        self.pool_type = pool_type
        self.initial_weights = initial_weights
        self.verbose = verbose
        self.callbacks = callbacks
        self.optimizer = optimizer
        self.out_dim = out_dim
        self.spatial_drop = spatial_drop
        self.dropout = dropout
        self.mask_zero = mask_zero
        self.gru_kernel_regularization = gru_kernel_regularization
        self.gru_recurrent_regularization = gru_recurrent_regularization
        self.gru_bias_regularization = gru_bias_regularization
        self.embeddings_regularization = embeddings_regularization
        
    def _build_model(self):
        inp = Input(shape=(MAX_LEN,))
        emb = Embedding(MAX_FEATURES, 
                        EMBED_SIZE,
                        weights=[self.initial_weights],
                        mask_zero=self.mask_zero,
                        #embeddings_regularizer=regularizers.l2(self.embeddings_regularization),
                        trainable=False)(inp)

        if self.mask_zero:
            emb = ZeroMaskedLayer()(emb)
            
        emb = SpatialDropout1D(self.spatial_drop)(emb)
        
        if self.bidirectional:
            enc = Bidirectional(CuDNNGRU(int(self.gru_dim), return_sequences=True, return_state=True,
                                         ))(emb)
            x = enc[0]
            state = enc[1]
            
            #enc = Bidirectional(CuDNNGRU(int(self.gru_dim), return_sequences=True, return_state=True,
            #                             ))(emb)
            #x = enc[0]
            #state = enc[1]
        else:
            x, state = CuDNNGRU(int(self.gru_dim), return_sequences=True, return_state=True,
                            kernel_regularizer=regularizers.l2(self.gru_kernel_regularization),
                            recurrent_regularizer=regularizers.l2(self.gru_recurrent_regularization),
                            bias_regularizer=regularizers.l2(self.gru_bias_regularization)
                               )(emb)
            #x = TimeDistributed(Dense(100, activation='relu'))(x)
            #x = CuDNNGRU(150, return_sequences=True)(x)
            #x = SpatialDropout1D(0.5)(x)
        
        if self.pool_type == 'avg':
            x = GlobalAveragePooling1D()(x)
            x = concatenate([x, state])
            
        elif self.pool_type == 'max':
            x = GlobalMaxPool1D()(x)
            x = concatenate([x, state])
            
        elif self.pool_type == 'attn':
            x = AttentionLayer(MAX_LEN)(x)
            x = concatenate([x, state])
            
        elif self.pool_type == 'all':
            #x1 = GlobalAveragePooling1D()(emb)
            x2 = GlobalMaxPool1D()(x)
            x3 = AttentionLayer(MAX_LEN)(x)
            x4 = Dot(1)([x2, x3])
            x = concatenate([x2, x3, x4])
    
        x = Dropout(self.dropout)(x)
        x = Dense(self.dense_dim)(x)
        x = PReLU()(x)
        
        #x = Dropout(self.dropout)(x)
        #x = Dense(256)(x)
        #x = PReLU()(x)

        out = Dense(self.out_dim, activation="sigmoid")(x)
        if self.optimizer == 'adam':
            opt = Adam(lr=0.001, decay=0.0,)
        if self.optimizer == 'nadam':
            opt = Nadam(lr=0.002)
        elif self.optimizer == 'rmsprop':
            opt = RMSprop(clipnorm=1.0)
        model = Model(inputs=inp, outputs=out)
        model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
        return model
    
    def fit(self, X, y):
        self.model = self._build_model()
        
        if self.callbacks:
            self.model.fit(X, y, batch_size=self.batch_size, epochs=self.epochs,
                       verbose=self.verbose,
                       callbacks=self.callbacks,
                       shuffle=True)
        else:
            self.model.fit(X, y, batch_size=self.batch_size, epochs=self.epochs,
                       verbose=self.verbose,
                       shuffle=True)
        return self
    
    def predict(self, X, y=None):
        if self.model:
            y_hat = self.model.predict(X, batch_size=1024)
        else:
            raise ValueError("Model not fit yet")
        return y_hat
    

In [140]:
def lr_decay(epoch):
    if epoch == 0:
        return 0.001
    if epoch == 1:
        return 0.0008
    if epoch == 2:
        return 0.001
    if epoch == 3:
        return 0.00001


def shuffle_crossvalidator(model, cvlist, X, y, lr_decay):
    y_trues = []
    y_preds = []
    scores = []
    LRDecay = LearningRateScheduler(lr_decay)

    for tr_index, val_index in cvlist:
        X_tr, y_tr = X[tr_index, :], y[tr_index, :]
        X_val, y_val = X[val_index, :], y[val_index, :]
        RocAuc = RocAucEvaluation(validation_data=(X_val, y_val), interval=1)

        model.set_params(**{'callbacks':[RocAuc, LRDecay]})
        model.fit(X_tr, y_tr)

        y_pred = model.predict(X_val)
        score = roc_auc_score(y_val, y_pred)
        scores.append(score)
        print("ROC AUC for this fold is ", score)
        y_trues.append(y_val)
        y_preds.append(y_pred)
        K.clear_session()
        gc.collect()
        #break
    y_trues = np.concatenate(y_trues)
    y_preds = np.concatenate(y_preds)
    score = roc_auc_score(y_trues, y_preds)
    print("Overall score on 10 fold CV is {}".format(score))
    
    return y_preds, y_trues, scores

def outoffold_crossvalidator(model_params, cvlist, X, y, lr_decay):
    y_preds = np.zeros(y.shape)
    LRDecay = LearningRateScheduler(lr_decay)

    for tr_index, val_index in cvlist:
        X_tr, y_tr = X[tr_index, :], y[tr_index, :]
        X_val, y_val = X[val_index, :], y[val_index, :]
        RocAuc = RocAucEvaluation(validation_data=(X_val, y_val), interval=1)
        
        model.set_params(**{'callbacks':[RocAuc, LRDecay]})
        model.fit(X_tr, y_tr)

        y_pred = model.predict(X_val)
        print("ROC AUC for this fold is ", roc_auc_score(y_val, y_pred))
        y_preds[val_idx] = y_pred
        K.clear_session()
        break
    score = roc_auc_score(y, y_preds)
    print("Overall score on 10 fold CV is {}".format(score))
    
    return y_preds, y_trues, score


In [151]:
def lr_decay(epoch):
    if epoch == 0:
        return 0.002
    if epoch == 1:
        return 0.002
    if epoch == 2:
        return 0.002
    if epoch == 3:
        return 0.002
    return 0.002
model = GRUClassifier(gru_dim=300, dense_dim=600, initial_weights=embedding_matrix, bidirectional=True,
                    batch_size=256, epochs=10, optimizer='nadam', pool_type='all', spatial_drop=0.5, 
                      dropout=0.0)

y_preds, y_trues, _ = shuffle_crossvalidator(model, cvlist2, X_train, y, lr_decay)

Epoch 1/10
 ROC-AUC - epoch: 1 - score: 0.988307 

Epoch 2/10
 ROC-AUC - epoch: 2 - score: 0.988821 

Epoch 3/10
 ROC-AUC - epoch: 3 - score: 0.989728 

Epoch 4/10
 ROC-AUC - epoch: 4 - score: 0.990304 

Epoch 5/10
 ROC-AUC - epoch: 5 - score: 0.990427 

Epoch 6/10
 ROC-AUC - epoch: 6 - score: 0.990251 

Epoch 7/10
 ROC-AUC - epoch: 7 - score: 0.990271 

Epoch 8/10
 ROC-AUC - epoch: 8 - score: 0.990724 

Epoch 9/10
 ROC-AUC - epoch: 9 - score: 0.990932 

Epoch 10/10
 ROC-AUC - epoch: 10 - score: 0.989986 

ROC AUC for this fold is  0.9899858499842192
Epoch 1/10

KeyboardInterrupt: 

In [98]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials


def uniform_int(name, lower, upper):
    # `quniform` returns:
    # round(uniform(low, high) / q) * q
    return hp.quniform(name, lower, upper, q=1)

def loguniform_int(name, lower, upper):
    # Do not forget to make a logarithm for the
    # lower and upper bounds.
    return hp.qloguniform(name, np.log(lower), np.log(upper), q=1)

parameter_space = {
    'gru_dim': uniform_int('gru_dim', 50, 600),
    'dense_dim': uniform_int('dense_dim', 100, 1000),
    'lr1': hp.uniform('lr1', 0.0001, 0.005),
    'lr2': hp.uniform('lr2', 0.0001, 0.005),
    'spatial_drop': hp.uniform('spatial_drop', 0, 0.5),
    'dropout': hp.uniform('dropout', 0, 0.5),
    'batch_size': loguniform_int('batch_size', 16, 512),
    'mask_zero': hp.choice('mask_zero', [True, False]),
    'optimizer': hp.choice('optimizer', ['adam', 'rmsprop']),
    'pool_type': hp.choice('pool_type', ['avg', 'max', 'attn', 'all']),
    'bidirectional': hp.choice('bidirectional', [True, False]),
    'gru_kernel_reg': hp.loguniform('gru_kernel_reg', np.log(1e-10), np.log(1e-4)),
    'gru_recc_reg': hp.loguniform('gru_recc_reg', np.log(1e-10), np.log(1e-4)),
    'gru_bias_reg': hp.loguniform('gru_bias_reg', np.log(1e-10), np.log(1e-4)),
    #'embeddings_reg': hp.loguniform('embeddings_reg', 1e-8, 1e-4)
}


def objective(parameter_space):
    
    def lr_decay(epoch):
        if epoch == 0:
            return parameter_space['lr1']
        if epoch == 1:
            return parameter_space['lr2']
    
    model = GRUClassifier(initial_weights=embedding_matrix, bidirectional=False,
                          gru_dim = int(parameter_space['gru_dim']),
                          dense_dim = int(parameter_space['dense_dim']),
                          mask_zero = parameter_space['mask_zero'],
                          pool_type = parameter_space['pool_type'],
                          batch_size= int(parameter_space['batch_size']), 
                          epochs=5, 
                          optimizer=parameter_space['optimizer'],
                          dropout=parameter_space['dropout'],
                          spatial_drop=parameter_space['spatial_drop'],
                          gru_kernel_regularization = parameter_space["gru_kernel_reg"],
                          gru_recurrent_regularization = parameter_space["gru_recc_reg"],
                          gru_bias_regularization = parameter_space["gru_bias_reg"],
                          #embeddings_regularization = parameter_space["embeddings_reg"],
                          )

    y_preds, y_trues, scores = shuffle_crossvalidator(model, cvlist1, X_train, y, lr_decay)    
    score = roc_auc_score(y_trues, y_preds)
    print("Score for parameters {} is {}".format(parameter_space, score))
    #return score
    return {
        'loss': -1* score,
        'status': STATUS_OK,
        'other_Stuff': {'scores': scores, 'variance': np.std(scores)},
        }

trials = Trials()

best = fmin(objective,
    space=parameter_space,
    algo=tpe.suggest,
    max_evals=100,
    trials=trials
           )

print(best)

Epoch 1/2
 ROC-AUC - epoch: 1 - score: 0.982053 

Epoch 2/2

KeyboardInterrupt: 

In [15]:
import gc 
gc.collect()
K.clear_session()

In [40]:
parameter_list = [{'batch_size': [47.0],
  'bidirectional': [1],
  'dense_dim': [973.0],
  'dropout': [0.19862535182199834],
  'gru_bias_reg': [1.011936859273273e-08],
  'gru_dim': [358.0],
  'gru_kernel_reg': [2.0678669679829352e-10],
  'gru_recc_reg': [8.946942716621634e-07],
  'lr1': [0.0015982451490776767],
  'lr2': [0.0002459290205687559],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [3],
  'spatial_drop': [0.2696100622336198]},
 {'batch_size': [83.0],
  'bidirectional': [1],
  'dense_dim': [351.0],
  'dropout': [0.07833431778315075],
  'gru_bias_reg': [1.989216237371643e-09],
  'gru_dim': [478.0],
  'gru_kernel_reg': [2.1606860352426398e-10],
  'gru_recc_reg': [1.6736919208281796e-07],
  'lr1': [0.00263784102869703],
  'lr2': [0.0005711207564167526],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [3],
  'spatial_drop': [0.21401382410917008]},
 {'batch_size': [49.0],
  'bidirectional': [1],
  'dense_dim': [997.0],
  'dropout': [0.19115533803668047],
  'gru_bias_reg': [5.222640591389245e-10],
  'gru_dim': [399.0],
  'gru_kernel_reg': [8.078459790975857e-10],
  'gru_recc_reg': [6.100081276448957e-08],
  'lr1': [0.0019427338445684181],
  'lr2': [0.00010186610979091696],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [3],
  'spatial_drop': [0.22614208466560007]},
 {'batch_size': [41.0],
  'bidirectional': [1],
  'dense_dim': [973.0],
  'dropout': [0.20050865242539928],
  'gru_bias_reg': [1.1451922219328368e-08],
  'gru_dim': [392.0],
  'gru_kernel_reg': [1.0516629869555607e-09],
  'gru_recc_reg': [1.2593577396164419e-06],
  'lr1': [0.0016205788115723873],
  'lr2': [0.00011538601448660545],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [3],
  'spatial_drop': [0.3803897135211322]},
 {'batch_size': [37.0],
  'bidirectional': [1],
  'dense_dim': [237.0],
  'dropout': [0.12273937792021693],
  'gru_bias_reg': [2.7055793227129377e-09],
  'gru_dim': [407.0],
  'gru_kernel_reg': [1.9122269544090935e-09],
  'gru_recc_reg': [1.5269966614646778e-06],
  'lr1': [0.0019545667587842147],
  'lr2': [0.00034205962093229346],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [3],
  'spatial_drop': [0.239366738134983]},
 {'batch_size': [16.0],
  'bidirectional': [1],
  'dense_dim': [783.0],
  'dropout': [0.19251258375962352],
  'gru_bias_reg': [1.6889374260262626e-08],
  'gru_dim': [401.0],
  'gru_kernel_reg': [2.1685591958268602e-10],
  'gru_recc_reg': [8.179324804312695e-07],
  'lr1': [0.0016526011543532724],
  'lr2': [0.00020256532886638333],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [3],
  'spatial_drop': [0.3994866402536531]},
 {'batch_size': [90.0],
  'bidirectional': [1],
  'dense_dim': [995.0],
  'dropout': [0.22199634608987717],
  'gru_bias_reg': [1.162462425352503e-10],
  'gru_dim': [234.0],
  'gru_kernel_reg': [2.6194833614316782e-09],
  'gru_recc_reg': [3.719163247084088e-10],
  'lr1': [0.002927034550684743],
  'lr2': [0.0003517541152030026],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [3],
  'spatial_drop': [0.01670368024684063]},
 {'batch_size': [71.0],
  'bidirectional': [1],
  'dense_dim': [999.0],
  'dropout': [0.31884760544934115],
  'gru_bias_reg': [3.489577142351314e-10],
  'gru_dim': [310.0],
  'gru_kernel_reg': [3.464891994757166e-10],
  'gru_recc_reg': [1.5572206853920122e-09],
  'lr1': [0.0019593704753874588],
  'lr2': [0.000472320376364918],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [3],
  'spatial_drop': [0.0905500602659617]},
 {'batch_size': [21.0],
  'bidirectional': [1],
  'dense_dim': [971.0],
  'dropout': [0.07825915818812121],
  'gru_bias_reg': [9.370609019263161e-08],
  'gru_dim': [452.0],
  'gru_kernel_reg': [1.1699932655152522e-09],
  'gru_recc_reg': [1.118300667865804e-06],
  'lr1': [0.0009194436554174992],
  'lr2': [0.00011484392164348851],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [0],
  'spatial_drop': [0.3127687159502792]},
 {'batch_size': [41.0],
  'bidirectional': [1],
  'dense_dim': [888.0],
  'dropout': [0.18388910768345024],
  'gru_bias_reg': [1.0198714543301587e-08],
  'gru_dim': [322.0],
  'gru_kernel_reg': [4.3029409259060245e-08],
  'gru_recc_reg': [3.1764731171976037e-06],
  'lr1': [0.0017624941021206772],
  'lr2': [0.0006933312845721429],
  'mask_zero': [1],
  'optimizer': [0],
  'pool_type': [3],
  'spatial_drop': [0.3120345974541232]}
                 ]

In [99]:
#Pick top 10 parameter settings, Bag models for those settings
#Try linear blending on those settings
#NUM_BAGS = 5
#cvlist3 = list(StratifiedShuffleSplit(n_splits=NUM_BAGS, test_size=0.05, random_state=786).split(y, y[:,2]))
y = train[list_classes].values

def shuffle_train_predict(model, cvlist, X, y, X_test, lr_decay):
    y_trues = []
    y_preds = []
    y_test_preds = []
    scores = []
    LRDecay = LearningRateScheduler(lr_decay)

    for tr_index, val_index in cvlist:
        X_tr, y_tr = X[tr_index, :], y[tr_index, :]
        X_val, y_val = X[val_index, :], y[val_index, :]
        RocAuc = RocAucEvaluation(validation_data=(X_val, y_val), interval=1)

        model.set_params(**{'callbacks':[RocAuc, LRDecay]})
        model.fit(X_tr, y_tr)

        y_pred = model.predict(X_val)
        y_test_pred = model.predict(X_test)
        score = roc_auc_score(y_val, y_pred)
        scores.append(score)
        print("ROC AUC for this fold is ", score)
        y_trues.append(y_val)
        y_preds.append(y_pred)
        y_test_preds.append(y_test_pred)
        K.clear_session()
        gc.collect()
        #break
    y_trues = np.concatenate(y_trues)
    y_preds = np.concatenate(y_preds)
    y_test_preds = np.mean(y_test_preds, axis=0)
    print("Shape of test _preds is ", y_test_preds.shape)
    print("Means of val and test preds are {} and {}".format(np.mean(y_preds, axis=1), np.mean(y_test_preds, axis=1)))
    score = roc_auc_score(y_trues, y_preds)
    print("Overall score on 10 fold CV is {}".format(score))
    
    return y_preds, y_trues, y_test_preds

def oof_train_predict(model, cvlist, X, y, X_test, lr_decay):
    #y_trues = []
    y_test_preds = []
    scores = []
    y_preds = np.zeros(y.shape)
    LRDecay = LearningRateScheduler(lr_decay)

    for tr_index, val_index in cvlist:
        X_tr, y_tr = X[tr_index, :], y[tr_index, :]
        X_val, y_val = X[val_index, :], y[val_index, :]
        RocAuc = RocAucEvaluation(validation_data=(X_val, y_val), interval=1)

        model.set_params(**{'callbacks':[RocAuc, LRDecay]})
        model.fit(X_tr, y_tr)

        y_pred = model.predict(X_val)
        y_test_pred = model.predict(X_test)
        score = roc_auc_score(y_val, y_pred)
        scores.append(score)
        print("ROC AUC for this fold is ", score)
        #y_trues.append(y_val)
        y_preds[val_index, :] = y_pred
        y_test_preds.append(y_test_pred)
        K.clear_session()
        gc.collect()
        #break
    #y_trues = np.concatenate(y_trues)
    #y_preds = np.concatenate(y_preds)
    y_test_preds = np.mean(y_test_preds, axis=0)
    print("Shape of test _preds is ", y_test_preds.shape)
    print("Means of val and test preds are {} and {}".format(np.mean(y_preds, axis=0), np.mean(y_test_preds, axis=0)))
    score = roc_auc_score(y, y_preds)
    print("Overall score on 10 fold CV is {}".format(score))
    
    return y_preds, y_test_preds

def train_predict(parameter_space):
    
    def lr_decay(epoch):
        if epoch == 0:
            return parameter_space['lr1'][0]
        if epoch == 1:
            return parameter_space['lr2'][0]
        if epoch == 2:
            return parameter_space['lr2'][0]
        if epoch == 3:
            return parameter_space['lr2'][0]
        if epoch == 4:
            return parameter_space['lr2'][0]
        if epoch == 5:
            return parameter_space['lr2'][0]
    
    model = GRUClassifier(initial_weights=embedding_matrix, bidirectional=[True, False][parameter_space['bidirectional'][0]],
                          gru_dim = int(parameter_space['gru_dim'][0]),
                          dense_dim = int(parameter_space['dense_dim'][0]),
                          mask_zero = [True, False][parameter_space['mask_zero'][0]],
                          pool_type = ['avg', 'max', 'attn', 'all'][parameter_space['pool_type'][0]],
                          batch_size= int(parameter_space['batch_size'][0]), 
                          epochs=5,
                          optimizer=["adam", "rmsprop"][parameter_space['optimizer'][0]],
                          dropout=parameter_space['dropout'][0],
                          spatial_drop=parameter_space['spatial_drop'][0],
                          gru_kernel_regularization = parameter_space["gru_kernel_reg"][0],
                          gru_recurrent_regularization = parameter_space["gru_recc_reg"][0],
                          gru_bias_regularization = parameter_space["gru_bias_reg"][0],
                          #embeddings_regularization = parameter_space["embeddings_reg"],
                          )

    y_preds, y_trues, y_test_preds = shuffle_train_predict(model, cvlist2, X_train, y, X_test, lr_decay) 
    #y_preds, y_test_preds = shuffle_train_predict(model, cvlist2, X_train, y, X_test, lr_decay)
    return y_preds, y_trues, y_test_preds

#####
y_preds_all = []
y_trues_all = []
y_test_preds_all = []
for params in parameter_list:
    y_preds, y_trues, y_test_preds = train_predict(params)
    y_preds_all.append(y_preds)
    y_trues_all.append(y_trues)
    y_test_preds_all.append(y_test_preds)
    


Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.988271 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989440 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.990099 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.990488 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.990278 

ROC AUC for this fold is  0.990278184376726
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.987985 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989558 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989969 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989952 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989465 

ROC AUC for this fold is  0.9894650710770194
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.986376 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.988095 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.988792 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989406 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989219 

ROC AUC for this fold is  0.9892188842059451
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.989085 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.990208 

Epoch 3/5
 ROC-A

 ROC-AUC - epoch: 4 - score: 0.989688 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989924 

ROC AUC for this fold is  0.9899235072051001
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.985469 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.986570 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.987546 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.988261 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.988102 

ROC AUC for this fold is  0.9881018769615434
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.987986 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.990316 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.990834 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.991103 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.991371 

ROC AUC for this fold is  0.9913705992266468
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.987637 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989075 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989693 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989807 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989521 

ROC AUC for this fold is 

 ROC-AUC - epoch: 2 - score: 0.990195 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.990138 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.990442 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.990674 

ROC AUC for this fold is  0.9906735420612999
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.988000 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989086 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989631 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989837 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989856 

ROC AUC for this fold is  0.9898562020250997
Shape of test _preds is  (153164, 6)
Means of val and test preds are [1.9118090e-01 6.5650656e-06 2.7719328e-01 ... 3.4689543e-05 4.8451344e-04
 5.9451890e-04] and [7.0314580e-01 4.7876467e-05 5.1864703e-05 ... 1.8435554e-05 2.8027338e-04
 3.8648781e-01]
Overall score on 10 fold CV is 0.989678491115494
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.987633 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989374 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989658 

Epoch 4/5
 ROC-A

 ROC-AUC - epoch: 5 - score: 0.989743 

ROC AUC for this fold is  0.9897431146144707
Shape of test _preds is  (153164, 6)
Means of val and test preds are [2.1985795e-01 1.8366880e-05 3.3459386e-01 ... 6.8765868e-05 3.7406789e-04
 4.7276661e-04] and [7.3218870e-01 9.3206800e-05 1.1844366e-04 ... 5.5521086e-05 6.5178750e-04
 3.9143741e-01]
Overall score on 10 fold CV is 0.9894736728787645
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.988015 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989966 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.990436 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.990622 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.990781 

ROC AUC for this fold is  0.990780543449331
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.988842 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.990045 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.990134 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.990652 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.990464 

ROC AUC for this fold is  0.9904637530707019
Epoch 1/5
 ROC-AUC - 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989051 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989155 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989834 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.990148 

ROC AUC for this fold is  0.9901482212505526
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.984931 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.987363 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.988202 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.988174 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.988247 

ROC AUC for this fold is  0.9882468241188661
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.988833 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.990773 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.990429 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.990877 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.991160 

ROC AUC for this fold is  0.9911599136524378
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.985872 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989023 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989020 

Epoch 4/5
 ROC-

 ROC-AUC - epoch: 5 - score: 0.987653 

ROC AUC for this fold is  0.9876531586364532
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.987825 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989539 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.990222 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.990294 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.990372 

ROC AUC for this fold is  0.9903716970946413
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.985864 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989731 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.990094 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989667 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989535 

ROC AUC for this fold is  0.9895351147457467
Shape of test _preds is  (153164, 6)
Means of val and test preds are [2.7396452e-01 6.4073902e-06 2.5150040e-01 ... 6.8276473e-05 3.1031275e-05
 1.3266895e-03] and [7.2734410e-01 3.2940148e-05 2.6095555e-05 ... 5.0629755e-05 5.9989648e-05
 3.8307980e-01]
Overall score on 10 fold CV is 0.9887741150163151
Epoch 1/5
 ROC-AUC -

 ROC-AUC - epoch: 3 - score: 0.990421 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989958 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.990450 

ROC AUC for this fold is  0.9904495203241672
Shape of test _preds is  (153164, 6)
Means of val and test preds are [1.9015951e-01 1.1506774e-05 2.8803870e-01 ... 4.6468806e-05 1.4614485e-03
 7.0325827e-04] and [7.2139829e-01 1.6970062e-05 3.0218387e-06 ... 2.4934605e-05 5.1398456e-05
 3.7478706e-01]
Overall score on 10 fold CV is 0.9893812708111328
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.982693 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.988494 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989269 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989459 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989983 

ROC AUC for this fold is  0.9899831523748529
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.987198 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989119 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989408 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989692 

Epoch 5/5
 ROC-

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.990288 

ROC AUC for this fold is  0.9902877022471545
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.988244 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989459 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.989692 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989817 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.989504 

ROC AUC for this fold is  0.9895043743279812
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.983572 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.987765 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.988306 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.989109 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.988499 

ROC AUC for this fold is  0.988499150803626
Epoch 1/5
 ROC-AUC - epoch: 1 - score: 0.988500 

Epoch 2/5
 ROC-AUC - epoch: 2 - score: 0.989828 

Epoch 3/5
 ROC-AUC - epoch: 3 - score: 0.990587 

Epoch 4/5
 ROC-AUC - epoch: 4 - score: 0.990415 

Epoch 5/5
 ROC-AUC - epoch: 5 - score: 0.990548 

ROC AUC for this fold is  0.9905475220950555
Epoch 1/5
 ROC-AUC - 

In [104]:
#Check corelation between different predictions
np.corrcoef(y_preds_all[0][:,0], y_preds_all[1][:,0])

array([[1.        , 0.98016906],
       [0.98016906, 1.        ]])

In [105]:
np.array(y_preds_all)[[1, 3,4,5,6]]

array([[[7.79079080e-01, 9.86868516e-03, 1.08758599e-01, 2.11035367e-03,
         1.91006184e-01, 2.80884445e-01],
        [6.97876294e-06, 1.41036482e-09, 1.00620821e-06, 2.39269671e-08,
         1.57298246e-07, 5.10783993e-09],
        [7.22955167e-01, 3.86204221e-03, 2.47543510e-02, 8.58226034e-04,
         1.13805935e-01, 1.66698694e-01],
        ...,
        [1.14090135e-03, 8.08621508e-07, 9.11738389e-05, 2.93071548e-06,
         8.42992813e-06, 1.21785797e-05],
        [7.08651263e-04, 5.32397848e-07, 3.23111948e-04, 7.07196435e-08,
         7.75570297e-05, 3.46000852e-05],
        [1.27295144e-02, 7.17958937e-06, 1.12834608e-03, 7.01096178e-06,
         7.89984944e-04, 1.13448048e-04]],

       [[6.53501987e-01, 1.20655801e-02, 7.93245658e-02, 9.36802477e-03,
         1.79860622e-01, 3.85026962e-01],
        [7.26245999e-05, 2.80117433e-07, 3.13615710e-05, 4.90710249e-07,
         4.90288312e-06, 5.41392353e-07],
        [9.27289128e-01, 1.93929393e-02, 1.15157492e-01, 3.500845

In [106]:
#Try different stacking approaches
from scipy.stats import gmean, hmean

preds_mean = gmean(np.array(y_preds_all)[[0,2,3,4,5,6]] , axis=0)
print(roc_auc_score(y_trues_all[0], preds_mean))
test_preds_mean = gmean(np.array(y_test_preds_all)[[0, 2,3,4,5,6]], axis=0)


0.9904810037095175


In [107]:
for i, col in enumerate(list_classes):
    print(col)
    print(roc_auc_score(y_trues_all[0][:, i], preds_mean[:, i]))

toxic
0.9855431359864897
severe_toxic
0.9923104034788409
obscene
0.9927540190357128
threat
0.9939637544521266
insult
0.9889492486171947
identity_hate
0.9893654606867403


In [41]:
from nltk.stem import PorterStemmer
stemmer = PorterStemmer()
stemmer.stem("mother")

'mother'

In [108]:
#y_trues_stacked = np.concatenate(np.array(y_preds_all)[[0,2, 3,4,5,6]], axis=1)
preds_stacked = np.concatenate(np.array(y_preds_all)[[0,2, 3,4,5,6]], axis=1)
test_preds_stacked = np.concatenate(np.array(y_test_preds_all)[[0,2, 3,4,5,6]], axis=1)


In [109]:
import lightgbm as lgb
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
#y_trues = train[list_classes].values
cvlist = list(StratifiedKFold(10,random_state=1).split(preds_stacked, y[:,5]))
y_preds2 = np.zeros((preds_stacked.shape[0], len(list_classes)))
#y_preds2 =[]
y_test_preds2 = np.zeros((test_preds_stacked.shape[0], len(list_classes)))
for i, col in enumerate(list_classes):
    y_tmp = y[:, i]
    #model = RandomForestClassifier(n_estimators=100, max_depth=6, min_samples_leaf=50, class_weight='balanced', n_jobs=-1)
    model = lgb.LGBMClassifier(n_estimators=100, num_leaves=8, learning_rate=0.1, min_child_samples=500,
                               subsample=0.9, colsample_bytree=0.8, reg_lambda=1.0, class_weight='balanced')
    #model = LogisticRegression(C=0.01)
    y_preds2[:, i] = cross_val_predict(model, preds_stacked, y_tmp, cv=cvlist, n_jobs=1, method='predict_proba')[:,1]
    print(preds.shape)
    #y_preds2.append(preds)
    y_test_preds2[:, i] = model.fit(preds_stacked, y_tmp).predict_proba(test_preds_stacked)[:,1]
    print("Score for class {} is {}".format(col, roc_auc_score(y_tmp, y_preds2[:, i])))
print("Over auc score", roc_auc_score(y, y_preds2))

ValueError: Found input variables with inconsistent numbers of samples: [39895, 159571]

In [110]:
sample_submission = pd.read_csv("../input/sample_submission.csv")
sample_submission[list_classes] = test_preds_mean
sample_submission.to_csv('../input/gru_fasttext_10bags_submission.csv', index=False)

In [131]:
print(np.mean(test_preds_mean > 0.5,axis=0))
print(np.mean(preds_mean > 0.5, axis=0))

[0.22635215 0.00814813 0.13238751 0.00284009 0.09520514 0.01384137]
[0.0891014  0.00545839 0.0506671  0.00133483 0.0458166  0.00562759]


In [None]:
from sklearn.

In [111]:
#belnding with public
public_v1 = pd.read_csv("../input/hight_of_blend_v2.csv")
test_preds_mean2 = hmean([test_preds_mean, public_v1[list_classes].values], axis=0)
print(np.mean(test_preds_mean2 > 0.5,axis=0))
sample_submission[list_classes] = test_preds_mean2
sample_submission.to_csv('../input/gru_fasttext_10bags_submission_wplb.csv', index=False)

[0.21882427 0.00652242 0.12267243 0.00212844 0.09681126 0.01348881]


In [33]:
public_v1[list_classes]

Unnamed: 0,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0.990675,0.302595,0.966957,0.026856,0.899121,0.343316
1,0.001449,0.000313,0.000519,0.000035,0.000688,0.000207
2,0.003754,0.000471,0.001512,0.000137,0.001137,0.000324
3,0.000564,0.000121,0.000271,0.000093,0.000367,0.000055
4,0.006134,0.000244,0.001113,0.000199,0.001027,0.000144
5,0.000919,0.000121,0.000319,0.000184,0.000764,0.000094
6,0.003582,0.000071,0.000541,0.000048,0.001329,0.000100
7,0.550533,0.002491,0.028009,0.001836,0.092859,0.002300
8,0.028588,0.000160,0.005956,0.000111,0.008681,0.000373
9,0.000655,0.000068,0.000287,0.000043,0.000508,0.000057
