In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
import tensorflow as tf
import random
import os
from sklearn.metrics import roc_auc_score, log_loss
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler, RobustScaler, PowerTransformer
from sklearn.model_selection import train_test_split

In [2]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [3]:
test_id = test.id
train.drop(['id'], axis=1, inplace=True)
test.drop(['id'] ,axis=1, inplace=True)

feature engineering

In [4]:
h_skew = train.loc[:, train.skew() >= 2].columns # with skewed
l_skew = train.loc[:, train.skew() < 2].columns # Bimodal
l_skew = list(set(l_skew) - set(['target']))

# high skewd columns:
# add mean, median, skew, min, max, var, std
train['median_h'] = train[h_skew].median(axis=1)
test['median_h'] = test[h_skew].median(axis=1)
train['var_h'] = train[h_skew].var(axis=1)
test['var_h'] = test[h_skew].var(axis=1)

# bimodal columns:

train['mean_l'] = train[l_skew].mean(axis=1)
test['mean_l'] = test[l_skew].mean(axis=1)
train['std_l'] = train[l_skew].std(axis=1)
test['std_l'] = test[l_skew].std(axis=1)
train['median_l'] = train[l_skew].median(axis=1)
test['median_l'] = test[l_skew].median(axis=1)
train['skew_l'] = train[l_skew].skew(axis=1)
test['skew_l'] = test[l_skew].skew(axis=1)
train['max_l'] = train[l_skew].max(axis=1)
test['max_l'] = test[l_skew].max(axis=1)
train['var_l'] = train[l_skew].var(axis=1)
test['var_l'] = test[l_skew].var(axis=1)

In [5]:
train, valid = train_test_split(train, test_size=0.2)
X_train = train.drop(['target'], axis=1)
y_train = train.target
X_valid = valid.drop(['target'], axis=1)
y_valid = valid.target
X_test = test

model

In [6]:
# seed
seed = 42
def seedAll(seed):
    np.random.seed(seed) # set numpy seed
    tf.random.set_seed(seed) # set tensorflow seed
    random.seed(seed) # set random seed
    os.environ['PYTHONHASHSEED'] = str(seed)
seedAll(seed)

In [57]:
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=20,
    min_delta=0,
    baseline=None,
    restore_best_weights=True,
    verbose=1
)
plateau = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    patience=7,
    factor=.2,
    verbose=1        
)

def create_model(each_layers, activation):   
    model = keras.Sequential()
    for layer in range(len(each_layers)):
        if layer == 0:
            model.add(layers.Dense(each_layers[layer], activation=activation, input_shape=[X_train.shape[1]]))
        else:
            model.add(layers.Dense(each_layers[layer], activation=activation))
    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=0.0007),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

In [58]:
model_nn = KerasClassifier(build_fn=create_model)

  model_nn = KerasClassifier(build_fn=create_model)


In [14]:
from sklearn.metrics import make_scorer, roc_auc_score
scoring = {'auc': make_scorer(roc_auc_score, needs_proba=True, multi_class="ovr")}

In [43]:
params_fit_nn = {
    'epochs': [700],
    'batch_size': [2048],
    'verbose': [0],
    'each_layers': [[108, 64, 32], [128, 64, 32]],
    'activation': ['swish'],
    'validation_data': [(X_valid, y_valid)]
}

search_nn = GridSearchCV(model_nn,
                         params_fit_nn,
                         cv=KFold(n_splits=5, shuffle=True, random_state=42),
                         n_jobs=-1,
                         verbose=0
                        ).fit(X_train, y_train)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [72]:
pd.DataFrame(search_nn.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_activation,param_batch_size,param_each_layers,param_epochs,param_validation_data,param_verbose,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,19.38852,0.302755,1.870198,0.236647,swish,2048,"[4, 2]",10,"([f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10,...",10,"{'activation': 'swish', 'batch_size': 2048, 'e...",0.576042,0.701729,0.664448,0.504865,0.61374,0.612165,0.068667,3
1,20.387663,0.519953,2.277798,1.620004,swish,2048,"[16, 8]",10,"([f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10,...",10,"{'activation': 'swish', 'batch_size': 2048, 'e...",0.658625,0.628844,0.657906,0.634562,0.664958,0.648979,0.014432,2
2,23.563177,1.517136,1.8736,1.518453,swish,2048,"[32, 16]",10,"([f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10,...",10,"{'activation': 'swish', 'batch_size': 2048, 'e...",0.647458,0.673125,0.666031,0.661625,0.625573,0.654763,0.016832,1
