In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
import tensorflow as tf
import random
import os
from sklearn.metrics import roc_auc_score, log_loss
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler, RobustScaler, PowerTransformer
from sklearn.model_selection import train_test_split

In [None]:
# Load the labelled training table and the unlabelled test table from the
# relative input directory.
train_csv = '../input/tabular-playground-series-nov-2021/train.csv'
test_csv = '../input/tabular-playground-series-nov-2021/test.csv'
train = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)

In [None]:
# Keep the test-row identifiers, then remove the 'id' column from both frames
# so it is not used as a model input.
test_id = test['id']
train = train.drop(columns=['id'])
test = test.drop(columns=['id'])

Feature engineering: row-wise summary statistics over skew-based column groups

In [None]:
# Row-wise summary features computed over two groups of the original feature
# columns; the groups are defined by each column's skewness in the training data.
TARGET_COL = 'target'
SKEW_THRESHOLD = 2

# (new column, source group, pandas row-wise reducer), listed in the order the
# columns are appended to the frames.
#   'h' -> highly skewed columns (skew >= SKEW_THRESHOLD)
#   'l' -> remaining columns (skew < SKEW_THRESHOLD), noted as bimodal
ROW_STATS = [
    ('median_h', 'h', 'median'),
    ('var_h', 'h', 'var'),
    ('mean_l', 'l', 'mean'),
    ('std_l', 'l', 'std'),
    ('median_l', 'l', 'median'),
    ('skew_l', 'l', 'skew'),
    ('max_l', 'l', 'max'),
    ('var_l', 'l', 'var'),
]
engineered_cols = [name for name, _, _ in ROW_STATS]

# Only raw features take part in the grouping: the target must never end up in
# a feature group (it does not exist in `test`), and skipping the engineered
# columns keeps this cell safe to re-run.
base_cols = [
    col for col in train.columns
    if col != TARGET_COL and col not in engineered_cols
]

# Skewness is computed once. Groups keep the frame's column order, so the
# floating-point summation order is identical on every run.
col_skew = train[base_cols].skew()
h_skew = col_skew[col_skew >= SKEW_THRESHOLD].index  # with skewed
l_skew = list(col_skew[col_skew < SKEW_THRESHOLD].index)  # Bimodal

skew_groups = {'h': h_skew, 'l': l_skew}
for frame in (train, test):
    for new_col, group, reducer in ROW_STATS:
        # Equivalent to e.g. frame['median_h'] = frame[h_skew].median(axis=1)
        frame[new_col] = getattr(frame[skew_groups[group]], reducer)(axis=1)

In [None]:
# Hold out 10% of the labelled rows as a validation set (passed later as
# `validation_data`, which the val_loss-monitoring callbacks rely on).
# A fixed random_state makes the split reproducible, and stratifying on the
# target keeps the class proportions the same in both parts.
# NOTE: `train` is rebound to the 90% part, so re-running this cell on its own
# splits the already-reduced frame again.
SPLIT_SEED = 42
train, valid = train_test_split(
    train,
    test_size=0.1,
    random_state=SPLIT_SEED,
    stratify=train['target'],
)
X_train = train.drop(['target'], axis=1)
y_train = train.target
X_valid = valid.drop(['target'], axis=1)
y_valid = valid.target
X_test = test

Model: Keras dense network and randomized hyperparameter search

In [None]:
# seed
seed = 42


def seedAll(seed):
    """Seed every random number source this notebook touches.

    Seeds Python's `random`, NumPy's global generator and TensorFlow, and
    exports PYTHONHASHSEED to the process environment.

    Args:
        seed: integer seed applied to all of the above.
    """
    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting
    # it here presumably does not change hashing in the running kernel --
    # confirm whether this is relied upon.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)  # Python stdlib RNG
    np.random.seed(seed)  # NumPy global RNG
    tf.random.set_seed(seed)  # TensorFlow global seed


seedAll(seed)

In [None]:
# Both callbacks watch the validation loss (lower is better) and log when they act.
_watch_val_loss = dict(monitor='val_loss', mode='min', verbose=1)

# Stop training after 20 epochs without improvement and restore the best weights.
early_stopping = callbacks.EarlyStopping(
    patience=20,
    min_delta=0,
    baseline=None,
    restore_best_weights=True,
    **_watch_val_loss,
)

# Multiply the learning rate by 0.2 after 7 epochs without improvement.
plateau = callbacks.ReduceLROnPlateau(
    patience=7,
    factor=.2,
    **_watch_val_loss,
)

def create_model(each_layers, activation, learning_rate, n_features=None):
    """Build and compile a fully connected binary classifier.

    Args:
        each_layers: sequence of ints, the number of units of each hidden
            Dense layer in order. May be empty, in which case the network is
            only the sigmoid output layer.
        activation: activation used by every hidden layer.
        learning_rate: learning rate passed to the Adam optimizer.
        n_features: number of input features. When omitted, falls back to
            `X_train.shape[1]` from the notebook namespace (read at call time),
            which is what existing callers rely on.

    Returns:
        A compiled `keras.Sequential` model with a single sigmoid output,
        binary cross-entropy loss and AUC as the reported metric.
    """
    if n_features is None:
        n_features = X_train.shape[1]

    model = keras.Sequential()
    # Declaring the input explicitly gives the model a defined input shape even
    # when there are no hidden layers.
    model.add(keras.Input(shape=(n_features,)))
    for n_units in each_layers:
        model.add(layers.Dense(n_units, activation=activation))
    model.add(layers.Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['AUC']
    )
    return model

In [None]:
# Wrap the model builder so it can be used as a scikit-learn estimator; the
# builder's arguments are supplied later through the search parameters.
model_nn = KerasClassifier(
    build_fn=create_model,
)

In [None]:
# from sklearn.metrics import make_scorer, roc_auc_score
# scoring = {'auc': make_scorer(roc_auc_score, needs_proba=True, refit=False)}

In [None]:
import copy

units = [2 ** i for i in range(1, 11)]  # candidate layer widths: 2, 4, ..., 1024
comb_units = []  # filled by go(): one list of widths per candidate architecture
layer_count = 3  # number of hidden layers in every candidate


def go(mincnt, maxcnt, curcnt, rlt, choices=None, out=None):
    """Enumerate every ordered selection (with repetition) of layer widths.

    Walks the selection tree depth-first and records a copy of the current
    selection each time its length lies in [mincnt, maxcnt].

    Args:
        mincnt: smallest selection length to record.
        maxcnt: largest selection length to record; recursion stops there.
        curcnt: length of the selection built so far (0 for a fresh start).
        rlt: working list holding the current selection. It is mutated during
            the walk and restored to its initial content before returning.
        choices: widths to pick from. Defaults to the module-level `units`.
        out: list that receives the recorded selections. Defaults to the
            module-level `comb_units`.
    """
    if choices is None:
        choices = units
    if out is None:
        out = comb_units

    # A start beyond maxcnt can never reach it; return instead of recursing forever.
    if curcnt > maxcnt:
        return
    if curcnt >= mincnt:
        out.append(copy.deepcopy(rlt))  # copy: rlt keeps changing below
    if curcnt == maxcnt:
        return
    for width in choices:
        rlt.append(width)
        go(mincnt, maxcnt, curcnt + 1, rlt, choices, out)
        rlt.pop()


# Every architecture with exactly `layer_count` hidden layers.
go(layer_count, layer_count, 0, [])

In [None]:
# NOTE(review): the bare name below (Korean for "stop") is not defined in any
# visible cell, so evaluating it presumably raises NameError and halts
# "Run All" before the hyperparameter search that follows. This looks like an
# intentional stop marker -- confirm before removing it.
스탑

In [None]:
# Search space for the randomized search. Every entry is a list of candidate
# values; only `each_layers` has more than one candidate, so the search is in
# effect over network architectures.
params_fit_nn = {
    # training options
    'callbacks': [[early_stopping, plateau]],
    'epochs': [700],
    'batch_size': [2048],
    'verbose': [1],
    # arguments of create_model()
    'each_layers': comb_units,
    'activation': ['swish'],
    'learning_rate': [0.0007],
    # fixed hold-out set, needed by the val_loss-monitoring callbacks
    'validation_data': [(X_valid, y_valid)]
}

# Evaluate 50 sampled candidates with 5-fold stratified CV, scored by ROC AUC.
# Both the fold assignment and the candidate sampling are pinned to `seed`, so
# the result does not depend on global RNG state or on cell execution order.
search_nn = RandomizedSearchCV(
    model_nn,
    params_fit_nn,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=seed),
    verbose=1,
    scoring='roc_auc',
    n_iter=50,
    random_state=seed,
)
search_nn.fit(X_train, y_train);  # trailing `;` hides the estimator repr in the cell output

In [None]:
# Tabulate the search results recorded by the fitted search object.
cv_results_nn = pd.DataFrame(search_nn.cv_results_)
cv_results_nn