![](https://storage.googleapis.com/kaggle-competitions/kaggle/28009/logos/header.png?)

In [None]:
from tensorflow import keras
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.optimizers import RMSprop

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from sklearn.pipeline import Pipeline

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense, BatchNormalization, Dropout, Embedding,  Flatten
from tensorflow.keras.models import Model, Sequential

from tensorflow.data import Dataset
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import QuantileTransformer,  KBinsDiscretizer

from sklearn import metrics
from sklearn.impute import SimpleImputer

from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, cross_val_score

# Load Dataset

In [None]:
%%time
train = pd.DataFrame(pd.read_csv('../input/tabular-playground-series-oct-2021/train.csv'))
# test  = pd.DataFrame(pd.read_csv('../input/tabular-playground-series-oct-2021/test.csv'))

In [None]:
# import datatable as dt
# train = dt.Frame(train)
# for i,col in enumerate(train):
#     if col.type.name == 'float64':
#         train[:,i] = dt.as_type(col,'float32')
# train = train.to_pandas()

# test = dt.Frame(test)
# for i,col in enumerate(test):
#     if col.type.name == 'float64':
#         test[:,i] = dt.as_type(col,'float32')
# test = test.to_pandas()

In [None]:
%%time
train['target'] = train['target'].astype(str)


# Tuning Neural Network

In [None]:
import skopt
from skopt.utils import use_named_args
from skopt.space.space import Real, Integer, Categorical
# Search space for scikit-optimize: one named dimension per hyper-parameter.
space = [
    Real(1e-6, 1e-1, prior='log-uniform', name='learning_rate'),
    Integer(1, 10, name='num_dense_layers'),
    Integer(5, 512, name='num_dense_nodes'),
    Categorical(['relu', 'sigmoid'], name='activation'),
]

In [None]:
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3):
    """Build and compile a fully connected binary classifier.

    The input width is taken from the module-level ``train[features]`` frame.

    Args:
        n_hidden: Number of hidden Dense layers (0 gives a single sigmoid unit).
        n_neurons: Number of units in every hidden layer.
        learning_rate: Learning rate of the SGD optimizer.

    Returns:
        The compiled ``keras.models.Sequential`` model.
    """
    n_inputs = train[features].shape[1]

    model = keras.models.Sequential()
    # InputLayer expects a shape tuple, not a bare integer.
    model.add(keras.layers.InputLayer(input_shape=(n_inputs,)))

    for _ in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="relu"))

    # The model is wrapped in KerasClassifier and scored through predict_proba,
    # so the single output must be a probability: sigmoid + binary cross-entropy.
    model.add(keras.layers.Dense(1, activation="sigmoid"))
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss="binary_crossentropy", optimizer=optimizer)
    return model

# Expose build_model through the scikit-learn estimator interface so the
# search utilities in the following cells can set its hyper-parameters.
keras_classif = keras.wrappers.scikit_learn.KerasClassifier(build_fn=build_model)

In [None]:
from scipy.stats import reciprocal
# Candidate values / distributions sampled by the randomized search,
# keyed by build_model's argument names.
param_distribs = dict(
    n_hidden=[0, 1, 2, 3],
    n_neurons=np.arange(1, 100),
    learning_rate=reciprocal(3e-4, 3e-2),
)

In [None]:
# Randomized hyper-parameter search over build_model's arguments (3-fold CV).
rnd_search_cv = RandomizedSearchCV(
    estimator=keras_classif,
    param_distributions=param_distribs,
    n_iter=1000,
    cv=3,
)

# validation_split and callbacks are passed through as fit parameters.
rnd_search_cv.fit(
    train[features],
    np.float32(train['target']),
    validation_split=0.2,
    callbacks=[keras.callbacks.EarlyStopping(patience=10)],
)

In [None]:
from skopt import BayesSearchCV
from sklearn.model_selection import StratifiedKFold

# Bayesian hyper-parameter search over the arguments accepted by build_model.
bayes_cv_tuner = BayesSearchCV(
    # Pass the estimator instance itself; the wrapper object is not callable.
    estimator = keras_classif,
    # Keys must match build_model's argument names, otherwise set_params()
    # rejects them. Tuning activations would require build_model to accept
    # them as arguments first.
    search_spaces = {
        'learning_rate': (0.0001, 0.001, 'log-uniform'),
        'n_hidden': (1, 5),
        'n_neurons': (1, 300),
    },
    scoring = 'roc_auc',
    cv = StratifiedKFold(
        n_splits=3,
        shuffle=True,
        random_state=42
    ),
    n_jobs = -1,
    n_iter = 1000,
    verbose = 0,
    refit = True,
    random_state = 42
)

In [None]:
# NOTE(review): `dimensions` must be the list of skopt dimensions matching the
# five arguments below; it is not defined in the visible part of the notebook.
@use_named_args(dimensions=dimensions)
def fitness(learning_rate, num_dense_layers, num_dense_nodes, activation, classification):
    """
    Train one model with the given hyper-parameters and return its negated
    validation AUC, so that scikit-optimize (a minimizer) maximizes the AUC.

    Hyper-parameters:
    learning_rate:     Learning-rate for the optimizer.
    num_dense_layers:  Number of dense layers.
    num_dense_nodes:   Number of nodes in each dense layer.
    activation:        Activation function for all layers.
    classification:    Activation function for output layer.

    Side effects:
    Saves the model to `path_best_model` and updates the global `best_auc`
    whenever the validation AUC improves on the best value seen so far.
    """

    # Print the hyper-parameters.
    print('learning rate: {0:.1e}'.format(learning_rate))
    print('num_dense_layers:', num_dense_layers)
    print('num_dense_nodes:', num_dense_nodes)
    print('activation:', activation)
    print('classification:', classification)
    
    # Create the neural network with these hyper-parameters.
    model = create_model(learning_rate=learning_rate,
                         num_dense_layers=num_dense_layers,
                         num_dense_nodes=num_dense_nodes,
                         activation=activation,
                         classification=classification)

    # Dir-name for the TensorBoard log-files.
    log_dir = log_dir_name(learning_rate, num_dense_layers,
                           num_dense_nodes, activation, classification)
    
    # Create a callback-function for Keras which will be
    # run after each epoch has ended during training.
    # This saves the log-files for TensorBoard.
    # Note that there are complications when histogram_freq=1.
    # It might give strange errors and it also does not properly
    # support Keras data-generators for the validation-set.
    callback_log = TensorBoard(
        log_dir=log_dir,
        histogram_freq=0,
        write_graph=True,
        write_grads=False,
        write_images=False)
   
    # Use Keras to train the model.
    history = model.fit(x= X_train,
                        y= y_train,
                        epochs=3,
                        batch_size=128,
                        validation_data=validation_data,
                        callbacks=[callback_log])

    # Get the AUC on the validation-set after the last training-epoch.
    # This requires the model to be compiled with a metric named 'auc'.
    auc = history.history['val_auc'][-1]

    # Print the validation AUC.
    print()
    print("AUC: {0:.2%}".format(auc))
    print()

    # Save the model if it improves on the best-found performance.
    # We use the global keyword so we update the variable outside
    # of this function.
    global best_auc

    # If the validation AUC of the saved model is improved ...
    if auc > best_auc:
        # Save the new model to harddisk.
        model.save(path_best_model)
        
        # Update the best validation AUC.
        best_auc = auc

    # Delete the Keras model with these hyper-parameters from memory.
    del model
    
    # Clear the Keras session, otherwise it will keep adding new
    # models to the same TensorFlow graph each time we create
    # a model with a different set of hyper-parameters.
    K.clear_session()
    
    # NOTE: Scikit-optimize does minimization so it tries to
    # find a set of hyper-parameters with the LOWEST fitness-value.
    # Because we are interested in the HIGHEST AUC, we need to
    # negate this number so it can be minimized.
    return -auc
# This function is adapted from Hvass-Labs' TensorFlow-Tutorials.

In [None]:
import talos

# AUC metric instance that ann_model (defined below) passes to model.compile().
auc = tf.keras.metrics.AUC()
def ann_model(x_train, y_train, x_val, y_val, params):
    """Build, compile and fit a dense binary classifier for one parameter set.

    Args:
        x_train: Training features; ``x_train.shape[1]`` is the input width.
        y_train: Training labels.
        x_val: Validation features.
        y_val: Validation labels.
        params: Mapping with the keys ``'n_hidden'``, ``'n_neurons'`` and
            ``'learning_rate'``.

    Returns:
        A tuple ``(out, model)``: the object returned by ``model.fit`` and the
        fitted model.
    """
    model = keras.models.Sequential()
    # InputLayer expects a shape tuple, not a bare integer.
    model.add(keras.layers.InputLayer(input_shape=(x_train.shape[1],)))
    for _ in range(params["n_hidden"]):
        model.add(keras.layers.Dense(params['n_neurons'], activation="relu"))
    model.add(keras.layers.Dense(1, activation = "sigmoid"))

    optimizer = RMSprop(learning_rate=params['learning_rate'], rho=0.9, epsilon=1e-08)
    model.compile(loss='binary_crossentropy', optimizer = optimizer, metrics = [auc])

    # Create the callback here instead of relying on a global that is only
    # defined in a later cell.
    early_stopping = EarlyStopping(patience = 3)
    out = model.fit(
        x = x_train,
        y = y_train,
        batch_size = 1024,
        callbacks=[early_stopping],
        validation_data = (x_val, y_val), 
        verbose = 1)
    return out, model

In [None]:
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize

space = [Integer(1, 5, name='n_hidden'),
         Integer(30, 400, name='n_neurons'),
         Real(1e-6, 1e-3, "log-uniform", name='learning_rate')
        ]

@use_named_args(space)
def objective(**params):
    """Fit the wrapped Keras classifier and return its negated hold-out AUC.

    scikit-optimize minimizes the returned value, so the AUC is negated.
    The score is computed on rows the model was not fitted on; scoring the
    training rows themselves would reward over-fitting.

    Args:
        **params: Hyper-parameters named after the dimensions in ``space``.

    Returns:
        ``-roc_auc`` on a fixed, stratified 20% hold-out split.
    """
    X_fit, X_holdout, y_fit, y_holdout = train_test_split(
        np.float32(train[features]),
        np.float32(train['target']),
        test_size=0.2,
        stratify=train['target'],
        random_state=0,  # same split for every call so scores are comparable
    )

    keras_classif.set_params(**params)
    keras_classif.fit(
        x = X_fit,
        y = y_fit,
        batch_size = 256,
        shuffle = True,
        callbacks = [EarlyStopping(patience = 5)],
        validation_split = 0.2,
    )

    holdout_proba = keras_classif.predict_proba(X_holdout)[:, 1]
    return -metrics.roc_auc_score(y_holdout, holdout_proba)

In [None]:
# Minimize the objective over `space` (50 evaluations, fixed seed).
res_gp = gp_minimize(
    func=objective,
    dimensions=space,
    n_calls=50,
    random_state=0,
)

In [None]:
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import BayesSearchCV

# Search space for BayesSearchCV, keyed by build_model's argument names.
# It gets its own name so that the list-based `space` that gp_minimize and
# `objective` rely on is not overwritten by a dict.
search_spaces = {
    'n_hidden': Integer(1, 5),
    'n_neurons': Integer(30, 400),
    # Learning rates span several orders of magnitude: sample on a log scale.
    'learning_rate': Real(1e-6, 1e-3, prior='log-uniform')
}
# `bs` is fitted in the next cell, so it has to be created here.
bs = BayesSearchCV(estimator = keras_classif, search_spaces = search_spaces, n_iter = 50, scoring = "roc_auc")

In [None]:
# Run the Bayesian hyper-parameter search on the training data.
bs.fit(
    X=train[features],
    y=np.float32(train['target']),
)

In [None]:
from skopt import gp_minimize
# Minimize the objective; `space` must be the list of skopt dimensions that
# `objective` was decorated with.
res_gp = gp_minimize(
    func=objective,
    dimensions=space,
    n_calls=50,
    random_state=0,
)

# Last expression of the cell: the lowest objective value found.
"Best score=%.4f" % res_gp.fun

In [None]:
# Build and compile the model inside the strategy scope so that it is created on the TPU.
# NOTE(review): `tpu_strategy` and `optimizer` are not defined in any earlier visible cell.
with tpu_strategy.scope():
    model = Sequential()
    model.add(Input(train[features].shape[1:]))
    model.add(Dense(150, activation='softmax'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=tf.keras.metrics.AUC(name='aucroc'),
    )

In [None]:
%%time
# train model normally
# NOTE(review): `lr_sc` is not defined anywhere in the visible notebook; this
# looks like an unfinished statement (possibly meant to reference
# `lr_schedule`) — confirm the intent or remove the cell.
lr_sc

## Simple NN

In [None]:
%%time 
n_neurons_1 = [64,32,16,8]
n_neurons_2 = [64,32,8, 4]

auc = tf.keras.metrics.AUC(name='aucroc')
optimizer = RMSprop(lr=5e-4, rho=0.9, epsilon=1e-08, decay=0.0)
es = EarlyStopping(patience = 3)
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-4,
        decay_steps = 450,
        decay_rate= 0.8)

kf = KFold(5, shuffle = True)
kf.split(train)

for i in range(1,5,1):
    lv1_keras = Sequential([
    Input(train[features].shape[1:]),
    Embedding(input_dim=64, output_dim=4),
    Flatten(),
    Dense(n_neurons_1[i-1],  activation='relu'),
    Dropout(0.4),
    Dense(n_neurons_2[i-1],  activation='relu'),
    Dropout(0.4),
    Dense(1, activation='sigmoid'),
    ])
    
    lv1_keras.compile(loss='binary_crossentropy', optimizer = optimizer, metrics=[auc])
    
    for train_ix, test_ix in kf.split(train):
        lv1_keras.fit(x = np.float32(train[features].iloc[train_ix,:]), y = np.float32(train['claim'].iloc[train_ix]),
              batch_size = 1024, shuffle = True, callbacks = es, validation_split = 0.2)
        train.loc[test_ix, f'lv1_keras_{i}'] = lv1_keras.predict(x = np.float32(train[features].iloc[test_ix,:]))

## LGBM (4)

In [None]:
from lightgbm import LGBMClassifier
# Poor man's grid search: four models that differ only in num_leaves.

# Seeded so that the out-of-fold predictions are reproducible.
kf = KFold(5, shuffle = True, random_state = 42)

# Convert once instead of re-slicing and re-casting the frame for every fold.
X_all = np.float32(train[features])
y_all = np.float32(train['target'])

for i in range(1, 5):
    lv1_lgbm = LGBMClassifier(num_leaves = 45 - 2*i)
    oof_col = f'lv1_lgbm_{i}'
    for train_ix, test_ix in kf.split(X_all):
        lv1_lgbm.fit(X = X_all[train_ix], y = y_all[train_ix])
        # kf yields positional indices; map them to index labels for .loc.
        train.loc[train.index[test_ix], oof_col] = lv1_lgbm.predict_proba(X = X_all[test_ix])[:, 1]

In [None]:
# search_space = {
#     'num_leaves': [35, 45, 55, 65],
#     'min_data_in_leaf': [800, 1200, 1500]
# }

# gs = GridSearchCV(
#     lv1_lgbm,
#     param_grid = search_space,
#     scoring = "roc_auc",
#     n_jobs = -1,
#     cv = 5,
#     verbose = 1000)

# gs.fit(dataset[features], dataset['claim'])

## CatBoost

In [None]:
from catboost import CatBoostClassifier
lv1_catb = CatBoostClassifier(verbose = 100)

# Seeded so that the out-of-fold predictions are reproducible.
kf = KFold(3, shuffle = True, random_state = 42)

# Convert once instead of re-slicing and re-casting the frame for every fold.
X_all = np.float32(train[features])
y_all = np.float32(train['target'])

# Out-of-fold probability of the positive class, filled in fold by fold.
train['lv1_catb'] = np.nan
for train_ix, test_ix in kf.split(X_all):
    lv1_catb.fit(X_all[train_ix], y = y_all[train_ix])
    # kf yields positional indices; map them to index labels for .loc.
    train.loc[train.index[test_ix], 'lv1_catb'] = lv1_catb.predict_proba(X_all[test_ix])[:, 1]

## Logistic Regression (4)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA

kf = KFold(5, shuffle = True, random_state = 4)

lv1_logi = LogisticRegression()

# Convert once instead of re-slicing and re-casting the frame for every fold.
X_all = np.float32(train[features])
y_all = np.float32(train['target'])

for i in range(1, 5):
    # A fractional n_components keeps enough components to explain that share
    # of the variance: 0.95, 0.90, 0.85 and 0.80 for i = 1..4.
    pca = PCA(n_components = 0.95 - (i-1)*0.05)
    # NOTE: as before, the PCA is fitted on all rows, including each held-out fold.
    pca.fit(X_all)
    # The projection does not depend on the fold, so compute it once per setting.
    X_pca = pca.transform(X_all)
    oof_col = f'lv1_logi_{i}'

    for train_ix, test_ix in kf.split(X_pca):
        lv1_logi.fit(X_pca[train_ix], y = y_all[train_ix])

        # kf yields positional indices; map them to index labels for .loc.
        train.loc[train.index[test_ix], oof_col] = lv1_logi.predict_proba(X_pca[test_ix])[:, 1]

# Meta Learner

In [None]:
# Level-2 (meta-learner) inputs: every column that is not an id, a raw feature
# or the label. The label column of this dataset is 'target'; leaving it in
# would hand the answer to the meta-learner. 'claim' stays in the exclusion
# list for backward compatibility.
non_meta_cols = {'claim', 'target', 'id', *features}
lv2_features = [col for col in train.columns if col not in non_meta_cols]

## XGBoost

In [None]:
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV
# Booster parameters passed to xgb.cv (fixed values, not a search grid).
search_space = {
    "max_depth": 6,
    "eta": 0.3,
    "subsample": 0.85,
    'colsample_bytree': 1,
    'objective':'reg:squarederror',
    'eval_metric': "auc"
}

# The label column is 'target' and is stored as strings, so cast it to a
# numeric array before handing it to DMatrix.
dtrain = xgb.DMatrix(train[lv2_features], label=np.float32(train['target']))

# xgb.train(search_space, dtrain, num_boost_round = 999, evals = evallist, early_stopping_rounds=10)
# cv = GridSearchCV(xgb_model, param_grid = search_space, scoring = "roc_auc", n_jobs = -1, cv = 4)
# # pd.DataFrame(cv.cv_results_)

# 5-fold cross-validation of the meta-learner, stopping once the AUC has not
# improved for 10 boosting rounds.
cv = xgb.cv(
    search_space,
    dtrain,
    num_boost_round=999,
    seed=42,
    nfold=5,
    metrics={'auc'},
    early_stopping_rounds=10,
)

In [None]:
# Display the cross-validation result returned by xgb.cv in the previous cell.
cv

In [None]:
# from sklearn.linear_model import LogisticRegression, LinearRegression



# ols = LinearRegression()
# ols.fit(train[lv2_features], train['claim'])
# cross_val_score(ols, train[lv2_features], y = train['claim'], scoring = "roc_auc", cv = 7)

### Calibrating the probabilities

In [None]:
# %%time
# from sklearn.linear_model import LogisticRegression, LinearRegression

# calibrated_lv2 = train[lv2_features].copy()
# logi = LogisticRegression()
# for feat in lv2_features[1:3]:
#     X = np.array(calibrated_lv2[feat]).reshape(-1, 1)
#     logi.fit(X, train['claim'])

#     calibrated_lv2[feat] = logi.predict_proba(X)[:,1]

# # lv2_train[lv2_features].hist()

# On Testing Data

## ANN

In [None]:
# lv1_keras.fit(x = np.float32(train[features]), y = np.float32(train['claim']),
#           batch_size = 1024, shuffle = True, callbacks = es, validation_split = 0.2)
# test['lv1_keras'] = lv1_keras.predict(x = np.float32(test[features]),  y = np.float32(test['claim']))

## LGBM

In [None]:
# lv1_lgbm.fit(np.float32(train[features]), y = np.float32(train['claim']))
# test['lv1_lgbm'] = lv1_lgbm.predict_proba(np.float32(test[features]))[:,1]

## CatBoost

In [None]:
# lv1_catb.fit(np.float32(train[features]), y = np.float32(train['claim']))
# test['lv1_catb'] = [item[1] for item in lv1_catb.predict_proba(np.float32(test[features]))]

## Logistic Regression

In [None]:
# lv1_logi.fit(np.float32(train[features]), y = np.float32(train['claim']))
# test['lv1_logi'] = lv1_logi.predict_proba(np.float32(test[features]))[:,1]

In [None]:
# sub = pd.read_csv('../input/tabular-playground-series-sep-2021/sample_solution.csv')
# sub['claim'] = ols.predict(X = np.float32(test[lv2_features]))
# sub=sub.set_index('id')
# sub.to_csv('simple_stacking.csv')

In [None]:
# # !pip install slack-webhook
# from slack_webhook import Slack
# slack = Slack(url=os.environ['SLACK_WEBHOOK_URL'])  # keep the webhook URL out of the notebook; read it from the environment
# slack.post(text="Tuning Finished!")