## Santander Customer Transaction Prediction

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline

In [2]:
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler, scale
from sklearn.metrics import roc_auc_score

In [41]:
train=pd.read_csv('train.csv')

In [60]:
test=pd.read_csv('test.csv')

In [4]:
features=[c for c in train.columns if c not in ['ID_code', 'target']]

In [11]:
train.head()

Unnamed: 0,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,...,log10_var_197,r1_var_198,r2_var_198,exp_var_198,tanh_var_198,log10_var_198,r1_var_199,r2_var_199,exp_var_199,tanh_var_199
0,8.9255,-6.7863,11.9081,5.093,11.4607,-9.2834,5.1187,18.6266,-4.92,5.747,...,0.932651,12.8,12.78,355151.6,1.0,1.106541,-1.1,-1.09,0.335746,-0.797389
1,11.5006,-4.1473,13.8588,5.389,12.3622,7.0433,5.6208,16.5338,3.1468,8.0851,...,0.943935,18.4,18.36,93736670.0,1.0,1.263778,2.0,1.95,7.041351,0.960459
2,8.6093,-2.7457,12.0805,7.8928,10.5825,-9.0837,6.9427,14.6155,-4.9193,5.9525,...,0.917374,14.7,14.72,2476112.0,1.0,1.167973,0.4,0.4,1.486612,0.37695
3,11.0604,-2.1518,8.9522,7.1957,12.5846,-1.8361,5.8428,14.925,-5.8609,8.245,...,1.012508,18.0,17.97,63700310.0,1.0,1.254541,-9.0,-9.0,0.000123,-1.0
4,9.8369,-1.4834,12.8746,6.6375,12.2772,2.4486,5.9405,19.2514,6.2654,7.6784,...,0.977865,18.0,18.0,65489470.0,1.0,1.25521,-8.8,-8.81,0.000149,-1.0


In [61]:
test.head()

Unnamed: 0,ID_code,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,...,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
0,test_0,11.0656,7.7798,12.9536,9.4292,11.4327,-2.3805,5.8493,18.2675,2.1337,...,-2.1556,11.8495,-1.43,2.4508,13.7112,2.4669,4.3654,10.72,15.4722,-8.7197
1,test_1,8.5304,1.2543,11.3047,5.1858,9.1974,-4.0117,6.0196,18.6316,-4.4131,...,10.6165,8.8349,0.9403,10.1282,15.5765,0.4773,-1.4852,9.8714,19.1293,-20.976
2,test_2,5.4827,-10.3581,10.1407,7.0479,10.2628,9.8052,4.895,20.2537,1.5233,...,-0.7484,10.9935,1.9803,2.18,12.9813,2.1281,-7.1086,7.0618,19.8956,-23.1794
3,test_3,8.5374,-1.3222,12.022,6.5749,8.8458,3.1744,4.9397,20.566,3.3755,...,9.5702,9.0766,1.658,3.5813,15.1874,3.1656,3.9567,9.2295,13.0168,-4.2108
4,test_4,11.7058,-0.1327,14.1295,7.7506,9.1035,-8.5848,6.8595,10.6048,2.989,...,4.2259,9.1723,1.2835,3.3778,19.5542,-0.286,-5.1612,7.2882,13.926,-9.1846



## Data exploration analysis
- Thanks to Santander EDA and Prediction from Gabriel Preda (https://www.kaggle.com/gpreda/santander-eda-and-prediction)
- No missing data in both train and test datasets
- Standard deviation is relatively large for both train and test variable data, mean values are distributed over a large range
- Only 10 % train data with a target value of 1
- Barely no correlation among features
- Var 68, 313, 126 have duplicated values of about 1000, 300, 300, respectively.

## Feature engineering

#### Add the polynominal feature columns up to power of 4, add root and rounded feature columns

In [6]:
for feature in features:
    #train['mean_'+feature] = (train[feature].mean()-train[feature])
    #train['z_'+feature] = (train[feature] - train[feature].mean())/train[feature].std(ddof=0)
    #train['sq_'+feature] = (train[feature])**2
    #train['sqrt_'+feature] = np.abs(train[feature])**(1/2)
    #train['c_'+feature] = (train[feature])**3
    #train['p4_'+feature] = (train[feature])**4
    train['r1_'+feature] = np.round(train[feature], 1)
    train['r2_'+feature] = np.round(train[feature], 2)
    train['exp_'+feature]=np.exp(train[feature])
    train['tanh_'+feature]=np.tanh(train[feature])
    if train[feature].min()>0:
        train['log10_'+feature]=np.log10(train[feature])

In [7]:
for feature in features:
    # test['mean_'+feature] = (train[feature].mean()-test[feature])
    # test['z_'+feature] = (test[feature] - train[feature].mean())/train[feature].std(ddof=0)
    #test['sq_'+feature] = (test[feature])**2
    #test['sqrt_'+feature] = np.abs(test[feature])**(1/2)
    #test['c_'+feature] = (test[feature])**3
    #test['p4_'+feature] = (test[feature])**4
    test['r1_'+feature] = np.round(test[feature], 1)
    test['r2_'+feature] = np.round(test[feature], 2)
    test['exp_'+feature]=np.exp(test[feature])
    test['tanh_'+feature]=np.tanh(test[feature])
    if test[feature].min()>0:
        test['log10_'+feature]=np.log10(test[feature])

In [42]:
y=train['target']
X = train.drop(['target', 'ID_code'], axis=1)

In [43]:
sc = StandardScaler()
X= sc.fit_transform(X)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)

In [44]:
X_train.shape, X_valid.shape, y_train.shape, y_valid.shape

((160000, 200), (40000, 200), (160000,), (40000,))

In [62]:
X_test = test.drop(['ID_code'],axis = 1)
X_test= sc.fit_transform(X_test)

In [18]:
#sc = StandardScaler()
#X= sc.fit_transform(X)
#X_test = sc.transform(X_test)
n_splits = 5 # Number of K-fold Splits

splits = list(StratifiedKFold(n_splits=n_splits, shuffle=True).split(X, y))

## Classification models

### LightGBM

In [11]:
import lightgbm as lgb

In [9]:
train_data = lgb.Dataset(X_train, label=y_train)
validation_data = lgb.Dataset(X_valid, label=y_valid)

In [74]:
lgb_param = {
        'num_leaves': 6,
        'max_bin': 63,
        'min_data_in_leaf': 45,
        'learning_rate': 0.01,
        'min_sum_hessian_in_leaf': 0.000446,
        'bagging_fraction': 0.55, 
        'bagging_freq': 5, 
        'max_depth': 14,
        'save_binary': True,
        'seed': 42,
        'feature_fraction_seed': 31415,
         'feature_fraction': 0.51,
        'bagging_seed': 42,
        'drop_seed': 42,
        'data_random_seed': 42,
        'objective': 'binary',
        'boosting_type': 'gbdt',
        'verbose': 1,
        'metric': 'auc',
        'is_unbalance': True,
        'boost_from_average': False,
    }

In [11]:
num_round = 15000
lgb_clf = lgb.train(param, train_data, num_round, valid_sets=[validation_data],verbose_eval=1000, early_stopping_rounds = 250)

Training until validation scores don't improve for 250 rounds.
[1000]	valid_0's auc: 0.847525
[2000]	valid_0's auc: 0.872533
[3000]	valid_0's auc: 0.883519
[4000]	valid_0's auc: 0.889354
[5000]	valid_0's auc: 0.89292
[6000]	valid_0's auc: 0.894756
[7000]	valid_0's auc: 0.896039
[8000]	valid_0's auc: 0.896756
[9000]	valid_0's auc: 0.897017
Early stopping, best iteration is:
[9605]	valid_0's auc: 0.897263


#### With exp, tanh and log10, auc got slightly higher (0.8972>0.8969)

In [21]:
lgb_clf.save_model('exp_tanh_lgb_clf.txt')
# lgb = lgb.Booster(model_file='lgb_clf.txt')  

<lightgbm.basic.Booster at 0x172b4d9bcc0>

In [16]:
importance_df = pd.DataFrame()
new_features=[c for c in train.columns]
importance_df["feature"] = new_features
importance_df["importance"] = lgb_clf.feature_importance()

In [20]:
pd.options.display.max_rows=500
importance_df.sort_values(by=['importance'], ascending=False)[:500]

Unnamed: 0,feature,importance
610,r1_var_94,163
432,r1_var_53,156
680,tanh_var_109,151
205,r1_var_1,143
597,r2_var_91,143
297,tanh_var_21,140
604,tanh_var_92,139
1030,r1_var_191,137
298,r1_var_22,136
228,r2_var_6,134


#### Feature scaling

In [22]:
sc = StandardScaler()
scaled_X_train=sc.fit_transform(X_train)
scaled_X_valid=sc.fit_transform(X_valid)

train_data = lgb.Dataset(scaled_X_train, label=y_train)
validation_data = lgb.Dataset(scaled_X_valid, label=y_valid)

In [23]:
num_round = 15000
scaled_lgb_clf = lgb.train(param, train_data, num_round, valid_sets=[validation_data],verbose_eval=1000, early_stopping_rounds = 250)

Training until validation scores don't improve for 250 rounds.
[1000]	valid_0's auc: 0.848944
[2000]	valid_0's auc: 0.871042
[3000]	valid_0's auc: 0.880982
[4000]	valid_0's auc: 0.885394
[5000]	valid_0's auc: 0.888294
[6000]	valid_0's auc: 0.889617
Early stopping, best iteration is:
[6194]	valid_0's auc: 0.889858


Did not work well with the exp, tanh, and log10. Only 0.8898.
It was better with the cubic before. Standard scaler may conflict with feature engineering.

In [32]:
scaled_lgb_clf.save_model('scaled_lgb_clf.txt')

<lightgbm.basic.Booster at 0x1b70c279358>

In [33]:
scaled_importance_df = pd.DataFrame()
scaled_importance_df["Feature"] = features
scaled_importance_df["importance"] = scaled_lgb_clf.feature_importance()
scaled_importance_df.sort_values(by=['importance'], ascending=False)[:20]

Unnamed: 0,Feature,importance
768,r1_var_94,154
210,r1_var_1,154
282,r1_var_13,140
756,r1_var_92,139
799,r2_var_99,129
336,r1_var_22,128
139,var_139,125
234,r1_var_5,123
684,r1_var_80,119
522,r1_var_53,117


### xgboost

In [13]:
import xgboost as xgb

In [28]:
param = {
        'eta': 0.1,
        'max_depth': 15,
        'gamma': 100,
        'objective':'binary:logistic',    
        'eval_metric':'auc',
        'seed':42
        }

In [75]:
xgb_param = {
    'min_child_weight': 10.0,
    'objective': 'binary:logistic',
    'max_depth': 7,
    'max_delta_step': 1.8,
    'colsample_bytree': 0.4,
    'subsample': 0.8,
    'eta': 0.025,
    'gamma': 0.65,
    'eval_metric':'auc'
        }

In [23]:
X.shape,y.shape

((200000, 200), (200000,))

In [24]:
oof = np.zeros(len(X))
#predictions = np.zeros(len(X_test))
#feature_importance_df = pd.DataFrame()

for i, (train_idx, valid_idx) in enumerate(splits):  
    print(f'Fold {i + 1}')
    x_train = np.array(X)
    y_train = np.array(y)
    trn_data = xgb.DMatrix(x_train[train_idx.astype(int)], label=y_train[train_idx.astype(int)])
    val_data = xgb.DMatrix(x_train[valid_idx.astype(int)], label=y_train[valid_idx.astype(int)])
    watchlist = [(trn_data, 'train'), (val_data, 'valid')]
                 
    #num_round = 15000
    clf=xgb.train(param, trn_data, 700, evals=watchlist, early_stopping_rounds=250, verbose_eval=500)
    
    oof[valid_idx] = clf.predict(x_train[valid_idx], ntree_limit=clf.best_ntree_limit)
    
    #fold_importance_df = pd.DataFrame()
    #fold_importance_df["feature"] = features
    #fold_importance_df["importance"] = clf.feature_importance()
    #fold_importance_df["fold"] = i + 1
    #feature_importance_df = pd.concat([feature_importance_df, fold_importance_df], axis=0)
    
    #predictions += clf.predict(X_test, num_iteration=clf.best_iteration) / 5

print("CV score: {:<8.5f}".format(roc_auc_score(y, oof)))


Fold 1
[22:18:28] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 142 extra nodes, 0 pruned nodes, max_depth=7
[0]	train-auc:0.636529	valid-auc:0.617924
Multiple eval metrics have been passed: 'valid-auc' will be used for early stopping.

Will train until valid-auc hasn't improved in 250 rounds.
[22:18:29] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 142 extra nodes, 0 pruned nodes, max_depth=7
[22:18:30] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 138 extra nodes, 0 pruned nodes, max_depth=7
[22:18:31] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 136 extra nodes, 0 pruned nodes, max_depth=7
[22:18:32] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 158 extra nodes, 0 pruned nodes, max_depth=7
[22:18:33] C:\Users\Administrator\Desktop\xgboo

[22:19:19] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 158 extra nodes, 0 pruned nodes, max_depth=7
[22:19:20] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 168 extra nodes, 0 pruned nodes, max_depth=7
[22:19:21] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 164 extra nodes, 0 pruned nodes, max_depth=7
[22:19:22] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 132 extra nodes, 0 pruned nodes, max_depth=7
[22:19:23] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 180 extra nodes, 0 pruned nodes, max_depth=7
[22:19:24] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 170 extra nodes, 0 pruned nodes, max_depth=7
[22:19:25] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:20:14] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 164 extra nodes, 0 pruned nodes, max_depth=7
[22:20:15] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 154 extra nodes, 0 pruned nodes, max_depth=7
[22:20:16] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 170 extra nodes, 2 pruned nodes, max_depth=7
[22:20:17] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 142 extra nodes, 0 pruned nodes, max_depth=7
[22:20:18] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 146 extra nodes, 0 pruned nodes, max_depth=7
[22:20:19] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 156 extra nodes, 0 pruned nodes, max_depth=7
[22:20:20] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:21:10] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 150 extra nodes, 0 pruned nodes, max_depth=7
[22:21:11] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 150 extra nodes, 0 pruned nodes, max_depth=7
[22:21:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 184 extra nodes, 0 pruned nodes, max_depth=7
[22:21:13] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 136 extra nodes, 0 pruned nodes, max_depth=7
[22:21:14] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 178 extra nodes, 0 pruned nodes, max_depth=7
[22:21:15] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 148 extra nodes, 0 pruned nodes, max_depth=7
[22:21:16] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:22:10] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 148 extra nodes, 0 pruned nodes, max_depth=7
[22:22:11] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 174 extra nodes, 0 pruned nodes, max_depth=7
[22:22:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 166 extra nodes, 0 pruned nodes, max_depth=7
[22:22:13] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 160 extra nodes, 0 pruned nodes, max_depth=7
[22:22:14] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 148 extra nodes, 0 pruned nodes, max_depth=7
[22:22:15] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 152 extra nodes, 0 pruned nodes, max_depth=7
[22:22:16] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:23:09] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 164 extra nodes, 0 pruned nodes, max_depth=7
[22:23:10] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 130 extra nodes, 0 pruned nodes, max_depth=7
[22:23:11] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 172 extra nodes, 0 pruned nodes, max_depth=7
[22:23:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 158 extra nodes, 0 pruned nodes, max_depth=7
[22:23:13] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 156 extra nodes, 0 pruned nodes, max_depth=7
[22:23:14] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 126 extra nodes, 0 pruned nodes, max_depth=7
[22:23:15] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:24:07] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 156 extra nodes, 0 pruned nodes, max_depth=7
[22:24:08] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 168 extra nodes, 0 pruned nodes, max_depth=7
[22:24:09] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 188 extra nodes, 0 pruned nodes, max_depth=7
[22:24:10] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 152 extra nodes, 0 pruned nodes, max_depth=7
[22:24:11] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 142 extra nodes, 0 pruned nodes, max_depth=7
[22:24:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 150 extra nodes, 0 pruned nodes, max_depth=7
[22:24:13] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:25:04] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 142 extra nodes, 0 pruned nodes, max_depth=7
[22:25:05] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 186 extra nodes, 0 pruned nodes, max_depth=7
[22:25:06] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 142 extra nodes, 0 pruned nodes, max_depth=7
[22:25:08] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 180 extra nodes, 0 pruned nodes, max_depth=7
[22:25:09] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 166 extra nodes, 0 pruned nodes, max_depth=7
[22:25:10] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 142 extra nodes, 0 pruned nodes, max_depth=7
[22:25:11] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:26:04] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 152 extra nodes, 0 pruned nodes, max_depth=7
[22:26:05] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 116 extra nodes, 0 pruned nodes, max_depth=7
[22:26:06] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 166 extra nodes, 0 pruned nodes, max_depth=7
[22:26:07] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 176 extra nodes, 0 pruned nodes, max_depth=7
[22:26:08] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 164 extra nodes, 0 pruned nodes, max_depth=7
[22:26:09] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 196 extra nodes, 0 pruned nodes, max_depth=7
[22:26:10] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:27:02] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 132 extra nodes, 0 pruned nodes, max_depth=7
[22:27:03] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 144 extra nodes, 0 pruned nodes, max_depth=7
[22:27:04] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 166 extra nodes, 0 pruned nodes, max_depth=7
[22:27:05] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 158 extra nodes, 0 pruned nodes, max_depth=7
[22:27:06] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 148 extra nodes, 0 pruned nodes, max_depth=7
[22:27:07] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 120 extra nodes, 0 pruned nodes, max_depth=7
[22:27:08] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:28:01] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 160 extra nodes, 0 pruned nodes, max_depth=7
[22:28:02] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 182 extra nodes, 0 pruned nodes, max_depth=7
[22:28:03] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 144 extra nodes, 0 pruned nodes, max_depth=7
[22:28:04] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 122 extra nodes, 0 pruned nodes, max_depth=7
[22:28:05] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 104 extra nodes, 0 pruned nodes, max_depth=7
[22:28:06] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 132 extra nodes, 0 pruned nodes, max_depth=7
[22:28:07] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:28:59] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 126 extra nodes, 0 pruned nodes, max_depth=7
[22:29:00] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 140 extra nodes, 0 pruned nodes, max_depth=7
[22:29:01] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 182 extra nodes, 0 pruned nodes, max_depth=7
[22:29:02] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 182 extra nodes, 0 pruned nodes, max_depth=7
[22:29:03] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 166 extra nodes, 0 pruned nodes, max_depth=7
[22:29:04] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 138 extra nodes, 0 pruned nodes, max_depth=7
[22:29:05] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

[22:29:54] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 138 extra nodes, 0 pruned nodes, max_depth=7
[22:29:55] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 122 extra nodes, 0 pruned nodes, max_depth=7
[22:29:57] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 172 extra nodes, 0 pruned nodes, max_depth=7
[22:29:58] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 156 extra nodes, 0 pruned nodes, max_depth=7
[22:29:59] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 152 extra nodes, 0 pruned nodes, max_depth=7
[22:30:00] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 108 extra nodes, 0 pruned nodes, max_depth=7
[22:30:02] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pru

AttributeError: 'numpy.ndarray' object has no attribute 'feature_names'

In [11]:
num_round = 15000
xgb_clf = xgb.train(param, d_train, num_round,watchlist, early_stopping_rounds=250, verbose_eval=100)

[14:09:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 2736 pruned nodes, max_depth=7
[0]	train-auc:0.637763	valid-auc:0.627464
Multiple eval metrics have been passed: 'valid-auc' will be used for early stopping.

Will train until valid-auc hasn't improved in 250 rounds.
[14:09:27] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 2756 pruned nodes, max_depth=4
[14:09:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 2846 pruned nodes, max_depth=6
[14:09:55] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 2894 pruned nodes, max_depth=6
[14:10:09] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 2738 pruned nodes, max_depth=7
[14:10:23] C:\Users\Administrator\Desktop\xg

[14:22:40] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 3634 pruned nodes, max_depth=6
[14:22:56] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 4066 pruned nodes, max_depth=4
[14:23:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 3582 pruned nodes, max_depth=7
[14:23:27] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 3112 pruned nodes, max_depth=7
[14:23:42] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 18 extra nodes, 2962 pruned nodes, max_depth=9
[14:23:58] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 14 extra nodes, 3488 pruned nodes, max_depth=5
[14:24:13] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:

[14:36:54] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 12 extra nodes, 2816 pruned nodes, max_depth=6
[14:37:10] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 3510 pruned nodes, max_depth=3
[14:37:26] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 3298 pruned nodes, max_depth=4
[14:37:42] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 3466 pruned nodes, max_depth=2
[14:37:57] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 3128 pruned nodes, max_depth=5
[14:38:14] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 10 extra nodes, 3256 pruned nodes, max_depth=5
[14:38:30] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74:

[14:51:56] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 3786 pruned nodes, max_depth=1
[14:52:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 3594 pruned nodes, max_depth=2
[14:52:28] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 4158 pruned nodes, max_depth=1
[14:52:45] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 4260 pruned nodes, max_depth=1
[14:53:01] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 3428 pruned nodes, max_depth=2
[14:53:18] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 3474 pruned nodes, max_depth=3
[14:53:35] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tr

[15:06:33] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:06:51] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:07:08] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:07:24] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:07:40] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:07:56] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:08:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tr

[15:21:35] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:21:51] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:22:08] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:22:24] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:22:40] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:22:56] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:23:11] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tr

[15:36:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:36:28] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:36:45] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:37:02] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:37:19] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:37:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:37:56] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tr

[15:50:45] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:51:01] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:51:17] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:51:32] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:51:48] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:52:04] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[15:52:20] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tr

[16:06:12] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[16:06:30] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[16:06:48] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
[16:07:06] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 3578 pruned nodes, max_depth=0
Stopping. Best iteration:
[183]	train-auc:0.883188	valid-auc:0.856554



In [73]:
xgb_clf=clf
xgb_clf.save_model('xgb_clf_raw.model')

In [21]:
df=pd.DataFrame(list(xgb_clf.get_fscore().items()), columns=['feature','importance']).sort_values('importance', ascending=False)

### Neural network

In [10]:
#https://www.kaggle.com/super13579/pytorch-nn-with-cyclelr-and-k-fold-lightgbm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
import time

In [28]:
n_splits = 5 # Number of K-fold Splits

splits = list(StratifiedKFold(n_splits=n_splits, shuffle=True).split(train, y))

In [12]:
class CyclicLR(object):
    def __init__(self, optimizer, base_lr=1e-3, max_lr=6e-3,
                 step_size=2000, mode='triangular', gamma=1.,
                 scale_fn=None, scale_mode='cycle', last_batch_iteration=-1):

        if not isinstance(optimizer, Optimizer):
            raise TypeError('{} is not an Optimizer'.format(
                type(optimizer).__name__))
        self.optimizer = optimizer

        if isinstance(base_lr, list) or isinstance(base_lr, tuple):
            if len(base_lr) != len(optimizer.param_groups):
                raise ValueError("expected {} base_lr, got {}".format(
                    len(optimizer.param_groups), len(base_lr)))
            self.base_lrs = list(base_lr)
        else:
            self.base_lrs = [base_lr] * len(optimizer.param_groups)

        if isinstance(max_lr, list) or isinstance(max_lr, tuple):
            if len(max_lr) != len(optimizer.param_groups):
                raise ValueError("expected {} max_lr, got {}".format(
                    len(optimizer.param_groups), len(max_lr)))
            self.max_lrs = list(max_lr)
        else:
            self.max_lrs = [max_lr] * len(optimizer.param_groups)

        self.step_size = step_size

        if mode not in ['triangular', 'triangular2', 'exp_range'] \
                and scale_fn is None:
            raise ValueError('mode is invalid and scale_fn is None')

        self.mode = mode
        self.gamma = gamma

        if scale_fn is None:
            if self.mode == 'triangular':
                self.scale_fn = self._triangular_scale_fn
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = self._triangular2_scale_fn
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = self._exp_range_scale_fn
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode

        self.batch_step(last_batch_iteration + 1)
        self.last_batch_iteration = last_batch_iteration

    def batch_step(self, batch_iteration=None):
        if batch_iteration is None:
            batch_iteration = self.last_batch_iteration + 1
        self.last_batch_iteration = batch_iteration
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

    def _triangular_scale_fn(self, x):
        return 1.

    def _triangular2_scale_fn(self, x):
        return 1 / (2. ** (x - 1))

    def _exp_range_scale_fn(self, x):
        return self.gamma**(x)

    def get_lr(self):
        step_size = float(self.step_size)
        cycle = np.floor(1 + self.last_batch_iteration / (2 * step_size))
        x = np.abs(self.last_batch_iteration / step_size - 2 * cycle + 1)

        lrs = []
        param_lrs = zip(self.optimizer.param_groups, self.base_lrs, self.max_lrs)
        for param_group, base_lr, max_lr in param_lrs:
            base_height = (max_lr - base_lr) * np.maximum(0, (1 - x))
            if self.scale_mode == 'cycle':
                lr = base_lr + base_height * self.scale_fn(cycle)
            else:
                lr = base_lr + base_height * self.scale_fn(self.last_batch_iteration)
            lrs.append(lr)
        return lrs

In [13]:
class Simple_NN(nn.Module):
    def __init__(self ,input_dim ,hidden_dim, dropout = 0.2):
        super(Simple_NN, self).__init__()
        
        self.inpt_dim = input_dim
        self.hidden_dim = hidden_dim
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, int(hidden_dim/2))
        self.fc3 = nn.Linear(int(hidden_dim/2), int(hidden_dim/4))
        self.fc4 = nn.Linear(int(hidden_dim/4), int(hidden_dim/8))
        self.fc5 = nn.Linear(int(hidden_dim/8), 1)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.bn2 = nn.BatchNorm1d(int(hidden_dim/2))
        self.bn3 = nn.BatchNorm1d(int(hidden_dim/4))
        self.bn4 = nn.BatchNorm1d(int(hidden_dim/8))
    
    def forward(self, x):
        y = self.fc1(x)
        y = self.relu(y)
        #y = self.bn1(y)
        y = self.dropout(y)
        
        y = self.fc2(y)
        y = self.relu(y)
        #y = self.bn2(y)
        y = self.dropout(y)
        
        y = self.fc3(y)
        y = self.relu(y)
        #y = self.bn3(y)
        y = self.dropout(y)
        
        y = self.fc4(y)
        y = self.relu(y)
        #y = self.bn4(y)
        y = self.dropout(y)
        
        out= self.fc5(y)
        
        return out

In [29]:
model = Simple_NN(200,512)
model.cuda()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0002) # Using Adam optimizer

In [15]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [30]:
train_features=train
test_features=X_test
train_target=y

In [31]:
train_features.shape,test_features.shape,train_target.shape

((200000, 200), (200000, 200), (200000,))

In [32]:
from torch.optim.optimizer import Optimizer
n_epochs = 40
batch_size = 25000

train_preds = np.zeros((len(train_features)))
test_preds = np.zeros((len(test_features)))

x_test = np.array(test_features)
x_test_cuda = torch.tensor(x_test, dtype=torch.float).cuda()
test = torch.utils.data.TensorDataset(x_test_cuda)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

avg_losses_f = []
avg_val_losses_f = []

for i, (train_idx, valid_idx) in enumerate(splits):  
    x_train = np.array(train_features)
    y_train = np.array(train_target)
    
    x_train_fold = torch.tensor(x_train[train_idx.astype(int)], dtype=torch.float).cuda()
    y_train_fold = torch.tensor(y_train[train_idx.astype(int), np.newaxis], dtype=torch.float32).cuda()
    
    x_val_fold = torch.tensor(x_train[valid_idx.astype(int)], dtype=torch.float).cuda()
    y_val_fold = torch.tensor(y_train[valid_idx.astype(int), np.newaxis], dtype=torch.float32).cuda()
    
    loss_fn = torch.nn.BCEWithLogitsLoss()
    
    step_size = 300
    base_lr, max_lr = 0.0001, 0.001  
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), 
                             lr=max_lr)
    
    ################################################################################################
    scheduler = CyclicLR(optimizer, base_lr=base_lr, max_lr=max_lr,
               step_size=step_size, mode='exp_range',
               gamma=0.99994)
    ###############################################################################################

    train = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
    valid = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)
    
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)
    
    print(f'Fold {i + 1}')
    for epoch in range(n_epochs):
        start_time = time.time()
        model.train()
        avg_loss = 0.
        #avg_auc = 0.
        for i, (x_batch, y_batch) in enumerate(train_loader):
            y_pred = model(x_batch)
            #########################
            if scheduler:
                #print('cycle_LR')
                scheduler.batch_step()
            ########################
            loss = loss_fn(y_pred, y_batch)

            optimizer.zero_grad()
            loss.backward()

            optimizer.step()
            avg_loss += loss.item()/len(train_loader)
            #avg_auc += round(roc_auc_score(y_batch.cpu(),y_pred.detach().cpu()),4) / len(train_loader)
        model.eval()
        
        valid_preds_fold = np.zeros((x_val_fold.size(0)))
        test_preds_fold = np.zeros((len(test_features)))
        
        avg_val_loss = 0.
        #avg_val_auc = 0.
        for i, (x_batch, y_batch) in enumerate(valid_loader):
            y_pred = model(x_batch).detach()
            
            #avg_val_auc += round(roc_auc_score(y_batch.cpu(),sigmoid(y_pred.cpu().numpy())[:, 0]),4) / len(valid_loader)
            avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
            valid_preds_fold[i * batch_size:(i+1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]
            
        elapsed_time = time.time() - start_time 
        print('Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'.format(
            epoch + 1, n_epochs, avg_loss, avg_val_loss, elapsed_time))
        
    avg_losses_f.append(avg_loss)
    avg_val_losses_f.append(avg_val_loss) 
    
    for i, (x_batch,) in enumerate(test_loader):
        y_pred = model(x_batch).detach()

        test_preds_fold[i * batch_size:(i+1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]
        
    train_preds[valid_idx] = valid_preds_fold
    test_preds += test_preds_fold / len(splits)

auc  =  round(roc_auc_score(train_target,train_preds),4)      
print('All \t loss={:.4f} \t val_loss={:.4f} \t auc={:.4f}'.format(np.average(avg_losses_f),np.average(avg_val_losses_f),auc))

Fold 1
Epoch 1/40 	 loss=0.5072 	 val_loss=0.3813 	 time=7.68s
Epoch 2/40 	 loss=0.3498 	 val_loss=0.3352 	 time=7.47s
Epoch 3/40 	 loss=0.3552 	 val_loss=0.3282 	 time=7.53s
Epoch 4/40 	 loss=0.3372 	 val_loss=0.3297 	 time=7.47s
Epoch 5/40 	 loss=0.3374 	 val_loss=0.3252 	 time=7.55s
Epoch 6/40 	 loss=0.3309 	 val_loss=0.3186 	 time=7.47s
Epoch 7/40 	 loss=0.3301 	 val_loss=0.3162 	 time=7.50s
Epoch 8/40 	 loss=0.3256 	 val_loss=0.3127 	 time=7.49s
Epoch 9/40 	 loss=0.3206 	 val_loss=0.3053 	 time=7.47s
Epoch 10/40 	 loss=0.3128 	 val_loss=0.2953 	 time=7.48s
Epoch 11/40 	 loss=0.3018 	 val_loss=0.2840 	 time=7.49s
Epoch 12/40 	 loss=0.2924 	 val_loss=0.2745 	 time=7.49s
Epoch 13/40 	 loss=0.2850 	 val_loss=0.2682 	 time=7.51s
Epoch 14/40 	 loss=0.2772 	 val_loss=0.2639 	 time=7.45s
Epoch 15/40 	 loss=0.2746 	 val_loss=0.2593 	 time=7.50s
Epoch 16/40 	 loss=0.2723 	 val_loss=0.2567 	 time=7.50s
Epoch 17/40 	 loss=0.2685 	 val_loss=0.2546 	 time=7.56s
Epoch 18/40 	 loss=0.2657 	 val_l

Epoch 25/40 	 loss=0.1396 	 val_loss=0.1336 	 time=7.31s
Epoch 26/40 	 loss=0.1393 	 val_loss=0.1558 	 time=7.37s
Epoch 27/40 	 loss=0.1492 	 val_loss=0.1497 	 time=7.31s
Epoch 28/40 	 loss=0.1522 	 val_loss=0.1447 	 time=7.32s
Epoch 29/40 	 loss=0.1454 	 val_loss=0.1448 	 time=7.29s
Epoch 30/40 	 loss=0.1394 	 val_loss=0.1457 	 time=7.41s
Epoch 31/40 	 loss=0.1326 	 val_loss=0.1446 	 time=7.31s
Epoch 32/40 	 loss=0.1293 	 val_loss=0.1440 	 time=7.36s
Epoch 33/40 	 loss=0.1305 	 val_loss=0.1455 	 time=7.28s
Epoch 34/40 	 loss=0.1291 	 val_loss=0.1627 	 time=7.34s
Epoch 35/40 	 loss=0.1277 	 val_loss=0.1656 	 time=7.27s
Epoch 36/40 	 loss=0.1268 	 val_loss=0.1522 	 time=7.32s
Epoch 37/40 	 loss=0.1248 	 val_loss=0.1551 	 time=7.31s
Epoch 38/40 	 loss=0.1220 	 val_loss=0.1587 	 time=7.31s
Epoch 39/40 	 loss=0.1210 	 val_loss=0.1629 	 time=7.28s
Epoch 40/40 	 loss=0.1196 	 val_loss=0.1647 	 time=7.54s
Fold 5
Epoch 1/40 	 loss=0.1425 	 val_loss=0.0684 	 time=7.91s
Epoch 2/40 	 loss=0.1385 

In [35]:
torch.save(model.state_dict(), 'simple nn' )

In [None]:
#load on gpu
#device = torch.device("cuda")
#model = TheModelClass(*args, **kwargs)
#model.load_state_dict(torch.load(PATH))
#model.to(device)
# Make sure to call input = input.to(device) on any input tensors that you feed to the model

### Tensorflow

In [8]:
import tensorflow as tf

In [9]:
he_init = tf.variance_scaling_initializer()

def dnn(inputs, n_hidden_layers=2, n_neurons=100, name=None,
        activation=tf.nn.elu, initializer=he_init):
    with tf.variable_scope(name, "dnn"):
        for layer in range(n_hidden_layers):
            inputs = tf.layers.dense(inputs, n_neurons, activation=activation,
                                     kernel_initializer=initializer,
                                     name="hidden%d" % (layer + 1))
        return inputs

In [8]:
n_inputs = 200
n_outputs = 2

#reset_graph()

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

dnn_outputs = dnn(X)

logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
Y_proba = tf.nn.softmax(logits, name="Y_proba")

Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Colocations handled automatically by placer.


In [9]:
learning_rate = 0.01

xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

optimizer = tf.train.AdamOptimizer(learning_rate)
training_op = optimizer.minimize(loss, name="training_op")

correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [10]:
n_epochs = 1000
batch_size = 200

max_checks_without_progress = 20
checks_without_progress = 0
best_loss = np.infty

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        start_pos=0
        for batches in range(int(len(X_train)/batch_size)):
            X_batch,y_batch=X_train[start_pos:start_pos+batch_size],y_train[start_pos:start_pos+batch_size]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
            start_pos+=batch_size
        #rnd_idx = np.random.permutation(len(X_train))
        #for rnd_indices in np.array_split(rnd_idx, len(X_train) // batch_size):
            #X_batch, y_batch = X_train[rnd_indices], y_train[rnd_indices]
            #sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid, y: y_valid})
        if loss_val < best_loss:
            save_path = saver.save(sess, "./MLP.ckpt")
            best_loss = loss_val
            checks_without_progress = 0
        else:
            checks_without_progress += 1
            if checks_without_progress > max_checks_without_progress:
                print("Early stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
            epoch, loss_val, best_loss, acc_val * 100))

with tf.Session() as sess:
    saver.restore(sess, "./MLP.ckpt")
    acc_test = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
    print("Final test accuracy: {:.2f}%".format(acc_test * 100))

NameError: name 'X_train' is not defined

In [10]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.exceptions import NotFittedError

class DNNClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_hidden_layers=5, n_neurons=100, optimizer_class=tf.train.AdamOptimizer,
                 learning_rate=0.01, batch_size=20, activation=tf.nn.elu, initializer=he_init,
                 batch_norm_momentum=None, dropout_rate=None, random_state=None):
        """Initialize the DNNClassifier by simply storing all the hyperparameters."""
        self.n_hidden_layers = n_hidden_layers
        self.n_neurons = n_neurons
        self.optimizer_class = optimizer_class
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.activation = activation
        self.initializer = initializer
        self.batch_norm_momentum = batch_norm_momentum
        self.dropout_rate = dropout_rate
        self.random_state = random_state
        self._session = None

    def _dnn(self, inputs):
        """Build the hidden layers, with support for batch normalization and dropout."""
        for layer in range(self.n_hidden_layers):
            if self.dropout_rate:
                inputs = tf.layers.dropout(inputs, self.dropout_rate, training=self._training)
            inputs = tf.layers.dense(inputs, self.n_neurons,
                                     kernel_initializer=self.initializer,
                                     name="hidden%d" % (layer + 1))
            if self.batch_norm_momentum:
                inputs = tf.layers.batch_normalization(inputs, momentum=self.batch_norm_momentum,
                                                       training=self._training)
            inputs = self.activation(inputs, name="hidden%d_out" % (layer + 1))
        return inputs

    def _build_graph(self, n_inputs, n_outputs):
        """Build the same model as earlier"""
        if self.random_state is not None:
            tf.set_random_seed(self.random_state)
            np.random.seed(self.random_state)

        X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
        y = tf.placeholder(tf.int32, shape=(None), name="y")

        if self.batch_norm_momentum or self.dropout_rate:
            self._training = tf.placeholder_with_default(False, shape=(), name='training')
        else:
            self._training = None

        dnn_outputs = self._dnn(X)

        logits = tf.layers.dense(dnn_outputs, n_outputs, kernel_initializer=he_init, name="logits")
        Y_proba = tf.nn.softmax(logits, name="Y_proba")

        xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                                  logits=logits)
        loss = tf.reduce_mean(xentropy, name="loss")

        optimizer = self.optimizer_class(learning_rate=self.learning_rate)
        training_op = optimizer.minimize(loss)

        correct = tf.nn.in_top_k(logits, y, 1)
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # Make the important operations available easily through instance variables
        self._X, self._y = X, y
        self._Y_proba, self._loss = Y_proba, loss
        self._training_op, self._accuracy = training_op, accuracy
        self._init, self._saver = init, saver

    def close_session(self):
        if self._session:
            self._session.close()

    def _get_model_params(self):
        """Get all variable values (used for early stopping, faster than saving to disk)"""
        with self._graph.as_default():
            gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        return {gvar.op.name: value for gvar, value in zip(gvars, self._session.run(gvars))}

    def _restore_model_params(self, model_params):
        """Set all variables to the given values (for early stopping, faster than loading from disk)"""
        gvar_names = list(model_params.keys())
        assign_ops = {gvar_name: self._graph.get_operation_by_name(gvar_name + "/Assign")
                      for gvar_name in gvar_names}
        init_values = {gvar_name: assign_op.inputs[1] for gvar_name, assign_op in assign_ops.items()}
        feed_dict = {init_values[gvar_name]: model_params[gvar_name] for gvar_name in gvar_names}
        self._session.run(assign_ops, feed_dict=feed_dict)

    def fit(self, X, y, n_epochs=100, X_valid=None, y_valid=None):
        """Fit the model to the training set. If X_valid and y_valid are provided, use early stopping."""
        self.close_session()

        # infer n_inputs and n_outputs from the training set.
        n_inputs = X.shape[1]
        self.classes_ = np.unique(y)
        n_outputs = len(self.classes_)
        
        # Translate the labels vector to a vector of sorted class indices, containing
        # integers from 0 to n_outputs - 1.
        # For example, if y is equal to [8, 8, 9, 5, 7, 6, 6, 6], then the sorted class
        # labels (self.classes_) will be equal to [5, 6, 7, 8, 9], and the labels vector
        # will be translated to [3, 3, 4, 0, 2, 1, 1, 1]
        self.class_to_index_ = {label: index
                                for index, label in enumerate(self.classes_)}
        y = np.array([self.class_to_index_[label]
                      for label in y], dtype=np.int32)
        
        self._graph = tf.Graph()
        with self._graph.as_default():
            self._build_graph(n_inputs, n_outputs)
            # extra ops for batch normalization
            extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        # needed in case of early stopping
        max_checks_without_progress = 20
        checks_without_progress = 0
        best_loss = np.infty
        best_params = None
        
        # Now train the model!
        self._session = tf.Session(graph=self._graph)
        with self._session.as_default() as sess:
            self._init.run()
            for epoch in range(n_epochs):
                start_pos=0
                for batches in range(int(len(X)/self.batch_size)):
                    X_batch,y_batch=X[start_pos:start_pos+self.batch_size],y[start_pos:start_pos+self.batch_size]
                    feed_dict = {self._X: X_batch, self._y: y_batch}
                    start_pos+=self.batch_size
                #rnd_idx = np.random.permutation(len(X))
                #for rnd_indices in np.array_split(rnd_idx, len(X) // self.batch_size):
                    #X_batch, y_batch = X[rnd_indices], y[rnd_indices]
                    #feed_dict = {self._X: X_batch, self._y: y_batch}
                    if self._training is not None:
                        feed_dict[self._training] = True
                    sess.run(self._training_op, feed_dict=feed_dict)
                    if extra_update_ops:
                        sess.run(extra_update_ops, feed_dict=feed_dict)
                if X_valid is not None and y_valid is not None:
                    loss_val, acc_val = sess.run([self._loss, self._accuracy],
                                                 feed_dict={self._X: X_valid,
                                                            self._y: y_valid})
                    if loss_val < best_loss:
                        best_params = self._get_model_params()
                        best_loss = loss_val
                        checks_without_progress = 0
                    else:
                        checks_without_progress += 1
                    print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_val, best_loss, acc_val * 100))
                    if checks_without_progress > max_checks_without_progress:
                        print("Early stopping!")
                        break
                else:
                    loss_train, acc_train = sess.run([self._loss, self._accuracy],
                                                     feed_dict={self._X: X_batch,
                                                                self._y: y_batch})
                    print("{}\tLast training batch loss: {:.6f}\tAccuracy: {:.2f}%".format(
                        epoch, loss_train, acc_train * 100))
            # If we used early stopping then rollback to the best model found
            if best_params:
                self._restore_model_params(best_params)
            return self

    def predict_proba(self, X):
        if not self._session:
            raise NotFittedError("This %s instance is not fitted yet" % self.__class__.__name__)
        with self._session.as_default() as sess:
            return self._Y_proba.eval(feed_dict={self._X: X})

    def predict(self, X):
        class_indices = np.argmax(self.predict_proba(X), axis=1)
        return np.array([[self.classes_[class_index]]
                         for class_index in class_indices], np.int32)

    def save(self, path):
        self._saver.save(self._session, path)

In [34]:
dnn_clf = DNNClassifier(random_state=42)
dnn_clf.fit(X_train, y_train, n_epochs=1000, X_valid=X_valid, y_valid=y_valid)

0	Validation loss: 0.249402	Best loss: 0.249402	Accuracy: 91.14%
1	Validation loss: 0.572434	Best loss: 0.249402	Accuracy: 89.95%
2	Validation loss: 0.326405	Best loss: 0.249402	Accuracy: 89.95%
3	Validation loss: 0.343844	Best loss: 0.249402	Accuracy: 89.95%
4	Validation loss: 0.353421	Best loss: 0.249402	Accuracy: 89.95%
5	Validation loss: 0.418960	Best loss: 0.249402	Accuracy: 89.95%
6	Validation loss: 0.350180	Best loss: 0.249402	Accuracy: 89.95%
7	Validation loss: 0.348217	Best loss: 0.249402	Accuracy: 89.95%
8	Validation loss: 0.348487	Best loss: 0.249402	Accuracy: 89.95%
9	Validation loss: 0.348361	Best loss: 0.249402	Accuracy: 89.95%
10	Validation loss: 0.348417	Best loss: 0.249402	Accuracy: 89.95%
11	Validation loss: 0.348391	Best loss: 0.249402	Accuracy: 89.95%
12	Validation loss: 0.348406	Best loss: 0.249402	Accuracy: 89.95%
13	Validation loss: 0.348398	Best loss: 0.249402	Accuracy: 89.95%
14	Validation loss: 0.348401	Best loss: 0.249402	Accuracy: 89.95%
15	Validation loss: 

DNNClassifier(activation=<function elu at 0x00000174A10ABBF8>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0x00000174A3839898>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [42]:
dnn_clf.save('dnn_clf')

In [36]:
from sklearn.metrics import accuracy_score
y_pred = dnn_clf.predict(X_valid)
accuracy_score(y_valid, y_pred)

0.91135

### Random search

In [17]:
from sklearn.model_selection import RandomizedSearchCV

def leaky_relu(alpha=0.01):
    def parametrized_leaky_relu(z, name=None):
        return tf.maximum(alpha * z, z, name=name)
    return parametrized_leaky_relu

param_distribs = {
    "n_epochs":[100,500,1000]
    "n_neurons": [10, 30, 50, 70, 90, 100, 120, 140, 160],
    "batch_size": [10, 50, 100, 500],
    "learning_rate": [0.01, 0.02, 0.05, 0.1],
    "activation": [tf.nn.relu, tf.nn.elu, leaky_relu(alpha=0.01), leaky_relu(alpha=0.1)],
    # you could also try exploring different numbers of hidden layers, different optimizers, etc.
    "n_hidden_layers": [2,3,4,5],
    #"optimizer_class": [tf.train.AdamOptimizer, partial(tf.train.MomentumOptimizer, momentum=0.95)],
}

rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
                                cv=3, random_state=42, verbose=2)
rnd_search.fit(X_train, y_train, X_valid=X_valid, y_valid=y_valid)

# If you have Scikit-Learn 0.18 or earlier, you should upgrade, or use the fit_params argument:
# fit_params = dict(X_valid=X_valid1, y_valid=y_valid1, n_epochs=1000)
# rnd_search = RandomizedSearchCV(DNNClassifier(random_state=42), param_distribs, n_iter=50,
#                                 fit_params=fit_params, random_state=42, verbose=2)
# rnd_search.fit(X_train1, y_train1)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] n_neurons=50, n_hidden_layers=4, learning_rate=0.02, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426E5F048> 
0	Validation loss: 0.244824	Best loss: 0.244824	Accuracy: 90.89%
1	Validation loss: 0.246529	Best loss: 0.244824	Accuracy: 91.01%
2	Validation loss: 0.253831	Best loss: 0.244824	Accuracy: 90.79%
3	Validation loss: 0.247672	Best loss: 0.244824	Accuracy: 90.87%
4	Validation loss: 0.252642	Best loss: 0.244824	Accuracy: 90.86%
5	Validation loss: 0.260139	Best loss: 0.244824	Accuracy: 90.61%
6	Validation loss: 0.245625	Best loss: 0.244824	Accuracy: 90.97%
7	Validation loss: 0.246783	Best loss: 0.244824	Accuracy: 90.79%
8	Validation loss: 0.246674	Best loss: 0.244824	Accuracy: 90.82%
9	Validation loss: 0.252266	Best loss: 0.244824	Accuracy: 90.65%
10	Validation loss: 0.251288	Best loss: 0.244824	Accuracy: 90.57%
11	Validation loss: 0.253482	Best loss: 0.244824	Accuracy: 

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   21.6s remaining:    0.0s


0	Validation loss: 0.245361	Best loss: 0.245361	Accuracy: 90.68%
1	Validation loss: 0.244428	Best loss: 0.244428	Accuracy: 90.95%
2	Validation loss: 0.252440	Best loss: 0.244428	Accuracy: 90.85%
3	Validation loss: 0.257561	Best loss: 0.244428	Accuracy: 90.82%
4	Validation loss: 0.254575	Best loss: 0.244428	Accuracy: 90.90%
5	Validation loss: 0.262527	Best loss: 0.244428	Accuracy: 90.82%
6	Validation loss: 0.271459	Best loss: 0.244428	Accuracy: 90.52%
7	Validation loss: 0.258908	Best loss: 0.244428	Accuracy: 90.76%
8	Validation loss: 0.248503	Best loss: 0.244428	Accuracy: 90.91%
9	Validation loss: 0.245855	Best loss: 0.244428	Accuracy: 90.91%
10	Validation loss: 0.245839	Best loss: 0.244428	Accuracy: 90.90%
11	Validation loss: 0.245400	Best loss: 0.244428	Accuracy: 90.90%
12	Validation loss: 0.245740	Best loss: 0.244428	Accuracy: 90.92%
13	Validation loss: 0.245904	Best loss: 0.244428	Accuracy: 90.91%
14	Validation loss: 0.248483	Best loss: 0.244428	Accuracy: 90.82%
15	Validation loss: 

16	Validation loss: 0.371271	Best loss: 0.247605	Accuracy: 90.15%
17	Validation loss: 0.338046	Best loss: 0.247605	Accuracy: 90.20%
18	Validation loss: 0.339741	Best loss: 0.247605	Accuracy: 90.26%
19	Validation loss: 0.372216	Best loss: 0.247605	Accuracy: 90.31%
20	Validation loss: 0.392949	Best loss: 0.247605	Accuracy: 90.17%
21	Validation loss: 0.390086	Best loss: 0.247605	Accuracy: 90.17%
Early stopping!
[CV]  n_neurons=90, n_hidden_layers=2, learning_rate=0.05, batch_size=500, activation=<function relu at 0x00000214013FFE18>, total=  17.0s
[CV] n_neurons=10, n_hidden_layers=4, learning_rate=0.1, batch_size=50, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426E5F048> 
0	Validation loss: 0.290773	Best loss: 0.290773	Accuracy: 90.10%
1	Validation loss: 0.301144	Best loss: 0.290773	Accuracy: 89.99%
2	Validation loss: 0.296180	Best loss: 0.290773	Accuracy: 89.67%
3	Validation loss: 0.283226	Best loss: 0.283226	Accuracy: 90.19%
4	Validation loss: 0.326503	

14	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
15	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
16	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
17	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
18	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
19	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
20	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
21	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
22	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
23	Validation loss: 0.327253	Best loss: 0.327253	Accuracy: 89.95%
Early stopping!
[CV]  n_neurons=160, n_hidden_layers=2, learning_rate=0.02, batch_size=10, activation=<function relu at 0x00000214013FFE18>, total= 3.2min
[CV] n_neurons=160, n_hidden_layers=2, learning_rate=0.02, batch_size=10, activation=<function relu at 0x00000214013FFE18> 
0	Validation loss: 0.327355	Best loss: 0.327355	Accuracy: 89.

0	Validation loss: 0.435187	Best loss: 0.435187	Accuracy: 86.55%
1	Validation loss: 21.625786	Best loss: 0.435187	Accuracy: 22.07%
2	Validation loss: 73.004326	Best loss: 0.435187	Accuracy: 89.94%
3	Validation loss: 27.849735	Best loss: 0.435187	Accuracy: 89.95%
4	Validation loss: 5.009664	Best loss: 0.435187	Accuracy: 89.93%
5	Validation loss: 206.583298	Best loss: 0.435187	Accuracy: 89.58%
6	Validation loss: 61.982525	Best loss: 0.435187	Accuracy: 89.93%
7	Validation loss: 152.772430	Best loss: 0.435187	Accuracy: 89.95%
8	Validation loss: 65.594833	Best loss: 0.435187	Accuracy: 79.93%
9	Validation loss: 819.181335	Best loss: 0.435187	Accuracy: 29.39%
10	Validation loss: 1247.104370	Best loss: 0.435187	Accuracy: 10.60%
11	Validation loss: 237.475525	Best loss: 0.435187	Accuracy: 89.94%
12	Validation loss: 367.839386	Best loss: 0.435187	Accuracy: 89.95%
13	Validation loss: 687.074768	Best loss: 0.435187	Accuracy: 86.70%
14	Validation loss: 3288.257080	Best loss: 0.435187	Accuracy: 83.8

18	Validation loss: 0.328117	Best loss: 0.266218	Accuracy: 89.24%
19	Validation loss: 0.320422	Best loss: 0.266218	Accuracy: 90.30%
20	Validation loss: 0.425200	Best loss: 0.266218	Accuracy: 89.69%
21	Validation loss: 0.352053	Best loss: 0.266218	Accuracy: 90.81%
22	Validation loss: 0.335259	Best loss: 0.266218	Accuracy: 90.23%
23	Validation loss: 0.347515	Best loss: 0.266218	Accuracy: 90.62%
24	Validation loss: 0.400050	Best loss: 0.266218	Accuracy: 90.09%
25	Validation loss: 0.378870	Best loss: 0.266218	Accuracy: 89.89%
26	Validation loss: 0.417837	Best loss: 0.266218	Accuracy: 90.18%
27	Validation loss: 0.378222	Best loss: 0.266218	Accuracy: 89.99%
28	Validation loss: 0.599479	Best loss: 0.266218	Accuracy: 90.41%
29	Validation loss: 0.433915	Best loss: 0.266218	Accuracy: 89.16%
30	Validation loss: 0.439766	Best loss: 0.266218	Accuracy: 89.27%
31	Validation loss: 0.663175	Best loss: 0.266218	Accuracy: 90.17%
32	Validation loss: 0.547579	Best loss: 0.266218	Accuracy: 90.28%
33	Validat

8	Validation loss: 0.345942	Best loss: 0.245909	Accuracy: 89.74%
9	Validation loss: 0.288521	Best loss: 0.245909	Accuracy: 90.42%
10	Validation loss: 0.300660	Best loss: 0.245909	Accuracy: 90.51%
11	Validation loss: 0.299862	Best loss: 0.245909	Accuracy: 90.50%
12	Validation loss: 0.332053	Best loss: 0.245909	Accuracy: 90.41%
13	Validation loss: 0.335427	Best loss: 0.245909	Accuracy: 90.38%
14	Validation loss: 0.338402	Best loss: 0.245909	Accuracy: 90.46%
15	Validation loss: 0.372262	Best loss: 0.245909	Accuracy: 90.35%
16	Validation loss: 0.401295	Best loss: 0.245909	Accuracy: 90.36%
17	Validation loss: 0.440202	Best loss: 0.245909	Accuracy: 90.26%
18	Validation loss: 0.460245	Best loss: 0.245909	Accuracy: 89.98%
19	Validation loss: 0.445828	Best loss: 0.245909	Accuracy: 90.07%
20	Validation loss: 0.498963	Best loss: 0.245909	Accuracy: 90.18%
21	Validation loss: 0.516808	Best loss: 0.245909	Accuracy: 90.14%
Early stopping!
[CV]  n_neurons=160, n_hidden_layers=2, learning_rate=0.01, ba

22	Validation loss: 0.248847	Best loss: 0.238763	Accuracy: 91.01%
Early stopping!
[CV]  n_neurons=10, n_hidden_layers=2, learning_rate=0.01, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426F4A6A8>, total=  17.6s
[CV] n_neurons=70, n_hidden_layers=4, learning_rate=0.05, batch_size=10, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426E5F048> 
0	Validation loss: 15.778711	Best loss: 15.778711	Accuracy: 89.95%
1	Validation loss: 75.313522	Best loss: 15.778711	Accuracy: 89.95%
2	Validation loss: 74.780502	Best loss: 15.778711	Accuracy: 89.83%
3	Validation loss: 339.547791	Best loss: 15.778711	Accuracy: 89.94%
4	Validation loss: 881.737305	Best loss: 15.778711	Accuracy: 89.95%
5	Validation loss: 55.934170	Best loss: 15.778711	Accuracy: 84.32%
6	Validation loss: 207.079971	Best loss: 15.778711	Accuracy: 89.97%
7	Validation loss: 201.436081	Best loss: 15.778711	Accuracy: 89.94%
8	Validation loss: 1802.250366	Best lo

4	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
5	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
6	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
7	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
8	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
9	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
10	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
11	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
12	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
13	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
14	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
15	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
16	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
17	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
18	Validation loss: 0.333526	Best loss: 0.333525	Accuracy: 89.95%
19	Validation lo

20	Validation loss: 0.237724	Best loss: 0.236170	Accuracy: 91.31%
21	Validation loss: 0.236414	Best loss: 0.236170	Accuracy: 91.33%
22	Validation loss: 0.237612	Best loss: 0.236170	Accuracy: 91.31%
23	Validation loss: 0.244474	Best loss: 0.236170	Accuracy: 91.30%
24	Validation loss: 0.236941	Best loss: 0.236170	Accuracy: 91.30%
25	Validation loss: 0.237736	Best loss: 0.236170	Accuracy: 91.28%
26	Validation loss: 0.237321	Best loss: 0.236170	Accuracy: 91.22%
27	Validation loss: 7.192541	Best loss: 0.236170	Accuracy: 89.95%
28	Validation loss: 6.797245	Best loss: 0.236170	Accuracy: 29.83%
29	Validation loss: 1.188685	Best loss: 0.236170	Accuracy: 89.95%
30	Validation loss: 1.734808	Best loss: 0.236170	Accuracy: 89.95%
31	Validation loss: 1.322412	Best loss: 0.236170	Accuracy: 89.95%
32	Validation loss: 1.436582	Best loss: 0.236170	Accuracy: 89.95%
33	Validation loss: 0.411692	Best loss: 0.236170	Accuracy: 89.95%
34	Validation loss: 0.327841	Best loss: 0.236170	Accuracy: 89.95%
35	Validat

6	Validation loss: 10.071965	Best loss: 0.391058	Accuracy: 10.05%
7	Validation loss: 13.163713	Best loss: 0.391058	Accuracy: 10.05%
8	Validation loss: 10.304442	Best loss: 0.391058	Accuracy: 10.05%
9	Validation loss: 14.445163	Best loss: 0.391058	Accuracy: 10.05%
10	Validation loss: 12.305861	Best loss: 0.391058	Accuracy: 10.05%
11	Validation loss: 14.794880	Best loss: 0.391058	Accuracy: 10.05%
12	Validation loss: 15.628451	Best loss: 0.391058	Accuracy: 10.05%
13	Validation loss: 12.847030	Best loss: 0.391058	Accuracy: 10.05%
14	Validation loss: 12.109972	Best loss: 0.391058	Accuracy: 10.05%
15	Validation loss: 12.719664	Best loss: 0.391058	Accuracy: 10.05%
16	Validation loss: 14.130715	Best loss: 0.391058	Accuracy: 10.05%
17	Validation loss: 5.091355	Best loss: 0.391058	Accuracy: 10.05%
18	Validation loss: 9.990839	Best loss: 0.391058	Accuracy: 10.05%
19	Validation loss: 4.871067	Best loss: 0.391058	Accuracy: 10.05%
20	Validation loss: 5.479630	Best loss: 0.391058	Accuracy: 10.05%
21	

19	Validation loss: 0.326339	Best loss: 0.326195	Accuracy: 89.95%
20	Validation loss: 0.326339	Best loss: 0.326195	Accuracy: 89.95%
21	Validation loss: 0.326339	Best loss: 0.326195	Accuracy: 89.95%
Early stopping!
[CV]  n_neurons=100, n_hidden_layers=3, learning_rate=0.1, batch_size=100, activation=<function relu at 0x00000214013FFE18>, total=  35.9s
[CV] n_neurons=100, n_hidden_layers=3, learning_rate=0.1, batch_size=100, activation=<function relu at 0x00000214013FFE18> 
0	Validation loss: 0.326221	Best loss: 0.326221	Accuracy: 89.95%
1	Validation loss: 0.326209	Best loss: 0.326209	Accuracy: 89.95%
2	Validation loss: 0.326259	Best loss: 0.326209	Accuracy: 89.95%
3	Validation loss: 0.326278	Best loss: 0.326209	Accuracy: 89.95%
4	Validation loss: 0.326284	Best loss: 0.326209	Accuracy: 89.95%
5	Validation loss: 0.326283	Best loss: 0.326209	Accuracy: 89.95%
6	Validation loss: 0.326282	Best loss: 0.326209	Accuracy: 89.95%
7	Validation loss: 0.326285	Best loss: 0.326209	Accuracy: 89.95%
8	V

12	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
13	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
14	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
15	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
16	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
17	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
18	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
19	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
20	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
21	Validation loss: 0.327005	Best loss: 0.252252	Accuracy: 89.95%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=5, learning_rate=0.02, batch_size=100, activation=<function elu at 0x00000214013EE8C8>, total=  30.8s
[CV] n_neurons=160, n_hidden_layers=5, learning_rate=0.1, batch_size=500, activation=<function elu at 0x00000214013EE8C8> 
0	Validation loss: 0.326281	Best loss: 0.326281	Accuracy: 89.95

13	Validation loss: 0.329110	Best loss: 0.326319	Accuracy: 89.95%
14	Validation loss: 0.327075	Best loss: 0.326319	Accuracy: 89.95%
15	Validation loss: 0.327643	Best loss: 0.326319	Accuracy: 89.95%
16	Validation loss: 0.327461	Best loss: 0.326319	Accuracy: 89.95%
17	Validation loss: 0.327403	Best loss: 0.326319	Accuracy: 89.95%
18	Validation loss: 0.327692	Best loss: 0.326319	Accuracy: 89.95%
19	Validation loss: 0.329776	Best loss: 0.326319	Accuracy: 89.95%
20	Validation loss: 0.330203	Best loss: 0.326319	Accuracy: 89.95%
21	Validation loss: 0.328973	Best loss: 0.326319	Accuracy: 89.95%
22	Validation loss: 0.327220	Best loss: 0.326319	Accuracy: 89.95%
23	Validation loss: 0.326920	Best loss: 0.326319	Accuracy: 89.95%
24	Validation loss: 0.327020	Best loss: 0.326319	Accuracy: 89.95%
25	Validation loss: 0.327490	Best loss: 0.326319	Accuracy: 89.95%
Early stopping!
[CV]  n_neurons=160, n_hidden_layers=5, learning_rate=0.1, batch_size=500, activation=<function elu at 0x00000214013EE8C8>, to

0	Validation loss: 0.245989	Best loss: 0.245989	Accuracy: 90.60%
1	Validation loss: 0.246474	Best loss: 0.245989	Accuracy: 90.57%
2	Validation loss: 0.246826	Best loss: 0.245989	Accuracy: 90.83%
3	Validation loss: 0.259351	Best loss: 0.245989	Accuracy: 90.55%
4	Validation loss: 0.257051	Best loss: 0.245989	Accuracy: 90.77%
5	Validation loss: 0.276689	Best loss: 0.245989	Accuracy: 90.57%
6	Validation loss: 0.263326	Best loss: 0.245989	Accuracy: 90.56%
7	Validation loss: 0.275011	Best loss: 0.245989	Accuracy: 90.43%
8	Validation loss: 0.302959	Best loss: 0.245989	Accuracy: 90.41%
9	Validation loss: 13.773416	Best loss: 0.245989	Accuracy: 87.33%
10	Validation loss: 1.837474	Best loss: 0.245989	Accuracy: 89.12%
11	Validation loss: 1.696353	Best loss: 0.245989	Accuracy: 88.50%
12	Validation loss: 2.373427	Best loss: 0.245989	Accuracy: 87.01%
13	Validation loss: 2.337209	Best loss: 0.245989	Accuracy: 89.01%
14	Validation loss: 2.479682	Best loss: 0.245989	Accuracy: 90.06%
15	Validation loss:

18	Validation loss: 0.359540	Best loss: 0.250832	Accuracy: 89.85%
19	Validation loss: 0.372959	Best loss: 0.250832	Accuracy: 89.93%
20	Validation loss: 0.429107	Best loss: 0.250832	Accuracy: 89.61%
21	Validation loss: 0.442567	Best loss: 0.250832	Accuracy: 89.55%
Early stopping!
[CV]  n_neurons=70, n_hidden_layers=2, learning_rate=0.01, batch_size=100, activation=<function elu at 0x00000214013EE8C8>, total=  26.4s
[CV] n_neurons=30, n_hidden_layers=3, learning_rate=0.02, batch_size=50, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426E5F048> 
0	Validation loss: 0.252406	Best loss: 0.252406	Accuracy: 90.78%
1	Validation loss: 0.270233	Best loss: 0.252406	Accuracy: 91.07%
2	Validation loss: 0.243479	Best loss: 0.243479	Accuracy: 91.18%
3	Validation loss: 0.285083	Best loss: 0.243479	Accuracy: 90.62%
4	Validation loss: 0.264375	Best loss: 0.243479	Accuracy: 91.11%
5	Validation loss: 0.264305	Best loss: 0.243479	Accuracy: 91.15%
6	Validation loss: 0.273595	Be

21	Validation loss: 0.245506	Best loss: 0.238696	Accuracy: 91.12%
22	Validation loss: 0.263893	Best loss: 0.238696	Accuracy: 91.04%
23	Validation loss: 0.244785	Best loss: 0.238696	Accuracy: 90.94%
24	Validation loss: 0.251722	Best loss: 0.238696	Accuracy: 90.67%
25	Validation loss: 0.256983	Best loss: 0.238696	Accuracy: 90.51%
26	Validation loss: 0.257869	Best loss: 0.238696	Accuracy: 91.12%
27	Validation loss: 0.241672	Best loss: 0.238696	Accuracy: 90.96%
28	Validation loss: 0.251965	Best loss: 0.238696	Accuracy: 90.69%
29	Validation loss: 0.239627	Best loss: 0.238696	Accuracy: 91.09%
30	Validation loss: 0.250328	Best loss: 0.238696	Accuracy: 90.79%
31	Validation loss: 0.239823	Best loss: 0.238696	Accuracy: 91.10%
32	Validation loss: 0.306464	Best loss: 0.238696	Accuracy: 91.05%
33	Validation loss: 0.237985	Best loss: 0.237985	Accuracy: 91.11%
34	Validation loss: 0.255296	Best loss: 0.237985	Accuracy: 90.38%
35	Validation loss: 0.247867	Best loss: 0.237985	Accuracy: 91.03%
36	Validat

6	Validation loss: 0.240701	Best loss: 0.238234	Accuracy: 91.18%
7	Validation loss: 0.242219	Best loss: 0.238234	Accuracy: 91.07%
8	Validation loss: 0.241052	Best loss: 0.238234	Accuracy: 91.20%
9	Validation loss: 0.241585	Best loss: 0.238234	Accuracy: 91.14%
10	Validation loss: 0.243093	Best loss: 0.238234	Accuracy: 91.05%
11	Validation loss: 0.244754	Best loss: 0.238234	Accuracy: 90.95%
12	Validation loss: 0.244826	Best loss: 0.238234	Accuracy: 90.93%
13	Validation loss: 0.248667	Best loss: 0.238234	Accuracy: 90.86%
14	Validation loss: 0.248100	Best loss: 0.238234	Accuracy: 90.75%
15	Validation loss: 0.247147	Best loss: 0.238234	Accuracy: 90.76%
16	Validation loss: 0.246211	Best loss: 0.238234	Accuracy: 90.85%
17	Validation loss: 0.247133	Best loss: 0.238234	Accuracy: 90.77%
18	Validation loss: 0.248557	Best loss: 0.238234	Accuracy: 90.73%
19	Validation loss: 0.249893	Best loss: 0.238234	Accuracy: 90.61%
20	Validation loss: 0.250048	Best loss: 0.238234	Accuracy: 90.67%
21	Validation 

14	Validation loss: 0.253388	Best loss: 0.240285	Accuracy: 90.84%
15	Validation loss: 0.255055	Best loss: 0.240285	Accuracy: 90.74%
16	Validation loss: 0.260015	Best loss: 0.240285	Accuracy: 90.79%
17	Validation loss: 0.254950	Best loss: 0.240285	Accuracy: 90.77%
18	Validation loss: 0.256286	Best loss: 0.240285	Accuracy: 90.81%
19	Validation loss: 0.257849	Best loss: 0.240285	Accuracy: 90.68%
20	Validation loss: 0.258495	Best loss: 0.240285	Accuracy: 90.71%
21	Validation loss: 0.258277	Best loss: 0.240285	Accuracy: 90.71%
Early stopping!
[CV]  n_neurons=10, n_hidden_layers=5, learning_rate=0.01, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426E5F048>, total=  20.3s
[CV] n_neurons=10, n_hidden_layers=5, learning_rate=0.01, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426E5F048> 
0	Validation loss: 0.243774	Best loss: 0.243774	Accuracy: 91.10%
1	Validation loss: 0.241392	Best loss: 0.241392	Ac

14	Validation loss: 41.017811	Best loss: 2.277885	Accuracy: 89.95%
15	Validation loss: 3.692694	Best loss: 2.277885	Accuracy: 89.26%
16	Validation loss: 60.773907	Best loss: 2.277885	Accuracy: 89.95%
17	Validation loss: 2257.313232	Best loss: 2.277885	Accuracy: 89.94%
18	Validation loss: 319.658142	Best loss: 2.277885	Accuracy: 89.95%
19	Validation loss: 241.880798	Best loss: 2.277885	Accuracy: 89.89%
20	Validation loss: 115.967628	Best loss: 2.277885	Accuracy: 89.96%
21	Validation loss: 158.608231	Best loss: 2.277885	Accuracy: 89.94%
22	Validation loss: 215.298203	Best loss: 2.277885	Accuracy: 89.95%
23	Validation loss: 42.260990	Best loss: 2.277885	Accuracy: 89.99%
24	Validation loss: 13.041297	Best loss: 2.277885	Accuracy: 89.06%
25	Validation loss: 55.389423	Best loss: 2.277885	Accuracy: 89.95%
26	Validation loss: 62.552223	Best loss: 2.277885	Accuracy: 89.94%
27	Validation loss: 77.855515	Best loss: 2.277885	Accuracy: 89.96%
28	Validation loss: 10.484162	Best loss: 2.277885	Accura

7	Validation loss: 0.243469	Best loss: 0.239245	Accuracy: 91.00%
8	Validation loss: 0.243497	Best loss: 0.239245	Accuracy: 91.05%
9	Validation loss: 0.244435	Best loss: 0.239245	Accuracy: 90.99%
10	Validation loss: 0.244564	Best loss: 0.239245	Accuracy: 91.00%
11	Validation loss: 0.244984	Best loss: 0.239245	Accuracy: 90.97%
12	Validation loss: 0.245637	Best loss: 0.239245	Accuracy: 90.94%
13	Validation loss: 0.246127	Best loss: 0.239245	Accuracy: 90.96%
14	Validation loss: 0.246748	Best loss: 0.239245	Accuracy: 90.97%
15	Validation loss: 0.247105	Best loss: 0.239245	Accuracy: 90.94%
16	Validation loss: 0.247534	Best loss: 0.239245	Accuracy: 90.95%
17	Validation loss: 0.248146	Best loss: 0.239245	Accuracy: 90.95%
18	Validation loss: 0.248546	Best loss: 0.239245	Accuracy: 90.91%
19	Validation loss: 0.248922	Best loss: 0.239245	Accuracy: 90.96%
20	Validation loss: 0.249308	Best loss: 0.239245	Accuracy: 90.93%
21	Validation loss: 0.249333	Best loss: 0.239245	Accuracy: 90.96%
22	Validation

22	Validation loss: 1.177074	Best loss: 0.246533	Accuracy: 89.53%
Early stopping!
[CV]  n_neurons=120, n_hidden_layers=2, learning_rate=0.1, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426F4A6A8>, total=  18.1s
[CV] n_neurons=120, n_hidden_layers=2, learning_rate=0.1, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426F4A6A8> 
0	Validation loss: 0.277863	Best loss: 0.277863	Accuracy: 91.03%
1	Validation loss: 0.249133	Best loss: 0.249133	Accuracy: 91.04%
2	Validation loss: 0.260195	Best loss: 0.249133	Accuracy: 90.83%
3	Validation loss: 0.250720	Best loss: 0.249133	Accuracy: 90.95%
4	Validation loss: 0.252865	Best loss: 0.249133	Accuracy: 90.86%
5	Validation loss: 0.263528	Best loss: 0.249133	Accuracy: 90.69%
6	Validation loss: 0.269239	Best loss: 0.249133	Accuracy: 90.61%
7	Validation loss: 0.283306	Best loss: 0.249133	Accuracy: 90.37%
8	Validation loss: 0.289457	Best loss: 0.249133	Accuracy:

8	Validation loss: 12410.651367	Best loss: 133.842773	Accuracy: 89.88%
9	Validation loss: 90953.023438	Best loss: 133.842773	Accuracy: 89.97%
10	Validation loss: 2170.280518	Best loss: 133.842773	Accuracy: 90.05%
11	Validation loss: 2943.081543	Best loss: 133.842773	Accuracy: 89.91%
12	Validation loss: 7220.642578	Best loss: 133.842773	Accuracy: 54.32%
13	Validation loss: 814.724976	Best loss: 133.842773	Accuracy: 86.66%
14	Validation loss: 1013.544800	Best loss: 133.842773	Accuracy: 74.20%
15	Validation loss: 499.206299	Best loss: 133.842773	Accuracy: 83.19%
16	Validation loss: 347.990051	Best loss: 133.842773	Accuracy: 69.94%
17	Validation loss: 538.641418	Best loss: 133.842773	Accuracy: 89.42%
18	Validation loss: 3385.001709	Best loss: 133.842773	Accuracy: 89.96%
19	Validation loss: 7292.638184	Best loss: 133.842773	Accuracy: 89.94%
20	Validation loss: 7634.350586	Best loss: 133.842773	Accuracy: 90.04%
21	Validation loss: 190833.296875	Best loss: 133.842773	Accuracy: 89.81%
Early st

55	Validation loss: 2637.110352	Best loss: 118.655136	Accuracy: 10.86%
56	Validation loss: 16611.730469	Best loss: 118.655136	Accuracy: 13.67%
57	Validation loss: 16664.792969	Best loss: 118.655136	Accuracy: 13.32%
58	Validation loss: 6528.775391	Best loss: 118.655136	Accuracy: 33.00%
Early stopping!
[CV]  n_neurons=70, n_hidden_layers=5, learning_rate=0.05, batch_size=10, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426F4A6A8>, total= 6.7min
[CV] n_neurons=120, n_hidden_layers=2, learning_rate=0.1, batch_size=100, activation=<function relu at 0x00000214013FFE18> 
0	Validation loss: 0.326399	Best loss: 0.326399	Accuracy: 89.95%
1	Validation loss: 0.326184	Best loss: 0.326184	Accuracy: 89.95%
2	Validation loss: 0.326262	Best loss: 0.326184	Accuracy: 89.95%
3	Validation loss: 0.326312	Best loss: 0.326184	Accuracy: 89.95%
4	Validation loss: 0.326330	Best loss: 0.326184	Accuracy: 89.95%
5	Validation loss: 0.326336	Best loss: 0.326184	Accuracy: 89.95%
6	Valid

4	Validation loss: 2.958631	Best loss: 1.286534	Accuracy: 89.85%
5	Validation loss: 9848.561523	Best loss: 1.286534	Accuracy: 70.71%
6	Validation loss: 7.880002	Best loss: 1.286534	Accuracy: 89.96%
7	Validation loss: 6.381007	Best loss: 1.286534	Accuracy: 74.93%
8	Validation loss: 334.840454	Best loss: 1.286534	Accuracy: 89.93%
9	Validation loss: 30.447269	Best loss: 1.286534	Accuracy: 89.95%
10	Validation loss: 24.447823	Best loss: 1.286534	Accuracy: 89.94%
11	Validation loss: 15.499322	Best loss: 1.286534	Accuracy: 89.95%
12	Validation loss: 9.980968	Best loss: 1.286534	Accuracy: 64.00%
13	Validation loss: 17.168484	Best loss: 1.286534	Accuracy: 89.95%
14	Validation loss: 3.558173	Best loss: 1.286534	Accuracy: 71.32%
15	Validation loss: 22.625246	Best loss: 1.286534	Accuracy: 89.92%
16	Validation loss: 8.470228	Best loss: 1.286534	Accuracy: 72.04%
17	Validation loss: 87.827888	Best loss: 1.286534	Accuracy: 89.86%
18	Validation loss: 769.522156	Best loss: 1.286534	Accuracy: 89.59%
19	

2	Validation loss: 0.259591	Best loss: 0.259591	Accuracy: 90.50%
3	Validation loss: 0.273031	Best loss: 0.259591	Accuracy: 89.87%
4	Validation loss: 0.266497	Best loss: 0.259591	Accuracy: 90.36%
5	Validation loss: 0.276730	Best loss: 0.259591	Accuracy: 89.98%
6	Validation loss: 0.298639	Best loss: 0.259591	Accuracy: 89.27%
7	Validation loss: 0.302652	Best loss: 0.259591	Accuracy: 88.92%
8	Validation loss: 0.298898	Best loss: 0.259591	Accuracy: 89.31%
9	Validation loss: 0.305644	Best loss: 0.259591	Accuracy: 89.76%
10	Validation loss: 0.310661	Best loss: 0.259591	Accuracy: 89.03%
11	Validation loss: 0.317512	Best loss: 0.259591	Accuracy: 89.38%
12	Validation loss: 0.313273	Best loss: 0.259591	Accuracy: 89.36%
13	Validation loss: 0.309873	Best loss: 0.259591	Accuracy: 88.76%
14	Validation loss: 0.301807	Best loss: 0.259591	Accuracy: 88.92%
15	Validation loss: 0.343766	Best loss: 0.259591	Accuracy: 87.85%
16	Validation loss: 0.340132	Best loss: 0.259591	Accuracy: 89.52%
17	Validation loss

10	Validation loss: 294.422211	Best loss: 17.626123	Accuracy: 87.22%
11	Validation loss: 721.527100	Best loss: 17.626123	Accuracy: 89.94%
12	Validation loss: 473.549194	Best loss: 17.626123	Accuracy: 89.74%
13	Validation loss: 825.910522	Best loss: 17.626123	Accuracy: 89.95%
14	Validation loss: 400.678711	Best loss: 17.626123	Accuracy: 89.90%
15	Validation loss: 392.132263	Best loss: 17.626123	Accuracy: 90.01%
16	Validation loss: 688.653625	Best loss: 17.626123	Accuracy: 89.86%
17	Validation loss: 3813.540771	Best loss: 17.626123	Accuracy: 89.76%
18	Validation loss: 659.821289	Best loss: 17.626123	Accuracy: 89.98%
19	Validation loss: 212.313004	Best loss: 17.626123	Accuracy: 90.07%
20	Validation loss: 131.336075	Best loss: 17.626123	Accuracy: 85.64%
21	Validation loss: 620.561096	Best loss: 17.626123	Accuracy: 77.51%
Early stopping!
[CV]  n_neurons=50, n_hidden_layers=3, learning_rate=0.1, batch_size=10, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426E5

12	Validation loss: 8931.499023	Best loss: 146.907181	Accuracy: 90.05%
13	Validation loss: 738.871704	Best loss: 146.907181	Accuracy: 87.80%
14	Validation loss: 1328.954102	Best loss: 146.907181	Accuracy: 90.26%
15	Validation loss: 6558.999023	Best loss: 146.907181	Accuracy: 89.95%
16	Validation loss: 11837.741211	Best loss: 146.907181	Accuracy: 90.21%
17	Validation loss: 4483.594238	Best loss: 146.907181	Accuracy: 90.07%
18	Validation loss: 995.612610	Best loss: 146.907181	Accuracy: 89.25%
19	Validation loss: 2167.159668	Best loss: 146.907181	Accuracy: 90.20%
20	Validation loss: 1697.333618	Best loss: 146.907181	Accuracy: 89.95%
21	Validation loss: 3811.387939	Best loss: 146.907181	Accuracy: 89.94%
22	Validation loss: 1168.940430	Best loss: 146.907181	Accuracy: 90.06%
Early stopping!
[CV]  n_neurons=140, n_hidden_layers=4, learning_rate=0.1, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426E5F048>, total= 1.2min
[CV] n_neurons=140, n_hidd

21	Validation loss: 0.446954	Best loss: 0.245822	Accuracy: 89.71%
Early stopping!
[CV]  n_neurons=70, n_hidden_layers=2, learning_rate=0.01, batch_size=500, activation=<function relu at 0x00000214013FFE18>, total=  12.5s
[CV] n_neurons=70, n_hidden_layers=2, learning_rate=0.01, batch_size=500, activation=<function relu at 0x00000214013FFE18> 
0	Validation loss: 0.244581	Best loss: 0.244581	Accuracy: 90.86%
1	Validation loss: 0.244753	Best loss: 0.244581	Accuracy: 91.00%
2	Validation loss: 0.247559	Best loss: 0.244581	Accuracy: 90.99%
3	Validation loss: 0.256704	Best loss: 0.244581	Accuracy: 90.85%
4	Validation loss: 0.254457	Best loss: 0.244581	Accuracy: 90.65%
5	Validation loss: 0.260191	Best loss: 0.244581	Accuracy: 90.54%
6	Validation loss: 0.260461	Best loss: 0.244581	Accuracy: 90.57%
7	Validation loss: 0.266428	Best loss: 0.244581	Accuracy: 90.49%
8	Validation loss: 0.270894	Best loss: 0.244581	Accuracy: 90.36%
9	Validation loss: 0.276887	Best loss: 0.244581	Accuracy: 90.40%
10	Va

8	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
9	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
10	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
11	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
12	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
13	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
14	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
15	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
16	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
17	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
18	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
19	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
20	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
21	Validation loss: 0.327855	Best loss: 0.327345	Accuracy: 89.95%
Early stopping!
[CV]  n_neurons=90, n_hidden_layers=5, learning_rate=0.05, bat

0	Validation loss: 1.042796	Best loss: 1.042796	Accuracy: 19.58%
1	Validation loss: 1.204993	Best loss: 1.042796	Accuracy: 10.07%
2	Validation loss: 0.725262	Best loss: 0.725262	Accuracy: 10.18%
3	Validation loss: 0.816362	Best loss: 0.725262	Accuracy: 10.10%
4	Validation loss: 0.889528	Best loss: 0.725262	Accuracy: 10.05%
5	Validation loss: 0.678168	Best loss: 0.678168	Accuracy: 89.95%
6	Validation loss: 1.015329	Best loss: 0.678168	Accuracy: 10.05%
7	Validation loss: 1.227748	Best loss: 0.678168	Accuracy: 10.05%
8	Validation loss: 0.993529	Best loss: 0.678168	Accuracy: 10.05%
9	Validation loss: 0.577042	Best loss: 0.577042	Accuracy: 89.95%
10	Validation loss: 0.581281	Best loss: 0.577042	Accuracy: 89.95%
11	Validation loss: 0.633322	Best loss: 0.577042	Accuracy: 89.95%
12	Validation loss: 1.071755	Best loss: 0.577042	Accuracy: 10.05%
13	Validation loss: 0.598612	Best loss: 0.577042	Accuracy: 89.95%
14	Validation loss: 1.007240	Best loss: 0.577042	Accuracy: 10.05%
15	Validation loss: 

4	Validation loss: 0.282832	Best loss: 0.282832	Accuracy: 89.93%
5	Validation loss: 0.428658	Best loss: 0.282832	Accuracy: 81.33%
6	Validation loss: 0.386655	Best loss: 0.282832	Accuracy: 83.36%
7	Validation loss: 0.320725	Best loss: 0.282832	Accuracy: 87.68%
8	Validation loss: 0.316560	Best loss: 0.282832	Accuracy: 90.33%
9	Validation loss: 0.318683	Best loss: 0.282832	Accuracy: 89.78%
10	Validation loss: 0.312862	Best loss: 0.282832	Accuracy: 87.11%
11	Validation loss: 0.283703	Best loss: 0.282832	Accuracy: 90.10%
12	Validation loss: 0.376726	Best loss: 0.282832	Accuracy: 85.64%
13	Validation loss: 0.362020	Best loss: 0.282832	Accuracy: 85.54%
14	Validation loss: 0.374353	Best loss: 0.282832	Accuracy: 83.63%
15	Validation loss: 0.337740	Best loss: 0.282832	Accuracy: 86.43%
16	Validation loss: 0.295777	Best loss: 0.282832	Accuracy: 88.80%
17	Validation loss: 0.321579	Best loss: 0.282832	Accuracy: 87.49%
18	Validation loss: 0.433697	Best loss: 0.282832	Accuracy: 90.87%
19	Validation lo

21	Validation loss: 0.396505	Best loss: 0.315012	Accuracy: 84.39%
22	Validation loss: 0.882408	Best loss: 0.315012	Accuracy: 62.40%
23	Validation loss: 0.452910	Best loss: 0.315012	Accuracy: 80.93%
24	Validation loss: 0.374661	Best loss: 0.315012	Accuracy: 87.25%
25	Validation loss: 0.597048	Best loss: 0.315012	Accuracy: 73.66%
26	Validation loss: 0.596055	Best loss: 0.315012	Accuracy: 77.10%
27	Validation loss: 0.424846	Best loss: 0.315012	Accuracy: 83.56%
28	Validation loss: 0.491765	Best loss: 0.315012	Accuracy: 78.91%
29	Validation loss: 0.451305	Best loss: 0.315012	Accuracy: 81.87%
30	Validation loss: 0.549246	Best loss: 0.315012	Accuracy: 74.95%
31	Validation loss: 0.637056	Best loss: 0.315012	Accuracy: 82.63%
32	Validation loss: 0.632626	Best loss: 0.315012	Accuracy: 68.66%
33	Validation loss: 0.558985	Best loss: 0.315012	Accuracy: 78.22%
34	Validation loss: 0.522347	Best loss: 0.315012	Accuracy: 81.25%
35	Validation loss: 0.424289	Best loss: 0.315012	Accuracy: 83.03%
36	Validat

3	Validation loss: 0.333196	Best loss: 0.326193	Accuracy: 89.95%
4	Validation loss: 0.328063	Best loss: 0.326193	Accuracy: 89.95%
5	Validation loss: 0.332057	Best loss: 0.326193	Accuracy: 89.95%
6	Validation loss: 0.333644	Best loss: 0.326193	Accuracy: 89.95%
7	Validation loss: 0.480620	Best loss: 0.326193	Accuracy: 89.95%
8	Validation loss: 0.541792	Best loss: 0.326193	Accuracy: 89.95%
9	Validation loss: 0.346649	Best loss: 0.326193	Accuracy: 89.95%
10	Validation loss: 0.849517	Best loss: 0.326193	Accuracy: 10.05%
11	Validation loss: 0.327366	Best loss: 0.326193	Accuracy: 89.95%
12	Validation loss: 0.327019	Best loss: 0.326193	Accuracy: 89.95%
13	Validation loss: 0.350985	Best loss: 0.326193	Accuracy: 89.95%
14	Validation loss: 0.566172	Best loss: 0.326193	Accuracy: 89.95%
15	Validation loss: 0.653780	Best loss: 0.326193	Accuracy: 89.95%
16	Validation loss: 0.400969	Best loss: 0.326193	Accuracy: 89.95%
17	Validation loss: 0.366032	Best loss: 0.326193	Accuracy: 89.95%
18	Validation los

0	Validation loss: 0.271190	Best loss: 0.271190	Accuracy: 90.25%
1	Validation loss: 0.274131	Best loss: 0.271190	Accuracy: 90.46%
2	Validation loss: 48852.363281	Best loss: 0.271190	Accuracy: 89.78%
3	Validation loss: 180.373474	Best loss: 0.271190	Accuracy: 88.60%
4	Validation loss: 55.259274	Best loss: 0.271190	Accuracy: 89.67%
5	Validation loss: 47.015797	Best loss: 0.271190	Accuracy: 72.60%
6	Validation loss: 97.356056	Best loss: 0.271190	Accuracy: 83.95%
7	Validation loss: 45.812969	Best loss: 0.271190	Accuracy: 89.88%
8	Validation loss: 88321.562500	Best loss: 0.271190	Accuracy: 89.41%
9	Validation loss: 5597.521484	Best loss: 0.271190	Accuracy: 89.89%
10	Validation loss: 3185.757080	Best loss: 0.271190	Accuracy: 89.90%
11	Validation loss: 641.900513	Best loss: 0.271190	Accuracy: 82.47%
12	Validation loss: 450.539337	Best loss: 0.271190	Accuracy: 85.50%
13	Validation loss: 179.634827	Best loss: 0.271190	Accuracy: 89.92%
14	Validation loss: 233.354050	Best loss: 0.271190	Accuracy:

12	Validation loss: 0.261883	Best loss: 0.248245	Accuracy: 90.34%
13	Validation loss: 0.264153	Best loss: 0.248245	Accuracy: 90.72%
14	Validation loss: 0.274740	Best loss: 0.248245	Accuracy: 90.78%
15	Validation loss: 0.285008	Best loss: 0.248245	Accuracy: 90.67%
16	Validation loss: 0.293563	Best loss: 0.248245	Accuracy: 90.10%
17	Validation loss: 0.309156	Best loss: 0.248245	Accuracy: 90.08%
18	Validation loss: 0.318617	Best loss: 0.248245	Accuracy: 90.25%
19	Validation loss: 0.338256	Best loss: 0.248245	Accuracy: 89.84%
20	Validation loss: 0.360299	Best loss: 0.248245	Accuracy: 89.77%
21	Validation loss: 0.377038	Best loss: 0.248245	Accuracy: 89.51%
Early stopping!
[CV]  n_neurons=160, n_hidden_layers=3, learning_rate=0.01, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426F4A6A8>, total=  30.8s
[CV] n_neurons=160, n_hidden_layers=3, learning_rate=0.01, batch_size=500, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x

24	Validation loss: 10.352236	Best loss: 1.851437	Accuracy: 90.30%
Early stopping!
[CV]  n_neurons=70, n_hidden_layers=3, learning_rate=0.1, batch_size=50, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426F4A6A8>, total=  49.8s
[CV] n_neurons=160, n_hidden_layers=3, learning_rate=0.05, batch_size=100, activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426F4A6A8> 
0	Validation loss: 0.257389	Best loss: 0.257389	Accuracy: 90.58%
1	Validation loss: 7.550720	Best loss: 0.257389	Accuracy: 89.33%
2	Validation loss: 6.771464	Best loss: 0.257389	Accuracy: 90.03%
3	Validation loss: 3.043350	Best loss: 0.257389	Accuracy: 90.04%
4	Validation loss: 1.228607	Best loss: 0.257389	Accuracy: 89.98%
5	Validation loss: 29.071281	Best loss: 0.257389	Accuracy: 88.34%
6	Validation loss: 16.263134	Best loss: 0.257389	Accuracy: 82.80%
7	Validation loss: 6.068343	Best loss: 0.257389	Accuracy: 89.10%
8	Validation loss: 5.458417	Best loss: 0.257389	Accurac

11	Validation loss: 0.781013	Best loss: 0.326182	Accuracy: 89.95%
12	Validation loss: 0.906761	Best loss: 0.326182	Accuracy: 10.05%
13	Validation loss: 0.406168	Best loss: 0.326182	Accuracy: 89.95%
14	Validation loss: 0.829569	Best loss: 0.326182	Accuracy: 89.95%
15	Validation loss: 0.443189	Best loss: 0.326182	Accuracy: 89.95%
16	Validation loss: 0.341864	Best loss: 0.326182	Accuracy: 89.95%
17	Validation loss: 0.381337	Best loss: 0.326182	Accuracy: 89.95%
18	Validation loss: 0.580126	Best loss: 0.326182	Accuracy: 89.95%
19	Validation loss: 0.598174	Best loss: 0.326182	Accuracy: 89.95%
20	Validation loss: 1.068177	Best loss: 0.326182	Accuracy: 89.95%
21	Validation loss: 0.339900	Best loss: 0.326182	Accuracy: 89.95%
22	Validation loss: 0.326185	Best loss: 0.326182	Accuracy: 89.95%
Early stopping!
[CV]  n_neurons=140, n_hidden_layers=5, learning_rate=0.1, batch_size=50, activation=<function elu at 0x00000214013EE8C8>, total= 1.6min
[CV] n_neurons=140, n_hidden_layers=5, learning_rate=0.

14	Validation loss: 0.465586	Best loss: 0.412806	Accuracy: 89.95%
15	Validation loss: 0.465588	Best loss: 0.412806	Accuracy: 89.95%
16	Validation loss: 0.465586	Best loss: 0.412806	Accuracy: 89.95%
17	Validation loss: 0.465588	Best loss: 0.412806	Accuracy: 89.95%
18	Validation loss: 0.465588	Best loss: 0.412806	Accuracy: 89.95%
19	Validation loss: 0.465586	Best loss: 0.412806	Accuracy: 89.95%
20	Validation loss: 0.465586	Best loss: 0.412806	Accuracy: 89.95%
21	Validation loss: 0.465588	Best loss: 0.412806	Accuracy: 89.95%
Early stopping!
[CV]  n_neurons=30, n_hidden_layers=2, learning_rate=0.1, batch_size=50, activation=<function elu at 0x00000214013EE8C8>, total=  31.4s


[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed: 194.9min finished


0	Validation loss: 0.239230	Best loss: 0.239230	Accuracy: 91.24%
1	Validation loss: 0.238639	Best loss: 0.238639	Accuracy: 91.24%
2	Validation loss: 0.238121	Best loss: 0.238121	Accuracy: 91.28%
3	Validation loss: 0.238273	Best loss: 0.238121	Accuracy: 91.17%
4	Validation loss: 0.239033	Best loss: 0.238121	Accuracy: 91.29%
5	Validation loss: 0.239405	Best loss: 0.238121	Accuracy: 91.23%
6	Validation loss: 0.240298	Best loss: 0.238121	Accuracy: 91.15%
7	Validation loss: 0.238747	Best loss: 0.238121	Accuracy: 91.25%
8	Validation loss: 0.240295	Best loss: 0.238121	Accuracy: 91.26%
9	Validation loss: 0.241841	Best loss: 0.238121	Accuracy: 91.20%
10	Validation loss: 0.243012	Best loss: 0.238121	Accuracy: 91.14%
11	Validation loss: 0.241260	Best loss: 0.238121	Accuracy: 91.23%
12	Validation loss: 0.244111	Best loss: 0.238121	Accuracy: 91.15%
13	Validation loss: 0.242935	Best loss: 0.238121	Accuracy: 91.12%
14	Validation loss: 0.242649	Best loss: 0.238121	Accuracy: 91.11%
15	Validation loss: 

RandomizedSearchCV(cv=3, error_score='raise',
          estimator=DNNClassifier(activation=<function elu at 0x00000214013EE8C8>,
       batch_norm_momentum=None, batch_size=20, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0x000002141701EBA8>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=100,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42),
          fit_params=None, iid=True, n_iter=50, n_jobs=1,
          param_distributions={'n_neurons': [10, 30, 50, 70, 90, 100, 120, 140, 160], 'batch_size': [10, 50, 100, 500], 'learning_rate': [0.01, 0.02, 0.05, 0.1], 'activation': [<function relu at 0x00000214013FFE18>, <function elu at 0x00000214013EE8C8>, <function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426E5F048>, <function leaky_relu.<locals>.parametrized_leaky_relu at 0x0000021426F4A6A8>], 'n_hidden_layers': [2, 3, 4, 5]},
          pre_dispatch='2*n_jobs', ra

In [19]:
rnd_search.best_params_

{'n_neurons': 10,
 'n_hidden_layers': 5,
 'learning_rate': 0.01,
 'batch_size': 100,
 'activation': <function __main__.leaky_relu.<locals>.parametrized_leaky_relu(z, name=None)>}

In [20]:
rnd_search.best_estimator_.save("DNN after random search")

## Ensemble stacking

In [12]:
lgb_clf = lgb.Booster(model_file='lightbgm original data with 0.899.txt') 

In [32]:
def leaky_relu(alpha=0.01):
    def parametrized_leaky_relu(z, name=None):
        return tf.maximum(alpha * z, z, name=name)
    return parametrized_leaky_relu
dnn_clf = DNNClassifier(random_state=42, n_neurons=10,n_hidden_layers=5,learning_rate=0.01, batch_size=100,activation=leaky_relu(alpha=0.01))
dnn_clf.fit(X_train, y_train, n_epochs=1000, X_valid=X_valid, y_valid=y_valid)

Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Colocations handled automatically by placer.
0	Validation loss: 0.326859	Best loss: 0.326859	Accuracy: 89.95%
1	Validation loss: 0.326427	Best loss: 0.326427	Accuracy: 89.95%
2	Validation loss: 0.326017	Best loss: 0.326017	Accuracy: 89.95%
3	Validation loss: 0.326300	Best loss: 0.326017	Accuracy: 89.95%
4	Validation loss: 0.326324	Best loss: 0.326017	Accuracy: 89.95%
5	Validation loss: 0.326390	Best loss: 0.326017	Accuracy: 89.95%
6	Validation loss: 0.326337	Best loss: 0.326017	Accuracy: 89.95%
7	Validation loss: 0.326559	Best loss: 0.326017	Accuracy: 89.95%
8	Validation loss: 0.326571	Best loss: 0.326017	Accuracy: 89.95%
9	Validation loss: 0.326495	Best loss: 0.326017	Accuracy: 89.95%
10	Validation loss: 0.326187	Best loss: 0.326017	Accuracy: 89.95%
11	Validation loss: 0.326213	Best loss: 0.326017	Accuracy: 89.95%
12	Validation loss: 0.326408	Best loss: 0.326017	Accuracy: 89.95%
13	Validation loss: 0

DNNClassifier(activation=<function leaky_relu.<locals>.parametrized_leaky_relu at 0x00000113BD111D90>,
       batch_norm_momentum=None, batch_size=100, dropout_rate=None,
       initializer=<tensorflow.python.ops.init_ops.VarianceScaling object at 0x00000113A38D9D30>,
       learning_rate=0.01, n_hidden_layers=5, n_neurons=10,
       optimizer_class=<class 'tensorflow.python.training.adam.AdamOptimizer'>,
       random_state=42)

In [77]:
n_splits = 5 # Number of K-fold Splits
splits = list(StratifiedKFold(n_splits=n_splits, shuffle=True).split(X, y))
predictions = np.zeros(len(X_test))

for i, (train_idx, valid_idx) in enumerate(splits):  
    X_train_predictions = np.empty((32000, 3), dtype=np.float32)
    print(f'Fold {i + 1}')
    #lightBGM training
    x_train = np.array(X)
    y_train = np.array(y)

    trn_data = lgb.Dataset(x_train[train_idx.astype(int)], label=y_train[train_idx.astype(int)])
    val_data = lgb.Dataset(x_train[valid_idx.astype(int)], label=y_train[valid_idx.astype(int)])    
    num_round = 15000
    
    lgb_clf = lgb.train(lgb_param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=1000, early_stopping_rounds = 250)
    
    X_train_predictions[:, 0] = lgb_clf.predict(trn_data)
    
    #XGboost
    trn_data = xgb.DMatrix(x_train[train_idx.astype(int)], label=y_train[train_idx.astype(int)])
    val_data = xgb.DMatrix(x_train[valid_idx.astype(int)], label=y_train[valid_idx.astype(int)])
    watchlist = [(trn_data, 'train'), (val_data, 'valid')]
    xgb_clf=xgb.train(xgb_param, trn_data, 700, evals=watchlist, early_stopping_rounds=250, verbose_eval=500)
    X_train_predictions[:, 1] = xgb_clf.predict(trn_data)
    
    #DNN
    dnn_clf = DNNClassifier(random_state=42, n_neurons=10,n_hidden_layers=5,learning_rate=0.01, 
                            batch_size=100,activation=leaky_relu(alpha=0.01))
    dnn_clf.fit(x_train[train_idx.astype(int)], y_train[train_idx.astype(int)], n_epochs=1000, 
                X_valid=x_train[valid_idx.astype(int)], y_valid=y_train[valid_idx.astype(int)])
    trn_data=x_train[valid_idx.astype(int)]
    X_train_predictions[:, 2] = dnn_clf.predict(trn_data).ravel()
    
    #Stack with RF
    rnd_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
    rnd_forest_blender.fit(X_train_predictions, y_train[train_idx.astype(int)])
    
    oof[valid_idx] =  rnd_forest_blender.predict(x_train[valid_idx])
    
    
    # Prediction
    X_test_predictions = np.empty((len(X_test), 3), dtype=np.float32)
    X_test_predictions[:, 0] = lgb_clf.predict(X_test)
    xgb_test = xgb.DMatrix(X_test)
    X_test_predictions[:, 1] = xgb_clf.predict(xgb_test)
    X_test_predictions[:, 2] = dnn_clf.predict(X_test).ravel()
    
    predictions += rnd_forest_blender.predict(X_test_predictions)/5

print("CV score: {:<8.5f}".format(roc_auc_score(y, oof)))

Fold 1
Training until validation scores don't improve for 250 rounds.
[1000]	training's auc: 0.863042	valid_1's auc: 0.846143
[2000]	training's auc: 0.891049	valid_1's auc: 0.871905
[3000]	training's auc: 0.903758	valid_1's auc: 0.882893
[4000]	training's auc: 0.91159	valid_1's auc: 0.889249
[5000]	training's auc: 0.916991	valid_1's auc: 0.89308
[6000]	training's auc: 0.920833	valid_1's auc: 0.895317
[7000]	training's auc: 0.924171	valid_1's auc: 0.896886
[8000]	training's auc: 0.927212	valid_1's auc: 0.897698
[9000]	training's auc: 0.930283	valid_1's auc: 0.898212
Early stopping, best iteration is:
[9468]	training's auc: 0.93173	valid_1's auc: 0.898429


TypeError: Cannot use Dataset instance for prediction, please use raw data instead

In [45]:
X_train_predictions = np.empty((len(X_train), 3), dtype=np.float32)
X_train_predictions[:, 0] = lgb_clf.predict(X_train)

In [46]:
xgb_train = xgb.DMatrix(X_train)
X_train_predictions[:, 1] = xgb_clf.predict(xgb_train)

In [48]:
X_train_predictions[:, 2] = dnn_clf.predict(X_train).ravel()

In [49]:
from sklearn.ensemble import RandomForestClassifier
rnd_forest_blender = RandomForestClassifier(n_estimators=200, oob_score=True, random_state=42)
rnd_forest_blender.fit(X_train_predictions, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=200, n_jobs=1,
            oob_score=True, random_state=42, verbose=0, warm_start=False)

In [51]:
X_val_predictions = np.empty((len(X_valid), 3), dtype=np.float32)
X_val_predictions[:, 0] = lgb_clf.predict(X_valid)

In [56]:
xgb_val = xgb.DMatrix(X_valid)
X_val_predictions[:, 1] = xgb_clf.predict(xgb_val)

In [57]:
X_val_predictions[:, 2] = dnn_clf.predict(X_valid).ravel()

In [58]:
y_pred = rnd_forest_blender.predict(X_val_predictions)

In [59]:
from sklearn.metrics import accuracy_score
accuracy_score(y_valid, y_pred)

0.960575

In [70]:
y_proba = rnd_forest_blender.predict_proba(X_val_predictions)

## Submission

In [None]:
X_test_predictions = np.empty((len(X_test), 3), dtype=np.float32)
X_test_predictions[:, 0] = lgb_clf.predict(X_test)

In [None]:
submission=pd.DataFrame()
submission['ID_code']=test['ID_code']
def generate_submission(pred, submission):
    submission['target']=pred
    submision.to_csv('submission.csv', index=False)
    return submission