In [1]:
import numpy as np
import pandas as pd
import feather
from bayes_opt import BayesianOptimization
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score

In [2]:
def blend_opt(lgbm_wt = 1, keras_wt = 1, xgb_wt = 1, ada_wt = 1, gp_wt = 1):
    """Score a weighted average of the base-model predictions by ROC AUC.

    Reads the module-level ``train_df`` (columns "LGBM", "Keras", "AdaBoost",
    "XGBoost") and ``target``. Neither object is modified, so the function
    can be called repeatedly by an optimizer without side effects.

    Parameters
    ----------
    lgbm_wt, keras_wt, xgb_wt, ada_wt : float
        Weight applied to the corresponding prediction column.
    gp_wt : float
        Accepted for backward compatibility; it does not enter the blend.

    Returns
    -------
    float
        ``roc_auc_score(target, blended_preds)`` for the weighted blend.
    """
    weight_total = lgbm_wt + keras_wt + ada_wt + xgb_wt
    blended_preds = train_df["LGBM"] * lgbm_wt + train_df["Keras"] * keras_wt + \
                    train_df["AdaBoost"] * ada_wt + train_df["XGBoost"] * xgb_wt
    # A zero weight total would turn the normalised blend into NaN/inf and
    # make the scorer fail; in that case the un-normalised blend is scored.
    if weight_total != 0:
        blended_preds = blended_preds / weight_total
    return roc_auc_score(target, blended_preds)

In [3]:
# Train-set prediction files, one per base model (order matters for unpacking).
train_prediction_files = [
    "v11_predictions_LGBM_train.csv",
    "v11_predictions_Keras_train.csv",
    "v11_predictions_AdaBoost_train.csv",
    "v11_predictions_XGBoost_train.csv",
]
lgbm, keras, adaboost, xgboost = [pd.read_csv(csv_path) for csv_path in train_prediction_files]

# Ground-truth labels used to score the blend.
target_frame = pd.read_feather("target.feather")
target = target_frame["TARGET"]

In [4]:
# Collect each model's "TARGET" predictions as one column of a single frame
# that shares the LGBM frame's index.
train_df = pd.DataFrame(index = lgbm.index)
for column_name, model_frame in (("LGBM", lgbm),
                                 ("Keras", keras),
                                 ("AdaBoost", adaboost),
                                 ("XGBoost", xgboost)):
    train_df[column_name] = model_frame["TARGET"]

In [5]:
# Search bounds: every blend weight is explored over the same (0, 20) interval.
weights_range = dict.fromkeys(("lgbm_wt", "keras_wt", "xgb_wt", "ada_wt"), (0, 20))

In [6]:
# Optimizer over the blend weights; blend_opt is the objective to maximise.
# The search is stochastic, so a fixed seed keeps the selected weights
# reproducible across kernel restarts.
bayes_cv = BayesianOptimization(blend_opt, weights_range, random_state = 42)

In [7]:
# Search budget: 25 initial probes followed by 25 optimisation iterations.
search_budget = {"init_points": 25, "n_iter": 25}
bayes_cv.maximize(**search_budget)

[31mInitialization[0m
[94m------------------------------------------------------------------------------[0m
 Step |   Time |      Value |    ada_wt |   keras_wt |   lgbm_wt |    xgb_wt | 
    1 | 00m00s | [35m   0.85219[0m | [32m   3.8499[0m | [32m    1.8183[0m | [32m  16.4175[0m | [32m   0.6554[0m | 
    2 | 00m00s |    0.80793 |    6.3558 |    12.0252 |    3.4614 |   11.2160 | 
    3 | 00m00s |    0.81830 |   13.7759 |     5.2468 |    5.9394 |   11.6345 | 
    4 | 00m00s |    0.83287 |    9.2364 |     2.8472 |    3.8943 |    2.0834 | 
    5 | 00m00s |    0.80794 |   14.8014 |    16.1776 |    5.1903 |   16.7846 | 
    6 | 00m00s |    0.84011 |   18.1093 |    11.7350 |    8.4350 |    0.4807 | 
    7 | 00m00s |    0.83918 |    7.2230 |    13.3970 |   15.2407 |    4.4395 | 
    8 | 00m00s |    0.80666 |    9.2984 |    10.7428 |    3.6900 |   14.0574 | 
    9 | 00m00s |    0.81160 |   13.8122 |    13.8781 |    5.9688 |   15.4149 | 
   10 | 00m00s |    0.81268 |   17.3103 |  



[31mBayesian Optimization[0m
[94m------------------------------------------------------------------------------[0m
 Step |   Time |      Value |    ada_wt |   keras_wt |   lgbm_wt |    xgb_wt | 
   26 | 00m10s |    0.84460 |   19.8035 |    19.9448 |   19.4728 |    0.1475 | 
   27 | 00m11s |    0.82680 |   19.4328 |    19.6886 |   19.3592 |   19.3531 | 
   28 | 00m12s | [35m   0.85337[0m | [32m  12.8650[0m | [32m    0.1811[0m | [32m  18.5684[0m | [32m   0.0972[0m | 


  " state: %s" % convergence_dict)


   29 | 00m13s |    0.84646 |    0.3659 |    18.8866 |   19.7958 |    0.3207 | 
   30 | 00m11s | [35m   0.85414[0m | [32m   3.4993[0m | [32m    0.2265[0m | [32m  12.5974[0m | [32m   0.0698[0m | 
   31 | 00m11s |    0.85191 |   18.8985 |     0.4295 |   13.8515 |    0.0554 | 
   32 | 00m10s |    0.85389 |    7.2036 |     0.0833 |   14.2424 |    0.0165 | 
   33 | 00m10s | [35m   0.85475[0m | [32m   0.5161[0m | [32m    0.2696[0m | [32m  16.9580[0m | [32m   0.0319[0m | 
   34 | 00m11s |    0.85286 |    0.1361 |     2.6466 |   14.1688 |    0.0011 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   35 | 00m11s |    0.85428 |    6.0127 |     0.0445 |   19.7901 |    0.0355 | 


  " state: %s" % convergence_dict)


   36 | 00m10s |    0.85300 |   17.9995 |     0.2137 |   19.1396 |    0.0153 | 


  " state: %s" % convergence_dict)


   37 | 00m11s |    0.85429 |    0.6652 |     0.0664 |   14.5162 |    0.2442 | 
   38 | 00m11s |    0.85430 |    3.7618 |     0.1109 |   17.4974 |    0.1077 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   39 | 00m11s |    0.85398 |    0.1904 |     1.1336 |   19.9208 |    0.3118 | 


  " state: %s" % convergence_dict)


   40 | 00m09s |    0.85452 |    1.7454 |     0.2772 |   19.1211 |    0.1044 | 


  " state: %s" % convergence_dict)


   41 | 00m10s |    0.85473 |    1.1810 |     0.3052 |   19.4719 |    0.0174 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   42 | 00m11s |    0.85422 |    2.0780 |     0.3619 |   16.8258 |    0.1939 | 
   43 | 00m10s |    0.85391 |    2.7379 |     0.2408 |   17.4776 |    0.3605 | 
   44 | 00m10s |    0.85446 |    1.3462 |     0.5355 |   16.9482 |    0.0750 | 
   45 | 00m11s |    0.85389 |    1.0809 |     1.5101 |   18.4041 |    0.1258 | 


  " state: %s" % convergence_dict)


   46 | 00m09s |    0.85404 |    0.8602 |     1.6910 |   19.5196 |    0.0197 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   47 | 00m10s | [35m   0.85476[0m | [32m   1.3061[0m | [32m    0.1811[0m | [32m  19.6037[0m | [32m   0.0083[0m | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   48 | 00m11s |    0.85420 |    0.2408 |     0.3720 |   19.1961 |    0.3675 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   49 | 00m10s |    0.85331 |    2.5411 |     0.3719 |   17.2102 |    0.6650 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


   50 | 00m11s |    0.85402 |    0.3371 |     0.0023 |   16.8088 |    0.4524 | 


  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)
  " state: %s" % convergence_dict)


In [11]:
# Pull the best-scoring weight set out of the optimizer's stored results.
optimizer_state = vars(bayes_cv)
best_result = optimizer_state['res']['max']
max_params = best_result['max_params']

In [9]:
# Test-set prediction files, one per base model (order matters for unpacking).
test_prediction_files = [
    "v11_predictions_LGBM.csv",
    "v11_predictions_Keras.csv",
    "v11_predictions_AdaBoost.csv",
    "v11_predictions_XGBoost.csv",
]
lgbm_test, keras_test, adaboost_test, xgboost_test = [
    pd.read_csv(csv_path) for csv_path in test_prediction_files
]

In [12]:
# Weight each model's test predictions with the optimised blend weights.
lgbm_part = lgbm_test["TARGET"] * max_params['lgbm_wt']
keras_part = keras_test["TARGET"] * max_params['keras_wt']
ada_part = adaboost_test["TARGET"] * max_params['ada_wt']
xgb_part = xgboost_test["TARGET"] * max_params['xgb_wt']

# Same left-to-right summation order as the weights below, then normalise
# so the blend is a weighted average rather than a weighted sum.
weighted_sum = lgbm_part + keras_part + ada_part + xgb_part
weight_total = max_params['lgbm_wt'] + max_params['keras_wt'] + \
               max_params['ada_wt'] + max_params['xgb_wt']

blended_test = weighted_sum / weight_total

In [14]:
# Pair the blended scores with the ids from the LGBM test file and write
# them out without the pandas index column.
submission_columns = {"SK_ID_CURR": lgbm_test["SK_ID_CURR"], "TARGET": blended_test}
blended_preds = pd.DataFrame(submission_columns)
blended_preds.to_csv("v11_predictions_blended.csv", index = False)