In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 200)

In [2]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

In [3]:
train_test_data = pd.concat([train_data, test_data])

In [4]:
train_test_data.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0.0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1.0,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1.0,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1.0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0.0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0.0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0.0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0.0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1.0,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1.0,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [5]:
train_test_data.isnull().sum()

PassengerId       0
Survived        418
Pclass            0
Name              0
Sex               0
Age             263
SibSp             0
Parch             0
Ticket            0
Fare              1
Cabin          1014
Embarked          2
dtype: int64

In [6]:
train_test_data.shape

(1309, 12)

In [7]:
train_test_data['Cabin'] = train_test_data['Cabin'].str[:1]

In [8]:
object_columns = ['Name', 'Sex', 'Ticket', 'Cabin', 'Embarked']

for column in object_columns:
    train_test_data[column] = pd.factorize(train_test_data[column])[0]

In [9]:
train_test_data['Cabin'] = train_test_data['Cabin'].replace(0, np.nan)

In [10]:
train_test_data['Cabin'].fillna(train_test_data.groupby('Pclass')['Cabin'].transform('median'), inplace=True)

In [11]:
train_test_data.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0.0,3,0,0,22.0,1,0,0,7.25,-1.0,0
1,2,1.0,1,1,1,38.0,1,0,1,71.2833,3.0,1
2,3,1.0,3,2,1,26.0,0,0,2,7.925,-1.0,0
3,4,1.0,1,3,1,35.0,1,0,3,53.1,3.0,0
4,5,0.0,3,4,0,35.0,0,0,4,8.05,-1.0,0
5,6,0.0,3,5,0,,0,0,5,8.4583,-1.0,2
6,7,0.0,1,6,0,54.0,0,0,6,51.8625,1.0,0
7,8,0.0,3,7,0,2.0,3,1,7,21.075,-1.0,0
8,9,1.0,3,8,1,27.0,0,2,8,11.1333,-1.0,0
9,10,1.0,2,9,1,14.0,1,0,9,30.0708,-1.0,1


In [12]:
train_data = train_test_data[~train_test_data['Survived'].isnull()]
test_data = train_test_data[train_test_data['Survived'].isnull()]

In [13]:
ftr_train = train_data.drop(['PassengerId', 'Survived', 'Name', 'Ticket'], axis=1)
target = train_data['Survived']

In [14]:
from sklearn.model_selection import train_test_split

train_x, valid_x, train_y, valid_y = train_test_split(ftr_train, target, test_size=0.3, random_state=2022)

In [36]:
from bayes_opt import BayesianOptimization
from sklearn.metrics import roc_auc_score
from lightgbm import LGBMClassifier

bayesian_params = {
    'max_depth': (4, 10), 
    'num_leaves': (12, 32), 
    'min_child_samples': (5, 100), 
    'min_child_weight':(1, 25),
    'subsample':(0.5, 1.0),
    'max_bin':(10, 250),
    'reg_lambda':(0.001, 10),
    'reg_alpha': (0.01, 50) 
}

In [37]:
def lgb_roc_eval(max_depth, num_leaves, min_child_samples, min_child_weight, subsample, 
                max_bin, reg_lambda, reg_alpha):
    params = {
        "n_estimators":500, "learning_rate":0.02,
        'max_depth': int(round(max_depth)),
        'num_leaves': int(round(num_leaves)), 
        'min_child_samples': int(round(min_child_samples)),
        'min_child_weight': int(round(min_child_weight)),
        'subsample': max(min(subsample, 1), 0), 
        'max_bin':  max(int(round(max_bin)),10),
        'reg_lambda': max(reg_lambda,0),
        'reg_alpha': max(reg_alpha, 0)
    }
    lgb_model = LGBMClassifier(**params)
    lgb_model.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric= 'auc', verbose= 50, 
                early_stopping_rounds= 50)
    valid_proba = lgb_model.predict_proba(valid_x)[:, 1]
    roc_auc = roc_auc_score(valid_y, valid_proba)
    
    return roc_auc   

In [38]:
lgbB0 = BayesianOptimization(f=lgb_roc_eval, pbounds=bayesian_params, random_state=2022)
lgbB0.maximize(init_points=5, n_iter=25)

|   iter    |  target   |  max_bin  | max_depth | min_ch... | min_ch... | num_le... | reg_alpha | reg_la... | subsample |
-------------------------------------------------------------------------------------------------------------------------
[50]	training's auc: 0.844591	training's binary_logloss: 0.546484	valid_1's auc: 0.838869	valid_1's binary_logloss: 0.547006
[100]	training's auc: 0.849396	training's binary_logloss: 0.50795	valid_1's auc: 0.849792	valid_1's binary_logloss: 0.508818




| [0m 1       [0m | [0m 0.8498  [0m | [0m 12.25   [0m | [0m 6.994   [0m | [0m 15.77   [0m | [0m 2.199   [0m | [0m 25.71   [0m | [0m 24.35   [0m | [0m 8.977   [0m | [0m 0.8237  [0m |
[50]	training's auc: 0.792087	training's binary_logloss: 0.603911	valid_1's auc: 0.805506	valid_1's binary_logloss: 0.597149
| [0m 2       [0m | [0m 0.8055  [0m | [0m 225.3   [0m | [0m 8.327   [0m | [0m 83.98   [0m | [0m 20.86   [0m | [0m 28.67   [0m | [0m 47.85   [0m | [0m 3.681   [0m | [0m 0.7474  [0m |
[50]	training's auc: 0.797759	training's binary_logloss: 0.552484	valid_1's auc: 0.825685	valid_1's binary_logloss: 0.536289




[100]	training's auc: 0.838301	training's binary_logloss: 0.497643	valid_1's auc: 0.849524	valid_1's binary_logloss: 0.487982
[150]	training's auc: 0.848447	training's binary_logloss: 0.478487	valid_1's auc: 0.852679	valid_1's binary_logloss: 0.470431
| [95m 3       [0m | [95m 0.8534  [0m | [95m 91.48   [0m | [95m 7.717   [0m | [95m 97.87   [0m | [95m 3.314   [0m | [95m 26.88   [0m | [95m 14.63   [0m | [95m 2.987   [0m | [95m 0.8762  [0m |
[50]	training's auc: 0.828355	training's binary_logloss: 0.547515	valid_1's auc: 0.850179	valid_1's binary_logloss: 0.547024




| [0m 4       [0m | [0m 0.8502  [0m | [0m 14.48   [0m | [0m 7.142   [0m | [0m 87.12   [0m | [0m 10.33   [0m | [0m 16.24   [0m | [0m 23.76   [0m | [0m 5.647   [0m | [0m 0.6747  [0m |
[50]	training's auc: 0.827943	training's binary_logloss: 0.597372	valid_1's auc: 0.850417	valid_1's binary_logloss: 0.590371
| [0m 5       [0m | [0m 0.8506  [0m | [0m 244.2   [0m | [0m 4.227   [0m | [0m 80.46   [0m | [0m 9.589   [0m | [0m 26.96   [0m | [0m 45.73   [0m | [0m 3.727   [0m | [0m 0.9824  [0m |




[50]	training's auc: 0.830649	training's binary_logloss: 0.544984	valid_1's auc: 0.849911	valid_1's binary_logloss: 0.528679
[100]	training's auc: 0.846625	training's binary_logloss: 0.488393	valid_1's auc: 0.852798	valid_1's binary_logloss: 0.479842
[150]	training's auc: 0.854521	training's binary_logloss: 0.469875	valid_1's auc: 0.855298	valid_1's binary_logloss: 0.463656
| [95m 6       [0m | [95m 0.8564  [0m | [95m 92.02   [0m | [95m 7.767   [0m | [95m 96.25   [0m | [95m 1.549   [0m | [95m 23.49   [0m | [95m 12.58   [0m | [95m 0.1006  [0m | [95m 0.8987  [0m |
[50]	training's auc: 0.849087	training's binary_logloss: 0.514561	valid_1's auc: 0.843482	valid_1's binary_logloss: 0.519454




| [0m 7       [0m | [0m 0.8481  [0m | [0m 83.68   [0m | [0m 7.175   [0m | [0m 88.04   [0m | [0m 7.417   [0m | [0m 13.79   [0m | [0m 5.972   [0m | [0m 7.761   [0m | [0m 0.9866  [0m |
[50]	training's auc: 0.851912	training's binary_logloss: 0.531202	valid_1's auc: 0.85619	valid_1's binary_logloss: 0.517934




[100]	training's auc: 0.858376	training's binary_logloss: 0.472208	valid_1's auc: 0.858631	valid_1's binary_logloss: 0.467013
[150]	training's auc: 0.860648	training's binary_logloss: 0.451857	valid_1's auc: 0.858899	valid_1's binary_logloss: 0.45124
| [95m 8       [0m | [95m 0.8596  [0m | [95m 109.0   [0m | [95m 8.377   [0m | [95m 93.65   [0m | [95m 2.181   [0m | [95m 19.28   [0m | [95m 7.213   [0m | [95m 1.035   [0m | [95m 0.8781  [0m |




[50]	training's auc: 0.828626	training's binary_logloss: 0.544941	valid_1's auc: 0.850595	valid_1's binary_logloss: 0.54507
| [0m 9       [0m | [0m 0.8506  [0m | [0m 106.9   [0m | [0m 8.953   [0m | [0m 89.23   [0m | [0m 1.0     [0m | [0m 12.51   [0m | [0m 26.48   [0m | [0m 0.001   [0m | [0m 0.7705  [0m |




[50]	training's auc: 0.855567	training's binary_logloss: 0.496939	valid_1's auc: 0.838958	valid_1's binary_logloss: 0.506559
| [0m 10      [0m | [0m 0.841   [0m | [0m 111.4   [0m | [0m 4.797   [0m | [0m 72.66   [0m | [0m 1.441   [0m | [0m 29.24   [0m | [0m 5.699   [0m | [0m 1.043   [0m | [0m 0.9354  [0m |
[50]	training's auc: 0.837585	training's binary_logloss: 0.544303	valid_1's auc: 0.849524	valid_1's binary_logloss: 0.528941
[100]	training's auc: 0.850128	training's binary_logloss: 0.488867	valid_1's auc: 0.853482	valid_1's binary_logloss: 0.480514
| [0m 11      [0m | [0m 0.8543  [0m | [0m 109.4   [0m | [0m 7.539   [0m | [0m 99.82   [0m | [0m 16.5    [0m | [0m 19.13   [0m | [0m 8.586   [0m | [0m 5.288   [0m | [0m 0.5703  [0m |




[50]	training's auc: 0.851234	training's binary_logloss: 0.522377	valid_1's auc: 0.845714	valid_1's binary_logloss: 0.514842
[100]	training's auc: 0.860914	training's binary_logloss: 0.464961	valid_1's auc: 0.849762	valid_1's binary_logloss: 0.466407
[150]	training's auc: 0.867546	training's binary_logloss: 0.442526	valid_1's auc: 0.854107	valid_1's binary_logloss: 0.451472
[200]	training's auc: 0.874206	training's binary_logloss: 0.429656	valid_1's auc: 0.857589	valid_1's binary_logloss: 0.444925
[250]	training's auc: 0.880778	training's binary_logloss: 0.418494	valid_1's auc: 0.859077	valid_1's binary_logloss: 0.440206
[300]	training's auc: 0.885404	training's binary_logloss: 0.409792	valid_1's auc: 0.860893	valid_1's binary_logloss: 0.438268
[350]	training's auc: 0.889086	training's binary_logloss: 0.402863	valid_1's auc: 0.860863	valid_1's binary_logloss: 0.436059
| [95m 12      [0m | [95m 0.8615  [0m | [95m 129.4   [0m | [95m 8.442   [0m | [95m 97.7    [0m | [95m 5.233 



[50]	training's auc: 0.841815	training's binary_logloss: 0.54719	valid_1's auc: 0.849583	valid_1's binary_logloss: 0.531093
[100]	training's auc: 0.849114	training's binary_logloss: 0.490598	valid_1's auc: 0.852321	valid_1's binary_logloss: 0.482131
[150]	training's auc: 0.854087	training's binary_logloss: 0.46714	valid_1's auc: 0.852619	valid_1's binary_logloss: 0.462804
[200]	training's auc: 0.857346	training's binary_logloss: 0.454573	valid_1's auc: 0.857054	valid_1's binary_logloss: 0.4536
| [0m 13      [0m | [0m 0.8578  [0m | [0m 137.1   [0m | [0m 4.364   [0m | [0m 98.97   [0m | [0m 14.1    [0m | [0m 12.82   [0m | [0m 6.973   [0m | [0m 9.948   [0m | [0m 0.6684  [0m |




[50]	training's auc: 0.854634	training's binary_logloss: 0.509346	valid_1's auc: 0.843065	valid_1's binary_logloss: 0.505627
[100]	training's auc: 0.864965	training's binary_logloss: 0.453678	valid_1's auc: 0.85372	valid_1's binary_logloss: 0.459088
[150]	training's auc: 0.87446	training's binary_logloss: 0.432365	valid_1's auc: 0.855565	valid_1's binary_logloss: 0.447297
| [0m 14      [0m | [0m 0.8582  [0m | [0m 151.7   [0m | [0m 10.0    [0m | [0m 100.0   [0m | [0m 1.0     [0m | [0m 22.81   [0m | [0m 0.01    [0m | [0m 0.001   [0m | [0m 1.0     [0m |
[50]	training's auc: 0.797759	training's binary_logloss: 0.561495	valid_1's auc: 0.825685	valid_1's binary_logloss: 0.545634




[100]	training's auc: 0.830649	training's binary_logloss: 0.511881	valid_1's auc: 0.849911	valid_1's binary_logloss: 0.500427
| [0m 15      [0m | [0m 0.8499  [0m | [0m 139.3   [0m | [0m 7.995   [0m | [0m 94.08   [0m | [0m 5.184   [0m | [0m 31.52   [0m | [0m 18.55   [0m | [0m 7.57    [0m | [0m 0.577   [0m |
[50]	training's auc: 0.855063	training's binary_logloss: 0.498387	valid_1's auc: 0.847143	valid_1's binary_logloss: 0.506741




| [0m 16      [0m | [0m 0.8524  [0m | [0m 148.8   [0m | [0m 7.785   [0m | [0m 78.3    [0m | [0m 22.23   [0m | [0m 12.25   [0m | [0m 1.395   [0m | [0m 4.488   [0m | [0m 0.8502  [0m |
[50]	training's auc: 0.852557	training's binary_logloss: 0.521583	valid_1's auc: 0.844554	valid_1's binary_logloss: 0.514434




[100]	training's auc: 0.861451	training's binary_logloss: 0.464315	valid_1's auc: 0.848869	valid_1's binary_logloss: 0.466153
[150]	training's auc: 0.869005	training's binary_logloss: 0.441445	valid_1's auc: 0.856518	valid_1's binary_logloss: 0.450793
[200]	training's auc: 0.875941	training's binary_logloss: 0.427247	valid_1's auc: 0.858958	valid_1's binary_logloss: 0.444268
[250]	training's auc: 0.881391	training's binary_logloss: 0.416165	valid_1's auc: 0.859881	valid_1's binary_logloss: 0.438715
[300]	training's auc: 0.887551	training's binary_logloss: 0.407174	valid_1's auc: 0.861577	valid_1's binary_logloss: 0.436408
[350]	training's auc: 0.891364	training's binary_logloss: 0.400009	valid_1's auc: 0.86253	valid_1's binary_logloss: 0.433149
| [95m 17      [0m | [95m 0.8631  [0m | [95m 176.7   [0m | [95m 8.908   [0m | [95m 97.89   [0m | [95m 7.42    [0m | [95m 24.88   [0m | [95m 0.7022  [0m | [95m 3.349   [0m | [95m 0.8998  [0m |




[50]	training's auc: 0.851169	training's binary_logloss: 0.499866	valid_1's auc: 0.844137	valid_1's binary_logloss: 0.504902
| [0m 18      [0m | [0m 0.8475  [0m | [0m 179.0   [0m | [0m 10.0    [0m | [0m 80.09   [0m | [0m 1.0     [0m | [0m 17.55   [0m | [0m 0.01    [0m | [0m 8.799   [0m | [0m 1.0     [0m |




[50]	training's auc: 0.843317	training's binary_logloss: 0.533756	valid_1's auc: 0.847827	valid_1's binary_logloss: 0.5207
[100]	training's auc: 0.849152	training's binary_logloss: 0.480057	valid_1's auc: 0.849554	valid_1's binary_logloss: 0.474612
| [0m 19      [0m | [0m 0.85    [0m | [0m 172.7   [0m | [0m 8.418   [0m | [0m 96.31   [0m | [0m 24.0    [0m | [0m 31.59   [0m | [0m 5.205   [0m | [0m 2.667   [0m | [0m 0.8347  [0m |
[50]	training's auc: 0.797759	training's binary_logloss: 0.550332	valid_1's auc: 0.825685	valid_1's binary_logloss: 0.533584




[100]	training's auc: 0.845508	training's binary_logloss: 0.494452	valid_1's auc: 0.852768	valid_1's binary_logloss: 0.48498
| [0m 20      [0m | [0m 0.8532  [0m | [0m 168.4   [0m | [0m 9.535   [0m | [0m 97.96   [0m | [0m 7.766   [0m | [0m 14.57   [0m | [0m 11.58   [0m | [0m 5.309   [0m | [0m 0.7005  [0m |




[50]	training's auc: 0.844656	training's binary_logloss: 0.534083	valid_1's auc: 0.841339	valid_1's binary_logloss: 0.521081
[100]	training's auc: 0.852178	training's binary_logloss: 0.476574	valid_1's auc: 0.848155	valid_1's binary_logloss: 0.47282
[150]	training's auc: 0.858213	training's binary_logloss: 0.453412	valid_1's auc: 0.85503	valid_1's binary_logloss: 0.455525
[200]	training's auc: 0.86125	training's binary_logloss: 0.444181	valid_1's auc: 0.855565	valid_1's binary_logloss: 0.449299
[250]	training's auc: 0.866847	training's binary_logloss: 0.438241	valid_1's auc: 0.856488	valid_1's binary_logloss: 0.4466
[300]	training's auc: 0.869292	training's binary_logloss: 0.433873	valid_1's auc: 0.85747	valid_1's binary_logloss: 0.444592
[350]	training's auc: 0.870995	training's binary_logloss: 0.430384	valid_1's auc: 0.857768	valid_1's binary_logloss: 0.44308
[400]	training's auc: 0.873246	training's binary_logloss: 0.42726	valid_1's auc: 0.85994	valid_1's binary_logloss: 0.441631
[4



[100]	training's auc: 0.858539	training's binary_logloss: 0.469347	valid_1's auc: 0.84994	valid_1's binary_logloss: 0.469367
[150]	training's auc: 0.862671	training's binary_logloss: 0.446397	valid_1's auc: 0.853095	valid_1's binary_logloss: 0.453647
[200]	training's auc: 0.870664	training's binary_logloss: 0.435114	valid_1's auc: 0.856071	valid_1's binary_logloss: 0.446772
[250]	training's auc: 0.876912	training's binary_logloss: 0.425179	valid_1's auc: 0.860208	valid_1's binary_logloss: 0.442204
[300]	training's auc: 0.881239	training's binary_logloss: 0.417005	valid_1's auc: 0.862173	valid_1's binary_logloss: 0.439741
[350]	training's auc: 0.884965	training's binary_logloss: 0.410885	valid_1's auc: 0.861994	valid_1's binary_logloss: 0.438445
| [0m 22      [0m | [0m 0.8631  [0m | [0m 209.1   [0m | [0m 8.461   [0m | [0m 98.69   [0m | [0m 12.54   [0m | [0m 18.39   [0m | [0m 1.273   [0m | [0m 5.306   [0m | [0m 0.5452  [0m |
[50]	training's auc: 0.84329	training's bin



[150]	training's auc: 0.860703	training's binary_logloss: 0.450437	valid_1's auc: 0.855179	valid_1's binary_logloss: 0.455337
[200]	training's auc: 0.866125	training's binary_logloss: 0.439969	valid_1's auc: 0.852917	valid_1's binary_logloss: 0.448629
| [0m 23      [0m | [0m 0.8556  [0m | [0m 206.8   [0m | [0m 8.127   [0m | [0m 97.03   [0m | [0m 9.853   [0m | [0m 16.83   [0m | [0m 2.923   [0m | [0m 7.158   [0m | [0m 0.7329  [0m |
[50]	training's auc: 0.841365	training's binary_logloss: 0.535659	valid_1's auc: 0.839673	valid_1's binary_logloss: 0.522864




[100]	training's auc: 0.852769	training's binary_logloss: 0.478934	valid_1's auc: 0.850625	valid_1's binary_logloss: 0.474603
[150]	training's auc: 0.856635	training's binary_logloss: 0.457936	valid_1's auc: 0.855923	valid_1's binary_logloss: 0.45822
[200]	training's auc: 0.860377	training's binary_logloss: 0.448367	valid_1's auc: 0.85625	valid_1's binary_logloss: 0.451607
| [0m 24      [0m | [0m 0.8581  [0m | [0m 202.9   [0m | [0m 5.348   [0m | [0m 98.01   [0m | [0m 20.99   [0m | [0m 28.11   [0m | [0m 4.403   [0m | [0m 5.641   [0m | [0m 0.6015  [0m |
[50]	training's auc: 0.852769	training's binary_logloss: 0.520184	valid_1's auc: 0.845506	valid_1's binary_logloss: 0.512684




[100]	training's auc: 0.860431	training's binary_logloss: 0.464418	valid_1's auc: 0.849643	valid_1's binary_logloss: 0.465968
| [0m 25      [0m | [0m 0.8514  [0m | [0m 223.1   [0m | [0m 7.622   [0m | [0m 97.69   [0m | [0m 17.8    [0m | [0m 27.25   [0m | [0m 2.825   [0m | [0m 0.2522  [0m | [0m 0.5671  [0m |




[50]	training's auc: 0.854483	training's binary_logloss: 0.509311	valid_1's auc: 0.843214	valid_1's binary_logloss: 0.505494
[100]	training's auc: 0.863826	training's binary_logloss: 0.454926	valid_1's auc: 0.852738	valid_1's binary_logloss: 0.459481
[150]	training's auc: 0.869743	training's binary_logloss: 0.437654	valid_1's auc: 0.853988	valid_1's binary_logloss: 0.44967
[200]	training's auc: 0.875274	training's binary_logloss: 0.426801	valid_1's auc: 0.85628	valid_1's binary_logloss: 0.44364
[250]	training's auc: 0.879666	training's binary_logloss: 0.419012	valid_1's auc: 0.856458	valid_1's binary_logloss: 0.441465
[300]	training's auc: 0.882611	training's binary_logloss: 0.413415	valid_1's auc: 0.855923	valid_1's binary_logloss: 0.439756
| [0m 26      [0m | [0m 0.8575  [0m | [0m 136.1   [0m | [0m 10.0    [0m | [0m 100.0   [0m | [0m 19.73   [0m | [0m 25.43   [0m | [0m 0.01    [0m | [0m 0.001   [0m | [0m 1.0     [0m |




[50]	training's auc: 0.843713	training's binary_logloss: 0.523854	valid_1's auc: 0.850565	valid_1's binary_logloss: 0.520805
| [0m 27      [0m | [0m 0.8506  [0m | [0m 184.8   [0m | [0m 8.449   [0m | [0m 92.89   [0m | [0m 6.856   [0m | [0m 29.4    [0m | [0m 15.54   [0m | [0m 0.1125  [0m | [0m 0.917   [0m |




[50]	training's auc: 0.854634	training's binary_logloss: 0.509346	valid_1's auc: 0.843065	valid_1's binary_logloss: 0.505627
[100]	training's auc: 0.864965	training's binary_logloss: 0.453678	valid_1's auc: 0.85372	valid_1's binary_logloss: 0.459088
[150]	training's auc: 0.874336	training's binary_logloss: 0.432151	valid_1's auc: 0.85497	valid_1's binary_logloss: 0.44739
| [0m 28      [0m | [0m 0.8576  [0m | [0m 123.6   [0m | [0m 6.571   [0m | [0m 100.0   [0m | [0m 1.0     [0m | [0m 12.0    [0m | [0m 0.01    [0m | [0m 0.001   [0m | [0m 1.0     [0m |




[50]	training's auc: 0.858262	training's binary_logloss: 0.482563	valid_1's auc: 0.843363	valid_1's binary_logloss: 0.495296
| [0m 29      [0m | [0m 0.8476  [0m | [0m 139.1   [0m | [0m 4.0     [0m | [0m 88.48   [0m | [0m 1.0     [0m | [0m 15.01   [0m | [0m 0.01    [0m | [0m 0.001   [0m | [0m 0.5     [0m |




[50]	training's auc: 0.849423	training's binary_logloss: 0.510924	valid_1's auc: 0.843661	valid_1's binary_logloss: 0.504943
[100]	training's auc: 0.859141	training's binary_logloss: 0.464877	valid_1's auc: 0.846399	valid_1's binary_logloss: 0.466648
| [0m 30      [0m | [0m 0.8486  [0m | [0m 208.0   [0m | [0m 10.0    [0m | [0m 100.0   [0m | [0m 23.68   [0m | [0m 13.56   [0m | [0m 0.01    [0m | [0m 0.001   [0m | [0m 0.5     [0m |


In [39]:
lgbB0.res

[{'target': 0.8497916666666666,
  'params': {'max_bin': 12.246067313863529,
   'max_depth': 6.994346865111928,
   'min_child_samples': 15.77145054380934,
   'min_child_weight': 2.1993764362631314,
   'num_leaves': 25.708151884861834,
   'reg_alpha': 24.354533530104362,
   'reg_lambda': 8.976674607206252,
   'subsample': 0.8237260353716331}},
 {'target': 0.8055059523809524,
  'params': {'max_bin': 225.2711494698392,
   'max_depth': 8.326809574988113,
   'min_child_samples': 83.97857497373803,
   'min_child_weight': 20.861633651723317,
   'num_leaves': 28.67159168116281,
   'reg_alpha': 47.85264633978365,
   'reg_lambda': 3.681076325612883,
   'subsample': 0.7474188149426438}},
 {'target': 0.8533630952380952,
  'params': {'max_bin': 91.4822739009978,
   'max_depth': 7.716575957469941,
   'min_child_samples': 97.8653156500221,
   'min_child_weight': 3.314393863311321,
   'num_leaves': 26.88412424509231,
   'reg_alpha': 14.632048710028132,
   'reg_lambda': 2.9874548342985574,
   'subsample

In [40]:
target_list = []
for result in lgbB0.res:
    target = result['target']
    target_list.append(target)

print(target_list)
print('maximum target index', np.argmax(np.array(target_list)))

[0.8497916666666666, 0.8055059523809524, 0.8533630952380952, 0.8501785714285715, 0.8505952380952381, 0.8563690476190476, 0.8480952380952382, 0.8595833333333335, 0.8505952380952381, 0.8410416666666667, 0.8543154761904762, 0.8614880952380952, 0.8577678571428573, 0.8582440476190476, 0.8499107142857142, 0.8523511904761905, 0.863125, 0.8475297619047619, 0.85, 0.8532142857142857, 0.8615773809523809, 0.8630654761904761, 0.8556250000000001, 0.8580952380952381, 0.8514285714285714, 0.857470238095238, 0.850625, 0.8576488095238095, 0.8476488095238095, 0.8486309523809523]
maximum target index 16


In [42]:
max_dict = lgbB0.res[np.argmax(np.array(target_list))]
print(max_dict)

{'target': 0.863125, 'params': {'max_bin': 176.65175037827473, 'max_depth': 8.908064010609426, 'min_child_samples': 97.88670947833252, 'min_child_weight': 7.420195191687601, 'num_leaves': 24.87714261675021, 'reg_alpha': 0.7022458793568418, 'reg_lambda': 3.3491858707784234, 'subsample': 0.899802179937488}}


In [43]:
clf = LGBMClassifier(
    n_jobs=-1,
    n_estimators=500,
    learning_rate=0.02,
    max_bin=176,
    min_child_samples=98,
    min_child_weight=7,
    num_leaves=24,
    subsample=0.899,
    max_depth=9,
    reg_alpha=0.702,
    reg_lambda=3.349,
    silent=-1,
    verbose=-1
)

clf.fit(train_x, train_y, eval_set=[(train_x, train_y), (valid_x, valid_y)], eval_metric='auc', verbose=30,
       early_stopping_rounds=30)

[30]	training's auc: 0.835904	training's binary_logloss: 0.558402	valid_1's auc: 0.834226	valid_1's binary_logloss: 0.548859




LGBMClassifier(learning_rate=0.02, max_bin=176, max_depth=9,
               min_child_samples=98, min_child_weight=7, n_estimators=500,
               num_leaves=24, reg_alpha=0.702, reg_lambda=3.349, silent=-1,
               subsample=0.899, verbose=-1)

In [45]:
preds = clf.predict_proba(test_data.drop(['PassengerId', 'Survived', 'Name', 'Ticket'], axis=1))[:, 1]

In [47]:
preds.tolist()

[0.36639205380829587,
 0.41010686458533924,
 0.36865008551405215,
 0.37547469007869994,
 0.41010686458533924,
 0.37547469007869994,
 0.41010686458533924,
 0.38587494838031766,
 0.41010686458533924,
 0.37547469007869994,
 0.36639205380829587,
 0.36865008551405215,
 0.4169213002455327,
 0.36865008551405215,
 0.4169213002455327,
 0.4169213002455327,
 0.36865008551405215,
 0.37319956625904366,
 0.41010686458533924,
 0.41010686458533924,
 0.38587494838031766,
 0.37319956625904366,
 0.4169213002455327,
 0.38587494838031766,
 0.4169213002455327,
 0.36865008551405215,
 0.4169213002455327,
 0.37319956625904366,
 0.38587494838031766,
 0.36865008551405215,
 0.36865008551405215,
 0.38587494838031766,
 0.41010686458533924,
 0.41010686458533924,
 0.38587494838031766,
 0.37319956625904366,
 0.41010686458533924,
 0.41010686458533924,
 0.37547469007869994,
 0.38587494838031766,
 0.36865008551405215,
 0.38587494838031766,
 0.36639205380829587,
 0.41010686458533924,
 0.4169213002455327,
 0.37547469007869