This notebook is a sample code with Japanese comments.

# ハイパーパラメータの調整

In [5]:
import numpy as np
import pandas as pd


train = pd.read_csv('../input/titanic/train.csv')
test = pd.read_csv('../input/titanic/test.csv')
gender_submission = pd.read_csv('../input/titanic/gender_submission.csv')

data = pd.concat([train, test], sort=False)

data['Sex'].replace(['male', 'female'], [0, 1], inplace=True)
data['Embarked'].fillna(('S'), inplace=True)
data['Embarked'] = data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2}).astype(int)
data['Fare'].fillna(np.mean(data['Fare']), inplace=True)
data['Age'].fillna(data['Age'].median(), inplace=True)
data['FamilySize'] = data['Parch'] + data['SibSp'] + 1
data['IsAlone'] = 0
data.loc[data['FamilySize'] == 1, 'IsAlone'] = 1

In [6]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,FamilySize,IsAlone
0,1,0.0,3,"Braund, Mr. Owen Harris",0,22.0,1,0,A/5 21171,7.25,,0,2,0
1,2,1.0,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1,38.0,1,0,PC 17599,71.2833,C85,1,2,0
2,3,1.0,3,"Heikkinen, Miss. Laina",1,26.0,0,0,STON/O2. 3101282,7.925,,0,1,1
3,4,1.0,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1,35.0,1,0,113803,53.1,C123,0,2,0
4,5,0.0,3,"Allen, Mr. William Henry",0,35.0,0,0,373450,8.05,,0,1,1


In [7]:
delete_columns = ['Name', 'PassengerId', 'Ticket', 'Cabin']
data.drop(delete_columns, axis=1, inplace=True)

train = data[:len(train)]
test = data[len(train):]

y_train = train['Survived']
X_train = train.drop('Survived', axis=1)
X_test = test.drop('Survived', axis=1)

In [8]:
X_train.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,FamilySize,IsAlone
0,3,0,22.0,1,0,7.25,0,2,0
1,1,1,38.0,1,0,71.2833,1,2,0
2,3,1,26.0,0,0,7.925,0,1,1
3,1,1,35.0,1,0,53.1,0,2,0
4,3,0,35.0,0,0,8.05,0,1,1


## LightGBM

In [9]:
from sklearn.model_selection import train_test_split


X_train, X_valid, y_train, y_valid = \
    train_test_split(X_train, y_train, test_size=0.3,
                                 random_state=0, stratify=y_train)

In [10]:
categorical_features = ['Embarked', 'Pclass', 'Sex']

In [16]:
import lightgbm as lgb


lgb_train = lgb.Dataset(X_train, y_train,
                                         categorical_feature=categorical_features)
lgb_eval = lgb.Dataset(X_valid, y_valid, reference=lgb_train,
                                         categorical_feature=categorical_features)

model = lgb.train(params, lgb_train,
                               valid_sets=[lgb_train, lgb_eval],
                               verbose_eval=10,
                               num_boost_round=1000,
                               early_stopping_rounds=10)

y_pred = model.predict(X_test, num_iteration=model.best_iteration)

Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


In [17]:
y_pred[:10]

array([0.03605598, 0.40306884, 0.10732166, 0.0802399 , 0.46011271,
       0.20222002, 0.64929492, 0.11896033, 0.7452973 , 0.01917651])

# Optunaを使う

In [20]:
import optuna
from sklearn.metrics import log_loss

# max_bin  & num_leaves
def objective(trial):
    params = {
        'objective': 'binary',
        'max_bin': trial.suggest_int('max_bin', 255, 500),
        'learning_rate': 0.05,
        'num_leaves': trial.suggest_int('num_leaves', 32, 128),
    }

    lgb_train = lgb.Dataset(X_train, y_train,
                                             categorical_feature=categorical_features)
    lgb_eval = lgb.Dataset(X_valid, y_valid, reference=lgb_train,
                                            categorical_feature=categorical_features)

    model = lgb.train(params, lgb_train,
                                   valid_sets=[lgb_train, lgb_eval],
                                   verbose_eval=10,
                                   num_boost_round=1000,
                                   early_stopping_rounds=10)

    y_pred_valid = model.predict(X_valid, num_iteration=model.best_iteration)
    score = log_loss(y_valid, y_pred_valid)
    return score

In [21]:
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(seed=0))
study.optimize(objective, n_trials=40)


Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:39,080] Finished trial#0 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:39,760] Finished trial#1 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:40,484] Finished trial#2 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:41,281] Finished trial#3 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:42,062] Finished trial#4 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:42,807] Finished trial#5 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:43,317] Finished trial#6 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:43,786] Finished trial#7 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:44,526] Finished trial#8 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:45,101] Finished trial#9 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:45,823] Finished trial#10 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:46,342] Finished trial#11 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:46,848] Finished trial#12 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:47,401] Finished trial#13 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:47,919] Finished trial#14 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:48,451] Finished trial#15 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:49,193] Finished trial#16 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:49,904] Finished trial#17 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:50,453] Finished trial#18 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:51,145] Finished trial#19 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:51,740] Finished trial#20 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:52,697] Finished trial#21 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:53,370] Finished trial#22 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:53,978] Finished trial#23 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:54,561] Finished trial#24 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:55,514] Finished trial#25 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:56,308] Finished trial#26 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:56,890] Finished trial#27 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:57,467] Finished trial#28 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:58,272] Finished trial#29 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:58,845] Finished trial#30 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:31:59,589] Finished trial#31 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:32:00,229] Finished trial#32 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:32:00,772] Finished trial#33 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:32:01,317] Finished trial#34 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:32:01,841] Finished trial#35 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:32:02,373] Finished trial#36 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:32:03,166] Finished trial#37 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:32:03,574] Finished trial#38 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.

Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


[I 2020-04-30 02:32:03,992] Finished trial#39 resulted in value: 0.4332512137886331. Current best value is 0.4332512137886331 with parameters: {'max_bin': 427, 'num_leaves': 79}.


In [22]:
study.best_params

{'max_bin': 427, 'num_leaves': 79}

In [23]:
params = {
    'objective': 'binary',
    'max_bin': study.best_params['max_bin'],
    'learning_rate': 0.05,
    'num_leaves': study.best_params['num_leaves']
}

lgb_train = lgb.Dataset(X_train, y_train,
                                         categorical_feature=categorical_features)
lgb_eval = lgb.Dataset(X_valid, y_valid, reference=lgb_train,
                                         categorical_feature=categorical_features)

model = lgb.train(params, lgb_train,
                               valid_sets=[lgb_train, lgb_eval],
                               verbose_eval=10,
                               num_boost_round=1000,
                               early_stopping_rounds=10)

y_pred = model.predict(X_test, num_iteration=model.best_iteration)


Using categorical_feature in Dataset.



Training until validation scores don't improve for 10 rounds
[10]	training's binary_logloss: 0.505699	valid_1's binary_logloss: 0.532106
[20]	training's binary_logloss: 0.427825	valid_1's binary_logloss: 0.482279
[30]	training's binary_logloss: 0.377242	valid_1's binary_logloss: 0.456641
[40]	training's binary_logloss: 0.345424	valid_1's binary_logloss: 0.447083
[50]	training's binary_logloss: 0.323113	valid_1's binary_logloss: 0.440407
[60]	training's binary_logloss: 0.302727	valid_1's binary_logloss: 0.434527
[70]	training's binary_logloss: 0.285597	valid_1's binary_logloss: 0.434932
Early stopping, best iteration is:
[66]	training's binary_logloss: 0.293072	valid_1's binary_logloss: 0.433251


In [25]:
y_pred = (y_pred > 0.5).astype(int)