In [42]:
# Familiar imports
import numpy as np
import pandas as pd

# For ordinal encoding categorical variables, splitting data
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split

# For training model
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from lightgbm import LGBMRegressor
import lightgbm as lgb
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

from sklearn.linear_model import LinearRegression

#for analysis
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

import optuna
import warnings

In [2]:
# Load the training data
train = pd.read_csv("30-days-of-ml/train_folds.csv")
test = pd.read_csv("30-days-of-ml/test.csv")
sample_submission=pd.read_csv("30-days-of-ml/sample_submission.csv")

# Preview the data
train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13,target,kfold
0,1,B,B,B,C,B,B,A,E,C,...,0.160266,0.310921,0.38947,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634,4
1,2,B,B,A,A,B,D,A,F,A,...,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233,0
2,3,A,A,A,C,B,D,A,D,A,...,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351,4
3,4,B,B,A,C,B,D,A,E,C,...,0.239061,0.732948,0.679618,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253,1
4,6,A,A,A,C,B,D,A,E,A,...,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226,0


In [3]:
useful_cols=[column for column in train.columns if column not in ('id', 'target', 'kfold')]

In [4]:
train[useful_cols]

Unnamed: 0,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,...,cont4,cont5,cont6,cont7,cont8,cont9,cont10,cont11,cont12,cont13
0,B,B,B,C,B,B,A,E,C,N,...,0.610706,0.400361,0.160266,0.310921,0.389470,0.267559,0.237281,0.377873,0.322401,0.869850
1,B,B,A,A,B,D,A,F,A,O,...,0.276853,0.533087,0.558922,0.516294,0.594928,0.341439,0.906013,0.921701,0.261975,0.465083
2,A,A,A,C,B,D,A,D,A,F,...,0.285074,0.650609,0.375348,0.902567,0.555205,0.843531,0.748809,0.620126,0.541474,0.763846
3,B,B,A,C,B,D,A,E,C,K,...,0.284667,0.668980,0.239061,0.732948,0.679618,0.574844,0.346010,0.714610,0.540150,0.280682
4,A,A,A,C,B,D,A,E,A,N,...,0.287595,0.686964,0.420667,0.648182,0.684501,0.956692,1.000773,0.776742,0.625849,0.250823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299995,B,B,A,A,B,D,A,E,A,I,...,0.307883,0.769792,0.450538,0.934360,1.005077,0.853726,0.422541,1.063463,0.697685,0.506404
299996,A,B,A,C,B,B,A,E,E,F,...,0.736713,0.528056,0.508502,0.358247,0.257825,0.433525,0.301015,0.268447,0.577055,0.823611
299997,B,B,A,C,B,C,A,E,G,F,...,0.277074,0.688747,0.372425,0.364936,0.383224,0.551825,0.661007,0.629606,0.714139,0.245732
299998,A,B,A,C,B,B,A,E,E,I,...,0.805963,0.344404,0.424243,0.382028,0.468819,0.351036,0.288768,0.611169,0.380254,0.332030


In [5]:
 # List of categorical columns
object_cols = [col for col in useful_cols if 'cat' in col]
test=test[useful_cols]


# Random Forest

In [18]:
last_scores=[]
for fold in range(5):
    X_train = train[train.kfold!=fold].reset_index(drop=True)
    X_valid= train[train.kfold==fold].reset_index(drop=True)
    X_test=test.copy()
    
    y_train=X_train.target
    y_valid=X_valid.target
    
    X_train=X_train[useful_cols]
    X_valid=X_valid[useful_cols]
   
    # ordinal-encode categorical columns
    print('Encoding')
    ordinal_encoder = OrdinalEncoder()
    
    X_train[object_cols]=ordinal_encoder.fit_transform(X_train[object_cols])
    X_valid[object_cols]=ordinal_encoder.transform(X_valid[object_cols])
    X_test[object_cols] = ordinal_encoder.transform(X_test[object_cols])
    
    print('Training') 
    model = RandomForestRegressor(random_state=fold, verbose=100)
    model.fit(X_train,y_train)
    predictions=model.predict(X_valid)
    predictions_test=model.predict(X_test)
    last_scores.append(predictions_test)
    print(fold, mean_squared_error(y_valid,predictions,squared=False))

Encoding
Training
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
building tree 1 of 100
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    8.9s remaining:    0.0s
building tree 2 of 100
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   17.7s remaining:    0.0s
building tree 3 of 100
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   26.6s remaining:    0.0s
building tree 4 of 100
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:   35.5s remaining:    0.0s
building tree 5 of 100
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   44.7s remaining:    0.0s
building tree 6 of 100
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:   53.8s remaining:    0.0s
building tree 7 of 100
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:  1.0min remaining:    0.0s
building tree 8 of 100
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:  1.2min remaining:    0.0s
building tree 9 of 100
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:  

[Parallel(n_jobs=1)]: Done  79 out of  79 | elapsed: 11.9min remaining:    0.0s
building tree 80 of 100
[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed: 12.1min remaining:    0.0s
building tree 81 of 100
[Parallel(n_jobs=1)]: Done  81 out of  81 | elapsed: 12.2min remaining:    0.0s
building tree 82 of 100
[Parallel(n_jobs=1)]: Done  82 out of  82 | elapsed: 12.4min remaining:    0.0s
building tree 83 of 100
[Parallel(n_jobs=1)]: Done  83 out of  83 | elapsed: 12.5min remaining:    0.0s
building tree 84 of 100
[Parallel(n_jobs=1)]: Done  84 out of  84 | elapsed: 12.7min remaining:    0.0s
building tree 85 of 100
[Parallel(n_jobs=1)]: Done  85 out of  85 | elapsed: 12.8min remaining:    0.0s
building tree 86 of 100
[Parallel(n_jobs=1)]: Done  86 out of  86 | elapsed: 13.0min remaining:    0.0s
building tree 87 of 100
[Parallel(n_jobs=1)]: Done  87 out of  87 | elapsed: 13.2min remaining:    0.0s
building tree 88 of 100
[Parallel(n_jobs=1)]: Done  88 out of  88 | elapsed: 13.3min rem

[Parallel(n_jobs=1)]: Done  77 out of  77 | elapsed:    3.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  78 out of  78 | elapsed:    3.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  79 out of  79 | elapsed:    3.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    4.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  81 out of  81 | elapsed:    4.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  82 out of  82 | elapsed:    4.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  83 out of  83 | elapsed:    4.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  84 out of  84 | elapsed:    4.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  85 out of  85 | elapsed:    4.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  86 out of  86 | elapsed:    4.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  87 out of  87 | elapsed:    4.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  88 out of  88 | elapsed:    4.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  89 out of  8

[Parallel(n_jobs=1)]: Done  79 out of  79 | elapsed:   13.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:   13.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  81 out of  81 | elapsed:   13.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  82 out of  82 | elapsed:   13.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  83 out of  83 | elapsed:   13.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  84 out of  84 | elapsed:   13.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  85 out of  85 | elapsed:   14.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  86 out of  86 | elapsed:   14.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  87 out of  87 | elapsed:   14.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  88 out of  88 | elapsed:   14.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  89 out of  89 | elapsed:   14.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  90 out of  90 | elapsed:   14.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  91 out of  9

[Parallel(n_jobs=1)]: Done  62 out of  62 | elapsed:  9.4min remaining:    0.0s
building tree 63 of 100
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:  9.5min remaining:    0.0s
building tree 64 of 100
[Parallel(n_jobs=1)]: Done  64 out of  64 | elapsed:  9.7min remaining:    0.0s
building tree 65 of 100
[Parallel(n_jobs=1)]: Done  65 out of  65 | elapsed:  9.8min remaining:    0.0s
building tree 66 of 100
[Parallel(n_jobs=1)]: Done  66 out of  66 | elapsed: 10.0min remaining:    0.0s
building tree 67 of 100
[Parallel(n_jobs=1)]: Done  67 out of  67 | elapsed: 10.1min remaining:    0.0s
building tree 68 of 100
[Parallel(n_jobs=1)]: Done  68 out of  68 | elapsed: 10.3min remaining:    0.0s
building tree 69 of 100
[Parallel(n_jobs=1)]: Done  69 out of  69 | elapsed: 10.4min remaining:    0.0s
building tree 70 of 100
[Parallel(n_jobs=1)]: Done  70 out of  70 | elapsed: 10.6min remaining:    0.0s
building tree 71 of 100
[Parallel(n_jobs=1)]: Done  71 out of  71 | elapsed: 10.7min rem

[Parallel(n_jobs=1)]: Done  53 out of  53 | elapsed:    2.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  54 out of  54 | elapsed:    2.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  55 out of  55 | elapsed:    2.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  56 out of  56 | elapsed:    2.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  57 out of  57 | elapsed:    2.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  58 out of  58 | elapsed:    2.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  59 out of  59 | elapsed:    2.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    3.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  61 out of  61 | elapsed:    3.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  62 out of  62 | elapsed:    3.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:    3.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  64 out of  64 | elapsed:    3.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  65 out of  6

[Parallel(n_jobs=1)]: Done  55 out of  55 | elapsed:    8.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  56 out of  56 | elapsed:    9.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  57 out of  57 | elapsed:    9.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  58 out of  58 | elapsed:    9.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  59 out of  59 | elapsed:    9.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:    9.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  61 out of  61 | elapsed:    9.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  62 out of  62 | elapsed:   10.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  63 out of  63 | elapsed:   10.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  64 out of  64 | elapsed:   10.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  65 out of  65 | elapsed:   10.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  66 out of  66 | elapsed:   10.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  67 out of  6

[Parallel(n_jobs=1)]: Done  44 out of  44 | elapsed:  6.8min remaining:    0.0s
building tree 45 of 100
[Parallel(n_jobs=1)]: Done  45 out of  45 | elapsed:  7.0min remaining:    0.0s
building tree 46 of 100
[Parallel(n_jobs=1)]: Done  46 out of  46 | elapsed:  7.1min remaining:    0.0s
building tree 47 of 100
[Parallel(n_jobs=1)]: Done  47 out of  47 | elapsed:  7.3min remaining:    0.0s
building tree 48 of 100
[Parallel(n_jobs=1)]: Done  48 out of  48 | elapsed:  7.4min remaining:    0.0s
building tree 49 of 100
[Parallel(n_jobs=1)]: Done  49 out of  49 | elapsed:  7.6min remaining:    0.0s
building tree 50 of 100
[Parallel(n_jobs=1)]: Done  50 out of  50 | elapsed:  7.7min remaining:    0.0s
building tree 51 of 100
[Parallel(n_jobs=1)]: Done  51 out of  51 | elapsed:  7.9min remaining:    0.0s
building tree 52 of 100
[Parallel(n_jobs=1)]: Done  52 out of  52 | elapsed:  8.0min remaining:    0.0s
building tree 53 of 100
[Parallel(n_jobs=1)]: Done  53 out of  53 | elapsed:  8.2min rem

[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:    1.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  31 out of  31 | elapsed:    1.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:    1.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  33 out of  33 | elapsed:    1.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  34 out of  34 | elapsed:    1.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  35 out of  35 | elapsed:    1.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  36 out of  36 | elapsed:    1.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  37 out of  37 | elapsed:    1.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  38 out of  38 | elapsed:    1.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  39 out of  39 | elapsed:    1.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    1.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  41 out of  4

[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:    5.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  33 out of  33 | elapsed:    5.5s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  34 out of  34 | elapsed:    5.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  35 out of  35 | elapsed:    5.8s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  36 out of  36 | elapsed:    5.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  37 out of  37 | elapsed:    6.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  38 out of  38 | elapsed:    6.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  39 out of  39 | elapsed:    6.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    6.6s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  41 out of  41 | elapsed:    6.7s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  42 out of  42 | elapsed:    6.9s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  43 out of  43 | elapsed:    7.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  44 out of  4

[Parallel(n_jobs=1)]: Done  26 out of  26 | elapsed:  3.9min remaining:    0.0s
building tree 27 of 100
[Parallel(n_jobs=1)]: Done  27 out of  27 | elapsed:  4.1min remaining:    0.0s
building tree 28 of 100
[Parallel(n_jobs=1)]: Done  28 out of  28 | elapsed:  4.2min remaining:    0.0s
building tree 29 of 100
[Parallel(n_jobs=1)]: Done  29 out of  29 | elapsed:  4.4min remaining:    0.0s
building tree 30 of 100
[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:  4.6min remaining:    0.0s
building tree 31 of 100
[Parallel(n_jobs=1)]: Done  31 out of  31 | elapsed:  4.7min remaining:    0.0s
building tree 32 of 100
[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed:  4.9min remaining:    0.0s
building tree 33 of 100
[Parallel(n_jobs=1)]: Done  33 out of  33 | elapsed:  5.0min remaining:    0.0s
building tree 34 of 100
[Parallel(n_jobs=1)]: Done  34 out of  34 | elapsed:  5.2min remaining:    0.0s
building tree 35 of 100
[Parallel(n_jobs=1)]: Done  35 out of  35 | elapsed:  5.3min rem

KeyboardInterrupt: 

# Light GBMTunned

In [6]:
lgbm_parameters = {
   'metric': 'rmse',
    'learning_rate':.008,
    'n_estimators': 50000,
    'reg_alpha': 9.56,
    'reg_lambda': 9.35,
    'colsample_bytree': 0.21497646795452627,
    'subsample': 0.7582562557431147,
    'learning_rate': 0.009985133666265425,
    'max_depth': 77,
    'num_leaves': 77,
    'min_child_samples': 290,
    'max_bin': 772,
    'cat_l2': 17,
    'cat_smoth':96,
    'seed': 2
}


In [8]:
last_scores=[]
rmse_scores=[]
for fold in range(5):
    X_train = train[train.kfold!=fold].reset_index(drop=True)
    X_valid= train[train.kfold==fold].reset_index(drop=True)
    X_test=test.copy()
    
    y_train=X_train.target
    y_valid=X_valid.target
    
    X_train=X_train[useful_cols]
    X_valid=X_valid[useful_cols]
   
    # ordinal-encode categorical columns
    print('Encoding')
    ordinal_encoder = OrdinalEncoder()
    
    X_train[object_cols]=ordinal_encoder.fit_transform(X_train[object_cols])
    X_valid[object_cols]=ordinal_encoder.transform(X_valid[object_cols])
    X_test[object_cols] = ordinal_encoder.transform(X_test[object_cols])
    
    print('Training') 
    model =LGBMRegressor(**lgbm_parameters)
    model.fit(X_train, y_train, eval_set = ((X_valid,y_valid)), early_stopping_rounds = 200,verbose=1000,
              categorical_feature=object_cols)
    predictions=model.predict(X_valid)
    predictions_test=model.predict(X_test)
    last_scores.append(predictions_test)
    rmse=mean_squared_error(y_valid,predictions,squared=False)
    print(fold, rmse)
    rmse_scores.append(rmse)

Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.71917
[2000]	valid_0's rmse: 0.715942
[3000]	valid_0's rmse: 0.715338
Early stopping, best iteration is:
[3299]	valid_0's rmse: 0.715289
0 0.7152890254212041
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.727117
[2000]	valid_0's rmse: 0.723855
[3000]	valid_0's rmse: 0.723238
Early stopping, best iteration is:
[3183]	valid_0's rmse: 0.723219
1 0.7232186600094757
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.722435
[2000]	valid_0's rmse: 0.719259
[3000]	valid_0's rmse: 0.718583
[4000]	valid_0's rmse: 0.718441
Early stopping, best iteration is:
[4008]	valid_0's rmse: 0.71844
2 0.7184400074692084
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.72146
[2000]	valid_0's rmse: 0.718539
[3000]	valid_0's rmse: 0.718017
Early stopping, best iteration is:
[3687]	valid_0's rmse: 0.717952
3 0.7179519662088705
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717228
[2000]	valid_0's rmse: 0.714087
[3000]	valid_0's rmse: 0.713509
Early stopping, best iteration is:
[3376]	valid_0's rmse: 0.713436
4 0.7134359170022957


In [None]:
print(np.mean(rmse_scores), np.std(rmse_scores))
#0.71766711522221088

In [10]:
np.column_stack(last_scores).shape
#the five arrays are for every fold

(200000, 5)

In [22]:
last_scores

[array([8.05848806, 8.36995011, 8.42735571, ..., 8.49073457, 8.18897189,
        8.04246291]),
 array([8.03001228, 8.28199395, 8.40922208, ..., 8.46566595, 8.13418058,
        7.90576582]),
 array([7.9932608 , 8.33373381, 8.35507225, ..., 8.4573602 , 8.06991463,
        7.9837623 ]),
 array([8.01472017, 8.39058241, 8.35813248, ..., 8.45091867, 8.08682882,
        7.86114897]),
 array([8.07038851, 8.35901012, 8.35926018, ..., 8.49227046, 8.13488447,
        7.98448404])]

In [16]:
np.mean(np.column_stack(last_scores),axis=1)

array([8.03337397, 8.34705408, 8.38180854, ..., 8.47138997, 8.12295608,
       7.95552481])

In [12]:
preds=np.mean(np.column_stack(last_scores),axis=1)

In [19]:
sample_submission.target=preds
sample_submission

Unnamed: 0,id,target
0,0,8.033374
1,5,8.347054
2,15,8.381809
3,16,8.460419
4,17,8.145011
...,...,...
199995,499987,8.058993
199996,499990,8.447804
199997,499991,8.471390
199998,499994,8.122956


In [20]:
sample_submission.to_csv('submission_lgb1.csv',index=False)

# LGBM Standarization 

In [24]:
useful_cols=[column for column in train.columns if column not in ('id', 'target', 'kfold')]

In [25]:
 # List of categorical columns
object_cols = [col for col in useful_cols if 'cat' in col]
numerical_cols = [col for col in useful_cols if 'cont' in col]
test=test[useful_cols]

In [27]:
lgbm_parameters = {
   'metric': 'rmse',
    'learning_rate':.008,
    'n_estimators': 50000,
    'reg_alpha': 9.56,
    'reg_lambda': 9.35,
    'colsample_bytree': 0.21497646795452627,
    'subsample': 0.7582562557431147,
    'learning_rate': 0.009985133666265425,
    'max_depth': 77,
    'num_leaves': 77,
    'min_child_samples': 290,
    'max_bin': 772,
    'cat_l2': 17,
    'cat_smoth':96,
    'seed': 2
}

In [28]:
last_scores=[]
rmse_scores=[]
for fold in range(5):
    X_train = train[train.kfold!=fold].reset_index(drop=True)
    X_valid= train[train.kfold==fold].reset_index(drop=True)
    X_test=test.copy()
    
    y_train=X_train.target
    y_valid=X_valid.target
    
    X_train=X_train[useful_cols]
    X_valid=X_valid[useful_cols]
   
    # ordinal-encode categorical columns
    print('Encoding')
    ordinal_encoder = OrdinalEncoder()
    #for object cols
    X_train[object_cols]=ordinal_encoder.fit_transform(X_train[object_cols])
    X_valid[object_cols]=ordinal_encoder.transform(X_valid[object_cols])
    X_test[object_cols] = ordinal_encoder.transform(X_test[object_cols])
    
    #for numerical cols
    scaler =StandardScaler()
    X_train[numerical_cols]=scaler.fit_transform(X_train[numerical_cols])
    X_valid[numerical_cols]=scaler.transform(X_valid[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])
    
    print('Training') 
    model =LGBMRegressor(**lgbm_parameters)
    model.fit(X_train, y_train, eval_set = ((X_valid,y_valid)), early_stopping_rounds = 200,verbose=1000,
              categorical_feature=object_cols)
    predictions=model.predict(X_valid)
    predictions_test=model.predict(X_test)
    last_scores.append(predictions_test)
    rmse=mean_squared_error(y_valid,predictions,squared=False)
    print(fold, rmse)
    rmse_scores.append(rmse)

Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.719228
[2000]	valid_0's rmse: 0.715967
[3000]	valid_0's rmse: 0.71534
Early stopping, best iteration is:
[3752]	valid_0's rmse: 0.715285
0 0.7152850409432048
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.727106
[2000]	valid_0's rmse: 0.723789
[3000]	valid_0's rmse: 0.72319
Early stopping, best iteration is:
[3164]	valid_0's rmse: 0.723168
1 0.7231680926003768
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.722376
[2000]	valid_0's rmse: 0.71917
[3000]	valid_0's rmse: 0.718503
Early stopping, best iteration is:
[3741]	valid_0's rmse: 0.718363
2 0.718363133332485
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721429
[2000]	valid_0's rmse: 0.718527
[3000]	valid_0's rmse: 0.717976
Early stopping, best iteration is:
[3435]	valid_0's rmse: 0.717921
3 0.7179209441906655
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.717252
[2000]	valid_0's rmse: 0.714073
[3000]	valid_0's rmse: 0.713494
Early stopping, best iteration is:
[3191]	valid_0's rmse: 0.713465
4 0.7134645341782053


In [29]:
print(np.mean(rmse_scores), np.std(rmse_scores))

0.7176403490489875 0.0032906536017071808


# LGBM Standarization and hot encoder

In [30]:
useful_cols=[column for column in train.columns if column not in ('id', 'target', 'kfold')]
 # List of categorical columns
object_cols = [col for col in useful_cols if 'cat' in col]
numerical_cols = [col for col in useful_cols if 'cont' in col]
test=test[useful_cols]

In [31]:
lgbm_parameters = {
   'metric': 'rmse',
    'learning_rate':.008,
    'n_estimators': 50000,
    'reg_alpha': 9.56,
    'reg_lambda': 9.35,
    'colsample_bytree': 0.21497646795452627,
    'subsample': 0.7582562557431147,
    'learning_rate': 0.009985133666265425,
    'max_depth': 77,
    'num_leaves': 77,
    'min_child_samples': 290,
    'max_bin': 772,
    'cat_l2': 17,
    'cat_smoth':96,
    'seed': 42
}

In [48]:
last_scores=[]
rmse_scores=[]
for fold in range(5):
    X_train = train[train.kfold!=fold].reset_index(drop=True)
    X_valid= train[train.kfold==fold].reset_index(drop=True)
    X_test=test.copy()
    
    y_train=X_train.target
    y_valid=X_valid.target
    
    X_train=X_train[useful_cols]
    X_valid=X_valid[useful_cols]
   
    # ordinal-encode categorical columns
    print('Encoding')
    ohe = preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore')
    #for object cols
    X_train_ohe=ohe.fit_transform(X_train[object_cols])
    X_valid_ohe=ohe.transform(X_valid[object_cols])
    X_test_ohe = ohe.transform(X_test[object_cols])
    
    X_train_ohe=pd.DataFrame(X_train_ohe, columns=[f"ohe_{i}" for i in range(X_train_ohe.shape[1])])
    X_valid_ohe=pd.DataFrame(X_valid_ohe, columns=[f"ohe_{i}" for i in range(X_valid_ohe.shape[1])])
    X_test_ohe=pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])
    
    X_train=pd.concat([X_train,X_train_ohe], axis=1)
    X_valid=pd.concat([X_valid,X_valid_ohe], axis=1)
    X_test=pd.concat([X_test,X_test_ohe], axis=1)
    
    # Remove categorical columns (will replace with one-hot encoding)
    X_train = X_train.drop(object_cols, axis=1)
    X_valid = X_valid.drop(object_cols,axis=1)
    X_test=X_test.drop(object_cols,axis=1)
    #for numerical cols
    scaler =StandardScaler()
    X_train[numerical_cols]=scaler.fit_transform(X_train[numerical_cols])
    X_valid[numerical_cols]=scaler.transform(X_valid[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])
    
    print('Training') 
    model =LGBMRegressor(**lgbm_parameters)
    model.fit(X_train, y_train, eval_set = ((X_valid,y_valid)), early_stopping_rounds = 200,verbose=1000)
    predictions=model.predict(X_valid)
    predictions_test=model.predict(X_test)
    last_scores.append(predictions_test)
    rmse=mean_squared_error(y_valid,predictions,squared=False)
    print(fold, rmse)
    rmse_scores.append(rmse)
    

Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.718989
[2000]	valid_0's rmse: 0.715893
[3000]	valid_0's rmse: 0.715266
Early stopping, best iteration is:
[3706]	valid_0's rmse: 0.715174
0 0.7151735211244796
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.726796
[2000]	valid_0's rmse: 0.723509
[3000]	valid_0's rmse: 0.722881
Early stopping, best iteration is:
[3743]	valid_0's rmse: 0.722806
1 0.7228064131919003
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.722138
[2000]	valid_0's rmse: 0.71897
[3000]	valid_0's rmse: 0.71829
Early stopping, best iteration is:
[3794]	valid_0's rmse: 0.718173
2 0.7181730818268607
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721298
[2000]	valid_0's rmse: 0.718414
[3000]	valid_0's rmse: 0.717928
Early stopping, best iteration is:

In [49]:
print(np.mean(rmse_scores), np.std(rmse_scores))

0.7174741924285338 0.0032032586978842557


In [5]:
useful_cols=[column for column in train.columns if column not in ('id', 'target', 'kfold')]
 # List of categorical columns
object_cols = [col for col in useful_cols if 'cat' in col]
numerical_cols = [col for col in useful_cols if 'cont' in col]
test=test[useful_cols]

In [6]:
def run(trial):
    #last_scores=[]
    #rmse_scores=[]
    fold=0
    learning_rate=trial.suggest_float('learning_rate', .008,.25, log=True)
    reg_lambda=trial.suggest_loguniform('reg_lambda', 1e-8, 100.0)
    reg_alpha=trial.suggest_loguniform('reg_alpha', 1e-8, 100.0)
    subsample=trial.suggest_float('subsamble', .1, 1.0)
    colsample_bytree=trial.suggest_float('colsample_bytree', .1, 1.0)
    max_depth=trial.suggest_int('max_depth',10,77)
    num_leaves=trial.suggest_int('num_leaves',10,77)
    min_child_samples=trial.suggest_int('min_child_samples',10,100)
        
    X_train = train[train.kfold!=fold].reset_index(drop=True)
    X_valid= train[train.kfold==fold].reset_index(drop=True)
    #X_test=test.copy()

    y_train=X_train.target
    y_valid=X_valid.target

    X_train=X_train[useful_cols]
    X_valid=X_valid[useful_cols]

        # ordinal-encode categorical columns
    print('Encoding')
    ohe = preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore')
        #for object cols
    X_train_ohe=ohe.fit_transform(X_train[object_cols])
    X_valid_ohe=ohe.transform(X_valid[object_cols])
        #X_test_ohe = ohe.transform(X_test[object_cols])

    X_train_ohe=pd.DataFrame(X_train_ohe, columns=[f"ohe_{i}" for i in range(X_train_ohe.shape[1])])
    X_valid_ohe=pd.DataFrame(X_valid_ohe, columns=[f"ohe_{i}" for i in range(X_valid_ohe.shape[1])])
        #X_test_ohe=pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])

    X_train=pd.concat([X_train,X_train_ohe], axis=1)
    X_valid=pd.concat([X_valid,X_valid_ohe], axis=1)
        #X_test=pd.concat([X_test,X_test_ohe], axis=1)

        # Remove categorical columns (will replace with one-hot encoding)
    X_train = X_train.drop(object_cols, axis=1)
    X_valid = X_valid.drop(object_cols,axis=1)
        #X_test=X_test.drop(object_cols,axis=1)
        #for numerical cols
    scaler =StandardScaler()
    X_train[numerical_cols]=scaler.fit_transform(X_train[numerical_cols])
    X_valid[numerical_cols]=scaler.transform(X_valid[numerical_cols])
        #X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])

    print('Training') 
    model =LGBMRegressor(random_state=42,
                            learning_rate=learning_rate, 
                            reg_lambda=reg_lambda,
                            reg_alpha=reg_alpha,
                            subsample=subsample,
                            colsample_bytree=colsample_bytree,
                            max_depth=max_depth,
                            num_leaves=num_leaves,
                            min_child_samples=min_child_samples,
                             n_estimators=7000
                            )
    model.fit(X_train, y_train, eval_set = ((X_valid,y_valid)), early_stopping_rounds = 200,verbose=1000)
    predictions=model.predict(X_valid)
        #predictions_test=model.predict(X_test)
        #last_scores.append(predictions_test)
    rmse=mean_squared_error(y_valid,predictions,squared=False)
    print(fold, rmse)
    return rmse   
    

In [7]:
study = optuna.create_study(direction='minimize')
study.optimize(run, n_trials=100)
study.best_params

[32m[I 2021-08-31 05:40:09,169][0m A new study created in memory with name: no-name-c7451d32-c7d3-4c69-9264-0261fbd98c59[0m


Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[265]	valid_0's l2: 0.516758


[32m[I 2021-08-31 05:40:18,334][0m Trial 0 finished with value: 0.7188586717929577 and parameters: {'learning_rate': 0.19185444850119765, 'reg_lambda': 7.254606589829645, 'reg_alpha': 5.325796952954004e-08, 'subsamble': 0.5165904606780625, 'colsample_bytree': 0.7880680141138762, 'max_depth': 53, 'num_leaves': 17, 'min_child_samples': 52}. Best is trial 0 with value: 0.7188586717929577.[0m


0 0.7188586717929577
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[581]	valid_0's l2: 0.515577


[32m[I 2021-08-31 05:40:34,291][0m Trial 1 finished with value: 0.7180365596310633 and parameters: {'learning_rate': 0.09940899072455849, 'reg_lambda': 6.672585616147851e-07, 'reg_alpha': 9.688706189916708, 'subsamble': 0.4691578019300532, 'colsample_bytree': 0.7441597659589874, 'max_depth': 44, 'num_leaves': 23, 'min_child_samples': 89}. Best is trial 1 with value: 0.7180365596310633.[0m


0 0.7180365596310633
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[241]	valid_0's l2: 0.51407


[32m[I 2021-08-31 05:40:44,495][0m Trial 2 finished with value: 0.7169866121227203 and parameters: {'learning_rate': 0.18018484852080524, 'reg_lambda': 3.412190788033858e-08, 'reg_alpha': 2.3716307745039355e-08, 'subsamble': 0.856034571336129, 'colsample_bytree': 0.10992351576848298, 'max_depth': 30, 'num_leaves': 29, 'min_child_samples': 94}. Best is trial 2 with value: 0.7169866121227203.[0m


0 0.7169866121227203
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[93]	valid_0's l2: 0.518004


[32m[I 2021-08-31 05:40:53,772][0m Trial 3 finished with value: 0.7197250551967326 and parameters: {'learning_rate': 0.23217652940268174, 'reg_lambda': 2.3129637117974147e-05, 'reg_alpha': 7.77343969385137, 'subsamble': 0.6716906977659317, 'colsample_bytree': 0.8675267438584352, 'max_depth': 29, 'num_leaves': 46, 'min_child_samples': 14}. Best is trial 2 with value: 0.7169866121227203.[0m


0 0.7197250551967326
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[507]	valid_0's l2: 0.515246


[32m[I 2021-08-31 05:41:11,393][0m Trial 4 finished with value: 0.717806097893518 and parameters: {'learning_rate': 0.08083195547934452, 'reg_lambda': 6.961961429009591e-07, 'reg_alpha': 4.2045697614049625e-06, 'subsamble': 0.7396024655420042, 'colsample_bytree': 0.2703601131671033, 'max_depth': 36, 'num_leaves': 55, 'min_child_samples': 50}. Best is trial 2 with value: 0.7169866121227203.[0m


0 0.717806097893518
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[353]	valid_0's l2: 0.51684


[32m[I 2021-08-31 05:41:30,403][0m Trial 5 finished with value: 0.7189155840909945 and parameters: {'learning_rate': 0.06701374791889882, 'reg_lambda': 8.389393747792923e-08, 'reg_alpha': 2.0066091483464708, 'subsamble': 0.5142026881390583, 'colsample_bytree': 0.5117266312992441, 'max_depth': 16, 'num_leaves': 71, 'min_child_samples': 12}. Best is trial 2 with value: 0.7169866121227203.[0m


0 0.7189155840909945
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[270]	valid_0's l2: 0.517341


[32m[I 2021-08-31 05:41:44,219][0m Trial 6 finished with value: 0.7192641120882689 and parameters: {'learning_rate': 0.11710137024472739, 'reg_lambda': 8.102438251142218e-05, 'reg_alpha': 3.196012452517953e-08, 'subsamble': 0.256555332933312, 'colsample_bytree': 0.43940699218810264, 'max_depth': 50, 'num_leaves': 65, 'min_child_samples': 49}. Best is trial 2 with value: 0.7169866121227203.[0m


0 0.7192641120882689
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516897
Early stopping, best iteration is:
[1745]	valid_0's l2: 0.516457


[32m[I 2021-08-31 05:42:28,732][0m Trial 7 finished with value: 0.7186495051150484 and parameters: {'learning_rate': 0.024165388744867573, 'reg_lambda': 0.0029524882282712258, 'reg_alpha': 4.121793198672161e-05, 'subsamble': 0.40893367129102065, 'colsample_bytree': 0.7143568654371925, 'max_depth': 58, 'num_leaves': 53, 'min_child_samples': 91}. Best is trial 2 with value: 0.7169866121227203.[0m


0 0.7186495051150484
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.515533
Early stopping, best iteration is:
[1037]	valid_0's l2: 0.515492


[32m[I 2021-08-31 05:42:51,296][0m Trial 8 finished with value: 0.717977727434254 and parameters: {'learning_rate': 0.07713458989506049, 'reg_lambda': 0.0057026204618624305, 'reg_alpha': 4.262089161215132e-07, 'subsamble': 0.2366972227467502, 'colsample_bytree': 0.38007548578169237, 'max_depth': 27, 'num_leaves': 22, 'min_child_samples': 20}. Best is trial 2 with value: 0.7169866121227203.[0m


0 0.717977727434254
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516287
Early stopping, best iteration is:
[1451]	valid_0's l2: 0.515846


[32m[I 2021-08-31 05:43:16,433][0m Trial 9 finished with value: 0.7182244459758703 and parameters: {'learning_rate': 0.05208347461110538, 'reg_lambda': 3.5644646791133874, 'reg_alpha': 0.15268107785184498, 'subsamble': 0.30860886436159873, 'colsample_bytree': 0.8365660454339663, 'max_depth': 62, 'num_leaves': 18, 'min_child_samples': 49}. Best is trial 2 with value: 0.7169866121227203.[0m


0 0.7182244459758703
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516647
[2000]	valid_0's l2: 0.513567
[3000]	valid_0's l2: 0.512866
[4000]	valid_0's l2: 0.512632
Early stopping, best iteration is:
[4483]	valid_0's l2: 0.512587


[32m[I 2021-08-31 05:44:52,026][0m Trial 10 finished with value: 0.7159515116565125 and parameters: {'learning_rate': 0.015275397700720858, 'reg_lambda': 0.04051328162729846, 'reg_alpha': 0.004981663402480902, 'subsamble': 0.9334867970909132, 'colsample_bytree': 0.14538517418056326, 'max_depth': 71, 'num_leaves': 36, 'min_child_samples': 74}. Best is trial 10 with value: 0.7159515116565125.[0m


0 0.7159515116565125
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.524953
[2000]	valid_0's l2: 0.517
[3000]	valid_0's l2: 0.514287
[4000]	valid_0's l2: 0.51331
[5000]	valid_0's l2: 0.512823
[6000]	valid_0's l2: 0.51257
[7000]	valid_0's l2: 0.5124
Did not meet early stopping. Best iteration is:
[6999]	valid_0's l2: 0.512399


[32m[I 2021-08-31 05:47:14,382][0m Trial 11 finished with value: 0.7158202059739815 and parameters: {'learning_rate': 0.008179932732379826, 'reg_lambda': 0.05749798927514922, 'reg_alpha': 0.00494264412757278, 'subsamble': 0.9979189702743186, 'colsample_bytree': 0.10382162980223705, 'max_depth': 75, 'num_leaves': 35, 'min_child_samples': 75}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7158202059739815
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.526274
[2000]	valid_0's l2: 0.517991
[3000]	valid_0's l2: 0.514902
[4000]	valid_0's l2: 0.513801
[5000]	valid_0's l2: 0.513337
[6000]	valid_0's l2: 0.513064
[7000]	valid_0's l2: 0.512903
Did not meet early stopping. Best iteration is:
[6992]	valid_0's l2: 0.5129


[32m[I 2021-08-31 05:49:39,931][0m Trial 12 finished with value: 0.7161706815638756 and parameters: {'learning_rate': 0.008353180057531495, 'reg_lambda': 0.09144940904691663, 'reg_alpha': 0.005140052350668941, 'subsamble': 0.9850256031134061, 'colsample_bytree': 0.15929775395895784, 'max_depth': 72, 'num_leaves': 35, 'min_child_samples': 73}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7161706815638756
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.52143
[2000]	valid_0's l2: 0.516211
[3000]	valid_0's l2: 0.514859
[4000]	valid_0's l2: 0.514226
[5000]	valid_0's l2: 0.513807
[6000]	valid_0's l2: 0.51358
Early stopping, best iteration is:
[6154]	valid_0's l2: 0.513531


[32m[I 2021-08-31 05:52:01,463][0m Trial 13 finished with value: 0.7166110614517803 and parameters: {'learning_rate': 0.008157956715742046, 'reg_lambda': 0.16179113034985734, 'reg_alpha': 0.0011416366301700714, 'subsamble': 0.9996861789269823, 'colsample_bytree': 0.2644835492442019, 'max_depth': 76, 'num_leaves': 38, 'min_child_samples': 73}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7166110614517803
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.52261
[2000]	valid_0's l2: 0.517658
[3000]	valid_0's l2: 0.515904
[4000]	valid_0's l2: 0.514959
[5000]	valid_0's l2: 0.514351
[6000]	valid_0's l2: 0.513957
[7000]	valid_0's l2: 0.513627
Did not meet early stopping. Best iteration is:
[6980]	valid_0's l2: 0.513627


[32m[I 2021-08-31 05:53:53,111][0m Trial 14 finished with value: 0.7166775303460284 and parameters: {'learning_rate': 0.016379786299010362, 'reg_lambda': 0.21361795406000902, 'reg_alpha': 0.03460827054816429, 'subsamble': 0.8287424708632956, 'colsample_bytree': 0.24899539909328783, 'max_depth': 67, 'num_leaves': 10, 'min_child_samples': 72}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7166775303460284
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.51764
[2000]	valid_0's l2: 0.516133
[3000]	valid_0's l2: 0.515877
Early stopping, best iteration is:
[2848]	valid_0's l2: 0.515851


[32m[I 2021-08-31 05:55:01,892][0m Trial 15 finished with value: 0.7182279345890473 and parameters: {'learning_rate': 0.01489406286619189, 'reg_lambda': 0.018083336124271036, 'reg_alpha': 9.652630863664348e-05, 'subsamble': 0.1119527636715496, 'colsample_bytree': 0.616080853795151, 'max_depth': 68, 'num_leaves': 44, 'min_child_samples': 64}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7182279345890473
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.515387
[2000]	valid_0's l2: 0.514324
Early stopping, best iteration is:
[2611]	valid_0's l2: 0.514175


[32m[I 2021-08-31 05:56:02,828][0m Trial 16 finished with value: 0.7170598490579965 and parameters: {'learning_rate': 0.02659547270872434, 'reg_lambda': 51.91151670064337, 'reg_alpha': 0.0477476610288203, 'subsamble': 0.8898445235173967, 'colsample_bytree': 0.3519824443957724, 'max_depth': 75, 'num_leaves': 32, 'min_child_samples': 82}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7170598490579965
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.51722
[2000]	valid_0's l2: 0.513646
[3000]	valid_0's l2: 0.512901
[4000]	valid_0's l2: 0.512647
Early stopping, best iteration is:
[4148]	valid_0's l2: 0.512631


[32m[I 2021-08-31 05:57:41,823][0m Trial 17 finished with value: 0.7159823297955741 and parameters: {'learning_rate': 0.01241845962044518, 'reg_lambda': 9.732147330312353e-05, 'reg_alpha': 0.00013931646267277385, 'subsamble': 0.6538333960361242, 'colsample_bytree': 0.16025400413254196, 'max_depth': 63, 'num_leaves': 52, 'min_child_samples': 32}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7159823297955741
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516312
Early stopping, best iteration is:
[1691]	valid_0's l2: 0.516016


[32m[I 2021-08-31 05:58:20,577][0m Trial 18 finished with value: 0.7183426282126292 and parameters: {'learning_rate': 0.031129411604237843, 'reg_lambda': 0.0003847647296127531, 'reg_alpha': 0.8020738157388616, 'subsamble': 0.768283031229207, 'colsample_bytree': 0.6079431744649894, 'max_depth': 77, 'num_leaves': 42, 'min_child_samples': 63}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7183426282126292
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.520816
[2000]	valid_0's l2: 0.517742
[3000]	valid_0's l2: 0.516949
[4000]	valid_0's l2: 0.516561
[5000]	valid_0's l2: 0.516469
[6000]	valid_0's l2: 0.516393
Early stopping, best iteration is:
[5819]	valid_0's l2: 0.516373


[32m[I 2021-08-31 06:00:16,136][0m Trial 19 finished with value: 0.7185910678811016 and parameters: {'learning_rate': 0.011028823280376574, 'reg_lambda': 2.2618801822969914, 'reg_alpha': 0.0017647417284615812, 'subsamble': 0.9192875266529357, 'colsample_bytree': 0.9661248671217828, 'max_depth': 56, 'num_leaves': 28, 'min_child_samples': 99}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7185910678811016
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.514684
[2000]	valid_0's l2: 0.512845
Early stopping, best iteration is:
[2293]	valid_0's l2: 0.51273


[32m[I 2021-08-31 06:01:13,763][0m Trial 20 finished with value: 0.716051907280016 and parameters: {'learning_rate': 0.020102581570435075, 'reg_lambda': 0.02906558389067836, 'reg_alpha': 0.009958440327820001, 'subsamble': 0.6687828133361031, 'colsample_bytree': 0.10715041707083375, 'max_depth': 43, 'num_leaves': 62, 'min_child_samples': 37}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.716051907280016
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517891
[2000]	valid_0's l2: 0.5145
[3000]	valid_0's l2: 0.513679
[4000]	valid_0's l2: 0.513416
Early stopping, best iteration is:
[4247]	valid_0's l2: 0.513357


[32m[I 2021-08-31 06:02:53,584][0m Trial 21 finished with value: 0.716489313731704 and parameters: {'learning_rate': 0.012874298200676435, 'reg_lambda': 0.00034528377419125044, 'reg_alpha': 0.00011545973266831596, 'subsamble': 0.5939360937330944, 'colsample_bytree': 0.20720450132040352, 'max_depth': 64, 'num_leaves': 50, 'min_child_samples': 33}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.716489313731704
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.514898
Early stopping, best iteration is:
[1488]	valid_0's l2: 0.514145


[32m[I 2021-08-31 06:03:35,704][0m Trial 22 finished with value: 0.7170390004788946 and parameters: {'learning_rate': 0.03678787703535251, 'reg_lambda': 1.8083615915853925e-05, 'reg_alpha': 85.96125506089835, 'subsamble': 0.7839816185108888, 'colsample_bytree': 0.3196693553436596, 'max_depth': 69, 'num_leaves': 39, 'min_child_samples': 39}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7170390004788946
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.522529
[2000]	valid_0's l2: 0.515666
[3000]	valid_0's l2: 0.513696
[4000]	valid_0's l2: 0.51324
[5000]	valid_0's l2: 0.513054
Early stopping, best iteration is:
[5506]	valid_0's l2: 0.512992


[32m[I 2021-08-31 06:05:46,779][0m Trial 23 finished with value: 0.7162342658088833 and parameters: {'learning_rate': 0.010082378039277508, 'reg_lambda': 0.0012448256407150287, 'reg_alpha': 1.3629243769946148e-05, 'subsamble': 0.9344810844611013, 'colsample_bytree': 0.16120248245404434, 'max_depth': 61, 'num_leaves': 59, 'min_child_samples': 80}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7162342658088833
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.514856
[2000]	valid_0's l2: 0.513255
[3000]	valid_0's l2: 0.512934
Early stopping, best iteration is:
[2891]	valid_0's l2: 0.512921


[32m[I 2021-08-31 06:06:53,359][0m Trial 24 finished with value: 0.7161847693858925 and parameters: {'learning_rate': 0.018872624782600177, 'reg_lambda': 0.286253703317506, 'reg_alpha': 0.000790849792347045, 'subsamble': 0.6884328390851964, 'colsample_bytree': 0.17271677867436583, 'max_depth': 71, 'num_leaves': 45, 'min_child_samples': 63}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7161847693858925
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517499
[2000]	valid_0's l2: 0.515671
[3000]	valid_0's l2: 0.515204
Early stopping, best iteration is:
[3303]	valid_0's l2: 0.515173


[32m[I 2021-08-31 06:08:19,614][0m Trial 25 finished with value: 0.7177557966371694 and parameters: {'learning_rate': 0.01167964694447154, 'reg_lambda': 4.228851873193243e-06, 'reg_alpha': 0.00029281464797728686, 'subsamble': 0.8290646598496888, 'colsample_bytree': 0.44566593637316665, 'max_depth': 49, 'num_leaves': 76, 'min_child_samples': 28}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7177557966371694
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517178
[2000]	valid_0's l2: 0.513984
[3000]	valid_0's l2: 0.513395
Early stopping, best iteration is:
[3705]	valid_0's l2: 0.513222


[32m[I 2021-08-31 06:09:47,091][0m Trial 26 finished with value: 0.7163952984881206 and parameters: {'learning_rate': 0.014273778597317878, 'reg_lambda': 0.01068009628892072, 'reg_alpha': 1.2458689700498501e-06, 'subsamble': 0.599601457430853, 'colsample_bytree': 0.21233510543031625, 'max_depth': 67, 'num_leaves': 49, 'min_child_samples': 80}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7163952984881206
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.522235
[2000]	valid_0's l2: 0.515415
[3000]	valid_0's l2: 0.513559
[4000]	valid_0's l2: 0.512927
[5000]	valid_0's l2: 0.512589
[6000]	valid_0's l2: 0.51248
Early stopping, best iteration is:
[6275]	valid_0's l2: 0.512422


[32m[I 2021-08-31 06:11:55,188][0m Trial 27 finished with value: 0.7158366773174645 and parameters: {'learning_rate': 0.009926453558552198, 'reg_lambda': 0.00017408115679404146, 'reg_alpha': 0.011219613942006287, 'subsamble': 0.9434265817881337, 'colsample_bytree': 0.1046932990291469, 'max_depth': 59, 'num_leaves': 37, 'min_child_samples': 58}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7158366773174645
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.525092
[2000]	valid_0's l2: 0.5171
[3000]	valid_0's l2: 0.51433
[4000]	valid_0's l2: 0.513295
[5000]	valid_0's l2: 0.512844
[6000]	valid_0's l2: 0.512589
[7000]	valid_0's l2: 0.512403
Did not meet early stopping. Best iteration is:
[7000]	valid_0's l2: 0.512403


[32m[I 2021-08-31 06:14:14,877][0m Trial 28 finished with value: 0.71582317936476 and parameters: {'learning_rate': 0.008087877533518556, 'reg_lambda': 1.121916899863391, 'reg_alpha': 0.08250655942073659, 'subsamble': 0.9504141796945825, 'colsample_bytree': 0.1040342952346971, 'max_depth': 57, 'num_leaves': 35, 'min_child_samples': 58}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.71582317936476
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.521642
[2000]	valid_0's l2: 0.5164
[3000]	valid_0's l2: 0.515073
[4000]	valid_0's l2: 0.514413
[5000]	valid_0's l2: 0.513995
[6000]	valid_0's l2: 0.513772
[7000]	valid_0's l2: 0.513619
Did not meet early stopping. Best iteration is:
[6998]	valid_0's l2: 0.513618


[32m[I 2021-08-31 06:16:45,128][0m Trial 29 finished with value: 0.7166716072076706 and parameters: {'learning_rate': 0.009384177620740466, 'reg_lambda': 91.36292481791482, 'reg_alpha': 0.22455240836007762, 'subsamble': 0.9995637903465562, 'colsample_bytree': 0.31324246533153033, 'max_depth': 54, 'num_leaves': 27, 'min_child_samples': 57}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7166716072076706
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.530567
[2000]	valid_0's l2: 0.52205
[3000]	valid_0's l2: 0.517562
[4000]	valid_0's l2: 0.515425
[5000]	valid_0's l2: 0.51412
[6000]	valid_0's l2: 0.513479
[7000]	valid_0's l2: 0.513113
Did not meet early stopping. Best iteration is:
[7000]	valid_0's l2: 0.513113


[32m[I 2021-08-31 06:19:04,062][0m Trial 30 finished with value: 0.7163189297049081 and parameters: {'learning_rate': 0.008103181362994143, 'reg_lambda': 11.820044390036472, 'reg_alpha': 0.03196379141516917, 'subsamble': 0.8804843144500316, 'colsample_bytree': 0.10031234751239393, 'max_depth': 43, 'num_leaves': 33, 'min_child_samples': 57}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7163189297049081
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.521008
[2000]	valid_0's l2: 0.515585
[3000]	valid_0's l2: 0.514213
[4000]	valid_0's l2: 0.513668
[5000]	valid_0's l2: 0.513454
Early stopping, best iteration is:
[5683]	valid_0's l2: 0.513399


[32m[I 2021-08-31 06:21:07,211][0m Trial 31 finished with value: 0.7165184405126727 and parameters: {'learning_rate': 0.010444227689176641, 'reg_lambda': 0.5101799404006553, 'reg_alpha': 0.007689398628415434, 'subsamble': 0.9380379236683921, 'colsample_bytree': 0.2183874959377704, 'max_depth': 59, 'num_leaves': 37, 'min_child_samples': 42}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7165184405126727
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.51777
[2000]	valid_0's l2: 0.513994
[3000]	valid_0's l2: 0.51308
[4000]	valid_0's l2: 0.512735
[5000]	valid_0's l2: 0.512546
Early stopping, best iteration is:
[5427]	valid_0's l2: 0.512474


[32m[I 2021-08-31 06:22:45,953][0m Trial 32 finished with value: 0.7158728007217077 and parameters: {'learning_rate': 0.017220260461947207, 'reg_lambda': 0.99520605139909, 'reg_alpha': 0.19633981859379793, 'subsamble': 0.9567705245382802, 'colsample_bytree': 0.10463202148543063, 'max_depth': 51, 'num_leaves': 23, 'min_child_samples': 69}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7158728007217077
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516828
[2000]	valid_0's l2: 0.513591
[3000]	valid_0's l2: 0.512805
[4000]	valid_0's l2: 0.512584
Early stopping, best iteration is:
[3922]	valid_0's l2: 0.512564


[32m[I 2021-08-31 06:24:01,232][0m Trial 33 finished with value: 0.7159359111103405 and parameters: {'learning_rate': 0.019022079621508446, 'reg_lambda': 0.820883356225161, 'reg_alpha': 0.3536857884171492, 'subsamble': 0.8358538554276851, 'colsample_bytree': 0.10247300656690506, 'max_depth': 49, 'num_leaves': 24, 'min_child_samples': 66}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7159359111103405
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.52522
[2000]	valid_0's l2: 0.518455
[3000]	valid_0's l2: 0.515927
[4000]	valid_0's l2: 0.514826
[5000]	valid_0's l2: 0.514175
[6000]	valid_0's l2: 0.513698
[7000]	valid_0's l2: 0.513362
Did not meet early stopping. Best iteration is:
[6983]	valid_0's l2: 0.513358


[32m[I 2021-08-31 06:26:05,849][0m Trial 34 finished with value: 0.7164903474123749 and parameters: {'learning_rate': 0.010238234166754368, 'reg_lambda': 19.47202142989287, 'reg_alpha': 3.1184821638783857, 'subsamble': 0.9489491855014154, 'colsample_bytree': 0.21340634829423843, 'max_depth': 37, 'num_leaves': 15, 'min_child_samples': 86}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7164903474123749
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516643
[2000]	valid_0's l2: 0.514738
[3000]	valid_0's l2: 0.514087
[4000]	valid_0's l2: 0.513815
Early stopping, best iteration is:
[3923]	valid_0's l2: 0.513794


[32m[I 2021-08-31 06:27:21,393][0m Trial 35 finished with value: 0.7167945446083457 and parameters: {'learning_rate': 0.0230126127308413, 'reg_lambda': 1.5205561532004679, 'reg_alpha': 0.09016152531899144, 'subsamble': 0.8774299495917108, 'colsample_bytree': 0.28871064237717387, 'max_depth': 53, 'num_leaves': 20, 'min_child_samples': 55}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7167945446083457
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.518696
[2000]	valid_0's l2: 0.514814
[3000]	valid_0's l2: 0.513864
[4000]	valid_0's l2: 0.513497
[5000]	valid_0's l2: 0.51327
Early stopping, best iteration is:
[4912]	valid_0's l2: 0.513253


[32m[I 2021-08-31 06:29:23,192][0m Trial 36 finished with value: 0.7164170742074055 and parameters: {'learning_rate': 0.01278119195966399, 'reg_lambda': 9.05500468156523, 'reg_alpha': 19.24180618987625, 'subsamble': 0.7822305448677056, 'colsample_bytree': 0.3757131373800798, 'max_depth': 46, 'num_leaves': 31, 'min_child_samples': 69}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7164170742074055
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[425]	valid_0's l2: 0.514894


[32m[I 2021-08-31 06:29:35,393][0m Trial 37 finished with value: 0.71756131458181 and parameters: {'learning_rate': 0.13548395170130872, 'reg_lambda': 0.06480944503902034, 'reg_alpha': 0.030557745134512124, 'subsamble': 0.7298495847736322, 'colsample_bytree': 0.18098882483033735, 'max_depth': 13, 'num_leaves': 26, 'min_child_samples': 44}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.71756131458181
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.514227
Early stopping, best iteration is:
[941]	valid_0's l2: 0.514195


[32m[I 2021-08-31 06:29:59,299][0m Trial 38 finished with value: 0.7170742030041847 and parameters: {'learning_rate': 0.04822524588454445, 'reg_lambda': 0.0015369654440845126, 'reg_alpha': 0.7937817402631303, 'subsamble': 0.9708312155912544, 'colsample_bytree': 0.25241967569931617, 'max_depth': 40, 'num_leaves': 41, 'min_child_samples': 59}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7170742030041847
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.526899
[2000]	valid_0's l2: 0.51962
[3000]	valid_0's l2: 0.516627
[4000]	valid_0's l2: 0.515298
[5000]	valid_0's l2: 0.514519
[6000]	valid_0's l2: 0.513983
[7000]	valid_0's l2: 0.513613
Did not meet early stopping. Best iteration is:
[7000]	valid_0's l2: 0.513613


[32m[I 2021-08-31 06:32:02,775][0m Trial 39 finished with value: 0.7166676740316894 and parameters: {'learning_rate': 0.008826698748812442, 'reg_lambda': 0.006132570996915215, 'reg_alpha': 0.01539395682961053, 'subsamble': 0.9052676967202026, 'colsample_bytree': 0.13661816557752757, 'max_depth': 53, 'num_leaves': 14, 'min_child_samples': 77}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7166676740316894
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516477
Early stopping, best iteration is:
[1382]	valid_0's l2: 0.516169


[32m[I 2021-08-31 06:32:33,744][0m Trial 40 finished with value: 0.7184487317116186 and parameters: {'learning_rate': 0.03689956173662028, 'reg_lambda': 1.7484616817012218e-06, 'reg_alpha': 0.0029590049148073955, 'subsamble': 0.3937574187282779, 'colsample_bytree': 0.7251277474579206, 'max_depth': 32, 'num_leaves': 31, 'min_child_samples': 86}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7184487317116186
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516334
[2000]	valid_0's l2: 0.513381
[3000]	valid_0's l2: 0.512691
[4000]	valid_0's l2: 0.512518
Early stopping, best iteration is:
[4067]	valid_0's l2: 0.512497


[32m[I 2021-08-31 06:33:52,130][0m Trial 41 finished with value: 0.7158887395058514 and parameters: {'learning_rate': 0.01981999420347477, 'reg_lambda': 0.7728344616211164, 'reg_alpha': 0.3016552208785013, 'subsamble': 0.8341019782594211, 'colsample_bytree': 0.10915554864736196, 'max_depth': 49, 'num_leaves': 25, 'min_child_samples': 68}. Best is trial 11 with value: 0.7158202059739815.[0m


0 0.7158887395058514
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.520651
[2000]	valid_0's l2: 0.515035
[3000]	valid_0's l2: 0.513328
[4000]	valid_0's l2: 0.512699
[5000]	valid_0's l2: 0.51244
Early stopping, best iteration is:
[5432]	valid_0's l2: 0.512361


[32m[I 2021-08-31 06:35:33,676][0m Trial 42 finished with value: 0.7157946975915584 and parameters: {'learning_rate': 0.01648083242066651, 'reg_lambda': 3.665762532232525, 'reg_alpha': 5.611636993099838, 'subsamble': 0.8678272404794748, 'colsample_bytree': 0.13245782555280045, 'max_depth': 57, 'num_leaves': 22, 'min_child_samples': 68}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7157946975915584
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.519712
[2000]	valid_0's l2: 0.514717
[3000]	valid_0's l2: 0.513321
[4000]	valid_0's l2: 0.512779
[5000]	valid_0's l2: 0.512536
Early stopping, best iteration is:
[5524]	valid_0's l2: 0.512456


[32m[I 2021-08-31 06:37:24,512][0m Trial 43 finished with value: 0.7158608625956279 and parameters: {'learning_rate': 0.01630465992829934, 'reg_lambda': 4.965645527913258, 'reg_alpha': 19.110086882545634, 'subsamble': 0.9509509688708702, 'colsample_bytree': 0.1905272294087511, 'max_depth': 58, 'num_leaves': 20, 'min_child_samples': 61}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7158608625956279
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.520955
[2000]	valid_0's l2: 0.515813
[3000]	valid_0's l2: 0.514126
[4000]	valid_0's l2: 0.513316
[5000]	valid_0's l2: 0.512915
[6000]	valid_0's l2: 0.512675
[7000]	valid_0's l2: 0.512568
Did not meet early stopping. Best iteration is:
[6962]	valid_0's l2: 0.512564


[32m[I 2021-08-31 06:39:45,530][0m Trial 44 finished with value: 0.7159364624809625 and parameters: {'learning_rate': 0.013425096679788957, 'reg_lambda': 3.420284843691292, 'reg_alpha': 42.896706606772334, 'subsamble': 0.8591472762192722, 'colsample_bytree': 0.19573187236990536, 'max_depth': 58, 'num_leaves': 19, 'min_child_samples': 60}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7159364624809625
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.525731
[2000]	valid_0's l2: 0.519424
[3000]	valid_0's l2: 0.517043
[4000]	valid_0's l2: 0.515877
[5000]	valid_0's l2: 0.515154
[6000]	valid_0's l2: 0.514606
[7000]	valid_0's l2: 0.514212
Did not meet early stopping. Best iteration is:
[6994]	valid_0's l2: 0.514212


[32m[I 2021-08-31 06:41:38,099][0m Trial 45 finished with value: 0.7170855714142855 and parameters: {'learning_rate': 0.009418265387351011, 'reg_lambda': 26.988775584156162, 'reg_alpha': 10.06067121033086, 'subsamble': 0.9100003635950422, 'colsample_bytree': 0.42277148580786994, 'max_depth': 21, 'num_leaves': 13, 'min_child_samples': 52}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7170855714142855
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.513
Early stopping, best iteration is:
[857]	valid_0's l2: 0.512918


[32m[I 2021-08-31 06:41:59,183][0m Trial 46 finished with value: 0.716182623640552 and parameters: {'learning_rate': 0.059068875726381406, 'reg_lambda': 5.653848831456116, 'reg_alpha': 3.0260793282398977, 'subsamble': 0.9993324395777744, 'colsample_bytree': 0.1434558446055691, 'max_depth': 65, 'num_leaves': 35, 'min_child_samples': 48}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.716182623640552
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.520656
[2000]	valid_0's l2: 0.515958
[3000]	valid_0's l2: 0.514715
[4000]	valid_0's l2: 0.514006
[5000]	valid_0's l2: 0.513572
[6000]	valid_0's l2: 0.513347
[7000]	valid_0's l2: 0.513237
Did not meet early stopping. Best iteration is:
[6994]	valid_0's l2: 0.513232


[32m[I 2021-08-31 06:44:09,233][0m Trial 47 finished with value: 0.7164024260674339 and parameters: {'learning_rate': 0.011611468377974363, 'reg_lambda': 9.248513208570126e-08, 'reg_alpha': 1.4782807413551493, 'subsamble': 0.9591908884483752, 'colsample_bytree': 0.24194025599313018, 'max_depth': 60, 'num_leaves': 21, 'min_child_samples': 52}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7164024260674339
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517274
[2000]	valid_0's l2: 0.515056
[3000]	valid_0's l2: 0.51448
[4000]	valid_0's l2: 0.514247
Early stopping, best iteration is:
[4153]	valid_0's l2: 0.514197


[32m[I 2021-08-31 06:45:41,400][0m Trial 48 finished with value: 0.7170756138190028 and parameters: {'learning_rate': 0.016459416134552343, 'reg_lambda': 0.11297663351322011, 'reg_alpha': 7.861249423692707, 'subsamble': 0.8026514684397775, 'colsample_bytree': 0.5042635038039591, 'max_depth': 55, 'num_leaves': 29, 'min_child_samples': 61}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7170756138190028
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.514134
[2000]	valid_0's l2: 0.513087
Early stopping, best iteration is:
[2200]	valid_0's l2: 0.513045


[32m[I 2021-08-31 06:46:40,157][0m Trial 49 finished with value: 0.7162713549112117 and parameters: {'learning_rate': 0.025920355679869632, 'reg_lambda': 5.236151298857016e-05, 'reg_alpha': 21.053655872448253, 'subsamble': 0.500772249834714, 'colsample_bytree': 0.28451089434260424, 'max_depth': 46, 'num_leaves': 40, 'min_child_samples': 46}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7162713549112117
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[518]	valid_0's l2: 0.513688


[32m[I 2021-08-31 06:46:51,721][0m Trial 50 finished with value: 0.7167204284200371 and parameters: {'learning_rate': 0.22798066546188098, 'reg_lambda': 0.0003599036187046475, 'reg_alpha': 0.06958074506015687, 'subsamble': 0.8837812565706322, 'colsample_bytree': 0.1452008273473089, 'max_depth': 72, 'num_leaves': 10, 'min_child_samples': 76}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7167204284200371
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.518254
[2000]	valid_0's l2: 0.514615
[3000]	valid_0's l2: 0.513456
[4000]	valid_0's l2: 0.512977
[5000]	valid_0's l2: 0.512707
[6000]	valid_0's l2: 0.512539
Early stopping, best iteration is:
[6262]	valid_0's l2: 0.512521


[32m[I 2021-08-31 06:48:40,137][0m Trial 51 finished with value: 0.7159058559561498 and parameters: {'learning_rate': 0.017331599882551837, 'reg_lambda': 0.5458829134385135, 'reg_alpha': 0.6564036514719128, 'subsamble': 0.9676767062024163, 'colsample_bytree': 0.13333199542262927, 'max_depth': 57, 'num_leaves': 18, 'min_child_samples': 70}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7159058559561498
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.519957
[2000]	valid_0's l2: 0.515246
[3000]	valid_0's l2: 0.514093
[4000]	valid_0's l2: 0.51361
[5000]	valid_0's l2: 0.513346
Early stopping, best iteration is:
[5132]	valid_0's l2: 0.513313


[32m[I 2021-08-31 06:50:16,168][0m Trial 52 finished with value: 0.7164588630426878 and parameters: {'learning_rate': 0.014897424445902094, 'reg_lambda': 2.1340973082710004, 'reg_alpha': 1.013870995925226e-08, 'subsamble': 0.9338709044935752, 'colsample_bytree': 0.18150170308539604, 'max_depth': 51, 'num_leaves': 22, 'min_child_samples': 67}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7164588630426878
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.528433
[2000]	valid_0's l2: 0.519869
[3000]	valid_0's l2: 0.515997
[4000]	valid_0's l2: 0.514334
[5000]	valid_0's l2: 0.513551
[6000]	valid_0's l2: 0.513105
[7000]	valid_0's l2: 0.51287
Did not meet early stopping. Best iteration is:
[6995]	valid_0's l2: 0.512869


[32m[I 2021-08-31 06:52:36,861][0m Trial 53 finished with value: 0.7161484111137613 and parameters: {'learning_rate': 0.0080185260137472, 'reg_lambda': 32.78673824623246, 'reg_alpha': 0.10546551093112101, 'subsamble': 0.9706787927411347, 'colsample_bytree': 0.12763875483298237, 'max_depth': 62, 'num_leaves': 33, 'min_child_samples': 55}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7161484111137613
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.515925
[2000]	valid_0's l2: 0.514042
[3000]	valid_0's l2: 0.513431
Early stopping, best iteration is:
[3424]	valid_0's l2: 0.513276


[32m[I 2021-08-31 06:53:37,675][0m Trial 54 finished with value: 0.7164330814343276 and parameters: {'learning_rate': 0.029959375265413483, 'reg_lambda': 6.078464909917128, 'reg_alpha': 0.0005626563923626989, 'subsamble': 0.9036631153761575, 'colsample_bytree': 0.22933114539281055, 'max_depth': 52, 'num_leaves': 16, 'min_child_samples': 64}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7164330814343276
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.520787
[2000]	valid_0's l2: 0.514722
[3000]	valid_0's l2: 0.513224
[4000]	valid_0's l2: 0.512842
Early stopping, best iteration is:
[4604]	valid_0's l2: 0.512722


[32m[I 2021-08-31 06:55:28,243][0m Trial 55 finished with value: 0.7160459912333007 and parameters: {'learning_rate': 0.010746023953399573, 'reg_lambda': 1.0480622070781864, 'reg_alpha': 4.411270404759641, 'subsamble': 0.852777892812585, 'colsample_bytree': 0.18512393532394802, 'max_depth': 65, 'num_leaves': 47, 'min_child_samples': 72}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7160459912333007
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516154
[2000]	valid_0's l2: 0.513507
[3000]	valid_0's l2: 0.513089
Early stopping, best iteration is:
[3111]	valid_0's l2: 0.513011


[32m[I 2021-08-31 06:56:34,040][0m Trial 56 finished with value: 0.716247515236686 and parameters: {'learning_rate': 0.02333130993146747, 'reg_lambda': 0.3540537998887716, 'reg_alpha': 0.0027563422647501083, 'subsamble': 0.718992736148336, 'colsample_bytree': 0.16167576629088662, 'max_depth': 56, 'num_leaves': 29, 'min_child_samples': 74}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.716247515236686
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.524992
[2000]	valid_0's l2: 0.517264
[3000]	valid_0's l2: 0.514685
[4000]	valid_0's l2: 0.513659
[5000]	valid_0's l2: 0.513129
[6000]	valid_0's l2: 0.512817
[7000]	valid_0's l2: 0.512614
Did not meet early stopping. Best iteration is:
[6999]	valid_0's l2: 0.512613


[32m[I 2021-08-31 06:58:43,810][0m Trial 57 finished with value: 0.7159698500131266 and parameters: {'learning_rate': 0.009230064676052894, 'reg_lambda': 0.022544139797573283, 'reg_alpha': 0.014624617574351581, 'subsamble': 0.9291324287492637, 'colsample_bytree': 0.10404261142431862, 'max_depth': 74, 'num_leaves': 23, 'min_child_samples': 66}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7159698500131266
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517082
[2000]	valid_0's l2: 0.51533
[3000]	valid_0's l2: 0.515063
Early stopping, best iteration is:
[3134]	valid_0's l2: 0.515032


[32m[I 2021-08-31 07:00:06,034][0m Trial 58 finished with value: 0.7176569880809552 and parameters: {'learning_rate': 0.021601425049719, 'reg_lambda': 0.13942128533523973, 'reg_alpha': 58.72225167982401, 'subsamble': 0.9666292308300242, 'colsample_bytree': 0.7993016516721109, 'max_depth': 60, 'num_leaves': 35, 'min_child_samples': 61}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7176569880809552
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517335
[2000]	valid_0's l2: 0.515924
Early stopping, best iteration is:
[2470]	valid_0's l2: 0.515681


[32m[I 2021-08-31 07:01:05,412][0m Trial 59 finished with value: 0.7181093215037078 and parameters: {'learning_rate': 0.01660941644602892, 'reg_lambda': 0.003314341399858474, 'reg_alpha': 1.3354245104410545, 'subsamble': 0.8078553200499424, 'colsample_bytree': 0.6534396356673321, 'max_depth': 46, 'num_leaves': 42, 'min_child_samples': 78}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7181093215037078
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.518236
[2000]	valid_0's l2: 0.514524
[3000]	valid_0's l2: 0.513678
[4000]	valid_0's l2: 0.513356
Early stopping, best iteration is:
[4562]	valid_0's l2: 0.513282


[32m[I 2021-08-31 07:03:07,444][0m Trial 60 finished with value: 0.716437147940185 and parameters: {'learning_rate': 0.012260279827757499, 'reg_lambda': 14.724478719692375, 'reg_alpha': 15.875032025872274, 'subsamble': 0.7526058665918021, 'colsample_bytree': 0.3400030344979371, 'max_depth': 68, 'num_leaves': 38, 'min_child_samples': 93}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.716437147940185
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.521831
[2000]	valid_0's l2: 0.51568
[3000]	valid_0's l2: 0.513817
[4000]	valid_0's l2: 0.513206
[5000]	valid_0's l2: 0.512862
Early stopping, best iteration is:
[5620]	valid_0's l2: 0.512717


[32m[I 2021-08-31 07:04:53,434][0m Trial 61 finished with value: 0.7160424782970033 and parameters: {'learning_rate': 0.014295186454657957, 'reg_lambda': 1.5214460325507537, 'reg_alpha': 0.262939360468726, 'subsamble': 0.8703794878641784, 'colsample_bytree': 0.12707434783280774, 'max_depth': 49, 'num_leaves': 25, 'min_child_samples': 70}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7160424782970033
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516215
[2000]	valid_0's l2: 0.513299
[3000]	valid_0's l2: 0.512642
[4000]	valid_0's l2: 0.512472
Early stopping, best iteration is:
[3983]	valid_0's l2: 0.512461


[32m[I 2021-08-31 07:06:08,925][0m Trial 62 finished with value: 0.7158639123679369 and parameters: {'learning_rate': 0.020117559066865007, 'reg_lambda': 3.375451249029576, 'reg_alpha': 0.40387682213645804, 'subsamble': 0.9065536461926411, 'colsample_bytree': 0.1021278601660311, 'max_depth': 41, 'num_leaves': 25, 'min_child_samples': 68}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7158639123679369
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.514218
[2000]	valid_0's l2: 0.513195
Early stopping, best iteration is:
[2006]	valid_0's l2: 0.513188


[32m[I 2021-08-31 07:06:48,781][0m Trial 63 finished with value: 0.7163711123120969 and parameters: {'learning_rate': 0.03722751993852197, 'reg_lambda': 3.0962300876321205, 'reg_alpha': 0.5473293703720982, 'subsamble': 0.13588780442169368, 'colsample_bytree': 0.1578302935883425, 'max_depth': 39, 'num_leaves': 29, 'min_child_samples': 84}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7163711123120969
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517911
[2000]	valid_0's l2: 0.515994
[3000]	valid_0's l2: 0.515183
[4000]	valid_0's l2: 0.514998
Early stopping, best iteration is:
[4233]	valid_0's l2: 0.514955


[32m[I 2021-08-31 07:07:51,454][0m Trial 64 finished with value: 0.7176036220869779 and parameters: {'learning_rate': 0.03071559932403523, 'reg_lambda': 84.9874041689251, 'reg_alpha': 0.17436930208067142, 'subsamble': 0.9995849843607099, 'colsample_bytree': 0.950180193154315, 'max_depth': 40, 'num_leaves': 12, 'min_child_samples': 64}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7176036220869779
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.518083
[2000]	valid_0's l2: 0.515111
[3000]	valid_0's l2: 0.514193
[4000]	valid_0's l2: 0.513652
[5000]	valid_0's l2: 0.51329
[6000]	valid_0's l2: 0.513129
Early stopping, best iteration is:
[6326]	valid_0's l2: 0.513114


[32m[I 2021-08-31 07:09:40,247][0m Trial 65 finished with value: 0.7163198068488658 and parameters: {'learning_rate': 0.018291708576573536, 'reg_lambda': 0.22152384211083984, 'reg_alpha': 0.021980503664321863, 'subsamble': 0.9251477060597671, 'colsample_bytree': 0.195796247446183, 'max_depth': 63, 'num_leaves': 17, 'min_child_samples': 57}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7163198068488658
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.526821
[2000]	valid_0's l2: 0.518919
[3000]	valid_0's l2: 0.515655
[4000]	valid_0's l2: 0.514272
[5000]	valid_0's l2: 0.513596
[6000]	valid_0's l2: 0.513212
[7000]	valid_0's l2: 0.513019
Did not meet early stopping. Best iteration is:
[6981]	valid_0's l2: 0.513016


[32m[I 2021-08-31 07:11:45,162][0m Trial 66 finished with value: 0.716251116492434 and parameters: {'learning_rate': 0.010066563167038786, 'reg_lambda': 5.480141423019631, 'reg_alpha': 0.06145507653901723, 'subsamble': 0.9528420350076926, 'colsample_bytree': 0.12916469282853912, 'max_depth': 31, 'num_leaves': 22, 'min_child_samples': 74}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.716251116492434
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.519083
[2000]	valid_0's l2: 0.514748
[3000]	valid_0's l2: 0.513768
[4000]	valid_0's l2: 0.513218
[5000]	valid_0's l2: 0.512974
Early stopping, best iteration is:
[5106]	valid_0's l2: 0.512941


[32m[I 2021-08-31 07:13:37,533][0m Trial 67 finished with value: 0.7161989381462802 and parameters: {'learning_rate': 0.011365807738803706, 'reg_lambda': 0.00017761545544904474, 'reg_alpha': 4.892217665496455, 'subsamble': 0.8998214853410077, 'colsample_bytree': 0.2339243123554237, 'max_depth': 44, 'num_leaves': 32, 'min_child_samples': 53}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7161989381462802
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517591
[2000]	valid_0's l2: 0.513819
[3000]	valid_0's l2: 0.513018
[4000]	valid_0's l2: 0.51271
Early stopping, best iteration is:
[4173]	valid_0's l2: 0.512654


[32m[I 2021-08-31 07:15:07,216][0m Trial 68 finished with value: 0.7159982863418779 and parameters: {'learning_rate': 0.013508245106240133, 'reg_lambda': 0.07654137648779087, 'reg_alpha': 1.378614658395917, 'subsamble': 0.8537263548131938, 'colsample_bytree': 0.16736679759161566, 'max_depth': 34, 'num_leaves': 35, 'min_child_samples': 71}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7159982863418779
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517023
[2000]	valid_0's l2: 0.513911
[3000]	valid_0's l2: 0.513117
[4000]	valid_0's l2: 0.51281
Early stopping, best iteration is:
[4520]	valid_0's l2: 0.512674


[32m[I 2021-08-31 07:16:27,307][0m Trial 69 finished with value: 0.7160125189081723 and parameters: {'learning_rate': 0.020675318445269604, 'reg_lambda': 2.196589113337035e-05, 'reg_alpha': 0.005928761944639666, 'subsamble': 0.4355468521605799, 'colsample_bytree': 0.10769946461195203, 'max_depth': 58, 'num_leaves': 20, 'min_child_samples': 66}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7160125189081723
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.515022
[2000]	valid_0's l2: 0.513254
Early stopping, best iteration is:
[2722]	valid_0's l2: 0.513018


[32m[I 2021-08-31 07:17:31,771][0m Trial 70 finished with value: 0.7162530767552947 and parameters: {'learning_rate': 0.026976508391988584, 'reg_lambda': 0.05039219874050437, 'reg_alpha': 31.882335702954137, 'subsamble': 0.9425753679426769, 'colsample_bytree': 0.2596008244535808, 'max_depth': 56, 'num_leaves': 26, 'min_child_samples': 59}. Best is trial 42 with value: 0.7157946975915584.[0m


0 0.7162530767552947
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.518168
[2000]	valid_0's l2: 0.51402
[3000]	valid_0's l2: 0.512984
[4000]	valid_0's l2: 0.512572
[5000]	valid_0's l2: 0.512356
Early stopping, best iteration is:
[5683]	valid_0's l2: 0.512268


[32m[I 2021-08-31 07:19:17,069][0m Trial 71 finished with value: 0.7157287562736403 and parameters: {'learning_rate': 0.015863821736850407, 'reg_lambda': 1.199264465828154, 'reg_alpha': 0.3328589119606132, 'subsamble': 0.8218805775811349, 'colsample_bytree': 0.10273746803376707, 'max_depth': 48, 'num_leaves': 25, 'min_child_samples': 68}. Best is trial 71 with value: 0.7157287562736403.[0m


0 0.7157287562736403
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.521028
[2000]	valid_0's l2: 0.515426
[3000]	valid_0's l2: 0.513799
[4000]	valid_0's l2: 0.513357
[5000]	valid_0's l2: 0.513111
Early stopping, best iteration is:
[5413]	valid_0's l2: 0.513028


[32m[I 2021-08-31 07:20:56,973][0m Trial 72 finished with value: 0.7162597164387251 and parameters: {'learning_rate': 0.015805292426245735, 'reg_lambda': 1.3100757422699802, 'reg_alpha': 1.6423512137902933e-07, 'subsamble': 0.8996588761219322, 'colsample_bytree': 0.1308915249393839, 'max_depth': 47, 'num_leaves': 23, 'min_child_samples': 62}. Best is trial 71 with value: 0.7157287562736403.[0m


0 0.7162597164387251
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.521941
[2000]	valid_0's l2: 0.51613
[3000]	valid_0's l2: 0.514583
[4000]	valid_0's l2: 0.513858
[5000]	valid_0's l2: 0.513492
[6000]	valid_0's l2: 0.513287
Early stopping, best iteration is:
[6389]	valid_0's l2: 0.513216


[32m[I 2021-08-31 07:23:07,634][0m Trial 73 finished with value: 0.716390690458164 and parameters: {'learning_rate': 0.009012045792402326, 'reg_lambda': 8.450175478435405, 'reg_alpha': 0.12758237819387766, 'subsamble': 0.8091207450405264, 'colsample_bytree': 0.19660709982605673, 'max_depth': 42, 'num_leaves': 28, 'min_child_samples': 79}. Best is trial 71 with value: 0.7157287562736403.[0m


0 0.716390690458164
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516894
[2000]	valid_0's l2: 0.513497
[3000]	valid_0's l2: 0.512707
[4000]	valid_0's l2: 0.512331
Early stopping, best iteration is:
[4721]	valid_0's l2: 0.51223


[32m[I 2021-08-31 07:24:37,294][0m Trial 74 finished with value: 0.7157022024939034 and parameters: {'learning_rate': 0.017617905594164103, 'reg_lambda': 0.2971710372778072, 'reg_alpha': 0.4754381939584199, 'subsamble': 0.9808300822318052, 'colsample_bytree': 0.10159075122483158, 'max_depth': 27, 'num_leaves': 27, 'min_child_samples': 76}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7157022024939034
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.521691
[2000]	valid_0's l2: 0.515403
[3000]	valid_0's l2: 0.513735
[4000]	valid_0's l2: 0.51324
[5000]	valid_0's l2: 0.51304
Early stopping, best iteration is:
[5450]	valid_0's l2: 0.512968


[32m[I 2021-08-31 07:26:27,892][0m Trial 75 finished with value: 0.7162175630984375 and parameters: {'learning_rate': 0.0126242803995614, 'reg_lambda': 0.42810678155744747, 'reg_alpha': 0.04921513798192993, 'subsamble': 0.9897891191172241, 'colsample_bytree': 0.15857481675724472, 'max_depth': 54, 'num_leaves': 30, 'min_child_samples': 75}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7162175630984375
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.521115
[2000]	valid_0's l2: 0.515262
[3000]	valid_0's l2: 0.513507
[4000]	valid_0's l2: 0.512998
[5000]	valid_0's l2: 0.512767
[6000]	valid_0's l2: 0.512699
Early stopping, best iteration is:
[6726]	valid_0's l2: 0.512668


[32m[I 2021-08-31 07:28:33,133][0m Trial 76 finished with value: 0.7160084175392442 and parameters: {'learning_rate': 0.014777251500234876, 'reg_lambda': 0.0007995941377340099, 'reg_alpha': 0.7691048533431178, 'subsamble': 0.604802621113658, 'colsample_bytree': 0.12370788079773519, 'max_depth': 15, 'num_leaves': 27, 'min_child_samples': 82}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7160084175392442
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.524583
[2000]	valid_0's l2: 0.518282
[3000]	valid_0's l2: 0.515901
[4000]	valid_0's l2: 0.514828
[5000]	valid_0's l2: 0.514269
[6000]	valid_0's l2: 0.513873
[7000]	valid_0's l2: 0.513699
Did not meet early stopping. Best iteration is:
[7000]	valid_0's l2: 0.513699


[32m[I 2021-08-31 07:31:18,633][0m Trial 77 finished with value: 0.7167281090327954 and parameters: {'learning_rate': 0.01006444511179265, 'reg_lambda': 45.55762062141898, 'reg_alpha': 97.5477992376041, 'subsamble': 0.917849199660713, 'colsample_bytree': 0.17318744286616233, 'max_depth': 20, 'num_leaves': 37, 'min_child_samples': 57}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7167281090327954
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[551]	valid_0's l2: 0.514476


[32m[I 2021-08-31 07:31:34,592][0m Trial 78 finished with value: 0.7172700638657972 and parameters: {'learning_rate': 0.07918097488252086, 'reg_lambda': 0.011942195532112971, 'reg_alpha': 0.39157003229699416, 'subsamble': 0.8763796272768029, 'colsample_bytree': 0.21339300338413103, 'max_depth': 26, 'num_leaves': 34, 'min_child_samples': 65}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7172700638657972
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.526284
[2000]	valid_0's l2: 0.51793
[3000]	valid_0's l2: 0.514855
[4000]	valid_0's l2: 0.513668
[5000]	valid_0's l2: 0.513152
[6000]	valid_0's l2: 0.512789
[7000]	valid_0's l2: 0.512608
Did not meet early stopping. Best iteration is:
[6992]	valid_0's l2: 0.512604


[32m[I 2021-08-31 07:33:59,530][0m Trial 79 finished with value: 0.7159636247888133 and parameters: {'learning_rate': 0.008632440075821078, 'reg_lambda': 3.835935532509616, 'reg_alpha': 2.2062146116788477, 'subsamble': 0.9786249905206743, 'colsample_bytree': 0.14889992088864956, 'max_depth': 26, 'num_leaves': 31, 'min_child_samples': 90}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7159636247888133
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.524441
[2000]	valid_0's l2: 0.517206
[3000]	valid_0's l2: 0.514646
[4000]	valid_0's l2: 0.513666
[5000]	valid_0's l2: 0.513218
[6000]	valid_0's l2: 0.512943
[7000]	valid_0's l2: 0.512823
Did not meet early stopping. Best iteration is:
[6995]	valid_0's l2: 0.512822


[32m[I 2021-08-31 07:36:06,239][0m Trial 80 finished with value: 0.7161157876871681 and parameters: {'learning_rate': 0.01159785293556516, 'reg_lambda': 13.729155139585478, 'reg_alpha': 0.001198344186160251, 'subsamble': 0.8428523314073049, 'colsample_bytree': 0.1212245002567628, 'max_depth': 66, 'num_leaves': 25, 'min_child_samples': 49}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7161157876871681
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.521938
[2000]	valid_0's l2: 0.51599
[3000]	valid_0's l2: 0.513921
[4000]	valid_0's l2: 0.513295
[5000]	valid_0's l2: 0.51303
[6000]	valid_0's l2: 0.512923
Early stopping, best iteration is:
[6592]	valid_0's l2: 0.512877


[32m[I 2021-08-31 07:37:58,962][0m Trial 81 finished with value: 0.7161544358996734 and parameters: {'learning_rate': 0.018287175821477035, 'reg_lambda': 0.6514117948048139, 'reg_alpha': 0.011637677779197855, 'subsamble': 0.951989339440402, 'colsample_bytree': 0.10100382771684753, 'max_depth': 52, 'num_leaves': 20, 'min_child_samples': 68}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7161544358996734
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.520676
[2000]	valid_0's l2: 0.515314
[3000]	valid_0's l2: 0.513601
[4000]	valid_0's l2: 0.512984
[5000]	valid_0's l2: 0.512664
Early stopping, best iteration is:
[5457]	valid_0's l2: 0.512562


[32m[I 2021-08-31 07:39:34,068][0m Trial 82 finished with value: 0.7159346219851689 and parameters: {'learning_rate': 0.021268247669621525, 'reg_lambda': 0.23226266543822485, 'reg_alpha': 0.1774302973136155, 'subsamble': 0.979454192273935, 'colsample_bytree': 0.10081805235441568, 'max_depth': 51, 'num_leaves': 18, 'min_child_samples': 71}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7159346219851689
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.518984
[2000]	valid_0's l2: 0.514616
[3000]	valid_0's l2: 0.513483
[4000]	valid_0's l2: 0.513026
[5000]	valid_0's l2: 0.51284
Early stopping, best iteration is:
[4952]	valid_0's l2: 0.512824


[32m[I 2021-08-31 07:41:12,126][0m Trial 83 finished with value: 0.7161176182696254 and parameters: {'learning_rate': 0.013671287927450643, 'reg_lambda': 9.635951494933599e-06, 'reg_alpha': 0.0028926052278057377, 'subsamble': 0.9232535760514425, 'colsample_bytree': 0.14763300610225347, 'max_depth': 61, 'num_leaves': 24, 'min_child_samples': 68}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7161176182696254
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516386
[2000]	valid_0's l2: 0.513777
[3000]	valid_0's l2: 0.513181
[4000]	valid_0's l2: 0.512975
Early stopping, best iteration is:
[4122]	valid_0's l2: 0.512946


[32m[I 2021-08-31 07:42:35,010][0m Trial 84 finished with value: 0.716202401101692 and parameters: {'learning_rate': 0.017878800683432377, 'reg_lambda': 2.4239258177082577, 'reg_alpha': 0.0372215188166527, 'subsamble': 0.8870699172264255, 'colsample_bytree': 0.16889790661657436, 'max_depth': 54, 'num_leaves': 27, 'min_child_samples': 59}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.716202401101692
Encoding
Training
Training until validation scores don't improve for 200 rounds
Early stopping, best iteration is:
[353]	valid_0's l2: 0.514092


[32m[I 2021-08-31 07:42:52,036][0m Trial 85 finished with value: 0.7170020187537797 and parameters: {'learning_rate': 0.10793761210373705, 'reg_lambda': 0.9917560728373139, 'reg_alpha': 4.792002547935293, 'subsamble': 0.9504428491303502, 'colsample_bytree': 0.19588836283619873, 'max_depth': 28, 'num_leaves': 44, 'min_child_samples': 76}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7170020187537797
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.513987
[2000]	valid_0's l2: 0.513012
Early stopping, best iteration is:
[2447]	valid_0's l2: 0.512872


[32m[I 2021-08-31 07:43:49,603][0m Trial 86 finished with value: 0.7161509885646147 and parameters: {'learning_rate': 0.024893396460085964, 'reg_lambda': 0.3690923898059051, 'reg_alpha': 0.02217026426567075, 'subsamble': 0.9115421610540306, 'colsample_bytree': 0.14054806692468896, 'max_depth': 47, 'num_leaves': 39, 'min_child_samples': 63}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7161509885646147
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.518573
[2000]	valid_0's l2: 0.513876
[3000]	valid_0's l2: 0.512865
Early stopping, best iteration is:
[3292]	valid_0's l2: 0.512817


[32m[I 2021-08-31 07:45:25,468][0m Trial 87 finished with value: 0.7161183391936076 and parameters: {'learning_rate': 0.015660595480418902, 'reg_lambda': 2.150926194588499, 'reg_alpha': 12.30921539047799, 'subsamble': 0.3608312866338302, 'colsample_bytree': 0.11913617686029983, 'max_depth': 59, 'num_leaves': 68, 'min_child_samples': 82}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7161183391936076
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517459
[2000]	valid_0's l2: 0.515489
[3000]	valid_0's l2: 0.514934
Early stopping, best iteration is:
[3662]	valid_0's l2: 0.514784


[32m[I 2021-08-31 07:46:35,821][0m Trial 88 finished with value: 0.7174846883362923 and parameters: {'learning_rate': 0.02232744496128399, 'reg_lambda': 4.477987957735162, 'reg_alpha': 0.3890787817876602, 'subsamble': 0.9708457077657141, 'colsample_bytree': 0.5386173453665383, 'max_depth': 21, 'num_leaves': 21, 'min_child_samples': 72}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7174846883362923
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.525416
[2000]	valid_0's l2: 0.518526
[3000]	valid_0's l2: 0.516062
[4000]	valid_0's l2: 0.514991
[5000]	valid_0's l2: 0.514335
[6000]	valid_0's l2: 0.513902
[7000]	valid_0's l2: 0.513631
Did not meet early stopping. Best iteration is:
[7000]	valid_0's l2: 0.513631


[32m[I 2021-08-31 07:48:45,731][0m Trial 89 finished with value: 0.7166805258927561 and parameters: {'learning_rate': 0.009758652396256543, 'reg_lambda': 22.147133096183303, 'reg_alpha': 0.10659796676466253, 'subsamble': 0.5581666453585516, 'colsample_bytree': 0.22102420304908849, 'max_depth': 37, 'num_leaves': 16, 'min_child_samples': 68}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7166805258927561
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516067
[2000]	valid_0's l2: 0.513545
[3000]	valid_0's l2: 0.513157
Early stopping, best iteration is:
[3152]	valid_0's l2: 0.513133


[32m[I 2021-08-31 07:49:56,728][0m Trial 90 finished with value: 0.7163333138715218 and parameters: {'learning_rate': 0.01966268544982904, 'reg_lambda': 0.036099456931724554, 'reg_alpha': 0.9542180545188136, 'subsamble': 0.8211162291238973, 'colsample_bytree': 0.18180750702204276, 'max_depth': 70, 'num_leaves': 37, 'min_child_samples': 54}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7163333138715218
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.521995
[2000]	valid_0's l2: 0.515929
[3000]	valid_0's l2: 0.513884
[4000]	valid_0's l2: 0.513205
[5000]	valid_0's l2: 0.512928
Early stopping, best iteration is:
[5370]	valid_0's l2: 0.512819


[32m[I 2021-08-31 07:51:36,431][0m Trial 91 finished with value: 0.7161139642191894 and parameters: {'learning_rate': 0.0169233210973902, 'reg_lambda': 0.13550456400927302, 'reg_alpha': 0.27000952335425515, 'subsamble': 0.7786527106682261, 'colsample_bytree': 0.10025997388782636, 'max_depth': 49, 'num_leaves': 24, 'min_child_samples': 73}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7161139642191894
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.516003
[2000]	valid_0's l2: 0.513527
[3000]	valid_0's l2: 0.512953
Early stopping, best iteration is:
[3279]	valid_0's l2: 0.512907


[32m[I 2021-08-31 07:52:38,206][0m Trial 92 finished with value: 0.7161754848477665 and parameters: {'learning_rate': 0.027964670379566046, 'reg_lambda': 0.7352649834371162, 'reg_alpha': 0.4419142539379578, 'subsamble': 0.8703047324620453, 'colsample_bytree': 0.12122388288942276, 'max_depth': 48, 'num_leaves': 22, 'min_child_samples': 62}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7161754848477665
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.515938
[2000]	valid_0's l2: 0.513666
[3000]	valid_0's l2: 0.513026
[4000]	valid_0's l2: 0.512736
Early stopping, best iteration is:
[4031]	valid_0's l2: 0.512726


[32m[I 2021-08-31 07:53:57,479][0m Trial 93 finished with value: 0.7160485842781156 and parameters: {'learning_rate': 0.019739134934795306, 'reg_lambda': 1.656624727372653, 'reg_alpha': 0.07134494311994216, 'subsamble': 0.9533452022703988, 'colsample_bytree': 0.1447582484320539, 'max_depth': 51, 'num_leaves': 26, 'min_child_samples': 70}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7160485842781156
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.515263
[2000]	valid_0's l2: 0.513199
[3000]	valid_0's l2: 0.512775
Early stopping, best iteration is:
[2981]	valid_0's l2: 0.512767


[32m[I 2021-08-31 07:54:50,560][0m Trial 94 finished with value: 0.716077265200938 and parameters: {'learning_rate': 0.03426522017970218, 'reg_lambda': 6.400516744735983e-05, 'reg_alpha': 1.798497886913637, 'subsamble': 0.8399943933551806, 'colsample_bytree': 0.11966303682802985, 'max_depth': 42, 'num_leaves': 19, 'min_child_samples': 65}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.716077265200938
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.523376
[2000]	valid_0's l2: 0.516396
[3000]	valid_0's l2: 0.514392
[4000]	valid_0's l2: 0.513669
[5000]	valid_0's l2: 0.513296
[6000]	valid_0's l2: 0.513063
[7000]	valid_0's l2: 0.512852
Did not meet early stopping. Best iteration is:
[6999]	valid_0's l2: 0.512852


[32m[I 2021-08-31 07:57:12,659][0m Trial 95 finished with value: 0.7161366787122215 and parameters: {'learning_rate': 0.008053059975608672, 'reg_lambda': 0.7084882585088736, 'reg_alpha': 0.008777261536664574, 'subsamble': 0.9802292492151549, 'colsample_bytree': 0.15557406659742556, 'max_depth': 57, 'num_leaves': 33, 'min_child_samples': 20}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7161366787122215
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.522499
[2000]	valid_0's l2: 0.516017
[3000]	valid_0's l2: 0.513731
[4000]	valid_0's l2: 0.5131
[5000]	valid_0's l2: 0.512855
[6000]	valid_0's l2: 0.512711
Early stopping, best iteration is:
[6066]	valid_0's l2: 0.512705


[32m[I 2021-08-31 07:59:06,920][0m Trial 96 finished with value: 0.7160338827705325 and parameters: {'learning_rate': 0.01550060074502967, 'reg_lambda': 7.888179999834657, 'reg_alpha': 0.20049508683247413, 'subsamble': 0.9389909416505483, 'colsample_bytree': 0.10014147144553842, 'max_depth': 74, 'num_leaves': 28, 'min_child_samples': 77}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7160338827705325
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.523059
[2000]	valid_0's l2: 0.516502
[3000]	valid_0's l2: 0.514589
[4000]	valid_0's l2: 0.513917
[5000]	valid_0's l2: 0.513497
[6000]	valid_0's l2: 0.513273
Early stopping, best iteration is:
[6036]	valid_0's l2: 0.513263


[32m[I 2021-08-31 08:01:06,578][0m Trial 97 finished with value: 0.7164240369772672 and parameters: {'learning_rate': 0.010663850484802532, 'reg_lambda': 0.00014392250257199018, 'reg_alpha': 0.00419899256677055, 'subsamble': 0.8961637599256563, 'colsample_bytree': 0.17969944710076852, 'max_depth': 77, 'num_leaves': 25, 'min_child_samples': 68}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7164240369772672
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.514443
[2000]	valid_0's l2: 0.512834
[3000]	valid_0's l2: 0.512524
Early stopping, best iteration is:
[3040]	valid_0's l2: 0.51251


[32m[I 2021-08-31 08:02:09,748][0m Trial 98 finished with value: 0.715898293835209 and parameters: {'learning_rate': 0.02415153450969747, 'reg_lambda': 2.9051904393725803, 'reg_alpha': 3.1110322523775897, 'subsamble': 0.756664516131287, 'colsample_bytree': 0.13511493321954277, 'max_depth': 45, 'num_leaves': 30, 'min_child_samples': 59}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.715898293835209
Encoding
Training
Training until validation scores don't improve for 200 rounds
[1000]	valid_0's l2: 0.517925
[2000]	valid_0's l2: 0.513953
[3000]	valid_0's l2: 0.513117
[4000]	valid_0's l2: 0.51281
Early stopping, best iteration is:
[4711]	valid_0's l2: 0.512679


[32m[I 2021-08-31 08:04:06,702][0m Trial 99 finished with value: 0.7160161396703324 and parameters: {'learning_rate': 0.011911441300670662, 'reg_lambda': 0.20917686602976024, 'reg_alpha': 6.698260942483536, 'subsamble': 0.8561663772509371, 'colsample_bytree': 0.19778420913805123, 'max_depth': 55, 'num_leaves': 42, 'min_child_samples': 66}. Best is trial 74 with value: 0.7157022024939034.[0m


0 0.7160161396703324


{'learning_rate': 0.017617905594164103,
 'reg_lambda': 0.2971710372778072,
 'reg_alpha': 0.4754381939584199,
 'subsamble': 0.9808300822318052,
 'colsample_bytree': 0.10159075122483158,
 'max_depth': 27,
 'num_leaves': 27,
 'min_child_samples': 76}

In [24]:
lgbm_parameters = {
   'metric': 'rmse',
   'learning_rate': 0.017617905594164103,
    'n_estimators': 50000,
     'reg_alpha': 0.4754381939584199,
    'reg_lambda': 0.2971710372778072,
    'colsample_bytree': 0.10159075122483158,
     'subsample': 0.9808300822318052,
    'max_depth': 27,
    'num_leaves': 27,
    'min_child_samples': 76,
    'max_bin': 772,
    'cat_l2': 17,
    'cat_smoth':96,
    'seed': 42
}

In [18]:
useful_cols=[column for column in train.columns if column not in ('id', 'target', 'kfold')]
 # List of categorical columns
object_cols = [col for col in useful_cols if 'cat' in col]
numerical_cols = [col for col in useful_cols if 'cont' in col]
test=test[useful_cols]

final_test_predictions=[]
final_valid_predictions={}

last_scores=[]
rmse_scores=[]
for fold in range(5):
    X_train = train[train.kfold!=fold].reset_index(drop=True)
    X_valid= train[train.kfold==fold].reset_index(drop=True)
    X_test=test.copy()
    
    valid_ids=X_valid.id.values.tolist()
    
    y_train=X_train.target
    y_valid=X_valid.target
    
    X_train=X_train[useful_cols]
    X_valid=X_valid[useful_cols]
   
   # ordinal-encode categorical columns
    print('Encoding')
    ohe = preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore')
    #for object cols
    X_train_ohe=ohe.fit_transform(X_train[object_cols])
    X_valid_ohe=ohe.transform(X_valid[object_cols])
    X_test_ohe = ohe.transform(X_test[object_cols])
    
    X_train_ohe=pd.DataFrame(X_train_ohe, columns=[f"ohe_{i}" for i in range(X_train_ohe.shape[1])])
    X_valid_ohe=pd.DataFrame(X_valid_ohe, columns=[f"ohe_{i}" for i in range(X_valid_ohe.shape[1])])
    X_test_ohe=pd.DataFrame(X_test_ohe, columns=[f"ohe_{i}" for i in range(X_test_ohe.shape[1])])
    
    X_train=pd.concat([X_train,X_train_ohe], axis=1)
    X_valid=pd.concat([X_valid,X_valid_ohe], axis=1)
    X_test=pd.concat([X_test,X_test_ohe], axis=1)
    
    # Remove categorical columns (will replace with one-hot encoding)
    X_train = X_train.drop(object_cols, axis=1)
    X_valid = X_valid.drop(object_cols,axis=1)
    X_test=X_test.drop(object_cols,axis=1)
    
    
    #for numerical cols
    scaler =StandardScaler()
    X_train[numerical_cols]=scaler.fit_transform(X_train[numerical_cols])
    X_valid[numerical_cols]=scaler.transform(X_valid[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])
    
    print('Training') 
    model =LGBMRegressor(**lgbm_parameters)
    model.fit(X_train, y_train, eval_set = ((X_valid,y_valid)), early_stopping_rounds = 500,verbose=1000)
    predictions=model.predict(X_valid)
    predictions_test=model.predict(X_test)
    final_test_predictions.append(predictions_test)
    final_valid_predictions.update(dict(zip(valid_ids,predictions)))
    
    rmse=mean_squared_error(y_valid,predictions,squared=False)
    print(fold, rmse)
    rmse_scores.append(rmse)
    

Encoding
Training
Training until validation scores don't improve for 500 rounds
[1000]	valid_0's rmse: 0.718293
[2000]	valid_0's rmse: 0.71575
[3000]	valid_0's rmse: 0.715166
[4000]	valid_0's rmse: 0.714951
[5000]	valid_0's rmse: 0.714905
Early stopping, best iteration is:
[4799]	valid_0's rmse: 0.714891
0 0.7148905224844296
Encoding
Training
Training until validation scores don't improve for 500 rounds
[1000]	valid_0's rmse: 0.726042
[2000]	valid_0's rmse: 0.723235
[3000]	valid_0's rmse: 0.722656
[4000]	valid_0's rmse: 0.722418
[5000]	valid_0's rmse: 0.722395
Early stopping, best iteration is:
[4759]	valid_0's rmse: 0.722377
1 0.7223767670022336
Encoding
Training
Training until validation scores don't improve for 500 rounds
[1000]	valid_0's rmse: 0.721522
[2000]	valid_0's rmse: 0.718926
[3000]	valid_0's rmse: 0.718358
[4000]	valid_0's rmse: 0.718139
[5000]	valid_0's rmse: 0.718107
Early stopping, best iteration is:
[4710]	valid_0's rmse: 0.718101
2 0.718100525872106
Encoding
Training


In [19]:
print(np.mean(rmse_scores), np.std(rmse_scores))

0.7172132968073746 0.0031972227229733487


In [20]:
final_valid_predictions= pd.DataFrame.from_dict(final_valid_predictions, orient='index').reset_index()
final_valid_predictions.columns=['id', 'pred_1']
final_valid_predictions.to_csv('train_pred_1.csv', index=False)

In [21]:
final_valid_predictions

Unnamed: 0,id,pred_1
0,2,8.355080
1,6,8.256156
2,8,8.460447
3,10,8.445550
4,18,8.189734
...,...,...
299995,499975,8.194221
299996,499976,8.611674
299997,499993,8.357469
299998,499996,7.802166


In [23]:
sample_submission.target=np.mean(np.column_stack(final_test_predictions), axis=1)
sample_submission.columns=['id', 'pred_1']
sample_submission.to_csv('test_pred_1.csv', index=False)

In [25]:
useful_cols=[column for column in train.columns if column not in ('id', 'target', 'kfold')]
 # List of categorical columns
object_cols = [col for col in useful_cols if 'cat' in col]
numerical_cols = [col for col in useful_cols if 'cont' in col]
test=test[useful_cols]

final_test_predictions=[]
final_valid_predictions={}

last_scores=[]
rmse_scores=[]
for fold in range(5):
    X_train = train[train.kfold!=fold].reset_index(drop=True)
    X_valid= train[train.kfold==fold].reset_index(drop=True)
    X_test=test.copy()
    
    valid_ids=X_valid.id.values.tolist()
    
    y_train=X_train.target
    y_valid=X_valid.target
    
    X_train=X_train[useful_cols]
    X_valid=X_valid[useful_cols]
   
    # ordinal-encode categorical columns
    print('Encoding')
    ordinal_encoder = OrdinalEncoder()
    #for object cols
    X_train[object_cols]=ordinal_encoder.fit_transform(X_train[object_cols])
    X_valid[object_cols]=ordinal_encoder.transform(X_valid[object_cols])
    X_test[object_cols] = ordinal_encoder.transform(X_test[object_cols])
    
    #for numerical cols
    scaler =StandardScaler()
    X_train[numerical_cols]=scaler.fit_transform(X_train[numerical_cols])
    X_valid[numerical_cols]=scaler.transform(X_valid[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])
    
    print('Training') 
    model =LGBMRegressor(**lgbm_parameters)
    model.fit(X_train, y_train, eval_set = ((X_valid,y_valid)), early_stopping_rounds = 200,verbose=1000,
              categorical_feature=object_cols)
    predictions=model.predict(X_valid)
    predictions_test=model.predict(X_test)
    final_test_predictions.append(predictions_test)
    final_valid_predictions.update(dict(zip(valid_ids,predictions)))
    
    
    rmse=mean_squared_error(y_valid,predictions,squared=False)
    print(fold, rmse)
    rmse_scores.append(rmse)

Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.721292
[2000]	valid_0's rmse: 0.716902
[3000]	valid_0's rmse: 0.71569
[4000]	valid_0's rmse: 0.715257
[5000]	valid_0's rmse: 0.71512
Early stopping, best iteration is:
[4826]	valid_0's rmse: 0.715117
0 0.7151167023414063
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.728824
[2000]	valid_0's rmse: 0.724469
[3000]	valid_0's rmse: 0.723224
[4000]	valid_0's rmse: 0.722791
[5000]	valid_0's rmse: 0.722688
Early stopping, best iteration is:
[4816]	valid_0's rmse: 0.722665
1 0.7226651859902857
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.724001
[2000]	valid_0's rmse: 0.719829
[3000]	valid_0's rmse: 0.718739
[4000]	valid_0's rmse: 0.718375
Early stopping, best iteration is:
[4044]	valid_0's rmse: 0.718361
2 0.7183612417693777
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.723782
[2000]	valid_0's rmse: 0.71968
[3000]	valid_0's rmse: 0.718605
[4000]	valid_0's rmse: 0.71826
Early stopping, best iteration is:
[4563]	valid_0's rmse: 0.71821
3 0.7182098291206642
Encoding
Training


New categorical_feature is ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']


Training until validation scores don't improve for 200 rounds
[1000]	valid_0's rmse: 0.719376
[2000]	valid_0's rmse: 0.715138
[3000]	valid_0's rmse: 0.714004
[4000]	valid_0's rmse: 0.713585
Early stopping, best iteration is:
[4468]	valid_0's rmse: 0.713553
4 0.7135533994061092


In [26]:
print(np.mean(rmse_scores), np.std(rmse_scores))

0.7175812717255686 0.003135202691705293


In [29]:
final_valid_predictions= pd.DataFrame.from_dict(final_valid_predictions, orient='index').reset_index()
final_valid_predictions.columns=['id', 'pred_2']
final_valid_predictions.to_csv('train_pred_2.csv', index=False)


TypeError: 'numpy.ndarray' object is not callable

In [30]:
final_valid_predictions

Unnamed: 0,id,pred_2
0,2,8.382772
1,6,8.218328
2,8,8.433875
3,10,8.355328
4,18,8.188500
...,...,...
299995,499975,8.225013
299996,499976,8.613246
299997,499993,8.362486
299998,499996,7.768921


In [28]:
sample_submission.target=np.mean(np.column_stack(final_test_predictions), axis=1)
sample_submission.columns=['id', 'pred_2']
sample_submission.to_csv('test_pred_2.csv', index=False)

  sample_submission.target=np.mean(np.column_stack(final_test_predictions), axis=1)


In [33]:


useful_cols=[column for column in train.columns if column not in ('id', 'target', 'kfold')]
 # List of categorical columns
object_cols = [col for col in useful_cols if 'cat' in col]
numerical_cols = [col for col in useful_cols if 'cont' in col]
test=test[useful_cols]

final_test_predictions=[]
final_valid_predictions={}

last_scores=[]
rmse_scores=[]
for fold in range(5):
    X_train = train[train.kfold!=fold].reset_index(drop=True)
    X_valid= train[train.kfold==fold].reset_index(drop=True)
    X_test=test.copy()
    
    valid_ids=X_valid.id.values.tolist()
    
    y_train=X_train.target
    y_valid=X_valid.target
    
    X_train=X_train[useful_cols]
    X_valid=X_valid[useful_cols]
   
    # ordinal-encode categorical columns
    print('Encoding')
    ordinal_encoder = OrdinalEncoder()
    #for object cols
    X_train[object_cols]=ordinal_encoder.fit_transform(X_train[object_cols])
    X_valid[object_cols]=ordinal_encoder.transform(X_valid[object_cols])
    X_test[object_cols] = ordinal_encoder.transform(X_test[object_cols])
    
    #for numerical cols
    scaler =StandardScaler()
    X_train[numerical_cols]=scaler.fit_transform(X_train[numerical_cols])
    X_valid[numerical_cols]=scaler.transform(X_valid[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])
    
    print('Training') 
     
    model = XGBRegressor(n_estimators=20000, random_state=42, learning_rate=.02, max_depth=5, tree_method='hist')
    model.fit(X_train, y_train, eval_set = [(X_valid,y_valid)], early_stopping_rounds = 400,verbose=1000)
    
    predictions=model.predict(X_valid)
    predictions_test=model.predict(X_test)
    final_test_predictions.append(predictions_test)
    final_valid_predictions.update(dict(zip(valid_ids,predictions)))
    
    
    rmse=mean_squared_error(y_valid,predictions,squared=False)
    print(fold, rmse)
    rmse_scores.append(rmse)

Encoding
Training
[0]	validation_0-rmse:7.62102
[1000]	validation_0-rmse:0.72091
[2000]	validation_0-rmse:0.71862
[3000]	validation_0-rmse:0.71825
[3524]	validation_0-rmse:0.71831
0 0.7182326233845375
Encoding
Training
[0]	validation_0-rmse:7.62425
[1000]	validation_0-rmse:0.72858
[2000]	validation_0-rmse:0.72626
[3000]	validation_0-rmse:0.72588
[3576]	validation_0-rmse:0.72593
1 0.7258669458210288
Encoding
Training
[0]	validation_0-rmse:7.62033
[1000]	validation_0-rmse:0.72427
[2000]	validation_0-rmse:0.72206
[3000]	validation_0-rmse:0.72168
[3157]	validation_0-rmse:0.72176
2 0.7216353173258985
Encoding
Training
[0]	validation_0-rmse:7.62672
[1000]	validation_0-rmse:0.72343
[2000]	validation_0-rmse:0.72138
[3000]	validation_0-rmse:0.72100
[3271]	validation_0-rmse:0.72104
3 0.720987603644735
Encoding
Training
[0]	validation_0-rmse:7.62652
[1000]	validation_0-rmse:0.71879
[2000]	validation_0-rmse:0.71663
[3000]	validation_0-rmse:0.71613
[3447]	validation_0-rmse:0.71621
4 0.7161064903577

In [34]:
print(np.mean(rmse_scores), np.std(rmse_scores))

0.7205657961067833 0.003309171746692743


In [35]:
final_valid_predictions= pd.DataFrame.from_dict(final_valid_predictions, orient='index').reset_index()
final_valid_predictions.columns=['id', 'pred_3']
final_valid_predictions.to_csv('train_pred_3.csv', index=False)

In [36]:
sample_submission.target=np.mean(np.column_stack(final_test_predictions), axis=1)
sample_submission.columns=['id', 'pred_3']
sample_submission.to_csv('test_pred_3.csv', index=False)

In [38]:
train = pd.read_csv("30-days-of-ml/train_folds.csv")
test = pd.read_csv("30-days-of-ml/test.csv")
sample_submission=pd.read_csv("30-days-of-ml/sample_submission.csv")

df1 = pd.read_csv("train_pred_1.csv")
df2 = pd.read_csv("train_pred_2.csv")
df3 = pd.read_csv("train_pred_3.csv")

df_test1 = pd.read_csv("test_pred_1.csv")
df_test2 = pd.read_csv("test_pred_2.csv")
df_test3 = pd.read_csv("test_pred_3.csv")

train = train.merge(df1, on="id", how="left")
train = train.merge(df2, on="id", how="left")
train = train.merge(df3, on="id", how="left")

test = test.merge(df_test1, on="id", how="left")
test = test.merge(df_test2, on="id", how="left")
test = test.merge(df_test3, on="id", how="left")

train.head()

Unnamed: 0,id,cat0,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,...,cont9,cont10,cont11,cont12,cont13,target,kfold,pred_1,pred_2,pred_3
0,1,B,B,B,C,B,B,A,E,C,...,0.267559,0.237281,0.377873,0.322401,0.86985,8.113634,4,8.507232,8.503018,8.544546
1,2,B,B,A,A,B,D,A,F,A,...,0.341439,0.906013,0.921701,0.261975,0.465083,8.481233,0,8.35508,8.382772,8.348386
2,3,A,A,A,C,B,D,A,D,A,...,0.843531,0.748809,0.620126,0.541474,0.763846,8.364351,4,8.175895,8.241156,8.162363
3,4,B,B,A,C,B,D,A,E,C,...,0.574844,0.34601,0.71461,0.54015,0.280682,8.049253,1,8.393389,8.388439,8.402562
4,6,A,A,A,C,B,D,A,E,A,...,0.956692,1.000773,0.776742,0.625849,0.250823,7.97226,0,8.256156,8.218328,8.234509


In [39]:
lgbm_parameters = {
   'metric': 'rmse',
   'learning_rate': 0.017617905594164103,
    'n_estimators': 50000,
     'reg_alpha': 0.4754381939584199,
    'reg_lambda': 0.2971710372778072,
    'colsample_bytree': 0.10159075122483158,
     'subsample': 0.9808300822318052,
    'max_depth': 27,
    'num_leaves': 27,
    'min_child_samples': 76,
    'max_bin': 772,
    'cat_l2': 17,
    'cat_smoth':96,
    'seed': 42
}

In [44]:
useful_features = ["pred_1", "pred_2", "pred_3"]
test = test[useful_features]

In [45]:
test

Unnamed: 0,pred_1,pred_2,pred_3
0,8.077633,8.077633,8.077633
1,8.373021,8.373021,8.373021
2,8.407936,8.407936,8.407936
3,8.479666,8.479666,8.479666
4,8.149429,8.149429,8.149429
...,...,...,...
199995,8.041326,8.041326,8.041326
199996,8.461375,8.461375,8.461375
199997,8.458954,8.458954,8.458954
199998,8.181627,8.181627,8.181627


In [50]:
useful_features = ["pred_1", "pred_2", "pred_3"]
test = test[useful_features]

final_predictions = []
scores = []
for fold in range(5):
    X_train = train[train.kfold!=fold].reset_index(drop=True)
    X_valid= train[train.kfold==fold].reset_index(drop=True)
    X_test=test.copy()
    
    
    y_train=X_train.target
    y_valid=X_valid.target
    
    X_train=X_train[useful_features]
    X_valid=X_valid[useful_features]
   
    
    print('Training') 
    model =LGBMRegressor(**lgbm_parameters)
    model.fit(X_train, y_train, eval_set = ((X_valid,y_valid)), early_stopping_rounds = 500,verbose=1000)
    
    #model = LinearRegression()
    #model.fit(X_train, y_train)
    
    predictions=model.predict(X_valid)
    predictions_test=model.predict(X_test)
    final_predictions.append(predictions_test)
    
    rmse=mean_squared_error(y_valid,predictions,squared=False)
    print(fold, rmse)
    scores.append(rmse)
    


Training
Training until validation scores don't improve for 500 rounds
Early stopping, best iteration is:
[249]	valid_0's rmse: 0.714677
0 0.7146769663796279
Training
Training until validation scores don't improve for 500 rounds
Early stopping, best iteration is:
[256]	valid_0's rmse: 0.722181
1 0.7221807332882377
Training
Training until validation scores don't improve for 500 rounds
Early stopping, best iteration is:
[204]	valid_0's rmse: 0.717906
2 0.7179063023388252
Training
Training until validation scores don't improve for 500 rounds
Early stopping, best iteration is:
[185]	valid_0's rmse: 0.717637
3 0.7176370420250531
Training
Training until validation scores don't improve for 500 rounds
Early stopping, best iteration is:
[221]	valid_0's rmse: 0.712854
4 0.7128535334823007


In [49]:
print(np.mean(scores), np.std(scores))

0.7170760635232711 0.0031659183145246596


In [51]:
print(np.mean(scores), np.std(scores))

0.7170509155028089 0.0031825747977556563


In [52]:
sample_submission.target = np.mean(np.column_stack(final_predictions), axis=1)
sample_submission.to_csv("submission_multiple.csv", index=False)