# Hyperparameter optimization #

## Parameters cell ##

Parameters are overridden by papermill when run inside DVC stages.



In [1]:
# Values below are the defaults of the notebook's parameters cell.
number_of_folds = 5  # first argument handed to PartitionParameters further down
repeats = 10  # repetition count handed to Repeat for the cross-validation pipelines
n_trials = 50  # n_trials handed to Tune

# LightGBM parameters found by the first round of optimization
lgb_params = {
    "bagging_fraction": 0.9522534844058304,
    "boosting_type": "dart",
    "objective": "regression",
    "feature_fraction": 0.42236910941558053,
    "lambda_l1": 0.020847266580277746,
    "lambda_l2": 2.8448564854773326,
    "learning_rate": 0.11484015430016059,
    "max_depth": 3,
    "max_leaves": 35,
    "min_data_in_leaf": 9,
    "num_iterations": 150,
}

debug_local = True  # prefer the local checkout of yspecies when it exists

In [2]:
from pathlib import Path
import sys
import inspect

local = (Path("..") / "yspecies").resolve()
if debug_local and local.exists():
  sys.path.insert(0, Path("..").as_posix())
  #sys.path.insert(0, local.as_posix())
  print("extending pathes with local yspecies")
  print(sys.path)
  %load_ext autoreload
  %autoreload 2

extending pathes with local yspecies
['..', '/data/sources/yspecies/notebooks', '/opt/miniconda3/envs/yspecies/lib/python38.zip', '/opt/miniconda3/envs/yspecies/lib/python3.8', '/opt/miniconda3/envs/yspecies/lib/python3.8/lib-dynload', '', '/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages', '/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages/IPython/extensions', '/home/antonkulaga/.ipython']


In [3]:
from dataclasses import dataclass, replace
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
import seaborn as sns
import matplotlib.pyplot as plt

In [4]:
from typing import *
from yspecies.dataset import *
from yspecies.utils import *
from yspecies.workflow import TupleWith, Repeat, Collect
from yspecies.config import *
from yspecies.preprocess import FeatureSelection, DataExtractor
from yspecies.partition import DataPartitioner, PartitionParameters
from yspecies.selection import ShapSelector
from yspecies.tuning import Tune
from yspecies.models import ResultsCV, CrossValidator
import optuna
from optuna import Study, Trial

In [5]:
# Display settings: no limit on shown columns or rows, floats rendered with 3 decimals
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.float_format", "{:.3f}".format)
import pprint
pp = pprint.PrettyPrinter(indent=4)

#charts settings
# NOTE: matplotlib.pyplot is already imported in an earlier cell; the import is repeated here.
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
# Let IPython choose the matplotlib backend automatically (this run reported TkAgg).
%matplotlib auto
# Switch matplotlib's interactive mode off.
plt.ioff()
# Select SVG as the figure format.
# NOTE(review): newer IPython releases deprecate IPython.display.set_matplotlib_formats in
# favour of matplotlib_inline.backend_inline.set_matplotlib_formats - confirm the target version.
set_matplotlib_formats('svg')

Using matplotlib backend: TkAgg


### Loading data ###
Let's load the species/genes/expressions data selected by the select_samples.py notebook.

In [6]:
from pathlib import Path
# Use the current directory as the project root when it contains ./data,
# otherwise fall back to the parent directory.
project_root_dir = "./" if Path("./data").exists() else "../"
locations: Locations = Locations(project_root_dir)

In [7]:
# Load the expression dataset from the folder at locations.interim.selected and display it
selected_folder = locations.interim.selected
data = ExpressionDataset.from_folder(selected_folder)
data

expressions,genes,species,samples,Genes Metadata,Species Metadata
"(445, 12340)",12340,39,445,"(12340, 2)","(40, 19)"


## Checking that cross-validation works ##

Deciding on selection parameters (which fields to include, exclude, predict)

In [8]:
# Partitioning settings shared by the pipelines below; number_of_folds comes from the
# parameters cell.
# NOTE(review): the arguments are positional, so the meaning of 1, 2 and [] cannot be read
# from this notebook; 42 is presumably the seed (later cells override a `seed` field via
# replace()) - confirm against PartitionParameters and consider keyword arguments.
partition_params = PartitionParameters(number_of_folds, 1, 2, [],  42)


In [9]:
# One round: partition the data, pair the result with the fixed lgb_params,
# then hand it to the cross-validator.
partition_cv_steps = [
    ("partitioner", DataPartitioner()),
    ("prepare_for_partitioning", TupleWith(lgb_params)),
    ("crossvalidator", CrossValidator()),
]
partition_cv_pipe = Pipeline(steps=partition_cv_steps)

In [10]:
def _with_repeat_seed(x, i):
    """Return (x[0], x[1]) with the ``seed`` field of x[1] replaced by the repeat index ``i``."""
    first, params = x[0], x[1]
    return (first, replace(params, seed=i))


# Run partition_cv_pipe `repeats` times, each repetition with its own seed
repeated_cv_pipe = Repeat(partition_cv_pipe, repeats, _with_repeat_seed)

cv_steps = [
    ("extractor", DataExtractor()),
    ("prepare_for_partitioning", TupleWith(partition_params)),  # attach the partition settings
    ("partition_cv", repeated_cv_pipe),
]
cv_pipeline = Pipeline(steps=cv_steps)

## Setting up features to select ##

In [11]:
# Base feature selection; `selection` and `select_lifespan` are two names for the same object.
select_lifespan = FeatureSelection(
    samples=["tissue", "species"],  # samples metadata to include
    species=[],  # species metadata other than the Y label to include
    exclude_from_training=["species"],  # fields kept out of LightGBM training
    to_predict="lifespan",  # column to predict
    categorical=["tissue"],
)
selection = select_lifespan

In [12]:
# One selection per target column: identical fields, only `to_predict` differs
select_lifespan = selection
select_mass, select_gestation, select_mtgc = (
    replace(selection, to_predict=target)
    for target in ("mass_g", "gestation", "mtgc")
)

## Checking cross-validation ##

In [13]:
# Run the repeated cross-validation on the lifespan selection, then display
# what ResultsCV.take_best returns for the collected results.
cv_input = (data, select_lifespan)
cv_res = cv_pipeline.fit_transform(cv_input)
ResultsCV.take_best(cv_res)

Found `num_iterations` in params. Will use it instead of argument
Early stopping is not available in dart mode


[150]	cv_agg's l1: 7.0071 + 9.23023	cv_agg's l2: 158.687 + 254.039	cv_agg's huber: 5.95545 + 8.27924
[150]	cv_agg's l1: 2.38291 + 0.276588	cv_agg's l2: 33.5298 + 2.66791	cv_agg's huber: 1.80887 + 0.242136
[150]	cv_agg's l1: 2.42461 + 0.104708	cv_agg's l2: 33.4744 + 1.72278	cv_agg's huber: 1.85087 + 0.093391
[150]	cv_agg's l1: 13.0004 + 5.23252	cv_agg's l2: 201.022 + 104.496	cv_agg's huber: 11.307 + 4.68721
[150]	cv_agg's l1: 2.49413 + 0.377353	cv_agg's l2: 34.1965 + 5.6828	cv_agg's huber: 1.91509 + 0.312529
[150]	cv_agg's l1: 2.41961 + 0.2428	cv_agg's l2: 33.7931 + 3.63635	cv_agg's huber: 1.84299 + 0.212897
[150]	cv_agg's l1: 2.34481 + 0.134413	cv_agg's l2: 33.1724 + 2.30235	cv_agg's huber: 1.78264 + 0.113431
[150]	cv_agg's l1: 6.88053 + 9.15438	cv_agg's l2: 154.905 + 245.647	cv_agg's huber: 5.84682 + 8.20896
[150]	cv_agg's l1: 8.46022 + 7.76312	cv_agg's l2: 137.511 + 158.647	cv_agg's huber: 7.25739 + 6.94678
[150]	cv_agg's l1: 2.40884 + 0.102162	cv_agg's l2: 33.255 + 2.66855	cv_agg's 

1.3033874283531373

In [14]:
# last("huber") of every cross-validation result
[cv_result.last("huber") for cv_result in cv_res]

# Optimization #

In [15]:
# Optuna storage backed by an SQLite file under the lifespan metrics folder.
study_db = (locations.metrics.lifespan / "study.sqlite").absolute()
# SQLite creates a missing database file but not a missing parent directory,
# so make sure the folder exists before the storage is opened.
study_db.parent.mkdir(parents=True, exist_ok=True)
url = f"sqlite:///{study_db}"
print('loading (if exists) study from '+url)
storage = optuna.storages.RDBStorage(
    url=url
    #engine_kwargs={'check_same_thread': False}
)

loading (if exists) study from sqlite:////data/sources/yspecies/notebooks/../data/metrics/lifespan/study.sqlite


In [16]:
def objective_parameters(trial: "Trial") -> dict:
    """Draw one LightGBM parameter set from the Optuna search space.

    Parameters
    ----------
    trial : optuna Trial
        Trial object used to sample every tunable value.

    Returns
    -------
    dict
        The fixed settings (objective, metric, verbosity) together with the
        values sampled for this trial, keyed by parameter name.
    """
    return {
        'objective': 'regression',
        # A list instead of a set: the iteration order of a set of strings changes between
        # interpreter sessions (hash randomization), which made the metric order unreproducible.
        'metric': ['mae', 'mse', 'huber'],
        'verbosity': -1,
        'boosting_type': trial.suggest_categorical('boosting_type', ['dart', 'gbdt']),
        # suggest_float replaces the deprecated suggest_uniform (same uniform range)
        'lambda_l1': trial.suggest_float('lambda_l1', 0.01, 4.0),
        'lambda_l2': trial.suggest_float('lambda_l2', 0.01, 4.0),
        'max_leaves': trial.suggest_int("max_leaves", 15, 25),
        'max_depth': trial.suggest_int('max_depth', 3, 8),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.3, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.3, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 3, 8),
        # NOTE(review): sampled for every trial, although LightGBM documents drop_rate
        # as a dart-only setting - consider sampling it only when boosting_type is 'dart'.
        'drop_rate': trial.suggest_float('drop_rate', 0.1, 0.3),
        # LightGBM documents `verbose` as an alias of `verbosity`; both keys are kept
        # so the returned dict has the same keys as before.
        "verbose": -1
    }
optimization_parameters = objective_parameters

In [17]:
from yspecies.workflow import SplitReduce

def side(i: int):
    """Print ``i`` and return it unchanged, so a value can be traced while being passed along."""
    print(i)
    return i

def _split_for_partitioner(x):
    """Build the single partitioner input: x[0] paired with partition_params reseeded from x[2]."""
    reseeded = replace(partition_params, seed=side(x[2]))
    return [(x[0], reseeded)]


def _pair_output_with_second(x, output):
    """Pair the first partitioner output with x[1] (presumably the trial's parameters - confirm)."""
    return (output[0], x[1])


def _append_repeat_index(x, i):
    """Extend the (x[0], x[1]) input with the repeat index ``i``."""
    return [x[0], x[1], i]


def _mean_last_huber(results):
    """Average last("huber") over all results."""
    huber_scores = np.array([result.last("huber") for result in results])
    return huber_scores.mean()


# Partition with a per-repeat seed, then put x[1] back next to the partitioned data
prepare_partition = SplitReduce(
    outputs=DataPartitioner(),
    split=_split_for_partitioner,
    reduce=_pair_output_with_second,
)
partition_and_cv = Pipeline(steps=[
    ("prepare partition", prepare_partition),
    ("crossvalidator", CrossValidator()),
])

# Repeat partition + cross-validation `repeats` times and reduce the runs to one mean score
partition_and_cv_repeat = Pipeline(steps=[
    ("repeat_cv_pipe", Repeat(partition_and_cv, repeats, _append_repeat_index)),
    ("collect_mean", Collect(fold=_mean_last_huber)),
])

# NOTE(review): the `storage` object created in the storage cell is not passed to Tune here -
# confirm whether the study is meant to be persisted.
p = Pipeline(steps=[
    ("extractor", DataExtractor()),
    ("tune", Tune(partition_and_cv_repeat, n_trials=n_trials, parameters_space=optimization_parameters)),
])

In [None]:
# Run the tuning pipeline on the lifespan selection and display its result
tuning_input = (data, select_lifespan)
best = p.fit_transform(tuning_input)
best

0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0450354 + 0.00954981	cv_agg's l2: 0.0103292 + 0.00492932	cv_agg's huber: 0.00516459 + 0.00246466
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0450354 + 0.00954981	cv_agg's l2: 0.0103292 + 0.00492932	cv_agg's huber: 0.00516459 + 0.00246466
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0470572 + 0.0170571	cv_agg's l2: 0.0526272 + 0.0440876	cv_agg's huber: 0.0166453 + 0.0105423
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0470572 + 0.0170571	cv_agg's l2: 0.0526272 + 0.0440876	cv_agg's huber: 0.0166453 + 0.0105423
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0439069 + 0.00955033	cv_agg's l2: 0.0134161 + 0.00863501	cv_agg's huber: 0.00666935 + 0.00424484
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0439069 + 0.00955033	cv_agg's l2: 0.0134161 + 0.00863501	cv_agg'

[I 2020-08-13 13:11:57,854] Trial 0 finished with value: 0.010980089050296113 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 1.0015394482144673, 'lambda_l2': 0.2835427804155781, 'max_leaves': 25, 'max_depth': 8, 'feature_fraction': 0.5260057910641318, 'bagging_fraction': 0.8034577607874316, 'learning_rate': 0.08361855604732259, 'min_data_in_leaf': 8, 'drop_rate': 0.17304435068507087}. Best is trial 0 with value: 0.010980089050296113.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0776314 + 0.0084358	cv_agg's l2: 0.0939079 + 0.0543168	cv_agg's huber: 0.0333004 + 0.0112798
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0776314 + 0.0084358	cv_agg's l2: 0.0939079 + 0.0543168	cv_agg's huber: 0.0333004 + 0.0112798
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.110016 + 0.017932	cv_agg's l2: 0.278031 + 0.103563	cv_agg's huber: 0.0551277 + 0.00990949
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.110016 + 0.017932	cv_agg's l2: 0.278031 + 0.103563	cv_agg's huber: 0.0551277 + 0.00990949
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.138959 + 0.0322669	cv_agg's l2: 0.112423 + 0.0667931	cv_agg's huber: 0.045048 + 0.0203393
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.138959 + 0.0322669	cv_agg's l2: 0.112423 + 0.0667931	cv_agg's huber: 0.045048 + 0.020

[I 2020-08-13 13:16:58,428] Trial 1 finished with value: 0.04559555824096727 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.676957087704102, 'lambda_l2': 3.6037561309244555, 'max_leaves': 22, 'max_depth': 4, 'feature_fraction': 0.8901988789100828, 'bagging_fraction': 0.419316655841148, 'learning_rate': 0.0871130581891701, 'min_data_in_leaf': 8, 'drop_rate': 0.2974903100721361}. Best is trial 0 with value: 0.010980089050296113.


0
[200]	cv_agg's l1: 6.24192 + 0.580073	cv_agg's l2: 157.981 + 10.7662	cv_agg's huber: 5.22768 + 0.522413
1
[200]	cv_agg's l1: 5.75528 + 2.2462	cv_agg's l2: 86.5246 + 68.0048	cv_agg's huber: 4.79413 + 2.02863
2
[200]	cv_agg's l1: 6.20322 + 0.901862	cv_agg's l2: 160.325 + 22.3078	cv_agg's huber: 5.19431 + 0.814497
3
[200]	cv_agg's l1: 5.8487 + 1.9852	cv_agg's l2: 86.8602 + 63.1184	cv_agg's huber: 4.8774 + 1.78974
4
[200]	cv_agg's l1: 6.52095 + 0.888011	cv_agg's l2: 166.926 + 26.9495	cv_agg's huber: 5.48079 + 0.798582
5
[200]	cv_agg's l1: 6.2494 + 0.529756	cv_agg's l2: 158.797 + 14.8541	cv_agg's huber: 5.23699 + 0.474595
6
[200]	cv_agg's l1: 6.41949 + 0.894288	cv_agg's l2: 162.183 + 24.7007	cv_agg's huber: 5.38403 + 0.8021
7
[200]	cv_agg's l1: 6.41689 + 1.03865	cv_agg's l2: 162.082 + 30.0381	cv_agg's huber: 5.38928 + 0.940932
8
[200]	cv_agg's l1: 6.37457 + 1.18028	cv_agg's l2: 160.364 + 26.5352	cv_agg's huber: 5.35188 + 1.06598
9
[200]	cv_agg's l1: 5.85683 + 1.9423	cv_agg's l2: 87.8842 +

[I 2020-08-13 13:21:49,546] Trial 2 finished with value: 5.182278634198868 and parameters: {'boosting_type': 'dart', 'lambda_l1': 3.718611131794578, 'lambda_l2': 2.9078391602406577, 'max_leaves': 16, 'max_depth': 7, 'feature_fraction': 0.5832485430326604, 'bagging_fraction': 0.7442758577007983, 'learning_rate': 0.03305760571178193, 'min_data_in_leaf': 7, 'drop_rate': 0.22013496613925151}. Best is trial 0 with value: 0.010980089050296113.


0
[200]	cv_agg's l1: 9.03117 + 1.35252	cv_agg's l2: 285.183 + 36.1901	cv_agg's huber: 7.76936 + 1.23183
1
[200]	cv_agg's l1: 8.83499 + 0.97356	cv_agg's l2: 278.612 + 24.7905	cv_agg's huber: 7.59152 + 0.88808
2
[200]	cv_agg's l1: 8.82417 + 0.806836	cv_agg's l2: 278.674 + 24.087	cv_agg's huber: 7.58869 + 0.734264
3
[200]	cv_agg's l1: 8.2985 + 2.83831	cv_agg's l2: 188.232 + 152.536	cv_agg's huber: 7.09176 + 2.56867
4
[200]	cv_agg's l1: 8.85578 + 1.11611	cv_agg's l2: 280.425 + 33.9397	cv_agg's huber: 7.6203 + 1.02214
5
[200]	cv_agg's l1: 8.97766 + 1.17513	cv_agg's l2: 286.654 + 47.1505	cv_agg's huber: 7.72947 + 1.07767
6
[200]	cv_agg's l1: 9.2854 + 2.1653	cv_agg's l2: 293.005 + 72.5263	cv_agg's huber: 7.99494 + 1.97086
7
[200]	cv_agg's l1: 9.04851 + 1.16577	cv_agg's l2: 287.031 + 39.227	cv_agg's huber: 7.77863 + 1.06042
8
[200]	cv_agg's l1: 8.84475 + 0.718175	cv_agg's l2: 280.752 + 29.6952	cv_agg's huber: 7.60895 + 0.656359
9
[200]	cv_agg's l1: 8.82402 + 0.388319	cv_agg's l2: 280.94 + 18.1

[I 2020-08-13 13:27:08,819] Trial 3 finished with value: 7.636249706440313 and parameters: {'boosting_type': 'dart', 'lambda_l1': 0.4948126734296422, 'lambda_l2': 3.444574211021878, 'max_leaves': 20, 'max_depth': 5, 'feature_fraction': 0.440587258032043, 'bagging_fraction': 0.8170632094220356, 'learning_rate': 0.01969201970617205, 'min_data_in_leaf': 4, 'drop_rate': 0.29483544186714683}. Best is trial 0 with value: 0.010980089050296113.


0
[200]	cv_agg's l1: 2.66193 + 0.489962	cv_agg's l2: 19.3942 + 4.17791	cv_agg's huber: 2.02468 + 0.423838
1
[200]	cv_agg's l1: 2.55525 + 0.286773	cv_agg's l2: 18.9164 + 1.57342	cv_agg's huber: 1.93369 + 0.254201
2
[200]	cv_agg's l1: 2.72384 + 0.62333	cv_agg's l2: 20.2287 + 4.55127	cv_agg's huber: 2.07944 + 0.540929
3
[200]	cv_agg's l1: 2.46582 + 0.943564	cv_agg's l2: 13.8863 + 11.0268	cv_agg's huber: 1.85372 + 0.836701
4
[200]	cv_agg's l1: 2.37324 + 0.492911	cv_agg's l2: 12.7031 + 7.02485	cv_agg's huber: 1.76619 + 0.435912
5
[200]	cv_agg's l1: 2.4172 + 0.843929	cv_agg's l2: 13.3497 + 10.1002	cv_agg's huber: 1.80929 + 0.75092
6
[200]	cv_agg's l1: 2.40713 + 0.745234	cv_agg's l2: 13.616 + 9.84758	cv_agg's huber: 1.80108 + 0.663631
7
[200]	cv_agg's l1: 2.39474 + 0.499582	cv_agg's l2: 13.0363 + 7.07637	cv_agg's huber: 1.78606 + 0.440434
8
[200]	cv_agg's l1: 2.58672 + 0.196658	cv_agg's l2: 18.9966 + 2.00099	cv_agg's huber: 1.96176 + 0.170998
9
[200]	cv_agg's l1: 2.51339 + 0.968961	cv_agg's l

[I 2020-08-13 13:36:58,389] Trial 4 finished with value: 1.8911694593073478 and parameters: {'boosting_type': 'dart', 'lambda_l1': 1.5190729306535957, 'lambda_l2': 1.1119128377080727, 'max_leaves': 17, 'max_depth': 7, 'feature_fraction': 0.8901937223725862, 'bagging_fraction': 0.6169398554876693, 'learning_rate': 0.06781568199566365, 'min_data_in_leaf': 4, 'drop_rate': 0.20151322007494551}. Best is trial 0 with value: 0.010980089050296113.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.375704 + 0.065612	cv_agg's l2: 0.642521 + 0.242378	cv_agg's huber: 0.171628 + 0.033215
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.375704 + 0.065612	cv_agg's l2: 0.642521 + 0.242378	cv_agg's huber: 0.171628 + 0.033215
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.142212 + 0.0164895	cv_agg's l2: 0.116688 + 0.0326205	cv_agg's huber: 0.0448602 + 0.0074808
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.142212 + 0.0164895	cv_agg's l2: 0.116688 + 0.0326205	cv_agg's huber: 0.0448602 + 0.0074808
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.385103 + 0.0649002	cv_agg's l2: 0.660286 + 0.30307	cv_agg's huber: 0.170365 + 0.0363759
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.385103 + 0.0649002	cv_agg's l2: 0.660286 + 0.30307	cv_agg's huber: 0.170365 + 0.0363759
3
Trainin

[I 2020-08-13 13:41:14,648] Trial 5 finished with value: 0.11457770695575306 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.9541448774358487, 'lambda_l2': 1.2769928491828075, 'max_leaves': 15, 'max_depth': 3, 'feature_fraction': 0.6945011425638882, 'bagging_fraction': 0.6262775518642526, 'learning_rate': 0.04940951494452206, 'min_data_in_leaf': 8, 'drop_rate': 0.24259760009814293}. Best is trial 0 with value: 0.010980089050296113.


0
[200]	cv_agg's l1: 7.48118 + 0.536768	cv_agg's l2: 206.265 + 8.8554	cv_agg's huber: 6.35309 + 0.489348
1
[200]	cv_agg's l1: 7.28699 + 2.47923	cv_agg's l2: 136.297 + 112.068	cv_agg's huber: 6.18171 + 2.25577
2
[200]	cv_agg's l1: 7.53005 + 0.804167	cv_agg's l2: 204.231 + 11.8063	cv_agg's huber: 6.39562 + 0.732132
3
[200]	cv_agg's l1: 7.56833 + 1.11051	cv_agg's l2: 207.989 + 30.0827	cv_agg's huber: 6.43159 + 1.00865
4
[200]	cv_agg's l1: 7.52474 + 0.879802	cv_agg's l2: 209.172 + 24.3215	cv_agg's huber: 6.39829 + 0.805899
5
[200]	cv_agg's l1: 7.54116 + 0.988201	cv_agg's l2: 207.725 + 13.1982	cv_agg's huber: 6.4103 + 0.897916
6
[200]	cv_agg's l1: 7.57001 + 0.809683	cv_agg's l2: 213.043 + 9.9581	cv_agg's huber: 6.43635 + 0.739496
7
[200]	cv_agg's l1: 7.67067 + 1.68976	cv_agg's l2: 212.648 + 42.1699	cv_agg's huber: 6.53024 + 1.53757
8
[200]	cv_agg's l1: 7.22112 + 2.666	cv_agg's l2: 136.557 + 120.016	cv_agg's huber: 6.12382 + 2.42554
9
[200]	cv_agg's l1: 7.16856 + 2.09974	cv_agg's l2: 136.043

[I 2020-08-13 13:46:55,138] Trial 6 finished with value: 6.333595576787393 and parameters: {'boosting_type': 'dart', 'lambda_l1': 0.6788010661268392, 'lambda_l2': 1.1908964688585808, 'max_leaves': 20, 'max_depth': 7, 'feature_fraction': 0.5111640216090123, 'bagging_fraction': 0.6376088446952038, 'learning_rate': 0.022789847735609055, 'min_data_in_leaf': 7, 'drop_rate': 0.22705047508920853}. Best is trial 0 with value: 0.010980089050296113.


0
[200]	cv_agg's l1: 9.30442 + 0.289967	cv_agg's l2: 294.032 + 12.5289	cv_agg's huber: 7.99666 + 0.262703
1
[200]	cv_agg's l1: 9.29568 + 1.36835	cv_agg's l2: 293.635 + 37.5018	cv_agg's huber: 7.98823 + 1.2439
2
[200]	cv_agg's l1: 9.44837 + 1.31586	cv_agg's l2: 299.824 + 48.8373	cv_agg's huber: 8.12938 + 1.19908
3
[200]	cv_agg's l1: 9.16391 + 3.60856	cv_agg's l2: 217.897 + 190.573	cv_agg's huber: 7.85991 + 3.25518
4
[200]	cv_agg's l1: 9.25035 + 0.538412	cv_agg's l2: 294.438 + 11.1099	cv_agg's huber: 7.9484 + 0.488459
5
[200]	cv_agg's l1: 9.31733 + 0.595812	cv_agg's l2: 294.661 + 18.2178	cv_agg's huber: 8.00771 + 0.537253
6
[200]	cv_agg's l1: 9.34104 + 1.12822	cv_agg's l2: 297.599 + 36.3112	cv_agg's huber: 8.03072 + 1.03039
7
[200]	cv_agg's l1: 9.21714 + 0.811533	cv_agg's l2: 293.757 + 17.6328	cv_agg's huber: 7.92024 + 0.740537
8
[200]	cv_agg's l1: 9.45195 + 1.70972	cv_agg's l2: 301.169 + 48.4055	cv_agg's huber: 8.1286 + 1.55633
9
[200]	cv_agg's l1: 8.52839 + 2.29055	cv_agg's l2: 195.4 +

[I 2020-08-13 13:57:01,212] Trial 7 finished with value: 7.930857531470167 and parameters: {'boosting_type': 'dart', 'lambda_l1': 0.47507542146803117, 'lambda_l2': 3.337189673663502, 'max_leaves': 20, 'max_depth': 5, 'feature_fraction': 0.9021973524687987, 'bagging_fraction': 0.4843346760272327, 'learning_rate': 0.02316826915206871, 'min_data_in_leaf': 4, 'drop_rate': 0.25494895662116634}. Best is trial 0 with value: 0.010980089050296113.


0
[200]	cv_agg's l1: 3.12225 + 0.586342	cv_agg's l2: 36.8178 + 7.178	cv_agg's huber: 2.45688 + 0.513802
1
[200]	cv_agg's l1: 3.03128 + 0.290179	cv_agg's l2: 35.9562 + 4.56396	cv_agg's huber: 2.37649 + 0.25031
2
[200]	cv_agg's l1: 3.09907 + 0.440584	cv_agg's l2: 36.8216 + 5.89647	cv_agg's huber: 2.44008 + 0.381971
3
[200]	cv_agg's l1: 2.60393 + 0.491749	cv_agg's l2: 16.9978 + 7.92727	cv_agg's huber: 1.9794 + 0.445549
4
[200]	cv_agg's l1: 3.03862 + 0.291121	cv_agg's l2: 35.9073 + 3.60463	cv_agg's huber: 2.39309 + 0.254351
5
[200]	cv_agg's l1: 3.07198 + 0.492893	cv_agg's l2: 35.98 + 5.56436	cv_agg's huber: 2.4168 + 0.434265
6
[200]	cv_agg's l1: 3.02881 + 0.294803	cv_agg's l2: 35.5684 + 2.58698	cv_agg's huber: 2.37425 + 0.251448
7
[200]	cv_agg's l1: 2.78437 + 1.17508	cv_agg's l2: 19.3795 + 14.3176	cv_agg's huber: 2.14101 + 1.03907
8
[200]	cv_agg's l1: 3.02893 + 0.448925	cv_agg's l2: 35.8233 + 4.00607	cv_agg's huber: 2.37739 + 0.389259
9
[200]	cv_agg's l1: 3.02798 + 0.224001	cv_agg's l2: 35

[I 2020-08-13 14:01:53,212] Trial 8 finished with value: 2.3340893772222033 and parameters: {'boosting_type': 'dart', 'lambda_l1': 3.6909489123383312, 'lambda_l2': 3.6590256448618943, 'max_leaves': 17, 'max_depth': 3, 'feature_fraction': 0.6113845792688436, 'bagging_fraction': 0.38021404783564877, 'learning_rate': 0.07167343314419868, 'min_data_in_leaf': 3, 'drop_rate': 0.17528604604873113}. Best is trial 0 with value: 0.010980089050296113.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0936935 + 0.0201481	cv_agg's l2: 0.187202 + 0.113493	cv_agg's huber: 0.0446918 + 0.0150496
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0936935 + 0.0201481	cv_agg's l2: 0.187202 + 0.113493	cv_agg's huber: 0.0446918 + 0.0150496
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.135504 + 0.0217622	cv_agg's l2: 0.100385 + 0.0347452	cv_agg's huber: 0.0413019 + 0.0118754
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.135504 + 0.0217622	cv_agg's l2: 0.100385 + 0.0347452	cv_agg's huber: 0.0413019 + 0.0118754
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.090552 + 0.0191776	cv_agg's l2: 0.110719 + 0.0489227	cv_agg's huber: 0.0363874 + 0.0106291
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.090552 + 0.0191776	cv_agg's l2: 0.110719 + 0.0489227	cv_agg's huber: 0.0363874 + 0.010

[I 2020-08-13 14:06:56,805] Trial 9 finished with value: 0.0394604585452546 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.6886475463687143, 'lambda_l2': 0.870190137985274, 'max_leaves': 21, 'max_depth': 4, 'feature_fraction': 0.8654875079337332, 'bagging_fraction': 0.6958423622234591, 'learning_rate': 0.07267323212928173, 'min_data_in_leaf': 8, 'drop_rate': 0.29087858873884176}. Best is trial 0 with value: 0.010980089050296113.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0403549 + 0.00438369	cv_agg's l2: 0.0141587 + 0.00538653	cv_agg's huber: 0.00699909 + 0.00254426
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0403549 + 0.00438369	cv_agg's l2: 0.0141587 + 0.00538653	cv_agg's huber: 0.00699909 + 0.00254426
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0424511 + 0.00177274	cv_agg's l2: 0.0289962 + 0.0154411	cv_agg's huber: 0.0125498 + 0.00611971
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0424511 + 0.00177274	cv_agg's l2: 0.0289962 + 0.0154411	cv_agg's huber: 0.0125498 + 0.00611971
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0396159 + 0.00379609	cv_agg's l2: 0.0164139 + 0.00412997	cv_agg's huber: 0.00807448 + 0.0019214
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0396159 + 0.00379609	cv_agg's l2: 0.0164139 + 0.00412997	cv_a

[I 2020-08-13 14:09:06,583] Trial 10 finished with value: 0.009567813127030821 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 1.4542031067696364, 'lambda_l2': 0.18736835110098826, 'max_leaves': 24, 'max_depth': 8, 'feature_fraction': 0.3116147755516885, 'bagging_fraction': 0.9650803649640984, 'learning_rate': 0.09835194545023251, 'min_data_in_leaf': 6, 'drop_rate': 0.10303724194915359}. Best is trial 10 with value: 0.009567813127030821.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0333411 + 0.00465738	cv_agg's l2: 0.0163656 + 0.00592986	cv_agg's huber: 0.00777214 + 0.00246083
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0333411 + 0.00465738	cv_agg's l2: 0.0163656 + 0.00592986	cv_agg's huber: 0.00777214 + 0.00246083
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0388769 + 0.0062068	cv_agg's l2: 0.0161137 + 0.0131859	cv_agg's huber: 0.00732476 + 0.00527862
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0388769 + 0.0062068	cv_agg's l2: 0.0161137 + 0.0131859	cv_agg's huber: 0.00732476 + 0.00527862
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0329185 + 0.0055768	cv_agg's l2: 0.0201784 + 0.0123335	cv_agg's huber: 0.00958512 + 0.00541551
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0329185 + 0.0055768	cv_agg's l2: 0.0201784 + 0.0123335	cv_agg'

[I 2020-08-13 14:11:15,391] Trial 11 finished with value: 0.008880987551862904 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 1.2089191244778061, 'lambda_l2': 0.037165717905202206, 'max_leaves': 25, 'max_depth': 8, 'feature_fraction': 0.30164271536070647, 'bagging_fraction': 0.9654762769700597, 'learning_rate': 0.09873679654445253, 'min_data_in_leaf': 6, 'drop_rate': 0.12086016166502957}. Best is trial 11 with value: 0.008880987551862904.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0470741 + 0.00868017	cv_agg's l2: 0.0260438 + 0.0106249	cv_agg's huber: 0.011718 + 0.00395422
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0470741 + 0.00868017	cv_agg's l2: 0.0260438 + 0.0106249	cv_agg's huber: 0.011718 + 0.00395422
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.044399 + 0.00868654	cv_agg's l2: 0.0496694 + 0.019331	cv_agg's huber: 0.0201477 + 0.00683555
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.044399 + 0.00868654	cv_agg's l2: 0.0496694 + 0.019331	cv_agg's huber: 0.0201477 + 0.00683555
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0492827 + 0.00694575	cv_agg's l2: 0.0190897 + 0.0089342	cv_agg's huber: 0.00937851 + 0.00424974
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0492827 + 0.00694575	cv_agg's l2: 0.0190897 + 0.0089342	cv_agg's huber:

[I 2020-08-13 14:13:20,219] Trial 12 finished with value: 0.013058432817350755 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 1.6856667687020483, 'lambda_l2': 0.028178598479510725, 'max_leaves': 25, 'max_depth': 8, 'feature_fraction': 0.3117768746256709, 'bagging_fraction': 0.9939785149020599, 'learning_rate': 0.09713068853265873, 'min_data_in_leaf': 6, 'drop_rate': 0.10069104340541106}. Best is trial 11 with value: 0.008880987551862904.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.036409 + 0.003462	cv_agg's l2: 0.0197829 + 0.00948546	cv_agg's huber: 0.00914734 + 0.00385966
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.036409 + 0.003462	cv_agg's l2: 0.0197829 + 0.00948546	cv_agg's huber: 0.00914734 + 0.00385966
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0387465 + 0.00526071	cv_agg's l2: 0.0116531 + 0.0038275	cv_agg's huber: 0.00579121 + 0.00185413
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0387465 + 0.00526071	cv_agg's l2: 0.0116531 + 0.0038275	cv_agg's huber: 0.00579121 + 0.00185413
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0364464 + 0.00633302	cv_agg's l2: 0.0200972 + 0.00968383	cv_agg's huber: 0.00928784 + 0.00378546
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0364464 + 0.00633302	cv_agg's l2: 0.0200972 + 0.00968383	cv_agg'

[I 2020-08-13 14:15:50,945] Trial 13 finished with value: 0.007785595240009327 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 1.2515806441338146, 'lambda_l2': 2.118596471089924, 'max_leaves': 23, 'max_depth': 8, 'feature_fraction': 0.34697888872928373, 'bagging_fraction': 0.9940537254525283, 'learning_rate': 0.09892857057281569, 'min_data_in_leaf': 6, 'drop_rate': 0.10538764170003276}. Best is trial 13 with value: 0.007785595240009327.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00673201 + 0.000551187	cv_agg's l2: 0.00058747 + 0.000368983	cv_agg's huber: 0.000293735 + 0.000184492
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00673201 + 0.000551187	cv_agg's l2: 0.00058747 + 0.000368983	cv_agg's huber: 0.000293735 + 0.000184492
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00646885 + 0.00199724	cv_agg's l2: 0.000856963 + 0.000816454	cv_agg's huber: 0.000428481 + 0.000408227
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00646885 + 0.00199724	cv_agg's l2: 0.000856963 + 0.000816454	cv_agg's huber: 0.000428481 + 0.000408227
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0114918 + 0.0033193	cv_agg's l2: 0.00376884 + 0.00200963	cv_agg's huber: 0.00188442 + 0.00100482
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0114918 + 0.0033193	cv_agg's l2:

[I 2020-08-13 14:19:51,143] Trial 14 finished with value: 0.0012197392946566063 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.09377759144552344, 'lambda_l2': 2.327328917795175, 'max_leaves': 23, 'max_depth': 6, 'feature_fraction': 0.3921587329679141, 'bagging_fraction': 0.9118023265057759, 'learning_rate': 0.09885684336087432, 'min_data_in_leaf': 5, 'drop_rate': 0.1339912328014652}. Best is trial 14 with value: 0.0012197392946566063.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00500354 + 0.00194637	cv_agg's l2: 0.000465118 + 0.000359788	cv_agg's huber: 0.000232559 + 0.000179894
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00500354 + 0.00194637	cv_agg's l2: 0.000465118 + 0.000359788	cv_agg's huber: 0.000232559 + 0.000179894
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0132478 + 0.00701213	cv_agg's l2: 0.00594498 + 0.00575214	cv_agg's huber: 0.0028861 + 0.00271528
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0132478 + 0.00701213	cv_agg's l2: 0.00594498 + 0.00575214	cv_agg's huber: 0.0028861 + 0.00271528
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00671128 + 0.00277481	cv_agg's l2: 0.000769375 + 0.000554238	cv_agg's huber: 0.000384688 + 0.000277119
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00671128 + 0.00277481	cv_agg's l2: 0.0

[I 2020-08-13 14:25:16,962] Trial 15 finished with value: 0.000873989314264767 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.01134282970013463, 'lambda_l2': 2.232046709449993, 'max_leaves': 23, 'max_depth': 6, 'feature_fraction': 0.41395808431372894, 'bagging_fraction': 0.8842708834269593, 'learning_rate': 0.08575189153194364, 'min_data_in_leaf': 5, 'drop_rate': 0.14053142727077003}. Best is trial 15 with value: 0.000873989314264767.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0604545 + 0.0242732	cv_agg's l2: 0.0294281 + 0.0229122	cv_agg's huber: 0.0144096 + 0.0110726
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0604545 + 0.0242732	cv_agg's l2: 0.0294281 + 0.0229122	cv_agg's huber: 0.0144096 + 0.0110726
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0502629 + 0.0144636	cv_agg's l2: 0.0307923 + 0.0168271	cv_agg's huber: 0.0145195 + 0.00809176
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0502629 + 0.0144636	cv_agg's l2: 0.0307923 + 0.0168271	cv_agg's huber: 0.0145195 + 0.00809176
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0567794 + 0.0298422	cv_agg's l2: 0.0300651 + 0.0281581	cv_agg's huber: 0.0146257 + 0.0135487
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0567794 + 0.0298422	cv_agg's l2: 0.0300651 + 0.0281581	cv_agg's huber: 0.0

[I 2020-08-13 14:30:34,542] Trial 16 finished with value: 0.014311710426058031 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.06764943039813989, 'lambda_l2': 2.3198616125474105, 'max_leaves': 23, 'max_depth': 6, 'feature_fraction': 0.42052056234684493, 'bagging_fraction': 0.8826481863948099, 'learning_rate': 0.05482815046118875, 'min_data_in_leaf': 5, 'drop_rate': 0.13899067426196854}. Best is trial 15 with value: 0.000873989314264767.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00873325 + 0.00439796	cv_agg's l2: 0.00166267 + 0.00195902	cv_agg's huber: 0.000831333 + 0.000979512
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00873325 + 0.00439796	cv_agg's l2: 0.00166267 + 0.00195902	cv_agg's huber: 0.000831333 + 0.000979512
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0225862 + 0.00890605	cv_agg's l2: 0.017336 + 0.0101701	cv_agg's huber: 0.0076035 + 0.00407039
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0225862 + 0.00890605	cv_agg's l2: 0.017336 + 0.0101701	cv_agg's huber: 0.0076035 + 0.00407039
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0123033 + 0.00415081	cv_agg's l2: 0.00381181 + 0.00260009	cv_agg's huber: 0.00190591 + 0.00130005
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0123033 + 0.00415081	cv_agg's l2: 0.00381181 + 0.0026

[I 2020-08-13 14:38:05,319] Trial 17 finished with value: 0.003276258181953681 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.08565813504264398, 'lambda_l2': 2.6389089102403407, 'max_leaves': 22, 'max_depth': 6, 'feature_fraction': 0.7489541802768962, 'bagging_fraction': 0.8779572032715917, 'learning_rate': 0.08624095691530555, 'min_data_in_leaf': 5, 'drop_rate': 0.14027400579541083}. Best is trial 15 with value: 0.000873989314264767.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0508622 + 0.00941991	cv_agg's l2: 0.0271677 + 0.00737002	cv_agg's huber: 0.0135366 + 0.00364093
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0508622 + 0.00941991	cv_agg's l2: 0.0271677 + 0.00737002	cv_agg's huber: 0.0135366 + 0.00364093
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0507471 + 0.0101042	cv_agg's l2: 0.0430908 + 0.0187531	cv_agg's huber: 0.0184949 + 0.00615187
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0507471 + 0.0101042	cv_agg's l2: 0.0430908 + 0.0187531	cv_agg's huber: 0.0184949 + 0.00615187
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0514571 + 0.00664638	cv_agg's l2: 0.0573296 + 0.0153863	cv_agg's huber: 0.0226378 + 0.00375162
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0514571 + 0.00664638	cv_agg's l2: 0.0573296 + 0.0153863	cv_agg's h

[I 2020-08-13 14:41:57,838] Trial 18 finished with value: 0.01837347040097632 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.0701908924529837, 'lambda_l2': 1.726826391766918, 'max_leaves': 23, 'max_depth': 6, 'feature_fraction': 0.4088620088053657, 'bagging_fraction': 0.8904308153670366, 'learning_rate': 0.0815910645900724, 'min_data_in_leaf': 3, 'drop_rate': 0.16196440498545484}. Best is trial 15 with value: 0.000873989314264767.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0309843 + 0.00839303	cv_agg's l2: 0.00677975 + 0.00408797	cv_agg's huber: 0.00338987 + 0.00204398
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0309843 + 0.00839303	cv_agg's l2: 0.00677975 + 0.00408797	cv_agg's huber: 0.00338987 + 0.00204398
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.030763 + 0.0173291	cv_agg's l2: 0.00689482 + 0.00650295	cv_agg's huber: 0.00344741 + 0.00325148
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.030763 + 0.0173291	cv_agg's l2: 0.00689482 + 0.00650295	cv_agg's huber: 0.00344741 + 0.00325148
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0338801 + 0.0114489	cv_agg's l2: 0.031714 + 0.0246512	cv_agg's huber: 0.0113279 + 0.00656907
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0338801 + 0.0114489	cv_agg's l2: 0.031714 + 0.0246512	cv_agg

[I 2020-08-13 14:47:36,952] Trial 19 finished with value: 0.004676540243633851 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.1301612850063989, 'lambda_l2': 1.864751959879183, 'max_leaves': 18, 'max_depth': 5, 'feature_fraction': 0.4958010250154157, 'bagging_fraction': 0.5529094459336131, 'learning_rate': 0.061801500106572, 'min_data_in_leaf': 5, 'drop_rate': 0.14393538920098237}. Best is trial 15 with value: 0.000873989314264767.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.191059 + 0.0415755	cv_agg's l2: 0.291764 + 0.162813	cv_agg's huber: 0.0915043 + 0.0289835
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.191059 + 0.0415755	cv_agg's l2: 0.291764 + 0.162813	cv_agg's huber: 0.0915043 + 0.0289835
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.18286 + 0.0423615	cv_agg's l2: 0.260099 + 0.150342	cv_agg's huber: 0.0838344 + 0.0272073
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.18286 + 0.0423615	cv_agg's l2: 0.260099 + 0.150342	cv_agg's huber: 0.0838344 + 0.0272073
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.195368 + 0.0603986	cv_agg's l2: 0.32053 + 0.201743	cv_agg's huber: 0.0945842 + 0.0414114
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.195368 + 0.0603986	cv_agg's l2: 0.32053 + 0.201743	cv_agg's huber: 0.0945842 + 0.0414114
3
Tra

[I 2020-08-13 14:52:15,523] Trial 20 finished with value: 0.07915517094374133 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.7765672661204838, 'lambda_l2': 2.8820595882514586, 'max_leaves': 24, 'max_depth': 6, 'feature_fraction': 0.3634304978228753, 'bagging_fraction': 0.7761872529359845, 'learning_rate': 0.038604308768532415, 'min_data_in_leaf': 4, 'drop_rate': 0.1261010882151522}. Best is trial 15 with value: 0.000873989314264767.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00584616 + 0.00232478	cv_agg's l2: 0.000520799 + 0.000552102	cv_agg's huber: 0.000260399 + 0.000276051
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00584616 + 0.00232478	cv_agg's l2: 0.000520799 + 0.000552102	cv_agg's huber: 0.000260399 + 0.000276051
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00634965 + 0.00151862	cv_agg's l2: 0.000517844 + 0.000213259	cv_agg's huber: 0.000258922 + 0.000106629
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00634965 + 0.00151862	cv_agg's l2: 0.000517844 + 0.000213259	cv_agg's huber: 0.000258922 + 0.000106629
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00715683 + 0.00191643	cv_agg's l2: 0.000958156 + 0.000459252	cv_agg's huber: 0.000479078 + 0.000229626
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00715683 + 0.00191643	cv_a

[I 2020-08-13 15:00:37,555] Trial 21 finished with value: 0.0008705968725506693 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.03674546022666247, 'lambda_l2': 2.5025758383109715, 'max_leaves': 22, 'max_depth': 6, 'feature_fraction': 0.741996402547356, 'bagging_fraction': 0.8929839112500518, 'learning_rate': 0.0892243160116563, 'min_data_in_leaf': 5, 'drop_rate': 0.14842616274718734}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00522322 + 0.00131233	cv_agg's l2: 0.000206877 + 6.25888e-05	cv_agg's huber: 0.000103438 + 3.12944e-05
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00522322 + 0.00131233	cv_agg's l2: 0.000206877 + 6.25888e-05	cv_agg's huber: 0.000103438 + 3.12944e-05
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0159241 + 0.0128847	cv_agg's l2: 0.0112652 + 0.0137229	cv_agg's huber: 0.00477732 + 0.00540125
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0159241 + 0.0128847	cv_agg's l2: 0.0112652 + 0.0137229	cv_agg's huber: 0.00477732 + 0.00540125
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00905631 + 0.00391403	cv_agg's l2: 0.00272511 + 0.00165921	cv_agg's huber: 0.00136255 + 0.000829605
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00905631 + 0.00391403	cv_agg's l2: 0.00272511

[I 2020-08-13 15:08:42,382] Trial 22 finished with value: 0.001717986154042453 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.053131590725194355, 'lambda_l2': 2.4404318133736185, 'max_leaves': 22, 'max_depth': 7, 'feature_fraction': 0.7804779007322804, 'bagging_fraction': 0.9035098989326601, 'learning_rate': 0.09095150991820514, 'min_data_in_leaf': 5, 'drop_rate': 0.1598502099195348}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.016919 + 0.00464613	cv_agg's l2: 0.00277948 + 0.00203883	cv_agg's huber: 0.00138974 + 0.00101941
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.016919 + 0.00464613	cv_agg's l2: 0.00277948 + 0.00203883	cv_agg's huber: 0.00138974 + 0.00101941
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0191073 + 0.00878397	cv_agg's l2: 0.00980407 + 0.00921182	cv_agg's huber: 0.00455666 + 0.00410805
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0191073 + 0.00878397	cv_agg's l2: 0.00980407 + 0.00921182	cv_agg's huber: 0.00455666 + 0.00410805
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0187078 + 0.00537256	cv_agg's l2: 0.00352359 + 0.00175133	cv_agg's huber: 0.0017618 + 0.000875663
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0187078 + 0.00537256	cv_agg's l2: 0.00352359 + 0.0017

[I 2020-08-13 15:14:53,680] Trial 23 finished with value: 0.005094624719498458 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.35683086577908973, 'lambda_l2': 1.6314346373023965, 'max_leaves': 21, 'max_depth': 6, 'feature_fraction': 0.6753802863243348, 'bagging_fraction': 0.841583709209087, 'learning_rate': 0.07742588133464649, 'min_data_in_leaf': 5, 'drop_rate': 0.18868160144396573}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0127328 + 0.00611333	cv_agg's l2: 0.00621434 + 0.00465084	cv_agg's huber: 0.00308564 + 0.00229501
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0127328 + 0.00611333	cv_agg's l2: 0.00621434 + 0.00465084	cv_agg's huber: 0.00308564 + 0.00229501
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0128134 + 0.00451198	cv_agg's l2: 0.00560041 + 0.00278138	cv_agg's huber: 0.00280021 + 0.00139069
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0128134 + 0.00451198	cv_agg's l2: 0.00560041 + 0.00278138	cv_agg's huber: 0.00280021 + 0.00139069
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00780977 + 0.0054814	cv_agg's l2: 0.00249017 + 0.00339218	cv_agg's huber: 0.00124509 + 0.00169609
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00780977 + 0.0054814	cv_agg's l2: 0.00249017 + 0.00

[I 2020-08-13 15:24:01,565] Trial 24 finished with value: 0.0012644653443555406 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.021443510258997198, 'lambda_l2': 2.8610112398451073, 'max_leaves': 24, 'max_depth': 5, 'feature_fraction': 0.8141254079396216, 'bagging_fraction': 0.9312667806559541, 'learning_rate': 0.09168725782333742, 'min_data_in_leaf': 4, 'drop_rate': 0.12009180548899155}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0284109 + 0.00290712	cv_agg's l2: 0.0141563 + 0.00986309	cv_agg's huber: 0.00639652 + 0.00411102
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0284109 + 0.00290712	cv_agg's l2: 0.0141563 + 0.00986309	cv_agg's huber: 0.00639652 + 0.00411102
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0269477 + 0.00239475	cv_agg's l2: 0.00787016 + 0.00705547	cv_agg's huber: 0.00381728 + 0.00329492
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0269477 + 0.00239475	cv_agg's l2: 0.00787016 + 0.00705547	cv_agg's huber: 0.00381728 + 0.00329492
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0252612 + 0.00252881	cv_agg's l2: 0.00877584 + 0.00233968	cv_agg's huber: 0.00438778 + 0.00116962
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0252612 + 0.00252881	cv_agg's l2: 0.00877584 + 0.0023

[I 2020-08-13 15:31:04,285] Trial 25 finished with value: 0.007091138353322274 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.9285606059836597, 'lambda_l2': 2.178494639368528, 'max_leaves': 21, 'max_depth': 7, 'feature_fraction': 0.9715173366785124, 'bagging_fraction': 0.7414573799175114, 'learning_rate': 0.09290279235038647, 'min_data_in_leaf': 5, 'drop_rate': 0.15007498048911055}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0383721 + 0.00921226	cv_agg's l2: 0.0305242 + 0.0190268	cv_agg's huber: 0.0114394 + 0.00590094
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0383721 + 0.00921226	cv_agg's l2: 0.0305242 + 0.0190268	cv_agg's huber: 0.0114394 + 0.00590094
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0592369 + 0.0212729	cv_agg's l2: 0.0284204 + 0.023777	cv_agg's huber: 0.013565 + 0.0110231
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0592369 + 0.0212729	cv_agg's l2: 0.0284204 + 0.023777	cv_agg's huber: 0.013565 + 0.0110231
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0605967 + 0.0189044	cv_agg's l2: 0.0333386 + 0.0185349	cv_agg's huber: 0.0153279 + 0.00869691
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0605967 + 0.0189044	cv_agg's l2: 0.0333386 + 0.0185349	cv_agg's huber: 0.01

[I 2020-08-13 15:35:43,747] Trial 26 finished with value: 0.01388585522788906 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.38398897463612175, 'lambda_l2': 3.1812005152793743, 'max_leaves': 19, 'max_depth': 4, 'feature_fraction': 0.5842408157665242, 'bagging_fraction': 0.8402129559073128, 'learning_rate': 0.07885715134640567, 'min_data_in_leaf': 7, 'drop_rate': 0.13080903959850193}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0846491 + 0.0177529	cv_agg's l2: 0.105288 + 0.0538513	cv_agg's huber: 0.0341405 + 0.0143253
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0846491 + 0.0177529	cv_agg's l2: 0.105288 + 0.0538513	cv_agg's huber: 0.0341405 + 0.0143253
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.125279 + 0.0248717	cv_agg's l2: 0.105878 + 0.0472348	cv_agg's huber: 0.0426454 + 0.0149054
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.125279 + 0.0248717	cv_agg's l2: 0.105878 + 0.0472348	cv_agg's huber: 0.0426454 + 0.0149054
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.10356 + 0.0331063	cv_agg's l2: 0.27795 + 0.187835	cv_agg's huber: 0.0561434 + 0.0258679
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.10356 + 0.0331063	cv_agg's l2: 0.27795 + 0.187835	cv_agg's huber: 0.0561434 + 0.0258679

[I 2020-08-13 15:41:09,685] Trial 27 finished with value: 0.04144093454620371 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.061890724811629, 'lambda_l2': 2.61459984810676, 'max_leaves': 23, 'max_depth': 6, 'feature_fraction': 0.7390039182144091, 'bagging_fraction': 0.9366751848638101, 'learning_rate': 0.06226140241442071, 'min_data_in_leaf': 6, 'drop_rate': 0.18362766966267918}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0226784 + 0.00276199	cv_agg's l2: 0.00560698 + 0.00284882	cv_agg's huber: 0.0028014 + 0.00142049
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0226784 + 0.00276199	cv_agg's l2: 0.00560698 + 0.00284882	cv_agg's huber: 0.0028014 + 0.00142049
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0214041 + 0.00506707	cv_agg's l2: 0.00730125 + 0.00310537	cv_agg's huber: 0.00365062 + 0.00155269
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0214041 + 0.00506707	cv_agg's l2: 0.00730125 + 0.00310537	cv_agg's huber: 0.00365062 + 0.00155269
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0193807 + 0.00199694	cv_agg's l2: 0.00418989 + 0.00183768	cv_agg's huber: 0.00209494 + 0.000918842
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0193807 + 0.00199694	cv_agg's l2: 0.00418989 + 0.001

[I 2020-08-13 15:45:41,157] Trial 28 finished with value: 0.006857949684666803 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.6508252258973276, 'lambda_l2': 3.95730404614422, 'max_leaves': 22, 'max_depth': 5, 'feature_fraction': 0.47198929895800756, 'bagging_fraction': 0.7043992905200163, 'learning_rate': 0.09975923303511873, 'min_data_in_leaf': 4, 'drop_rate': 0.1565667136904241}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0310929 + 0.00268882	cv_agg's l2: 0.0144997 + 0.0047632	cv_agg's huber: 0.00711543 + 0.00223057
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0310929 + 0.00268882	cv_agg's l2: 0.0144997 + 0.0047632	cv_agg's huber: 0.00711543 + 0.00223057
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0289852 + 0.00250455	cv_agg's l2: 0.00829861 + 0.00260305	cv_agg's huber: 0.0041493 + 0.00130153
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0289852 + 0.00250455	cv_agg's l2: 0.00829861 + 0.00260305	cv_agg's huber: 0.0041493 + 0.00130153
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0291682 + 0.00371361	cv_agg's l2: 0.00979746 + 0.00194692	cv_agg's huber: 0.00489873 + 0.000973461
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0291682 + 0.00371361	cv_agg's l2: 0.00979746 + 0.0019469

[I 2020-08-13 15:50:02,270] Trial 29 finished with value: 0.005926821451449163 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 1.0319329639891228, 'lambda_l2': 1.523101163855415, 'max_leaves': 24, 'max_depth': 7, 'feature_fraction': 0.5620158928251601, 'bagging_fraction': 0.7800175725000547, 'learning_rate': 0.08356137655132642, 'min_data_in_leaf': 5, 'drop_rate': 0.16488397920707634}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 3.34867 + 0.197085	cv_agg's l2: 36.4929 + 1.60302	cv_agg's huber: 2.6409 + 0.182949
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 3.34867 + 0.197085	cv_agg's l2: 36.4929 + 1.60302	cv_agg's huber: 2.6409 + 0.182949
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 3.33949 + 0.162398	cv_agg's l2: 36.6233 + 1.52747	cv_agg's huber: 2.62166 + 0.145462
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 3.33949 + 0.162398	cv_agg's l2: 36.6233 + 1.52747	cv_agg's huber: 2.62166 + 0.145462
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 3.41226 + 0.38266	cv_agg's l2: 37.255 + 4.87362	cv_agg's huber: 2.68539 + 0.351772
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 3.41226 + 0.38266	cv_agg's l2: 37.255 + 4.87362	cv_agg's huber: 2.68539 + 0.351772
3
Training until validation scores don't improve 

[I 2020-08-13 16:01:24,060] Trial 30 finished with value: 2.5575267265649337 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.3129688096801219, 'lambda_l2': 1.991185777439485, 'max_leaves': 25, 'max_depth': 6, 'feature_fraction': 0.6372396313609705, 'bagging_fraction': 0.9954450788353645, 'learning_rate': 0.010676455186618111, 'min_data_in_leaf': 3, 'drop_rate': 0.1128903622879699}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0143323 + 0.00450174	cv_agg's l2: 0.00705737 + 0.00328396	cv_agg's huber: 0.00352868 + 0.00164198
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0143323 + 0.00450174	cv_agg's l2: 0.00705737 + 0.00328396	cv_agg's huber: 0.00352868 + 0.00164198
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0064663 + 0.00367633	cv_agg's l2: 0.00143742 + 0.00216499	cv_agg's huber: 0.000718712 + 0.0010825
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0064663 + 0.00367633	cv_agg's l2: 0.00143742 + 0.00216499	cv_agg's huber: 0.000718712 + 0.0010825
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00492762 + 0.00254996	cv_agg's l2: 0.000738274 + 0.000717882	cv_agg's huber: 0.000369137 + 0.000358941
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00492762 + 0.00254996	cv_agg's l2: 0.000738274

[I 2020-08-13 16:37:13,331] Trial 31 finished with value: 0.0008990867149605796 and parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.03681782972214824, 'lambda_l2': 2.949544867837493, 'max_leaves': 24, 'max_depth': 5, 'feature_fraction': 0.8089662722148161, 'bagging_fraction': 0.9405487078401557, 'learning_rate': 0.0919560265454591, 'min_data_in_leaf': 4, 'drop_rate': 0.11534494712759874}. Best is trial 21 with value: 0.0008705968725506693.


0
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0125466 + 0.00112387	cv_agg's l2: 0.00243554 + 0.00154275	cv_agg's huber: 0.00121777 + 0.000771374
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0125466 + 0.00112387	cv_agg's l2: 0.00243554 + 0.00154275	cv_agg's huber: 0.00121777 + 0.000771374
1
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.0102017 + 0.00218254	cv_agg's l2: 0.00192589 + 0.00137061	cv_agg's huber: 0.000962945 + 0.000685305
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.0102017 + 0.00218254	cv_agg's l2: 0.00192589 + 0.00137061	cv_agg's huber: 0.000962945 + 0.000685305
2
Training until validation scores don't improve for 10 rounds
[200]	cv_agg's l1: 0.00865016 + 0.00186888	cv_agg's l2: 0.000993717 + 0.000277164	cv_agg's huber: 0.000496859 + 0.000138582
Did not meet early stopping. Best iteration is:
[200]	cv_agg's l1: 0.00865016 + 0.00186888	cv_agg's l2: 0.000

In [None]:
# Pin the settings that are not part of the tuned search space before reusing `best`.
# NOTE(review): `best` is created in an earlier cell — presumably the best trial's
# parameter mapping from the optimization above; confirm its type supports item assignment.
# Neither "metric" nor "objective" appears among the per-trial parameters printed in the logs,
# so both are set explicitly here.
best["metric"] =  ["mae", "mse", "huber"]
best['objective'] = 'regression'
# Bare expression: show the completed parameter set as the cell output.
best

## Getting SHAP results with the best parameters ##

In [None]:
def make_shap(params: dict):
    """
    Assemble the repeated partition -> SHAP pipeline.

    The returned sklearn ``Pipeline`` has three steps:

    1. ``extractor`` - a ``DataExtractor``;
    2. ``prepare_for_partitioning`` - ``TupleWith(params)``;
    3. ``partition_shap`` - a ``Repeat`` wrapper that runs the inner pipeline
       (``DataPartitioner`` -> ``TupleWith(lgb_params)`` -> ``ShapSelector``)
       using the notebook-level ``repeats`` setting.

    NOTE(review): the inner pipeline is built from the global ``lgb_params``, not from
    ``params`` - confirm this is intended when calling with freshly optimized parameters.
    NOTE(review): ``dataclasses.replace`` is applied to the second element of the value
    passed between repeats, which must be a dataclass instance with a ``seed`` field;
    presumably that element originates from ``params``, in which case the ``dict``
    annotation is misleading - verify against the callers.

    :param params: value handed to the outer ``TupleWith`` step
    :return: the assembled (unfitted) pipeline
    """

    def reseed(pair, repeat_index):
        # Keep the first element as is; copy the second one with ``seed`` set to the
        # value supplied by ``Repeat`` (presumably the repeat index - TODO confirm).
        return pair[0], replace(pair[1], seed=repeat_index)

    # Inner pipeline: executed by ``Repeat`` with a re-seeded input each time.
    inner_steps = [
        ("partitioner", DataPartitioner()),
        ('prepare_for_partitioning', TupleWith(lgb_params)),
        ("shap_computation", ShapSelector()),
    ]
    repeated_cv = Repeat(Pipeline(inner_steps), repeats, reseed)

    # Outer pipeline: extraction, pairing with ``params``, then the repeated inner pipeline.
    outer_steps = [
        ('extractor', DataExtractor()),
        ('prepare_for_partitioning', TupleWith(params)),
        ("partition_shap", repeated_cv),
    ]
    return Pipeline(outer_steps)