In [198]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

pd.options.display.max_columns = 500

### Загрузим датасет с машинами. Цель - верно восстанавливать для каждой из них цену продажи!

In [199]:
data = pd.read_csv('autos.csv')

data.head()

Unnamed: 0,name,year,selling_price,km_driven,fuel,seller_type,transmission,owner
0,Maruti 800 AC,2007,60000,70000,Petrol,Individual,Manual,First Owner
1,Maruti Wagon R LXI Minor,2007,135000,50000,Petrol,Individual,Manual,First Owner
2,Hyundai Verna 1.6 SX,2012,600000,100000,Diesel,Individual,Manual,First Owner
3,Datsun RediGO T Option,2017,250000,46000,Petrol,Individual,Manual,First Owner
4,Honda Amaze VX i-DTEC,2014,450000,141000,Diesel,Individual,Manual,Second Owner


In [200]:
### Колонка с тергетом - "selling price"

X = data.drop("selling_price", axis=1)
y = data["selling_price"]

### Будем замерять MSLE!
### Поэтому прологарифмируем таргет
### А после оптимизируем MSE

y = y.apply(np.log1p)


In [201]:
### Разделим выборку на трейн и тест!

from sklearn.model_selection import train_test_split 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [202]:
X_train

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner
3294,Maruti Swift Dzire AMT VDI,2018,50000,Diesel,Individual,Automatic,First Owner
2290,Honda City 1.5 V AT,2008,70000,Petrol,Individual,Automatic,First Owner
874,Maruti Alto 800 LXI,2017,50000,Petrol,Individual,Manual,First Owner
1907,Datsun GO T Petrol,2015,92198,Petrol,Dealer,Manual,Second Owner
3244,Hyundai EON Era Plus,2013,3240,Petrol,Individual,Manual,Second Owner
...,...,...,...,...,...,...,...
2895,Maruti Zen Estilo LXI BSIII,2008,40000,Petrol,Individual,Manual,First Owner
2763,Hyundai Grand i10 1.2 Kappa Asta,2019,15000,Petrol,Individual,Manual,First Owner
905,Maruti Ertiga VXI,2015,11918,Petrol,Trustmark Dealer,Manual,First Owner
3980,Hyundai Creta 1.6 CRDi SX Option,2015,90000,Diesel,Individual,Manual,Second Owner


__Задание__ 

Реализуйте свой MeanTargetEncoder с добавленем некоторого шума!

Однажды в лекционном материале, обсуждая счетчики, мы говорили с вами о том, что из-за них модели могут переобучаться. Один из способов бороться с этим - валидировать расчеты среднего таргета (стратегия отложенной выборки / расчеты на кросс-валидации). Но есть еще проще!

Можно просто к значению счетчика добавить случайный шум (зашумить данные)!

Напомним, что рассчитываться новые признаки должны по такой формуле:

$$
g_j(x, X) = \frac{\sum_{i=1}^{l} [f_j(x) = f_j(x_i)][y_i = +1]}{\sum_{i=1}^{l} [f_j(x) = f_j(x_i)]} + C * \epsilon
$$


Пусть шум будет случайной величиной из нормального стандартного распределения, то есть $\epsilon \sim N(0, 1) $, а $ C = 0.006$.

Создавай свой класс-трансформер, наследуйтесь от классов `BaseEstimator, TransformerMixin` из `sklearn.base`. Трансформер не должен модифицировать передаваемую ему выборку inplace, а все необходимые статистики нужно считать только по обучающей выборке в методе `fit`. Ваш трансформер должен принимать при инициализации список из категориальных признаков и список из числовых признаков. На выходе должен получиться датасет того же размера с измененными категориальными признаками

In [203]:
from sklearn.base import BaseEstimator, TransformerMixin
import random

class MeanTargetEncoderNoise(BaseEstimator, TransformerMixin):

    
    def __init__(self, categorical, numeric):
        self.categorical = categorical
        self.numeric = numeric

    
    def fit(self, X, y):
        X_fit, y_fit = X.copy(), y.copy()
        self.C = 0.006
        self.err = random.normalvariate(0, 1)
        df_fit = pd.concat([X, y], axis =1)
        target = df_fit.columns[-1]
        self.dict_of_means = {col : (df_fit.groupby(col)[target].mean()+ self.C*self.err)
                              for col in self.categorical} 

        return self
        
    def transform(self, df):
        df_ = df.copy()
        
        for col in self.categorical:
            df_[col] = df_[col].map(self.dict_of_means[col])
            df_[col] = df_[col].fillna(0)
        
        return df_


In [204]:
### Проверка работы трансформера

np.random.seed(1)
transformer = MeanTargetEncoderNoise(categorical=object_cols, numeric=num_cols)

transformer.fit(X_train, y_train)

train = transformer.transform(X_train)
test = transformer.transform(X_test)

train.head()

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner
3294,13.48171,13.452154,50000,13.101714,12.630687,13.783842,12.985526
2290,12.128045,11.922746,70000,12.46594,12.630687,13.783842,12.985526
874,12.314353,13.349319,50000,12.46594,12.630687,12.65078,12.985526
1907,12.496687,13.069939,92198,12.46594,13.159911,12.65078,12.47278
3244,12.404542,12.873347,3240,12.46594,12.630687,12.65078,12.47278


Обучите несколько деревьев, перебирая максимальную глубину алгоритма из списка `max_depth_list`, а остальные параметры оставьте дефолтными. Выведите лучшее значение гиперпараметра. Постройте график зависимости MSLE на тестовой выборке от значения гиперпараметра. Воспользуйтесь `Pipeline` без `GridSearch`. Проделайте то же самое с `min_samples_split`, `min_impurity_decrease`, `max_leaf_nodes`. (по 2б на каждый параметр)

In [205]:
max_depth_list = [3, 5, 8, 12]
min_samples_split_list = [10, 50, 100, 500]
min_impurity_decrease_list = [0, 0.1, 0.15, 0.2]
max_leaf_nodes_list = [100, 200, 500]

In [212]:
from sklearn.metrics import mean_squared_error as mse
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

np.random.seed(1)

for depth in max_depth_list:
    pipe = Pipeline([('meantarget', MeanTargetEncoderNoise(categorical=object_cols, numeric=num_cols)), 
                     ('model', DecisionTreeRegressor(max_depth = depth))])
    pipe.fit(X_train, y_train)
    print(f'{round(mse(y_test, pipe.predict(X_test)), 4)}')


0.7968
1.4419
1.9886
1.9836


In [207]:
for split in min_samples_split_list:
    pipe = Pipeline([('meantarget', MeanTargetEncoderNoise(object_cols, num_cols)), 
                     ('model', DecisionTreeRegressor(max_depth = 3 , min_samples_split = split))])
    pipe.fit(X_train, y_train)
    print(f'{round(mse(y_test, pipe.predict(X_test)), 3)}')

0.797
0.797
0.797
0.812


In [208]:
for i in min_impurity_decrease_list:
    pipe = Pipeline([('meantarget', MeanTargetEncoderNoise(object_cols, num_cols)), 
                     ('model', DecisionTreeRegressor(max_depth = 3 , min_samples_split = 10, min_impurity_decrease = i))])
    pipe.fit(X_train, y_train)
    print(f'{round(mse(y_test, pipe.predict(X_test)), 10)}')

0.7968292145
0.5204871413
0.5204871413
0.5204871413


In [209]:
for i in max_leaf_nodes_list:
    pipe = Pipeline([('meantarget', MeanTargetEncoderNoise(object_cols, num_cols)), 
                     ('model', DecisionTreeRegressor(max_depth = 3 , min_samples_split = 10, min_impurity_decrease = 0.1, max_leaf_nodes = i))])
    pipe.fit(X_train, y_train)
    print(f'{round(mse(y_test, pipe.predict(X_test)), 3)}')

0.52
0.52
0.52


Подберите лучшую комбинацию параметров, используя `GridSearchCV` и набор массивов значений параметров из предыдущего задания. Для лучшей комбинации посчитайте MSLE на тестовой выборке. Получились ли лучшие параметры такими же, как если бы вы подбирали их по-отдельности при остальных гиперпараметрах по умолчанию (предыдущее задание)? (2б)

In [186]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit

param_grid = {
    "decision_tree__max_depth": [3, 5, 8, 12],
    "decision_tree__min_samples_split": [10, 50, 100, 500],
    "decision_tree__min_impurity_decrease": [0, 0.1, 0.15, 0.2],
    "decision_tree__max_leaf_nodes": [100, 200, 500]
}
np.random.seed(1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

pipe_2 = Pipeline([('meantarget', MeanTargetEncoderNoise(object_cols, num_cols)), ('decision_tree', DecisionTreeRegressor())])

tscv = TimeSeriesSplit(n_splits=3)

search = GridSearchCV(pipe_2, 
                      param_grid, 
                      cv=tscv,
                      scoring='neg_mean_squared_error',
                      verbose=10)

search.fit(X_train, y_train)

print(f"Best parameter (CV score={search.best_score_:.5f}):")
print(search.best_params_)

print(f"Качество лучшей модели на финальном тесте: {search.score(X_test, y_test)}")

Fitting 3 folds for each of 192 candidates, totalling 576 fits
[CV 1/3; 1/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10
[CV 1/3; 1/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10;, score=-1.796 total time=   0.0s
[CV 2/3; 1/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10
[CV 2/3; 1/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10;, score=-1.401 total time=   0.0s
[CV 3/3; 1/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10
[CV 3/3; 1/192] END decision_tree__max_depth=3, decision_tree__max_l

[CV 3/3; 11/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=100;, score=-0.539 total time=   0.0s
[CV 1/3; 12/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500
[CV 1/3; 12/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500;, score=-0.783 total time=   0.0s
[CV 2/3; 12/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500
[CV 2/3; 12/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500;, score=-0.713 total time=   0.0s
[CV 3/3; 12/192] START decision_tree__max_depth=3, decision_tree__max_leaf

[CV 3/3; 22/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=50;, score=-0.539 total time=   0.0s
[CV 1/3; 23/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100
[CV 1/3; 23/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100;, score=-0.783 total time=   0.0s
[CV 2/3; 23/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100
[CV 2/3; 23/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100;, score=-0.713 total time=   0.0s
[CV 3/3; 23/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes

[CV 1/3; 34/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50;, score=-1.796 total time=   0.0s
[CV 2/3; 34/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50
[CV 2/3; 34/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50;, score=-1.401 total time=   0.0s
[CV 3/3; 34/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50
[CV 3/3; 34/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50;, score=-0.889 total time=   0.0s
[CV 1/3; 35/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision

[CV 1/3; 45/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10;, score=-0.783 total time=   0.0s
[CV 2/3; 45/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10
[CV 2/3; 45/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10;, score=-0.713 total time=   0.0s
[CV 3/3; 45/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10
[CV 3/3; 45/192] END decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10;, score=-0.539 total time=   0.0s
[CV 1/3; 46/192] START decision_tree__max_depth=3, decision_tree__max_leaf_nodes=500

[CV 2/3; 55/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100;, score=-0.713 total time=   0.0s
[CV 3/3; 55/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100
[CV 3/3; 55/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100;, score=-0.539 total time=   0.0s
[CV 1/3; 56/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=500
[CV 1/3; 56/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=500;, score=-0.783 total time=   0.0s
[CV 2/3; 56/192] START decision_tree__max_depth=5, decision_tree__max_leaf_node

[CV 2/3; 65/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10;, score=-2.681 total time=   0.0s
[CV 3/3; 65/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10
[CV 3/3; 65/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10;, score=-1.591 total time=   0.0s
[CV 1/3; 66/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50
[CV 1/3; 66/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50;, score=-1.784 total time=   0.0s
[CV 2/3; 66/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision

[CV 2/3; 76/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500;, score=-0.713 total time=   0.0s
[CV 3/3; 76/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500
[CV 3/3; 76/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500;, score=-0.539 total time=   0.0s
[CV 1/3; 77/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10
[CV 1/3; 77/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10;, score=-0.783 total time=   0.0s
[CV 2/3; 77/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nod

[CV 1/3; 87/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100;, score=-0.783 total time=   0.0s
[CV 2/3; 87/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100
[CV 2/3; 87/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100;, score=-0.713 total time=   0.0s
[CV 3/3; 87/192] START decision_tree__max_depth=5, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100
[CV 3/3; 87/192] END decision_tree__max_depth=5, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100;, score=-0.539 total time=   0.0s
[CV 1/3; 88/192] START decision_tree__max_depth=5, decision_tree__max_leaf_node

[CV 1/3; 98/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50
[CV 1/3; 98/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50;, score=-1.783 total time=   0.0s
[CV 2/3; 98/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50
[CV 2/3; 98/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50;, score=-1.870 total time=   0.0s
[CV 3/3; 98/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50
[CV 3/3; 98/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0, de

[CV 2/3; 108/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500;, score=-0.713 total time=   0.0s
[CV 3/3; 108/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500
[CV 3/3; 108/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=500;, score=-0.539 total time=   0.0s
[CV 1/3; 109/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10
[CV 1/3; 109/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10;, score=-0.783 total time=   0.0s
[CV 2/3; 109/192] START decision_tree__max_depth=8, decision_tree__max_le

[CV 2/3; 119/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100;, score=-0.713 total time=   0.0s
[CV 3/3; 119/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100
[CV 3/3; 119/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=100;, score=-0.539 total time=   0.0s
[CV 1/3; 120/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=500
[CV 1/3; 120/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=500;, score=-0.783 total time=   0.0s
[CV 2/3; 120/192] START decision_tree__max_depth=8, decision_tree__max_lea

[CV 2/3; 130/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50;, score=-1.870 total time=   0.0s
[CV 3/3; 130/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50
[CV 3/3; 130/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50;, score=-1.154 total time=   0.0s
[CV 1/3; 131/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=100
[CV 1/3; 131/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=100;, score=-1.794 total time=   0.0s
[CV 2/3; 131/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, 

[CV 2/3; 141/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10;, score=-0.713 total time=   0.0s
[CV 3/3; 141/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10
[CV 3/3; 141/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10;, score=-0.539 total time=   0.0s
[CV 1/3; 142/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=50
[CV 1/3; 142/192] END decision_tree__max_depth=8, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=50;, score=-0.783 total time=   0.0s
[CV 2/3; 142/192] START decision_tree__max_depth=8, decision_tree__max_leaf_nod

[CV 3/3; 152/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=500;, score=-0.539 total time=   0.0s
[CV 1/3; 153/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=10
[CV 1/3; 153/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=10;, score=-0.783 total time=   0.0s
[CV 2/3; 153/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=10
[CV 2/3; 153/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=10;, score=-0.713 total time=   0.0s
[CV 3/3; 153/192] START decision_tree__max_depth=12, decision_tree__m

[CV 3/3; 160/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=100, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=500;, score=-0.539 total time=   0.0s
[CV 1/3; 161/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10
[CV 1/3; 161/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10;, score=-3.772 total time=   0.0s
[CV 2/3; 161/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10
[CV 2/3; 161/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=10;, score=-2.678 total time=   0.0s
[CV 3/3; 161/192] START decision_tree__max_depth=12, decision_tree__max_leaf_node

[CV 2/3; 168/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=500;, score=-0.713 total time=   0.0s
[CV 3/3; 168/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=500
[CV 3/3; 168/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.1, decision_tree__min_samples_split=500;, score=-0.539 total time=   0.0s
[CV 1/3; 169/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=10
[CV 1/3; 169/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=200, decision_tree__min_impurity_decrease=0.15, decision_tree__min_samples_split=10;, score=-0.783 total time=   0.0s
[CV 2/3; 169/192] START decision_tree__max_depth=12, decision_tree__m

[CV 3/3; 178/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=50;, score=-1.153 total time=   0.0s
[CV 1/3; 179/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=100
[CV 1/3; 179/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=100;, score=-1.794 total time=   0.0s
[CV 2/3; 179/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=100
[CV 2/3; 179/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0, decision_tree__min_samples_split=100;, score=-1.874 total time=   0.0s
[CV 3/3; 179/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nod

[CV 3/3; 189/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=10;, score=-0.539 total time=   0.0s
[CV 1/3; 190/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=50
[CV 1/3; 190/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=50;, score=-0.783 total time=   0.0s
[CV 2/3; 190/192] START decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=50
[CV 2/3; 190/192] END decision_tree__max_depth=12, decision_tree__max_leaf_nodes=500, decision_tree__min_impurity_decrease=0.2, decision_tree__min_samples_split=50;, score=-0.713 total time=   0.0s
[CV 3/3; 190/192] START decision_tree__max_depth=12, decision_tree__max_le