## Загрузка данных и подключение библиотек

In [1]:
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json
!kaggle competitions download -c tabular-playground-series-may-2021
!pip install catboost
!pip install eli5
!pip install optuna
!pip install shap
!pip install scikit-learn-extra
!unzip /content/test.csv.zip
!unzip /content/train.csv.zip
!unzip /content/sample_submission.csv.zip

kaggle.json
Downloading sample_submission.csv.zip to /content
  0% 0.00/128k [00:00<?, ?B/s]
100% 128k/128k [00:00<00:00, 40.2MB/s]
Downloading test.csv.zip to /content
  0% 0.00/851k [00:00<?, ?B/s]
100% 851k/851k [00:00<00:00, 54.9MB/s]
Downloading train.csv.zip to /content
  0% 0.00/1.72M [00:00<?, ?B/s]
100% 1.72M/1.72M [00:00<00:00, 117MB/s]
Collecting catboost
[?25l  Downloading https://files.pythonhosted.org/packages/47/80/8e9c57ec32dfed6ba2922bc5c96462cbf8596ce1a6f5de532ad1e43e53fe/catboost-0.25.1-cp37-none-manylinux1_x86_64.whl (67.3MB)
[K     |████████████████████████████████| 67.3MB 42kB/s 
Installing collected packages: catboost
Successfully installed catboost-0.25.1
Collecting eli5
[?25l  Downloading https://files.pythonhosted.org/packages/d1/54/04cab6e1c0ae535bec93f795d8403fdf6caf66fa5a6512263202dbb14ea6/eli5-0.11.0-py2.py3-none-any.whl (106kB)
[K     |████████████████████████████████| 112kB 3.0MB/s 
Installing collected packages: eli5
Successfully installed eli5-0.11

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import catboost as cb
import sklearn
import xgboost as xgb
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.decomposition import PCA, FactorAnalysis as FA
from typing import List, Optional
from sklearn.model_selection import KFold, cross_val_score, StratifiedKFold, train_test_split
from typing import List, Tuple
import scipy.stats as ss
from sklearn_extra.cluster import KMedoids
import math
from sklearn.utils.validation import check_is_fitted
import eli5
from sklearn.base import BaseEstimator, TransformerMixin
import time
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
from sklearn.inspection import permutation_importance
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import lightgbm as lgb
import shap
import missingno as msno
from sklearn.inspection import permutation_importance
from eli5.sklearn import PermutationImportance
import optuna
from sklearn.metrics import log_loss
# Register pandas' date/time converters with matplotlib.
pd.plotting.register_matplotlib_converters()
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Apply the seaborn theme; color_codes=True remaps the one-letter colour codes to the seaborn palette.
sns.set(color_codes=True)
# Use a 10-colour viridis palette for all subsequent seaborn plots.
pal = sns.color_palette("viridis", 10)
sns.set_palette(pal)

## Используемые функции

In [2]:
def get_input(data_path: str, base_path: str = "/content") -> pd.DataFrame:
  """
  Read a CSV file and print the basic dimensions of the data set.

  Parameters
  ----------
  data_path: str - file name, relative to `base_path`
  base_path: str - directory that contains the file, "/content" by default

  Returns
  -------
  data: pandas.core.frame.DataFrame - the loaded data set with lower-cased column names
  """
  data = pd.read_csv(f"{base_path}/{data_path}")
  # Lower-case the header so the rest of the notebook can use one spelling.
  data.columns = [col.lower() for col in data.columns]
  n_rows, n_cols = data.shape
  print(f"{data_path}: shape = {n_rows} rows, {n_cols} cols")
  return data

In [3]:
def plot_feature_importance(importance, names, model_type, figsize=(10,8)):
  """
  Draw a horizontal bar chart of feature importances, largest first.

  Parameters
  ----------
  importance: array-like - importance value of every feature
  names: array-like - feature names, in the same order as `importance`
  model_type: str - label placed in front of the chart title
  figsize: tuple - size of the matplotlib figure

  Returns
  -------
  list - feature names ordered by decreasing importance
  """
  # One row per feature, ordered from most to least important
  ranking = pd.DataFrame({
      'feature_names': np.array(names),
      'feature_importance': np.array(importance),
  }).sort_values(by=['feature_importance'], ascending=False)

  # New figure, then one bar per feature in ranking order
  plt.figure(figsize=figsize)
  sns.barplot(x=ranking['feature_importance'], y=ranking['feature_names'])

  # Title and axis labels
  plt.title(model_type + ' FEATURE IMPORTANCE')
  plt.xlabel('FEATURE IMPORTANCE')
  plt.ylabel('FEATURE NAMES')

  return list(ranking['feature_names'].values)

In [4]:
def multi_estimators_predict(estimators: List,
                             x_valid: pd.DataFrame,
                             y_valid = None,
                             metric: callable = None,
                             scalers: List = None):
  """
  Average the predictions of several fitted models.

  Parameters
  ----------
  estimators: list - fitted models; xgboost Booster objects are queried with
      predict(), everything else with predict_proba()
  x_valid: pandas.core.frame.DataFrame - features to predict on
  y_valid: optional - true labels; only used for the printed report
  metric: callable, optional - metric(y_true, y_pred); the report is printed
      only when both `y_valid` and `metric` are given
  scalers: list, optional - one fitted scaler per estimator; when given,
      scalers[i].transform(x_valid) is fed to estimators[i].predict_proba()

  Returns
  -------
  np.array - element-wise mean of the individual predictions

  Raises
  ------
  ValueError - if `estimators` is empty
  """
  if len(estimators) == 0:
    raise ValueError("estimators must contain at least one fitted model")
  if scalers:
    assert len(estimators) == len(scalers)

  report = (y_valid is not None) and (metric is not None)
  preds = []
  scores = []

  for i, estimator in enumerate(estimators):
    if scalers:
      pred = estimator.predict_proba(scalers[i].transform(x_valid))
    elif isinstance(estimator, xgb.core.Booster):
      # Decide per estimator (not from the first one) so mixed lists work.
      pred = estimator.predict(x_valid)
    else:
      pred = estimator.predict_proba(x_valid)

    preds.append(pred)
    if report:
      scores.append(metric(y_valid, pred))

  # Stack along a new leading "model" axis and average it away.
  result = np.mean(np.stack(preds, axis=0), axis=0)

  if report:
    for i, score in enumerate(scores):
      print(f"Model {i} metric: {score:.7}")
    print(f"Result model metric: {metric(y_valid, result):.7}")

  return result

In [5]:
def logistic_cv_fit(params, X, y, cv, scale=False):
    """
    Cross-validate a LogisticRegression model and collect out-of-fold predictions.

    Parameters
    ----------
    params: dict
        Hyperparameters forwarded to LogisticRegression.

    X: pandas.core.frame.DataFrame
        Feature matrix used for training.

    y: pandas.core.frame.Series
        Target vector used for training.

    cv: KFold or StratifiedKFold generator.
        Splitter whose split(X, y) yields positional train / validation
        indices for every fold.

    scale: bool, optional, default = False
        If True, a StandardScaler is fitted on the training part of every
        fold and applied to both parts of that fold.

    Returns
    -------
    estimators: list
        Fitted models, one per fold.

    oof_preds: np.array
        Out-of-fold class probabilities, shape (n_samples, n_classes).

    scalers: list
        Fitted scalers, one per fold; empty when `scale` is False.

    """
    # Work on a plain array so the target is always indexed by position.
    y_values = np.asarray(y)
    classes = np.unique(y_values)

    estimators, folds_scores, scalers = [], [], []
    oof_preds = np.zeros((X.shape[0], len(classes)))
    print(f"{time.ctime()}, Cross-Validation, {X.shape[0]} rows, {X.shape[1]} cols")

    for fold, (train_idx, valid_idx) in enumerate(cv.split(X, y)):
        # The splitter returns positions, so use .iloc; label-based .loc only
        # works when the index happens to be 0..n-1.
        x_train, x_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y_values[train_idx], y_values[valid_idx]
        if scale:
          # Fit the scaler on the training part only to avoid leakage.
          scaler = StandardScaler()
          x_train = scaler.fit_transform(x_train)
          x_valid = scaler.transform(x_valid)
        model = LogisticRegression(**params)
        model.fit(x_train, y_train)
        oof_preds[valid_idx] = model.predict_proba(x_valid)
        # Pass the full label set so a fold that lacks a class is still scored.
        score = metrics.log_loss(y_valid, oof_preds[valid_idx], labels=classes)
        print(f"Fold {fold+1}, Valid score = {round(score, 5)}")
        folds_scores.append(round(score, 5))
        estimators.append(model)
        if scale:
          scalers.append(scaler)

    print(f"Score by each fold: {folds_scores}")
    print("="*65)
    return estimators, oof_preds, scalers

In [6]:
def catboost_cv_fit(params, X, y, cv, categorical = None):
    """
    Cross-validate a CatBoostClassifier and collect out-of-fold predictions.

    Parameters
    ----------
    params: dict
        Hyperparameters forwarded to CatBoostClassifier.

    X: pandas.core.frame.DataFrame
        Feature matrix used for training.

    y: pandas.core.frame.Series
        Target vector used for training.

    cv: KFold or StratifiedKFold generator.
        Splitter whose split(X, y) yields positional train / validation
        indices for every fold.

    categorical: str, optional, default = None
        List of categorical features. Kept for interface compatibility;
        currently unused because cat_features is not passed to fit().

    Returns
    -------
    estimators: list
        Fitted models, one per fold.

    oof_preds: np.array
        Out-of-fold class probabilities, shape (n_samples, n_classes).

    """
    # Work on a plain array so the target is always indexed by position.
    y_values = np.asarray(y)
    classes = np.unique(y_values)

    estimators, folds_scores = [], []
    oof_preds = np.zeros((X.shape[0], len(classes)))
    print(f"{time.ctime()}, Cross-Validation, {X.shape[0]} rows, {X.shape[1]} cols")

    for fold, (train_idx, valid_idx) in enumerate(cv.split(X, y)):
        # The splitter returns positions, so use .iloc; label-based .loc only
        # works when the index happens to be 0..n-1.
        x_train, x_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y_values[train_idx], y_values[valid_idx]

        model = cb.CatBoostClassifier(**params)
        # No eval_set / early stopping: every fold trains for the number of
        # iterations given in `params`.
        model.fit(x_train, y_train, verbose=10)

        oof_preds[valid_idx] = model.predict_proba(x_valid)
        # Pass the full label set so a fold that lacks a class is still scored.
        score = metrics.log_loss(y_valid, oof_preds[valid_idx], labels=classes)
        print(f"Fold {fold+1}, Valid score = {round(score, 5)}")
        folds_scores.append(round(score, 5))
        estimators.append(model)

    print(f"Score by each fold: {folds_scores}")
    print("="*65)
    return estimators, oof_preds

In [7]:
def lightgbm_cv_fit(params, X, y, cv, categorical = None):
    """
    Cross-validate an LGBMClassifier and collect out-of-fold predictions.

    Parameters
    ----------
    params: dict
        Hyperparameters forwarded to LGBMClassifier.

    X: pandas.core.frame.DataFrame
        Feature matrix used for training.

    y: pandas.core.frame.Series
        Target vector used for training.

    cv: KFold or StratifiedKFold generator.
        Splitter whose split(X, y) yields positional train / validation
        indices for every fold.

    categorical: str, optional, default = None
        List of categorical features. Kept for interface compatibility;
        currently unused because categorical_feature is not passed to fit().

    Returns
    -------
    estimators: list
        Fitted models, one per fold.

    oof_preds: np.array
        Out-of-fold class probabilities, shape (n_samples, n_classes).

    """
    # Work on a plain array so the target is always indexed by position.
    y_values = np.asarray(y)
    classes = np.unique(y_values)

    estimators, folds_scores = [], []
    oof_preds = np.zeros((X.shape[0], len(classes)))
    print(f"{time.ctime()}, Cross-Validation, {X.shape[0]} rows, {X.shape[1]} cols")

    for fold, (train_idx, valid_idx) in enumerate(cv.split(X, y)):
        # The splitter returns positions, so use .iloc; label-based .loc only
        # works when the index happens to be 0..n-1.
        x_train, x_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y_values[train_idx], y_values[valid_idx]

        model = lgb.LGBMClassifier(**params)
        # No eval_set / early stopping: every fold trains for the n_estimators
        # given in `params`.
        # NOTE(review): the `verbose` keyword of fit() was dropped in
        # LightGBM 4.0 - confirm the installed version before upgrading.
        model.fit(x_train, y_train, verbose=10)

        oof_preds[valid_idx] = model.predict_proba(x_valid)
        # Pass the full label set so a fold that lacks a class is still scored.
        score = metrics.log_loss(y_valid, oof_preds[valid_idx], labels=classes)
        print(f"Fold {fold+1}, Valid score = {round(score, 5)}")
        folds_scores.append(round(score, 5))
        estimators.append(model)

    print(f"Score by each fold: {folds_scores}")
    print("="*65)
    return estimators, oof_preds

In [8]:
def xgboost_cv_fit(params, X, y, cv, categorical = None):
    """
    Cross-validate an XGBClassifier and collect out-of-fold predictions.

    Parameters
    ----------
    params: dict
        Hyperparameters forwarded to XGBClassifier.

    X: pandas.core.frame.DataFrame
        Feature matrix used for training. When `categorical` is given, the
        listed columns are label-encoded IN PLACE, so the caller's frame is
        modified.

    y: pandas.core.frame.Series
        Target vector used for training.

    cv: KFold or StratifiedKFold generator.
        Splitter whose split(X, y) yields positional train / validation
        indices for every fold.

    categorical: str, optional, default = None
        List of categorical features to label-encode before training.
        Optional; nothing is encoded by default.

    Returns
    -------
    estimators: list
        Fitted models, one per fold.

    encoders: dict
        Fitted LabelEncoder per encoded feature name; empty when
        `categorical` is not given.

    oof_preds: np.array
        Out-of-fold class probabilities, shape (n_samples, n_classes).

    """
    # Work on a plain array so the target is always indexed by position.
    y_values = np.asarray(y)
    classes = np.unique(y_values)

    estimators, encoders = [], {}
    oof_preds = np.zeros((X.shape[0], len(classes)))

    if categorical:
        # Imported here because the notebook's import cell does not provide it.
        from sklearn.preprocessing import LabelEncoder

        for feature in categorical:
            encoder = LabelEncoder()
            # NOTE(review): astype("str") runs before fillna, so missing
            # values are encoded as the string "nan" and fillna("NA") has no
            # effect. Left as is because the returned encoders are fitted on
            # exactly this output - confirm before reordering.
            X[feature] = encoder.fit_transform(X[feature].astype("str").fillna("NA"))
            encoders[feature] = encoder

    print(f"{time.ctime()}, Cross-Validation, {X.shape[0]} rows, {X.shape[1]} cols")

    for fold, (train_idx, valid_idx) in enumerate(cv.split(X, y)):
        # The splitter returns positions, so use .iloc; label-based .loc only
        # works when the index happens to be 0..n-1.
        x_train, x_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y_values[train_idx], y_values[valid_idx]

        model = xgb.XGBClassifier(**params)
        # No eval_set / early stopping: every fold trains for the n_estimators
        # given in `params`.
        model.fit(x_train, y_train, verbose=10)

        oof_preds[valid_idx] = model.predict_proba(x_valid)
        # Pass the full label set so a fold that lacks a class is still scored.
        score = metrics.log_loss(y_valid, oof_preds[valid_idx], labels=classes)
        print(f"Fold {fold+1}, Valid score = {round(score, 5)}")
        estimators.append(model)

    return estimators, encoders, oof_preds

## Загрузка данных и построение моделей

### lightgbm + optuna

In [None]:
def objective(trial):
    """
    Optuna objective: hold-out multi-class log loss of a LightGBM booster.

    Loads train.csv, maps the class labels to 0..3, trains on half of the
    rows with the hyperparameters sampled from `trial` and scores the other
    half.

    Parameters
    ----------
    trial: optuna trial object used to sample the hyperparameters.

    Returns
    -------
    float - log loss on the hold-out half (lower is better).
    """
    data = get_input("train.csv")
    data.drop(columns='id', inplace=True)
    mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
    data['target'] = data['target'].map(mapper)
    target = data['target']
    data = data.drop(columns=['target'])

    # Fixed seed: every trial is scored on the same split, so differences
    # between trials come from the hyperparameters and not from the split.
    train_x, valid_x, train_y, valid_y = train_test_split(
        data, target, test_size=0.5, random_state=42
    )
    dtrain = lgb.Dataset(train_x, label=train_y)
    dvalid = lgb.Dataset(valid_x, label=valid_y)

    param = {
        "objective": "multiclass",
        "metric": "multi_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
        "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        'num_class': 4,
        'max_depth': trial.suggest_int('max_depth', 3,7),
        'learning_rate': trial.suggest_uniform('learning_rate', 0.05, 0.13),
    }

    # Report the validation metric to Optuna after every boosting round so
    # the study's pruner can stop unpromising trials early.
    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "multi_logloss")
    gbm = lgb.train(
        param, dtrain,
        valid_sets=[dvalid],
        verbose_eval=False,
        callbacks=[pruning_callback]
    )

    # With the multiclass objective predict() yields one probability per class.
    preds = gbm.predict(valid_x)
    return sklearn.metrics.log_loss(valid_y, preds)

In [None]:
# It is better to tune the hyperparameters on the whole data set through
# cross-validation.
# `lgb` intentionally stays bound to the plain `lightgbm` package here:
# `optuna.integration.lightgbm` swaps `train` and the sklearn wrappers for
# tuner-specific versions, which breaks cells that build `lgb.LGBMClassifier`.
# Only `lgb.Dataset` and `lgb.cv` are needed below.
def objective(trial):
    """
    Optuna objective: 3-fold cross-validated multi-class log loss of LightGBM.

    Loads train.csv, maps the class labels to 0..3 and runs lgb.cv with the
    hyperparameters sampled from `trial`.

    Parameters
    ----------
    trial: optuna trial object used to sample the hyperparameters.

    Returns
    -------
    float - mean + standard deviation of the fold log losses at the last
    recorded boosting round (lower is better); adding the deviation
    penalises settings whose folds disagree.
    """
    data = get_input("train.csv")
    data.drop(columns='id', inplace=True)
    mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
    data['target'] = data['target'].map(mapper)
    target = data['target']
    data = data.drop(columns=['target'])

    dtrain = lgb.Dataset(data, label=target)

    param = {
        "objective": "multiclass",
        "metric": "multi_logloss",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
        "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_uniform("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_uniform("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        'num_class': 4,

        'max_depth': trial.suggest_int('max_depth', 3,7),
        'learning_rate': trial.suggest_uniform('learning_rate', 0.03, 0.13 ),
    }

    # Report the aggregated CV metric to Optuna after every boosting round.
    # The callback has to be handed to lgb.cv, otherwise the study's pruner
    # never receives intermediate values and cannot prune anything.
    pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "multi_logloss")
    result = lgb.cv(
      params=param,
      train_set=dtrain,
      num_boost_round=10000,
      early_stopping_rounds=100,
      stratified=True,
      seed=42,
      metrics="multi_logloss",
      shuffle=True,
      nfold=3,
      callbacks=[pruning_callback]
    )

    return result['multi_logloss-mean'][-1] + result['multi_logloss-stdv'][-1]

In [None]:
# Minimise the objective. The median pruner does not prune a trial during its
# first 20 reported steps (n_warmup_steps=20).
study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=20), direction="minimize"
    )
# Uses whichever `objective` was defined most recently in the kernel.
study.optimize(objective, n_trials=200)

print("Best trial:")
trial = study.best_trial

# List the sampled hyperparameters of the best trial, one per line.
print("  Params: ")
for key, value in trial.params.items():
  print("    {}: {}".format(key, value))

[32m[I 2021-05-25 20:05:15,460][0m A new study created in memory with name: no-name-67bb9b06-6056-4c5f-9440-69bfd5220d67[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:06:07,912][0m Trial 0 finished with value: 1.095420663265998 and parameters: {'lambda_l1': 0.00024587192153438774, 'lambda_l2': 2.9664723207282242e-08, 'num_leaves': 223, 'feature_fraction': 0.7099892913219672, 'bagging_fraction': 0.7228809986014417, 'bagging_freq': 6, 'min_child_samples': 97, 'max_depth': 5, 'learning_rate': 0.08549446305776937}. Best is trial 0 with value: 1.095420663265998.[0m


train.csv: shape = 100000 rows, 52 cols


KeyboardInterrupt: ignored

In [None]:
# Optimization history of the study: objective value of every trial.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Slice plot: objective value against each sampled hyperparameter.
optuna.visualization.plot_slice(study)

In [None]:
# Estimated importance of each hyperparameter for the objective value.
optuna.visualization.plot_param_importances(study)

In [None]:
# Hyperparameters of the best trial in `study`.
print(study.best_params)

{'lambda_l1': 1.45120566383297e-06, 'lambda_l2': 0.003368552965821498, 'num_leaves': 32, 'feature_fraction': 0.4471131328810426, 'bagging_fraction': 0.5888198372400193, 'bagging_freq': 4, 'min_child_samples': 32}


In [None]:
# Objective value of the best trial in `study`.
study.best_value

1.0833801288748757

In [None]:
# Reload the training data and split it into features and an integer target.
data = get_input("train.csv")
data.drop(columns='id', inplace=True)
mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
data['target'] = data['target'].map(mapper)
target = data['target']
data = data.drop(columns=['target'])

# Hyperparameters picked after the Optuna search.
# The key was previously spelled "boosting_type " (trailing space), which is
# not the intended parameter name.
lgb_params = {
    "boosting_type": "gbdt",
    "objective": "multiclass",
    "metric": "multi_logloss",
    'num_class': 4,
    'lambda_l1': 0.02259438646302076,
    'lambda_l2': 3.3834082296901886e-05,
    'num_leaves': 256,
    'feature_fraction': 0.42939073275678896,
    'bagging_fraction': 0.8760623722003144,
    'bagging_freq': 4,
    'min_child_samples': 71,
    'max_depth': 4,
    'learning_rate': 0.07658957460133804,
}

dtrain = lgb.Dataset(data=data, label=target)

# 5-fold stratified CV with early stopping; the length of the returned
# history is used to choose the number of boosting rounds.
result = lgb.cv(
    params=lgb_params,
    train_set=dtrain,
    num_boost_round=2000,
    early_stopping_rounds=100,
    verbose_eval=10,
    stratified=True,
    seed=42,
    metrics="multi_logloss",
    shuffle=True,
    nfold=5
)

train.csv: shape = 100000 rows, 52 cols
[10]	cv_agg's multi_logloss: 1.11339 + 0.00019763
[20]	cv_agg's multi_logloss: 1.10998 + 0.000341567
[30]	cv_agg's multi_logloss: 1.10748 + 0.000399434
[40]	cv_agg's multi_logloss: 1.1054 + 0.000482044
[50]	cv_agg's multi_logloss: 1.10365 + 0.000591791
[60]	cv_agg's multi_logloss: 1.10218 + 0.000701451
[70]	cv_agg's multi_logloss: 1.10096 + 0.000759168
[80]	cv_agg's multi_logloss: 1.09988 + 0.000727296
[90]	cv_agg's multi_logloss: 1.09888 + 0.000727316
[100]	cv_agg's multi_logloss: 1.09803 + 0.000788178
[110]	cv_agg's multi_logloss: 1.09726 + 0.00084155
[120]	cv_agg's multi_logloss: 1.09667 + 0.000892679
[130]	cv_agg's multi_logloss: 1.09616 + 0.000970538
[140]	cv_agg's multi_logloss: 1.09569 + 0.00102404
[150]	cv_agg's multi_logloss: 1.09526 + 0.00106349
[160]	cv_agg's multi_logloss: 1.09494 + 0.0011131
[170]	cv_agg's multi_logloss: 1.09459 + 0.00114867
[180]	cv_agg's multi_logloss: 1.09433 + 0.00113994
[190]	cv_agg's multi_logloss: 1.09407 + 0.

In [None]:
# Length of the CV metric history, i.e. how many boosting rounds lgb.cv kept.
len(result['multi_logloss-mean'])

334

In [11]:
# Bind `lgb` to the plain lightgbm package; lightgbm_cv_fit builds
# lgb.LGBMClassifier from it.
import lightgbm as lgb
data = get_input("train.csv")
test = get_input("test.csv")

# Drop the `id` column from the training frame so it is not used as a feature
# (`test` keeps its `id` column here).
data.drop(columns='id', inplace=True)
# Encode the class labels as integers 0..3.
mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
data['target'] = data['target'].map(mapper)

# Separate the target vector from the feature matrix.
target = data['target']
data = data.drop(columns=['target'])

# Disabled hold-out split, kept for reference; the models below are evaluated
# with cross-validation instead.
#train, valid = train_test_split(
#    data, train_size=0.7, shuffle=True, random_state=1,
#)

#train.reset_index(inplace=True, drop=True)
#valid.reset_index(inplace=True, drop=True)

#y_train = train['target']
#x_train = train.drop(columns=['target'])
#y_valid = valid['target']
#x_valid = valid.drop(columns=['target'])

train.csv: shape = 100000 rows, 52 cols
test.csv: shape = 50000 rows, 51 cols


In [15]:
# Parameters found by the Optuna search, kept for reference only. They used to
# be assigned to `lgb_params` and were immediately overwritten by the dict
# below, so they get their own name here.
# In both dicts the key was previously spelled "boosting_type " (trailing
# space), which is not the intended parameter name.
lgb_params_optuna = {
    "boosting_type": "gbdt",
    "objective": "multiclass",
    "metric": "multi_logloss",
    'n_estimators': 500,
    'num_class': 4,
    'lambda_l1': 0.02259438646302076,
    'lambda_l2': 3.3834082296901886e-05,
    'num_leaves': 256,
    'feature_fraction': 0.42939073275678896,
    'bagging_fraction': 0.8760623722003144,
    'bagging_freq': 4,
    'min_child_samples': 71,
    'max_depth': 3,
    'learning_rate': 0.07658957460133804,
    'random_state': 42,
}

# Parameters actually used for the final model: much stronger L2
# regularisation, small trees and a low feature fraction.
lgb_params = {
    "boosting_type": "gbdt",
    "objective": "multiclass",
    "metric": "multi_logloss",
    'n_estimators': 710,
    'num_class': 4,
    'lambda_l1': 0.02,
    'lambda_l2': 150,
    'num_leaves': 7,
    'feature_fraction': 0.11,
    'bagging_fraction': 0.9,
    'bagging_freq': 4,
    'min_child_samples': 80,
    'max_depth': 5,
    'learning_rate': 0.075,
    'random_state': 42,
}

# 15 stratified, shuffled folds with a fixed seed for reproducible splits.
cv = StratifiedKFold(n_splits=15, random_state=435, shuffle=True)

lgb_estimators, lgb_oof = lightgbm_cv_fit(
    lgb_params, data, target, cv
)

Sun May 30 13:25:22 2021, Cross-Validation, 100000 rows, 50 cols
Fold 1, Valid score = 1.09527
Fold 2, Valid score = 1.08856
Fold 3, Valid score = 1.08425
Fold 5, Valid score = 1.08976
Fold 6, Valid score = 1.093
Fold 7, Valid score = 1.09338
Fold 8, Valid score = 1.08778
Fold 9, Valid score = 1.08888
Fold 10, Valid score = 1.09407
Fold 11, Valid score = 1.08974
Fold 12, Valid score = 1.09084
Fold 13, Valid score = 1.09176
Fold 14, Valid score = 1.08488
Fold 15, Valid score = 1.08688
Score by each fold: [1.09527, 1.08856, 1.08425, 1.09275, 1.08976, 1.093, 1.09338, 1.08778, 1.08888, 1.09407, 1.08974, 1.09084, 1.09176, 1.08488, 1.08688]


In [17]:
# Log loss of the pooled out-of-fold predictions against the full target vector.
print(f"Out of fold log loss {metrics.log_loss(target, lgb_oof)}")

Out of fold log loss 1.0901208836954104


In [None]:
# Show the first fold's fitted estimator and its parameters.
lgb_estimators[0]

LGBMClassifier(bagging_fraction=0.8760623722003144, bagging_freq=4,
               boosting_type ='gbdt', feature_fraction=0.42939073275678896,
               lambda_l1=0.02259438646302076, lambda_l2=3.3834082296901886e-05,
               learning_rate=0.07658957460133804, max_depth=4,
               metric='multi_logloss', min_child_samples=71, n_estimators=334,
               num_class=4, num_leaves=256, objective='multiclass',
               random_state=42)

### Catboost

In [None]:
# Optuna with cross-validation runs very slowly, so a first approximation is
# obtained on a hold-out split and fine-tuned afterwards.
def objective(trial):
    """
    Optuna objective: hold-out multi-class log loss of a CatBoost classifier.

    Loads train.csv, maps the class labels to 0..3, trains on 65% of the rows
    with the hyperparameters sampled from `trial` (early stopping on the
    remaining 35%) and scores that hold-out part.

    Parameters
    ----------
    trial: optuna trial object used to sample the hyperparameters.

    Returns
    -------
    float - log loss on the hold-out part (lower is better).
    """
    data = get_input("train.csv")
    data.drop(columns='id', inplace=True)
    mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
    data['target'] = data['target'].map(mapper)
    target = data['target']
    data = data.drop(columns=['target'])

    # Fixed seed: every trial is scored on the same split, so differences
    # between trials come from the hyperparameters and not from the split.
    train_x, valid_x, train_y, valid_y = train_test_split(
        data, target, test_size=0.35, random_state=42
    )

    # Not tuned here: colsample_bylevel, subsample, bagging_temperature
    # (Bayesian bootstrap only), bootstrap_type, leaf_estimation_method.
    param = {
        "loss_function": "MultiClass",
        "eval_metric": "MultiClass",
        "task_type": "GPU",

        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
        'depth': trial.suggest_int('depth', 3,7),
        'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.13 ),
        'random_strength' :trial.suggest_int('random_strength', 0, 100),
        "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1e-8, 100),

        'grow_policy': "SymmetricTree",
    }

    # No pruning callback: the LightGBM callback that used to be built here
    # was never passed to the model and does not apply to CatBoost.
    gbm = cb.CatBoostClassifier(**param)

    gbm.fit(train_x, train_y, eval_set=[(valid_x, valid_y)], verbose=0, early_stopping_rounds=100)
    preds = gbm.predict_proba(valid_x)

    return sklearn.metrics.log_loss(valid_y, preds)

In [None]:
# Minimise the objective. The median pruner does not prune a trial during its
# first 20 reported steps (n_warmup_steps=20).
study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=20), direction="minimize"
    )
# Uses whichever `objective` was defined most recently in the kernel.
study.optimize(objective, n_trials=200)

print("Best trial:")
trial = study.best_trial

# List the sampled hyperparameters of the best trial, one per line.
print("  Params: ")
for key, value in trial.params.items():
  print("    {}: {}".format(key, value))

[32m[I 2021-05-25 20:07:18,768][0m A new study created in memory with name: no-name-234ea1e3-17bb-4364-985a-ec4112338ece[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:19,978][0m Trial 0 finished with value: 1.186633703881519 and parameters: {'min_data_in_leaf': 24, 'depth': 3, 'learning_rate': 0.12078519555155853, 'random_strength': 58, 'l2_leaf_reg': 2.6581884406103866e-06}. Best is trial 0 with value: 1.186633703881519.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:25,654][0m Trial 1 finished with value: 1.0898680635762144 and parameters: {'min_data_in_leaf': 33, 'depth': 4, 'learning_rate': 0.07195112205603399, 'random_strength': 86, 'l2_leaf_reg': 1.0362623676141234}. Best is trial 1 with value: 1.0898680635762144.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:26,936][0m Trial 2 finished with value: 1.3173342168736666 and parameters: {'min_data_in_leaf': 48, 'depth': 5, 'learning_rate': 0.12118267255718547, 'random_strength': 91, 'l2_leaf_reg': 2.187737122396502e-08}. Best is trial 1 with value: 1.0898680635762144.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:28,074][0m Trial 3 finished with value: 1.2885143238830576 and parameters: {'min_data_in_leaf': 31, 'depth': 3, 'learning_rate': 0.09696703499053426, 'random_strength': 37, 'l2_leaf_reg': 1.2406705861350582e-05}. Best is trial 1 with value: 1.0898680635762144.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:29,193][0m Trial 4 finished with value: 1.3487214925450937 and parameters: {'min_data_in_leaf': 54, 'depth': 3, 'learning_rate': 0.03290172667305659, 'random_strength': 27, 'l2_leaf_reg': 4.774231469146752e-06}. Best is trial 1 with value: 1.0898680635762144.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:37,878][0m Trial 5 finished with value: 1.0889162908317813 and parameters: {'min_data_in_leaf': 77, 'depth': 7, 'learning_rate': 0.029612677933852936, 'random_strength': 94, 'l2_leaf_reg': 0.00011373313536023518}. Best is trial 5 with value: 1.0889162908317813.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:39,281][0m Trial 6 finished with value: 1.302177859838092 and parameters: {'min_data_in_leaf': 24, 'depth': 6, 'learning_rate': 0.08058916149233035, 'random_strength': 93, 'l2_leaf_reg': 1.1370106091251025e-06}. Best is trial 5 with value: 1.0889162908317813.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:47,215][0m Trial 7 finished with value: 1.09778252116426 and parameters: {'min_data_in_leaf': 60, 'depth': 7, 'learning_rate': 0.019222503766012825, 'random_strength': 61, 'l2_leaf_reg': 6.0086680216965815}. Best is trial 5 with value: 1.0889162908317813.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:50,283][0m Trial 8 finished with value: 1.0949399281089018 and parameters: {'min_data_in_leaf': 79, 'depth': 6, 'learning_rate': 0.09010758048307221, 'random_strength': 42, 'l2_leaf_reg': 0.00031754537242025536}. Best is trial 5 with value: 1.0889162908317813.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:07:57,351][0m Trial 9 finished with value: 1.090675829555737 and parameters: {'min_data_in_leaf': 16, 'depth': 5, 'learning_rate': 0.022515184320922844, 'random_strength': 3, 'l2_leaf_reg': 0.007360147687425196}. Best is trial 5 with value: 1.0889162908317813.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:05,433][0m Trial 10 finished with value: 1.089795460988898 and parameters: {'min_data_in_leaf': 89, 'depth': 7, 'learning_rate': 0.05010642411806855, 'random_strength': 75, 'l2_leaf_reg': 0.0082235810452614}. Best is trial 5 with value: 1.0889162908317813.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:12,788][0m Trial 11 finished with value: 1.0940156528097917 and parameters: {'min_data_in_leaf': 100, 'depth': 7, 'learning_rate': 0.04243292265973676, 'random_strength': 74, 'l2_leaf_reg': 0.024042200708683355}. Best is trial 5 with value: 1.0889162908317813.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:18,801][0m Trial 12 finished with value: 1.0970233447213409 and parameters: {'min_data_in_leaf': 95, 'depth': 7, 'learning_rate': 0.04680601468804839, 'random_strength': 74, 'l2_leaf_reg': 0.00031147846915903304}. Best is trial 5 with value: 1.0889162908317813.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:25,122][0m Trial 13 finished with value: 1.0869168701293184 and parameters: {'min_data_in_leaf': 80, 'depth': 6, 'learning_rate': 0.05557664790258706, 'random_strength': 75, 'l2_leaf_reg': 0.17076216460834498}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:30,051][0m Trial 14 finished with value: 1.094574348718123 and parameters: {'min_data_in_leaf': 73, 'depth': 6, 'learning_rate': 0.060526678307841975, 'random_strength': 100, 'l2_leaf_reg': 0.2621555442458}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:35,964][0m Trial 15 finished with value: 1.0999934903965098 and parameters: {'min_data_in_leaf': 72, 'depth': 6, 'learning_rate': 0.014175811896680296, 'random_strength': 100, 'l2_leaf_reg': 0.19544945187028492}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:43,547][0m Trial 16 finished with value: 1.0951472922018242 and parameters: {'min_data_in_leaf': 88, 'depth': 6, 'learning_rate': 0.03164915071400415, 'random_strength': 81, 'l2_leaf_reg': 58.96772387736771}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:44,828][0m Trial 17 finished with value: 1.3498115755394768 and parameters: {'min_data_in_leaf': 64, 'depth': 5, 'learning_rate': 0.0620943623388789, 'random_strength': 66, 'l2_leaf_reg': 4.001607179131703e-08}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:46,475][0m Trial 18 finished with value: 1.3661173627100829 and parameters: {'min_data_in_leaf': 78, 'depth': 7, 'learning_rate': 0.03384338484436058, 'random_strength': 53, 'l2_leaf_reg': 5.9950172509526934e-05}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:52,099][0m Trial 19 finished with value: 1.0930752961393073 and parameters: {'min_data_in_leaf': 47, 'depth': 4, 'learning_rate': 0.059975321977370495, 'random_strength': 95, 'l2_leaf_reg': 46.21602942887901}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:08:58,461][0m Trial 20 finished with value: 1.0956206476692645 and parameters: {'min_data_in_leaf': 86, 'depth': 6, 'learning_rate': 0.0487189829246778, 'random_strength': 81, 'l2_leaf_reg': 0.0034420161371314434}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:04,271][0m Trial 21 finished with value: 1.0964208791848402 and parameters: {'min_data_in_leaf': 92, 'depth': 7, 'learning_rate': 0.05155988672844171, 'random_strength': 70, 'l2_leaf_reg': 0.05073263276454544}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:08,350][0m Trial 22 finished with value: 1.0943119874389708 and parameters: {'min_data_in_leaf': 100, 'depth': 7, 'learning_rate': 0.07363262492465013, 'random_strength': 81, 'l2_leaf_reg': 0.0014202170487802562}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:15,444][0m Trial 23 finished with value: 1.0942957978348875 and parameters: {'min_data_in_leaf': 81, 'depth': 7, 'learning_rate': 0.010415070300543172, 'random_strength': 46, 'l2_leaf_reg': 2.419516376036321}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:22,779][0m Trial 24 finished with value: 1.0945391816317032 and parameters: {'min_data_in_leaf': 68, 'depth': 6, 'learning_rate': 0.04093752837633574, 'random_strength': 87, 'l2_leaf_reg': 0.00011512520576207445}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:31,433][0m Trial 25 finished with value: 1.0911117665423764 and parameters: {'min_data_in_leaf': 85, 'depth': 7, 'learning_rate': 0.025349720673813872, 'random_strength': 66, 'l2_leaf_reg': 0.01531751738760251}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:36,758][0m Trial 26 finished with value: 1.0938253751049742 and parameters: {'min_data_in_leaf': 94, 'depth': 6, 'learning_rate': 0.0561982029135949, 'random_strength': 77, 'l2_leaf_reg': 0.13698238968659376}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:45,475][0m Trial 27 finished with value: 1.0948455760112155 and parameters: {'min_data_in_leaf': 73, 'depth': 7, 'learning_rate': 0.038374213706832555, 'random_strength': 97, 'l2_leaf_reg': 0.0034145043003224423}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:46,868][0m Trial 28 finished with value: 1.3390547462443718 and parameters: {'min_data_in_leaf': 100, 'depth': 6, 'learning_rate': 0.08153624038253171, 'random_strength': 54, 'l2_leaf_reg': 2.0857776634676497e-07}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:48,200][0m Trial 29 finished with value: 1.3230146482528375 and parameters: {'min_data_in_leaf': 59, 'depth': 5, 'learning_rate': 0.1104600254746971, 'random_strength': 59, 'l2_leaf_reg': 2.2118644783930476e-05}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:52,531][0m Trial 30 finished with value: 1.0909489404184392 and parameters: {'min_data_in_leaf': 82, 'depth': 7, 'learning_rate': 0.06751713804896142, 'random_strength': 87, 'l2_leaf_reg': 0.0008407576807235519}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:09:57,979][0m Trial 31 finished with value: 1.0947043377890044 and parameters: {'min_data_in_leaf': 12, 'depth': 4, 'learning_rate': 0.069968793451516, 'random_strength': 87, 'l2_leaf_reg': 1.4015505546670397}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:03,459][0m Trial 32 finished with value: 1.0962105962450035 and parameters: {'min_data_in_leaf': 35, 'depth': 4, 'learning_rate': 0.05327634231289161, 'random_strength': 90, 'l2_leaf_reg': 0.8421502382481408}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:09,109][0m Trial 33 finished with value: 1.0888008655046322 and parameters: {'min_data_in_leaf': 44, 'depth': 4, 'learning_rate': 0.08326139993190705, 'random_strength': 82, 'l2_leaf_reg': 4.38787365313175}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:14,757][0m Trial 34 finished with value: 1.0891163228015432 and parameters: {'min_data_in_leaf': 44, 'depth': 4, 'learning_rate': 0.07401870226859777, 'random_strength': 68, 'l2_leaf_reg': 4.806633786667923}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:19,640][0m Trial 35 finished with value: 1.0912007422677925 and parameters: {'min_data_in_leaf': 46, 'depth': 3, 'learning_rate': 0.10260585472884601, 'random_strength': 61, 'l2_leaf_reg': 7.1739537778928675}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:24,885][0m Trial 36 finished with value: 1.0911295965989822 and parameters: {'min_data_in_leaf': 40, 'depth': 4, 'learning_rate': 0.08362685593342127, 'random_strength': 68, 'l2_leaf_reg': 19.5016585222766}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:29,889][0m Trial 37 finished with value: 1.0904304174072934 and parameters: {'min_data_in_leaf': 52, 'depth': 3, 'learning_rate': 0.09195494513929797, 'random_strength': 82, 'l2_leaf_reg': 15.640398186875675}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:34,024][0m Trial 38 finished with value: 1.091773130108656 and parameters: {'min_data_in_leaf': 27, 'depth': 4, 'learning_rate': 0.10431303937438824, 'random_strength': 19, 'l2_leaf_reg': 3.6332782374751846}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:38,194][0m Trial 39 finished with value: 1.091337770913929 and parameters: {'min_data_in_leaf': 41, 'depth': 5, 'learning_rate': 0.07612992552539298, 'random_strength': 92, 'l2_leaf_reg': 0.6074548545386605}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:39,310][0m Trial 40 finished with value: 1.3333257841583332 and parameters: {'min_data_in_leaf': 57, 'depth': 3, 'learning_rate': 0.09234127933122296, 'random_strength': 71, 'l2_leaf_reg': 2.238766542613101e-06}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:44,434][0m Trial 41 finished with value: 1.0929482589286184 and parameters: {'min_data_in_leaf': 39, 'depth': 4, 'learning_rate': 0.06650573765708406, 'random_strength': 74, 'l2_leaf_reg': 0.05051148100289595}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:48,228][0m Trial 42 finished with value: 1.0908082906243195 and parameters: {'min_data_in_leaf': 51, 'depth': 5, 'learning_rate': 0.0857614907520053, 'random_strength': 78, 'l2_leaf_reg': 0.06481261077238604}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:52,727][0m Trial 43 finished with value: 1.0932308932274237 and parameters: {'min_data_in_leaf': 65, 'depth': 4, 'learning_rate': 0.07509229592790023, 'random_strength': 66, 'l2_leaf_reg': 0.42949053571331613}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:10:59,466][0m Trial 44 finished with value: 1.0963491112680257 and parameters: {'min_data_in_leaf': 44, 'depth': 5, 'learning_rate': 0.045194948716566735, 'random_strength': 84, 'l2_leaf_reg': 0.011286360562003087}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:05,233][0m Trial 45 finished with value: 1.0938743336946204 and parameters: {'min_data_in_leaf': 31, 'depth': 4, 'learning_rate': 0.05622817277750462, 'random_strength': 76, 'l2_leaf_reg': 0.0003416919361316871}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:10,182][0m Trial 46 finished with value: 1.0977939379999926 and parameters: {'min_data_in_leaf': 76, 'depth': 3, 'learning_rate': 0.06371232810186106, 'random_strength': 35, 'l2_leaf_reg': 11.574361044918115}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:16,221][0m Trial 47 finished with value: 1.0927217405839302 and parameters: {'min_data_in_leaf': 63, 'depth': 5, 'learning_rate': 0.025194031946044203, 'random_strength': 99, 'l2_leaf_reg': 0.1275162887245796}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:23,664][0m Trial 48 finished with value: 1.0908352270553296 and parameters: {'min_data_in_leaf': 91, 'depth': 6, 'learning_rate': 0.03158676067360349, 'random_strength': 62, 'l2_leaf_reg': 2.3115177796261444}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:24,844][0m Trial 49 finished with value: 1.3414319704950226 and parameters: {'min_data_in_leaf': 21, 'depth': 4, 'learning_rate': 0.07705141545329378, 'random_strength': 91, 'l2_leaf_reg': 1.904714694293859e-05}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:32,799][0m Trial 50 finished with value: 1.0955196981207298 and parameters: {'min_data_in_leaf': 70, 'depth': 7, 'learning_rate': 0.018005019541046767, 'random_strength': 55, 'l2_leaf_reg': 86.5270796898434}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:38,523][0m Trial 51 finished with value: 1.0925668709609466 and parameters: {'min_data_in_leaf': 37, 'depth': 4, 'learning_rate': 0.08894556221388104, 'random_strength': 72, 'l2_leaf_reg': 28.55045776692967}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:43,592][0m Trial 52 finished with value: 1.093101618666682 and parameters: {'min_data_in_leaf': 32, 'depth': 4, 'learning_rate': 0.0792330468382374, 'random_strength': 85, 'l2_leaf_reg': 0.33291861070436807}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:48,297][0m Trial 53 finished with value: 1.094209420501426 and parameters: {'min_data_in_leaf': 27, 'depth': 3, 'learning_rate': 0.0716504908066034, 'random_strength': 77, 'l2_leaf_reg': 5.749125573493157}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:11:54,043][0m Trial 54 finished with value: 1.0910771227398406 and parameters: {'min_data_in_leaf': 43, 'depth': 4, 'learning_rate': 0.05946901879295437, 'random_strength': 95, 'l2_leaf_reg': 1.056046119201111}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:00,238][0m Trial 55 finished with value: 1.0907661649818936 and parameters: {'min_data_in_leaf': 49, 'depth': 7, 'learning_rate': 0.05068452580691591, 'random_strength': 80, 'l2_leaf_reg': 0.0038380670062249478}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:06,111][0m Trial 56 finished with value: 1.0906496093277767 and parameters: {'min_data_in_leaf': 55, 'depth': 6, 'learning_rate': 0.06504868292651468, 'random_strength': 90, 'l2_leaf_reg': 0.029458167721213388}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:07,407][0m Trial 57 finished with value: 1.2889636681170535 and parameters: {'min_data_in_leaf': 84, 'depth': 5, 'learning_rate': 0.0963125667545601, 'random_strength': 49, 'l2_leaf_reg': 6.550951067368021e-05}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:14,460][0m Trial 58 finished with value: 1.097480017620552 and parameters: {'min_data_in_leaf': 90, 'depth': 7, 'learning_rate': 0.0362058499180881, 'random_strength': 65, 'l2_leaf_reg': 0.0009758517718541575}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:22,209][0m Trial 59 finished with value: 1.0901868917398791 and parameters: {'min_data_in_leaf': 76, 'depth': 6, 'learning_rate': 0.04357086163132811, 'random_strength': 84, 'l2_leaf_reg': 0.10809631346662618}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:25,491][0m Trial 60 finished with value: 1.0891165048346743 and parameters: {'min_data_in_leaf': 95, 'depth': 4, 'learning_rate': 0.1290658700086832, 'random_strength': 71, 'l2_leaf_reg': 1.9750289062057864}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:29,318][0m Trial 61 finished with value: 1.0927390977974325 and parameters: {'min_data_in_leaf': 98, 'depth': 4, 'learning_rate': 0.12476981590298908, 'random_strength': 70, 'l2_leaf_reg': 1.235256521569157}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:34,694][0m Trial 62 finished with value: 1.0982548974131083 and parameters: {'min_data_in_leaf': 88, 'depth': 4, 'learning_rate': 0.06962924012572276, 'random_strength': 74, 'l2_leaf_reg': 3.3813478451709313}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:37,644][0m Trial 63 finished with value: 1.0939610493515561 and parameters: {'min_data_in_leaf': 95, 'depth': 4, 'learning_rate': 0.1132028268260794, 'random_strength': 79, 'l2_leaf_reg': 0.3290636329583202}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:43,324][0m Trial 64 finished with value: 1.0947023360395058 and parameters: {'min_data_in_leaf': 81, 'depth': 4, 'learning_rate': 0.057556545694106176, 'random_strength': 88, 'l2_leaf_reg': 41.26139629102288}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:51,741][0m Trial 65 finished with value: 1.0944231780312432 and parameters: {'min_data_in_leaf': 35, 'depth': 7, 'learning_rate': 0.048117073540294195, 'random_strength': 96, 'l2_leaf_reg': 8.056741302291655}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:58,001][0m Trial 66 finished with value: 1.0932632524332595 and parameters: {'min_data_in_leaf': 97, 'depth': 5, 'learning_rate': 0.05434160408880501, 'random_strength': 63, 'l2_leaf_reg': 1.6419772439189682}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:12:59,219][0m Trial 67 finished with value: 1.3679190806644936 and parameters: {'min_data_in_leaf': 88, 'depth': 4, 'learning_rate': 0.030562506204571002, 'random_strength': 57, 'l2_leaf_reg': 0.00013410695950274917}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:02,892][0m Trial 68 finished with value: 1.088620023308173 and parameters: {'min_data_in_leaf': 46, 'depth': 3, 'learning_rate': 0.12922648720072863, 'random_strength': 68, 'l2_leaf_reg': 0.660823146060707}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:06,658][0m Trial 69 finished with value: 1.0935420244713856 and parameters: {'min_data_in_leaf': 47, 'depth': 3, 'learning_rate': 0.11977697187009476, 'random_strength': 69, 'l2_leaf_reg': 0.1870729374831878}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:10,024][0m Trial 70 finished with value: 1.0991958452135608 and parameters: {'min_data_in_leaf': 50, 'depth': 3, 'learning_rate': 0.1266918796276117, 'random_strength': 73, 'l2_leaf_reg': 0.005355801319274543}. Best is trial 13 with value: 1.0869168701293184.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:14,001][0m Trial 71 finished with value: 1.085815400126414 and parameters: {'min_data_in_leaf': 44, 'depth': 4, 'learning_rate': 0.1174584342278517, 'random_strength': 76, 'l2_leaf_reg': 5.305749568383563}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:18,158][0m Trial 72 finished with value: 1.0997956750584494 and parameters: {'min_data_in_leaf': 54, 'depth': 3, 'learning_rate': 0.11768932718054712, 'random_strength': 75, 'l2_leaf_reg': 4.113717754340627}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:21,364][0m Trial 73 finished with value: 1.0904998476981853 and parameters: {'min_data_in_leaf': 45, 'depth': 5, 'learning_rate': 0.12402370799457353, 'random_strength': 67, 'l2_leaf_reg': 0.6432882712545613}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:26,025][0m Trial 74 finished with value: 1.0921964647726978 and parameters: {'min_data_in_leaf': 41, 'depth': 4, 'learning_rate': 0.12853826069104687, 'random_strength': 82, 'l2_leaf_reg': 22.129601025987316}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:29,796][0m Trial 75 finished with value: 1.0952290868789702 and parameters: {'min_data_in_leaf': 38, 'depth': 3, 'learning_rate': 0.12917417318308558, 'random_strength': 70, 'l2_leaf_reg': 0.0019526266380516548}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:32,996][0m Trial 76 finished with value: 1.0951099233649884 and parameters: {'min_data_in_leaf': 84, 'depth': 7, 'learning_rate': 0.11635157923576202, 'random_strength': 59, 'l2_leaf_reg': 0.02365253391092147}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:40,767][0m Trial 77 finished with value: 1.087209637219368 and parameters: {'min_data_in_leaf': 42, 'depth': 6, 'learning_rate': 0.03978145245111596, 'random_strength': 78, 'l2_leaf_reg': 7.946020629316243}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:48,577][0m Trial 78 finished with value: 1.091510055436855 and parameters: {'min_data_in_leaf': 43, 'depth': 6, 'learning_rate': 0.04035488067564389, 'random_strength': 63, 'l2_leaf_reg': 10.489822621565192}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:13:54,861][0m Trial 79 finished with value: 1.0889566945271378 and parameters: {'min_data_in_leaf': 48, 'depth': 5, 'learning_rate': 0.027409671872676806, 'random_strength': 83, 'l2_leaf_reg': 2.201260901268027}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:01,880][0m Trial 80 finished with value: 1.0941020114061375 and parameters: {'min_data_in_leaf': 52, 'depth': 6, 'learning_rate': 0.02858611363160938, 'random_strength': 82, 'l2_leaf_reg': 6.004192388769487}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:07,881][0m Trial 81 finished with value: 1.0915738575100467 and parameters: {'min_data_in_leaf': 47, 'depth': 5, 'learning_rate': 0.023419341355692923, 'random_strength': 78, 'l2_leaf_reg': 69.20635165692036}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:14,231][0m Trial 82 finished with value: 1.1028438728884988 and parameters: {'min_data_in_leaf': 57, 'depth': 6, 'learning_rate': 0.014479894342148358, 'random_strength': 84, 'l2_leaf_reg': 2.8721171904909784}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:20,188][0m Trial 83 finished with value: 1.089497266892655 and parameters: {'min_data_in_leaf': 41, 'depth': 5, 'learning_rate': 0.01977189834763539, 'random_strength': 72, 'l2_leaf_reg': 1.9023607102167628}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:25,658][0m Trial 84 finished with value: 1.0953998261240931 and parameters: {'min_data_in_leaf': 35, 'depth': 4, 'learning_rate': 0.03422194144160972, 'random_strength': 77, 'l2_leaf_reg': 0.5884839707979983}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:32,042][0m Trial 85 finished with value: 1.092804993412369 and parameters: {'min_data_in_leaf': 48, 'depth': 5, 'learning_rate': 0.03844845374270974, 'random_strength': 89, 'l2_leaf_reg': 13.559755787542654}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:37,305][0m Trial 86 finished with value: 1.0899799788880349 and parameters: {'min_data_in_leaf': 60, 'depth': 4, 'learning_rate': 0.02652441411996351, 'random_strength': 80, 'l2_leaf_reg': 30.220319116678656}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:38,549][0m Trial 87 finished with value: 1.2685851446855785 and parameters: {'min_data_in_leaf': 44, 'depth': 5, 'learning_rate': 0.1219338042651884, 'random_strength': 65, 'l2_leaf_reg': 7.963969844134074e-06}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:42,277][0m Trial 88 finished with value: 1.0943803495214046 and parameters: {'min_data_in_leaf': 50, 'depth': 4, 'learning_rate': 0.10463847684883673, 'random_strength': 93, 'l2_leaf_reg': 1.0703753757114405}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:46,248][0m Trial 89 finished with value: 1.091610834004211 and parameters: {'min_data_in_leaf': 37, 'depth': 4, 'learning_rate': 0.10816870227650681, 'random_strength': 75, 'l2_leaf_reg': 4.470818400643836}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:52,633][0m Trial 90 finished with value: 1.0993251958680852 and parameters: {'min_data_in_leaf': 53, 'depth': 6, 'learning_rate': 0.018464620025411174, 'random_strength': 86, 'l2_leaf_reg': 2.02533810734445}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:14:58,821][0m Trial 91 finished with value: 1.0909038350333593 and parameters: {'min_data_in_leaf': 41, 'depth': 5, 'learning_rate': 0.022461243188944185, 'random_strength': 72, 'l2_leaf_reg': 1.7470879843985108}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:04,591][0m Trial 92 finished with value: 1.1053396830474258 and parameters: {'min_data_in_leaf': 43, 'depth': 5, 'learning_rate': 0.013055562744416386, 'random_strength': 69, 'l2_leaf_reg': 8.011220841628457}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:10,994][0m Trial 93 finished with value: 1.0959840212092649 and parameters: {'min_data_in_leaf': 46, 'depth': 5, 'learning_rate': 0.03539095763911888, 'random_strength': 72, 'l2_leaf_reg': 0.4993260539831106}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:17,239][0m Trial 94 finished with value: 1.0922203333054845 and parameters: {'min_data_in_leaf': 39, 'depth': 5, 'learning_rate': 0.020661358506009662, 'random_strength': 76, 'l2_leaf_reg': 2.753881007129451}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:23,945][0m Trial 95 finished with value: 1.0952256583572633 and parameters: {'min_data_in_leaf': 49, 'depth': 6, 'learning_rate': 0.01649811390755066, 'random_strength': 79, 'l2_leaf_reg': 16.621968662835737}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:29,208][0m Trial 96 finished with value: 1.0938920474055016 and parameters: {'min_data_in_leaf': 34, 'depth': 4, 'learning_rate': 0.026491078225256696, 'random_strength': 83, 'l2_leaf_reg': 0.20897630724209273}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:35,698][0m Trial 97 finished with value: 1.0940918569206988 and parameters: {'min_data_in_leaf': 29, 'depth': 5, 'learning_rate': 0.031293034867445446, 'random_strength': 68, 'l2_leaf_reg': 0.9127224172194199}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:40,633][0m Trial 98 finished with value: 1.0914719192017377 and parameters: {'min_data_in_leaf': 40, 'depth': 4, 'learning_rate': 0.08498057231413021, 'random_strength': 1, 'l2_leaf_reg': 5.044462866108157}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:45,883][0m Trial 99 finished with value: 1.0881271305456548 and parameters: {'min_data_in_leaf': 56, 'depth': 5, 'learning_rate': 0.06206412568952399, 'random_strength': 64, 'l2_leaf_reg': 0.0005818823050644166}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:50,840][0m Trial 100 finished with value: 1.0963674059895554 and parameters: {'min_data_in_leaf': 67, 'depth': 3, 'learning_rate': 0.0682530801266307, 'random_strength': 57, 'l2_leaf_reg': 0.0003657178750560515}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:15:56,082][0m Trial 101 finished with value: 1.087810101532721 and parameters: {'min_data_in_leaf': 42, 'depth': 5, 'learning_rate': 0.06289041055953823, 'random_strength': 65, 'l2_leaf_reg': 0.0005788197793181886}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:00,438][0m Trial 102 finished with value: 1.0970001265711606 and parameters: {'min_data_in_leaf': 45, 'depth': 5, 'learning_rate': 0.06367350315501542, 'random_strength': 61, 'l2_leaf_reg': 0.00012759443602834142}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:04,746][0m Trial 103 finished with value: 1.0941351705221842 and parameters: {'min_data_in_leaf': 43, 'depth': 5, 'learning_rate': 0.07280061128717341, 'random_strength': 67, 'l2_leaf_reg': 0.0007062968331914221}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:09,954][0m Trial 104 finished with value: 1.0968565045271266 and parameters: {'min_data_in_leaf': 56, 'depth': 5, 'learning_rate': 0.061362047110361084, 'random_strength': 63, 'l2_leaf_reg': 5.353185983591124e-05}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:14,769][0m Trial 105 finished with value: 1.0917474665799758 and parameters: {'min_data_in_leaf': 61, 'depth': 6, 'learning_rate': 0.05882061585419245, 'random_strength': 74, 'l2_leaf_reg': 0.00020782700997358001}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:20,353][0m Trial 106 finished with value: 1.0957574441161775 and parameters: {'min_data_in_leaf': 71, 'depth': 4, 'learning_rate': 0.0531942897206421, 'random_strength': 70, 'l2_leaf_reg': 0.0019731253844117507}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:27,110][0m Trial 107 finished with value: 1.0916225839707163 and parameters: {'min_data_in_leaf': 37, 'depth': 5, 'learning_rate': 0.04637642173661336, 'random_strength': 80, 'l2_leaf_reg': 10.372122885941156}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:31,138][0m Trial 108 finished with value: 1.0957986233573647 and parameters: {'min_data_in_leaf': 93, 'depth': 5, 'learning_rate': 0.07759334364943074, 'random_strength': 52, 'l2_leaf_reg': 0.0007050725905880832}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:32,356][0m Trial 109 finished with value: 1.339598173194566 and parameters: {'min_data_in_leaf': 46, 'depth': 4, 'learning_rate': 0.0805525814605741, 'random_strength': 65, 'l2_leaf_reg': 7.81065446342873e-05}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:37,403][0m Trial 110 finished with value: 1.0868052071074006 and parameters: {'min_data_in_leaf': 74, 'depth': 4, 'learning_rate': 0.07167721344043389, 'random_strength': 76, 'l2_leaf_reg': 0.00027965608840102986}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:42,188][0m Trial 111 finished with value: 1.098530668863811 and parameters: {'min_data_in_leaf': 76, 'depth': 4, 'learning_rate': 0.06528958750178221, 'random_strength': 78, 'l2_leaf_reg': 0.00039237840131594296}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:46,836][0m Trial 112 finished with value: 1.0953331650159943 and parameters: {'min_data_in_leaf': 79, 'depth': 4, 'learning_rate': 0.0705899991886763, 'random_strength': 74, 'l2_leaf_reg': 0.00022121994914300086}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:51,380][0m Trial 113 finished with value: 1.0943410161819827 and parameters: {'min_data_in_leaf': 79, 'depth': 4, 'learning_rate': 0.06374847843160045, 'random_strength': 76, 'l2_leaf_reg': 0.0006588800784618913}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:52,598][0m Trial 114 finished with value: 1.2779946755393423 and parameters: {'min_data_in_leaf': 75, 'depth': 4, 'learning_rate': 0.0743931279799813, 'random_strength': 71, 'l2_leaf_reg': 2.0569845811055095e-05}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:57,366][0m Trial 115 finished with value: 1.0949878043155232 and parameters: {'min_data_in_leaf': 74, 'depth': 4, 'learning_rate': 0.06741102778301719, 'random_strength': 59, 'l2_leaf_reg': 0.0018038319222565236}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:16:58,480][0m Trial 116 finished with value: 1.3535184076302922 and parameters: {'min_data_in_leaf': 51, 'depth': 3, 'learning_rate': 0.05600259830641588, 'random_strength': 67, 'l2_leaf_reg': 5.15833731272787e-05}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:02,934][0m Trial 117 finished with value: 1.0920244228048288 and parameters: {'min_data_in_leaf': 42, 'depth': 6, 'learning_rate': 0.061936990501191246, 'random_strength': 86, 'l2_leaf_reg': 0.0012277423066983405}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:08,362][0m Trial 118 finished with value: 1.0883840283241315 and parameters: {'min_data_in_leaf': 49, 'depth': 4, 'learning_rate': 0.04378117649773524, 'random_strength': 81, 'l2_leaf_reg': 0.0002501690063546472}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:15,148][0m Trial 119 finished with value: 1.093278624082187 and parameters: {'min_data_in_leaf': 54, 'depth': 5, 'learning_rate': 0.04204059279674974, 'random_strength': 83, 'l2_leaf_reg': 0.00048322061986977923}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:20,743][0m Trial 120 finished with value: 1.0916459995877323 and parameters: {'min_data_in_leaf': 82, 'depth': 4, 'learning_rate': 0.05089286057250381, 'random_strength': 81, 'l2_leaf_reg': 0.000231112436499772}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:21,981][0m Trial 121 finished with value: 1.344824214349678 and parameters: {'min_data_in_leaf': 48, 'depth': 4, 'learning_rate': 0.036810265206777146, 'random_strength': 77, 'l2_leaf_reg': 3.939946217652258e-05}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:23,200][0m Trial 122 finished with value: 1.3576199109543132 and parameters: {'min_data_in_leaf': 45, 'depth': 4, 'learning_rate': 0.048254286512146666, 'random_strength': 73, 'l2_leaf_reg': 0.00015494357933893536}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:24,407][0m Trial 123 finished with value: 1.368933826105342 and parameters: {'min_data_in_leaf': 49, 'depth': 4, 'learning_rate': 0.029161807088689488, 'random_strength': 98, 'l2_leaf_reg': 2.9440029789773278e-05}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:30,340][0m Trial 124 finished with value: 1.0903592267043676 and parameters: {'min_data_in_leaf': 52, 'depth': 4, 'learning_rate': 0.04338331747128379, 'random_strength': 9, 'l2_leaf_reg': 3.7678946011597247}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:31,602][0m Trial 125 finished with value: 1.2268306237961406 and parameters: {'min_data_in_leaf': 47, 'depth': 4, 'learning_rate': 0.12988066603122203, 'random_strength': 88, 'l2_leaf_reg': 0.00011109974921444808}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:35,098][0m Trial 126 finished with value: 1.0925971909418013 and parameters: {'min_data_in_leaf': 45, 'depth': 7, 'learning_rate': 0.12641813524113574, 'random_strength': 69, 'l2_leaf_reg': 1.3260416869613578}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:39,793][0m Trial 127 finished with value: 1.098185042939332 and parameters: {'min_data_in_leaf': 69, 'depth': 5, 'learning_rate': 0.08816677135387892, 'random_strength': 65, 'l2_leaf_reg': 43.065946134716995}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:45,559][0m Trial 128 finished with value: 1.0970039982478146 and parameters: {'min_data_in_leaf': 72, 'depth': 4, 'learning_rate': 0.03943190598465972, 'random_strength': 80, 'l2_leaf_reg': 0.0032265513439438394}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:51,110][0m Trial 129 finished with value: 1.09793928215759 and parameters: {'min_data_in_leaf': 39, 'depth': 4, 'learning_rate': 0.05974093549637181, 'random_strength': 75, 'l2_leaf_reg': 6.9947373037583045}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:17:55,513][0m Trial 130 finished with value: 1.0956198494845795 and parameters: {'min_data_in_leaf': 66, 'depth': 5, 'learning_rate': 0.08246006758953621, 'random_strength': 71, 'l2_leaf_reg': 0.36417064282469175}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:00,890][0m Trial 131 finished with value: 1.1053900487988433 and parameters: {'min_data_in_leaf': 42, 'depth': 5, 'learning_rate': 0.010899287850710856, 'random_strength': 72, 'l2_leaf_reg': 1.7601557540588808}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:06,973][0m Trial 132 finished with value: 1.0947611860734456 and parameters: {'min_data_in_leaf': 41, 'depth': 5, 'learning_rate': 0.02129232590102709, 'random_strength': 78, 'l2_leaf_reg': 2.206288959751444}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:12,994][0m Trial 133 finished with value: 1.094178171759654 and parameters: {'min_data_in_leaf': 36, 'depth': 5, 'learning_rate': 0.023636079776076274, 'random_strength': 85, 'l2_leaf_reg': 0.8822119446722704}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:19,447][0m Trial 134 finished with value: 1.096307552433665 and parameters: {'min_data_in_leaf': 44, 'depth': 5, 'learning_rate': 0.02847974299872537, 'random_strength': 94, 'l2_leaf_reg': 3.232344340094915}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:25,195][0m Trial 135 finished with value: 1.0979648741001429 and parameters: {'min_data_in_leaf': 39, 'depth': 5, 'learning_rate': 0.09569152375592074, 'random_strength': 68, 'l2_leaf_reg': 23.974691744545613}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:30,869][0m Trial 136 finished with value: 1.0962936939671732 and parameters: {'min_data_in_leaf': 47, 'depth': 6, 'learning_rate': 0.06920054352094106, 'random_strength': 43, 'l2_leaf_reg': 5.810828377558208}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:36,303][0m Trial 137 finished with value: 1.0972376085519622 and parameters: {'min_data_in_leaf': 50, 'depth': 4, 'learning_rate': 0.032941711977609055, 'random_strength': 76, 'l2_leaf_reg': 0.0005185088237659192}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:42,683][0m Trial 138 finished with value: 1.092417259877358 and parameters: {'min_data_in_leaf': 42, 'depth': 5, 'learning_rate': 0.05476820847878669, 'random_strength': 61, 'l2_leaf_reg': 0.7557025003196809}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:47,411][0m Trial 139 finished with value: 1.0911198407488274 and parameters: {'min_data_in_leaf': 62, 'depth': 3, 'learning_rate': 0.0775506654643927, 'random_strength': 73, 'l2_leaf_reg': 13.639790096000116}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:51,883][0m Trial 140 finished with value: 1.0886253447676029 and parameters: {'min_data_in_leaf': 44, 'depth': 5, 'learning_rate': 0.07175348801715174, 'random_strength': 64, 'l2_leaf_reg': 0.0002922122904403586}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:18:56,744][0m Trial 141 finished with value: 1.094332267712891 and parameters: {'min_data_in_leaf': 44, 'depth': 5, 'learning_rate': 0.07144744202257229, 'random_strength': 63, 'l2_leaf_reg': 0.0012153095073154066}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:00,640][0m Trial 142 finished with value: 1.0944016421352016 and parameters: {'min_data_in_leaf': 86, 'depth': 5, 'learning_rate': 0.07557885884736826, 'random_strength': 64, 'l2_leaf_reg': 0.00026242148560125884}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:05,510][0m Trial 143 finished with value: 1.0968434353588759 and parameters: {'min_data_in_leaf': 40, 'depth': 5, 'learning_rate': 0.065879501204713, 'random_strength': 67, 'l2_leaf_reg': 0.0001053747450875934}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:11,545][0m Trial 144 finished with value: 1.0950806566400613 and parameters: {'min_data_in_leaf': 46, 'depth': 5, 'learning_rate': 0.044770534140768085, 'random_strength': 82, 'l2_leaf_reg': 0.0003047543710502623}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:16,671][0m Trial 145 finished with value: 1.0876127951654182 and parameters: {'min_data_in_leaf': 49, 'depth': 4, 'learning_rate': 0.0729207064984981, 'random_strength': 70, 'l2_leaf_reg': 0.000174075549524529}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:21,063][0m Trial 146 finished with value: 1.0973944818992203 and parameters: {'min_data_in_leaf': 48, 'depth': 4, 'learning_rate': 0.0732027419117292, 'random_strength': 61, 'l2_leaf_reg': 0.0009341611420668118}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:26,329][0m Trial 147 finished with value: 1.0916033337241229 and parameters: {'min_data_in_leaf': 51, 'depth': 4, 'learning_rate': 0.06854090362690525, 'random_strength': 79, 'l2_leaf_reg': 0.00019584855564888206}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:32,100][0m Trial 148 finished with value: 1.0905750727512733 and parameters: {'min_data_in_leaf': 54, 'depth': 4, 'learning_rate': 0.05744808904006431, 'random_strength': 70, 'l2_leaf_reg': 7.878501911013809e-05}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:37,143][0m Trial 149 finished with value: 1.0911456048862536 and parameters: {'min_data_in_leaf': 57, 'depth': 4, 'learning_rate': 0.06379650075554484, 'random_strength': 74, 'l2_leaf_reg': 0.0005264370749679039}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:41,556][0m Trial 150 finished with value: 1.0911800971898242 and parameters: {'min_data_in_leaf': 49, 'depth': 4, 'learning_rate': 0.07202423906086558, 'random_strength': 55, 'l2_leaf_reg': 0.0003133174531328408}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:47,369][0m Trial 151 finished with value: 1.0892752320943926 and parameters: {'min_data_in_leaf': 43, 'depth': 5, 'learning_rate': 0.06654414756871117, 'random_strength': 66, 'l2_leaf_reg': 1.4482568875840014}. Best is trial 71 with value: 1.085815400126414.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:52,817][0m Trial 152 finished with value: 1.0836613774733865 and parameters: {'min_data_in_leaf': 43, 'depth': 5, 'learning_rate': 0.07928546561861827, 'random_strength': 66, 'l2_leaf_reg': 2.9448952422509165}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:19:58,493][0m Trial 153 finished with value: 1.0925303009172498 and parameters: {'min_data_in_leaf': 46, 'depth': 4, 'learning_rate': 0.07583090786077903, 'random_strength': 69, 'l2_leaf_reg': 3.6605917401671424}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:03,564][0m Trial 154 finished with value: 1.0931811557868294 and parameters: {'min_data_in_leaf': 44, 'depth': 4, 'learning_rate': 0.0809056975074222, 'random_strength': 65, 'l2_leaf_reg': 9.30305500086425}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:08,829][0m Trial 155 finished with value: 1.0884331904672047 and parameters: {'min_data_in_leaf': 38, 'depth': 5, 'learning_rate': 0.07847590518639788, 'random_strength': 71, 'l2_leaf_reg': 5.141961854490971}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:14,463][0m Trial 156 finished with value: 1.09120276868818 and parameters: {'min_data_in_leaf': 78, 'depth': 5, 'learning_rate': 0.07891166914785935, 'random_strength': 76, 'l2_leaf_reg': 5.749057669735482}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:17,945][0m Trial 157 finished with value: 1.0946773542602186 and parameters: {'min_data_in_leaf': 38, 'depth': 5, 'learning_rate': 0.0839821258284749, 'random_strength': 58, 'l2_leaf_reg': 0.00016321627785227926}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:23,970][0m Trial 158 finished with value: 1.0860983521044218 and parameters: {'min_data_in_leaf': 33, 'depth': 5, 'learning_rate': 0.07347812110702893, 'random_strength': 91, 'l2_leaf_reg': 16.883797375955126}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:25,233][0m Trial 159 finished with value: 1.3427316910094496 and parameters: {'min_data_in_leaf': 33, 'depth': 5, 'learning_rate': 0.07449700348173086, 'random_strength': 91, 'l2_leaf_reg': 1.0562082189920065e-08}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:31,443][0m Trial 160 finished with value: 1.0914868780332525 and parameters: {'min_data_in_leaf': 33, 'depth': 5, 'learning_rate': 0.06931527087933374, 'random_strength': 88, 'l2_leaf_reg': 16.246299643009486}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:36,229][0m Trial 161 finished with value: 1.0969656222716007 and parameters: {'min_data_in_leaf': 36, 'depth': 5, 'learning_rate': 0.0784015761043092, 'random_strength': 96, 'l2_leaf_reg': 3.066859160764524}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:42,774][0m Trial 162 finished with value: 1.0927371561348818 and parameters: {'min_data_in_leaf': 29, 'depth': 5, 'learning_rate': 0.07308865172849306, 'random_strength': 84, 'l2_leaf_reg': 10.731602217108986}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:48,766][0m Trial 163 finished with value: 1.0950689701609597 and parameters: {'min_data_in_leaf': 41, 'depth': 5, 'learning_rate': 0.08007344886912093, 'random_strength': 68, 'l2_leaf_reg': 31.539222581225758}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:55,697][0m Trial 164 finished with value: 1.094803781637367 and parameters: {'min_data_in_leaf': 39, 'depth': 5, 'learning_rate': 0.07080127143177703, 'random_strength': 92, 'l2_leaf_reg': 92.86810000965558}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:20:57,127][0m Trial 165 finished with value: 1.307830224735433 and parameters: {'min_data_in_leaf': 81, 'depth': 6, 'learning_rate': 0.07443527325712612, 'random_strength': 71, 'l2_leaf_reg': 9.451890499623605e-05}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:02,351][0m Trial 166 finished with value: 1.0927788852941025 and parameters: {'min_data_in_leaf': 22, 'depth': 7, 'learning_rate': 0.07619682094336923, 'random_strength': 78, 'l2_leaf_reg': 5.068860466048079}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:06,379][0m Trial 167 finished with value: 1.0956905364566403 and parameters: {'min_data_in_leaf': 43, 'depth': 5, 'learning_rate': 0.08665584902813231, 'random_strength': 81, 'l2_leaf_reg': 0.00048344733448623533}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:13,186][0m Trial 168 finished with value: 1.0918583184206658 and parameters: {'min_data_in_leaf': 77, 'depth': 5, 'learning_rate': 0.08195379883886662, 'random_strength': 74, 'l2_leaf_reg': 7.725354972232793}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:17,926][0m Trial 169 finished with value: 1.0934594453245696 and parameters: {'min_data_in_leaf': 47, 'depth': 5, 'learning_rate': 0.06144564864172882, 'random_strength': 100, 'l2_leaf_reg': 0.0008598123817229146}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:23,265][0m Trial 170 finished with value: 1.0884463311523356 and parameters: {'min_data_in_leaf': 30, 'depth': 5, 'learning_rate': 0.05202379307279266, 'random_strength': 62, 'l2_leaf_reg': 0.0001637321158679474}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:28,265][0m Trial 171 finished with value: 1.093438530761892 and parameters: {'min_data_in_leaf': 29, 'depth': 5, 'learning_rate': 0.05097733539992173, 'random_strength': 61, 'l2_leaf_reg': 0.00021438786453292533}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:33,908][0m Trial 172 finished with value: 1.0974895080548814 and parameters: {'min_data_in_leaf': 31, 'depth': 5, 'learning_rate': 0.05296999817031669, 'random_strength': 64, 'l2_leaf_reg': 0.00033861458617426}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:35,313][0m Trial 173 finished with value: 1.2728932019424979 and parameters: {'min_data_in_leaf': 26, 'depth': 5, 'learning_rate': 0.0488520655152174, 'random_strength': 67, 'l2_leaf_reg': 5.5914832214997816e-05}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:40,822][0m Trial 174 finished with value: 1.0930689323610159 and parameters: {'min_data_in_leaf': 42, 'depth': 5, 'learning_rate': 0.0587908312619906, 'random_strength': 62, 'l2_leaf_reg': 0.00014159875100269414}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:42,163][0m Trial 175 finished with value: 1.3444021405401412 and parameters: {'min_data_in_leaf': 45, 'depth': 5, 'learning_rate': 0.07182208750868814, 'random_strength': 70, 'l2_leaf_reg': 3.787631354718993e-05}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:47,762][0m Trial 176 finished with value: 1.0961376080534693 and parameters: {'min_data_in_leaf': 37, 'depth': 5, 'learning_rate': 0.07782619308959865, 'random_strength': 65, 'l2_leaf_reg': 4.656879487977448}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:53,835][0m Trial 177 finished with value: 1.0943338744229072 and parameters: {'min_data_in_leaf': 49, 'depth': 6, 'learning_rate': 0.06810964364997912, 'random_strength': 72, 'l2_leaf_reg': 21.833575787039155}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:21:59,891][0m Trial 178 finished with value: 1.0880566264025562 and parameters: {'min_data_in_leaf': 34, 'depth': 5, 'learning_rate': 0.05559280277355272, 'random_strength': 67, 'l2_leaf_reg': 2.5359052546651206}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:22:06,326][0m Trial 179 finished with value: 1.0948775759233456 and parameters: {'min_data_in_leaf': 35, 'depth': 5, 'learning_rate': 0.06238372497804408, 'random_strength': 77, 'l2_leaf_reg': 2.6643281649353665}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:22:11,672][0m Trial 180 finished with value: 1.0931100931739672 and parameters: {'min_data_in_leaf': 30, 'depth': 5, 'learning_rate': 0.05716067649041417, 'random_strength': 86, 'l2_leaf_reg': 0.00014855259420814104}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:22:18,123][0m Trial 181 finished with value: 1.0887997835416472 and parameters: {'min_data_in_leaf': 33, 'depth': 5, 'learning_rate': 0.053118398674966656, 'random_strength': 68, 'l2_leaf_reg': 9.294969298588653}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:22:24,835][0m Trial 182 finished with value: 1.0919243201436704 and parameters: {'min_data_in_leaf': 27, 'depth': 5, 'learning_rate': 0.054581390907615324, 'random_strength': 68, 'l2_leaf_reg': 13.15609617823091}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:22:31,304][0m Trial 183 finished with value: 1.0906293092741595 and parameters: {'min_data_in_leaf': 32, 'depth': 5, 'learning_rate': 0.04564390340930729, 'random_strength': 64, 'l2_leaf_reg': 8.201317971636588}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:22:37,242][0m Trial 184 finished with value: 1.0962833161608678 and parameters: {'min_data_in_leaf': 34, 'depth': 5, 'learning_rate': 0.05358493929714043, 'random_strength': 67, 'l2_leaf_reg': 1.302099550925846}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:22:42,566][0m Trial 185 finished with value: 1.0940983414317458 and parameters: {'min_data_in_leaf': 33, 'depth': 5, 'learning_rate': 0.05215351661808508, 'random_strength': 66, 'l2_leaf_reg': 0.0003364948738427547}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:22:49,019][0m Trial 186 finished with value: 1.090647589059421 and parameters: {'min_data_in_leaf': 38, 'depth': 5, 'learning_rate': 0.059942746866886755, 'random_strength': 70, 'l2_leaf_reg': 2.812008118977799}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:22:55,116][0m Trial 187 finished with value: 1.0870987241850778 and parameters: {'min_data_in_leaf': 40, 'depth': 5, 'learning_rate': 0.049746369150550585, 'random_strength': 60, 'l2_leaf_reg': 0.0005395070731755681}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:01,613][0m Trial 188 finished with value: 1.0954463131219676 and parameters: {'min_data_in_leaf': 31, 'depth': 5, 'learning_rate': 0.04145271769472974, 'random_strength': 59, 'l2_leaf_reg': 0.0007351194239337621}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:08,026][0m Trial 189 finished with value: 1.0915400434495222 and parameters: {'min_data_in_leaf': 36, 'depth': 5, 'learning_rate': 0.04908197814978314, 'random_strength': 59, 'l2_leaf_reg': 0.0005463567624802137}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:14,305][0m Trial 190 finished with value: 1.0933093470652333 and parameters: {'min_data_in_leaf': 40, 'depth': 5, 'learning_rate': 0.04705313048270364, 'random_strength': 62, 'l2_leaf_reg': 0.0011819031981115313}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:19,726][0m Trial 191 finished with value: 1.0926327718373277 and parameters: {'min_data_in_leaf': 41, 'depth': 5, 'learning_rate': 0.056050564652737916, 'random_strength': 64, 'l2_leaf_reg': 0.00025625139137674073}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:26,330][0m Trial 192 finished with value: 1.0954390660378819 and parameters: {'min_data_in_leaf': 44, 'depth': 5, 'learning_rate': 0.04924181942314102, 'random_strength': 80, 'l2_leaf_reg': 0.0018695337702918408}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:33,092][0m Trial 193 finished with value: 1.0927588455811863 and parameters: {'min_data_in_leaf': 35, 'depth': 5, 'learning_rate': 0.050276591661253756, 'random_strength': 74, 'l2_leaf_reg': 0.00011397023766410806}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:38,586][0m Trial 194 finished with value: 1.089191193182778 and parameters: {'min_data_in_leaf': 46, 'depth': 5, 'learning_rate': 0.055480613943208884, 'random_strength': 66, 'l2_leaf_reg': 0.00039592678594039507}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:45,462][0m Trial 195 finished with value: 1.0923592999451783 and parameters: {'min_data_in_leaf': 38, 'depth': 5, 'learning_rate': 0.05247017851359416, 'random_strength': 69, 'l2_leaf_reg': 4.807183871211283}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:53,294][0m Trial 196 finished with value: 1.0941734711084372 and parameters: {'min_data_in_leaf': 42, 'depth': 7, 'learning_rate': 0.04383483131459892, 'random_strength': 56, 'l2_leaf_reg': 2.0564464138298053}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:23:59,664][0m Trial 197 finished with value: 1.0986142342903333 and parameters: {'min_data_in_leaf': 84, 'depth': 5, 'learning_rate': 0.04645086585365146, 'random_strength': 60, 'l2_leaf_reg': 0.00020780594470989943}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:24:06,281][0m Trial 198 finished with value: 1.1032557383383232 and parameters: {'min_data_in_leaf': 80, 'depth': 5, 'learning_rate': 0.06550125070387704, 'random_strength': 83, 'l2_leaf_reg': 7.8067576913576975}. Best is trial 152 with value: 1.0836613774733865.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-25 20:24:13,011][0m Trial 199 finished with value: 1.0937628032274382 and parameters: {'min_data_in_leaf': 48, 'depth': 5, 'learning_rate': 0.03715741624813227, 'random_strength': 52, 'l2_leaf_reg': 3.6429876663953307}. Best is trial 152 with value: 1.0836613774733865.[0m


Best trial:
  Params: 
    min_data_in_leaf: 43
    depth: 5
    learning_rate: 0.07928546561861827
    random_strength: 66
    l2_leaf_reg: 2.9448952422509165


In [None]:
# Render Optuna's hyperparameter-importance plot for `study`.
# NOTE(review): `study` is created in an earlier cell (not shown here); presumably
# it is the study whose trials are logged above — confirm before re-running in isolation.
optuna.visualization.plot_param_importances(study)

In [None]:
# Load the training set and split it into a feature frame and an integer-encoded target.
# The frame returned by `get_input` is never mutated, so re-running this cell is safe.
mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
data = get_input("train.csv")
target = data['target'].map(mapper)
# `Series.map` silently yields NaN for labels missing from the dict; fail loudly instead.
if target.isna().any():
    raise ValueError("train.csv contains target labels missing from `mapper`")
data = data.drop(columns=['id', 'target'])

# Hand-picked CatBoost settings for the built-in cross-validation run below.
# NOTE(review): CatBoost documents `min_data_in_leaf` as applying to the Lossguide and
# Depthwise grow policies — confirm it has any effect with "SymmetricTree".
cb_params = {
    "loss_function": "MultiClass",
    "eval_metric": "MultiClass",
    "task_type": "GPU",  # requires a GPU runtime
    "min_data_in_leaf": 25,
    "depth": 3,
    "learning_rate": 0.09,
    "random_strength": 10,
    "l2_leaf_reg": 100,
    "grow_policy": "SymmetricTree",
    "random_seed": 27,
}

# 5-fold stratified CV with early stopping; `iterations` is only an upper bound because
# training stops after 100 rounds without improvement. Results come back as a DataFrame.
cv_result = cb.cv(
    pool=cb.Pool(data, target),
    params=cb_params,
    plot=True,
    shuffle=True,
    stratified=True,
    seed=42,
    iterations=10000,
    early_stopping_rounds=100,
    fold_count=5,
    as_pandas=True,
    verbose_eval=10,
)

In [13]:
# Load the training set and split it into a feature frame and an integer-encoded target.
# The frame returned by `get_input` is never mutated, so re-running this cell is safe.
mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
data = get_input("train.csv")
target = data['target'].map(mapper)
# `Series.map` silently yields NaN for labels missing from the dict; fail loudly instead.
if target.isna().any():
    raise ValueError("train.csv contains target labels missing from `mapper`")
data = data.drop(columns=['id', 'target'])

# CatBoost settings used for the 15-fold run below.
# NOTE(review): CatBoost documents `min_data_in_leaf` as applying to the Lossguide and
# Depthwise grow policies — confirm it has any effect with "SymmetricTree".
cb_params = {
    "n_estimators": 4000,
    "loss_function": "MultiClass",
    "eval_metric": "MultiClass",
    "task_type": "GPU",  # requires a GPU runtime
    "min_data_in_leaf": 35,
    "depth": 3,
    "learning_rate": 0.06,
    "random_strength": 10,
    "l2_leaf_reg": 300,
    "grow_policy": "SymmetricTree",
    "random_seed": 27,
    "classes_count": len(mapper),  # one class per entry in `mapper` (4)
    "bootstrap_type": "Bayesian",
    # Optional knobs left at their defaults:
    #   'bagging_temperature' applies to the Bayesian bootstrap type,
    #   'subsample' applies to the Poisson, Bernoulli and MVS bootstrap types.
}

# Stratified 15-fold split with a fixed seed so the folds are reproducible.
cv = StratifiedKFold(n_splits=15, random_state=435, shuffle=True)

# `catboost_cv_fit` is defined in an earlier cell; presumably it returns the per-fold
# estimators and the out-of-fold predictions — confirm against its definition.
cb_estimators, cb_oof = catboost_cv_fit(cb_params, data, target, cv)

train.csv: shape = 100000 rows, 52 cols
Sun May 30 13:21:15 2021, Cross-Validation, 100000 rows, 50 cols




0:	learn: 1.3525660	total: 5.74ms	remaining: 22.9s
10:	learn: 1.1880350	total: 44.6ms	remaining: 16.2s
20:	learn: 1.1387308	total: 92.5ms	remaining: 17.5s
30:	learn: 1.1211052	total: 131ms	remaining: 16.8s
40:	learn: 1.1144463	total: 170ms	remaining: 16.4s
50:	learn: 1.1113674	total: 223ms	remaining: 17.3s
60:	learn: 1.1096463	total: 271ms	remaining: 17.5s
70:	learn: 1.1084172	total: 310ms	remaining: 17.1s
80:	learn: 1.1073541	total: 349ms	remaining: 16.9s
90:	learn: 1.1064302	total: 388ms	remaining: 16.7s
100:	learn: 1.1056999	total: 436ms	remaining: 16.8s
110:	learn: 1.1050359	total: 472ms	remaining: 16.5s
120:	learn: 1.1042278	total: 514ms	remaining: 16.5s
130:	learn: 1.1036936	total: 550ms	remaining: 16.2s
140:	learn: 1.1031845	total: 593ms	remaining: 16.2s
150:	learn: 1.1027435	total: 629ms	remaining: 16s
160:	learn: 1.1023729	total: 666ms	remaining: 15.9s
170:	learn: 1.1019932	total: 702ms	remaining: 15.7s
180:	learn: 1.1013862	total: 741ms	remaining: 15.6s
190:	learn: 1.1008386	



0:	learn: 1.3525662	total: 5.02ms	remaining: 20.1s
10:	learn: 1.1883377	total: 42.2ms	remaining: 15.3s
20:	learn: 1.1387009	total: 81.8ms	remaining: 15.5s
30:	learn: 1.1212334	total: 119ms	remaining: 15.2s
40:	learn: 1.1144277	total: 161ms	remaining: 15.6s
50:	learn: 1.1112639	total: 198ms	remaining: 15.3s
60:	learn: 1.1095575	total: 249ms	remaining: 16.1s
70:	learn: 1.1082230	total: 284ms	remaining: 15.7s
80:	learn: 1.1072930	total: 320ms	remaining: 15.5s
90:	learn: 1.1064053	total: 355ms	remaining: 15.2s
100:	learn: 1.1057273	total: 390ms	remaining: 15.1s
110:	learn: 1.1050816	total: 424ms	remaining: 14.9s
120:	learn: 1.1044835	total: 463ms	remaining: 14.8s
130:	learn: 1.1038780	total: 498ms	remaining: 14.7s
140:	learn: 1.1034747	total: 532ms	remaining: 14.6s
150:	learn: 1.1031131	total: 565ms	remaining: 14.4s
160:	learn: 1.1026648	total: 596ms	remaining: 14.2s
170:	learn: 1.1021226	total: 631ms	remaining: 14.1s
180:	learn: 1.1016646	total: 668ms	remaining: 14.1s
190:	learn: 1.101103



0:	learn: 1.3525757	total: 5.18ms	remaining: 20.7s
10:	learn: 1.1884396	total: 44.1ms	remaining: 16s
20:	learn: 1.1388986	total: 82.1ms	remaining: 15.6s
30:	learn: 1.1213731	total: 120ms	remaining: 15.4s
40:	learn: 1.1145534	total: 168ms	remaining: 16.2s
50:	learn: 1.1115165	total: 211ms	remaining: 16.3s
60:	learn: 1.1098832	total: 261ms	remaining: 16.9s
70:	learn: 1.1085309	total: 299ms	remaining: 16.6s
80:	learn: 1.1074879	total: 356ms	remaining: 17.2s
90:	learn: 1.1064720	total: 394ms	remaining: 16.9s
100:	learn: 1.1055901	total: 437ms	remaining: 16.9s
110:	learn: 1.1049636	total: 477ms	remaining: 16.7s
120:	learn: 1.1042366	total: 515ms	remaining: 16.5s
130:	learn: 1.1036838	total: 550ms	remaining: 16.2s
140:	learn: 1.1034074	total: 585ms	remaining: 16s
150:	learn: 1.1029754	total: 618ms	remaining: 15.7s
160:	learn: 1.1025640	total: 656ms	remaining: 15.6s
170:	learn: 1.1021307	total: 691ms	remaining: 15.5s
180:	learn: 1.1016592	total: 728ms	remaining: 15.4s
190:	learn: 1.1012203	to



0:	learn: 1.3525927	total: 5.44ms	remaining: 21.8s
10:	learn: 1.1883344	total: 44.3ms	remaining: 16.1s
20:	learn: 1.1387004	total: 85.2ms	remaining: 16.1s
30:	learn: 1.1210619	total: 126ms	remaining: 16.1s
40:	learn: 1.1143659	total: 173ms	remaining: 16.7s
50:	learn: 1.1112374	total: 231ms	remaining: 17.9s
60:	learn: 1.1095273	total: 274ms	remaining: 17.7s
70:	learn: 1.1081965	total: 314ms	remaining: 17.4s
80:	learn: 1.1071441	total: 355ms	remaining: 17.2s
90:	learn: 1.1061738	total: 393ms	remaining: 16.9s
100:	learn: 1.1053186	total: 438ms	remaining: 16.9s
110:	learn: 1.1046768	total: 475ms	remaining: 16.6s
120:	learn: 1.1039992	total: 515ms	remaining: 16.5s
130:	learn: 1.1034656	total: 550ms	remaining: 16.3s
140:	learn: 1.1031392	total: 587ms	remaining: 16.1s
150:	learn: 1.1026506	total: 627ms	remaining: 16s
160:	learn: 1.1023024	total: 669ms	remaining: 16s
170:	learn: 1.1018107	total: 709ms	remaining: 15.9s
180:	learn: 1.1013022	total: 751ms	remaining: 15.9s
190:	learn: 1.1008898	to



0:	learn: 1.3525814	total: 5.12ms	remaining: 20.5s
10:	learn: 1.1881568	total: 43.3ms	remaining: 15.7s
20:	learn: 1.1388342	total: 87.4ms	remaining: 16.6s
30:	learn: 1.1213044	total: 127ms	remaining: 16.3s
40:	learn: 1.1144221	total: 172ms	remaining: 16.6s
50:	learn: 1.1113856	total: 216ms	remaining: 16.7s
60:	learn: 1.1096983	total: 259ms	remaining: 16.7s
70:	learn: 1.1084684	total: 295ms	remaining: 16.3s
80:	learn: 1.1073970	total: 345ms	remaining: 16.7s
90:	learn: 1.1065047	total: 382ms	remaining: 16.4s
100:	learn: 1.1056344	total: 427ms	remaining: 16.5s
110:	learn: 1.1050744	total: 463ms	remaining: 16.2s
120:	learn: 1.1043582	total: 501ms	remaining: 16.1s
130:	learn: 1.1037290	total: 538ms	remaining: 15.9s
140:	learn: 1.1033846	total: 577ms	remaining: 15.8s
150:	learn: 1.1029450	total: 611ms	remaining: 15.6s
160:	learn: 1.1026068	total: 650ms	remaining: 15.5s
170:	learn: 1.1021133	total: 685ms	remaining: 15.3s
180:	learn: 1.1016353	total: 723ms	remaining: 15.3s
190:	learn: 1.101213



0:	learn: 1.3525859	total: 5.2ms	remaining: 20.8s
10:	learn: 1.1886619	total: 46.6ms	remaining: 16.9s
20:	learn: 1.1388030	total: 86.9ms	remaining: 16.5s
30:	learn: 1.1210192	total: 125ms	remaining: 16s
40:	learn: 1.1142461	total: 165ms	remaining: 15.9s
50:	learn: 1.1112621	total: 209ms	remaining: 16.2s
60:	learn: 1.1095502	total: 242ms	remaining: 15.7s
70:	learn: 1.1081239	total: 278ms	remaining: 15.4s
80:	learn: 1.1072499	total: 313ms	remaining: 15.1s
90:	learn: 1.1062884	total: 348ms	remaining: 14.9s
100:	learn: 1.1053845	total: 383ms	remaining: 14.8s
110:	learn: 1.1046886	total: 423ms	remaining: 14.8s
120:	learn: 1.1040574	total: 458ms	remaining: 14.7s
130:	learn: 1.1034227	total: 493ms	remaining: 14.5s
140:	learn: 1.1029407	total: 531ms	remaining: 14.5s
150:	learn: 1.1024804	total: 565ms	remaining: 14.4s
160:	learn: 1.1020238	total: 598ms	remaining: 14.3s
170:	learn: 1.1015000	total: 640ms	remaining: 14.3s
180:	learn: 1.1010266	total: 675ms	remaining: 14.3s
190:	learn: 1.1005001	t



0:	learn: 1.3533562	total: 5.1ms	remaining: 20.4s
10:	learn: 1.1882820	total: 42.8ms	remaining: 15.5s
20:	learn: 1.1386573	total: 81ms	remaining: 15.3s
30:	learn: 1.1210591	total: 120ms	remaining: 15.4s
40:	learn: 1.1142112	total: 165ms	remaining: 15.9s
50:	learn: 1.1111523	total: 210ms	remaining: 16.2s
60:	learn: 1.1095231	total: 252ms	remaining: 16.3s
70:	learn: 1.1082880	total: 288ms	remaining: 16s
80:	learn: 1.1071938	total: 326ms	remaining: 15.8s
90:	learn: 1.1062551	total: 364ms	remaining: 15.6s
100:	learn: 1.1054240	total: 401ms	remaining: 15.5s
110:	learn: 1.1049337	total: 443ms	remaining: 15.5s
120:	learn: 1.1042508	total: 481ms	remaining: 15.4s
130:	learn: 1.1036299	total: 517ms	remaining: 15.3s
140:	learn: 1.1031811	total: 552ms	remaining: 15.1s
150:	learn: 1.1027602	total: 595ms	remaining: 15.2s
160:	learn: 1.1023489	total: 658ms	remaining: 15.7s
170:	learn: 1.1019286	total: 694ms	remaining: 15.5s
180:	learn: 1.1013997	total: 734ms	remaining: 15.5s
190:	learn: 1.1009607	tot



0:	learn: 1.3525715	total: 5.07ms	remaining: 20.3s
10:	learn: 1.1881897	total: 42.7ms	remaining: 15.5s
20:	learn: 1.1386617	total: 87.6ms	remaining: 16.6s
30:	learn: 1.1211313	total: 125ms	remaining: 16s
40:	learn: 1.1144347	total: 170ms	remaining: 16.4s
50:	learn: 1.1114547	total: 214ms	remaining: 16.6s
60:	learn: 1.1097096	total: 250ms	remaining: 16.1s
70:	learn: 1.1084220	total: 287ms	remaining: 15.9s
80:	learn: 1.1073482	total: 324ms	remaining: 15.7s
90:	learn: 1.1064099	total: 362ms	remaining: 15.5s
100:	learn: 1.1056187	total: 400ms	remaining: 15.4s
110:	learn: 1.1050245	total: 443ms	remaining: 15.5s
120:	learn: 1.1043584	total: 480ms	remaining: 15.4s
130:	learn: 1.1037960	total: 518ms	remaining: 15.3s
140:	learn: 1.1033797	total: 566ms	remaining: 15.5s
150:	learn: 1.1028439	total: 606ms	remaining: 15.4s
160:	learn: 1.1025243	total: 639ms	remaining: 15.2s
170:	learn: 1.1020667	total: 681ms	remaining: 15.3s
180:	learn: 1.1014981	total: 719ms	remaining: 15.2s
190:	learn: 1.1009578	



0:	learn: 1.3525692	total: 4.8ms	remaining: 19.2s
10:	learn: 1.1884899	total: 40.3ms	remaining: 14.6s
20:	learn: 1.1388438	total: 75.9ms	remaining: 14.4s
30:	learn: 1.1212065	total: 113ms	remaining: 14.5s
40:	learn: 1.1144798	total: 157ms	remaining: 15.1s
50:	learn: 1.1115077	total: 192ms	remaining: 14.9s
60:	learn: 1.1096991	total: 244ms	remaining: 15.7s
70:	learn: 1.1082941	total: 279ms	remaining: 15.5s
80:	learn: 1.1073105	total: 322ms	remaining: 15.6s
90:	learn: 1.1064491	total: 358ms	remaining: 15.4s
100:	learn: 1.1055574	total: 402ms	remaining: 15.5s
110:	learn: 1.1048748	total: 437ms	remaining: 15.3s
120:	learn: 1.1041251	total: 479ms	remaining: 15.4s
130:	learn: 1.1035623	total: 512ms	remaining: 15.1s
140:	learn: 1.1031072	total: 546ms	remaining: 14.9s
150:	learn: 1.1026145	total: 578ms	remaining: 14.7s
160:	learn: 1.1021957	total: 609ms	remaining: 14.5s
170:	learn: 1.1017579	total: 648ms	remaining: 14.5s
180:	learn: 1.1012398	total: 698ms	remaining: 14.7s
190:	learn: 1.1008852



0:	learn: 1.3525829	total: 5.25ms	remaining: 21s
10:	learn: 1.1882503	total: 45.3ms	remaining: 16.4s
20:	learn: 1.1386321	total: 93.6ms	remaining: 17.7s
30:	learn: 1.1210085	total: 141ms	remaining: 18s
40:	learn: 1.1142556	total: 182ms	remaining: 17.6s
50:	learn: 1.1113153	total: 234ms	remaining: 18.1s
60:	learn: 1.1095372	total: 272ms	remaining: 17.5s
70:	learn: 1.1081288	total: 316ms	remaining: 17.5s
80:	learn: 1.1071547	total: 354ms	remaining: 17.1s
90:	learn: 1.1061312	total: 397ms	remaining: 17.1s
100:	learn: 1.1053105	total: 443ms	remaining: 17.1s
110:	learn: 1.1046511	total: 481ms	remaining: 16.9s
120:	learn: 1.1039643	total: 519ms	remaining: 16.6s
130:	learn: 1.1034167	total: 557ms	remaining: 16.5s
140:	learn: 1.1028699	total: 595ms	remaining: 16.3s
150:	learn: 1.1024725	total: 627ms	remaining: 16s
160:	learn: 1.1021838	total: 676ms	remaining: 16.1s
170:	learn: 1.1016285	total: 716ms	remaining: 16s
180:	learn: 1.1011353	total: 754ms	remaining: 15.9s
190:	learn: 1.1006962	total:



0:	learn: 1.3525642	total: 4.71ms	remaining: 18.9s
10:	learn: 1.1885605	total: 40.3ms	remaining: 14.6s
20:	learn: 1.1388795	total: 75.7ms	remaining: 14.3s
30:	learn: 1.1212410	total: 111ms	remaining: 14.2s
40:	learn: 1.1145551	total: 156ms	remaining: 15s
50:	learn: 1.1115610	total: 191ms	remaining: 14.8s
60:	learn: 1.1098745	total: 234ms	remaining: 15.1s
70:	learn: 1.1085350	total: 269ms	remaining: 14.9s
80:	learn: 1.1075906	total: 309ms	remaining: 15s
90:	learn: 1.1066313	total: 346ms	remaining: 14.9s
100:	learn: 1.1057821	total: 381ms	remaining: 14.7s
110:	learn: 1.1050293	total: 416ms	remaining: 14.6s
120:	learn: 1.1043215	total: 459ms	remaining: 14.7s
130:	learn: 1.1037031	total: 494ms	remaining: 14.6s
140:	learn: 1.1032828	total: 526ms	remaining: 14.4s
150:	learn: 1.1027404	total: 559ms	remaining: 14.3s
160:	learn: 1.1022827	total: 596ms	remaining: 14.2s
170:	learn: 1.1018391	total: 641ms	remaining: 14.3s
180:	learn: 1.1013502	total: 674ms	remaining: 14.2s
190:	learn: 1.1008491	to



0:	learn: 1.3525827	total: 5.09ms	remaining: 20.4s
10:	learn: 1.1883195	total: 43ms	remaining: 15.6s
20:	learn: 1.1390094	total: 84.9ms	remaining: 16.1s
30:	learn: 1.1213932	total: 124ms	remaining: 15.9s
40:	learn: 1.1146038	total: 177ms	remaining: 17s
50:	learn: 1.1115327	total: 222ms	remaining: 17.2s
60:	learn: 1.1097942	total: 258ms	remaining: 16.7s
70:	learn: 1.1084994	total: 301ms	remaining: 16.6s
80:	learn: 1.1074423	total: 339ms	remaining: 16.4s
90:	learn: 1.1064931	total: 376ms	remaining: 16.2s
100:	learn: 1.1055630	total: 417ms	remaining: 16.1s
110:	learn: 1.1049411	total: 475ms	remaining: 16.6s
120:	learn: 1.1042893	total: 513ms	remaining: 16.4s
130:	learn: 1.1036408	total: 550ms	remaining: 16.2s
140:	learn: 1.1032687	total: 584ms	remaining: 16s
150:	learn: 1.1030505	total: 615ms	remaining: 15.7s
160:	learn: 1.1026358	total: 657ms	remaining: 15.7s
170:	learn: 1.1021997	total: 701ms	remaining: 15.7s
180:	learn: 1.1016886	total: 738ms	remaining: 15.6s
190:	learn: 1.1011196	tota



0:	learn: 1.3525924	total: 5.13ms	remaining: 20.5s
10:	learn: 1.1883053	total: 43.2ms	remaining: 15.7s
20:	learn: 1.1386697	total: 81.3ms	remaining: 15.4s
30:	learn: 1.1210482	total: 119ms	remaining: 15.2s
40:	learn: 1.1142343	total: 166ms	remaining: 16s
50:	learn: 1.1112076	total: 212ms	remaining: 16.4s
60:	learn: 1.1095317	total: 252ms	remaining: 16.3s
70:	learn: 1.1081351	total: 291ms	remaining: 16.1s
80:	learn: 1.1072236	total: 329ms	remaining: 15.9s
90:	learn: 1.1062715	total: 369ms	remaining: 15.8s
100:	learn: 1.1054617	total: 407ms	remaining: 15.7s
110:	learn: 1.1046993	total: 456ms	remaining: 16s
120:	learn: 1.1040842	total: 494ms	remaining: 15.8s
130:	learn: 1.1035548	total: 529ms	remaining: 15.6s
140:	learn: 1.1031537	total: 565ms	remaining: 15.5s
150:	learn: 1.1027962	total: 601ms	remaining: 15.3s
160:	learn: 1.1024425	total: 645ms	remaining: 15.4s
170:	learn: 1.1020255	total: 686ms	remaining: 15.4s
180:	learn: 1.1014944	total: 721ms	remaining: 15.2s
190:	learn: 1.1009743	to



0:	learn: 1.3525773	total: 5.04ms	remaining: 20.2s
10:	learn: 1.1884321	total: 42.9ms	remaining: 15.6s
20:	learn: 1.1388612	total: 80.6ms	remaining: 15.3s
30:	learn: 1.1213686	total: 119ms	remaining: 15.2s
40:	learn: 1.1145232	total: 171ms	remaining: 16.5s
50:	learn: 1.1113502	total: 219ms	remaining: 16.9s
60:	learn: 1.1096784	total: 255ms	remaining: 16.5s
70:	learn: 1.1085103	total: 291ms	remaining: 16.1s
80:	learn: 1.1075162	total: 333ms	remaining: 16.1s
90:	learn: 1.1066340	total: 370ms	remaining: 15.9s
100:	learn: 1.1057793	total: 411ms	remaining: 15.9s
110:	learn: 1.1052044	total: 456ms	remaining: 16s
120:	learn: 1.1046604	total: 503ms	remaining: 16.1s
130:	learn: 1.1039945	total: 539ms	remaining: 15.9s
140:	learn: 1.1035062	total: 575ms	remaining: 15.7s
150:	learn: 1.1029783	total: 612ms	remaining: 15.6s
160:	learn: 1.1025572	total: 647ms	remaining: 15.4s
170:	learn: 1.1020543	total: 699ms	remaining: 15.6s
180:	learn: 1.1015182	total: 747ms	remaining: 15.8s
190:	learn: 1.1011226	



0:	learn: 1.3525778	total: 5.1ms	remaining: 20.4s
10:	learn: 1.1883453	total: 43.1ms	remaining: 15.6s
20:	learn: 1.1387900	total: 82.5ms	remaining: 15.6s
30:	learn: 1.1212146	total: 120ms	remaining: 15.4s
40:	learn: 1.1145156	total: 166ms	remaining: 16.1s
50:	learn: 1.1114850	total: 221ms	remaining: 17.1s
60:	learn: 1.1098458	total: 259ms	remaining: 16.7s
70:	learn: 1.1086052	total: 296ms	remaining: 16.4s
80:	learn: 1.1075598	total: 334ms	remaining: 16.2s
90:	learn: 1.1066566	total: 371ms	remaining: 16s
100:	learn: 1.1058283	total: 409ms	remaining: 15.8s
110:	learn: 1.1053460	total: 454ms	remaining: 15.9s
120:	learn: 1.1046936	total: 496ms	remaining: 15.9s
130:	learn: 1.1042087	total: 532ms	remaining: 15.7s
140:	learn: 1.1037861	total: 569ms	remaining: 15.6s
150:	learn: 1.1031712	total: 604ms	remaining: 15.4s
160:	learn: 1.1028783	total: 637ms	remaining: 15.2s
170:	learn: 1.1024407	total: 683ms	remaining: 15.3s
180:	learn: 1.1018628	total: 720ms	remaining: 15.2s
190:	learn: 1.1014078	t

In [18]:
# Multiclass log loss of the CatBoost out-of-fold predictions against the true labels.
cb_oof_log_loss = metrics.log_loss(target, cb_oof)
print(f"Out of fold log loss {cb_oof_log_loss}")

Out of fold log loss 1.0903385608051261


In [None]:
# Display the first fold's fitted CatBoost model as a quick sanity check.
cb_estimators[0]

<catboost.core.CatBoostClassifier at 0x7f5ccceaad10>

### XGBoost

In [None]:
def objective(trial):
    """Optuna objective: hold-out multiclass log loss of an XGBoost booster.

    Loads the training table, encodes the target as integers 0..3, trains a
    native XGBoost booster on 65% of the rows and scores it on the other 35%.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``max_depth`` and ``learning_rate`` and to
        report intermediate validation scores for pruning.

    Returns
    -------
    float
        Log loss on the hold-out part (lower is better).
    """
    mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
    data = get_input("train.csv").drop(columns='id')
    target = data['target'].map(mapper)
    data = data.drop(columns=['target'])

    # Fixed, stratified split so that every trial is scored on the same rows;
    # otherwise split noise is mixed into the hyper-parameter comparison.
    train_x, valid_x, train_y, valid_y = train_test_split(
        data, target, test_size=0.35, random_state=42, stratify=target
    )
    # The native XGBoost API takes DMatrix objects (there is no xgb.Dataset).
    dtrain = xgb.DMatrix(train_x, label=train_y)
    dvalid = xgb.DMatrix(valid_x, label=valid_y)

    param = {
        'objective': 'multi:softprob',
        'eval_metric': 'mlogloss',
        "verbosity": 0,  # XGBoost accepts 0..3; -1 is a LightGBM value
        'num_class': 4,
        'max_depth': trial.suggest_int('max_depth', 3,7),
        'learning_rate': trial.suggest_uniform('learning_rate', 0, 0.13 ),
    }

    # Add a callback for pruning. The observation key is
    # "<eval set name>-<metric>", so it must match the name given in `evals`.
    pruning_callback = optuna.integration.XGBoostPruningCallback(trial, "validation-mlogloss")
    # 100 rounds set explicitly: xgb.train defaults to 10, fewer than the
    # 20 warm-up steps of the MedianPruner, so pruning could never trigger.
    gbm = xgb.train(
        param,
        dtrain,
        num_boost_round=100,
        evals=[(dvalid, "validation")],
        verbose_eval=False,
        callbacks=[pruning_callback],
    )

    # A Booster predicts on a DMatrix; with multi:softprob the result holds
    # one probability column per class.
    preds = gbm.predict(dvalid)
    log_loss = sklearn.metrics.log_loss(valid_y, preds)
    return log_loss

In [None]:
# Minimise the objective over 200 trials; the median pruner stays inactive
# for the first 20 reported steps of each trial.
median_pruner = optuna.pruners.MedianPruner(n_warmup_steps=20)
study = optuna.create_study(pruner=median_pruner, direction="minimize")
study.optimize(objective, n_trials=200)

print("Best trial:")
trial = study.best_trial

print("  Params: ")
for name, chosen in trial.params.items():
    print("    {}: {}".format(name, chosen))

In [None]:
# Whole training set as a DMatrix for the native cross-validation routine.
dtrain = xgb.DMatrix(
    data=data, label=target
)

# The step size is given once, via `learning_rate`. The former extra entry
# "eta": "0.1" was a string and an alias of the same parameter, so the
# effective value was ambiguous. The duplicated 'max_depth' key is also gone
# (both copies were 3).
# NOTE(review): "random_seed" is kept as-is, but it sits next to
# 'random_state' and the `seed=42` argument below — confirm which one is
# intended to control the run.
xgb_params = {
    "booster": "gbtree",
    "random_seed": 42,
    'objective': 'multi:softprob',
    'eval_metric': 'mlogloss',
    'random_state': 13,
    'learning_rate': 0.08356451010151393,
    'gamma': 0.02911685058980812,
    'max_depth': 3,
    'min_child_weight': 10.748514454096288,
    'max_delta_step': 2.4474818433727927,
    'subsample': 0.6445037550866027,
    'colsample_bytree': 0.07634753656242108,
    'lambda': 13.663280761461781,
    'alpha': 21.521205761694137,
    'max_leaves': 48,
    'num_class': 4,
    'tree_method': 'gpu_hist',
}

# 5-fold stratified CV with early stopping after 100 rounds without
# improvement, capped at 2000 boosting rounds; progress is printed every
# 10 rounds.
cv_result_xgb = xgb.cv(
    params=xgb_params,
    dtrain=dtrain,
    num_boost_round=2000,
    early_stopping_rounds=100,
    verbose_eval=10,
    stratified=True,
    seed=42,
    metrics="mlogloss",
    shuffle=True,
    nfold=5,
)

In [None]:
# Number of rows in the xgb.cv result table (one row per boosting round kept).
cv_result_xgb.shape[0]

1838

In [9]:
import lightgbm as lgb

# Text class labels -> integer ids expected by the boosting libraries.
mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}

# Train loses its `id` column right away; test keeps it for the submission file.
data = get_input("train.csv").drop(columns='id')
test = get_input("test.csv")

# Split the encoded target off from the feature matrix.
target = data['target'].map(mapper)
data = data.drop(columns=['target'])

train.csv: shape = 100000 rows, 52 cols
test.csv: shape = 50000 rows, 51 cols


In [10]:
# XGBoost settings used for the cross-validated fit.
# The cell used to assign `xgb_params` twice; the first dict differed only in
# 'n_estimators' (710, annotated with the score 1.09042) and was overwritten
# immediately, so only the effective configuration is kept.
xgb_params = {
    "booster": "gbtree",
    #"eta": 0.1,
    "random_seed": 42,
    'objective': 'multi:softprob',
    'eval_metric': 'mlogloss',
    'random_state': 13,
    'learning_rate': 0.08,
    'gamma': 0.02911685058980812,
    'max_depth': 3,
    'min_child_weight': 10.748514454096288,
    'max_delta_step': 2.4474818433727927,
    'subsample': 0.6445037550866027,
    'colsample_bytree': 0.07,
    #'colsample_bylevel': 0.7,
    #'colsample_bynode': 0.7,
    'reg_lambda': 1,
    'reg_alpha': 5,
    'num_class': 4,
    'n_estimators': 850,  # needs tuning
    'tree_method': 'gpu_hist',
}

In [11]:
# Same 15-fold stratified scheme as for the other boosters; the project helper
# returns the fold models, the encoders and the out-of-fold predictions.
cv = StratifiedKFold(n_splits=15, shuffle=True, random_state=435)
xgb_estimators, xgb_encoders, xgb_oof = xgboost_cv_fit(xgb_params, data, target, cv)

Sun May 30 13:18:10 2021, Cross-Validation, 100000 rows, 50 cols
Fold 1, Valid score = 1.09484
Fold 2, Valid score = 1.08842
Fold 3, Valid score = 1.08464
Fold 4, Valid score = 1.09311
Fold 5, Valid score = 1.0897
Fold 6, Valid score = 1.09275
Fold 7, Valid score = 1.09376
Fold 8, Valid score = 1.08843
Fold 9, Valid score = 1.08901
Fold 10, Valid score = 1.09406
Fold 11, Valid score = 1.0894
Fold 12, Valid score = 1.09054
Fold 13, Valid score = 1.09157
Fold 14, Valid score = 1.08504
Fold 15, Valid score = 1.08761


In [12]:
# Multiclass log loss of the XGBoost out-of-fold predictions against the true labels.
xgb_oof_log_loss = metrics.log_loss(target, xgb_oof)
print(f"Out of fold log loss {xgb_oof_log_loss}")

Out of fold log loss 1.0901912643016558


In [None]:
# Display the first fold's fitted XGBoost model and its parameters.
xgb_estimators[0]

XGBClassifier(alpha=21.521205761694137, colsample_bytree=0.07634753656242108,
              eta=0.1, eval_metric='mlogloss', gamma=0.02911685058980812,
              lambda=13.663280761461781, learning_rate=0.08356451010151393,
              max_delta_step=2.4474818433727927, max_leaves=48,
              min_child_weight=10.748514454096288, n_estimators=700,
              num_class=4, objective='multi:softprob', random_seed=42,
              random_state=13, subsample=0.6445037550866027,
              tree_method='gpu_hist')

Проверим, как работает усреднение

In [38]:
# Stack the three out-of-fold probability matrices along a new last axis and
# average over that axis to get a plain mean blend, then score it.
result_lgb_xgb_cb = np.dstack((lgb_oof, cb_oof, xgb_oof))
result_lgb_xgb_cb_mean = result_lgb_xgb_cb.mean(axis=2)
blend_log_loss = metrics.log_loss(target, result_lgb_xgb_cb_mean)
print(f"Out of fold log loss {blend_log_loss}")

Out of fold log loss 1.0898707403357817


### Модель второго уровня - логистическая регрессия

In [19]:
# Turn each booster's out-of-fold probabilities into a frame whose columns are
# prefixed with the model name, then place the frames side by side: these are
# the inputs of the second-level model.
result_lgbm_df = pd.DataFrame(lgb_oof).rename(
    columns=dict(enumerate(['lgb_class_1', 'lgb_class_2', 'lgb_class_3', 'lgb_class_4']))
)
result_xgb_df = pd.DataFrame(xgb_oof).rename(
    columns=dict(enumerate(['xgb_class_1', 'xgb_class_2', 'xgb_class_3', 'xgb_class_4']))
)
result_cb_df = pd.DataFrame(cb_oof).rename(
    columns=dict(enumerate(['cb_class_1', 'cb_class_2', 'cb_class_3', 'cb_class_4']))
)
result_valid = pd.concat([result_lgbm_df, result_xgb_df, result_cb_df], axis=1)

In [20]:
# Settings of the second-level multinomial logistic regression.
params = dict(
    random_state=0,
    max_iter=300,
    multi_class='multinomial',
    solver='lbfgs',
)

# 70/30 shuffled split of the stacked out-of-fold features.
x_train_logreg, x_valid_logreg, y_train_logreg, y_valid_logreg = train_test_split(
    result_valid,
    target,
    train_size=0.7,
    shuffle=True,
    random_state=30,
)

logreg = LogisticRegression(**params)
logreg.fit(x_train_logreg, y_train_logreg)

# 1.08521
# Hold-out log loss of the stacker; left as the last expression so the
# notebook displays it.
y_pred_logreg = logreg.predict_proba(x_valid_logreg)
metrics.log_loss(y_valid_logreg, y_pred_logreg)

1.0847737572719598

In [21]:
# Refit the stacker on the full set of out-of-fold features (noted score: 1.08522).
# `fit` is the last expression, so the fitted estimator is displayed.
logreg = LogisticRegression(**params)
logreg.fit(result_valid, target)

LogisticRegression(max_iter=300, multi_class='multinomial', random_state=0)

### Модель второго уровня - ансамбль логистических регрессий

In [48]:
# Same stacker as above but with more iterations and C=5.
logreg_params = dict(
    random_state=0,
    max_iter=500,
    multi_class='multinomial',
    solver='lbfgs',
    C=5,
)

# 10 stratified folds; the project helper returns the fold models, their
# out-of-fold predictions and the scalers it produced.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=435)
estimators_log_reg, oof_log_reg, scalers = logistic_cv_fit(
    logreg_params, result_valid, target, cv
)

Sun May 30 14:24:26 2021, Cross-Validation, 100000 rows, 12 cols
Fold 1, Valid score = 1.09193
Fold 2, Valid score = 1.0852
Fold 3, Valid score = 1.08927
Fold 4, Valid score = 1.09131
Fold 5, Valid score = 1.08915
Fold 6, Valid score = 1.08704
Fold 7, Valid score = 1.088
Fold 8, Valid score = 1.09155
Fold 9, Valid score = 1.08871
Fold 10, Valid score = 1.08573
Score by each fold: [1.09193, 1.0852, 1.08927, 1.09131, 1.08915, 1.08704, 1.088, 1.09155, 1.08871, 1.08573]


In [49]:
# 1.08522
# Out-of-fold log loss of the cross-validated logistic regressions.
logreg_oof_log_loss = metrics.log_loss(target, oof_log_reg)
print(f"Out of fold log loss {logreg_oof_log_loss}")

Out of fold log loss 1.0887875510703202


## Получение результата (одна модель логистической регрессии)

In [24]:
# Test-set probabilities from every booster family (all fold models are
# passed to the project helper; `id` is not a feature).
result_xgb_test = multi_estimators_predict(xgb_estimators, test.drop(columns='id'))
result_lgb_test = multi_estimators_predict(lgb_estimators, test.drop(columns='id'))
result_cb_test = multi_estimators_predict(cb_estimators, test.drop(columns='id'))

# Same column naming and ordering (lgb, xgb, cb) as the frame the stacker was fitted on.
result_lgb_test_df = pd.DataFrame(result_lgb_test).rename(
    columns=dict(enumerate(['lgb_class_1', 'lgb_class_2', 'lgb_class_3', 'lgb_class_4']))
)
result_xgb_test_df = pd.DataFrame(result_xgb_test).rename(
    columns=dict(enumerate(['xgb_class_1', 'xgb_class_2', 'xgb_class_3', 'xgb_class_4']))
)
result_cb_test_df = pd.DataFrame(result_cb_test).rename(
    columns=dict(enumerate(['cb_class_1', 'cb_class_2', 'cb_class_3', 'cb_class_4']))
)
result_for_logreg = pd.concat([result_lgb_test_df, result_xgb_test_df, result_cb_test_df], axis=1)

# Second-level prediction, then the submission: first test column + class probabilities.
y_pred_logreg = logreg.predict_proba(result_for_logreg)
test_pred_df = pd.DataFrame(y_pred_logreg).rename(
    columns=dict(enumerate(['class_1', 'class_2', 'class_3', 'class_4']))
)
first_column_only = test.drop(columns=test.columns.to_list()[1:])
result = pd.concat([first_column_only, test_pred_df], axis=1)
result.to_csv('result.csv', index=False)

## Получение результата (одна модель логистической регрессии) без усреднения бустинговых моделей

In [109]:
def multi_estimators_predict_1(estimators: List,
                             X: pd.DataFrame):
  """Predict every row of ``X`` with exactly one of the fitted estimators.

  The rows are shuffled with a fixed seed and cut into ``len(estimators)``
  disjoint chunks of (almost) equal size; chunk ``i`` is predicted by
  ``estimators[i]``. No averaging across estimators takes place.

  The previous version unpacked the *training* indices of ``KFold.split`` as
  the rows to predict, so every estimator scored about (k-1)/k of the rows
  and later folds overwrote earlier ones. It also addressed rows with
  ``.loc``, which only works on a 0..n-1 index, and required a
  ``feature_0`` column.

  Parameters
  ----------
  estimators : List
      Fitted models exposing ``predict_proba``.
  X : pd.DataFrame
      Feature matrix; any index is accepted (rows are taken by position).

  Returns
  -------
  np.ndarray
      Array of shape ``(len(X), n_classes)`` in the row order of ``X``;
      ``n_classes`` is taken from the first prediction.

  Raises
  ------
  ValueError
      If ``estimators`` is empty.
  """
  if len(estimators) == 0:
    raise ValueError("estimators must contain at least one fitted model")

  n_rows = X.shape[0]
  # Seeded shuffle keeps the row-to-estimator assignment reproducible.
  shuffled_positions = np.random.RandomState(435).permutation(n_rows)
  chunks = np.array_split(shuffled_positions, len(estimators))

  preds = None
  for estimator, positions in zip(estimators, chunks):
    if positions.size == 0:
      # More estimators than rows: nothing left for this one.
      continue
    positions = np.sort(positions)
    # Positional access: the values are row positions, not index labels.
    chunk_pred = np.asarray(estimator.predict_proba(X.iloc[positions]))
    if preds is None:
      preds = np.zeros((n_rows, chunk_pred.shape[1]))
    preds[positions] = chunk_pred

  if preds is None:
    # Empty input: keep the original 4-column output shape.
    preds = np.zeros((n_rows, 4))
  return preds

In [90]:
# Replace the index of `test` with a fresh 0..n-1 range, discarding the old one.
test = test.reset_index(drop=True)

In [110]:
# Test-set probabilities where the fold models are applied through
# multi_estimators_predict_1 instead of the averaging helper.
result_xgb_test = multi_estimators_predict_1(xgb_estimators, test.drop(columns='id'))
result_lgb_test = multi_estimators_predict_1(lgb_estimators, test.drop(columns='id'))
result_cb_test = multi_estimators_predict_1(cb_estimators, test.drop(columns='id'))

# Same column naming and ordering (lgb, xgb, cb) as the frame the stacker was fitted on.
result_lgb_test_df = pd.DataFrame(result_lgb_test).rename(
    columns=dict(enumerate(['lgb_class_1', 'lgb_class_2', 'lgb_class_3', 'lgb_class_4']))
)
result_xgb_test_df = pd.DataFrame(result_xgb_test).rename(
    columns=dict(enumerate(['xgb_class_1', 'xgb_class_2', 'xgb_class_3', 'xgb_class_4']))
)
result_cb_test_df = pd.DataFrame(result_cb_test).rename(
    columns=dict(enumerate(['cb_class_1', 'cb_class_2', 'cb_class_3', 'cb_class_4']))
)
result_for_logreg = pd.concat([result_lgb_test_df, result_xgb_test_df, result_cb_test_df], axis=1)

# Second-level prediction, then the submission: first test column + class probabilities.
y_pred_logreg = logreg.predict_proba(result_for_logreg)
test_pred_df = pd.DataFrame(y_pred_logreg).rename(
    columns=dict(enumerate(['class_1', 'class_2', 'class_3', 'class_4']))
)
first_column_only = test.drop(columns=test.columns.to_list()[1:])
result = pd.concat([first_column_only, test_pred_df], axis=1)
result.to_csv('result.csv', index=False)

Сработало плохо

## Получение результата (ансамбль логистических регрессий)

In [27]:
# Test-set probabilities from every booster family (all fold models are
# passed to the project helper; `id` is not a feature).
result_xgb_test = multi_estimators_predict(xgb_estimators, test.drop(columns='id'))
result_lgb_test = multi_estimators_predict(lgb_estimators, test.drop(columns='id'))
result_cb_test = multi_estimators_predict(cb_estimators, test.drop(columns='id'))

# Same column naming and ordering (lgb, xgb, cb) as the frame the stackers were fitted on.
result_lgb_test_df = pd.DataFrame(result_lgb_test).rename(
    columns=dict(enumerate(['lgb_class_1', 'lgb_class_2', 'lgb_class_3', 'lgb_class_4']))
)
result_xgb_test_df = pd.DataFrame(result_xgb_test).rename(
    columns=dict(enumerate(['xgb_class_1', 'xgb_class_2', 'xgb_class_3', 'xgb_class_4']))
)
result_cb_test_df = pd.DataFrame(result_cb_test).rename(
    columns=dict(enumerate(['cb_class_1', 'cb_class_2', 'cb_class_3', 'cb_class_4']))
)
result_for_logreg = pd.concat([result_lgb_test_df, result_xgb_test_df, result_cb_test_df], axis=1)

In [28]:
# Second-level prediction with all cross-validated logistic regressions.
# NOTE(review): logistic_cv_fit also returned `scalers`, which are not used
# here; if the fold models were fitted on scaled features, the same scaling
# has to be applied to result_for_logreg — confirm against logistic_cv_fit.
result = multi_estimators_predict(estimators_log_reg, result_for_logreg)

In [29]:
# Name the class-probability columns and write the submission:
# first test column + the four probabilities.
test_pred_df = pd.DataFrame(result).rename(
    columns=dict(enumerate(['class_1', 'class_2', 'class_3', 'class_4']))
)
first_column_only = test.drop(columns=test.columns.to_list()[1:])
result = pd.concat([first_column_only, test_pred_df], axis=1)
result.to_csv('result.csv', index=False)

С предыдущим вариантом различий нет, так как линейные модели очень похожи друг на друга

## Получение результата (одна модель логистической регрессии) с предсказаниями лучших бустинговых моделей

In [43]:
# One hand-picked fold model per booster family: CatBoost fold index 2,
# LightGBM fold index 2 and XGBoost fold index 10.
test_features = test.drop(columns='id')
result_cb_test = cb_estimators[2].predict_proba(test_features)
result_lgb_test = lgb_estimators[2].predict_proba(test_features)
result_xgb_test = xgb_estimators[10].predict_proba(test_features)

# Same column naming and ordering (lgb, xgb, cb) as the frame the stacker was fitted on.
result_lgb_test_df = pd.DataFrame(result_lgb_test).rename(
    columns=dict(enumerate(['lgb_class_1', 'lgb_class_2', 'lgb_class_3', 'lgb_class_4']))
)
result_xgb_test_df = pd.DataFrame(result_xgb_test).rename(
    columns=dict(enumerate(['xgb_class_1', 'xgb_class_2', 'xgb_class_3', 'xgb_class_4']))
)
result_cb_test_df = pd.DataFrame(result_cb_test).rename(
    columns=dict(enumerate(['cb_class_1', 'cb_class_2', 'cb_class_3', 'cb_class_4']))
)
result_for_logreg = pd.concat([result_lgb_test_df, result_xgb_test_df, result_cb_test_df], axis=1)

In [44]:
# Second-level prediction, then the submission: first test column + class probabilities.
y_pred_logreg = logreg.predict_proba(result_for_logreg)
test_pred_df = pd.DataFrame(y_pred_logreg).rename(
    columns=dict(enumerate(['class_1', 'class_2', 'class_3', 'class_4']))
)
first_column_only = test.drop(columns=test.columns.to_list()[1:])
result = pd.concat([first_column_only, test_pred_df], axis=1)
result.to_csv('result.csv', index=False)

## Получение результата (одна модель логистической регрессии) с предсказаниями бустинговых моделей, заново обученных на исходном датасете

In [54]:
# LightGBM settings for the refit on the whole training set.
# The first key used to be "boosting_type " with a trailing space, so it was
# not the actual `boosting_type` argument of LGBMClassifier but an unknown
# extra keyword.
lgb_params = {
    "boosting_type": "gbdt",
    "objective": "multiclass",
    "metric": "multi_logloss",
    'n_estimators': 710,
    'num_class': 4,
    'lambda_l1': 0.02,
    'lambda_l2': 150,
    'num_leaves': 7,
    'feature_fraction': 0.11,
    'bagging_fraction': 0.9,
    'bagging_freq': 4,
    'min_child_samples': 80,
    'max_depth': 5,
    'learning_rate': 0.075,
    'random_state': 42,
    #'max_bin': 50,
    #'device' : 'gpu'
}

# CatBoost settings for the refit on the whole training set.
cb_params = dict(
    n_estimators=4000,
    loss_function="MultiClass",
    eval_metric="MultiClass",
    task_type="GPU",
    min_data_in_leaf=35,
    depth=3,
    learning_rate=0.06,
    random_strength=10,
    l2_leaf_reg=300,
    grow_policy="SymmetricTree",
    random_seed=27,
    classes_count=4,
    bootstrap_type='Bayesian',
    # bagging_temperature=50,  # for Bayesian bootstrap_type
    # subsample=0.6,  # for Poisson, Bernoulli, MVS bootstrap_type
)

# XGBoost settings for the refit on the whole training set.
xgb_params = dict(
    booster="gbtree",
    # eta=0.1,
    random_seed=42,
    objective='multi:softprob',
    eval_metric='mlogloss',
    random_state=13,
    learning_rate=0.08,
    gamma=0.02911685058980812,
    max_depth=3,
    min_child_weight=10.748514454096288,
    max_delta_step=2.4474818433727927,
    subsample=0.6445037550866027,
    colsample_bytree=0.07,
    # colsample_bylevel=0.7,
    # colsample_bynode=0.7,
    reg_lambda=1,
    reg_alpha=5,
    num_class=4,
    n_estimators=850,  # needs tuning
    tree_method='gpu_hist',
)

In [None]:
# One model per booster family, each fitted on the complete training set
# (no folds), logging every 10 iterations.
model_catb = cb.CatBoostClassifier(**cb_params)
model_catb.fit(data, target, verbose=10)

model_lgb = lgb.LGBMClassifier(**lgb_params)
model_lgb.fit(data, target, verbose=10)

model_xgb = xgb.XGBClassifier(**xgb_params)
model_xgb.fit(data, target, verbose=10)

# Class probabilities for the test rows; `id` is not a feature.
test_features = test.drop(columns='id')
result_cb_test_1 = model_catb.predict_proba(test_features)
result_lgb_test_1 = model_lgb.predict_proba(test_features)
result_xgb_test_1 = model_xgb.predict_proba(test_features)

In [58]:
# Frame the refitted boosters' test probabilities with the same column naming
# and ordering (lgb, xgb, cb) as the frame the stacker was fitted on.
result_lgb_test_df = pd.DataFrame(result_lgb_test_1).rename(
    columns=dict(enumerate(['lgb_class_1', 'lgb_class_2', 'lgb_class_3', 'lgb_class_4']))
)
result_xgb_test_df = pd.DataFrame(result_xgb_test_1).rename(
    columns=dict(enumerate(['xgb_class_1', 'xgb_class_2', 'xgb_class_3', 'xgb_class_4']))
)
result_cb_test_df = pd.DataFrame(result_cb_test_1).rename(
    columns=dict(enumerate(['cb_class_1', 'cb_class_2', 'cb_class_3', 'cb_class_4']))
)
result_for_logreg = pd.concat([result_lgb_test_df, result_xgb_test_df, result_cb_test_df], axis=1)

In [59]:
# Second-level prediction, then the submission: first test column + class probabilities.
y_pred_logreg = logreg.predict_proba(result_for_logreg)
test_pred_df = pd.DataFrame(y_pred_logreg).rename(
    columns=dict(enumerate(['class_1', 'class_2', 'class_3', 'class_4']))
)
first_column_only = test.drop(columns=test.columns.to_list()[1:])
result = pd.concat([first_column_only, test_pred_df], axis=1)
result.to_csv('result.csv', index=False)

1.08558 — видимо, переобучать заново бустинговые алгоритмы — не лучший выбор