## Загрузка данных и подключение библиотек

In [1]:
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json
!kaggle competitions download -c tabular-playground-series-may-2021
!pip install catboost
!pip install eli5
!pip install optuna
!pip install shap
!pip install scikit-learn-extra
!unzip /content/test.csv.zip
!unzip /content/train.csv.zip
!unzip /content/sample_submission.csv.zip

kaggle.json
Downloading test.csv.zip to /content
  0% 0.00/851k [00:00<?, ?B/s]
100% 851k/851k [00:00<00:00, 46.4MB/s]
Downloading sample_submission.csv.zip to /content
  0% 0.00/128k [00:00<?, ?B/s]
100% 128k/128k [00:00<00:00, 34.3MB/s]
Downloading train.csv.zip to /content
  0% 0.00/1.72M [00:00<?, ?B/s]
100% 1.72M/1.72M [00:00<00:00, 103MB/s]
Collecting catboost
[?25l  Downloading https://files.pythonhosted.org/packages/47/80/8e9c57ec32dfed6ba2922bc5c96462cbf8596ce1a6f5de532ad1e43e53fe/catboost-0.25.1-cp37-none-manylinux1_x86_64.whl (67.3MB)
[K     |████████████████████████████████| 67.3MB 57kB/s 
Installing collected packages: catboost
Successfully installed catboost-0.25.1
Collecting eli5
[?25l  Downloading https://files.pythonhosted.org/packages/d1/54/04cab6e1c0ae535bec93f795d8403fdf6caf66fa5a6512263202dbb14ea6/eli5-0.11.0-py2.py3-none-any.whl (106kB)
[K     |████████████████████████████████| 112kB 12.7MB/s 
Installing collected packages: eli5
Successfully installed eli5-0.1

In [2]:
import pandas as pd
import seaborn as sns
import numpy as np
import catboost as cb
import sklearn
import xgboost as xgb
from sklearn.model_selection import train_test_split, RepeatedKFold
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.decomposition import PCA, FactorAnalysis as FA
from typing import List, Optional
from sklearn.model_selection import KFold, cross_val_score, StratifiedKFold, train_test_split
from typing import List, Tuple
import scipy.stats as ss
from sklearn_extra.cluster import KMedoids
import math
from sklearn.utils.validation import check_is_fitted
import eli5
from sklearn.base import BaseEstimator, TransformerMixin
import time
from sklearn.cluster import KMeans
from sklearn.metrics import confusion_matrix
from sklearn.inspection import permutation_importance
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import lightgbm as lgb
import shap
import missingno as msno
from sklearn.inspection import permutation_importance
from eli5.sklearn import PermutationImportance
import optuna
from sklearn.metrics import log_loss
pd.plotting.register_matplotlib_converters()
%matplotlib inline
sns.set(color_codes=True)
pal = sns.color_palette("viridis", 10)
sns.set_palette(pal)

## Используемые функции

In [3]:
def get_input(data_path: str) -> pd.DataFrame:
  """
  Считывание данных и вывод основной информации о наборе данных.

  Parmeters
  ---------
  data_path: str - название файла

  Returns
  -------
  data: pandas.core.frame.DataFrame - загруженный набор данных в pandas.Dataframe
  """
  base_path = "/content"
  data = pd.read_csv(f"{base_path}/{data_path}")
  data.columns = [col.lower() for col in data.columns]
  print(f"{data_path}: shape = {data.shape[0]} rows, {data.shape[1]} cols")
  return data

In [4]:
def plot_feature_importance(importance, names, model_type, figsize=(10,8)):

  #Create arrays from feature importance and feature names
  feature_importance = np.array(importance)
  feature_names = np.array(names)

  #Create a DataFrame using a Dictionary
  data={'feature_names':feature_names,'feature_importance':feature_importance}
  fi_df = pd.DataFrame(data)

  #Sort the DataFrame in order decreasing feature importance
  fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)

  #Define size of bar plot
  plt.figure(figsize=figsize)
  #Plot Searborn bar chart
  sns.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
  #Add chart labels
  plt.title(model_type + ' FEATURE IMPORTANCE')
  plt.xlabel('FEATURE IMPORTANCE')
  plt.ylabel('FEATURE NAMES')
  return list(fi_df['feature_names'].values)

In [5]:
def multi_estimators_predict(estimators: List,
                             x_valid: pd.DataFrame,
                             y_valid = None,
                             metric: callable = None,
                             scalers: List = None):
  preds = []
  evals = []
  if scalers:
    assert len(estimators) == len(scalers)

  for i in range(len(estimators)):
    if scalers:
      pred = estimators[i].predict_proba(scalers[i].transform(x_valid))
    else:
      if type(estimators[0]) == xgb.core.Booster:
        pred = estimators[i].predict(x_valid)
      else:
        pred = estimators[i].predict_proba(x_valid)

    
    preds.append(pred)
    if (y_valid is not None) and (metric is not None):
      eval = metric(y_valid, pred)
      evals.append(eval)
  
  result = np.stack([preds], axis=0)
  result = np.squeeze(result, axis=0)
  result = np.mean(result, axis=0)

  if (y_valid is not None) & (metric is not None):
    for i, eval in enumerate(evals):
      print(f"Model {i} metric: {eval:.7}")
    print(f"Result model metric: {metric(y_valid, result):.7}")

  return result

In [6]:
def catboost_cv_fit(params, X, y, cv, categorical = None):
    """
    Кросс-валидация для модели catboost.

    Parameters
    ----------
    params: dict
        Словарь гиперпараметров модели.

    X: pandas.core.frame.DataFrame
        Матрица признако для обучения модели.

    y: pandas.core.frame.Series
        Вектор целевой переменной для обучения модели.

    cv: KFold or StratifiedKFold generator.
        Объект KFold / StratifiedKFold для определения
        стратегии кросс-валидации модели.

    categorical: str, optional, default = None
        Список категориальных признаков.
        Опциональный параметр, по умолчанию, не используется.

    Returns
    -------
    estimators: list
        Список с объектами обученной модели.

    oof_preds: np.array
        Вектор OOF-прогнозов.

    """
    if not categorical:
        categorical = "auto"

    estimators, folds_scores = [], []
    oof_preds = np.zeros((X.shape[0], 4))
    print(f"{time.ctime()}, Cross-Validation, {X.shape[0]} rows, {X.shape[1]} cols")

    for fold, (train_idx, valid_idx) in enumerate(cv.split(X, y)):
        x_train, x_valid = X.loc[train_idx], X.loc[valid_idx]
        y_train, y_valid = y[train_idx], y[valid_idx]

        model = cb.CatBoostClassifier(**params)
        model.fit(
            x_train, y_train,
            eval_set=[(x_valid, y_valid)], verbose=10, early_stopping_rounds=50,
            #cat_features=categorical
        )
        oof_preds[valid_idx] = model.predict_proba(x_valid)
        score = metrics.log_loss(y_valid, oof_preds[valid_idx])
        print(f"Fold {fold+1}, Valid score = {round(score, 5)}")
        folds_scores.append(round(score, 5))
        estimators.append(model)

    print(f"Score by each fold: {folds_scores}")
    print("="*65)
    return estimators, oof_preds

In [7]:
def lightgbm_cv_fit(params, X, y, cv, categorical = None):
    """
    Кросс-валидация для модели lightgbm.

    Parameters
    ----------
    params: dict
        Словарь гиперпараметров модели.

    X: pandas.core.frame.DataFrame
        Матрица признако для обучения модели.

    y: pandas.core.frame.Series
        Вектор целевой переменной для обучения модели.

    cv: KFold or StratifiedKFold generator.
        Объект KFold / StratifiedKFold для определения
        стратегии кросс-валидации модели.

    categorical: str, optional, default = None
        Список категориальных признаков.
        Опциональный параметр, по умолчанию, не используется.

    Returns
    -------
    estimators: list
        Список с объектами обученной модели.

    oof_preds: np.array
        Вектор OOF-прогнозов.

    """
    if not categorical:
        categorical = "auto"

    estimators, folds_scores = [], []
    oof_preds = np.zeros((X.shape[0], 4))
    print(f"{time.ctime()}, Cross-Validation, {X.shape[0]} rows, {X.shape[1]} cols")

    for fold, (train_idx, valid_idx) in enumerate(cv.split(X, y)):
        x_train, x_valid = X.loc[train_idx], X.loc[valid_idx]
        y_train, y_valid = y[train_idx], y[valid_idx]

        model = lgb.LGBMClassifier(**params)
        model.fit(
            x_train, y_train,
            eval_set=[(x_valid, y_valid)],
            eval_metric="multi_logloss", verbose=10, early_stopping_rounds=50,
            categorical_feature=categorical
        )
        oof_preds[valid_idx] = model.predict_proba(x_valid)
        score = metrics.log_loss(y_valid, oof_preds[valid_idx])
        print(f"Fold {fold+1}, Valid score = {round(score, 5)}")
        folds_scores.append(round(score, 5))
        estimators.append(model)

    print(f"Score by each fold: {folds_scores}")
    print("="*65)
    return estimators, oof_preds

In [70]:
def xgboost_cv_fit(params, X, y, cv, categorical = None):
    """
    Кросс-валидация для модели xgboost.

    Parameters
    ----------
    params: dict
        Словарь гиперпараметров модели.

    X: pandas.core.frame.DataFrame
        Матрица признако для обучения модели.

    y: pandas.core.frame.Series
        Вектор целевой переменной для обучения модели.

    cv: KFold or StratifiedKFold generator.
        Объект KFold / StratifiedKFold для определения
        стратегии кросс-валидации модели.

    categorical: str, optional, default = None
        Список категориальных признаков.
        Опциональный параметр, по умолчанию, не используется.

    Returns
    -------
    estimators: list
        Список с объектами обученной модели.

    encoders: dict
        Список с объектами LabelEncoders.

    oof_preds: np.array
        Вектор OOF-прогнозов.

    """
    estimators, encoders = [], {}
    oof_preds = np.zeros((X.shape[0], 4))

    if categorical:
        for feature in categorical:
            encoder = LabelEncoder()
            X[feature] = encoder.fit_transform(X[feature].astype("str").fillna("NA"))
            encoders[feature] = encoder

    print(f"{time.ctime()}, Cross-Validation, {X.shape[0]} rows, {X.shape[1]} cols")

    for fold, (train_idx, valid_idx) in enumerate(cv.split(X, y)):

        x_train, x_valid = X.loc[train_idx], X.loc[valid_idx]
        y_train, y_valid = y[train_idx], y[valid_idx]

        model = xgb.XGBClassifier(**params)
        model.fit(
            x_train, y_train,
            eval_set=[(x_valid, y_valid)],
            eval_metric="mlogloss", 
            verbose=10, 
            early_stopping_rounds=50,
        )

        oof_preds[valid_idx] = model.predict_proba(x_valid)
        score = metrics.log_loss(y_valid, oof_preds[valid_idx])
        print(f"Fold {fold+1}, Valid score = {round(score, 5)}")
        estimators.append(model)

    return estimators, encoders, oof_preds

## Загрузка данных и построение моделей

### lightgbm + optuna

In [None]:
def objective(trial):
    data = get_input("train.csv")
    data.drop(columns='id', inplace=True)
    mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
    data['target'] = data['target'].map(mapper)
    target = data['target']
    data = data.drop(columns=['target'])
    
    
    train_x, test_x, train_y, test_y = train_test_split(data, target, test_size=0.3)
    dtrain = lgb.Dataset(train_x, label=train_y)
 
    #param = {
    #    'objective': 'multiclass',
    #    'metric': 'multi_logloss',
    #    'num_class': 4,
    #    'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
    #    'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
    #    'num_leaves': trial.suggest_int('num_leaves', 2, 256),
    #    'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
    #    'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
    #    'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
    #    'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
    #}

    param = {
         'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-5, 30.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-5, 30.0),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3,0.4,0.5,0.6,0.7,0.8,0.9, 1.0]),
        'subsample': trial.suggest_uniform('subsample', 0,1),
        'learning_rate': trial.suggest_uniform('learning_rate', 0, 0.1 ),
        'max_depth': trial.suggest_int('max_depth', 1,100),
        'num_leaves' : trial.suggest_int('num_leaves', 2, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 300),
        'min_child_weight' : trial.suggest_loguniform('min_child_weight' , 1e-5 , 1),
        'cat_smooth' : trial.suggest_int('cat_smooth', 1, 100),
        'cat_l2': trial.suggest_int('cat_l2',1,20),
        'metric': 'multi_logloss', 
        'random_state': 42,
        'num_class': 4,
        'objective': 'multiclass',
        
    }
 
    gbm = lgb.train(param, dtrain)
    preds = gbm.predict(test_x)
    #pred_labels = np.rint(preds)
    log_loss = sklearn.metrics.log_loss(test_y, preds)
    return log_loss
 
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)
 
print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)
print('Best value:', study.best_trial)

[32m[I 2021-05-16 18:25:20,684][0m A new study created in memory with name: no-name-882d6856-d646-41a5-a0ff-8f875f9c288c[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:25:37,726][0m Trial 0 finished with value: 1.0930728477741078 and parameters: {'reg_alpha': 0.0018382181760043421, 'reg_lambda': 0.0002564331226829421, 'colsample_bytree': 0.5, 'subsample': 0.11843285679715676, 'learning_rate': 0.034182391469566045, 'max_depth': 29, 'num_leaves': 229, 'min_child_samples': 162, 'min_child_weight': 0.00011634068652074863, 'cat_smooth': 44, 'cat_l2': 4}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:25:51,617][0m Trial 1 finished with value: 1.0955685356214455 and parameters: {'reg_alpha': 1.6323695248442425, 'reg_lambda': 0.0036906669717957823, 'colsample_bytree': 0.8, 'subsample': 0.9637292807894524, 'learning_rate': 0.05302797282829857, 'max_depth': 30, 'num_leaves': 114, 'min_child_samples': 55, 'min_child_weight': 0.0002587880463853362, 'cat_smooth': 55, 'cat_l2': 15}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:26:06,770][0m Trial 2 finished with value: 1.1005712477990852 and parameters: {'reg_alpha': 0.09703434541202088, 'reg_lambda': 9.29203171038271e-05, 'colsample_bytree': 0.6, 'subsample': 0.22599667448212835, 'learning_rate': 0.015091349253031072, 'max_depth': 78, 'num_leaves': 459, 'min_child_samples': 288, 'min_child_weight': 0.10351586001387278, 'cat_smooth': 87, 'cat_l2': 15}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:26:15,205][0m Trial 3 finished with value: 1.0947276050717174 and parameters: {'reg_alpha': 2.1071655797497653, 'reg_lambda': 0.0012309407169892109, 'colsample_bytree': 0.7, 'subsample': 0.9782956649626166, 'learning_rate': 0.05293788834639292, 'max_depth': 55, 'num_leaves': 27, 'min_child_samples': 220, 'min_child_weight': 0.03888810784315362, 'cat_smooth': 65, 'cat_l2': 3}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:26:36,783][0m Trial 4 finished with value: 1.1050866936268615 and parameters: {'reg_alpha': 0.00021243381990744743, 'reg_lambda': 0.00012987191371437035, 'colsample_bytree': 1.0, 'subsample': 0.34591329613547417, 'learning_rate': 0.06889366811394837, 'max_depth': 95, 'num_leaves': 491, 'min_child_samples': 234, 'min_child_weight': 0.0004041285330566758, 'cat_smooth': 53, 'cat_l2': 5}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:27:04,904][0m Trial 5 finished with value: 1.1064935508863833 and parameters: {'reg_alpha': 0.0005345386323427009, 'reg_lambda': 3.511832821950963, 'colsample_bytree': 0.8, 'subsample': 0.5607781578638047, 'learning_rate': 0.06973840008398186, 'max_depth': 97, 'num_leaves': 519, 'min_child_samples': 18, 'min_child_weight': 0.012243807186647318, 'cat_smooth': 90, 'cat_l2': 3}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:27:08,296][0m Trial 6 finished with value: 1.1101047647935305 and parameters: {'reg_alpha': 0.022479286823623736, 'reg_lambda': 4.725859453522173, 'colsample_bytree': 1.0, 'subsample': 0.28174584995414687, 'learning_rate': 0.06086672463013248, 'max_depth': 1, 'num_leaves': 231, 'min_child_samples': 69, 'min_child_weight': 5.3697388465258046e-05, 'cat_smooth': 12, 'cat_l2': 1}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:27:12,193][0m Trial 7 finished with value: 1.1032285015323893 and parameters: {'reg_alpha': 0.27952205424430115, 'reg_lambda': 3.9365785498936146e-05, 'colsample_bytree': 0.3, 'subsample': 0.6734069736960099, 'learning_rate': 0.07092864084355008, 'max_depth': 3, 'num_leaves': 332, 'min_child_samples': 263, 'min_child_weight': 0.022177315592072904, 'cat_smooth': 20, 'cat_l2': 12}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:27:27,400][0m Trial 8 finished with value: 1.0968530512095067 and parameters: {'reg_alpha': 2.2157156336654815e-05, 'reg_lambda': 0.28247539588802595, 'colsample_bytree': 0.5, 'subsample': 0.9172734689766643, 'learning_rate': 0.07046581133547022, 'max_depth': 27, 'num_leaves': 198, 'min_child_samples': 32, 'min_child_weight': 0.00011170963896952472, 'cat_smooth': 62, 'cat_l2': 9}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:27:42,942][0m Trial 9 finished with value: 1.1034504848063675 and parameters: {'reg_alpha': 6.50612485612051, 'reg_lambda': 3.788665351703791e-05, 'colsample_bytree': 0.4, 'subsample': 0.520604430744945, 'learning_rate': 0.015447343223005994, 'max_depth': 46, 'num_leaves': 423, 'min_child_samples': 36, 'min_child_weight': 0.0265473590105504, 'cat_smooth': 34, 'cat_l2': 2}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:27:57,551][0m Trial 10 finished with value: 1.0931887003119531 and parameters: {'reg_alpha': 0.0022447255333835144, 'reg_lambda': 0.06342615100804282, 'colsample_bytree': 0.5, 'subsample': 0.003966463608017723, 'learning_rate': 0.09787860988738747, 'max_depth': 17, 'num_leaves': 827, 'min_child_samples': 145, 'min_child_weight': 1.0715233166632674e-05, 'cat_smooth': 36, 'cat_l2': 8}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:28:14,264][0m Trial 11 finished with value: 1.0973996124682044 and parameters: {'reg_alpha': 0.001506041736045482, 'reg_lambda': 0.06477872252765644, 'colsample_bytree': 0.5, 'subsample': 0.009041024006455088, 'learning_rate': 0.03250266928954279, 'max_depth': 18, 'num_leaves': 855, 'min_child_samples': 143, 'min_child_weight': 1.5108831954372247e-05, 'cat_smooth': 38, 'cat_l2': 8}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:28:40,195][0m Trial 12 finished with value: 1.1021461658733485 and parameters: {'reg_alpha': 0.002471476184177303, 'reg_lambda': 0.006849608659571087, 'colsample_bytree': 0.9, 'subsample': 0.0052743295037780795, 'learning_rate': 0.03656558621655266, 'max_depth': 45, 'num_leaves': 928, 'min_child_samples': 118, 'min_child_weight': 1.0407814441074308e-05, 'cat_smooth': 32, 'cat_l2': 20}. Best is trial 0 with value: 1.0930728477741078.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:28:51,866][0m Trial 13 finished with value: 1.0908766056039574 and parameters: {'reg_alpha': 2.2009081099059318e-05, 'reg_lambda': 0.12787206406656534, 'colsample_bytree': 0.5, 'subsample': 0.12147163408858569, 'learning_rate': 0.0910725816623649, 'max_depth': 13, 'num_leaves': 678, 'min_child_samples': 171, 'min_child_weight': 0.0012008914115114993, 'cat_smooth': 77, 'cat_l2': 6}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:29:03,465][0m Trial 14 finished with value: 1.0989594963590876 and parameters: {'reg_alpha': 1.0334330367217357e-05, 'reg_lambda': 0.0006423528883795067, 'colsample_bytree': 0.5, 'subsample': 0.17065453122757987, 'learning_rate': 0.09584791341017727, 'max_depth': 13, 'num_leaves': 696, 'min_child_samples': 194, 'min_child_weight': 0.0013724375088851601, 'cat_smooth': 76, 'cat_l2': 6}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:29:21,021][0m Trial 15 finished with value: 1.1016589244506654 and parameters: {'reg_alpha': 5.32651843342831e-05, 'reg_lambda': 0.5166040337245751, 'colsample_bytree': 0.5, 'subsample': 0.15774084768104518, 'learning_rate': 0.03384438045747896, 'max_depth': 31, 'num_leaves': 642, 'min_child_samples': 189, 'min_child_weight': 0.8969856229806891, 'cat_smooth': 75, 'cat_l2': 5}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:29:39,758][0m Trial 16 finished with value: 1.0989474731582702 and parameters: {'reg_alpha': 0.00010617142893146885, 'reg_lambda': 25.766812014002774, 'colsample_bytree': 0.4, 'subsample': 0.10398125156777967, 'learning_rate': 0.085988055123711, 'max_depth': 61, 'num_leaves': 702, 'min_child_samples': 97, 'min_child_weight': 0.0025144751638574723, 'cat_smooth': 98, 'cat_l2': 11}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:29:47,296][0m Trial 17 finished with value: 1.1148620391662822 and parameters: {'reg_alpha': 0.01392314927875406, 'reg_lambda': 1.1353001497262547e-05, 'colsample_bytree': 0.6, 'subsample': 0.39050761064023354, 'learning_rate': 0.0008916103734858127, 'max_depth': 6, 'num_leaves': 604, 'min_child_samples': 178, 'min_child_weight': 0.0007748642059432129, 'cat_smooth': 73, 'cat_l2': 6}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:30:04,224][0m Trial 18 finished with value: 1.1025611944507727 and parameters: {'reg_alpha': 1.0960638843809606e-05, 'reg_lambda': 0.03341114771256298, 'colsample_bytree': 0.3, 'subsample': 0.40751592361399347, 'learning_rate': 0.020300033080566285, 'max_depth': 42, 'num_leaves': 290, 'min_child_samples': 103, 'min_child_weight': 5.457472392091434e-05, 'cat_smooth': 45, 'cat_l2': 1}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:30:09,137][0m Trial 19 finished with value: 1.1069990629606703 and parameters: {'reg_alpha': 0.009364617255036163, 'reg_lambda': 0.0005390344403329836, 'colsample_bytree': 0.7, 'subsample': 0.6733490136522495, 'learning_rate': 0.04089521898190581, 'max_depth': 67, 'num_leaves': 6, 'min_child_samples': 169, 'min_child_weight': 0.005284848438692423, 'cat_smooth': 1, 'cat_l2': 4}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:30:29,650][0m Trial 20 finished with value: 1.0960596641949725 and parameters: {'reg_alpha': 0.000499195079470326, 'reg_lambda': 0.32438375021489374, 'colsample_bytree': 0.9, 'subsample': 0.09772540678144566, 'learning_rate': 0.0850220760635108, 'max_depth': 36, 'num_leaves': 354, 'min_child_samples': 219, 'min_child_weight': 0.00020059207598366666, 'cat_smooth': 87, 'cat_l2': 13}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:30:43,873][0m Trial 21 finished with value: 1.1044994063199582 and parameters: {'reg_alpha': 0.006196853440535386, 'reg_lambda': 0.07096663621371521, 'colsample_bytree': 0.5, 'subsample': 0.036021890676073184, 'learning_rate': 0.09940577091469038, 'max_depth': 17, 'num_leaves': 807, 'min_child_samples': 148, 'min_child_weight': 2.3762788566970454e-05, 'cat_smooth': 23, 'cat_l2': 8}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:31:00,672][0m Trial 22 finished with value: 1.0977050497515188 and parameters: {'reg_alpha': 0.04962657290765173, 'reg_lambda': 0.011305812320784392, 'colsample_bytree': 0.5, 'subsample': 0.2526155986009091, 'learning_rate': 0.08901246007144012, 'max_depth': 21, 'num_leaves': 994, 'min_child_samples': 130, 'min_child_weight': 4.833081483435855e-05, 'cat_smooth': 43, 'cat_l2': 9}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:31:09,481][0m Trial 23 finished with value: 1.104185200549606 and parameters: {'reg_alpha': 0.001426804320493504, 'reg_lambda': 1.3844516493834131, 'colsample_bytree': 0.5, 'subsample': 0.0811024373985767, 'learning_rate': 0.023745464557144796, 'max_depth': 8, 'num_leaves': 797, 'min_child_samples': 160, 'min_child_weight': 0.0007053745623714637, 'cat_smooth': 22, 'cat_l2': 7}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:31:25,976][0m Trial 24 finished with value: 1.09099953987274 and parameters: {'reg_alpha': 5.1696577740156105e-05, 'reg_lambda': 0.02632768962192075, 'colsample_bytree': 0.5, 'subsample': 0.1860770273323995, 'learning_rate': 0.04549759346465972, 'max_depth': 23, 'num_leaves': 589, 'min_child_samples': 202, 'min_child_weight': 0.00011777455186652023, 'cat_smooth': 47, 'cat_l2': 10}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:31:41,465][0m Trial 25 finished with value: 1.0949188093509472 and parameters: {'reg_alpha': 4.134985996382763e-05, 'reg_lambda': 0.0031555669053980845, 'colsample_bytree': 0.5, 'subsample': 0.17779959990240324, 'learning_rate': 0.04542068710180265, 'max_depth': 26, 'num_leaves': 584, 'min_child_samples': 253, 'min_child_weight': 0.00012953138285559069, 'cat_smooth': 60, 'cat_l2': 10}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:31:58,751][0m Trial 26 finished with value: 1.1156180888409006 and parameters: {'reg_alpha': 0.00021412341227715278, 'reg_lambda': 0.026927465400248794, 'colsample_bytree': 0.5, 'subsample': 0.3054224963844817, 'learning_rate': 0.004864648676082769, 'max_depth': 39, 'num_leaves': 735, 'min_child_samples': 200, 'min_child_weight': 0.003635657336118054, 'cat_smooth': 49, 'cat_l2': 4}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:32:15,470][0m Trial 27 finished with value: 1.0960881575329515 and parameters: {'reg_alpha': 1.0029005318994509e-05, 'reg_lambda': 0.14804820119149328, 'colsample_bytree': 0.5, 'subsample': 0.43949669522464346, 'learning_rate': 0.027082878351016686, 'max_depth': 35, 'num_leaves': 578, 'min_child_samples': 204, 'min_child_weight': 0.0005682913797472484, 'cat_smooth': 68, 'cat_l2': 14}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:32:26,392][0m Trial 28 finished with value: 1.0949230709773012 and parameters: {'reg_alpha': 7.826090476372986e-05, 'reg_lambda': 1.0066407721719806, 'colsample_bytree': 0.6, 'subsample': 0.21296475773524, 'learning_rate': 0.05918764208454551, 'max_depth': 11, 'num_leaves': 525, 'min_child_samples': 240, 'min_child_weight': 0.001404445097823495, 'cat_smooth': 55, 'cat_l2': 17}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:32:42,062][0m Trial 29 finished with value: 1.0995186051080554 and parameters: {'reg_alpha': 0.00035331777757037876, 'reg_lambda': 0.0018332453508498557, 'colsample_bytree': 0.8, 'subsample': 0.09463612371818464, 'learning_rate': 0.04566854760722555, 'max_depth': 29, 'num_leaves': 120, 'min_child_samples': 291, 'min_child_weight': 0.00024296817964441302, 'cat_smooth': 56, 'cat_l2': 6}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:33:02,352][0m Trial 30 finished with value: 1.1007996727087441 and parameters: {'reg_alpha': 1.8619934475271715e-05, 'reg_lambda': 0.007887274368944664, 'colsample_bytree': 0.5, 'subsample': 0.32176586960326803, 'learning_rate': 0.05863007603398718, 'max_depth': 24, 'num_leaves': 397, 'min_child_samples': 78, 'min_child_weight': 8.799252556571772e-05, 'cat_smooth': 29, 'cat_l2': 11}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:33:15,781][0m Trial 31 finished with value: 1.0978143574423012 and parameters: {'reg_alpha': 0.003994278150102134, 'reg_lambda': 0.09690351689948508, 'colsample_bytree': 0.5, 'subsample': 0.01868171228576679, 'learning_rate': 0.07847869287865422, 'max_depth': 15, 'num_leaves': 869, 'min_child_samples': 169, 'min_child_weight': 3.0279547298047315e-05, 'cat_smooth': 41, 'cat_l2': 8}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:33:32,992][0m Trial 32 finished with value: 1.1054941215664336 and parameters: {'reg_alpha': 0.0007646583161819077, 'reg_lambda': 0.02116836632523376, 'colsample_bytree': 0.5, 'subsample': 0.13317460068547365, 'learning_rate': 0.09333329736099558, 'max_depth': 22, 'num_leaves': 738, 'min_child_samples': 132, 'min_child_weight': 1.0456644485542795e-05, 'cat_smooth': 49, 'cat_l2': 10}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:33:42,261][0m Trial 33 finished with value: 1.0964755988329116 and parameters: {'reg_alpha': 0.20411363358707035, 'reg_lambda': 0.13535821038097764, 'colsample_bytree': 0.5, 'subsample': 0.05089968679882485, 'learning_rate': 0.051553087228431244, 'max_depth': 9, 'num_leaves': 664, 'min_child_samples': 180, 'min_child_weight': 0.0002241659375435918, 'cat_smooth': 37, 'cat_l2': 7}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:33:45,370][0m Trial 34 finished with value: 1.1150964399081225 and parameters: {'reg_alpha': 0.0330263832332424, 'reg_lambda': 0.036256386508886206, 'colsample_bytree': 0.7, 'subsample': 0.22494898217369394, 'learning_rate': 0.04166952995542061, 'max_depth': 1, 'num_leaves': 993, 'min_child_samples': 149, 'min_child_weight': 2.1355421133286346e-05, 'cat_smooth': 29, 'cat_l2': 4}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:34:05,489][0m Trial 35 finished with value: 1.0992755537194971 and parameters: {'reg_alpha': 0.0001579911706972085, 'reg_lambda': 0.00355515048257303, 'colsample_bytree': 1.0, 'subsample': 0.20420345571088616, 'learning_rate': 0.07776243211637346, 'max_depth': 32, 'num_leaves': 772, 'min_child_samples': 224, 'min_child_weight': 0.16795568558331, 'cat_smooth': 43, 'cat_l2': 9}. Best is trial 13 with value: 1.0908766056039574.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:34:22,744][0m Trial 36 finished with value: 1.0903456649783263 and parameters: {'reg_alpha': 0.0008845019617946607, 'reg_lambda': 0.0003441531178517913, 'colsample_bytree': 0.5, 'subsample': 0.007409831111379966, 'learning_rate': 0.029075093110584444, 'max_depth': 18, 'num_leaves': 465, 'min_child_samples': 121, 'min_child_weight': 0.0003298064325145202, 'cat_smooth': 15, 'cat_l2': 3}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:34:35,310][0m Trial 37 finished with value: 1.1050799287467428 and parameters: {'reg_alpha': 3.3981336686061327e-05, 'reg_lambda': 0.0002566547751044009, 'colsample_bytree': 0.8, 'subsample': 0.1374413359077778, 'learning_rate': 0.028662051556542365, 'max_depth': 48, 'num_leaves': 80, 'min_child_samples': 112, 'min_child_weight': 0.0013642265495844115, 'cat_smooth': 7, 'cat_l2': 2}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:34:49,427][0m Trial 38 finished with value: 1.1134839980766846 and parameters: {'reg_alpha': 0.0008469156095372393, 'reg_lambda': 9.425739328934548e-05, 'colsample_bytree': 0.3, 'subsample': 0.2573805823151111, 'learning_rate': 0.009890119827909608, 'max_depth': 21, 'num_leaves': 496, 'min_child_samples': 215, 'min_child_weight': 0.000364947025870703, 'cat_smooth': 83, 'cat_l2': 3}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:34:53,620][0m Trial 39 finished with value: 1.105675036825571 and parameters: {'reg_alpha': 0.00032464364495111075, 'reg_lambda': 4.4921683927212247e-05, 'colsample_bytree': 0.4, 'subsample': 0.06298989797781453, 'learning_rate': 0.038336143478055376, 'max_depth': 3, 'num_leaves': 449, 'min_child_samples': 82, 'min_child_weight': 0.006278063704742522, 'cat_smooth': 10, 'cat_l2': 1}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:35:15,342][0m Trial 40 finished with value: 1.1019874634157347 and parameters: {'reg_alpha': 0.00010936362478530482, 'reg_lambda': 1.5218212744091352e-05, 'colsample_bytree': 1.0, 'subsample': 0.916355003471323, 'learning_rate': 0.018364639784084744, 'max_depth': 82, 'num_leaves': 246, 'min_child_samples': 128, 'min_child_weight': 0.00013732614175927815, 'cat_smooth': 97, 'cat_l2': 5}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:35:28,801][0m Trial 41 finished with value: 1.0968837206856 and parameters: {'reg_alpha': 0.0033899994340125146, 'reg_lambda': 0.0004165419312331211, 'colsample_bytree': 0.5, 'subsample': 0.0032380682821549767, 'learning_rate': 0.046454980496726896, 'max_depth': 14, 'num_leaves': 564, 'min_child_samples': 159, 'min_child_weight': 7.335958667104189e-05, 'cat_smooth': 16, 'cat_l2': 7}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:35:45,722][0m Trial 42 finished with value: 1.1005925396235567 and parameters: {'reg_alpha': 0.0016771189779439483, 'reg_lambda': 0.001126811938117383, 'colsample_bytree': 0.5, 'subsample': 0.008144775406474354, 'learning_rate': 0.03188096337645136, 'max_depth': 18, 'num_leaves': 640, 'min_child_samples': 139, 'min_child_weight': 0.0003452768173475644, 'cat_smooth': 49, 'cat_l2': 3}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:36:04,093][0m Trial 43 finished with value: 1.1024041064875254 and parameters: {'reg_alpha': 0.007431204259127635, 'reg_lambda': 0.0001857820401750365, 'colsample_bytree': 0.5, 'subsample': 0.12327892494262474, 'learning_rate': 0.06439661926061047, 'max_depth': 26, 'num_leaves': 883, 'min_child_samples': 162, 'min_child_weight': 3.404345251150127e-05, 'cat_smooth': 1, 'cat_l2': 5}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:36:16,273][0m Trial 44 finished with value: 1.0975781649162613 and parameters: {'reg_alpha': 0.000762942896301563, 'reg_lambda': 0.009438182224937221, 'colsample_bytree': 0.9, 'subsample': 0.17723993744115304, 'learning_rate': 0.09989779389035382, 'max_depth': 11, 'num_leaves': 929, 'min_child_samples': 118, 'min_child_weight': 0.0005512661105757818, 'cat_smooth': 27, 'cat_l2': 12}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:36:32,311][0m Trial 45 finished with value: 1.0964068562429987 and parameters: {'reg_alpha': 0.00023218859982848076, 'reg_lambda': 0.2714678900939977, 'colsample_bytree': 0.5, 'subsample': 0.0570681181009094, 'learning_rate': 0.052547452042333936, 'max_depth': 55, 'num_leaves': 171, 'min_child_samples': 186, 'min_child_weight': 0.0021670663838240178, 'cat_smooth': 35, 'cat_l2': 2}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:36:53,769][0m Trial 46 finished with value: 1.0998902051976776 and parameters: {'reg_alpha': 2.249107635416463e-05, 'reg_lambda': 4.947110990729126, 'colsample_bytree': 0.5, 'subsample': 0.36423291787458534, 'learning_rate': 0.0250909442542025, 'max_depth': 33, 'num_leaves': 670, 'min_child_samples': 100, 'min_child_weight': 0.000994324332130367, 'cat_smooth': 61, 'cat_l2': 6}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:37:00,118][0m Trial 47 finished with value: 1.1035097815074335 and parameters: {'reg_alpha': 0.02030269946446779, 'reg_lambda': 0.04671245332077026, 'colsample_bytree': 0.6, 'subsample': 0.27688625266666417, 'learning_rate': 0.03123795488860441, 'max_depth': 5, 'num_leaves': 370, 'min_child_samples': 201, 'min_child_weight': 0.00013785202431533048, 'cat_smooth': 17, 'cat_l2': 8}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:37:21,869][0m Trial 48 finished with value: 1.101901218693159 and parameters: {'reg_alpha': 0.0022685943671948137, 'reg_lambda': 0.012400614080762734, 'colsample_bytree': 0.5, 'subsample': 0.1560782225904579, 'learning_rate': 0.03684758383881503, 'max_depth': 20, 'num_leaves': 531, 'min_child_samples': 56, 'min_child_weight': 1.3390073418094704e-05, 'cat_smooth': 53, 'cat_l2': 4}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:37:37,363][0m Trial 49 finished with value: 1.1002952150298675 and parameters: {'reg_alpha': 0.07777100424697979, 'reg_lambda': 0.0015143575593829005, 'colsample_bytree': 0.3, 'subsample': 0.657665802108842, 'learning_rate': 0.04795222241118591, 'max_depth': 41, 'num_leaves': 437, 'min_child_samples': 175, 'min_child_weight': 0.011359818892871089, 'cat_smooth': 66, 'cat_l2': 9}. Best is trial 36 with value: 1.0903456649783263.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:37:56,972][0m Trial 50 finished with value: 1.088279019087586 and parameters: {'reg_alpha': 0.9557349372478526, 'reg_lambda': 0.00565822311014884, 'colsample_bytree': 0.7, 'subsample': 0.7981326760346236, 'learning_rate': 0.04164055940276916, 'max_depth': 29, 'num_leaves': 622, 'min_child_samples': 150, 'min_child_weight': 0.0003566489059353313, 'cat_smooth': 38, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:38:10,233][0m Trial 51 finished with value: 1.1046674185277277 and parameters: {'reg_alpha': 10.84104749340899, 'reg_lambda': 0.004997548546156438, 'colsample_bytree': 0.7, 'subsample': 0.5228564828390005, 'learning_rate': 0.04149674034059467, 'max_depth': 27, 'num_leaves': 611, 'min_child_samples': 151, 'min_child_weight': 0.00048617223208970416, 'cat_smooth': 39, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:38:27,013][0m Trial 52 finished with value: 1.1061549327488045 and parameters: {'reg_alpha': 0.33265954959628263, 'reg_lambda': 0.0007609040952851989, 'colsample_bytree': 0.7, 'subsample': 0.818902427991351, 'learning_rate': 0.036053623580867875, 'max_depth': 16, 'num_leaves': 474, 'min_child_samples': 124, 'min_child_weight': 0.0003001015122387385, 'cat_smooth': 45, 'cat_l2': 2}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:38:44,281][0m Trial 53 finished with value: 1.0976381679815508 and parameters: {'reg_alpha': 3.8301639449869667, 'reg_lambda': 5.924800590732535e-05, 'colsample_bytree': 0.7, 'subsample': 0.6148235548870756, 'learning_rate': 0.02942926572539167, 'max_depth': 30, 'num_leaves': 722, 'min_child_samples': 137, 'min_child_weight': 0.00019727763283969773, 'cat_smooth': 32, 'cat_l2': 5}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:39:04,489][0m Trial 54 finished with value: 1.0964551324172955 and parameters: {'reg_alpha': 0.8937055645242837, 'reg_lambda': 2.308296971418878e-05, 'colsample_bytree': 0.7, 'subsample': 0.8764323791637878, 'learning_rate': 0.056132085448365815, 'max_depth': 37, 'num_leaves': 824, 'min_child_samples': 157, 'min_child_weight': 6.973361945110408e-05, 'cat_smooth': 46, 'cat_l2': 7}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:39:20,565][0m Trial 55 finished with value: 1.0963611783576697 and parameters: {'reg_alpha': 0.005087185795566841, 'reg_lambda': 0.0158807542145073, 'colsample_bytree': 0.4, 'subsample': 0.8029392475747579, 'learning_rate': 0.0419682436930581, 'max_depth': 23, 'num_leaves': 547, 'min_child_samples': 188, 'min_child_weight': 0.0008814503705839244, 'cat_smooth': 34, 'cat_l2': 5}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:39:34,476][0m Trial 56 finished with value: 1.0969084642334346 and parameters: {'reg_alpha': 7.163743266388808e-05, 'reg_lambda': 0.20816129334780079, 'colsample_bytree': 0.5, 'subsample': 0.08416670427514072, 'learning_rate': 0.02295115898188417, 'max_depth': 14, 'num_leaves': 616, 'min_child_samples': 168, 'min_child_weight': 4.19899661350743e-05, 'cat_smooth': 24, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:39:51,210][0m Trial 57 finished with value: 1.1009597871639614 and parameters: {'reg_alpha': 0.0009993762696474968, 'reg_lambda': 0.4790956585736722, 'colsample_bytree': 0.5, 'subsample': 0.7418310574264061, 'learning_rate': 0.06470429724554969, 'max_depth': 18, 'num_leaves': 320, 'min_child_samples': 109, 'min_child_weight': 0.0025338931715474804, 'cat_smooth': 39, 'cat_l2': 4}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:40:02,399][0m Trial 58 finished with value: 1.1081830954757723 and parameters: {'reg_alpha': 0.011101201030516506, 'reg_lambda': 0.04728309999953047, 'colsample_bytree': 0.9, 'subsample': 0.9849655551198209, 'learning_rate': 0.012496028991955621, 'max_depth': 8, 'num_leaves': 768, 'min_child_samples': 89, 'min_child_weight': 0.00016537845104589555, 'cat_smooth': 58, 'cat_l2': 6}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:40:13,995][0m Trial 59 finished with value: 1.0920150094767496 and parameters: {'reg_alpha': 20.239525854393616, 'reg_lambda': 0.07112375335702158, 'colsample_bytree': 0.8, 'subsample': 0.47298326635271165, 'learning_rate': 0.09315990657458999, 'max_depth': 44, 'num_leaves': 693, 'min_child_samples': 212, 'min_child_weight': 0.0016344838262468916, 'cat_smooth': 72, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:40:24,421][0m Trial 60 finished with value: 1.0933912448356524 and parameters: {'reg_alpha': 25.450473705272625, 'reg_lambda': 0.00033008097978130856, 'colsample_bytree': 0.8, 'subsample': 0.46478484341447834, 'learning_rate': 0.09265541045531178, 'max_depth': 53, 'num_leaves': 691, 'min_child_samples': 262, 'min_child_weight': 0.0016187058706464358, 'cat_smooth': 71, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:40:42,267][0m Trial 61 finished with value: 1.1013844545741363 and parameters: {'reg_alpha': 0.9976446453693755, 'reg_lambda': 0.07475263288353039, 'colsample_bytree': 0.8, 'subsample': 0.04308685300020354, 'learning_rate': 0.09587820976322904, 'max_depth': 27, 'num_leaves': 647, 'min_child_samples': 207, 'min_child_weight': 0.0045427366587558705, 'cat_smooth': 81, 'cat_l2': 2}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:40:55,149][0m Trial 62 finished with value: 1.094934510562066 and parameters: {'reg_alpha': 13.188561988406091, 'reg_lambda': 0.11981027987329078, 'colsample_bytree': 0.8, 'subsample': 0.7332619015493984, 'learning_rate': 0.08413477576413991, 'max_depth': 43, 'num_leaves': 757, 'min_child_samples': 192, 'min_child_weight': 0.001013398050568569, 'cat_smooth': 78, 'cat_l2': 10}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:41:17,717][0m Trial 63 finished with value: 1.1047657482831463 and parameters: {'reg_alpha': 0.0004935478966018859, 'reg_lambda': 0.02171315886537697, 'colsample_bytree': 0.8, 'subsample': 0.5854379466887122, 'learning_rate': 0.08977795436512288, 'max_depth': 35, 'num_leaves': 701, 'min_child_samples': 143, 'min_child_weight': 0.0004248450433492117, 'cat_smooth': 88, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:41:31,787][0m Trial 64 finished with value: 1.098961899956441 and parameters: {'reg_alpha': 2.6359478261455944, 'reg_lambda': 0.00012830595042529776, 'colsample_bytree': 0.5, 'subsample': 0.0014901228588672932, 'learning_rate': 0.08085156293041287, 'max_depth': 24, 'num_leaves': 619, 'min_child_samples': 232, 'min_child_weight': 0.0001022842267516066, 'cat_smooth': 52, 'cat_l2': 12}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:41:50,112][0m Trial 65 finished with value: 1.091111160133087 and parameters: {'reg_alpha': 0.002484465755322094, 'reg_lambda': 0.0021907757203510434, 'colsample_bytree': 0.5, 'subsample': 0.10521805804374923, 'learning_rate': 0.03426137609984985, 'max_depth': 62, 'num_leaves': 832, 'min_child_samples': 177, 'min_child_weight': 0.000614896660963236, 'cat_smooth': 95, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:41:59,258][0m Trial 66 finished with value: 1.1033447631791156 and parameters: {'reg_alpha': 26.09298258543287, 'reg_lambda': 0.0022702186010213296, 'colsample_bytree': 0.5, 'subsample': 0.10797440524931007, 'learning_rate': 0.03422452945433475, 'max_depth': 70, 'num_leaves': 905, 'min_child_samples': 211, 'min_child_weight': 0.0007297516497555454, 'cat_smooth': 93, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:42:21,946][0m Trial 67 finished with value: 1.0975363678694214 and parameters: {'reg_alpha': 1.8272703213615228e-05, 'reg_lambda': 0.006472158073130813, 'colsample_bytree': 1.0, 'subsample': 0.1953174222480148, 'learning_rate': 0.03800310648679842, 'max_depth': 66, 'num_leaves': 578, 'min_child_samples': 178, 'min_child_weight': 0.0002642701878525467, 'cat_smooth': 100, 'cat_l2': 2}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:42:40,171][0m Trial 68 finished with value: 1.096455217564803 and parameters: {'reg_alpha': 0.03814477830687743, 'reg_lambda': 0.0027850083957766197, 'colsample_bytree': 0.7, 'subsample': 0.24429302703364936, 'learning_rate': 0.04406033203084147, 'max_depth': 59, 'num_leaves': 500, 'min_child_samples': 227, 'min_child_weight': 0.0016447354877530442, 'cat_smooth': 64, 'cat_l2': 4}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:42:57,609][0m Trial 69 finished with value: 1.096784779143783 and parameters: {'reg_alpha': 0.0031831627120017057, 'reg_lambda': 0.0008717346091815293, 'colsample_bytree': 0.6, 'subsample': 0.3464207620618056, 'learning_rate': 0.0491540224969649, 'max_depth': 50, 'num_leaves': 846, 'min_child_samples': 246, 'min_child_weight': 0.0070308472409642345, 'cat_smooth': 93, 'cat_l2': 15}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:43:15,396][0m Trial 70 finished with value: 1.096154460467905 and parameters: {'reg_alpha': 0.0014350124721200332, 'reg_lambda': 0.006118499054171327, 'colsample_bytree': 0.5, 'subsample': 0.14808622219841755, 'learning_rate': 0.026531024776119502, 'max_depth': 74, 'num_leaves': 802, 'min_child_samples': 194, 'min_child_weight': 0.0005132054830782512, 'cat_smooth': 84, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:43:26,032][0m Trial 71 finished with value: 1.0990282266269167 and parameters: {'reg_alpha': 0.002193215447876107, 'reg_lambda': 0.050243455890213715, 'colsample_bytree': 0.5, 'subsample': 0.08615774685527358, 'learning_rate': 0.09663630997895269, 'max_depth': 11, 'num_leaves': 968, 'min_child_samples': 154, 'min_child_weight': 0.00364921948607638, 'cat_smooth': 71, 'cat_l2': 6}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:43:41,985][0m Trial 72 finished with value: 1.0999251363054412 and parameters: {'reg_alpha': 0.005218565385259455, 'reg_lambda': 0.03176665647830611, 'colsample_bytree': 0.5, 'subsample': 0.030044244785548846, 'learning_rate': 0.021127315880510592, 'max_depth': 19, 'num_leaves': 679, 'min_child_samples': 178, 'min_child_weight': 0.0007091597983178582, 'cat_smooth': 47, 'cat_l2': 4}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:44:00,864][0m Trial 73 finished with value: 1.0930926702521788 and parameters: {'reg_alpha': 0.016937075692440007, 'reg_lambda': 0.07245999617230355, 'colsample_bytree': 0.5, 'subsample': 0.07546810285779143, 'learning_rate': 0.032776196444777604, 'max_depth': 60, 'num_leaves': 838, 'min_child_samples': 167, 'min_child_weight': 0.0020236614385373687, 'cat_smooth': 42, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:44:19,982][0m Trial 74 finished with value: 1.0892000968035467 and parameters: {'reg_alpha': 0.017112387733506474, 'reg_lambda': 0.014204812273754594, 'colsample_bytree': 0.5, 'subsample': 0.11994138416082709, 'learning_rate': 0.040124070288139795, 'max_depth': 62, 'num_leaves': 718, 'min_child_samples': 166, 'min_child_weight': 0.0011507570310181163, 'cat_smooth': 42, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:44:37,866][0m Trial 75 finished with value: 1.100576112928113 and parameters: {'reg_alpha': 0.2873690221976698, 'reg_lambda': 0.015185932237435657, 'colsample_bytree': 0.5, 'subsample': 0.1167057125502475, 'learning_rate': 0.039310699298463966, 'max_depth': 83, 'num_leaves': 719, 'min_child_samples': 183, 'min_child_weight': 0.0012999221972199368, 'cat_smooth': 58, 'cat_l2': 2}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:44:55,655][0m Trial 76 finished with value: 1.0953699336430873 and parameters: {'reg_alpha': 0.009128741257016737, 'reg_lambda': 0.0012823439314004555, 'colsample_bytree': 0.5, 'subsample': 0.16535623458644244, 'learning_rate': 0.035689379863211246, 'max_depth': 63, 'num_leaves': 641, 'min_child_samples': 197, 'min_child_weight': 0.00032082011596076885, 'cat_smooth': 5, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:45:17,587][0m Trial 77 finished with value: 1.1007241888952806 and parameters: {'reg_alpha': 0.14052222198328962, 'reg_lambda': 0.004574540652395601, 'colsample_bytree': 0.8, 'subsample': 0.18834388645677413, 'learning_rate': 0.055544878674770384, 'max_depth': 45, 'num_leaves': 592, 'min_child_samples': 170, 'min_child_weight': 0.00017320887247717983, 'cat_smooth': 41, 'cat_l2': 20}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:45:31,260][0m Trial 78 finished with value: 1.0966343987965763 and parameters: {'reg_alpha': 0.027776754742999385, 'reg_lambda': 0.00027096713582689763, 'colsample_bytree': 0.3, 'subsample': 0.21916154987503192, 'learning_rate': 0.04332448060080492, 'max_depth': 55, 'num_leaves': 747, 'min_child_samples': 279, 'min_child_weight': 0.0011540035980958324, 'cat_smooth': 49, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:45:51,176][0m Trial 79 finished with value: 1.1001033522667385 and parameters: {'reg_alpha': 0.00012587635260014496, 'reg_lambda': 0.0005176535503833122, 'colsample_bytree': 0.5, 'subsample': 0.1358007579279074, 'learning_rate': 0.029727145028382175, 'max_depth': 69, 'num_leaves': 549, 'min_child_samples': 136, 'min_child_weight': 0.0006274266031719462, 'cat_smooth': 53, 'cat_l2': 2}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:46:01,414][0m Trial 80 finished with value: 1.098636132168558 and parameters: {'reg_alpha': 0.07380008736975927, 'reg_lambda': 0.008946657415524594, 'colsample_bytree': 0.4, 'subsample': 0.40625677387320774, 'learning_rate': 0.05068795058980355, 'max_depth': 65, 'num_leaves': 74, 'min_child_samples': 219, 'min_child_weight': 0.002760494853343263, 'cat_smooth': 56, 'cat_l2': 4}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:46:20,320][0m Trial 81 finished with value: 1.0963771270717302 and parameters: {'reg_alpha': 0.0232024514813636, 'reg_lambda': 0.08360605117812361, 'colsample_bytree': 0.5, 'subsample': 0.05842045694932848, 'learning_rate': 0.033351384789002264, 'max_depth': 59, 'num_leaves': 822, 'min_child_samples': 166, 'min_child_weight': 0.0003982250592457567, 'cat_smooth': 43, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:46:40,175][0m Trial 82 finished with value: 1.0979423498468388 and parameters: {'reg_alpha': 0.01779924606262696, 'reg_lambda': 0.023667704067611286, 'colsample_bytree': 0.5, 'subsample': 0.0726777769760788, 'learning_rate': 0.038669278563925914, 'max_depth': 62, 'num_leaves': 782, 'min_child_samples': 150, 'min_child_weight': 0.0017908042375963664, 'cat_smooth': 41, 'cat_l2': 17}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:46:59,046][0m Trial 83 finished with value: 1.0963147617846356 and parameters: {'reg_alpha': 0.04901850024652215, 'reg_lambda': 0.1326447903175217, 'colsample_bytree': 0.5, 'subsample': 0.032335939955961464, 'learning_rate': 0.03208998487154414, 'max_depth': 58, 'num_leaves': 848, 'min_child_samples': 160, 'min_child_weight': 0.0020823578232041576, 'cat_smooth': 31, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:47:16,284][0m Trial 84 finished with value: 1.0988126206372415 and parameters: {'reg_alpha': 0.01256872923663908, 'reg_lambda': 0.4505214554134184, 'colsample_bytree': 0.5, 'subsample': 0.0949437143022794, 'learning_rate': 0.04011942446055791, 'max_depth': 76, 'num_leaves': 714, 'min_child_samples': 203, 'min_child_weight': 0.003381004309419244, 'cat_smooth': 44, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:47:40,501][0m Trial 85 finished with value: 1.0978169824796715 and parameters: {'reg_alpha': 1.358226248422295e-05, 'reg_lambda': 0.03819104782981987, 'colsample_bytree': 0.5, 'subsample': 0.13390084747828254, 'learning_rate': 0.046404912453054954, 'max_depth': 52, 'num_leaves': 651, 'min_child_samples': 174, 'min_child_weight': 0.0008452794034127547, 'cat_smooth': 50, 'cat_l2': 2}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:48:01,800][0m Trial 86 finished with value: 1.099595142485049 and parameters: {'reg_alpha': 5.4285642233343735e-05, 'reg_lambda': 0.19940875992540782, 'colsample_bytree': 0.5, 'subsample': 0.30161457456303814, 'learning_rate': 0.026973849892804286, 'max_depth': 57, 'num_leaves': 678, 'min_child_samples': 123, 'min_child_weight': 7.204393578192381e-05, 'cat_smooth': 37, 'cat_l2': 5}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:48:20,120][0m Trial 87 finished with value: 1.097358006426133 and parameters: {'reg_alpha': 0.007531317401790098, 'reg_lambda': 0.013918316140692727, 'colsample_bytree': 0.7, 'subsample': 0.1175280695570756, 'learning_rate': 0.017417722121811342, 'max_depth': 39, 'num_leaves': 232, 'min_child_samples': 185, 'min_child_weight': 0.00011511768056818703, 'cat_smooth': 77, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:48:43,988][0m Trial 88 finished with value: 1.0903226907431578 and parameters: {'reg_alpha': 3.483988630554395e-05, 'reg_lambda': 0.0001911764209373926, 'colsample_bytree': 0.9, 'subsample': 0.1655474963477185, 'learning_rate': 0.03485308765669396, 'max_depth': 47, 'num_leaves': 874, 'min_child_samples': 146, 'min_child_weight': 0.00024509936934904525, 'cat_smooth': 34, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:49:08,110][0m Trial 89 finished with value: 1.099329944522781 and parameters: {'reg_alpha': 3.2580421522962195e-05, 'reg_lambda': 0.0001513834409812288, 'colsample_bytree': 0.9, 'subsample': 0.26685279626769065, 'learning_rate': 0.0350211796080422, 'max_depth': 48, 'num_leaves': 933, 'min_child_samples': 144, 'min_child_weight': 0.0002076886057935766, 'cat_smooth': 25, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:49:31,170][0m Trial 90 finished with value: 1.1037814271020392 and parameters: {'reg_alpha': 0.00038003431665927775, 'reg_lambda': 8.593781169635508e-05, 'colsample_bytree': 0.9, 'subsample': 0.23323082035907156, 'learning_rate': 0.04386130729095512, 'max_depth': 29, 'num_leaves': 878, 'min_child_samples': 131, 'min_child_weight': 0.0002530830006584904, 'cat_smooth': 19, 'cat_l2': 4}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:49:53,360][0m Trial 91 finished with value: 1.1006350247795291 and parameters: {'reg_alpha': 2.7940236134534378e-05, 'reg_lambda': 0.0002161100573716321, 'colsample_bytree': 0.9, 'subsample': 0.15895715706139557, 'learning_rate': 0.0303276484685861, 'max_depth': 33, 'num_leaves': 960, 'min_child_samples': 164, 'min_child_weight': 0.0004371691592387172, 'cat_smooth': 33, 'cat_l2': 2}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:50:15,670][0m Trial 92 finished with value: 1.0991054965311817 and parameters: {'reg_alpha': 0.4811132230565324, 'reg_lambda': 0.05850320558897908, 'colsample_bytree': 0.9, 'subsample': 0.18552859933199078, 'learning_rate': 0.024100539917130855, 'max_depth': 71, 'num_leaves': 902, 'min_child_samples': 154, 'min_child_weight': 0.0011738044818746698, 'cat_smooth': 40, 'cat_l2': 2}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:50:37,979][0m Trial 93 finished with value: 1.0980691933917548 and parameters: {'reg_alpha': 4.5290925723428335e-05, 'reg_lambda': 0.0004458459644607825, 'colsample_bytree': 0.5, 'subsample': 0.09868952137790851, 'learning_rate': 0.037010743374917984, 'max_depth': 48, 'num_leaves': 742, 'min_child_samples': 117, 'min_child_weight': 0.00013674062018862937, 'cat_smooth': 29, 'cat_l2': 3}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:51:01,296][0m Trial 94 finished with value: 1.1012207391744187 and parameters: {'reg_alpha': 1.4433508640223643e-05, 'reg_lambda': 1.0955640472126513, 'colsample_bytree': 1.0, 'subsample': 0.06648588852866864, 'learning_rate': 0.047941005056071365, 'max_depth': 63, 'num_leaves': 784, 'min_child_samples': 172, 'min_child_weight': 9.495067776840248e-05, 'cat_smooth': 35, 'cat_l2': 1}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:51:21,357][0m Trial 95 finished with value: 1.0966014435269962 and parameters: {'reg_alpha': 0.00022516882948541727, 'reg_lambda': 0.0009376055401431377, 'colsample_bytree': 0.5, 'subsample': 0.20848831165734968, 'learning_rate': 0.03362113493919659, 'max_depth': 54, 'num_leaves': 624, 'min_child_samples': 145, 'min_child_weight': 5.323800344235899e-05, 'cat_smooth': 47, 'cat_l2': 5}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:51:38,962][0m Trial 96 finished with value: 1.092937359612438 and parameters: {'reg_alpha': 0.0010178096286429204, 'reg_lambda': 8.926111495893527e-05, 'colsample_bytree': 0.8, 'subsample': 0.021343436390862607, 'learning_rate': 0.04251684224407627, 'max_depth': 51, 'num_leaves': 161, 'min_child_samples': 190, 'min_child_weight': 0.0005971286093381831, 'cat_smooth': 13, 'cat_l2': 4}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:51:59,480][0m Trial 97 finished with value: 1.0998443331694872 and parameters: {'reg_alpha': 0.0008620464656904043, 'reg_lambda': 0.0019481378071050607, 'colsample_bytree': 0.8, 'subsample': 0.47624898321655657, 'learning_rate': 0.04042677664815464, 'max_depth': 42, 'num_leaves': 278, 'min_child_samples': 187, 'min_child_weight': 0.0006068724062083184, 'cat_smooth': 11, 'cat_l2': 4}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:52:17,499][0m Trial 98 finished with value: 1.097967890559086 and parameters: {'reg_alpha': 0.0010887442460382354, 'reg_lambda': 6.85770693776466e-05, 'colsample_bytree': 0.8, 'subsample': 0.02449513281945133, 'learning_rate': 0.042983706612790516, 'max_depth': 51, 'num_leaves': 182, 'min_child_samples': 214, 'min_child_weight': 0.00033639916015951063, 'cat_smooth': 8, 'cat_l2': 13}. Best is trial 50 with value: 1.088279019087586.[0m


train.csv: shape = 100000 rows, 52 cols


[32m[I 2021-05-16 18:52:34,115][0m Trial 99 finished with value: 1.094605906603208 and parameters: {'reg_alpha': 0.0005789338442277418, 'reg_lambda': 2.447736393204762e-05, 'colsample_bytree': 0.8, 'subsample': 0.000486993365153488, 'learning_rate': 0.04565651204537804, 'max_depth': 39, 'num_leaves': 133, 'min_child_samples': 198, 'min_child_weight': 0.0009042688882242668, 'cat_smooth': 16, 'cat_l2': 4}. Best is trial 50 with value: 1.088279019087586.[0m


Number of finished trials: 100
Best trial: {'reg_alpha': 0.9557349372478526, 'reg_lambda': 0.00565822311014884, 'colsample_bytree': 0.7, 'subsample': 0.7981326760346236, 'learning_rate': 0.04164055940276916, 'max_depth': 29, 'num_leaves': 622, 'min_child_samples': 150, 'min_child_weight': 0.0003566489059353313, 'cat_smooth': 38, 'cat_l2': 3}
Best value: FrozenTrial(number=50, values=[1.088279019087586], datetime_start=datetime.datetime(2021, 5, 16, 18, 37, 37, 365754), datetime_complete=datetime.datetime(2021, 5, 16, 18, 37, 56, 972100), params={'reg_alpha': 0.9557349372478526, 'reg_lambda': 0.00565822311014884, 'colsample_bytree': 0.7, 'subsample': 0.7981326760346236, 'learning_rate': 0.04164055940276916, 'max_depth': 29, 'num_leaves': 622, 'min_child_samples': 150, 'min_child_weight': 0.0003566489059353313, 'cat_smooth': 38, 'cat_l2': 3}, distributions={'reg_alpha': LogUniformDistribution(high=30.0, low=1e-05), 'reg_lambda': LogUniformDistribution(high=30.0, low=1e-05), 'colsample_by

In [None]:
optuna.visualization.plot_optimization_history(study)

In [None]:
optuna.visualization.plot_slice(study)

In [None]:
optuna.visualization.plot_param_importances(study)

In [None]:
print(study.best_params)

{'reg_alpha': 0.9557349372478526, 'reg_lambda': 0.00565822311014884, 'colsample_bytree': 0.7, 'subsample': 0.7981326760346236, 'learning_rate': 0.04164055940276916, 'max_depth': 29, 'num_leaves': 622, 'min_child_samples': 150, 'min_child_weight': 0.0003566489059353313, 'cat_smooth': 38, 'cat_l2': 3}


In [None]:
study.best_value

1.088279019087586

In [11]:
data = get_input("train.csv")
test = get_input("test.csv")
sample_submission = get_input("sample_submission.csv")

data.drop(columns='id', inplace=True)

mapper = {'Class_1': 0, 'Class_2': 1, 'Class_3': 2, 'Class_4': 3}
data['target'] = data['target'].map(mapper)

train, valid = train_test_split(
    data, train_size=0.7, shuffle=True, random_state=1,
)

train.reset_index(inplace=True, drop=True)
valid.reset_index(inplace=True, drop=True)

y_train = train['target']
x_train = train.drop(columns=['target'])
y_valid = valid['target']
x_valid = valid.drop(columns=['target'])

train.csv: shape = 100000 rows, 52 cols
test.csv: shape = 50000 rows, 51 cols
sample_submission.csv: shape = 50000 rows, 5 cols


In [34]:
lgbm_params = {
    "boosting_type ": "gbdt",
    "objective": "multiclass",
    "metric": "multi_logloss",
    'n_estimators': 1000,
    "learning_rate": 0.12,
    "reg_lambda": 100,
    "n_jobs": -1,
    "seed": 27,
    'device_type': 'cpu',
    'num_class': 4,
    'random_state': 2,
    'lambda_l1': 1.044071701696505e-08, 
    'lambda_l2': 0.627862736041027, 
    'num_leaves': 16, 
    #"max_depth": 3,
    'feature_fraction': 0.9457112060526248, 
    'bagging_fraction': 0.6064976789336679, 
    'bagging_freq': 5, 
    'min_child_samples': 28
}

lgbm_params = {
    "boosting_type ": "gbdt",
    "objective": "multiclass",
    "metric": "multi_logloss",
    'n_estimators': 1000,
    'reg_alpha': 0.9557349372478526, 
    'reg_lambda': 0.00565822311014884, 
    'colsample_bytree': 0.7, 
    'subsample': 0.7981326760346236, 
    'learning_rate': 0.04164055940276916, 
    'max_depth': 29, 
    'num_leaves': 622, 
    'min_child_samples': 150, 
    'min_child_weight': 0.0003566489059353313, 
    'cat_smooth': 38, 
    'cat_l2': 3
}

lgbm_params = {
    "boosting_type ": "gbdt",
    "objective": "multiclass",
    "metric": "multi_logloss",
    'n_estimators': 5000,
    'reg_alpha': 11.159097442782404, 
    'reg_lambda': 6.133258909357973e-05, 
    'colsample_bytree': 0.3, 
    'subsample': 0.40110232869776463, 
    'learning_rate': 0.07591597509135133, 
    #'max_depth': 92, 
    #'num_leaves': 867,
    'max_depth': 8, 
    'num_leaves': 6, 
    'min_child_samples': 165, 
    'min_child_weight': 0.0029014788191160327, 
    'cat_smooth': 79, 
    'cat_l2': 3
}



cv = StratifiedKFold(n_splits=7, random_state=435, shuffle=True)

lgb_estimators, lgb_oof = lightgbm_cv_fit(
    lgbm_params, x_train, y_train, cv, #categorical=categorical_feature_names
)

Mon May 17 20:29:05 2021, Cross-Validation, 70000 rows, 50 cols
Training until validation scores don't improve for 50 rounds.
[10]	valid_0's multi_logloss: 1.11448
[20]	valid_0's multi_logloss: 1.11139
[30]	valid_0's multi_logloss: 1.10881
[40]	valid_0's multi_logloss: 1.10694
[50]	valid_0's multi_logloss: 1.10521
[60]	valid_0's multi_logloss: 1.10365
[70]	valid_0's multi_logloss: 1.10248
[80]	valid_0's multi_logloss: 1.10133
[90]	valid_0's multi_logloss: 1.10036
[100]	valid_0's multi_logloss: 1.09952
[110]	valid_0's multi_logloss: 1.09876
[120]	valid_0's multi_logloss: 1.09804
[130]	valid_0's multi_logloss: 1.09746
[140]	valid_0's multi_logloss: 1.0969
[150]	valid_0's multi_logloss: 1.09631
[160]	valid_0's multi_logloss: 1.09581
[170]	valid_0's multi_logloss: 1.09532
[180]	valid_0's multi_logloss: 1.09502
[190]	valid_0's multi_logloss: 1.09464
[200]	valid_0's multi_logloss: 1.09434
[210]	valid_0's multi_logloss: 1.09407
[220]	valid_0's multi_logloss: 1.0938
[230]	valid_0's multi_loglo

In [35]:
print(f"Out of fold log loss {metrics.log_loss(y_train, lgb_oof)}")

Out of fold log loss 1.0927230279706703


In [36]:
result_lgbm = multi_estimators_predict(lgb_estimators, x_valid, y_valid, metrics.log_loss)

Model 0 metric: 1.090256
Model 1 metric: 1.089374
Model 2 metric: 1.090248
Model 3 metric: 1.090099
Model 4 metric: 1.090486
Model 5 metric: 1.089739
Model 6 metric: 1.090338
Result model metric: 1.089295


### Catboost

In [37]:
cb_params = {
    "n_estimators": 5000,
    "loss_function": "MultiClass",
    "eval_metric": "MultiClass",
    "task_type": "CPU",
    'learning_rate': 0.08992656195498482, 
    'reg_lambda': 27.77866189042851, 
    'subsample': 0.6044483085727145, 
    'random_strength': 0.5855495663972144, 
    'depth': 3, 
    'min_data_in_leaf': 25, 
    'leaf_estimation_iterations': 1,
    'bootstrap_type': 'Bernoulli',
    'leaf_estimation_method': 'Newton',
}

cv = StratifiedKFold(n_splits=7, random_state=435, shuffle=True)

cb_estimators, cb_oof = catboost_cv_fit(
    cb_params, x_train, y_train, cv, #categorical=categorical_feature_names
)

Mon May 17 20:32:36 2021, Cross-Validation, 70000 rows, 50 cols
0:	learn: 1.3351753	test: 1.3352115	best: 1.3352115 (0)	total: 86.7ms	remaining: 7m 13s
10:	learn: 1.1515302	test: 1.1518409	best: 1.1518409 (10)	total: 474ms	remaining: 3m 34s
20:	learn: 1.1204992	test: 1.1203623	best: 1.1203623 (20)	total: 908ms	remaining: 3m 35s
30:	learn: 1.1132077	test: 1.1130026	best: 1.1130026 (30)	total: 1.34s	remaining: 3m 35s
40:	learn: 1.1100112	test: 1.1100711	best: 1.1100711 (40)	total: 1.79s	remaining: 3m 36s
50:	learn: 1.1080076	test: 1.1080511	best: 1.1080511 (50)	total: 2.21s	remaining: 3m 34s
60:	learn: 1.1065421	test: 1.1066315	best: 1.1066315 (60)	total: 2.63s	remaining: 3m 32s
70:	learn: 1.1050763	test: 1.1053989	best: 1.1053989 (70)	total: 3.07s	remaining: 3m 33s
80:	learn: 1.1038282	test: 1.1043449	best: 1.1043449 (80)	total: 3.48s	remaining: 3m 31s
90:	learn: 1.1026597	test: 1.1033713	best: 1.1033713 (90)	total: 3.89s	remaining: 3m 29s
100:	learn: 1.1016839	test: 1.1026175	best: 1.1

In [38]:
print(f"Out of fold log loss {metrics.log_loss(y_train, cb_oof)}")

Out of fold log loss 1.092977742733346


In [39]:
result_cb = multi_estimators_predict(cb_estimators, x_valid, y_valid, metrics.log_loss)

Model 0 metric: 1.091101
Model 1 metric: 1.091115
Model 2 metric: 1.091027
Model 3 metric: 1.090776
Model 4 metric: 1.091148
Model 5 metric: 1.090821
Model 6 metric: 1.091873
Result model metric: 1.090472


### XGBoost

In [26]:
xgb_params = {
    'objective': 'multi:softprob',
    #'eval_metric': 'mlogloss',
    'random_state': 13,
    'max_depth': 5, 
    'n_estimators': 2000,
    'num_class': 4
}

xgb_params = {
    'objective': 'multi:softprob',
    'eval_metric': 'mlogloss',
    'random_state': 13,
    'learning_rate': 0.08356451010151393, 
    'gamma': 0.02911685058980812, 
    'max_depth': 5, 
    'min_child_weight': 10.748514454096288, 
    'max_delta_step': 2.4474818433727927, 
    'subsample': 0.6445037550866027, 
    'colsample_bytree': 0.07634753656242108, 
    'lambda': 13.663280761461781, 
    'alpha': 21.521205761694137, 
    'max_leaves': 48,
    'n_estimators': 2000,
    'num_class': 4
}

cv = StratifiedKFold(n_splits=7, random_state=435, shuffle=True)

xgb_estimators, xgb_encoders, xgb_oof = xgboost_cv_fit(
    xgb_params, x_train, y_train, cv, #categorical=categorical_feature_names
)

Mon May 17 20:18:43 2021, Cross-Validation, 70000 rows, 50 cols
[0]	validation_0-mlogloss:1.35373
Will train until validation_0-mlogloss hasn't improved in 50 rounds.
[10]	validation_0-mlogloss:1.18542
[20]	validation_0-mlogloss:1.13521
[30]	validation_0-mlogloss:1.11891
[40]	validation_0-mlogloss:1.11292
[50]	validation_0-mlogloss:1.11037
[60]	validation_0-mlogloss:1.1082
[70]	validation_0-mlogloss:1.10654
[80]	validation_0-mlogloss:1.10537
[90]	validation_0-mlogloss:1.10413
[100]	validation_0-mlogloss:1.10282
[110]	validation_0-mlogloss:1.10171
[120]	validation_0-mlogloss:1.10082
[130]	validation_0-mlogloss:1.10004
[140]	validation_0-mlogloss:1.09912
[150]	validation_0-mlogloss:1.09832
[160]	validation_0-mlogloss:1.09761
[170]	validation_0-mlogloss:1.09711
[180]	validation_0-mlogloss:1.09639
[190]	validation_0-mlogloss:1.09575
[200]	validation_0-mlogloss:1.09518
[210]	validation_0-mlogloss:1.09475
[220]	validation_0-mlogloss:1.09432
[230]	validation_0-mlogloss:1.09393
[240]	validatio

In [27]:
print(f"Out of fold log loss {metrics.log_loss(y_train, xgb_oof)}")

Out of fold log loss 1.0937984271524215


In [28]:
result_xgb = multi_estimators_predict(xgb_estimators, x_valid, y_valid, metrics.log_loss)

Model 0 metric: 1.092112
Model 1 metric: 1.091548
Model 2 metric: 1.0923
Model 3 metric: 1.092527
Model 4 metric: 1.09258
Model 5 metric: 1.092014
Model 6 metric: 1.091958
Result model metric: 1.090985


In [57]:
metrics.log_loss(y_valid, result_cb)

1.0904720673204586

In [58]:
metrics.log_loss(y_valid, result_xgb)

1.0909847912356256

In [59]:
metrics.log_loss(y_valid, result_lgbm)

1.089295294594471

In [62]:
result_lgb_xgb = np.dstack((result_lgbm, result_xgb))
result_lgb_xgb_mean = np.mean(result_lgb_xgb, axis=2)
print(f"result_lgb_xgb: {metrics.log_loss(y_valid, result_lgb_xgb_mean)}")

result_lgb_xgb: 1.0892976651266912


In [66]:
result_lgb_cb = np.dstack((result_lgbm, result_cb))
result_lgb_cb_mean = np.mean(result_lgb_cb, axis=2)
print(f"result_lgb_cb: {metrics.log_loss(y_valid, result_lgb_cb_mean)}")

result_lgb_cb: 1.089453366934431


In [67]:
result_xgb_cb = np.dstack((result_xgb, result_cb))
result_xgb_cb_mean = np.mean(result_xgb_cb, axis=2)
print(f"result_xgb_cb: {metrics.log_loss(y_valid, result_xgb_cb_mean)}")

result_xgb_cb: 1.0899651967669564


In [68]:
result_lgb_xgb_cb = np.dstack((result_lgbm, result_xgb, result_cb))
result_lgb_xgb_cb_mean = np.mean(result_lgb_xgb_cb, axis=2)
print(f"result_lgb_xgb_cb: {metrics.log_loss(y_valid, result_lgb_xgb_cb_mean)}")

result_lgb_xgb_cb: 1.0893458701040053


## Получение результата

In [69]:
data = get_input("train.csv")
test = get_input("test.csv")
data.drop(columns='id', inplace=True)
sample_submission = get_input("sample_submission.csv")

#data['sum'] = data[data.columns.to_list()[:50]].sum(axis=1) # норм
#data['max'] = data[data.columns.to_list()[:50]].max(axis=1) # норм
#data['not_nul_features'] = (data!=0)[data.columns.to_list()[:50]].sum(axis=1)
#data['2+13'] = data['feature_2'] + data['feature_13']
#data['6+15'] = data['feature_6'] + data['feature_15']

#test['sum'] = test[test.columns.to_list()[:50]].sum(axis=1) # норм
#test['max'] = test[test.columns.to_list()[:50]].max(axis=1) # норм
#test['not_nul_features'] = (test!=0)[test.columns.to_list()[:50]].sum(axis=1)
#test['2+13'] = test['feature_2'] + test['feature_13']
#test['6+15'] = test['feature_6'] + test['feature_15']

mapper = {'Class_1': 1, 'Class_2': 2, 'Class_3': 3, 'Class_4': 4}
data['target'] = data['target'].map(mapper)

y_train = data['target']
x_train = data.drop(columns=['target'])

train.csv: shape = 100000 rows, 52 cols
test.csv: shape = 50000 rows, 51 cols
sample_submission.csv: shape = 50000 rows, 5 cols


In [71]:
lgbm_params = {
    "boosting_type ": "gbdt",
    "objective": "multiclass",
    "metric": "multi_logloss",
    'n_estimators': 5000,
    'reg_alpha': 11.159097442782404, 
    'reg_lambda': 6.133258909357973e-05, 
    'colsample_bytree': 0.3, 
    'subsample': 0.40110232869776463, 
    'learning_rate': 0.07591597509135133, 
    #'max_depth': 92, 
    #'num_leaves': 867,
    'max_depth': 8, 
    'num_leaves': 6, 
    'min_child_samples': 165, 
    'min_child_weight': 0.0029014788191160327, 
    'cat_smooth': 79, 
    'cat_l2': 3
}

cv = StratifiedKFold(n_splits=7, random_state=435, shuffle=True)

lgb_estimators, lgb_oof = lightgbm_cv_fit(
    lgbm_params, x_train, y_train, cv, #categorical=categorical_feature_names
)

print(f"Out of fold log loss {metrics.log_loss(y_train, lgb_oof)}")
result_lgb = multi_estimators_predict(lgb_estimators, test.drop(columns='id'))

Mon May 17 20:51:57 2021, Cross-Validation, 100000 rows, 50 cols
Training until validation scores don't improve for 50 rounds.
[10]	valid_0's multi_logloss: 1.11463
[20]	valid_0's multi_logloss: 1.11174
[30]	valid_0's multi_logloss: 1.10928
[40]	valid_0's multi_logloss: 1.10737
[50]	valid_0's multi_logloss: 1.10574
[60]	valid_0's multi_logloss: 1.10432
[70]	valid_0's multi_logloss: 1.10316
[80]	valid_0's multi_logloss: 1.10201
[90]	valid_0's multi_logloss: 1.10105
[100]	valid_0's multi_logloss: 1.10029
[110]	valid_0's multi_logloss: 1.09952
[120]	valid_0's multi_logloss: 1.0989
[130]	valid_0's multi_logloss: 1.09833
[140]	valid_0's multi_logloss: 1.09786
[150]	valid_0's multi_logloss: 1.09742
[160]	valid_0's multi_logloss: 1.09698
[170]	valid_0's multi_logloss: 1.09653
[180]	valid_0's multi_logloss: 1.09612
[190]	valid_0's multi_logloss: 1.09581
[200]	valid_0's multi_logloss: 1.09555
[210]	valid_0's multi_logloss: 1.09527
[220]	valid_0's multi_logloss: 1.09497
[230]	valid_0's multi_log

In [72]:
cb_params = {
    "n_estimators": 5000,
    "loss_function": "MultiClass",
    "eval_metric": "MultiClass",
    "task_type": "CPU",
    'learning_rate': 0.08992656195498482, 
    'reg_lambda': 27.77866189042851, 
    'subsample': 0.6044483085727145, 
    'random_strength': 0.5855495663972144, 
    'depth': 3, 
    'min_data_in_leaf': 25, 
    'leaf_estimation_iterations': 1,
    'bootstrap_type': 'Bernoulli',
    'leaf_estimation_method': 'Newton',
}

cv = StratifiedKFold(n_splits=7, random_state=435, shuffle=True)

cb_estimators, cb_oof = catboost_cv_fit(
    cb_params, x_train, y_train, cv, #categorical=categorical_feature_names
)

print(f"Out of fold log loss {metrics.log_loss(y_train, lgb_oof)}")
result_cb = multi_estimators_predict(cb_estimators, test.drop(columns='id'))

Mon May 17 20:57:58 2021, Cross-Validation, 100000 rows, 50 cols
0:	learn: 1.3351817	test: 1.3351765	best: 1.3351765 (0)	total: 58.3ms	remaining: 4m 51s
10:	learn: 1.1504047	test: 1.1507554	best: 1.1507554 (10)	total: 622ms	remaining: 4m 42s
20:	learn: 1.1191083	test: 1.1197820	best: 1.1197820 (20)	total: 1.22s	remaining: 4m 50s
30:	learn: 1.1116408	test: 1.1126188	best: 1.1126188 (30)	total: 1.86s	remaining: 4m 58s
40:	learn: 1.1087680	test: 1.1101258	best: 1.1101258 (40)	total: 2.48s	remaining: 4m 59s
50:	learn: 1.1066456	test: 1.1080972	best: 1.1080972 (50)	total: 3.09s	remaining: 4m 59s
60:	learn: 1.1050360	test: 1.1067681	best: 1.1067681 (60)	total: 3.67s	remaining: 4m 57s
70:	learn: 1.1036214	test: 1.1056447	best: 1.1056447 (70)	total: 4.26s	remaining: 4m 55s
80:	learn: 1.1022938	test: 1.1045768	best: 1.1045768 (80)	total: 4.86s	remaining: 4m 55s
90:	learn: 1.1011285	test: 1.1035656	best: 1.1035656 (90)	total: 5.45s	remaining: 4m 54s
100:	learn: 1.1001110	test: 1.1026694	best: 1.

In [73]:
xgb_params = {
    'objective': 'multi:softprob',
    'eval_metric': 'mlogloss',
    'random_state': 13,
    'learning_rate': 0.08356451010151393, 
    'gamma': 0.02911685058980812, 
    'max_depth': 5, 
    'min_child_weight': 10.748514454096288, 
    'max_delta_step': 2.4474818433727927, 
    'subsample': 0.6445037550866027, 
    'colsample_bytree': 0.07634753656242108, 
    'lambda': 13.663280761461781, 
    'alpha': 21.521205761694137, 
    'max_leaves': 48,
    'n_estimators': 2000,
    'num_class': 4
}

cv = StratifiedKFold(n_splits=7, random_state=435, shuffle=True)

xgb_estimators, xgb_encoders, xgb_oof = xgboost_cv_fit(
    xgb_params, x_train, y_train, cv, #categorical=categorical_feature_names
)

print(f"Out of fold log loss {metrics.log_loss(y_train, lgb_oof)}")
result_xgb = multi_estimators_predict(xgb_estimators, test.drop(columns='id'))

Mon May 17 21:05:06 2021, Cross-Validation, 100000 rows, 50 cols
[0]	validation_0-mlogloss:1.35363
Will train until validation_0-mlogloss hasn't improved in 50 rounds.
[10]	validation_0-mlogloss:1.18494
[20]	validation_0-mlogloss:1.13507
[30]	validation_0-mlogloss:1.11864
[40]	validation_0-mlogloss:1.11229
[50]	validation_0-mlogloss:1.10949
[60]	validation_0-mlogloss:1.10738
[70]	validation_0-mlogloss:1.10571
[80]	validation_0-mlogloss:1.10457
[90]	validation_0-mlogloss:1.10326
[100]	validation_0-mlogloss:1.10222
[110]	validation_0-mlogloss:1.10135
[120]	validation_0-mlogloss:1.10035
[130]	validation_0-mlogloss:1.09956
[140]	validation_0-mlogloss:1.09892
[150]	validation_0-mlogloss:1.09848
[160]	validation_0-mlogloss:1.09784
[170]	validation_0-mlogloss:1.09737
[180]	validation_0-mlogloss:1.09687
[190]	validation_0-mlogloss:1.09633
[200]	validation_0-mlogloss:1.09583
[210]	validation_0-mlogloss:1.09545
[220]	validation_0-mlogloss:1.0951
[230]	validation_0-mlogloss:1.09481
[240]	validati

In [74]:
result_lgb_xgb_cb = np.dstack((result_lgb, result_xgb, result_cb))
result_lgb_xgb_cb_mean = np.mean(result_lgb_xgb_cb, axis=2)

In [75]:
test_pred_df = pd.DataFrame(result_lgb_xgb_cb_mean)
test_pred_df.rename(columns={0:'class_1',	1:'class_2',	2:'class_3',	3:'class_4'}, inplace=True)
result = test.drop(columns = test.columns.to_list()[1:])
result = pd.concat([result, test_pred_df], axis=1)
result.to_csv('result.csv', index=False)

In [76]:
result

Unnamed: 0,id,class_1,class_2,class_3,class_4
0,100000,0.090962,0.621724,0.165693,0.121621
1,100001,0.074952,0.701683,0.134995,0.088370
2,100002,0.083937,0.643118,0.175356,0.097588
3,100003,0.083970,0.535519,0.293697,0.086813
4,100004,0.074636,0.624492,0.185662,0.115210
...,...,...,...,...,...
49995,149995,0.091246,0.685393,0.163664,0.059698
49996,149996,0.077159,0.654197,0.131921,0.136723
49997,149997,0.081517,0.531436,0.226084,0.160963
49998,149998,0.081800,0.592620,0.166096,0.159484
