In [1]:
import pandas as pd
import numpy as np

%matplotlib inline 
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)

#import sklearn
import sklearn.model_selection as ms
from sklearn.model_selection import train_test_split

# Featureset Scalers
from sklearn.preprocessing import MinMaxScaler, StandardScaler, PowerTransformer


# Catboost model
import catboost as cb


# Grid Search and Cross Validation for Hyper-parameter tuning
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

#Model Eval Metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error


# Examining feature importance
from sklearn.inspection import permutation_importance
import shap


# Style and plotting
 
# Set the style
plt.style.use('seaborn-v0_8-whitegrid')
# Display all columns
pd.set_option('display.max_columns', None)


  @jit
  @jit
  @jit
  @jit
  @jit
  @numba.jit
  @numba.jit
  @numba.jit
  @numba.jit
  @jit # we can't use this when using a custom link function...
  @jit
  @jit
  @jit
  @jit
  @jit
  @jit
  @jit
The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.
The 'nopython' keyword argument was not supplied to the 'numba.jit' decorator. The implicit default value for this argument is currently False, but it will be changed to True in Numba 0.59.0. See https://numba.readthedocs.io/en/stable/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit for details.


# FUNCTION DEFINITION

In [80]:
# Define some function for evaluation and feature importance

def evaluate(model, X_train, X_test, y_train, y_test, logy=False):
    pred_dict = {
    'Train RMSE': [],
    'Test RMSE':[],
    'Train MAPE':[],
    'Test MAPE':[],
    'Train MAE':[],
    'Test MAE': [],
    'Train R2':[],
    'Test R2': []
    }
    

    
    r2_test = model.score(X_test, y_test)
    r2_train = model.score(X_train, y_train)
    pred_dict["Train R2"].append(r2_train)
    pred_dict["Test R2"].append(r2_test)

    
    # Train / Test Predictions for Base Model
    y_predict = model.predict(X_train)
    
    if logy:
        y_predict = np.exp(y_predict)
        y_test = np.exp(y_test)
        y_train = np.exp(y_train)
    
    #TRAIN
    #MSE = mean_squared_error(y_train, y_predict, squared=True)
    RMSE = mean_squared_error(y_train, y_predict, squared=False)
    MAE = mean_absolute_error(y_train, y_predict)
    MAPE = mean_absolute_percentage_error(y_train, y_predict)*100
    #print('MSE Train: ', MSE)
    print('RMSE Train: ', RMSE)
    print('MAE Train: ', MAE)
    print('MAPE Train: ', MAPE)
    
    pred_dict["Train RMSE"].append(RMSE)
    pred_dict["Train MAPE"].append(MAPE)
    pred_dict["Train MAE"].append(MAE)
    
    
    # TEST
    y_predict = model.predict(X_test)
    if logy:
        y_predict = np.exp(y_predict)
    #MSE = mean_squared_error(y_test, y_predict, squared=True)
    RMSE = mean_squared_error(y_test, y_predict, squared=False)
    MAE = mean_absolute_error(y_test, y_predict)
    MAPE = mean_absolute_percentage_error(y_test, y_predict)*100

    #print('MSE Val: ', MSE)
    print('RMSE Val: ', RMSE)
    print('MAE Val: ', MAE)
    print('MAPE Val: ', MAPE)
    pred_dict["Test MAE"].append(MAE)
    pred_dict["Test MAPE"].append(MAPE)
    pred_dict["Test RMSE"].append(RMSE)
    
    return pred_dict



## Data Loading and Feature, Target Partition

In [23]:
# Import Cleaned and Processed Data (no encoding or dummification)
data = pd.read_csv('../data/L1/ames_house_price_processed.csv', na_filter=False)

print(data.columns.to_series().groupby(data.dtypes).groups)

# Force categorical variables that are 'numeric' to be categorical
data["ms_sub_class"] = data['ms_sub_class'].astype('category')
data["overall_qual"] = data['overall_qual'].astype('category')
data["overall_cond"] = data['overall_cond'].astype('category')

# Define features and target
features = data.drop(['pid', 'sale_price', 'log_sale_price'], axis = 1)
target = data['log_sale_price']
logy = True # we are going with log(y) because our model sandbox demonstrated higher performance for logged depend.

from pandas.api.types import is_numeric_dtype  
def convert_cats(df):
        cats = []
        for col in df.columns:
            if is_numeric_dtype(df[col]):
                pass
            else:
                cats.append(col)
        for col in cats:
            df[col] = df[col].astype('category')

# Convert categorical variables in features
convert_cats(features)

# Get list of which columns are categorical
cols = features.columns
numeric_cols = features._get_numeric_data().columns

cat_cols1 = list(set(cols) - set(numeric_cols))
print(cat_cols1)

{int64: ['pid', 'gr_liv_area', 'sale_price', 'ms_sub_class', 'lot_area', 'overall_qual', 'overall_cond', 'central_air', '1st_flr_sf', '2nd_flr_sf', 'low_qual_fin_sf', 'full_bath', 'half_bath', 'bedroom_abv_gr', 'kitchen_abv_gr', 'tot_rms_abv_grd', 'fireplaces', 'wood_deck_sf', 'open_porch_sf', 'enclosed_porch', '3_ssn_porch', 'screen_porch', 'pool_area', 'misc_val', 'mo_sold', 'yr_sold', 'near_rr', 'near_main_rd', 'near_pos', 'house_age_at_sale', 'construction_age_at_sale'], float64: ['lot_frontage', 'mas_vnr_area', 'bsmt_fin_sf1', 'bsmt_fin_sf2', 'bsmt_unf_sf', 'total_bsmt_sf', 'bsmt_full_bath', 'bsmt_half_bath', 'garage_cars', 'garage_area', 'log_sale_price'], object: ['ms_zoning', 'alley', 'lot_shape', 'land_contour', 'lot_config', 'land_slope', 'neighborhood', 'bldg_type', 'house_style', 'roof_style', 'exterior1st', 'mas_vnr_type', 'exter_qual', 'exter_cond', 'foundation', 'bsmt_qual', 'bsmt_cond', 'bsmt_exposure', 'bsmt_fin_type1', 'bsmt_fin_type2', 'heating_qc', 'electrical', 'ki

## Train Test Split

In [4]:
# Train / test / split to isolate a test dataset
X_train, X_test, y_train, y_test = train_test_split(features, target, \
                                                    test_size=0.10, shuffle = True, random_state = 8)



In [5]:

# Train / test split to isolate a validation dataset
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, \
                                                    test_size=0.10, random_state = 8)


print("X_train shape: {}".format(X_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("X_val shape: {}".format(X_val.shape))
print("y_train shape: {}".format(y_train.shape))
print("y_test shape: {}".format(y_test.shape))
print("y val shape: {}".format(y_val.shape))

X_train shape: (2088, 73)
X_test shape: (258, 73)
X_val shape: (232, 73)
y_train shape: (2088,)
y_test shape: (258,)
y val shape: (232,)


## CatBoost Recursive Feature Elimination

In [45]:
cbr = cb.CatBoostRegressor(eval_metric = 'RMSE',iterations = 100, \
                           cat_features=cat_cols1, random_state = 66, learning_rate = 0.1)

In [46]:
cbr.fit(X_train, y_train)

0:	learn: 0.3581779	total: 5.71ms	remaining: 566ms
1:	learn: 0.3338600	total: 11.3ms	remaining: 553ms
2:	learn: 0.3135490	total: 15.4ms	remaining: 499ms
3:	learn: 0.2945875	total: 18.9ms	remaining: 452ms
4:	learn: 0.2780207	total: 21.9ms	remaining: 416ms
5:	learn: 0.2633796	total: 24.7ms	remaining: 387ms
6:	learn: 0.2498689	total: 27.4ms	remaining: 364ms
7:	learn: 0.2373148	total: 29.9ms	remaining: 344ms
8:	learn: 0.2262258	total: 32.4ms	remaining: 328ms
9:	learn: 0.2162016	total: 37.1ms	remaining: 334ms
10:	learn: 0.2061914	total: 39.6ms	remaining: 321ms
11:	learn: 0.1976030	total: 42.3ms	remaining: 310ms
12:	learn: 0.1894464	total: 45ms	remaining: 301ms
13:	learn: 0.1822867	total: 47.6ms	remaining: 292ms
14:	learn: 0.1756761	total: 50.3ms	remaining: 285ms
15:	learn: 0.1696606	total: 54.7ms	remaining: 287ms
16:	learn: 0.1646409	total: 58ms	remaining: 283ms
17:	learn: 0.1602069	total: 60.6ms	remaining: 276ms
18:	learn: 0.1558794	total: 63.3ms	remaining: 270ms
19:	learn: 0.1518878	total

<catboost.core.CatBoostRegressor at 0x169ca6350>

In [47]:
#EVALUATE FULL MODEL
print('Train R2:', cbr.score(X_train, y_train))
print('Validate R2:', cbr.score(X_val, y_val))
# EVAL
pred = evaluate(cbr, X_train, X_test, y_train, y_test, logy = True)
pred_df = pd.DataFrame(pred)
pred_df

Train R2: 0.9484030538284082
Validate R2: 0.928002884470312
MSE Train:  16777.67990669347
RMSE Train:  129.5286837217667
MAE Train:  11552.063415764742
MAPE Train:  6.557580026320946
MSE Val:  18689.268274194434
RMSE Val:  136.70869860471365
MAE Val:  13255.37781731752
MAPE Val:  9.296018334475695


Unnamed: 0,Train RMSE,Test RMSE,Train MAPE,Test MAPE,Train MAE,Test MAE,Train R2,Test R2
0,129.528684,136.708699,6.55758,9.296018,11552.063416,13255.377817,0.948403,0.870104


In [None]:
cbr = cb.CatBoostRegressor(eval_metric = 'RMSE',iterations = 100, \
                           learning_rate = 0.1, cat_features=cat_cols, random_state = 123)

In [None]:
# DEFINE RFE PROCESS
rfe_dict = cbr.select_features(X = X_train, 
                                     y = np.exp(y_train), 
                                     eval_set = (X_val,np.exp(y_val)), # Walkforward validation set we have created earlier
                                     features_for_select = '0-72', # Features that will be selected on the RFE
                                     num_features_to_select = 30, # Number of features to keep from the selected
                                     steps = 72, # Number of model iterations performed in the RFE
                                     verbose = 50, #
                                     train_final_model = False, # Train final model after RFE is finished
                                     plot = True # plot the ??? after the RFE is finished
                                     )

In [None]:
# The above indicates a loss minimum of RMSE of around 18.5K at 28 removed features (73-28) selected features

In [None]:
from catboost import Pool

train_pool = Pool(X_train, np.exp(y_train), cat_features=cat_cols)
val_pool = Pool(X_val, np.exp(y_val), cat_features=cat_cols)
test_pool = Pool(X_test, np.exp(y_test), cat_features=cat_cols)

from catboost import EShapCalcType, EFeaturesSelectionAlgorithm, CatBoostRegressor

def select_features_syntetic(algorithm: EFeaturesSelectionAlgorithm, steps: int = 73, select: int = 73-28):
    print('Algorithm:', algorithm)
    model = CatBoostRegressor(eval_metric = 'RMSE',iterations = 1000, \
                           learning_rate = 0.01, cat_features=cat_cols, random_state = 66)
    summary = model.select_features(
        train_pool,
        eval_set=val_pool,
        features_for_select='0-72',     # we will select from all features
        num_features_to_select=select,  # we want to select exactly important features
        steps=steps,                                     # more steps - more accurate selection
        algorithm=algorithm,
        shap_calc_type=EShapCalcType.Regular,            # can be Approximate, Regular and Exact
        train_final_model=True,                          # to train model with selected features
        logging_level='Silent',
        plot=True
    )
    print('Selected features indices:', summary['selected_features'])
    print('Selected features names:', summary['selected_features_names'])
    print('Eliminated features indices:', summary['eliminated_features'])
    print('Eliminated features names:', summary['eliminated_features_names'])
    
    return summary


In [None]:
# run with 45 selected (28 removed)
synthetic_shap_summary = select_features_syntetic(algorithm=EFeaturesSelectionAlgorithm.RecursiveByShapValues)


- results indicate that removing 18 (selecting 55) features results in the optimal minimization of the loss function. Thus, we will run this once more, selecting 55 features

In [None]:
# The loss is really unstable after removing more than 20 features so we will conclude feature selection by choosing the best 20 to remove
synthetic_shap_summary = select_features_syntetic(algorithm=EFeaturesSelectionAlgorithm.RecursiveByShapValues, \
                                                  steps=73-18, select = 73-18)


In [None]:
synthetic_shap_summary.items()

In [6]:
#features_to_eliminate = synthetic_shap_summary['eliminated_features_names']

features_to_eliminate = ['fireplace_qu',
 'bsmt_qual',
 'lot_area',
 'sale_condition',
 'sale_type',
 'tot_rms_abv_grd',
 'full_bath',
 'misc_feature',
 'enclosed_porch',
 'garage_type',
 'bsmt_cond',
 'near_pos',
 'land_slope',
 'overall_cond',
 'exterior1st',
 'house_style',
 'mo_sold',
 'mas_vnr_type']


X_train_se = X_train.drop(features_to_eliminate, axis = 1)

X_val_se = X_val.drop(features_to_eliminate, axis = 1)
X_test_se = X_test.drop(features_to_eliminate, axis = 1)

# Get list of which columns are categorical
cols = X_train_se.columns
numeric_cols = X_train_se._get_numeric_data().columns

cat_cols = list(set(cols) - set(numeric_cols))
features_to_eliminate

['fireplace_qu',
 'bsmt_qual',
 'lot_area',
 'sale_condition',
 'sale_type',
 'tot_rms_abv_grd',
 'full_bath',
 'misc_feature',
 'enclosed_porch',
 'garage_type',
 'bsmt_cond',
 'near_pos',
 'land_slope',
 'overall_cond',
 'exterior1st',
 'house_style',
 'mo_sold',
 'mas_vnr_type']

## HYPER PARAMETER TUNING

Bayesian optimization

Random and grid search pay no attention to past results when searching the best hyperparametes. Bayesian optimization, in contrast to random or grid search, keeps track of past evaluation results which it uses to form a probabilistic model mapping hyperparameters to a probability of a score on the objective function. There're a number of libraries that can do it. 

From this tutorial: 
https://github.com/catboost/catboost/blob/93da9cea2e898560caa8a64ce4fe8d695752261e/catboost/tutorials/hyperparameters_tuning/hyperparameters_tuning_using_optuna_and_hyperopt.ipynb

In [8]:
import optuna

import optuna.distributions as od
from optuna.samplers import TPESampler
from catboost.utils import eval_metric
from sklearn.pipeline import Pipeline
#from sklearn.feature_selection import SelectKBest, chi2 # feature selection (18, 19, 20, 21) to remove so  




RANDOM_SEED = 79
EARLY_STOPPING_ROUND = 20 # Catboost will stop before reaching num_boost_round

In [63]:
from sklearn.model_selection import cross_val_score

#train_pool = Pool(data=X_train_se, label=y_train, cat_features=cat_cols)
#val_pool = Pool(data=X_val_se, label=y_val, cat_features=cat_cols)
def objective(trial):
    
    param = {
        "iterations": 1000,
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 10),
        "subsample": trial.suggest_float("subsample", 0.05, 1.0),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.05, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100)
    }
    
    
#     param = {}
#     param['learning_rate'] = trial.suggest_discrete_uniform("learning_rate", 0.001, 0.02, 0.001)
#     param['depth'] = trial.suggest_int('depth', 9, 15)
#     param['l2_leaf_reg'] = trial.suggest_discrete_uniform('l2_leaf_reg', 1.0, 5.5, 0.5)
#     param['min_child_samples'] = trial.suggest_categorical('min_child_samples', [1, 4, 8, 16, 32])
    
    
#     param['grow_policy'] = 'Depthwise'
#     param['iterations'] = 1000
#     param['use_best_model'] = True
#     param['eval_metric'] = 'MAE'
#     param['od_type'] = 'Iter'
#     param['od_wait'] = 20
#     param['random_state'] = RANDOM_SEED
#     param['logging_level'] = 'Silent'
    
    regressor = cb.CatBoostRegressor(**param, cat_features=cat_cols, silent=True)

    regressor.fit(X_train_se, y_train,
                  early_stopping_rounds=EARLY_STOPPING_ROUND)
    

    predictions = regressor.predict(X_val_se)
    rmse = mean_squared_error(np.exp(y_val), np.exp(predictions), squared=False)
    
    return rmse


In [55]:
# Create Study and optimize the optuna objective function
tune_se = optuna.create_study(study_name=f'catboost-seed--reduced18features', direction='minimize')
tune_se.optimize(objective, n_trials=200, n_jobs=-1, timeout=24000)


[I 2023-06-12 20:00:17,788] A new study created in memory with name: catboost-seed--reduced18features
[I 2023-06-12 20:00:18,216] Trial 5 finished with value: 0.20293458468451012 and parameters: {'learning_rate': 0.003169406932032436, 'depth': 1, 'subsample': 0.3110345126136329, 'colsample_bylevel': 0.1616092685606133, 'min_data_in_leaf': 74}. Best is trial 5 with value: 0.20293458468451012.
[I 2023-06-12 20:00:19,217] Trial 8 finished with value: 0.12765874120581966 and parameters: {'learning_rate': 0.004781954414371613, 'depth': 3, 'subsample': 0.736464099511869, 'colsample_bylevel': 0.06638354933101523, 'min_data_in_leaf': 6}. Best is trial 8 with value: 0.12765874120581966.
[I 2023-06-12 20:00:19,665] Trial 9 finished with value: 0.26719110476239893 and parameters: {'learning_rate': 0.0012086872240139723, 'depth': 2, 'subsample': 0.25457428486130856, 'colsample_bylevel': 0.05735828268717488, 'min_data_in_leaf': 93}. Best is trial 8 with value: 0.12765874120581966.
[I 2023-06-12 20:

[I 2023-06-12 20:01:51,651] Trial 23 finished with value: 0.10474507610217107 and parameters: {'learning_rate': 0.010209867545934823, 'depth': 7, 'subsample': 0.06292124395542126, 'colsample_bylevel': 0.9421779113589337, 'min_data_in_leaf': 47}. Best is trial 29 with value: 0.09978661302843006.
[I 2023-06-12 20:01:51,947] Trial 31 finished with value: 0.10179773076004589 and parameters: {'learning_rate': 0.04318302680456953, 'depth': 6, 'subsample': 0.16088733423938972, 'colsample_bylevel': 0.406400508856327, 'min_data_in_leaf': 83}. Best is trial 29 with value: 0.09978661302843006.
[I 2023-06-12 20:01:51,948] Trial 27 finished with value: 0.10218168229303487 and parameters: {'learning_rate': 0.012916655525188293, 'depth': 6, 'subsample': 0.3766589882942274, 'colsample_bylevel': 0.9967964801343234, 'min_data_in_leaf': 83}. Best is trial 29 with value: 0.09978661302843006.
[I 2023-06-12 20:01:52,528] Trial 32 finished with value: 0.1023828604575628 and parameters: {'learning_rate': 0.04

[I 2023-06-12 20:02:51,849] Trial 60 finished with value: 0.09942371264152522 and parameters: {'learning_rate': 0.05303619877042585, 'depth': 3, 'subsample': 0.37473061973466715, 'colsample_bylevel': 0.4656893478328775, 'min_data_in_leaf': 79}. Best is trial 33 with value: 0.09838801650665882.
[I 2023-06-12 20:02:52,387] Trial 58 finished with value: 0.10075026831542375 and parameters: {'learning_rate': 0.05281674327906526, 'depth': 3, 'subsample': 0.3662312246029409, 'colsample_bylevel': 0.6947254902635844, 'min_data_in_leaf': 86}. Best is trial 33 with value: 0.09838801650665882.
[I 2023-06-12 20:02:53,135] Trial 65 finished with value: 0.11069910057082337 and parameters: {'learning_rate': 0.039474194579934785, 'depth': 1, 'subsample': 0.4215254978980839, 'colsample_bylevel': 0.4649556150423355, 'min_data_in_leaf': 66}. Best is trial 33 with value: 0.09838801650665882.
[I 2023-06-12 20:02:54,545] Trial 64 finished with value: 0.10351665524154102 and parameters: {'learning_rate': 0.07

[I 2023-06-12 20:04:10,843] Trial 74 finished with value: 0.10954530288607542 and parameters: {'learning_rate': 0.04167214739204106, 'depth': 10, 'subsample': 0.294961594899741, 'colsample_bylevel': 0.7228845388624437, 'min_data_in_leaf': 82}. Best is trial 33 with value: 0.09838801650665882.
[I 2023-06-12 20:04:14,013] Trial 75 finished with value: 0.106343559943207 and parameters: {'learning_rate': 0.04180696231339241, 'depth': 10, 'subsample': 0.29171350281752867, 'colsample_bylevel': 0.6010711400398876, 'min_data_in_leaf': 82}. Best is trial 33 with value: 0.09838801650665882.
[I 2023-06-12 20:04:16,569] Trial 88 finished with value: 0.10024764711583402 and parameters: {'learning_rate': 0.03142113341259898, 'depth': 5, 'subsample': 0.2645964891758261, 'colsample_bylevel': 0.5332622713431966, 'min_data_in_leaf': 96}. Best is trial 33 with value: 0.09838801650665882.
[I 2023-06-12 20:04:16,953] Trial 89 finished with value: 0.10055049310514254 and parameters: {'learning_rate': 0.0317

[I 2023-06-12 20:05:52,747] Trial 117 finished with value: 0.10005540258566564 and parameters: {'learning_rate': 0.03481107741623295, 'depth': 5, 'subsample': 0.37992822009314403, 'colsample_bylevel': 0.7034268365389678, 'min_data_in_leaf': 79}. Best is trial 98 with value: 0.0975573561666987.
[I 2023-06-12 20:05:52,861] Trial 116 finished with value: 0.10015764295883998 and parameters: {'learning_rate': 0.027360601482473552, 'depth': 5, 'subsample': 0.3839497298313245, 'colsample_bylevel': 0.7016873568317585, 'min_data_in_leaf': 100}. Best is trial 98 with value: 0.0975573561666987.
[I 2023-06-12 20:05:56,895] Trial 118 finished with value: 0.10047262678322447 and parameters: {'learning_rate': 0.05679407797638799, 'depth': 5, 'subsample': 0.3620636284679829, 'colsample_bylevel': 0.7080073862232328, 'min_data_in_leaf': 84}. Best is trial 98 with value: 0.0975573561666987.
[I 2023-06-12 20:05:59,027] Trial 119 finished with value: 0.09950020377005735 and parameters: {'learning_rate': 0.

[I 2023-06-12 20:06:39,473] Trial 136 finished with value: 0.10326078760174727 and parameters: {'learning_rate': 0.03424938495586907, 'depth': 7, 'subsample': 0.29978583340935633, 'colsample_bylevel': 0.6368567403714193, 'min_data_in_leaf': 72}. Best is trial 98 with value: 0.0975573561666987.
[I 2023-06-12 20:06:39,702] Trial 143 finished with value: 0.10168299569374102 and parameters: {'learning_rate': 0.0220999021620279, 'depth': 6, 'subsample': 0.21789281389502752, 'colsample_bylevel': 0.5317708652556736, 'min_data_in_leaf': 72}. Best is trial 98 with value: 0.0975573561666987.
[I 2023-06-12 20:06:40,807] Trial 141 finished with value: 0.10203693608069601 and parameters: {'learning_rate': 0.03517959555646204, 'depth': 7, 'subsample': 0.3047884851005876, 'colsample_bylevel': 0.6533179280941009, 'min_data_in_leaf': 72}. Best is trial 98 with value: 0.0975573561666987.
[I 2023-06-12 20:06:42,465] Trial 145 finished with value: 0.1016648552406974 and parameters: {'learning_rate': 0.031

[I 2023-06-12 20:07:11,359] Trial 168 finished with value: 0.10165008572528109 and parameters: {'learning_rate': 0.029504946098694782, 'depth': 5, 'subsample': 0.2764004482451799, 'colsample_bylevel': 0.7302358686367344, 'min_data_in_leaf': 54}. Best is trial 157 with value: 0.09664845941727414.
[I 2023-06-12 20:07:11,678] Trial 170 finished with value: 0.09926360301133297 and parameters: {'learning_rate': 0.030043289060665942, 'depth': 5, 'subsample': 0.23797681798458908, 'colsample_bylevel': 0.7209717564553426, 'min_data_in_leaf': 49}. Best is trial 157 with value: 0.09664845941727414.
[I 2023-06-12 20:07:12,364] Trial 169 finished with value: 0.10041467009978897 and parameters: {'learning_rate': 0.030271187373076727, 'depth': 5, 'subsample': 0.1968138110855198, 'colsample_bylevel': 0.7288742282260917, 'min_data_in_leaf': 37}. Best is trial 157 with value: 0.09664845941727414.
[I 2023-06-12 20:07:15,900] Trial 173 finished with value: 0.10011122516901848 and parameters: {'learning_ra

[I 2023-06-12 20:07:47,560] Trial 197 finished with value: 0.1006199604473755 and parameters: {'learning_rate': 0.015915598110080444, 'depth': 6, 'subsample': 0.2584604402656491, 'colsample_bylevel': 0.6651990453596046, 'min_data_in_leaf': 46}. Best is trial 157 with value: 0.09664845941727414.
[I 2023-06-12 20:07:47,825] Trial 195 finished with value: 0.10160970578453643 and parameters: {'learning_rate': 0.02769266131757369, 'depth': 6, 'subsample': 0.25261097396218124, 'colsample_bylevel': 0.6675955865425185, 'min_data_in_leaf': 39}. Best is trial 157 with value: 0.09664845941727414.
[I 2023-06-12 20:07:47,931] Trial 196 finished with value: 0.10305982423065041 and parameters: {'learning_rate': 0.027589226314229512, 'depth': 6, 'subsample': 0.2538418622427782, 'colsample_bylevel': 0.6660403809263608, 'min_data_in_leaf': 47}. Best is trial 157 with value: 0.09664845941727414.
[I 2023-06-12 20:07:55,845] Trial 199 finished with value: 0.10412496248711091 and parameters: {'learning_rate

In [56]:
tune_se.best_params

{'learning_rate': 0.026028185473201213,
 'depth': 6,
 'subsample': 0.2198898716301622,
 'colsample_bylevel': 0.6659011781789096,
 'min_data_in_leaf': 40}

In [57]:
optuna.visualization.plot_optimization_history(tune_se).show(renderer="browser")

In [58]:
optuna.visualization.plot_param_importances(tune_se).show(renderer="browser")

In [59]:
# Train the final model using the best hyperparameters
final_model_se = cb.CatBoostRegressor(**tune_se.best_params, cat_features=cat_cols, eval_metric="MAE")
final_model_se.fit(X_train_se, y_train)

0:	learn: 0.2912892	total: 9.68ms	remaining: 9.67s
1:	learn: 0.2858019	total: 15.8ms	remaining: 7.87s
2:	learn: 0.2804179	total: 21.8ms	remaining: 7.25s
3:	learn: 0.2754398	total: 27.5ms	remaining: 6.84s
4:	learn: 0.2708135	total: 32.1ms	remaining: 6.4s
5:	learn: 0.2661043	total: 37.8ms	remaining: 6.27s
6:	learn: 0.2611257	total: 41.8ms	remaining: 5.93s
7:	learn: 0.2560228	total: 46.1ms	remaining: 5.72s
8:	learn: 0.2519805	total: 48.5ms	remaining: 5.34s
9:	learn: 0.2471202	total: 52.9ms	remaining: 5.24s
10:	learn: 0.2426998	total: 58.4ms	remaining: 5.25s
11:	learn: 0.2386066	total: 62.7ms	remaining: 5.17s
12:	learn: 0.2346812	total: 66.3ms	remaining: 5.03s
13:	learn: 0.2305195	total: 70.4ms	remaining: 4.96s
14:	learn: 0.2261419	total: 73.9ms	remaining: 4.85s
15:	learn: 0.2219208	total: 78.4ms	remaining: 4.82s
16:	learn: 0.2180078	total: 83.5ms	remaining: 4.83s
17:	learn: 0.2143075	total: 87ms	remaining: 4.75s
18:	learn: 0.2106797	total: 90.9ms	remaining: 4.69s
19:	learn: 0.2074799	tota

191:	learn: 0.0790042	total: 812ms	remaining: 3.42s
192:	learn: 0.0789172	total: 816ms	remaining: 3.41s
193:	learn: 0.0788716	total: 820ms	remaining: 3.4s
194:	learn: 0.0788342	total: 824ms	remaining: 3.4s
195:	learn: 0.0788105	total: 829ms	remaining: 3.4s
196:	learn: 0.0787115	total: 832ms	remaining: 3.39s
197:	learn: 0.0785937	total: 836ms	remaining: 3.39s
198:	learn: 0.0785021	total: 841ms	remaining: 3.38s
199:	learn: 0.0784468	total: 845ms	remaining: 3.38s
200:	learn: 0.0783867	total: 850ms	remaining: 3.38s
201:	learn: 0.0782739	total: 854ms	remaining: 3.37s
202:	learn: 0.0782054	total: 858ms	remaining: 3.37s
203:	learn: 0.0781107	total: 862ms	remaining: 3.36s
204:	learn: 0.0780110	total: 865ms	remaining: 3.35s
205:	learn: 0.0779613	total: 869ms	remaining: 3.35s
206:	learn: 0.0778605	total: 873ms	remaining: 3.34s
207:	learn: 0.0777711	total: 877ms	remaining: 3.34s
208:	learn: 0.0777293	total: 880ms	remaining: 3.33s
209:	learn: 0.0776771	total: 885ms	remaining: 3.33s
210:	learn: 0.0

376:	learn: 0.0698918	total: 1.62s	remaining: 2.68s
377:	learn: 0.0698641	total: 1.63s	remaining: 2.68s
378:	learn: 0.0698356	total: 1.63s	remaining: 2.67s
379:	learn: 0.0697963	total: 1.64s	remaining: 2.67s
380:	learn: 0.0697811	total: 1.64s	remaining: 2.66s
381:	learn: 0.0697636	total: 1.64s	remaining: 2.66s
382:	learn: 0.0697149	total: 1.65s	remaining: 2.65s
383:	learn: 0.0696639	total: 1.65s	remaining: 2.65s
384:	learn: 0.0696446	total: 1.65s	remaining: 2.64s
385:	learn: 0.0696177	total: 1.66s	remaining: 2.64s
386:	learn: 0.0696022	total: 1.66s	remaining: 2.63s
387:	learn: 0.0695228	total: 1.67s	remaining: 2.63s
388:	learn: 0.0694907	total: 1.67s	remaining: 2.63s
389:	learn: 0.0694355	total: 1.68s	remaining: 2.62s
390:	learn: 0.0694213	total: 1.68s	remaining: 2.62s
391:	learn: 0.0693986	total: 1.68s	remaining: 2.61s
392:	learn: 0.0693716	total: 1.69s	remaining: 2.61s
393:	learn: 0.0693491	total: 1.69s	remaining: 2.6s
394:	learn: 0.0693207	total: 1.7s	remaining: 2.6s
395:	learn: 0.0

583:	learn: 0.0632726	total: 2.42s	remaining: 1.73s
584:	learn: 0.0632494	total: 2.43s	remaining: 1.72s
585:	learn: 0.0632144	total: 2.43s	remaining: 1.72s
586:	learn: 0.0631987	total: 2.43s	remaining: 1.71s
587:	learn: 0.0631952	total: 2.44s	remaining: 1.71s
588:	learn: 0.0631768	total: 2.44s	remaining: 1.7s
589:	learn: 0.0631601	total: 2.45s	remaining: 1.7s
590:	learn: 0.0631406	total: 2.45s	remaining: 1.7s
591:	learn: 0.0631088	total: 2.45s	remaining: 1.69s
592:	learn: 0.0630846	total: 2.46s	remaining: 1.69s
593:	learn: 0.0630724	total: 2.46s	remaining: 1.68s
594:	learn: 0.0630377	total: 2.47s	remaining: 1.68s
595:	learn: 0.0630040	total: 2.47s	remaining: 1.68s
596:	learn: 0.0629657	total: 2.48s	remaining: 1.67s
597:	learn: 0.0629536	total: 2.48s	remaining: 1.67s
598:	learn: 0.0629376	total: 2.48s	remaining: 1.66s
599:	learn: 0.0629037	total: 2.49s	remaining: 1.66s
600:	learn: 0.0628832	total: 2.49s	remaining: 1.65s
601:	learn: 0.0628652	total: 2.5s	remaining: 1.65s
602:	learn: 0.06

787:	learn: 0.0587315	total: 3.22s	remaining: 866ms
788:	learn: 0.0587243	total: 3.22s	remaining: 862ms
789:	learn: 0.0587107	total: 3.23s	remaining: 858ms
790:	learn: 0.0586766	total: 3.23s	remaining: 854ms
791:	learn: 0.0586690	total: 3.23s	remaining: 849ms
792:	learn: 0.0586330	total: 3.24s	remaining: 845ms
793:	learn: 0.0586094	total: 3.24s	remaining: 841ms
794:	learn: 0.0585905	total: 3.25s	remaining: 837ms
795:	learn: 0.0585724	total: 3.25s	remaining: 833ms
796:	learn: 0.0585432	total: 3.25s	remaining: 829ms
797:	learn: 0.0585104	total: 3.26s	remaining: 825ms
798:	learn: 0.0585023	total: 3.26s	remaining: 821ms
799:	learn: 0.0584930	total: 3.27s	remaining: 817ms
800:	learn: 0.0584511	total: 3.27s	remaining: 813ms
801:	learn: 0.0584363	total: 3.27s	remaining: 808ms
802:	learn: 0.0584185	total: 3.28s	remaining: 804ms
803:	learn: 0.0583997	total: 3.28s	remaining: 800ms
804:	learn: 0.0583614	total: 3.28s	remaining: 796ms
805:	learn: 0.0583508	total: 3.29s	remaining: 792ms
806:	learn: 

991:	learn: 0.0544100	total: 4.01s	remaining: 32.4ms
992:	learn: 0.0543862	total: 4.02s	remaining: 28.3ms
993:	learn: 0.0543649	total: 4.02s	remaining: 24.3ms
994:	learn: 0.0543514	total: 4.03s	remaining: 20.2ms
995:	learn: 0.0543230	total: 4.03s	remaining: 16.2ms
996:	learn: 0.0543067	total: 4.03s	remaining: 12.1ms
997:	learn: 0.0542956	total: 4.04s	remaining: 8.09ms
998:	learn: 0.0542927	total: 4.04s	remaining: 4.05ms
999:	learn: 0.0542865	total: 4.05s	remaining: 0us


<catboost.core.CatBoostRegressor at 0x17c22c190>

In [60]:
final_model_se.best_score_

{'learn': {'MAE': 0.054286490067159306, 'RMSE': 0.07246888638677378}}

In [62]:
evaluate(final_model_se, X_train_se, X_test_se, y_train, y_test, logy=True)

MSE Train:  14005.475724801558
RMSE Train:  118.34473256043785
MAE Train:  9880.08928631296
MAPE Train:  5.7455985777081295
MSE Val:  19140.49110669891
RMSE Val:  138.34916373689762
MAE Val:  13135.33584142903
MAPE Val:  9.448960471798914


{'Train RMSE': [118.34473256043785],
 'Test RMSE': [138.34916373689762],
 'Train MAPE': [5.7455985777081295],
 'Test MAPE': [9.448960471798914],
 'Train MAE': [9880.08928631296],
 'Test MAE': [13135.33584142903],
 'Train R2': [0.9601196891122048],
 'Test R2': [0.8599013216983318]}

# TUNE FULL MODEL WITH NO FEATURE REDUCTION AS COMPARISON

In [69]:
from sklearn.model_selection import cross_val_score

#train_pool = Pool(data=X_train_se, label=y_train, cat_features=cat_cols)
#val_pool = Pool(data=X_val_se, label=y_val, cat_features=cat_cols)
def objective(trial):
    
    param = {
        "iterations": 1000,
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.1, log=True),
        "depth": trial.suggest_int("depth", 1, 10),
        "subsample": trial.suggest_float("subsample", 0.05, 1.0),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.05, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 100)
    }
    
    
#     param = {}
#     param['learning_rate'] = trial.suggest_discrete_uniform("learning_rate", 0.001, 0.02, 0.001)
#     param['depth'] = trial.suggest_int('depth', 9, 15)
#     param['l2_leaf_reg'] = trial.suggest_discrete_uniform('l2_leaf_reg', 1.0, 5.5, 0.5)
#     param['min_child_samples'] = trial.suggest_categorical('min_child_samples', [1, 4, 8, 16, 32])
    
    
#     param['grow_policy'] = 'Depthwise'
#     param['iterations'] = 1000
#     param['use_best_model'] = True
#     param['eval_metric'] = 'MAE'
#     param['od_type'] = 'Iter'
#     param['od_wait'] = 20
#     param['random_state'] = RANDOM_SEED
#     param['logging_level'] = 'Silent'
    
    regressor = cb.CatBoostRegressor(**param, cat_features=cat_cols1, silent=True)

    regressor.fit(X_train, y_train,
                  early_stopping_rounds=EARLY_STOPPING_ROUND)
    

    predictions = regressor.predict(X_val)
    rmse = mean_squared_error(np.exp(y_val), np.exp(predictions), squared=False)
    
    return rmse

# Create Study and optimize the optuna objective function
tune_full = optuna.create_study(study_name=f'catboost-seed--fullfeatures', direction='minimize')
tune_full.optimize(objective, n_trials=1000, n_jobs=-1, timeout=24000)

[I 2023-06-12 22:07:23,101] A new study created in memory with name: catboost-seed--fullfeatures
[I 2023-06-12 22:07:24,846] Trial 7 finished with value: 35208.01851301862 and parameters: {'learning_rate': 0.00226949320958069, 'depth': 4, 'subsample': 0.5286755725076551, 'colsample_bylevel': 0.0519674650804283, 'min_data_in_leaf': 90}. Best is trial 7 with value: 35208.01851301862.
[I 2023-06-12 22:07:25,444] Trial 2 finished with value: 19505.361929645383 and parameters: {'learning_rate': 0.06579348096308839, 'depth': 2, 'subsample': 0.33949099899030366, 'colsample_bylevel': 0.36974697406620344, 'min_data_in_leaf': 74}. Best is trial 2 with value: 19505.361929645383.
[I 2023-06-12 22:07:26,112] Trial 4 finished with value: 25229.726363337842 and parameters: {'learning_rate': 0.0035153926658792168, 'depth': 4, 'subsample': 0.44315767351113083, 'colsample_bylevel': 0.13563569260666614, 'min_data_in_leaf': 70}. Best is trial 2 with value: 19505.361929645383.
[I 2023-06-12 22:07:33,851] T

[I 2023-06-12 22:10:41,937] Trial 24 finished with value: 21377.994297174126 and parameters: {'learning_rate': 0.05612553728902747, 'depth': 10, 'subsample': 0.9760618079493346, 'colsample_bylevel': 0.9743077791759958, 'min_data_in_leaf': 61}. Best is trial 27 with value: 18858.009177641088.
[I 2023-06-12 22:10:51,432] Trial 22 finished with value: 20541.80940479958 and parameters: {'learning_rate': 0.07063987900795078, 'depth': 10, 'subsample': 0.9771297308700494, 'colsample_bylevel': 0.936490970247891, 'min_data_in_leaf': 55}. Best is trial 27 with value: 18858.009177641088.
[I 2023-06-12 22:10:55,212] Trial 30 finished with value: 18831.65756098153 and parameters: {'learning_rate': 0.050799919863109236, 'depth': 7, 'subsample': 0.6608727350262562, 'colsample_bylevel': 0.8583302783841547, 'min_data_in_leaf': 32}. Best is trial 30 with value: 18831.65756098153.
[I 2023-06-12 22:11:04,660] Trial 23 finished with value: 20047.32203059538 and parameters: {'learning_rate': 0.0663983239334

[I 2023-06-12 22:14:21,358] Trial 57 finished with value: 18990.82349712475 and parameters: {'learning_rate': 0.06720298146812383, 'depth': 6, 'subsample': 0.4917191136936668, 'colsample_bylevel': 0.9263763595923274, 'min_data_in_leaf': 36}. Best is trial 30 with value: 18831.65756098153.
[I 2023-06-12 22:14:22,916] Trial 58 finished with value: 19489.79084413324 and parameters: {'learning_rate': 0.03370307805196649, 'depth': 6, 'subsample': 0.5033478810069325, 'colsample_bylevel': 0.9188591547150371, 'min_data_in_leaf': 4}. Best is trial 30 with value: 18831.65756098153.
[I 2023-06-12 22:14:25,095] Trial 59 finished with value: 19462.906308058173 and parameters: {'learning_rate': 0.026316582613998312, 'depth': 6, 'subsample': 0.4728940917817821, 'colsample_bylevel': 0.7272812315754191, 'min_data_in_leaf': 4}. Best is trial 30 with value: 18831.65756098153.
[I 2023-06-12 22:14:30,329] Trial 66 finished with value: 20069.261034729196 and parameters: {'learning_rate': 0.02662008186400096

[I 2023-06-12 22:15:12,592] Trial 87 finished with value: 19130.191047460245 and parameters: {'learning_rate': 0.07797451065995052, 'depth': 4, 'subsample': 0.5974648872306318, 'colsample_bylevel': 0.6141157010065399, 'min_data_in_leaf': 9}. Best is trial 75 with value: 18146.509062254638.
[I 2023-06-12 22:15:12,993] Trial 92 finished with value: 18384.906106186016 and parameters: {'learning_rate': 0.08588560576990228, 'depth': 4, 'subsample': 0.32515847988008123, 'colsample_bylevel': 0.6030017143901076, 'min_data_in_leaf': 4}. Best is trial 75 with value: 18146.509062254638.
[I 2023-06-12 22:15:13,212] Trial 88 finished with value: 19020.471696321914 and parameters: {'learning_rate': 0.08162779117651324, 'depth': 4, 'subsample': 0.6713170851581461, 'colsample_bylevel': 0.6095980820542936, 'min_data_in_leaf': 9}. Best is trial 75 with value: 18146.509062254638.
[I 2023-06-12 22:15:13,264] Trial 74 finished with value: 19514.94749065021 and parameters: {'learning_rate': 0.07011997115700

[I 2023-06-12 22:15:42,386] Trial 111 finished with value: 18623.220813204487 and parameters: {'learning_rate': 0.06361367648446967, 'depth': 5, 'subsample': 0.6457531964332734, 'colsample_bylevel': 0.8960049281620722, 'min_data_in_leaf': 11}. Best is trial 95 with value: 18046.754058953127.
[I 2023-06-12 22:15:43,662] Trial 113 finished with value: 18971.716646060337 and parameters: {'learning_rate': 0.06491227921017675, 'depth': 5, 'subsample': 0.6575791690657394, 'colsample_bylevel': 0.5904481568794545, 'min_data_in_leaf': 7}. Best is trial 95 with value: 18046.754058953127.
[I 2023-06-12 22:15:44,969] Trial 118 finished with value: 18857.87700321056 and parameters: {'learning_rate': 0.06417158810220439, 'depth': 5, 'subsample': 0.3260025395765096, 'colsample_bylevel': 0.5859890855797076, 'min_data_in_leaf': 12}. Best is trial 95 with value: 18046.754058953127.
[I 2023-06-12 22:15:46,504] Trial 120 finished with value: 18749.527173760947 and parameters: {'learning_rate': 0.077875219

[I 2023-06-12 22:16:12,010] Trial 141 finished with value: 18434.55972860696 and parameters: {'learning_rate': 0.0816159222425998, 'depth': 4, 'subsample': 0.318218714802614, 'colsample_bylevel': 0.6946378630348484, 'min_data_in_leaf': 25}. Best is trial 125 with value: 17979.829790374264.
[I 2023-06-12 22:16:12,762] Trial 143 finished with value: 18732.545501752273 and parameters: {'learning_rate': 0.09305455579858724, 'depth': 4, 'subsample': 0.30824277161398805, 'colsample_bylevel': 0.7385004607194383, 'min_data_in_leaf': 5}. Best is trial 125 with value: 17979.829790374264.
[I 2023-06-12 22:16:15,872] Trial 145 finished with value: 18506.94299058871 and parameters: {'learning_rate': 0.09415181325023132, 'depth': 4, 'subsample': 0.261665087013706, 'colsample_bylevel': 0.7066827256185348, 'min_data_in_leaf': 26}. Best is trial 125 with value: 17979.829790374264.
[I 2023-06-12 22:16:17,162] Trial 148 finished with value: 18600.1771481987 and parameters: {'learning_rate': 0.09266540039

[I 2023-06-12 22:16:48,561] Trial 172 finished with value: 18640.03760986606 and parameters: {'learning_rate': 0.08242580543514413, 'depth': 4, 'subsample': 0.19497720160634194, 'colsample_bylevel': 0.6487061227494278, 'min_data_in_leaf': 23}. Best is trial 125 with value: 17979.829790374264.
[I 2023-06-12 22:16:49,474] Trial 171 finished with value: 19132.398114033465 and parameters: {'learning_rate': 0.08089902377837735, 'depth': 5, 'subsample': 0.28443542256237253, 'colsample_bylevel': 0.6479703437226199, 'min_data_in_leaf': 19}. Best is trial 125 with value: 17979.829790374264.
[I 2023-06-12 22:16:51,467] Trial 170 finished with value: 18454.866130565195 and parameters: {'learning_rate': 0.0828870445210627, 'depth': 5, 'subsample': 0.2864844994720444, 'colsample_bylevel': 0.6512519737366192, 'min_data_in_leaf': 39}. Best is trial 125 with value: 17979.829790374264.
[I 2023-06-12 22:16:51,521] Trial 174 finished with value: 19240.019292866153 and parameters: {'learning_rate': 0.0799

[I 2023-06-12 22:17:36,118] Trial 203 finished with value: 18925.70112236193 and parameters: {'learning_rate': 0.0515450852633277, 'depth': 4, 'subsample': 0.19543372629850375, 'colsample_bylevel': 0.8056134483590164, 'min_data_in_leaf': 26}. Best is trial 125 with value: 17979.829790374264.
[I 2023-06-12 22:17:36,486] Trial 202 finished with value: 18702.919531931726 and parameters: {'learning_rate': 0.04510076643327626, 'depth': 4, 'subsample': 0.24661751074238367, 'colsample_bylevel': 0.6285927689843144, 'min_data_in_leaf': 28}. Best is trial 125 with value: 17979.829790374264.
[I 2023-06-12 22:17:37,735] Trial 204 finished with value: 19022.614875468957 and parameters: {'learning_rate': 0.08795352765906847, 'depth': 4, 'subsample': 0.2650435253660807, 'colsample_bylevel': 0.6288337922979598, 'min_data_in_leaf': 48}. Best is trial 125 with value: 17979.829790374264.
[I 2023-06-12 22:17:38,437] Trial 196 finished with value: 19586.044885751147 and parameters: {'learning_rate': 0.0639

[I 2023-06-12 22:18:13,097] Trial 228 finished with value: 19440.60628649936 and parameters: {'learning_rate': 0.07916297195931561, 'depth': 4, 'subsample': 0.5768811586741628, 'colsample_bylevel': 0.7042772581373341, 'min_data_in_leaf': 22}. Best is trial 214 with value: 17907.108949176956.
[I 2023-06-12 22:18:13,590] Trial 230 finished with value: 18977.415165951632 and parameters: {'learning_rate': 0.0781352555654445, 'depth': 4, 'subsample': 0.5324609819226576, 'colsample_bylevel': 0.7014175235396123, 'min_data_in_leaf': 21}. Best is trial 214 with value: 17907.108949176956.
[I 2023-06-12 22:18:14,875] Trial 227 finished with value: 18976.98348076462 and parameters: {'learning_rate': 0.0766013412903988, 'depth': 4, 'subsample': 0.5708793878564896, 'colsample_bylevel': 0.7007035181564327, 'min_data_in_leaf': 22}. Best is trial 214 with value: 17907.108949176956.
[I 2023-06-12 22:18:15,415] Trial 229 finished with value: 18738.012136041707 and parameters: {'learning_rate': 0.07724681

[I 2023-06-12 22:26:10,059] Trial 253 finished with value: 21728.93969523936 and parameters: {'learning_rate': 0.0056846257559364155, 'depth': 4, 'subsample': 0.5024974570027908, 'colsample_bylevel': 0.5880476751826863, 'min_data_in_leaf': 28}. Best is trial 240 with value: 17602.821799771602.
[I 2023-06-12 22:26:10,533] Trial 256 finished with value: 22003.96409386507 and parameters: {'learning_rate': 0.005335641958864621, 'depth': 4, 'subsample': 0.5446175719866966, 'colsample_bylevel': 0.6025201265717578, 'min_data_in_leaf': 28}. Best is trial 240 with value: 17602.821799771602.
[I 2023-06-12 22:26:10,723] Trial 254 finished with value: 21945.401871720678 and parameters: {'learning_rate': 0.00544366143177865, 'depth': 4, 'subsample': 0.5092410353634194, 'colsample_bylevel': 0.6089309357227032, 'min_data_in_leaf': 32}. Best is trial 240 with value: 17602.821799771602.
[I 2023-06-12 22:26:12,594] Trial 258 finished with value: 19477.28428662731 and parameters: {'learning_rate': 0.0349

[I 2023-06-12 22:26:41,393] Trial 284 finished with value: 20275.23110825175 and parameters: {'learning_rate': 0.01553028991875298, 'depth': 4, 'subsample': 0.42889008450027, 'colsample_bylevel': 0.9226222435070183, 'min_data_in_leaf': 26}. Best is trial 240 with value: 17602.821799771602.
[I 2023-06-12 22:26:44,127] Trial 283 finished with value: 17533.85983973332 and parameters: {'learning_rate': 0.07298732324687149, 'depth': 4, 'subsample': 0.4061504960714524, 'colsample_bylevel': 0.9051413382658291, 'min_data_in_leaf': 26}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:26:46,366] Trial 286 finished with value: 18100.19855189659 and parameters: {'learning_rate': 0.09972614689948466, 'depth': 4, 'subsample': 0.20557907565851474, 'colsample_bylevel': 0.9073651958485183, 'min_data_in_leaf': 5}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:26:46,658] Trial 285 finished with value: 18742.28099133259 and parameters: {'learning_rate': 0.099582897264

[I 2023-06-12 22:35:22,861] Trial 314 finished with value: 18663.060898143103 and parameters: {'learning_rate': 0.0486575181081425, 'depth': 4, 'subsample': 0.42626496135462466, 'colsample_bylevel': 0.9419077417757074, 'min_data_in_leaf': 23}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:35:23,068] Trial 312 finished with value: 19529.1372943745 and parameters: {'learning_rate': 0.05791841242541562, 'depth': 4, 'subsample': 0.7447152769200026, 'colsample_bylevel': 0.9168788061695692, 'min_data_in_leaf': 23}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:35:27,232] Trial 313 finished with value: 18776.247288156894 and parameters: {'learning_rate': 0.09970839789251223, 'depth': 4, 'subsample': 0.7358962272681627, 'colsample_bylevel': 0.9318617632389131, 'min_data_in_leaf': 23}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:35:27,833] Trial 316 finished with value: 18637.50489992307 and parameters: {'learning_rate': 0.0993615019

[I 2023-06-12 22:36:02,559] Trial 345 finished with value: 18824.24029151127 and parameters: {'learning_rate': 0.08340489892927717, 'depth': 3, 'subsample': 0.2257023087080745, 'colsample_bylevel': 0.8123560349117835, 'min_data_in_leaf': 10}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:36:02,968] Trial 340 finished with value: 17899.928812756203 and parameters: {'learning_rate': 0.08446715467537537, 'depth': 4, 'subsample': 0.2390712708705268, 'colsample_bylevel': 0.7827285896830817, 'min_data_in_leaf': 18}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:36:03,075] Trial 343 finished with value: 18371.658795656116 and parameters: {'learning_rate': 0.0839881252495093, 'depth': 3, 'subsample': 0.2248119052070261, 'colsample_bylevel': 0.8287268351489715, 'min_data_in_leaf': 16}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:36:03,929] Trial 342 finished with value: 18885.634241118798 and parameters: {'learning_rate': 0.083776089

[I 2023-06-12 22:36:55,228] Trial 369 finished with value: 17646.1566456926 and parameters: {'learning_rate': 0.09247410652597685, 'depth': 4, 'subsample': 0.47045637801158563, 'colsample_bylevel': 0.8876174948286393, 'min_data_in_leaf': 25}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:37:02,240] Trial 375 finished with value: 20108.657451035215 and parameters: {'learning_rate': 0.0533590735862391, 'depth': 2, 'subsample': 0.4359423873527917, 'colsample_bylevel': 0.8511425440550333, 'min_data_in_leaf': 29}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:37:05,347] Trial 373 finished with value: 19014.36582393537 and parameters: {'learning_rate': 0.09982784551656412, 'depth': 4, 'subsample': 0.48063310393559394, 'colsample_bylevel': 0.8130913947532132, 'min_data_in_leaf': 29}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:37:11,758] Trial 374 finished with value: 21006.093353860608 and parameters: {'learning_rate': 0.007852076

[I 2023-06-12 22:38:59,798] Trial 397 finished with value: 19741.63621208724 and parameters: {'learning_rate': 0.023884888918971094, 'depth': 4, 'subsample': 0.47223332826946546, 'colsample_bylevel': 0.8666983398456783, 'min_data_in_leaf': 25}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:38:59,812] Trial 398 finished with value: 19590.57216651566 and parameters: {'learning_rate': 0.028069223861566458, 'depth': 4, 'subsample': 0.45323659530044436, 'colsample_bylevel': 0.8685113941398006, 'min_data_in_leaf': 26}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:39:03,430] Trial 399 finished with value: 18575.312921256573 and parameters: {'learning_rate': 0.09413903916854821, 'depth': 4, 'subsample': 0.4570907213811184, 'colsample_bylevel': 0.9185487848628184, 'min_data_in_leaf': 27}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:39:05,593] Trial 401 finished with value: 18449.35990297409 and parameters: {'learning_rate': 0.093140

[I 2023-06-12 22:48:28,858] Trial 420 finished with value: 18484.77632159013 and parameters: {'learning_rate': 0.08427535211701662, 'depth': 4, 'subsample': 0.3998001371347937, 'colsample_bylevel': 0.8144515750638004, 'min_data_in_leaf': 27}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:48:29,812] Trial 431 finished with value: 20650.379105271542 and parameters: {'learning_rate': 0.08901828628158581, 'depth': 1, 'subsample': 0.294216602263236, 'colsample_bylevel': 0.86759803037406, 'min_data_in_leaf': 25}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:48:29,899] Trial 425 finished with value: 19970.68624935009 and parameters: {'learning_rate': 0.01628010358994965, 'depth': 3, 'subsample': 0.38688883785526795, 'colsample_bylevel': 0.8127851100080336, 'min_data_in_leaf': 25}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 22:48:30,787] Trial 430 finished with value: 19727.10943704913 and parameters: {'learning_rate': 0.090098549289

[I 2023-06-12 22:49:06,526] Trial 455 finished with value: 20245.394653619853 and parameters: {'learning_rate': 0.01799480680609582, 'depth': 4, 'subsample': 0.16017554294480957, 'colsample_bylevel': 0.8615172958628838, 'min_data_in_leaf': 31}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:00:56,437] Trial 454 finished with value: 19558.836061431983 and parameters: {'learning_rate': 0.01743383229003853, 'depth': 5, 'subsample': 0.339247072153285, 'colsample_bylevel': 0.8728863578134358, 'min_data_in_leaf': 27}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:00:58,852] Trial 460 finished with value: 19431.241286279248 and parameters: {'learning_rate': 0.0924706565680651, 'depth': 4, 'subsample': 0.24580630118721877, 'colsample_bylevel': 0.5334532146670264, 'min_data_in_leaf': 23}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:00:59,326] Trial 458 finished with value: 18210.260137756806 and parameters: {'learning_rate': 0.0905443

[I 2023-06-12 23:01:41,754] Trial 482 finished with value: 18510.731247338328 and parameters: {'learning_rate': 0.04536883237884824, 'depth': 4, 'subsample': 0.37743876459734677, 'colsample_bylevel': 0.7842592063768361, 'min_data_in_leaf': 29}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:01:45,415] Trial 484 finished with value: 19045.706573128904 and parameters: {'learning_rate': 0.0914194327621489, 'depth': 4, 'subsample': 0.19053095439463125, 'colsample_bylevel': 0.785210473234049, 'min_data_in_leaf': 19}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:01:46,521] Trial 453 finished with value: 23748.082864984983 and parameters: {'learning_rate': 0.003708051166449953, 'depth': 8, 'subsample': 0.30027917436828405, 'colsample_bylevel': 0.8654104320880823, 'min_data_in_leaf': 27}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:01:49,218] Trial 485 finished with value: 20045.07265074084 and parameters: {'learning_rate': 0.020536

[I 2023-06-12 23:03:08,032] Trial 514 finished with value: 20172.905034003266 and parameters: {'learning_rate': 0.06875804932230027, 'depth': 1, 'subsample': 0.5445320810982103, 'colsample_bylevel': 0.8521883321259557, 'min_data_in_leaf': 6}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:03:08,877] Trial 481 finished with value: 20150.203484696314 and parameters: {'learning_rate': 0.0913674671271681, 'depth': 10, 'subsample': 0.41556274230468054, 'colsample_bylevel': 0.6017187587159161, 'min_data_in_leaf': 29}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:03:09,192] Trial 515 finished with value: 27261.711208340374 and parameters: {'learning_rate': 0.00826199806050183, 'depth': 1, 'subsample': 0.5614460455244211, 'colsample_bylevel': 0.8564267009339448, 'min_data_in_leaf': 9}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:03:10,315] Trial 511 finished with value: 18900.87970730077 and parameters: {'learning_rate': 0.065496601

[I 2023-06-12 23:18:58,275] Trial 536 finished with value: 18567.243712213545 and parameters: {'learning_rate': 0.08570843630635182, 'depth': 4, 'subsample': 0.4833593320181552, 'colsample_bylevel': 0.8244303190266514, 'min_data_in_leaf': 9}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:18:59,654] Trial 537 finished with value: 19071.34947995366 and parameters: {'learning_rate': 0.02665410422945864, 'depth': 4, 'subsample': 0.47480365501461824, 'colsample_bylevel': 0.9332084233243361, 'min_data_in_leaf': 2}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:19:00,795] Trial 541 finished with value: 19000.27501621222 and parameters: {'learning_rate': 0.0868294081210743, 'depth': 3, 'subsample': 0.47594990443766405, 'colsample_bylevel': 0.6154549103016488, 'min_data_in_leaf': 24}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:19:00,913] Trial 542 finished with value: 19627.04372590155 and parameters: {'learning_rate': 0.02598858990

[I 2023-06-12 23:19:26,362] Trial 567 finished with value: 19534.648993846724 and parameters: {'learning_rate': 0.09288720878970776, 'depth': 4, 'subsample': 0.2888677161714059, 'colsample_bylevel': 0.33039183123809035, 'min_data_in_leaf': 19}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:19:27,250] Trial 564 finished with value: 20605.986116529642 and parameters: {'learning_rate': 0.0081794920952964, 'depth': 4, 'subsample': 0.5165852757584187, 'colsample_bylevel': 0.8571758667751559, 'min_data_in_leaf': 19}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:19:31,222] Trial 566 finished with value: 18166.52221171924 and parameters: {'learning_rate': 0.09308101153603304, 'depth': 4, 'subsample': 0.6325591217274223, 'colsample_bylevel': 0.6373683450440821, 'min_data_in_leaf': 28}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:19:31,538] Trial 570 finished with value: 18916.100430645412 and parameters: {'learning_rate': 0.09965878

[I 2023-06-12 23:20:01,085] Trial 597 finished with value: 18896.857322717817 and parameters: {'learning_rate': 0.058686929690846754, 'depth': 4, 'subsample': 0.561505457235428, 'colsample_bylevel': 0.3511264969457335, 'min_data_in_leaf': 29}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:20:01,329] Trial 596 finished with value: 21291.087341728155 and parameters: {'learning_rate': 0.006900822278574566, 'depth': 4, 'subsample': 0.3421330723710381, 'colsample_bylevel': 0.4812969192911288, 'min_data_in_leaf': 34}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:20:02,486] Trial 594 finished with value: 17935.153909309956 and parameters: {'learning_rate': 0.06175488174728377, 'depth': 4, 'subsample': 0.34967279478858404, 'colsample_bylevel': 0.46712625754865794, 'min_data_in_leaf': 34}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:20:03,153] Trial 602 finished with value: 20025.460267846516 and parameters: {'learning_rate': 0.0200

[I 2023-06-12 23:35:52,310] Trial 619 finished with value: 18861.18777011951 and parameters: {'learning_rate': 0.08697195571704786, 'depth': 5, 'subsample': 0.6502024009516894, 'colsample_bylevel': 0.36935693689053667, 'min_data_in_leaf': 38}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:35:54,462] Trial 622 finished with value: 19028.039336281643 and parameters: {'learning_rate': 0.08638278579463705, 'depth': 4, 'subsample': 0.8073229089059469, 'colsample_bylevel': 0.8618025068829656, 'min_data_in_leaf': 39}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:35:54,522] Trial 624 finished with value: 18597.817641551523 and parameters: {'learning_rate': 0.0726538180664868, 'depth': 4, 'subsample': 0.3253291686948087, 'colsample_bylevel': 0.49209392936575497, 'min_data_in_leaf': 27}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:35:55,258] Trial 620 finished with value: 18343.582368563093 and parameters: {'learning_rate': 0.0852778

[I 2023-06-12 23:36:15,202] Trial 649 finished with value: 19149.79787653594 and parameters: {'learning_rate': 0.09174486631671597, 'depth': 4, 'subsample': 0.8375766110376857, 'colsample_bylevel': 0.23468118763439466, 'min_data_in_leaf': 23}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:36:15,481] Trial 643 finished with value: 44924.65023335504 and parameters: {'learning_rate': 0.0011281867903102068, 'depth': 4, 'subsample': 0.46336213824003986, 'colsample_bylevel': 0.572858714094874, 'min_data_in_leaf': 24}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:36:15,591] Trial 654 finished with value: 19241.53433379824 and parameters: {'learning_rate': 0.07905000758552447, 'depth': 3, 'subsample': 0.18371244779891283, 'colsample_bylevel': 0.37589648209226567, 'min_data_in_leaf': 30}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:36:16,973] Trial 653 finished with value: 20115.691452086437 and parameters: {'learning_rate': 0.01359

[I 2023-06-12 23:36:33,284] Trial 675 finished with value: 19004.033569770545 and parameters: {'learning_rate': 0.08978913409280562, 'depth': 4, 'subsample': 0.48667488960195565, 'colsample_bylevel': 0.643048528354917, 'min_data_in_leaf': 20}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:36:34,074] Trial 676 finished with value: 19393.964575903374 and parameters: {'learning_rate': 0.03477936348885223, 'depth': 4, 'subsample': 0.522543006021585, 'colsample_bylevel': 0.7009604214727209, 'min_data_in_leaf': 26}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:36:34,499] Trial 680 finished with value: 18626.017454726683 and parameters: {'learning_rate': 0.09970900902109922, 'depth': 4, 'subsample': 0.43116572438999634, 'colsample_bylevel': 0.41924736832924353, 'min_data_in_leaf': 21}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:36:35,090] Trial 686 finished with value: 20669.063197492607 and parameters: {'learning_rate': 0.099403

[I 2023-06-12 23:36:55,215] Trial 701 finished with value: 25953.956658807547 and parameters: {'learning_rate': 0.0030963869401540067, 'depth': 4, 'subsample': 0.6020488527238423, 'colsample_bylevel': 0.9115765783182539, 'min_data_in_leaf': 29}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:36:55,320] Trial 705 finished with value: 19423.777947013208 and parameters: {'learning_rate': 0.07686559743571085, 'depth': 4, 'subsample': 0.9386580765425941, 'colsample_bylevel': 0.29747450013211724, 'min_data_in_leaf': 5}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:36:58,086] Trial 709 finished with value: 18731.59583496503 and parameters: {'learning_rate': 0.07362233401982625, 'depth': 4, 'subsample': 0.23750884451686502, 'colsample_bylevel': 0.4429903130252899, 'min_data_in_leaf': 6}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:36:58,146] Trial 708 finished with value: 19398.95880374604 and parameters: {'learning_rate': 0.0775224

[I 2023-06-12 23:37:23,406] Trial 732 finished with value: 18020.42274918592 and parameters: {'learning_rate': 0.08457137733857378, 'depth': 4, 'subsample': 0.35378219710816033, 'colsample_bylevel': 0.8380290374183775, 'min_data_in_leaf': 26}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:37:23,628] Trial 731 finished with value: 20544.225497038646 and parameters: {'learning_rate': 0.02008069547355307, 'depth': 4, 'subsample': 0.1240964904290956, 'colsample_bylevel': 0.8140002268730555, 'min_data_in_leaf': 26}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:37:24,126] Trial 733 finished with value: 19017.9396136901 and parameters: {'learning_rate': 0.05941038119504852, 'depth': 4, 'subsample': 0.4293337661206212, 'colsample_bylevel': 0.8106315629741282, 'min_data_in_leaf': 53}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:37:25,111] Trial 734 finished with value: 19612.55780437902 and parameters: {'learning_rate': 0.0154349818

[I 2023-06-12 23:37:57,291] Trial 764 finished with value: 19784.177327549107 and parameters: {'learning_rate': 0.09973506450980021, 'depth': 5, 'subsample': 0.29593217903193675, 'colsample_bylevel': 0.3220646085656493, 'min_data_in_leaf': 24}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:38:01,286] Trial 761 finished with value: 19687.883329563865 and parameters: {'learning_rate': 0.017045247349029454, 'depth': 4, 'subsample': 0.5470305884166555, 'colsample_bylevel': 0.8924217623252956, 'min_data_in_leaf': 24}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:38:02,202] Trial 759 finished with value: 21420.963638981346 and parameters: {'learning_rate': 0.006161304644278806, 'depth': 4, 'subsample': 0.7626518463262395, 'colsample_bylevel': 0.8903857316742464, 'min_data_in_leaf': 25}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:38:07,553] Trial 763 finished with value: 18982.01056562145 and parameters: {'learning_rate': 0.08556

[I 2023-06-12 23:38:39,737] Trial 788 finished with value: 19277.786806649023 and parameters: {'learning_rate': 0.07565582952895562, 'depth': 4, 'subsample': 0.8881700687913036, 'colsample_bylevel': 0.6241930465482057, 'min_data_in_leaf': 21}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:38:42,559] Trial 790 finished with value: 18117.41873984743 and parameters: {'learning_rate': 0.07717831745898619, 'depth': 4, 'subsample': 0.333992917314984, 'colsample_bylevel': 0.8663033074304741, 'min_data_in_leaf': 19}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:38:42,866] Trial 794 finished with value: 20136.398812566727 and parameters: {'learning_rate': 0.014125057599965192, 'depth': 4, 'subsample': 0.14366025190533072, 'colsample_bylevel': 0.3575541623980949, 'min_data_in_leaf': 9}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:38:45,241] Trial 792 finished with value: 17630.502974479106 and parameters: {'learning_rate': 0.07530374

[I 2023-06-12 23:54:34,002] Trial 823 finished with value: 35016.81477072667 and parameters: {'learning_rate': 0.0020395159315794256, 'depth': 4, 'subsample': 0.9111669609800608, 'colsample_bylevel': 0.08623872205298527, 'min_data_in_leaf': 88}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:54:34,407] Trial 817 finished with value: 40335.93572258086 and parameters: {'learning_rate': 0.0014146336632138616, 'depth': 4, 'subsample': 0.4162697959843624, 'colsample_bylevel': 0.39208320979350336, 'min_data_in_leaf': 25}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:54:36,136] Trial 816 finished with value: 35625.09374218411 and parameters: {'learning_rate': 0.001959671589649523, 'depth': 3, 'subsample': 0.28532791679592967, 'colsample_bylevel': 0.9163326456633518, 'min_data_in_leaf': 64}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-12 23:54:37,281] Trial 819 finished with value: 20463.287387535158 and parameters: {'learning_rate': 0.01

[I 2023-06-13 00:11:19,555] Trial 844 finished with value: 18584.536376736625 and parameters: {'learning_rate': 0.0997464681397888, 'depth': 5, 'subsample': 0.3645645612666644, 'colsample_bylevel': 0.5337129055394396, 'min_data_in_leaf': 51}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:11:21,881] Trial 843 finished with value: 28882.071749989915 and parameters: {'learning_rate': 0.0023470283048705946, 'depth': 5, 'subsample': 0.35692572514848175, 'colsample_bylevel': 0.7345317213105155, 'min_data_in_leaf': 22}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:11:24,147] Trial 848 finished with value: 18788.60824912714 and parameters: {'learning_rate': 0.07944081437781184, 'depth': 4, 'subsample': 0.4397592790238349, 'colsample_bylevel': 0.7983975942431555, 'min_data_in_leaf': 6}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:11:24,268] Trial 850 finished with value: 19717.980718870247 and parameters: {'learning_rate': 0.0781466

[I 2023-06-13 00:11:47,734] Trial 873 finished with value: 19824.92873580569 and parameters: {'learning_rate': 0.01662786617899339, 'depth': 4, 'subsample': 0.4119737595791985, 'colsample_bylevel': 0.710044138851026, 'min_data_in_leaf': 30}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:11:48,508] Trial 876 finished with value: 18348.362287796608 and parameters: {'learning_rate': 0.09186464868332571, 'depth': 4, 'subsample': 0.4103876445435561, 'colsample_bylevel': 0.7175105942604061, 'min_data_in_leaf': 33}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:11:49,182] Trial 877 finished with value: 19424.386161017137 and parameters: {'learning_rate': 0.0999239218589325, 'depth': 4, 'subsample': 0.38243492819653063, 'colsample_bylevel': 0.6424761102291646, 'min_data_in_leaf': 21}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:11:50,745] Trial 875 finished with value: 18299.435147975048 and parameters: {'learning_rate': 0.099814377

[I 2023-06-13 00:12:42,571] Trial 905 finished with value: 19114.68064091187 and parameters: {'learning_rate': 0.0916085444343175, 'depth': 4, 'subsample': 0.43936436489873565, 'colsample_bylevel': 0.9408448891791084, 'min_data_in_leaf': 31}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:12:48,762] Trial 906 finished with value: 19519.2208948567 and parameters: {'learning_rate': 0.03813110751480669, 'depth': 4, 'subsample': 0.4800928051978346, 'colsample_bylevel': 0.8889669095855066, 'min_data_in_leaf': 30}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:12:52,115] Trial 908 finished with value: 20435.37354364299 and parameters: {'learning_rate': 0.013241695304919308, 'depth': 5, 'subsample': 0.4878741395131726, 'colsample_bylevel': 0.500456317466003, 'min_data_in_leaf': 26}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:12:56,749] Trial 907 finished with value: 19247.173047315508 and parameters: {'learning_rate': 0.09033398625

[I 2023-06-13 00:14:16,547] Trial 928 finished with value: 18705.731609217186 and parameters: {'learning_rate': 0.05035286859641524, 'depth': 5, 'subsample': 0.6778992923414766, 'colsample_bylevel': 0.8845005708945813, 'min_data_in_leaf': 22}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:14:19,101] Trial 932 finished with value: 18744.939677177455 and parameters: {'learning_rate': 0.033610784974693754, 'depth': 5, 'subsample': 0.16009661526466396, 'colsample_bylevel': 0.6934371711403544, 'min_data_in_leaf': 31}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:14:19,472] Trial 933 finished with value: 22549.35983817845 and parameters: {'learning_rate': 0.0053309122465650454, 'depth': 4, 'subsample': 0.11670780196077243, 'colsample_bylevel': 0.999350297677848, 'min_data_in_leaf': 29}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:14:19,994] Trial 930 finished with value: 26859.50255095601 and parameters: {'learning_rate': 0.00264

[I 2023-06-13 00:15:08,442] Trial 955 finished with value: 18309.138087581476 and parameters: {'learning_rate': 0.08455544909117402, 'depth': 4, 'subsample': 0.38692148721292086, 'colsample_bylevel': 0.8594897362616507, 'min_data_in_leaf': 32}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:15:13,238] Trial 958 finished with value: 18790.64719286938 and parameters: {'learning_rate': 0.05530544460518239, 'depth': 4, 'subsample': 0.5827529347012664, 'colsample_bylevel': 0.862376956764079, 'min_data_in_leaf': 32}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:15:16,698] Trial 962 finished with value: 19067.650065696424 and parameters: {'learning_rate': 0.07298322947151392, 'depth': 3, 'subsample': 0.5666200826847079, 'colsample_bylevel': 0.8587940448317445, 'min_data_in_leaf': 24}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:15:18,569] Trial 964 finished with value: 18941.689802487446 and parameters: {'learning_rate': 0.07305501

[I 2023-06-13 00:16:09,685] Trial 983 finished with value: 19289.177800170593 and parameters: {'learning_rate': 0.07003138345080707, 'depth': 4, 'subsample': 0.635714215958765, 'colsample_bylevel': 0.9730707972398447, 'min_data_in_leaf': 22}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:16:10,453] Trial 986 finished with value: 18284.118837183618 and parameters: {'learning_rate': 0.0744330096852555, 'depth': 4, 'subsample': 0.5295805750747955, 'colsample_bylevel': 0.9410063618456009, 'min_data_in_leaf': 30}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:16:12,413] Trial 991 finished with value: 31396.85816483267 and parameters: {'learning_rate': 0.0021090131287671, 'depth': 5, 'subsample': 0.3187371327052099, 'colsample_bylevel': 0.35669472023404947, 'min_data_in_leaf': 25}. Best is trial 283 with value: 17533.85983973332.
[I 2023-06-13 00:16:14,909] Trial 987 finished with value: 22149.518288878076 and parameters: {'learning_rate': 0.0050919980

In [70]:
# EVALUATION
tune_full.best_params
optuna.visualization.plot_optimization_history(tune_full).show(renderer="browser")
optuna.visualization.plot_param_importances(tune_full).show(renderer="browser")

In [71]:
# Train the final full model using the best hyperparameters
final_model_full = cb.CatBoostRegressor(**tune_full.best_params, cat_features=cat_cols1, eval_metric="MAE")
final_model_full.fit(X_train, y_train)


0:	learn: 0.2829837	total: 6.79ms	remaining: 6.78s
1:	learn: 0.2673756	total: 13.6ms	remaining: 6.8s
2:	learn: 0.2535706	total: 17.9ms	remaining: 5.94s
3:	learn: 0.2409416	total: 22.6ms	remaining: 5.62s
4:	learn: 0.2289257	total: 25.8ms	remaining: 5.14s
5:	learn: 0.2190586	total: 29.5ms	remaining: 4.89s
6:	learn: 0.2091667	total: 33ms	remaining: 4.68s
7:	learn: 0.2003992	total: 36.5ms	remaining: 4.52s
8:	learn: 0.1917844	total: 41.1ms	remaining: 4.53s
9:	learn: 0.1847417	total: 45.3ms	remaining: 4.49s
10:	learn: 0.1770447	total: 48.6ms	remaining: 4.37s
11:	learn: 0.1703995	total: 52ms	remaining: 4.28s
12:	learn: 0.1643302	total: 56.1ms	remaining: 4.26s
13:	learn: 0.1581669	total: 59ms	remaining: 4.16s
14:	learn: 0.1523869	total: 62.4ms	remaining: 4.09s
15:	learn: 0.1479441	total: 66.1ms	remaining: 4.07s
16:	learn: 0.1434867	total: 69.4ms	remaining: 4.01s
17:	learn: 0.1398310	total: 73.6ms	remaining: 4.01s
18:	learn: 0.1365518	total: 77.1ms	remaining: 3.98s
19:	learn: 0.1327636	total: 8

166:	learn: 0.0700884	total: 607ms	remaining: 3.03s
167:	learn: 0.0699195	total: 610ms	remaining: 3.02s
168:	learn: 0.0698593	total: 614ms	remaining: 3.02s
169:	learn: 0.0698517	total: 617ms	remaining: 3.01s
170:	learn: 0.0697224	total: 620ms	remaining: 3.01s
171:	learn: 0.0697164	total: 624ms	remaining: 3s
172:	learn: 0.0696766	total: 627ms	remaining: 3s
173:	learn: 0.0696074	total: 631ms	remaining: 2.99s
174:	learn: 0.0695077	total: 634ms	remaining: 2.99s
175:	learn: 0.0694818	total: 639ms	remaining: 2.99s
176:	learn: 0.0693664	total: 642ms	remaining: 2.99s
177:	learn: 0.0693504	total: 646ms	remaining: 2.98s
178:	learn: 0.0692642	total: 649ms	remaining: 2.98s
179:	learn: 0.0692334	total: 653ms	remaining: 2.98s
180:	learn: 0.0691824	total: 657ms	remaining: 2.97s
181:	learn: 0.0691638	total: 660ms	remaining: 2.97s
182:	learn: 0.0691248	total: 663ms	remaining: 2.96s
183:	learn: 0.0691189	total: 667ms	remaining: 2.96s
184:	learn: 0.0689935	total: 671ms	remaining: 2.96s
185:	learn: 0.0689

329:	learn: 0.0618370	total: 1.2s	remaining: 2.44s
330:	learn: 0.0617671	total: 1.21s	remaining: 2.45s
331:	learn: 0.0617524	total: 1.21s	remaining: 2.44s
332:	learn: 0.0617284	total: 1.22s	remaining: 2.44s
333:	learn: 0.0617169	total: 1.22s	remaining: 2.44s
334:	learn: 0.0617028	total: 1.23s	remaining: 2.43s
335:	learn: 0.0615968	total: 1.23s	remaining: 2.43s
336:	learn: 0.0615639	total: 1.23s	remaining: 2.42s
337:	learn: 0.0614936	total: 1.24s	remaining: 2.42s
338:	learn: 0.0614909	total: 1.24s	remaining: 2.42s
339:	learn: 0.0614863	total: 1.24s	remaining: 2.41s
340:	learn: 0.0614657	total: 1.25s	remaining: 2.41s
341:	learn: 0.0614101	total: 1.25s	remaining: 2.41s
342:	learn: 0.0613603	total: 1.25s	remaining: 2.4s
343:	learn: 0.0613270	total: 1.26s	remaining: 2.4s
344:	learn: 0.0612596	total: 1.26s	remaining: 2.39s
345:	learn: 0.0612382	total: 1.26s	remaining: 2.39s
346:	learn: 0.0612303	total: 1.27s	remaining: 2.38s
347:	learn: 0.0611601	total: 1.27s	remaining: 2.38s
348:	learn: 0.0

495:	learn: 0.0560701	total: 1.8s	remaining: 1.83s
496:	learn: 0.0560139	total: 1.8s	remaining: 1.83s
497:	learn: 0.0559845	total: 1.81s	remaining: 1.82s
498:	learn: 0.0559620	total: 1.81s	remaining: 1.82s
499:	learn: 0.0558947	total: 1.82s	remaining: 1.82s
500:	learn: 0.0558887	total: 1.82s	remaining: 1.81s
501:	learn: 0.0558270	total: 1.82s	remaining: 1.81s
502:	learn: 0.0557940	total: 1.83s	remaining: 1.81s
503:	learn: 0.0557907	total: 1.83s	remaining: 1.8s
504:	learn: 0.0557818	total: 1.83s	remaining: 1.8s
505:	learn: 0.0557351	total: 1.84s	remaining: 1.79s
506:	learn: 0.0556988	total: 1.84s	remaining: 1.79s
507:	learn: 0.0556830	total: 1.84s	remaining: 1.79s
508:	learn: 0.0556753	total: 1.85s	remaining: 1.78s
509:	learn: 0.0556727	total: 1.85s	remaining: 1.78s
510:	learn: 0.0556706	total: 1.85s	remaining: 1.77s
511:	learn: 0.0556522	total: 1.86s	remaining: 1.77s
512:	learn: 0.0556459	total: 1.86s	remaining: 1.77s
513:	learn: 0.0556019	total: 1.87s	remaining: 1.76s
514:	learn: 0.05

661:	learn: 0.0522948	total: 2.4s	remaining: 1.23s
662:	learn: 0.0522396	total: 2.4s	remaining: 1.22s
663:	learn: 0.0522269	total: 2.41s	remaining: 1.22s
664:	learn: 0.0522147	total: 2.41s	remaining: 1.22s
665:	learn: 0.0521950	total: 2.42s	remaining: 1.21s
666:	learn: 0.0521513	total: 2.42s	remaining: 1.21s
667:	learn: 0.0521334	total: 2.42s	remaining: 1.2s
668:	learn: 0.0520926	total: 2.43s	remaining: 1.2s
669:	learn: 0.0520869	total: 2.43s	remaining: 1.2s
670:	learn: 0.0520673	total: 2.44s	remaining: 1.19s
671:	learn: 0.0520442	total: 2.44s	remaining: 1.19s
672:	learn: 0.0520338	total: 2.44s	remaining: 1.19s
673:	learn: 0.0520267	total: 2.45s	remaining: 1.18s
674:	learn: 0.0520176	total: 2.45s	remaining: 1.18s
675:	learn: 0.0519739	total: 2.45s	remaining: 1.18s
676:	learn: 0.0519565	total: 2.46s	remaining: 1.17s
677:	learn: 0.0519341	total: 2.46s	remaining: 1.17s
678:	learn: 0.0519140	total: 2.46s	remaining: 1.17s
679:	learn: 0.0518929	total: 2.47s	remaining: 1.16s
680:	learn: 0.051

823:	learn: 0.0489046	total: 3s	remaining: 641ms
824:	learn: 0.0488834	total: 3s	remaining: 637ms
825:	learn: 0.0488693	total: 3.01s	remaining: 634ms
826:	learn: 0.0488498	total: 3.01s	remaining: 630ms
827:	learn: 0.0488287	total: 3.02s	remaining: 626ms
828:	learn: 0.0487462	total: 3.02s	remaining: 623ms
829:	learn: 0.0487139	total: 3.02s	remaining: 619ms
830:	learn: 0.0486747	total: 3.03s	remaining: 616ms
831:	learn: 0.0486551	total: 3.03s	remaining: 612ms
832:	learn: 0.0486254	total: 3.03s	remaining: 608ms
833:	learn: 0.0486189	total: 3.04s	remaining: 604ms
834:	learn: 0.0486140	total: 3.04s	remaining: 601ms
835:	learn: 0.0485927	total: 3.04s	remaining: 597ms
836:	learn: 0.0485759	total: 3.05s	remaining: 594ms
837:	learn: 0.0485716	total: 3.05s	remaining: 590ms
838:	learn: 0.0485688	total: 3.05s	remaining: 586ms
839:	learn: 0.0485694	total: 3.06s	remaining: 583ms
840:	learn: 0.0485569	total: 3.07s	remaining: 580ms
841:	learn: 0.0485431	total: 3.07s	remaining: 576ms
842:	learn: 0.0485

<catboost.core.CatBoostRegressor at 0x17c067050>

In [81]:
print(final_model_full.best_score_)

evaluate(final_model_full, X_train, X_test, y_train, y_test, logy=True)

{'learn': {'MAE': 0.04576584628756357, 'RMSE': 0.060027348774606894}}
RMSE Train:  11366.626576481764
MAE Train:  8213.442964227143
MAPE Train:  4.776713459334203
RMSE Val:  17323.32413568199
MAE Val:  12168.876720968517
MAPE Val:  8.493950050035801


{'Train RMSE': [11366.626576481764],
 'Test RMSE': [17323.32413568199],
 'Train MAPE': [4.776713459334203],
 'Test MAPE': [8.493950050035801],
 'Train MAE': [8213.442964227143],
 'Test MAE': [12168.876720968517],
 'Train R2': [0.9734811594143148],
 'Test R2': [0.8821549101939576]}

In [None]:
cross_val_score(estimator=final_model_full, )

In [83]:
preds = final_model_full.predict(features)

features['sales_price_predict'] = np.exp(preds)



features.to_csv('../data/L2/CatBoost_Sales_Price_Predictions_Full.csv', index=False)

In [84]:
features.head()

Unnamed: 0,gr_liv_area,ms_sub_class,ms_zoning,lot_frontage,lot_area,alley,lot_shape,land_contour,lot_config,land_slope,neighborhood,bldg_type,house_style,overall_qual,overall_cond,roof_style,exterior1st,mas_vnr_type,mas_vnr_area,exter_qual,exter_cond,foundation,bsmt_qual,bsmt_cond,bsmt_exposure,bsmt_fin_type1,bsmt_fin_sf1,bsmt_fin_type2,bsmt_fin_sf2,bsmt_unf_sf,total_bsmt_sf,heating_qc,central_air,electrical,1st_flr_sf,2nd_flr_sf,low_qual_fin_sf,bsmt_full_bath,bsmt_half_bath,full_bath,half_bath,bedroom_abv_gr,kitchen_abv_gr,kitchen_qual,tot_rms_abv_grd,functional,fireplaces,fireplace_qu,garage_type,garage_finish,garage_cars,garage_area,garage_qual,garage_cond,paved_drive,wood_deck_sf,open_porch_sf,enclosed_porch,3_ssn_porch,screen_porch,pool_area,fence,misc_feature,misc_val,mo_sold,yr_sold,sale_type,sale_condition,near_rr,near_main_rd,near_pos,house_age_at_sale,construction_age_at_sale,sales_price_predict
0,856,30,RL,80.0,7890,,Reg,Lvl,Corner,Gtl,SWISU,1Fam,1Story,6,6,Gable,Wd Sdng,,0.0,TA,TA,CBlock,TA,TA,No,Rec,238.0,Unf,0.0,618.0,856.0,TA,1,SBrkr,856,0,0,1.0,0.0,1,0,2,1,TA,4,Typ,1,Gd,Detchd,Unf,2.0,399.0,TA,TA,Y,0,0,0,0,166,0,,,0,3,2010,WD,Normal,0,0,0,71,60,123474.615904
1,1049,120,RL,42.0,4235,,Reg,Lvl,Inside,Gtl,Edwards,TwnhsE,1Story,5,5,Gable,HdBoard,BrkFace,149.0,Gd,TA,CBlock,Gd,TA,Mn,GLQ,552.0,ALQ,393.0,104.0,1049.0,TA,1,SBrkr,1049,0,0,1.0,0.0,2,0,2,1,Gd,5,Typ,0,,Attchd,Fin,1.0,266.0,TA,TA,Y,0,105,0,0,0,0,,,0,2,2009,WD,Normal,0,0,0,25,25,133024.54837
2,1001,30,C (all),60.0,6060,,Reg,Lvl,Inside,Gtl,IDOTRR,1Fam,1Story,5,9,Hip,MetalSd,,0.0,Gd,TA,BrkTil,TA,TA,No,ALQ,737.0,Unf,0.0,100.0,837.0,Ex,1,SBrkr,1001,0,0,0.0,0.0,1,0,2,1,Gd,5,Typ,0,,Detchd,Unf,1.0,216.0,TA,Po,N,154,0,42,86,0,0,,,0,11,2007,WD,Normal,0,0,0,77,0,111847.007854
3,1039,70,RL,80.0,8146,,Reg,Lvl,Corner,Gtl,OldTown,1Fam,2Story,4,8,Gable,MetalSd,,0.0,Gd,Gd,BrkTil,Fa,TA,No,Unf,0.0,Unf,0.0,405.0,405.0,Gd,1,SBrkr,717,322,0,0.0,0.0,1,0,2,1,TA,6,Typ,0,,Detchd,Unf,1.0,281.0,TA,TA,N,0,0,168,0,111,0,,,0,5,2009,WD,Normal,0,0,0,109,6,109593.125039
4,1665,60,RL,70.0,8400,,Reg,Lvl,Inside,Gtl,NWAmes,1Fam,2Story,8,6,Gable,VinylSd,,0.0,Gd,TA,PConc,Gd,TA,No,GLQ,643.0,Unf,0.0,167.0,810.0,Ex,1,SBrkr,810,855,0,1.0,0.0,2,1,3,1,Gd,6,Typ,0,,Attchd,Fin,2.0,528.0,TA,TA,Y,0,45,0,0,0,0,,,0,11,2009,WD,Normal,0,0,0,8,8,213097.937215


## Catboost Pipeline with Nested Cross Validation and Optuna Hyper-parameter tuning



In [None]:
# COMPUTATIONALLY TOO EXPENSIVE

# import optuna
# from catboost import CatBoostRegressor
# from sklearn.model_selection import cross_val_score, KFold


# def objective_catboost(trial):
    
#     # Catboost parameters
#     param = {}       
#     param['learning_rate'] = trial.suggest_uniform("learning_rate", 0.001, 0.02, 0.1)
#     param['iterations'] = trial.suggest_int( "iterations", 100,1000,20)
#     param['depth'] = trial.suggest_int('depth', 9, 15)
#     param['l2_leaf_reg'] = trial.suggest_discrete_uniform('l2_leaf_reg', 1.0, 5.5, 0.5)
#     param['min_child_samples'] = trial.suggest_categorical('min_child_samples', [1, 4, 8, 16, 32])
#     param['grow_policy'] = 'Depthwise'
#     param['use_best_model'] = True
#     param['eval_metric'] = 'MAE'
#     param['od_type'] = 'iter'
#     param['od_wait'] = 20
#     param['random_state'] = RANDOM_SEED
#     param['logging_level'] = 'Silent'
    
#     # Perform nested cross-validation
#     kf_outer = KFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)
#     outer_scores = []
    
#     for train_index, val_index in kf_outer.split(X):
#         X_train, X_val = X[train_index], X[val_index]
#         y_train, y_val = y[train_index], y[val_index]
        
#          # Define the model with the current set of hyperparameters
#         model = CatBoostRegressor(**params)
        
#         # Perform hyperparameter tuning using inner cross-validation
#         kf_inner = KFold(n_splits=3, shuffle=True, random_state=42)
#         inner_scores = []
        
#         def inner_objective(trial_inner):
#             inner_params = {}
#             inner_params['learning_rate'] = trial.suggest_uniform("learning_rate", 0.001, 0.02, 0.1)
#             inner_params['iterations'] = trial.suggest_int( "iterations", 100,1000,20)
#             inner_params['depth'] = trial.suggest_int('depth', 9, 15)
#             inner_params['l2_leaf_reg'] = trial.suggest_discrete_uniform('l2_leaf_reg', 1.0, 5.5, 0.5)
#             inner_params['min_child_samples'] = trial.suggest_categorical('min_child_samples', [1, 4, 8, 16, 32])
#             inner_params['grow_policy'] = 'Depthwise'
#             inner_params['use_best_model'] = True
#             inner_params['eval_metric'] = 'MAE'
#             inner_params['od_type'] = 'iter'
#             inner_params['od_wait'] = 20
#             inner_params['random_state'] = RANDOM_SEED
#             inner_params['logging_level'] = 'Silent'
            
#             inner_model = CatBoostRegressor(**inner_params)
#             scores = cross_val_score(inner_model, X_train, y_train, cv=kf_inner)
#             return np.mean(scores)
        
#     # Optimize hyperparameters with Optuna
#     study_inner = optuna.create_study(direction='minimize')
#     study_inner.optimize(inner_objective, n_trials=100)
#     best_params = study_inner.best_params
        
#     # Train the model with the best hyperparameters
#     model = CatBoostRegressor(**best_params)
#     model.fit(X_train, y_train)
        
#     # Evaluate the model on the test set
#     score = model.score(X_val, y_val)
#     outer_scores.append(score)
    
#     # Calculate the mean score of outer cross-validation splits
#     mean_outer_score = np.mean(outer_scores)
#     return mean_outer_score

# Create the study object and optimize the objective function
#study = optuna.create_study(direction='minimize')
#study.optimize(objective, n_trials=500)
