# Эксперименты с финальной версией
В final_clean_version была реализована ВРОДЕ КАК правильная структура с разбиением на KFold и обучением (валидацией) на ряде фолдов и с соответствующей оценкой. Но получили ухудшение оценок сильное и тут, и в kaggle. Тут попробуем повторить все тоже самое, но без логарифмов и без доп. фич, чтобы попробовать на Kfold-ах получить приемлемые потери и скор = 12800 в kaggle (самый лучший из improved_v6)

In [1]:
from IPython.lib.deepreload import reload
%load_ext autoreload
%autoreload 2

import joblib
import numpy as np
import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn
import pandas as pd
import seaborn as sns
import warnings

from catboost import CatBoostRegressor
from sklearn import set_config
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Вместо одного фиксированного разбиения на train/test используем стабильную стратегию кросс-валидации.
# Используем тут Cross-validation, потому что:
# 	•	нужно надёжно сравнить несколько разных моделей или гиперпараметров и понять, какая модель стабильнее и лучше в целом.
# 	•	хотим избежать случайных удач или провалов, связанных с конкретным разбиением на train/test.
# 	•	выбираем модель или гиперпараметры, которые потом будешь использовать для финального сабмишна на Kaggle.
# Делаем эту оценку, чтобы в дальнейших блокнотах-улучшениях сравнивать более корректно.
from sklearn.model_selection import KFold, RepeatedKFold, cross_val_score, train_test_split

# Используем IterativeImputer:
# 	•	Он итеративно заполняет все пропуски сразу.
# 	•	Работает одновременно со всеми признаками, учитывая связи между ними.
# 	•	Не требует ручного управления порядком заполнения.
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

from utils.data_manager import DataManager
from utils.model_manager import ModelManager

In [2]:
# --- Глобально включаем вывод Pandas для всех трансформеров ---
# (Можно применять и к отдельным трансформерам/пайплайнам .set_output(transform="pandas"))
set_config(transform_output = "pandas")

In [3]:
dm = DataManager()
mm = ModelManager()

# Отключаем автологгирование, чтобы использовать ручное
mlflow.sklearn.autolog(disable=True)
warnings.filterwarnings("ignore", module="mlflow")  # Игнорируем предупреждения MLflow


In [4]:
RANDOM_STATE = 42

## 1. Загрузка данных

In [5]:
data_path = 'data/home-data-for-ml-course'
train_data = pd.read_csv(data_path + '/train.csv')
test_data = pd.read_csv(data_path + '/test.csv')

## 2. Предобработка данных 

In [6]:
# Определение колонок для удаления
intuitively_bad_features = [
    'LotShape',  # Общая форма участка
    'LandContour',  # Рельеф участка
    'LotConfig',  # Конфигурация участка
    'LandSlope',  # Уклон участка
    'MiscFeature',
    'MiscVal',
]
bad_columns = dm.get_all_nan_cols(train_data)
bad_columns.append('Id')
bad_columns.extend(intuitively_bad_features)

In [7]:
# Разделение на X / y
X, y = dm.split_data_set_to_x_y(train_data, 'SalePrice')
print(X.shape, y.shape)
X_test = test_data.copy()
print(X_test.shape)

(1460, 80) (1460,)
(1459, 80)


In [8]:
X.drop(columns=bad_columns, inplace=True)
X_test.drop(columns=bad_columns, inplace=True)

In [9]:
def make_feature_eng_great_again(train_X_in, test_X_in):
    """Хелпер, который создает фичи, логарифмирует и выравнивает колонки."""
    # Работаем с копиями, чтобы не изменять оригинальные X, X_test вне функции
    train_X = train_X_in.copy()
    test_X = test_X_in.copy()

    # Словарь качественных признаков
    quality_dict = {'Ex': 5, 'Gd':4, 'TA':3, 'Fa':2, 'Po':1, np.nan:0}

    def create_features(df):
        # Interactions (с проверкой на наличие колонок)
        if 'Neighborhood' in df.columns and 'MSZoning' in df.columns:
            df['Neighborhood_Zoning'] = df['Neighborhood'].astype(str) + '_' + df['MSZoning'].astype(str)
        if 'SaleType' in df.columns and 'SaleCondition' in df.columns:
            df['SaleType_Condition'] = df['SaleType'].astype(str) + '_' + df['SaleCondition'].astype(str)

        # Quality Score
        df['TotalQualScore'] = 0
        quality_cols = ['ExterQual', 'KitchenQual', 'BsmtQual', 'HeatingQC', 'GarageQual', 'FireplaceQu']
        for col in quality_cols:
            if col in df.columns:
                 df['TotalQualScore'] += df[col].map(quality_dict).fillna(0)

        # Porch/Deck Area and Flags
        df['PorchDeckArea'] = 0
        porch_cols = ['WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch']
        for col in porch_cols:
             if col in df.columns:
                df['PorchDeckArea'] += df[col].fillna(0)

        if 'Fireplaces' in df.columns:
            df['HasFireplace'] = (df['Fireplaces'] > 0).astype(int)
        if 'GarageType' in df.columns:
            df['HasGarage'] = (~df['GarageType'].isna()).astype(int)
        if 'Fence' in df.columns:
            df['HasFence'] = (~df['Fence'].isna()).astype(int)
        df['HasPorchDeck'] = (df['PorchDeckArea'] > 0).astype(int)

        return df

    def log_features(df, cols_to_log_list):  # Принимает СПИСОК колонок
        print(f"Applying log1p to: {cols_to_log_list}")
        for col_name in cols_to_log_list:
            if col_name in df.columns:
                # Добавим проверку на отрицательные значения перед логарифмированием
                if (df[col_name] < 0).any():
                     print(f"Warning: Column {col_name} contains negative values. Skipping log1p.")
                else:
                    df[col_name] = np.log1p(df[col_name])
            else:
                print(f"Warning: Column {col_name} not found in DF during log transform.")
        return df

    # 1. Создаем фичи
    train_X = create_features(train_X)
    test_X = create_features(test_X)
    print("Features created.")

    # 2. Определяем колонки для логарифмирования (ТОЛЬКО по трейну)
    numeric_cols = train_X.select_dtypes(include=np.number).columns
    skew_values = train_X[numeric_cols].skew()
    # Используем .index.tolist() чтобы получить список имен
    cols_to_log_list = skew_values[skew_values > 1].index.tolist()
    print(f"Columns identified for logging: {cols_to_log_list}")

    # 3. Логарифмируем (используя ОДИН и тот же список)
    train_X = log_features(train_X, cols_to_log_list)
    test_X = log_features(test_X, cols_to_log_list)
    print("Log transform applied.")

    # 4. Согласуем и сортируем колонки ПОСЛЕ всех манипуляций
    final_feature_cols = sorted(train_X.columns.tolist()) # Сортируем для стабильности
    train_X = train_X[final_feature_cols]
    test_X = test_X.reindex(columns=final_feature_cols, fill_value=0)
    print("Columns aligned and sorted.")

    return train_X, test_X

In [10]:
# Вызываем функцию с правильными данными (X, X_test)
X, X_test = make_feature_eng_great_again(X, X_test)

print("\nProcessing complete. Final shapes:")
print(f"X_processed: {X.shape}")
print(f"X_test_processed: {X_test.shape}")
print("\nExample processed X:")
print(X.head())

Features created.
Columns identified for logging: ['MSSubClass', 'LotFrontage', 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'TotalBsmtSF', '1stFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtHalfBath', 'KitchenAbvGr', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'PorchDeckArea', 'HasFence']
Applying log1p to: ['MSSubClass', 'LotFrontage', 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'TotalBsmtSF', '1stFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtHalfBath', 'KitchenAbvGr', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'PorchDeckArea', 'HasFence']
Applying log1p to: ['MSSubClass', 'LotFrontage', 'LotArea', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'TotalBsmtSF', '1stFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtHalfBath', 'KitchenAbvGr', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'PorchDeckArea', 'HasFence']
Log transform applied.
Columns aligned and sorted.

Pr

In [11]:
# Получение числовых колонок
numeric_columns = X.select_dtypes(include=['float64', 'int64']).columns
# Получение нечисловых колонок (всех остальных)
non_numeric_columns = X.select_dtypes(exclude=['float64', 'int64']).columns

### Аналитика проблемных образцов, дающих очень плохие ошибки в фолдах

In [12]:
# # Проблемные данные с жуткими ошибками из 3 Фолда
# # 523           60       RL        130.0    40094   Pave   NaN    AllPub   
# # 1298          60       RL        313.0    63887   Pave   NaN    AllPub   
# # 1169          60       RL        118.0    35760   Pave   NaN    AllPub   
# problematic_indices = [523, 1298, 1169, 1324, 440] # Индексы из твоего отчета об ошибках
# 
# # Посмотреть всю информацию по этим строкам (домам)
# print("Original data for problematic houses:")
# print(train_data.loc[problematic_indices])
# 
# # Посмотреть только SalePrice для проверки
# print("\nSalePrice for problematic houses:")
# print(train_data.loc[problematic_indices, 'SalePrice'])


In [13]:
# # Предполагаем, что X - это твои обработанные фичи (DataFrame)
# problematic_indices = [523, 1298, 1169, 1324, 440] # Как определили ранее
# problematic_indices = [523, 1298]  # конкретно эти дома модель сильно переоценила
# 
# # Разделяем индексы
# all_indices = X.index # Все индексы в твоем трейн-сете X
# rest_indices = all_indices.difference(problematic_indices)
# 
# # Сравниваем числовые признаки
# print("--- Описательные статистики для ПРОБЛЕМНЫХ домов ---")
# print(X.loc[problematic_indices, numeric_columns].describe()) # numeric_columns - список числовых колонок
# 
# print("\n--- Описательные статистики для ОСТАЛЬНЫХ домов ---")
# print(X.loc[rest_indices, numeric_columns].describe())
# 
# # Сравниваем категориальные признаки (пример для одной колонки)
# cat_col_to_compare = 'Neighborhood'
# print(f"\n--- Распределение '{cat_col_to_compare}' для ПРОБЛЕМНЫХ домов ---")
# print(X.loc[problematic_indices, cat_col_to_compare].value_counts(normalize=True))
# 
# print(f"\n--- Распределение '{cat_col_to_compare}' для ОСТАЛЬНЫХ домов ---")
# print(X.loc[rest_indices, cat_col_to_compare].value_counts(normalize=True))

In [14]:
# # "--- Описательные статистики для ПРОБЛЕМНЫХ домов ---"
# """
# Top 5 errors for this fold:
#       Actual      Predicted          Error
# 523   184750  515922.462045  331172.462045
# 1298  160000  428474.481532  268474.481532
# 1169  625000  467748.048480  157251.951520
# 1324  147000  281692.643270  134692.643270
# 440   555000  421663.071273  133336.928727
# """
# X.loc[problematic_indices, numeric_columns]

In [15]:
# # Попробуем найти схожие по стоимости дома и сравнить показатели
# target_price = 184_000
# tolerance = 10_000  # Максимально допустимая разница
# 
# rest_df = train_data.loc[rest_indices]
# same_houses = rest_df[abs(rest_df['SalePrice'] - target_price) <= tolerance]
# same_houses

In [16]:
# # -- Описательные статистики для ОСТАЛЬНЫХ домов ---
# X.loc[rest_indices, numeric_columns].describe()

In [17]:
# # Пример: Scatter plot GrLivArea vs SalePrice
# plt.figure(figsize=(10, 6))
# # Сначала рисуем все точки
# sns.scatterplot(x=X.loc[rest_indices, 'GrLivArea'], y=y.loc[rest_indices], label='Остальные', alpha=0.5)
# # Затем выделяем проблемные
# sns.scatterplot(x=X.loc[problematic_indices, 'GrLivArea'], y=y.loc[problematic_indices], color='red', s=100, label='Проблемные (Fold 3)', marker='X')
# plt.title('GrLivArea vs SalePrice')
# plt.xlabel('GrLivArea')
# plt.ylabel('SalePrice')
# plt.legend()
# plt.grid(True)
# plt.show()
# 
# # Пример: Box plot для OverallQual
# plt.figure(figsize=(8, 5))
# # Создаем временный DataFrame для удобства
# plot_df = X[['OverallQual']].copy()
# plot_df['Group'] = np.where(plot_df.index.isin(problematic_indices), 'Проблемные', 'Остальные')
# sns.boxplot(x='Group', y='OverallQual', data=plot_df)
# plt.title('Сравнение OverallQual')
# plt.show()

## 3. Обучаем модель с CV и корректируя данные

In [18]:
# --- Логарифмирование целевой переменной ---
y_log = np.log1p(y)

#### Нормализация данных через ColumnTransformer и Pipeline
В данном кейсе мы реализуем Заполнение числовых пропусков с помощью модели (Predictive imputation). Т.е. то, что пропущено в числовых признаках - будем заполнять не медианой или средним, а будем обучать модель, которая будет предсказывать пропуски (IterativeImputer + RandomForestRegressor)

In [19]:
# Создаем preprocessor с разными трансформерами для разных типов данных
# Числовые данные пропущенные предсказываем с помощью модели RandomForestRegressor

# Пайплайн для числовых признаков (итеративное заполнение)
numeric_transformer = Pipeline(steps=[
    ('imputer', IterativeImputer(
        estimator=RandomForestRegressor(n_estimators=50, random_state=RANDOM_STATE),
        max_iter=10,
        random_state=RANDOM_STATE
    )),  # Дает примерно +100 прирост качества vs mean/median
    # ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())  # разницы не дает 
])

# Пайплайн для категориальных признаков (заполнение частым значением и кодирование)
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

# --- Объединяем препроцессоры ---
preprocessor = ColumnTransformer(
    transformers=[
        # Применяем к исходным числовым колонкам
        ('num', numeric_transformer, numeric_columns),
        # Применяем к исходным категориальным колонкам
        ('cat', categorical_transformer, non_numeric_columns)
    ],
    remainder='drop',   # 'passthrough' сохранит полиномиальные и другие колонки, которые не были ни числовыми, ни категориальными ИЗНАЧАЛЬНО
    verbose_feature_names_out=False  # Чтобы имена колонок не менялись на 'num__colname' и т.д.
)

default_params = {
    'iterations': 1000, 
    'learning_rate': 0.05, 
    'depth': 6, 
    'loss_function': 'RMSE', 
    'verbose': 0, 
    'random_seed': RANDOM_STATE,
    'early_stopping_rounds': 500  # ?
}
catboost_params = {
    'iterations': 3000,  # Увеличиваем запас итераций, учитывая early_stopping
    'learning_rate': 0.03,
    'depth': 6,
    'loss_function': 'RMSE', # Оптимизируем RMSE на логарифмированной шкале
    'verbose': 0,
    'random_seed': RANDOM_STATE,
    'early_stopping_rounds': 100 # Включаем early stopping
}
optuna_best_params = {
    'iterations': 2279,
    'depth': 5,
    'learning_rate': 0.01489174151229139,
    'l2_leaf_reg': 2.3495954457881623,
    'bagging_temperature': 0.9217322652060611,
    'loss_function': 'RMSE',
    'random_seed': 42,
    'verbose': 0,
    'early_stopping_rounds': 500
}
optuna_best_params_v2 = {
    'iterations': 1775,
    'depth': 5,
    'learning_rate': 0.06039828474626542,
    'l2_leaf_reg': 2.132889759843395,
    'bagging_temperature': 0.9470181666458671,
    'subsample': 0.833424506156886,
    'colsample_bylevel': 0.9622754820457864,
    'loss_function': 'RMSE',
    'random_seed': 42,
    'verbose': 0,
    'early_stopping_rounds': 500
}
catboost_params = optuna_best_params_v2

# Включить Early Stopping очень рекомендуется в параметрах. Это позволит модели на каждом фолде останавливаться тогда, когда метрика на валидационной части этого фолда (X_val, y_val_log) перестает улучшаться. Это самый надежный способ подобрать оптимальное число итераций для каждого фолда и избежать переобучения

# --- Финальный пайплайн ---
# Мы будем использовать preprocessor и модель отдельно в цикле CV
# для корректной работы early stopping с пайплайном sklearn. Поэтому нам финальный пайплайн - не нужен.
# final_pipeline = Pipeline([
#     ('preprocessing', preprocessor),
#     ('model', CatBoostRegressor(**catboost_params))
# ])

In [20]:
"""
Пояснения к тому, что тут происходит:

Тут мы ОЦЕНИВАЕМ и ТЕСТИРУЕМ подобранные фичи и гиперпараметры. В цикле бежим по N_FOLD-ам, бьем данные тренировочные в соотношении 80% на обучение, 20% на валидацию. Обучаем модель на трейне, валидируем на валидации. Смотрим на ошибки. Если по итогу нас все устраивает, мы дальше идем и обучаем модель с нуля на всех данных со всеми гиперпараметрами и фичами (да еще можно вычленить из цикла итерацию самую лучшую для нужного количества итераций обучения финальной модели),

oof_predictions — это массив, который в итоге будет содержать предсказания для каждого объекта из исходного тренировочного набора (X). Важно, что предсказание для конкретного объекта (например, дома №100) делается той моделью (из цикла CV), которая обучалась без этого объекта. Надежная оценка качества: OOF-предикты позволяют посчитать метрику качества (например, oof_rmse) на всем тренировочном наборе, при этом каждое предсказание было сделано "честно" (модель не видела этот объект при обучении). 
Эта оценка часто бывает более надежной, чем простое усреднение метрик по фолдам (mean_cv_rmse), так как она считается на полном наборе данных один раз. 
Можно сравнить oof_predictions с реальными значениями y_log (или y), чтобы понять, на каких объектах модель ошибается сильнее всего.
Стэкинг/Блендинг: OOF-предикты часто используются как новые признаки для обучения модели второго уровня (мета-модели) в ансамблях (стэкинг).
"""

# --- Кросс-Валидация ---
N_FOLDS = 5  # Например, 5 или 10
kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)

oof_predictions_log = np.zeros(X.shape[0])  # Для хранения out-of-fold предсказаний
fold_rmses = []
fold_best_iterations = []  # Будем сохранять лучшие итерации

mlflow.set_experiment("Final Version With KFold CV WITH Log + With IterativeImputer + WITH Features")
with mlflow.start_run() as run:  # Сохраняем run для логирования артефактов
    for fold, (train_idx, val_idx) in enumerate(kf.split(X, y)): 
        print(f"--- Fold {fold+1}/{N_FOLDS} ---")
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train_log, y_val_log = y_log.iloc[train_idx], y_log.iloc[val_idx]

        # 1. Обучаем препроцессор ТОЛЬКО на трейне текущего фолда
        preprocessor.fit(X_train)

        # 2. Трансформируем трейн и валидацию
        X_train_prep = preprocessor.transform(X_train)
        X_val_prep = preprocessor.transform(X_val)

        # 3. Обучаем модель с early stopping
        model = CatBoostRegressor(**catboost_params)
        model.fit(X_train_prep, y_train_log,
                  eval_set=[(X_val_prep, y_val_log)],
                  verbose=0,  # Отключаем вывод, 100 - если хотим видеть обучение 
                 )

        # Сохраняем лучшую итерацию
        best_iter = model.get_best_iteration()
        fold_best_iterations.append(best_iter)
        print(f"Best iteration for fold {fold+1}: {best_iter}")

        # 4. Предсказания на валидации
        val_preds_log = model.predict(X_val_prep)
        oof_predictions_log[val_idx] = val_preds_log

        # 5. Оценка на фолде (в исходной шкале)
        val_preds_orig = np.expm1(val_preds_log)
        y_val_orig = np.expm1(y_val_log)
        fold_rmse = np.sqrt(mean_squared_error(y_val_orig, val_preds_orig))
        print(f"Fold {fold+1} RMSE: {fold_rmse}")
        fold_rmses.append(fold_rmse)
        mlflow.log_metric(f"fold_{fold+1}_rmse", fold_rmse, step=fold+1)
        mlflow.log_metric(f"fold_{fold+1}_best_iter", best_iter, step=fold+1)
    
    # --- Итоговая оценка CV ---
    mean_cv_rmse = np.mean(fold_rmses)
    std_cv_rmse = np.std(fold_rmses)
    oof_rmse = np.sqrt(mean_squared_error(np.expm1(y_log), np.expm1(oof_predictions_log)))

    print(f"\nMean CV RMSE: {mean_cv_rmse:.4f} +/- {std_cv_rmse:.4f}")
    print(f"OOF RMSE: {oof_rmse:.4f}")
    print(f"Mean best iteration: {np.mean(fold_best_iterations):.0f}")

    # Логгирование итоговых метрик вручную
    mlflow.log_metric("mean_cv_rmse", mean_cv_rmse)
    mlflow.log_metric("std_cv_rmse", std_cv_rmse)
    mlflow.log_metric("oof_rmse", oof_rmse)
    mlflow.log_metric("mean_best_iteration", np.mean(fold_best_iterations))

--- Fold 1/5 ---
Best iteration for fold 1: 450
Fold 1 RMSE: 24679.24964434139
--- Fold 2/5 ---
Best iteration for fold 2: 683
Fold 2 RMSE: 23542.51539513202
--- Fold 3/5 ---
Best iteration for fold 3: 78
Fold 3 RMSE: 34876.44034313925
--- Fold 4/5 ---
Best iteration for fold 4: 1654
Fold 4 RMSE: 24917.10796261652
--- Fold 5/5 ---
Best iteration for fold 5: 623
Fold 5 RMSE: 17868.597663028373

Mean CV RMSE: 25176.7822 +/- 5485.9589
OOF RMSE: 25767.5398
Mean best iteration: 698


In [21]:
# К чему стремимся
# --- Fold 1/5 ---
# Best iteration for fold 1: 450
# Fold 1 RMSE: 24679.24964434139
# --- Fold 2/5 ---
# Best iteration for fold 2: 683
# Fold 2 RMSE: 23542.51539513202
# --- Fold 3/5 ---
# Best iteration for fold 3: 78
# Fold 3 RMSE: 34876.44034313925
# --- Fold 4/5 ---
# Best iteration for fold 4: 1654
# Fold 4 RMSE: 24917.10796261652
# --- Fold 5/5 ---
# Best iteration for fold 5: 623
# Fold 5 RMSE: 17868.597663028373
# 
# Mean CV RMSE: 25176.7822 +/- 5485.9589
# OOF RMSE: 25767.5398
# Mean best iteration: 698


In [26]:
# На прошлом шаге мы сделали KFOLD обучение, если довольны метриками, то получаем лучшую итерацию и обучаем финальную модель на ВСЕХ данных.

final_iterations = catboost_params['iterations']  # optuna нашла уже лучшие параметры
print(f"\nTraining final model on all data with {final_iterations} iterations...")

final_catboost_params = catboost_params.copy()
final_catboost_params['iterations'] = final_iterations
final_catboost_params.pop('early_stopping_rounds', None)  # Убираем early stopping для финального обучения
print(f"Финальные параметры обычения модели: {final_catboost_params}")

final_model_pipeline = Pipeline([
    ('preprocessing', preprocessor),
    ('model', CatBoostRegressor(**final_catboost_params))
])

final_model_pipeline.fit(X, y_log)
print("Final model trained.")


Training final model on all data with 1775 iterations...
Финальные параметры обычения модели: {'iterations': 1775, 'depth': 5, 'learning_rate': 0.06039828474626542, 'l2_leaf_reg': 2.132889759843395, 'bagging_temperature': 0.9470181666458671, 'subsample': 0.833424506156886, 'colsample_bylevel': 0.9622754820457864, 'loss_function': 'RMSE', 'random_seed': 42, 'verbose': 0}
Final model trained.


In [27]:
# Предсказание на тест данных для сабмишна
final_test_pred_log_single_model = final_model_pipeline.predict(X_test)
final_test_pred_single_model = np.expm1(final_test_pred_log_single_model)
submission_single = pd.DataFrame({'Id': test_data['Id'], 'SalePrice': final_test_pred_single_model})
submission_single.to_csv('submission_final_kfold_with_log_v21.csv', index=False)
submission_single.head()

Unnamed: 0,Id,SalePrice
0,1461,128897.222323
1,1462,162091.781886
2,1463,190982.43875
3,1464,199984.837778
4,1465,184856.317464


In [28]:
# ===== Попробуем сделать Deep Ensemble =====
# Попытаемся сделать ансамбль моделей одной и той же архитектуры, но с разными Random State-ами, что по теории должно дать прирост точности. Можно это называть еще "Seed Ensembling".

N_SEEDS = 10  # Количество моделей в ансамбле
all_test_preds_log = []

# Шаги 3 и 4: Обучаем препроцессор и трансформируем данные
print("Fitting preprocessor on all X...")
preprocessor.fit(X)
print("Transforming X...")
X_prep_full = preprocessor.transform(X)
print("Transforming X_test...")
X_test_prep = preprocessor.transform(X_test)
print("Preprocessing complete.")

# Шаги 1 и 2 уже сделаны - best_params и final_iterations известны
final_catboost_params_ensemble = catboost_params.copy()
final_catboost_params_ensemble['iterations'] = final_iterations
final_catboost_params_ensemble.pop('early_stopping_rounds', None)
final_catboost_params_ensemble.pop('random_seed', None)
print(f"\nBest params for ensemble: {final_catboost_params_ensemble}")

# Шаг 5: Обучаем N моделей
print(f"\nTraining {N_SEEDS} models for ensemble...")
for i in range(N_SEEDS):
    current_seed = RANDOM_STATE + i
    print(f"Training model {i+1}/{N_SEEDS} with seed {current_seed}...")
    model = CatBoostRegressor(**final_catboost_params_ensemble, random_seed=current_seed)
    model.fit(X_prep_full, y_log) # Обучаем на всех X_prep_full

    # Шаг 6: Предсказание на тесте
    test_preds_log = model.predict(X_test_prep)
    all_test_preds_log.append(test_preds_log)
    print(f"Model {i+1} finished.")

# Шаг 7: Усреднение предсказаний
print("\nAveraging predictions...")
averaged_test_preds_log = np.mean(all_test_preds_log, axis=0)

# Шаг 8: Преобразование обратно
averaged_test_preds_orig = np.expm1(averaged_test_preds_log)
# Обработка возможных отрицательных значений
averaged_test_preds_orig[averaged_test_preds_orig < 0] = 0

# Шаг 9: Создание сабмишна
print("\nCreating submission file...")
submission_ensemble = pd.DataFrame({'Id': test_data['Id'], 'SalePrice': averaged_test_preds_orig})
submission_ensemble.to_csv('submission_seed_ensemble_v1.csv', index=False)
print("Submission file created: submission_seed_ensemble_v1.csv")
print(submission_ensemble.head())

Fitting preprocessor on all X...
Transforming X...
Transforming X_test...
Preprocessing complete.

Best params for ensemble: {'iterations': 1775, 'depth': 5, 'learning_rate': 0.06039828474626542, 'l2_leaf_reg': 2.132889759843395, 'bagging_temperature': 0.9470181666458671, 'subsample': 0.833424506156886, 'colsample_bylevel': 0.9622754820457864, 'loss_function': 'RMSE', 'verbose': 0}

Training 10 models for ensemble...
Training model 1/10 with seed 42...
Model 1 finished.
Training model 2/10 with seed 43...
Model 2 finished.
Training model 3/10 with seed 44...
Model 3 finished.
Training model 4/10 with seed 45...
Model 4 finished.
Training model 5/10 with seed 46...
Model 5 finished.
Training model 6/10 with seed 47...
Model 6 finished.
Training model 7/10 with seed 48...
Model 7 finished.
Training model 8/10 with seed 49...
Model 8 finished.
Training model 9/10 with seed 50...
Model 9 finished.
Training model 10/10 with seed 51...
Model 10 finished.

Averaging predictions...

Creating s

# ИТОГО
Выше мы получили корректный код с K-FOLD валидацией, логарифмированием целевой переменной и предсказанием. Теперь его надо оптимизировать. Попробуем optuna.

In [27]:
import optuna

# Определяем KFold один раз вне objective
kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=RANDOM_STATE)

# --- Функция цели для Optuna ---
def objective(trial):
    """Функция, которую Optuna будет минимизировать."""

    # 1. Определяем подбираемые параметры CatBoost
    catboost_params = {
        'iterations': trial.suggest_int('iterations', 1000, 4000),  # Оставляем диапазон, т.к. есть early stopping
        'depth': trial.suggest_int('depth', 4, 8),  # Слегка ограничиваем максимальную глубину
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.07, log=True),  # Сужаем диапазон LR
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1.0, 10.0, log=True),  # Хороший диапазон для L2
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0.0, 1.0), # Оставляем
        # Доля объектов, используемых для построения каждого дерева. Значения меньше 1.0 могут сделать модель более устойчивой к выбросам в данных.
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),  # --- Добавлено для робастности ---
        # Доля признаков, используемых на каждом уровне дерева. Тоже помогает бороться с переобучением.
        'colsample_bylevel': trial.suggest_float('colsample_bylevel', 0.6, 1.0),  # --- Добавлено для робастности ---
        # --- Фиксированные параметры ---
        'loss_function': 'RMSE',
        'verbose': 0,
        'random_seed': RANDOM_STATE,
        'early_stopping_rounds': 500  # --- Увеличиваем терпение ---
    }

    fold_rmses = [] # Список для хранения RMSE по фолдам для текущего trial

    # 2. Запускаем цикл KFold CV
    for fold, (train_idx, val_idx) in enumerate(kf.split(X, y_log)):
        X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        y_train_log, y_val_log = y_log.iloc[train_idx], y_log.iloc[val_idx]

        try:
            # 3. Препроцессинг
            # Копирование preprocessor не нужно, т.к. fit перезаписывает состояние
            preprocessor.fit(X_train)
            X_train_prep = preprocessor.transform(X_train)
            X_val_prep = preprocessor.transform(X_val)

            # 4. Обучение модели CatBoost
            model = CatBoostRegressor(**catboost_params)
            model.fit(X_train_prep, y_train_log,
                      eval_set=[(X_val_prep, y_val_log)],
                      verbose=0
                     )

            # 5. Предсказание и оценка
            val_preds_log = model.predict(X_val_prep)
            val_preds_orig = np.expm1(val_preds_log)
            y_val_orig = np.expm1(y_val_log)
            val_preds_orig[val_preds_orig < 0] = 0 # Обработка на всякий случай
            fold_rmse = np.sqrt(mean_squared_error(y_val_orig, val_preds_orig))

            # Проверка на NaN/inf перед добавлением
            if np.isnan(fold_rmse) or np.isinf(fold_rmse):
                 print(f"Warning: Invalid RMSE ({fold_rmse}) encountered in fold {fold+1}, trial {trial.number}. Pruning.")
                 # Можно сразу прекратить trial, если RMSE некорректен
                 raise optuna.exceptions.TrialPruned()

            fold_rmses.append(fold_rmse)

            # 6. Отчет для Optuna Pruning
            trial.report(fold_rmse, step=fold)
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

        except optuna.exceptions.TrialPruned:
             # Если trial был остановлен прунером, просто выходим
             raise
        except Exception as e:
             print(f"Error during fold {fold+1} for trial {trial.number}: {e}")
             # В случае другой ошибки, возвращаем 'inf'
             return float('inf')

    # 7. Возвращаем средний RMSE
    if not fold_rmses:  # Если все фолды вызвали ошибку (маловероятно)
        return float('inf')

    mean_rmse = np.mean(fold_rmses)
    std_rmse = np.std(fold_rmses)  # Считаем std dev для информации

    # Сохраняем std dev как атрибут, чтобы видеть его в результатах
    trial.set_user_attr("std_cv_rmse", std_rmse)

    print(f"Trial {trial.number} finished with Mean CV RMSE: {mean_rmse:.4f} +/- {std_rmse:.4f}")
    return mean_rmse

# --- Запуск исследования Optuna ---
# Pruner MedianPruner с n_warmup_steps=2 - хороший выбор по умолчанию
# "Pruning" (обрезка, отсечение) — это механизм в Optuna, который позволяет досрочно останавливать "бесперспективные" испытания (trials).
# Вместо того чтобы запускать каждую комбинацию гиперпараметров на всех 5 фолдах, Optuna после нескольких фолдов (в нашем случае после 3-го фолда, т.к. n_warmup_steps=2) смотрит на промежуточный результат (RMSE).
# Если этот промежуточный результат значительно хуже, чем у других, уже завершенных испытаний на том же шаге, Optuna решает, что нет смысла тратить время на оставшиеся фолды для этой комбинации параметров, и "обрезает" испытание.
# study = optuna.create_study(direction='minimize',
#                             pruner=optuna.pruners.MedianPruner(n_warmup_steps=2))
# Выбираем менее агрессивный прунер: Вместо MedianPruner (отсекает все, что хуже 50-го перцентиля) - PercentilePruner с меньшим порогом, например, 75-й перцентиль (проним лишь те испытания, которые хуже 25% самых плохих прошлых). Потому что у нас на 3 фолде очень большие ошибки почему-то всегда.
study = optuna.create_study(direction='minimize',
                            pruner=optuna.pruners.PercentilePruner(percentile=75, n_warmup_steps=3))

# Запускаем оптимизацию
N_TRIALS = 200 # Можно начать со 100, потом увеличить если нужно
study.optimize(objective, n_trials=N_TRIALS, n_jobs=1)  # n_jobs=1 - безопаснее

# --- Результаты ---
# (Код вывода результатов остается таким же)
# ...

# Сохраняем лучшие параметры
best_params = study.best_trial.params
best_params['loss_function'] = 'RMSE'
best_params['random_seed'] = RANDOM_STATE
best_params['verbose'] = 0
# Важно: Добавляем early_stopping_rounds, который использовался при подборе,
# чтобы потом использовать его при оценке OOF или убрать для финального обучения.
# best_params['early_stopping_rounds'] = 500 # Либо убрать эту строку перед финальным обучением на всех данных


[I 2025-04-05 01:05:26,242] A new study created in memory with name: no-name-176e6879-488b-4ed8-8b8d-38bb99d0194a
[I 2025-04-05 01:08:19,755] Trial 0 finished with value: 27848.124842901587 and parameters: {'iterations': 1037, 'depth': 8, 'learning_rate': 0.02081290525303602, 'l2_leaf_reg': 1.1389644199702988, 'bagging_temperature': 0.4559690629472064, 'subsample': 0.6295096493408531, 'colsample_bylevel': 0.9608216269854182}. Best is trial 0 with value: 27848.124842901587.


Trial 0 finished with Mean CV RMSE: 27848.1248 +/- 6056.7948


[I 2025-04-05 01:11:55,678] Trial 1 finished with value: 27096.902905509975 and parameters: {'iterations': 3920, 'depth': 8, 'learning_rate': 0.0277213807928953, 'l2_leaf_reg': 4.4422510454905195, 'bagging_temperature': 0.3428148178742335, 'subsample': 0.7479900843522618, 'colsample_bylevel': 0.6339947717586244}. Best is trial 1 with value: 27096.902905509975.


Trial 1 finished with Mean CV RMSE: 27096.9029 +/- 5292.3158


[I 2025-04-05 01:14:53,490] Trial 2 finished with value: 27807.622968536125 and parameters: {'iterations': 1844, 'depth': 8, 'learning_rate': 0.04682754211161001, 'l2_leaf_reg': 5.05748664919862, 'bagging_temperature': 0.7332151795635945, 'subsample': 0.8217664737296213, 'colsample_bylevel': 0.9607990789384044}. Best is trial 1 with value: 27096.902905509975.


Trial 2 finished with Mean CV RMSE: 27807.6230 +/- 6420.2894


[I 2025-04-05 01:18:08,632] Trial 3 finished with value: 27631.352534690952 and parameters: {'iterations': 3037, 'depth': 8, 'learning_rate': 0.03426096723288277, 'l2_leaf_reg': 3.7107658153544545, 'bagging_temperature': 0.073625628096054, 'subsample': 0.8478616041850507, 'colsample_bylevel': 0.8361755421603994}. Best is trial 1 with value: 27096.902905509975.


Trial 3 finished with Mean CV RMSE: 27631.3525 +/- 5956.9930


[I 2025-04-05 01:20:52,336] Trial 4 finished with value: 27555.87758777177 and parameters: {'iterations': 3931, 'depth': 7, 'learning_rate': 0.0438158717311762, 'l2_leaf_reg': 1.1810594694409295, 'bagging_temperature': 0.702094034442202, 'subsample': 0.7637330519164215, 'colsample_bylevel': 0.626411223632047}. Best is trial 1 with value: 27096.902905509975.


Trial 4 finished with Mean CV RMSE: 27555.8776 +/- 6682.4131


[I 2025-04-05 01:23:24,111] Trial 5 finished with value: 26016.642407277723 and parameters: {'iterations': 1895, 'depth': 4, 'learning_rate': 0.023690742431531832, 'l2_leaf_reg': 2.959777818558143, 'bagging_temperature': 0.6676416759854477, 'subsample': 0.7936751640787109, 'colsample_bylevel': 0.874683434110616}. Best is trial 5 with value: 26016.642407277723.


Trial 5 finished with Mean CV RMSE: 26016.6424 +/- 5439.2409


[I 2025-04-05 01:25:57,728] Trial 6 finished with value: 26555.62466649747 and parameters: {'iterations': 1838, 'depth': 5, 'learning_rate': 0.03867067805812294, 'l2_leaf_reg': 3.9481177189453454, 'bagging_temperature': 0.017141285991240762, 'subsample': 0.6168583087277925, 'colsample_bylevel': 0.8637815541231066}. Best is trial 5 with value: 26016.642407277723.


Trial 6 finished with Mean CV RMSE: 26555.6247 +/- 6579.3690


[I 2025-04-05 01:28:52,774] Trial 7 finished with value: 27203.958527930477 and parameters: {'iterations': 1077, 'depth': 8, 'learning_rate': 0.06528085783465698, 'l2_leaf_reg': 3.1159209605282987, 'bagging_temperature': 0.45664823942852983, 'subsample': 0.9373782216525547, 'colsample_bylevel': 0.7616888139836268}. Best is trial 5 with value: 26016.642407277723.


Trial 7 finished with Mean CV RMSE: 27203.9585 +/- 5594.3692


[I 2025-04-05 01:32:04,804] Trial 8 pruned. 
[I 2025-04-05 01:35:19,302] Trial 9 pruned. 
[I 2025-04-05 01:37:52,783] Trial 10 finished with value: 26714.48354607128 and parameters: {'iterations': 2309, 'depth': 4, 'learning_rate': 0.011331257337511271, 'l2_leaf_reg': 7.835389740824009, 'bagging_temperature': 0.987421246117606, 'subsample': 0.6948921266510962, 'colsample_bylevel': 0.7279367335888705}. Best is trial 5 with value: 26016.642407277723.


Trial 10 finished with Mean CV RMSE: 26714.4835 +/- 5427.8484


[I 2025-04-05 01:40:24,675] Trial 11 finished with value: 26136.46028357631 and parameters: {'iterations': 1832, 'depth': 4, 'learning_rate': 0.015713018966629543, 'l2_leaf_reg': 2.516197701782506, 'bagging_temperature': 0.2338138627521709, 'subsample': 0.6082227292669469, 'colsample_bylevel': 0.8859463341653272}. Best is trial 5 with value: 26016.642407277723.


Trial 11 finished with Mean CV RMSE: 26136.4603 +/- 5609.4495


[I 2025-04-05 01:42:56,287] Trial 12 finished with value: 26031.483530075257 and parameters: {'iterations': 1743, 'depth': 4, 'learning_rate': 0.01489036694910338, 'l2_leaf_reg': 2.40807463869986, 'bagging_temperature': 0.24092032358630366, 'subsample': 0.8964612654061852, 'colsample_bylevel': 0.8885342155286365}. Best is trial 5 with value: 26016.642407277723.


Trial 12 finished with Mean CV RMSE: 26031.4835 +/- 5445.6195


[I 2025-04-05 01:45:32,945] Trial 13 finished with value: 25852.38621926142 and parameters: {'iterations': 2358, 'depth': 5, 'learning_rate': 0.014495257888970754, 'l2_leaf_reg': 2.1503748600283097, 'bagging_temperature': 0.8788554749426187, 'subsample': 0.898269688082009, 'colsample_bylevel': 0.899284938253865}. Best is trial 13 with value: 25852.38621926142.


Trial 13 finished with Mean CV RMSE: 25852.3862 +/- 5486.6545


[I 2025-04-05 01:48:10,931] Trial 14 finished with value: 26239.76837325001 and parameters: {'iterations': 2471, 'depth': 5, 'learning_rate': 0.010008224721380752, 'l2_leaf_reg': 1.7616100090748394, 'bagging_temperature': 0.9645799070065072, 'subsample': 0.8742613269778938, 'colsample_bylevel': 0.9053327043500667}. Best is trial 13 with value: 25852.38621926142.


Trial 14 finished with Mean CV RMSE: 26239.7684 +/- 5641.1870


[I 2025-04-05 01:50:49,300] Trial 15 finished with value: 26307.768350028164 and parameters: {'iterations': 2844, 'depth': 5, 'learning_rate': 0.017175006981247756, 'l2_leaf_reg': 5.517322158451795, 'bagging_temperature': 0.8184200420315684, 'subsample': 0.7891685182106211, 'colsample_bylevel': 0.9959492649806746}. Best is trial 13 with value: 25852.38621926142.


Trial 15 finished with Mean CV RMSE: 26307.7684 +/- 6078.9082


[I 2025-04-05 01:53:31,763] Trial 16 finished with value: 26280.950685771706 and parameters: {'iterations': 2221, 'depth': 6, 'learning_rate': 0.013744916462474688, 'l2_leaf_reg': 2.5283788888120426, 'bagging_temperature': 0.8448786780459021, 'subsample': 0.9198751627612297, 'colsample_bylevel': 0.7801664888603621}. Best is trial 13 with value: 25852.38621926142.


Trial 16 finished with Mean CV RMSE: 26280.9507 +/- 5532.0644


[I 2025-04-05 01:56:08,949] Trial 17 pruned. 
[I 2025-04-05 01:58:48,247] Trial 18 finished with value: 26215.66974040737 and parameters: {'iterations': 2894, 'depth': 5, 'learning_rate': 0.011973641372264518, 'l2_leaf_reg': 1.4757368924417829, 'bagging_temperature': 0.8592345261471654, 'subsample': 0.8443188065540718, 'colsample_bylevel': 0.9193874288310782}. Best is trial 13 with value: 25852.38621926142.


Trial 18 finished with Mean CV RMSE: 26215.6697 +/- 5366.0614


[I 2025-04-05 02:01:22,191] Trial 19 finished with value: 25964.792315355204 and parameters: {'iterations': 2195, 'depth': 4, 'learning_rate': 0.01954623417599561, 'l2_leaf_reg': 2.8988011631229824, 'bagging_temperature': 0.7066524064348632, 'subsample': 0.944768545640013, 'colsample_bylevel': 0.8497889091600587}. Best is trial 13 with value: 25852.38621926142.


Trial 19 finished with Mean CV RMSE: 25964.7923 +/- 5825.9775


[I 2025-04-05 02:04:00,439] Trial 20 finished with value: 26142.601720517872 and parameters: {'iterations': 2617, 'depth': 5, 'learning_rate': 0.01856366643776337, 'l2_leaf_reg': 2.3600287521919556, 'bagging_temperature': 0.7629335567560824, 'subsample': 0.9300445717912273, 'colsample_bylevel': 0.8417437562441724}. Best is trial 13 with value: 25852.38621926142.


Trial 20 finished with Mean CV RMSE: 26142.6017 +/- 5581.8839


[I 2025-04-05 02:06:34,180] Trial 21 finished with value: 26021.97730188223 and parameters: {'iterations': 2146, 'depth': 4, 'learning_rate': 0.023473779895343116, 'l2_leaf_reg': 3.0792588654367683, 'bagging_temperature': 0.6330311946879679, 'subsample': 0.9657168303950324, 'colsample_bylevel': 0.7934225943075979}. Best is trial 13 with value: 25852.38621926142.


Trial 21 finished with Mean CV RMSE: 26021.9773 +/- 5684.5854


[I 2025-04-05 02:09:05,967] Trial 22 finished with value: 25912.562468649245 and parameters: {'iterations': 1541, 'depth': 4, 'learning_rate': 0.029881520568017413, 'l2_leaf_reg': 3.3528506072627047, 'bagging_temperature': 0.5351781769438693, 'subsample': 0.8828638609800996, 'colsample_bylevel': 0.854689554293677}. Best is trial 13 with value: 25852.38621926142.


Trial 22 finished with Mean CV RMSE: 25912.5625 +/- 5469.5956


[I 2025-04-05 02:11:37,119] Trial 23 finished with value: 25839.2939356207 and parameters: {'iterations': 1414, 'depth': 4, 'learning_rate': 0.031367918649534386, 'l2_leaf_reg': 2.1012152837557103, 'bagging_temperature': 0.5245431283168895, 'subsample': 0.890822194460737, 'colsample_bylevel': 0.921935931961773}. Best is trial 23 with value: 25839.2939356207.


Trial 23 finished with Mean CV RMSE: 25839.2939 +/- 5634.6562


[I 2025-04-05 02:14:10,095] Trial 24 finished with value: 26507.71320725538 and parameters: {'iterations': 1438, 'depth': 5, 'learning_rate': 0.033813205160083494, 'l2_leaf_reg': 1.408815094378314, 'bagging_temperature': 0.5391172258244308, 'subsample': 0.865706321604495, 'colsample_bylevel': 0.9269255591383785}. Best is trial 23 with value: 25839.2939356207.


Trial 24 finished with Mean CV RMSE: 26507.7132 +/- 6194.0650


[I 2025-04-05 02:16:45,599] Trial 25 finished with value: 26625.79659057879 and parameters: {'iterations': 1301, 'depth': 6, 'learning_rate': 0.03093190317996749, 'l2_leaf_reg': 2.0467725861602633, 'bagging_temperature': 0.36757417463355646, 'subsample': 0.8952569267601609, 'colsample_bylevel': 0.9842239422195944}. Best is trial 23 with value: 25839.2939356207.


Trial 25 finished with Mean CV RMSE: 26625.7966 +/- 6217.3820


[I 2025-04-05 02:19:16,318] Trial 26 pruned. 
[I 2025-04-05 02:21:49,251] Trial 27 finished with value: 26110.507043251244 and parameters: {'iterations': 1202, 'depth': 5, 'learning_rate': 0.038459750044312124, 'l2_leaf_reg': 3.77841707305076, 'bagging_temperature': 0.25205540920268793, 'subsample': 0.8291663223790972, 'colsample_bylevel': 0.8203623078036254}. Best is trial 23 with value: 25839.2939356207.


Trial 27 finished with Mean CV RMSE: 26110.5070 +/- 5512.4892


[I 2025-04-05 02:24:26,279] Trial 28 finished with value: 26327.34535664094 and parameters: {'iterations': 1579, 'depth': 6, 'learning_rate': 0.029131820851366362, 'l2_leaf_reg': 2.1580880459272, 'bagging_temperature': 0.5253965746334122, 'subsample': 0.8752105627582111, 'colsample_bylevel': 0.9015879055858637}. Best is trial 23 with value: 25839.2939356207.


Trial 28 finished with Mean CV RMSE: 26327.3454 +/- 6277.2780


[I 2025-04-05 02:26:56,357] Trial 29 finished with value: 25996.52664993263 and parameters: {'iterations': 1001, 'depth': 4, 'learning_rate': 0.06297530890055783, 'l2_leaf_reg': 9.69534748351377, 'bagging_temperature': 0.9121995057701278, 'subsample': 0.8076066978445909, 'colsample_bylevel': 0.9749580149533162}. Best is trial 23 with value: 25839.2939356207.


Trial 29 finished with Mean CV RMSE: 25996.5266 +/- 5576.8341


[I 2025-04-05 02:29:31,621] Trial 30 pruned. 
[I 2025-04-05 02:32:06,125] Trial 31 pruned. 
[I 2025-04-05 02:34:38,599] Trial 32 pruned. 
[I 2025-04-05 02:37:13,954] Trial 33 finished with value: 25922.133132368996 and parameters: {'iterations': 2689, 'depth': 4, 'learning_rate': 0.017887095656593188, 'l2_leaf_reg': 4.414509214081761, 'bagging_temperature': 0.6992077693169307, 'subsample': 0.954472084999803, 'colsample_bylevel': 0.862146658228779}. Best is trial 23 with value: 25839.2939356207.


Trial 33 finished with Mean CV RMSE: 25922.1331 +/- 5483.8174


[I 2025-04-05 02:39:50,470] Trial 34 finished with value: 25960.599612312035 and parameters: {'iterations': 3275, 'depth': 4, 'learning_rate': 0.017491592096114318, 'l2_leaf_reg': 4.94496401445948, 'bagging_temperature': 0.41390547817601986, 'subsample': 0.9542799439553925, 'colsample_bylevel': 0.9109220102812869}. Best is trial 23 with value: 25839.2939356207.


Trial 34 finished with Mean CV RMSE: 25960.5996 +/- 5698.7743


[I 2025-04-05 02:42:26,134] Trial 35 finished with value: 25906.224085077596 and parameters: {'iterations': 2810, 'depth': 5, 'learning_rate': 0.036526344849613396, 'l2_leaf_reg': 4.644387634601094, 'bagging_temperature': 0.29624613902770547, 'subsample': 0.9995940445069864, 'colsample_bylevel': 0.6013869221144237}. Best is trial 23 with value: 25839.2939356207.


Trial 35 finished with Mean CV RMSE: 25906.2241 +/- 5346.5335


[I 2025-04-05 02:45:17,818] Trial 36 finished with value: 26418.224002225586 and parameters: {'iterations': 3138, 'depth': 7, 'learning_rate': 0.04119833229360973, 'l2_leaf_reg': 4.238719582136169, 'bagging_temperature': 0.2963547801505412, 'subsample': 0.99471216257548, 'colsample_bylevel': 0.6582297801426278}. Best is trial 23 with value: 25839.2939356207.


Trial 36 finished with Mean CV RMSE: 26418.2240 +/- 5656.1498


[I 2025-04-05 02:47:51,564] Trial 37 finished with value: 26232.48001733602 and parameters: {'iterations': 2032, 'depth': 5, 'learning_rate': 0.05119464392248356, 'l2_leaf_reg': 5.741072734661971, 'bagging_temperature': 0.11281220432400324, 'subsample': 0.7602150472720125, 'colsample_bylevel': 0.6000510074927701}. Best is trial 23 with value: 25839.2939356207.


Trial 37 finished with Mean CV RMSE: 26232.4800 +/- 5902.8615


[I 2025-04-05 02:50:34,051] Trial 38 finished with value: 26049.807756617483 and parameters: {'iterations': 3587, 'depth': 6, 'learning_rate': 0.031040133637621505, 'l2_leaf_reg': 3.4694078785481066, 'bagging_temperature': 0.5133559351323929, 'subsample': 0.7171350169980977, 'colsample_bylevel': 0.6737013095969969}. Best is trial 23 with value: 25839.2939356207.


Trial 38 finished with Mean CV RMSE: 26049.8078 +/- 5487.8375


[I 2025-04-05 02:53:08,824] Trial 39 finished with value: 26259.159374669816 and parameters: {'iterations': 2858, 'depth': 5, 'learning_rate': 0.03445036430382193, 'l2_leaf_reg': 1.0265393931015916, 'bagging_temperature': 0.1417119363117963, 'subsample': 0.8549790264509027, 'colsample_bylevel': 0.8169687055593419}. Best is trial 23 with value: 25839.2939356207.


Trial 39 finished with Mean CV RMSE: 26259.1594 +/- 5711.2623


[I 2025-04-05 02:55:50,560] Trial 40 pruned. 
[I 2025-04-05 02:58:24,446] Trial 41 finished with value: 26268.684448935073 and parameters: {'iterations': 2674, 'depth': 4, 'learning_rate': 0.026710483376037257, 'l2_leaf_reg': 3.9100844913844535, 'bagging_temperature': 0.657587406631233, 'subsample': 0.9757532272031278, 'colsample_bylevel': 0.8658684825776356}. Best is trial 23 with value: 25839.2939356207.


Trial 41 finished with Mean CV RMSE: 26268.6844 +/- 5661.5700


[I 2025-04-05 03:00:58,271] Trial 42 finished with value: 25919.62192229969 and parameters: {'iterations': 2692, 'depth': 4, 'learning_rate': 0.025293116409502655, 'l2_leaf_reg': 4.525667667177704, 'bagging_temperature': 0.7321766558678586, 'subsample': 0.95200405354616, 'colsample_bylevel': 0.8915693381360156}. Best is trial 23 with value: 25839.2939356207.


Trial 42 finished with Mean CV RMSE: 25919.6219 +/- 5540.4394


[I 2025-04-05 03:03:30,539] Trial 43 finished with value: 26058.260187048494 and parameters: {'iterations': 3036, 'depth': 4, 'learning_rate': 0.04402651451327947, 'l2_leaf_reg': 6.218803897242992, 'bagging_temperature': 0.42053817893783524, 'subsample': 0.881582848092318, 'colsample_bylevel': 0.9616773717543448}. Best is trial 23 with value: 25839.2939356207.


Trial 43 finished with Mean CV RMSE: 26058.2602 +/- 5688.4409


[I 2025-04-05 03:06:07,289] Trial 44 finished with value: 26159.65513018246 and parameters: {'iterations': 2338, 'depth': 5, 'learning_rate': 0.02479822887576807, 'l2_leaf_reg': 4.788892951808906, 'bagging_temperature': 0.5647569821500591, 'subsample': 0.9154489148703173, 'colsample_bylevel': 0.8855531499245556}. Best is trial 23 with value: 25839.2939356207.


Trial 44 finished with Mean CV RMSE: 26159.6551 +/- 5971.2537


[I 2025-04-05 03:08:40,585] Trial 45 finished with value: 26080.309868084576 and parameters: {'iterations': 3004, 'depth': 4, 'learning_rate': 0.031544087726540354, 'l2_leaf_reg': 3.483316945679956, 'bagging_temperature': 0.7592984188980606, 'subsample': 0.9892610524362075, 'colsample_bylevel': 0.9405950599911623}. Best is trial 23 with value: 25839.2939356207.


Trial 45 finished with Mean CV RMSE: 26080.3099 +/- 5707.6577


[I 2025-04-05 03:11:17,669] Trial 46 pruned. 
[I 2025-04-05 03:13:52,273] Trial 47 pruned. 
[I 2025-04-05 03:16:35,324] Trial 48 finished with value: 26372.043533624725 and parameters: {'iterations': 2734, 'depth': 6, 'learning_rate': 0.02582951858700767, 'l2_leaf_reg': 6.496866807883539, 'bagging_temperature': 0.18270036207559726, 'subsample': 0.9222312459829001, 'colsample_bylevel': 0.9611081388672641}. Best is trial 23 with value: 25839.2939356207.


Trial 48 finished with Mean CV RMSE: 26372.0435 +/- 5322.7101


[I 2025-04-05 03:19:09,435] Trial 49 finished with value: 25719.499860431395 and parameters: {'iterations': 1729, 'depth': 5, 'learning_rate': 0.04771304567354759, 'l2_leaf_reg': 2.2086944348684048, 'bagging_temperature': 0.922757160395502, 'subsample': 0.8456884460043302, 'colsample_bylevel': 0.8816905328352515}. Best is trial 49 with value: 25719.499860431395.


Trial 49 finished with Mean CV RMSE: 25719.4999 +/- 5810.9101


[I 2025-04-05 03:21:42,506] Trial 50 finished with value: 25818.582799838703 and parameters: {'iterations': 1405, 'depth': 5, 'learning_rate': 0.05344261061249411, 'l2_leaf_reg': 2.198642496460103, 'bagging_temperature': 0.9957830247005865, 'subsample': 0.8455649203195492, 'colsample_bylevel': 0.7019756129019105}. Best is trial 49 with value: 25719.499860431395.


Trial 50 finished with Mean CV RMSE: 25818.5828 +/- 5364.0679


[I 2025-04-05 03:24:15,644] Trial 51 finished with value: 26017.53714822643 and parameters: {'iterations': 1757, 'depth': 5, 'learning_rate': 0.05762552023118305, 'l2_leaf_reg': 2.255732178356021, 'bagging_temperature': 0.9378649970938825, 'subsample': 0.8398760235876912, 'colsample_bylevel': 0.6952468824855894}. Best is trial 49 with value: 25719.499860431395.


Trial 51 finished with Mean CV RMSE: 26017.5371 +/- 5949.0915


[I 2025-04-05 03:26:48,653] Trial 52 finished with value: 25765.672862862964 and parameters: {'iterations': 1478, 'depth': 5, 'learning_rate': 0.049682820404871736, 'l2_leaf_reg': 1.8621658856053216, 'bagging_temperature': 0.9759149687242179, 'subsample': 0.7893932756941113, 'colsample_bylevel': 0.6190476133389664}. Best is trial 49 with value: 25719.499860431395.


Trial 52 finished with Mean CV RMSE: 25765.6729 +/- 5038.8747


[I 2025-04-05 03:29:22,029] Trial 53 pruned. 
[I 2025-04-05 03:31:54,913] Trial 54 finished with value: 25692.088035522214 and parameters: {'iterations': 1934, 'depth': 5, 'learning_rate': 0.06866774111645706, 'l2_leaf_reg': 2.6411449422210342, 'bagging_temperature': 0.8909747768632686, 'subsample': 0.8072642635585155, 'colsample_bylevel': 0.6002205990089923}. Best is trial 54 with value: 25692.088035522214.


Trial 54 finished with Mean CV RMSE: 25692.0880 +/- 5328.5809


[I 2025-04-05 03:34:31,353] Trial 55 finished with value: 26780.50331730926 and parameters: {'iterations': 1895, 'depth': 6, 'learning_rate': 0.06979221939743566, 'l2_leaf_reg': 1.9715915644571445, 'bagging_temperature': 0.8710009367209155, 'subsample': 0.8064663735655928, 'colsample_bylevel': 0.6582096816974021}. Best is trial 54 with value: 25692.088035522214.


Trial 55 finished with Mean CV RMSE: 26780.5033 +/- 6173.8236


[I 2025-04-05 03:37:04,165] Trial 56 pruned. 
[I 2025-04-05 03:39:39,394] Trial 57 pruned. 
[I 2025-04-05 03:42:14,110] Trial 58 finished with value: 25940.166349874835 and parameters: {'iterations': 1987, 'depth': 5, 'learning_rate': 0.04734898906956573, 'l2_leaf_reg': 1.7369527421753825, 'bagging_temperature': 0.9968022151905435, 'subsample': 0.653604938083979, 'colsample_bylevel': 0.614480853757082}. Best is trial 54 with value: 25692.088035522214.


Trial 58 finished with Mean CV RMSE: 25940.1663 +/- 5952.1087


[I 2025-04-05 03:44:46,014] Trial 59 pruned. 
[I 2025-04-05 03:47:28,630] Trial 60 finished with value: 26545.65706013054 and parameters: {'iterations': 1488, 'depth': 7, 'learning_rate': 0.05170781491811597, 'l2_leaf_reg': 2.3491579290244844, 'bagging_temperature': 0.9326626428960568, 'subsample': 0.8340943522693001, 'colsample_bylevel': 0.7591895147417853}. Best is trial 54 with value: 25692.088035522214.


Trial 60 finished with Mean CV RMSE: 26545.6571 +/- 5865.5423


[I 2025-04-05 03:50:01,567] Trial 61 finished with value: 26064.95527498304 and parameters: {'iterations': 1271, 'depth': 5, 'learning_rate': 0.042217257816770985, 'l2_leaf_reg': 1.9501412137946283, 'bagging_temperature': 0.9483677412846255, 'subsample': 0.8081989996186733, 'colsample_bylevel': 0.6184708572453373}. Best is trial 54 with value: 25692.088035522214.


Trial 61 finished with Mean CV RMSE: 26064.9553 +/- 6090.7560


[I 2025-04-05 03:52:35,130] Trial 62 finished with value: 26246.67164917293 and parameters: {'iterations': 1659, 'depth': 5, 'learning_rate': 0.03971943889470134, 'l2_leaf_reg': 2.1212160270146834, 'bagging_temperature': 0.8745732344484471, 'subsample': 0.8650674277205682, 'colsample_bylevel': 0.6000285055956409}. Best is trial 54 with value: 25692.088035522214.


Trial 62 finished with Mean CV RMSE: 26246.6716 +/- 6174.8881


[I 2025-04-05 03:55:09,350] Trial 63 finished with value: 25816.317702020227 and parameters: {'iterations': 1828, 'depth': 5, 'learning_rate': 0.0457403338056049, 'l2_leaf_reg': 2.9881526536884055, 'bagging_temperature': 0.8324041004764597, 'subsample': 0.8188478358179048, 'colsample_bylevel': 0.6470384912151042}. Best is trial 54 with value: 25692.088035522214.


Trial 63 finished with Mean CV RMSE: 25816.3177 +/- 5568.5063


[I 2025-04-05 03:57:41,975] Trial 64 finished with value: 25586.979838158335 and parameters: {'iterations': 1821, 'depth': 5, 'learning_rate': 0.06937803483105755, 'l2_leaf_reg': 2.8740463024913296, 'bagging_temperature': 0.8036834375065834, 'subsample': 0.8165317384883806, 'colsample_bylevel': 0.6493712886851817}. Best is trial 64 with value: 25586.979838158335.


Trial 64 finished with Mean CV RMSE: 25586.9798 +/- 5277.2066


[I 2025-04-05 04:00:15,513] Trial 65 pruned. 
[I 2025-04-05 04:02:51,827] Trial 66 pruned. 
[I 2025-04-05 04:05:26,585] Trial 67 pruned. 
[I 2025-04-05 04:07:59,089] Trial 68 finished with value: 25972.119731125345 and parameters: {'iterations': 1329, 'depth': 5, 'learning_rate': 0.05357912333384511, 'l2_leaf_reg': 2.2542772496850416, 'bagging_temperature': 0.9998755509896912, 'subsample': 0.7776084711838318, 'colsample_bylevel': 0.668406105702763}. Best is trial 64 with value: 25586.979838158335.


Trial 68 finished with Mean CV RMSE: 25972.1197 +/- 5767.6189


[I 2025-04-05 04:10:31,139] Trial 69 pruned. 
[I 2025-04-05 04:13:07,679] Trial 70 pruned. 
[I 2025-04-05 04:15:41,875] Trial 71 finished with value: 25585.410340075687 and parameters: {'iterations': 1961, 'depth': 5, 'learning_rate': 0.05921325152057508, 'l2_leaf_reg': 2.213446373214455, 'bagging_temperature': 0.8672521740064147, 'subsample': 0.7949784128198849, 'colsample_bylevel': 0.7358309340925587}. Best is trial 71 with value: 25585.410340075687.


Trial 71 finished with Mean CV RMSE: 25585.4103 +/- 5260.1351


[I 2025-04-05 04:18:15,032] Trial 72 finished with value: 26521.98422778835 and parameters: {'iterations': 1960, 'depth': 5, 'learning_rate': 0.06007375563723836, 'l2_leaf_reg': 2.6172248138003726, 'bagging_temperature': 0.8932455458031544, 'subsample': 0.8030111890467764, 'colsample_bylevel': 0.7245762810751671}. Best is trial 71 with value: 25585.410340075687.


Trial 72 finished with Mean CV RMSE: 26521.9842 +/- 6680.5813


[I 2025-04-05 04:20:47,988] Trial 73 pruned. 
[I 2025-04-05 04:23:20,564] Trial 74 finished with value: 26014.092635932968 and parameters: {'iterations': 1827, 'depth': 5, 'learning_rate': 0.053646051638065656, 'l2_leaf_reg': 1.9098281635399346, 'bagging_temperature': 0.761156348665063, 'subsample': 0.8297772459654802, 'colsample_bylevel': 0.7760648957107479}. Best is trial 71 with value: 25585.410340075687.


Trial 74 finished with Mean CV RMSE: 26014.0926 +/- 5700.4153


[I 2025-04-05 04:25:52,008] Trial 75 finished with value: 26731.22872756978 and parameters: {'iterations': 1377, 'depth': 5, 'learning_rate': 0.06581331145276058, 'l2_leaf_reg': 1.6554420780310324, 'bagging_temperature': 0.8188836070365723, 'subsample': 0.8522890963881407, 'colsample_bylevel': 0.6280071107208063}. Best is trial 71 with value: 25585.410340075687.


Trial 75 finished with Mean CV RMSE: 26731.2287 +/- 5496.4632


[I 2025-04-05 04:28:02,456] Trial 76 pruned. 
[I 2025-04-05 04:30:34,276] Trial 77 finished with value: 26154.86742056987 and parameters: {'iterations': 1098, 'depth': 5, 'learning_rate': 0.0605549786242205, 'l2_leaf_reg': 2.7990620824654404, 'bagging_temperature': 0.9675561840507263, 'subsample': 0.8155845082922499, 'colsample_bylevel': 0.61066077609509}. Best is trial 71 with value: 25585.410340075687.


Trial 77 finished with Mean CV RMSE: 26154.8674 +/- 6020.4545


[I 2025-04-05 04:33:06,884] Trial 78 pruned. 
[I 2025-04-05 04:35:40,071] Trial 79 finished with value: 25895.20299730249 and parameters: {'iterations': 2254, 'depth': 5, 'learning_rate': 0.0645213763051643, 'l2_leaf_reg': 2.3914179825008777, 'bagging_temperature': 0.7192708416236782, 'subsample': 0.7840710888252751, 'colsample_bylevel': 0.6368861267069666}. Best is trial 71 with value: 25585.410340075687.


Trial 79 finished with Mean CV RMSE: 25895.2030 +/- 5732.2272


[I 2025-04-05 04:38:15,490] Trial 80 pruned. 
[I 2025-04-05 04:40:51,569] Trial 81 finished with value: 26079.276471814374 and parameters: {'iterations': 2040, 'depth': 5, 'learning_rate': 0.016211743339313585, 'l2_leaf_reg': 1.3598249884020184, 'bagging_temperature': 0.9147119647904979, 'subsample': 0.8686207808195939, 'colsample_bylevel': 0.9127545033688426}. Best is trial 71 with value: 25585.410340075687.


Trial 81 finished with Mean CV RMSE: 26079.2765 +/- 5883.3763


[I 2025-04-05 04:43:26,803] Trial 82 pruned. 
[I 2025-04-05 04:45:58,925] Trial 83 pruned. 
[I 2025-04-05 04:48:32,033] Trial 84 pruned. 
[I 2025-04-05 04:51:03,016] Trial 85 pruned. 
[I 2025-04-05 04:53:36,642] Trial 86 pruned. 
[I 2025-04-05 04:56:09,365] Trial 87 pruned. 
[I 2025-04-05 04:58:41,543] Trial 88 finished with value: 26300.575174413978 and parameters: {'iterations': 1837, 'depth': 4, 'learning_rate': 0.01394652079328998, 'l2_leaf_reg': 3.2914197484776175, 'bagging_temperature': 0.8990481857477381, 'subsample': 0.8764563052432585, 'colsample_bylevel': 0.7847762598000532}. Best is trial 71 with value: 25585.410340075687.


Trial 88 finished with Mean CV RMSE: 26300.5752 +/- 5401.8743


[I 2025-04-05 05:01:13,627] Trial 89 pruned. 
[I 2025-04-05 05:03:51,762] Trial 90 pruned. 
[I 2025-04-05 05:06:24,011] Trial 91 finished with value: 25957.480804003328 and parameters: {'iterations': 2530, 'depth': 5, 'learning_rate': 0.06438088198954271, 'l2_leaf_reg': 2.4165786367659576, 'bagging_temperature': 0.8232460181051706, 'subsample': 0.7837438218402805, 'colsample_bylevel': 0.6377627795329683}. Best is trial 71 with value: 25585.410340075687.


Trial 91 finished with Mean CV RMSE: 25957.4808 +/- 5789.1778


[I 2025-04-05 05:08:56,134] Trial 92 finished with value: 26605.26650133697 and parameters: {'iterations': 2268, 'depth': 5, 'learning_rate': 0.06403033026793926, 'l2_leaf_reg': 2.3618934281793593, 'bagging_temperature': 0.7278608324156357, 'subsample': 0.7839578846245555, 'colsample_bylevel': 0.6308638791807616}. Best is trial 71 with value: 25585.410340075687.


Trial 92 finished with Mean CV RMSE: 26605.2665 +/- 6185.5124


[I 2025-04-05 05:11:28,626] Trial 93 pruned. 
[I 2025-04-05 05:14:01,759] Trial 94 finished with value: 26292.890630108974 and parameters: {'iterations': 1973, 'depth': 5, 'learning_rate': 0.062407953834999474, 'l2_leaf_reg': 3.023022022476519, 'bagging_temperature': 0.6505051105231979, 'subsample': 0.8350329120053964, 'colsample_bylevel': 0.6396189325338192}. Best is trial 71 with value: 25585.410340075687.


Trial 94 finished with Mean CV RMSE: 26292.8906 +/- 5701.2477


[I 2025-04-05 05:16:34,659] Trial 95 pruned. 
[I 2025-04-05 05:19:08,255] Trial 96 pruned. 
[I 2025-04-05 05:21:43,627] Trial 97 pruned. 
[I 2025-04-05 05:24:14,825] Trial 98 finished with value: 25509.539843419865 and parameters: {'iterations': 1564, 'depth': 4, 'learning_rate': 0.05541834192101109, 'l2_leaf_reg': 2.781733285057108, 'bagging_temperature': 0.7403740336979067, 'subsample': 0.8103416411319855, 'colsample_bylevel': 0.9502889737735148}. Best is trial 98 with value: 25509.539843419865.


Trial 98 finished with Mean CV RMSE: 25509.5398 +/- 5511.2806


[I 2025-04-05 05:26:46,468] Trial 99 finished with value: 25677.116805669113 and parameters: {'iterations': 1577, 'depth': 4, 'learning_rate': 0.03839639299578068, 'l2_leaf_reg': 2.7848398161952432, 'bagging_temperature': 0.917185192411765, 'subsample': 0.8125985618104157, 'colsample_bylevel': 0.9535045687125608}. Best is trial 98 with value: 25509.539843419865.


Trial 99 finished with Mean CV RMSE: 25677.1168 +/- 5413.3686


[I 2025-04-05 05:29:17,961] Trial 100 pruned. 
[I 2025-04-05 05:31:49,087] Trial 101 pruned. 
[I 2025-04-05 05:34:19,882] Trial 102 pruned. 
[I 2025-04-05 05:36:51,419] Trial 103 pruned. 
[I 2025-04-05 05:39:23,426] Trial 104 pruned. 
[I 2025-04-05 05:41:54,781] Trial 105 finished with value: 25522.290971655464 and parameters: {'iterations': 1709, 'depth': 4, 'learning_rate': 0.046020686129076865, 'l2_leaf_reg': 2.6798206250656023, 'bagging_temperature': 0.8046947860831624, 'subsample': 0.7978699879711963, 'colsample_bylevel': 0.9690172891658564}. Best is trial 98 with value: 25509.539843419865.


Trial 105 finished with Mean CV RMSE: 25522.2910 +/- 5612.2140


[I 2025-04-05 05:44:26,465] Trial 106 pruned. 
[I 2025-04-05 05:46:57,993] Trial 107 pruned. 
[I 2025-04-05 05:49:29,237] Trial 108 finished with value: 25702.829449274355 and parameters: {'iterations': 1806, 'depth': 4, 'learning_rate': 0.04617532303299386, 'l2_leaf_reg': 2.6574189459843254, 'bagging_temperature': 0.9376091832550857, 'subsample': 0.8098750569847463, 'colsample_bylevel': 0.9419165772549124}. Best is trial 98 with value: 25509.539843419865.


Trial 108 finished with Mean CV RMSE: 25702.8294 +/- 5515.2972


[I 2025-04-05 05:52:00,183] Trial 109 finished with value: 25866.88873238707 and parameters: {'iterations': 1634, 'depth': 4, 'learning_rate': 0.04619862401565504, 'l2_leaf_reg': 2.997325730301141, 'bagging_temperature': 0.932564242351105, 'subsample': 0.8104483049763983, 'colsample_bylevel': 0.9767585295687559}. Best is trial 98 with value: 25509.539843419865.


Trial 109 finished with Mean CV RMSE: 25866.8887 +/- 5572.3750


[I 2025-04-05 05:54:32,437] Trial 110 pruned. 
[I 2025-04-05 05:57:03,317] Trial 111 pruned. 
[I 2025-04-05 05:59:33,592] Trial 112 pruned. 
[I 2025-04-05 06:02:04,847] Trial 113 pruned. 
[I 2025-04-05 06:04:35,691] Trial 114 pruned. 
[I 2025-04-05 06:07:06,136] Trial 115 pruned. 
[I 2025-04-05 06:09:36,317] Trial 116 pruned. 
[I 2025-04-05 06:12:08,100] Trial 117 finished with value: 26049.729478616304 and parameters: {'iterations': 1683, 'depth': 4, 'learning_rate': 0.038700286597957115, 'l2_leaf_reg': 2.815827738141104, 'bagging_temperature': 0.7813707115167126, 'subsample': 0.8131231909706643, 'colsample_bylevel': 0.9437758464966447}. Best is trial 98 with value: 25509.539843419865.


Trial 117 finished with Mean CV RMSE: 26049.7295 +/- 6062.2983


[I 2025-04-05 06:14:38,428] Trial 118 pruned. 
[I 2025-04-05 06:17:11,258] Trial 119 pruned. 
[I 2025-04-05 06:19:43,315] Trial 120 pruned. 
[I 2025-04-05 06:22:25,508] Trial 121 finished with value: 26931.27641653528 and parameters: {'iterations': 1430, 'depth': 7, 'learning_rate': 0.05239437062341459, 'l2_leaf_reg': 1.919963313556474, 'bagging_temperature': 0.902090606273546, 'subsample': 0.9259477578474449, 'colsample_bylevel': 0.6069033409334944}. Best is trial 98 with value: 25509.539843419865.


Trial 121 finished with Mean CV RMSE: 26931.2764 +/- 6056.0177


[I 2025-04-05 06:25:00,321] Trial 122 pruned. 
[I 2025-04-05 06:27:33,511] Trial 123 finished with value: 25872.865837770234 and parameters: {'iterations': 1619, 'depth': 5, 'learning_rate': 0.04175034009520201, 'l2_leaf_reg': 2.451593107861391, 'bagging_temperature': 0.8090176878567588, 'subsample': 0.872408389163587, 'colsample_bylevel': 0.9091161955416522}. Best is trial 98 with value: 25509.539843419865.


Trial 123 finished with Mean CV RMSE: 25872.8658 +/- 5918.1222


[I 2025-04-05 06:30:06,675] Trial 124 pruned. 
[I 2025-04-05 06:32:41,346] Trial 125 pruned. 
[I 2025-04-05 06:35:15,259] Trial 126 finished with value: 25637.720717701995 and parameters: {'iterations': 1797, 'depth': 5, 'learning_rate': 0.04821811925645375, 'l2_leaf_reg': 2.890017220944138, 'bagging_temperature': 0.9463852405064099, 'subsample': 0.9129251345960229, 'colsample_bylevel': 0.8814561214027189}. Best is trial 98 with value: 25509.539843419865.


Trial 126 finished with Mean CV RMSE: 25637.7207 +/- 5817.2855


[I 2025-04-05 06:37:48,770] Trial 127 finished with value: 25500.07858635059 and parameters: {'iterations': 1810, 'depth': 5, 'learning_rate': 0.04786411768776996, 'l2_leaf_reg': 2.9042053513414094, 'bagging_temperature': 0.9492371563018543, 'subsample': 0.8069644695772521, 'colsample_bylevel': 0.6215093062836944}. Best is trial 127 with value: 25500.07858635059.


Trial 127 finished with Mean CV RMSE: 25500.0786 +/- 5553.2719


[I 2025-04-05 06:40:22,998] Trial 128 finished with value: 26675.648159976037 and parameters: {'iterations': 1836, 'depth': 5, 'learning_rate': 0.0482616113590482, 'l2_leaf_reg': 3.0988916221616063, 'bagging_temperature': 0.9508012996311791, 'subsample': 0.8076076950532649, 'colsample_bylevel': 0.6207581935111017}. Best is trial 127 with value: 25500.07858635059.


Trial 128 finished with Mean CV RMSE: 26675.6482 +/- 6084.4714


[I 2025-04-05 06:42:56,500] Trial 129 pruned. 
[I 2025-04-05 06:45:30,509] Trial 130 pruned. 
[I 2025-04-05 06:48:04,935] Trial 131 pruned. 
[I 2025-04-05 06:50:37,645] Trial 132 finished with value: 26153.20906301251 and parameters: {'iterations': 1550, 'depth': 5, 'learning_rate': 0.0516306944271627, 'l2_leaf_reg': 2.841017861126568, 'bagging_temperature': 0.9666789421235504, 'subsample': 0.9148163913614968, 'colsample_bylevel': 0.6077765739030291}. Best is trial 127 with value: 25500.07858635059.


Trial 132 finished with Mean CV RMSE: 26153.2091 +/- 5720.5550


[I 2025-04-05 06:53:10,848] Trial 133 finished with value: 26352.373281818534 and parameters: {'iterations': 1740, 'depth': 5, 'learning_rate': 0.04820139471575862, 'l2_leaf_reg': 2.685876913167621, 'bagging_temperature': 0.9158603950036722, 'subsample': 0.8152654993937976, 'colsample_bylevel': 0.9557567118105221}. Best is trial 127 with value: 25500.07858635059.


Trial 133 finished with Mean CV RMSE: 26352.3733 +/- 5949.6042


[I 2025-04-05 06:55:43,024] Trial 134 pruned. 
[I 2025-04-05 06:58:17,292] Trial 135 finished with value: 26140.133358860912 and parameters: {'iterations': 1806, 'depth': 5, 'learning_rate': 0.04443316565504466, 'l2_leaf_reg': 3.8621840562822065, 'bagging_temperature': 0.9327612594679522, 'subsample': 0.8066960065584423, 'colsample_bylevel': 0.8551096498376106}. Best is trial 127 with value: 25500.07858635059.


Trial 135 finished with Mean CV RMSE: 26140.1334 +/- 5635.1752


[I 2025-04-05 07:00:50,275] Trial 136 finished with value: 25755.372834300862 and parameters: {'iterations': 1635, 'depth': 5, 'learning_rate': 0.056977938103751, 'l2_leaf_reg': 2.3786660026536133, 'bagging_temperature': 0.8379205172135549, 'subsample': 0.9364709720109837, 'colsample_bylevel': 0.8825689450073447}. Best is trial 127 with value: 25500.07858635059.


Trial 136 finished with Mean CV RMSE: 25755.3728 +/- 5571.3543


[I 2025-04-05 07:03:23,069] Trial 137 finished with value: 26044.38777152042 and parameters: {'iterations': 1704, 'depth': 5, 'learning_rate': 0.05734953124438894, 'l2_leaf_reg': 2.3600453876198113, 'bagging_temperature': 0.826826351008139, 'subsample': 0.9410352643718666, 'colsample_bylevel': 0.8889819637403464}. Best is trial 127 with value: 25500.07858635059.


Trial 137 finished with Mean CV RMSE: 26044.3878 +/- 5942.7174


[I 2025-04-05 07:05:56,200] Trial 138 pruned. 
[I 2025-04-05 07:08:30,284] Trial 139 finished with value: 26127.45448731993 and parameters: {'iterations': 2098, 'depth': 5, 'learning_rate': 0.05606631627324375, 'l2_leaf_reg': 9.802841440461068, 'bagging_temperature': 0.8989506250627707, 'subsample': 0.9556098313408327, 'colsample_bylevel': 0.8776797309796929}. Best is trial 127 with value: 25500.07858635059.


Trial 139 finished with Mean CV RMSE: 26127.4545 +/- 5561.0069


[I 2025-04-05 07:11:02,229] Trial 140 finished with value: 25934.547414609715 and parameters: {'iterations': 1556, 'depth': 5, 'learning_rate': 0.05914847775296103, 'l2_leaf_reg': 3.0273967958812, 'bagging_temperature': 0.9773493696627605, 'subsample': 0.7943997735129533, 'colsample_bylevel': 0.8313886820190084}. Best is trial 127 with value: 25500.07858635059.


Trial 140 finished with Mean CV RMSE: 25934.5474 +/- 5875.5034


[I 2025-04-05 07:13:35,728] Trial 141 pruned. 
[I 2025-04-05 07:16:04,959] Trial 142 finished with value: 25869.771473801917 and parameters: {'iterations': 1252, 'depth': 4, 'learning_rate': 0.0699588085610947, 'l2_leaf_reg': 2.319136758769912, 'bagging_temperature': 0.9270419307149813, 'subsample': 0.8207576645972194, 'colsample_bylevel': 0.967844260936976}. Best is trial 127 with value: 25500.07858635059.


Trial 142 finished with Mean CV RMSE: 25869.7715 +/- 5566.9534


[I 2025-04-05 07:18:37,073] Trial 143 finished with value: 26017.200091240185 and parameters: {'iterations': 1463, 'depth': 5, 'learning_rate': 0.04995094655844542, 'l2_leaf_reg': 2.5893244477586124, 'bagging_temperature': 0.9513874984700348, 'subsample': 0.8448278739193607, 'colsample_bylevel': 0.9007470073074226}. Best is trial 127 with value: 25500.07858635059.


Trial 143 finished with Mean CV RMSE: 26017.2001 +/- 5698.0106


[I 2025-04-05 07:21:08,011] Trial 144 finished with value: 26127.35543784633 and parameters: {'iterations': 1617, 'depth': 4, 'learning_rate': 0.053868570707789463, 'l2_leaf_reg': 2.75247837830688, 'bagging_temperature': 0.7857389573319953, 'subsample': 0.9160310054952467, 'colsample_bylevel': 0.6654614347029689}. Best is trial 127 with value: 25500.07858635059.


Trial 144 finished with Mean CV RMSE: 26127.3554 +/- 5896.5752


[I 2025-04-05 07:23:39,865] Trial 145 finished with value: 25985.172438456422 and parameters: {'iterations': 1390, 'depth': 5, 'learning_rate': 0.06242451515566262, 'l2_leaf_reg': 1.9960308447789858, 'bagging_temperature': 0.8771171671154956, 'subsample': 0.8275210486340001, 'colsample_bylevel': 0.8816643847105682}. Best is trial 127 with value: 25500.07858635059.


Trial 145 finished with Mean CV RMSE: 25985.1724 +/- 5695.7130


[I 2025-04-05 07:26:13,002] Trial 146 finished with value: 25869.28747226811 and parameters: {'iterations': 1775, 'depth': 5, 'learning_rate': 0.05109109009586705, 'l2_leaf_reg': 2.1103999367719632, 'bagging_temperature': 0.9122615908614536, 'subsample': 0.7779782439256316, 'colsample_bylevel': 0.9367546079670569}. Best is trial 127 with value: 25500.07858635059.


Trial 146 finished with Mean CV RMSE: 25869.2875 +/- 5740.9602


[I 2025-04-05 07:28:46,654] Trial 147 finished with value: 25994.968494790635 and parameters: {'iterations': 1877, 'depth': 5, 'learning_rate': 0.04643218701694254, 'l2_leaf_reg': 2.3939181805830634, 'bagging_temperature': 0.8607743892432076, 'subsample': 0.8844787286084109, 'colsample_bylevel': 0.9460365287145266}. Best is trial 127 with value: 25500.07858635059.


Trial 147 finished with Mean CV RMSE: 25994.9685 +/- 5649.2076


[I 2025-04-05 07:31:17,813] Trial 148 pruned. 
[I 2025-04-05 07:33:49,326] Trial 149 finished with value: 26317.74914589966 and parameters: {'iterations': 1994, 'depth': 4, 'learning_rate': 0.048297596415040805, 'l2_leaf_reg': 2.196797460941551, 'bagging_temperature': 0.8143727434901825, 'subsample': 0.813074062384171, 'colsample_bylevel': 0.6328050750440628}. Best is trial 127 with value: 25500.07858635059.


Trial 149 finished with Mean CV RMSE: 26317.7491 +/- 5809.3550


[I 2025-04-05 07:36:22,628] Trial 150 finished with value: 25952.136700787298 and parameters: {'iterations': 1514, 'depth': 5, 'learning_rate': 0.03951385393906054, 'l2_leaf_reg': 2.4743114929009753, 'bagging_temperature': 0.6992377431340597, 'subsample': 0.7912714009711835, 'colsample_bylevel': 0.6008039520919877}. Best is trial 127 with value: 25500.07858635059.


Trial 150 finished with Mean CV RMSE: 25952.1367 +/- 5736.1880


[I 2025-04-05 07:38:56,696] Trial 151 finished with value: 25593.44363794491 and parameters: {'iterations': 1826, 'depth': 5, 'learning_rate': 0.05580399476628983, 'l2_leaf_reg': 1.889476225016189, 'bagging_temperature': 0.8385735525708978, 'subsample': 0.9045289176792026, 'colsample_bylevel': 0.8944420848438777}. Best is trial 127 with value: 25500.07858635059.


Trial 151 finished with Mean CV RMSE: 25593.4436 +/- 6070.6840


[I 2025-04-05 07:41:30,833] Trial 152 finished with value: 26147.001356466026 and parameters: {'iterations': 1826, 'depth': 5, 'learning_rate': 0.055651059086426716, 'l2_leaf_reg': 1.8760886522993323, 'bagging_temperature': 0.8358382594643389, 'subsample': 0.9305448491300741, 'colsample_bylevel': 0.8462734564990454}. Best is trial 127 with value: 25500.07858635059.


Trial 152 finished with Mean CV RMSE: 26147.0014 +/- 5967.8616


[I 2025-04-05 07:44:04,321] Trial 153 finished with value: 26361.85961623196 and parameters: {'iterations': 1728, 'depth': 5, 'learning_rate': 0.05969977649806169, 'l2_leaf_reg': 8.140764474923037, 'bagging_temperature': 0.8933302559976374, 'subsample': 0.9243542617724162, 'colsample_bylevel': 0.8669536226910869}. Best is trial 127 with value: 25500.07858635059.


Trial 153 finished with Mean CV RMSE: 26361.8596 +/- 5389.7041


[I 2025-04-05 07:46:36,092] Trial 154 finished with value: 25901.584969569947 and parameters: {'iterations': 1918, 'depth': 5, 'learning_rate': 0.0530243431881129, 'l2_leaf_reg': 1.6854060800997785, 'bagging_temperature': 0.427328989553063, 'subsample': 0.8010999665115004, 'colsample_bylevel': 0.889553112084085}. Best is trial 127 with value: 25500.07858635059.


Trial 154 finished with Mean CV RMSE: 25901.5850 +/- 5819.7267


[I 2025-04-05 07:49:08,747] Trial 155 finished with value: 26145.993791155597 and parameters: {'iterations': 1586, 'depth': 5, 'learning_rate': 0.049343172154635025, 'l2_leaf_reg': 1.8248701236078775, 'bagging_temperature': 0.04483617794731598, 'subsample': 0.9037200378194071, 'colsample_bylevel': 0.9005264600002486}. Best is trial 127 with value: 25500.07858635059.


Trial 155 finished with Mean CV RMSE: 26145.9938 +/- 5397.6760


[I 2025-04-05 07:51:40,678] Trial 156 pruned. 
[I 2025-04-05 07:54:12,318] Trial 157 finished with value: 26004.644127447205 and parameters: {'iterations': 1421, 'depth': 5, 'learning_rate': 0.06343900345447005, 'l2_leaf_reg': 2.0342384585964632, 'bagging_temperature': 0.9798916405252354, 'subsample': 0.8933584834273686, 'colsample_bylevel': 0.913522882170327}. Best is trial 127 with value: 25500.07858635059.


Trial 157 finished with Mean CV RMSE: 26004.6441 +/- 5777.1039


[I 2025-04-05 07:56:43,344] Trial 158 pruned. 
[I 2025-04-05 07:59:15,925] Trial 159 pruned. 
[I 2025-04-05 08:01:47,732] Trial 160 finished with value: 25907.348243147942 and parameters: {'iterations': 1962, 'depth': 4, 'learning_rate': 0.03243840537625411, 'l2_leaf_reg': 2.271759439529079, 'bagging_temperature': 0.8653192716892452, 'subsample': 0.6799703298413866, 'colsample_bylevel': 0.7001509955887062}. Best is trial 127 with value: 25500.07858635059.


Trial 160 finished with Mean CV RMSE: 25907.3482 +/- 5645.1959


[I 2025-04-05 08:04:21,593] Trial 161 finished with value: 25954.804967519285 and parameters: {'iterations': 1741, 'depth': 5, 'learning_rate': 0.06559920079550079, 'l2_leaf_reg': 2.1997495461166574, 'bagging_temperature': 0.9312361263616651, 'subsample': 0.897737763883721, 'colsample_bylevel': 0.8936810922477633}. Best is trial 127 with value: 25500.07858635059.


Trial 161 finished with Mean CV RMSE: 25954.8050 +/- 5509.0853


[I 2025-04-05 08:06:54,600] Trial 162 finished with value: 25968.698764720044 and parameters: {'iterations': 1607, 'depth': 5, 'learning_rate': 0.055230099793995166, 'l2_leaf_reg': 1.926723642015489, 'bagging_temperature': 0.8455427990744963, 'subsample': 0.9108113387154466, 'colsample_bylevel': 0.9180470555384801}. Best is trial 127 with value: 25500.07858635059.


Trial 162 finished with Mean CV RMSE: 25968.6988 +/- 5963.8457


[I 2025-04-05 08:09:29,161] Trial 163 finished with value: 26107.763531191675 and parameters: {'iterations': 1476, 'depth': 5, 'learning_rate': 0.019037483323357435, 'l2_leaf_reg': 2.59230531475507, 'bagging_temperature': 0.88441626024315, 'subsample': 0.945486987749653, 'colsample_bylevel': 0.8808958927762386}. Best is trial 127 with value: 25500.07858635059.


Trial 163 finished with Mean CV RMSE: 26107.7635 +/- 5628.3336


[I 2025-04-05 08:12:01,625] Trial 164 finished with value: 25807.156537247185 and parameters: {'iterations': 2023, 'depth': 5, 'learning_rate': 0.061070475775903646, 'l2_leaf_reg': 2.1168312737387516, 'bagging_temperature': 0.9096355318076248, 'subsample': 0.8191345616633153, 'colsample_bylevel': 0.9071845426233843}. Best is trial 127 with value: 25500.07858635059.


Trial 164 finished with Mean CV RMSE: 25807.1565 +/- 5265.4875


[I 2025-04-05 08:14:33,882] Trial 165 pruned. 
[I 2025-04-05 08:17:08,102] Trial 166 finished with value: 26100.754602616384 and parameters: {'iterations': 1996, 'depth': 5, 'learning_rate': 0.030159406295083784, 'l2_leaf_reg': 2.1215054325430303, 'bagging_temperature': 0.21428733956797213, 'subsample': 0.8124303487208577, 'colsample_bylevel': 0.8975073325374477}. Best is trial 127 with value: 25500.07858635059.


Trial 166 finished with Mean CV RMSE: 26100.7546 +/- 5776.4167


[I 2025-04-05 08:19:40,669] Trial 167 pruned. 
[I 2025-04-05 08:22:14,357] Trial 168 finished with value: 26107.76662860078 and parameters: {'iterations': 1865, 'depth': 5, 'learning_rate': 0.04158463559066273, 'l2_leaf_reg': 3.275487230349421, 'bagging_temperature': 0.903958120091793, 'subsample': 0.8027705846460408, 'colsample_bylevel': 0.6549095032699064}. Best is trial 127 with value: 25500.07858635059.


Trial 168 finished with Mean CV RMSE: 26107.7666 +/- 5655.1137


[I 2025-04-05 08:24:48,359] Trial 169 pruned. 
[I 2025-04-05 08:27:18,816] Trial 170 finished with value: 26100.032894610304 and parameters: {'iterations': 1332, 'depth': 4, 'learning_rate': 0.05910285200153859, 'l2_leaf_reg': 1.7959750680711117, 'bagging_temperature': 0.9397989736925962, 'subsample': 0.7944147529639567, 'colsample_bylevel': 0.7546982837563424}. Best is trial 127 with value: 25500.07858635059.


Trial 170 finished with Mean CV RMSE: 26100.0329 +/- 5902.3647


[I 2025-04-05 08:29:52,221] Trial 171 pruned. 
[I 2025-04-05 08:32:24,776] Trial 172 finished with value: 26269.268538847486 and parameters: {'iterations': 1659, 'depth': 5, 'learning_rate': 0.06659711635444203, 'l2_leaf_reg': 2.4484070614275306, 'bagging_temperature': 0.8530629049424288, 'subsample': 0.7868505302038873, 'colsample_bylevel': 0.7180383887078907}. Best is trial 127 with value: 25500.07858635059.


Trial 172 finished with Mean CV RMSE: 26269.2685 +/- 5847.6088


[I 2025-04-05 08:34:59,795] Trial 173 finished with value: 26161.0053306113 and parameters: {'iterations': 2143, 'depth': 5, 'learning_rate': 0.050270438807775444, 'l2_leaf_reg': 2.301148296640979, 'bagging_temperature': 0.7692652120559745, 'subsample': 0.9219875862869558, 'colsample_bylevel': 0.8939629338116086}. Best is trial 127 with value: 25500.07858635059.


Trial 173 finished with Mean CV RMSE: 26161.0053 +/- 5451.1869


[I 2025-04-05 08:37:33,961] Trial 174 finished with value: 25897.389295263463 and parameters: {'iterations': 1838, 'depth': 5, 'learning_rate': 0.04827137752424842, 'l2_leaf_reg': 2.7717185004696625, 'bagging_temperature': 0.9011835009977912, 'subsample': 0.8612256115383304, 'colsample_bylevel': 0.8881891272757872}. Best is trial 127 with value: 25500.07858635059.


Trial 174 finished with Mean CV RMSE: 25897.3893 +/- 5915.3824


[I 2025-04-05 08:40:06,926] Trial 175 pruned. 
[I 2025-04-05 08:42:42,547] Trial 176 finished with value: 26120.687579776633 and parameters: {'iterations': 2590, 'depth': 5, 'learning_rate': 0.03753848732248349, 'l2_leaf_reg': 3.0529667851798092, 'bagging_temperature': 0.9275464740323679, 'subsample': 0.8245658797963855, 'colsample_bylevel': 0.7343927794784716}. Best is trial 127 with value: 25500.07858635059.


Trial 176 finished with Mean CV RMSE: 26120.6876 +/- 6009.7637


[I 2025-04-05 08:45:20,979] Trial 177 pruned. 
[I 2025-04-05 08:47:51,482] Trial 178 finished with value: 25573.24000200465 and parameters: {'iterations': 1930, 'depth': 4, 'learning_rate': 0.06231943185406293, 'l2_leaf_reg': 2.0423364547307115, 'bagging_temperature': 0.9693933631273888, 'subsample': 0.7982855959799833, 'colsample_bylevel': 0.9491615991991149}. Best is trial 127 with value: 25500.07858635059.


Trial 178 finished with Mean CV RMSE: 25573.2400 +/- 5786.2388


[I 2025-04-05 08:50:21,833] Trial 179 pruned. 
[I 2025-04-05 08:52:51,928] Trial 180 pruned. 
[I 2025-04-05 08:55:22,452] Trial 181 pruned. 
[I 2025-04-05 08:57:55,015] Trial 182 pruned. 
[I 2025-04-05 09:00:28,433] Trial 183 pruned. 
[I 2025-04-05 09:03:01,456] Trial 184 finished with value: 25176.782201651506 and parameters: {'iterations': 1775, 'depth': 5, 'learning_rate': 0.06039828474626542, 'l2_leaf_reg': 2.132889759843395, 'bagging_temperature': 0.9470181666458671, 'subsample': 0.833424506156886, 'colsample_bylevel': 0.9622754820457864}. Best is trial 184 with value: 25176.782201651506.


Trial 184 finished with Mean CV RMSE: 25176.7822 +/- 5485.9589


[I 2025-04-05 09:05:32,499] Trial 185 pruned. 
[I 2025-04-05 09:08:05,894] Trial 186 pruned. 
[I 2025-04-05 09:10:39,111] Trial 187 pruned. 
[I 2025-04-05 09:13:12,092] Trial 188 finished with value: 26169.45588796595 and parameters: {'iterations': 1706, 'depth': 5, 'learning_rate': 0.05907221274173258, 'l2_leaf_reg': 2.43169821316337, 'bagging_temperature': 0.9895722957124087, 'subsample': 0.820247243551616, 'colsample_bylevel': 0.9364883072401697}. Best is trial 184 with value: 25176.782201651506.


Trial 188 finished with Mean CV RMSE: 26169.4559 +/- 5889.4214


[I 2025-04-05 09:15:42,646] Trial 189 pruned. 
[I 2025-04-05 09:18:15,134] Trial 190 finished with value: 25972.974141612056 and parameters: {'iterations': 1511, 'depth': 5, 'learning_rate': 0.061957257307677906, 'l2_leaf_reg': 1.743738089941504, 'bagging_temperature': 0.8968693951082459, 'subsample': 0.7997784602307253, 'colsample_bylevel': 0.6250305134496484}. Best is trial 184 with value: 25176.782201651506.


Trial 190 finished with Mean CV RMSE: 25972.9741 +/- 5470.4039


[I 2025-04-05 09:20:49,805] Trial 191 pruned. 
[I 2025-04-05 09:23:23,543] Trial 192 pruned. 
[I 2025-04-05 09:25:55,801] Trial 193 finished with value: 26516.23077900029 and parameters: {'iterations': 1609, 'depth': 5, 'learning_rate': 0.06519097466145844, 'l2_leaf_reg': 2.346641271707092, 'bagging_temperature': 0.9593164979913015, 'subsample': 0.8113063121103893, 'colsample_bylevel': 0.7689811703378185}. Best is trial 184 with value: 25176.782201651506.


Trial 193 finished with Mean CV RMSE: 26516.2308 +/- 6641.3200


[I 2025-04-05 09:28:28,827] Trial 194 pruned. 
[I 2025-04-05 09:31:01,309] Trial 195 finished with value: 26340.27454711399 and parameters: {'iterations': 1405, 'depth': 5, 'learning_rate': 0.05799429042466648, 'l2_leaf_reg': 2.820058568634953, 'bagging_temperature': 0.8914548672951216, 'subsample': 0.8196099203822671, 'colsample_bylevel': 0.6052930039604614}. Best is trial 184 with value: 25176.782201651506.


Trial 195 finished with Mean CV RMSE: 26340.2745 +/- 5881.4978


[I 2025-04-05 09:33:34,460] Trial 196 pruned. 
[I 2025-04-05 09:36:07,702] Trial 197 finished with value: 25933.621590593375 and parameters: {'iterations': 1795, 'depth': 5, 'learning_rate': 0.05154295209173624, 'l2_leaf_reg': 2.026898618029383, 'bagging_temperature': 0.7441614919854995, 'subsample': 0.8004223706252461, 'colsample_bylevel': 0.9899262750898303}. Best is trial 184 with value: 25176.782201651506.


Trial 197 finished with Mean CV RMSE: 25933.6216 +/- 5533.0764


[I 2025-04-05 09:38:38,979] Trial 198 pruned. 
[I 2025-04-05 09:41:16,518] Trial 199 pruned. 


In [28]:
best_params

{'iterations': 1775,
 'depth': 5,
 'learning_rate': 0.06039828474626542,
 'l2_leaf_reg': 2.132889759843395,
 'bagging_temperature': 0.9470181666458671,
 'subsample': 0.833424506156886,
 'colsample_bylevel': 0.9622754820457864,
 'loss_function': 'RMSE',
 'random_seed': 42,
 'verbose': 0}

# ИТОГИ
Мы не вышли на целевые показатели 12800, что были раньше.
Стало понятно, что логарифмирование целевой переменной и всех фич ухудшает результаты. Так же scaler ни к чему хорошему не приводит. Лучший результат тут был с ансамблем моделей с разными Random State-ами - 13500.