In [10]:
!pip install category_encoders

Collecting category_encoders
  Downloading category_encoders-2.8.1-py3-none-any.whl.metadata (7.9 kB)
Downloading category_encoders-2.8.1-py3-none-any.whl (85 kB)
   ---------------------------------------- 0.0/85.7 kB ? eta -:--:--
   ---------------------------------------- 85.7/85.7 kB 4.7 MB/s eta 0:00:00
Installing collected packages: category_encoders
Successfully installed category_encoders-2.8.1


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from  sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor, Pool
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, mean_absolute_percentage_error, mean_squared_error, r2_score,mean_absolute_error
!pip install optuna-integration[catboost]
#from  typing import
from scipy import sparse
import warnings
warnings.filterwarnings('ignore')
import optuna
import logging
import sys
from optuna.integration.xgboost import XGBoostPruningCallback
# EarlyStopping is directly from XGBoost's own callback module
from xgboost.callback import EarlyStopping

In [4]:
df=pd.read_csv("data/cleaned_data_after_imputation.csv")
df.columns
df.dtypes
numeric_columns=["bedroomCount","toilet_and_bath","habitableSurface","facedeCount","hasTerrace","totalParkingCount"]
categorical_columns=["type","subtype","province","locality","postCode","buildingCondition","epcScore"]

encoder = OneHotEncoder(sparse_output=False, drop="first")
one_hot_encoded = encoder.fit_transform(df[categorical_columns])
one_hot_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out(categorical_columns))
df = pd.concat([df, one_hot_df], axis=1)
df = df.drop(categorical_columns, axis=1)

In [None]:
#CATBOOST OPTUNA WITH ONEHOTENCODING
price_column = df['price'] # Store price before dropping categorical columns

# Now, drop the original categorical columns and set X and y
# (The previous concat operation handled this by only taking numeric and one-hot)
# So, X should be the combined numeric and one-hot df
X = df.copy()
# Re-add price column from original df if it was removed
# Assuming price was NOT in `numeric_columns` or `categorical_columns`
# and it was implicitly left out of the `df_numeric` selection.
# If `price` is truly only the target, it shouldn't be in `X`.
if 'price' in X.columns:
    y = X.pop("price") # Extract price as target and remove from features
else:
    # This scenario means 'price' was likely excluded from numeric_columns
    # and not part of categorical_columns. We need to get it from the original df.
    y = pd.read_csv("data/nonull_smalldata.csv")["price"]
    # Adjust X to ensure index alignment with y if `df` (X) was re-created without `price`
    X = df.copy() # X now contains only features

# Align indices just in case
X.reset_index(drop=True, inplace=True)
y.reset_index(drop=True, inplace=True)


print("\nFeatures (X) shape after encoding:", X.shape)
print("Target (y) shape:", y.shape)

# Split data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=1234)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=1234)

print("\nData splitting complete.")
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

# CatBoost's strength is handling categorical features internally.
# However, since you've already One-Hot Encoded them, CatBoost will treat them
# as numerical features. This is perfectly fine, but you won't use CatBoost's
# native categorical handling for these columns.
# If you wanted to use native handling, you would pass `df[categorical_columns]` directly
# and specify `cat_features` indices *before* one-hot encoding.
# For this setup, there are no `cat_features` to pass to Pool, as they are already encoded.
categorical_features_indices = []

# Create CatBoost Pool objects for efficient data handling
train_pool = Pool(X_train, y_train, cat_features=categorical_features_indices)
val_pool = Pool(X_val, y_val, cat_features=categorical_features_indices)
test_pool = Pool(X_test, y_test, cat_features=categorical_features_indices)

print("--- Data Preprocessing Complete ---")

# --- 2. Define the Optuna Objective Function ---
def objective(trial):
    """
    Objective function for Optuna to optimize CatBoost hyperparameters.
    It returns the validation RMSE for a given set of hyperparameters.
    """
    params = {
        "iterations": trial.suggest_int("iterations", 100, 1000),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-3, 0.3),
        "depth": trial.suggest_int("depth", 4, 10),
        "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1e-2, 10),
        "subsample": trial.suggest_uniform("subsample", 0.6, 1.0),
        "colsample_bylevel": trial.suggest_uniform("colsample_bylevel", 0.6, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 30),
        "loss_function": "RMSE",
        "eval_metric": "RMSE",
        "random_seed": 42,
        "verbose": 0,
        #"od_type": "Iter",
        #"od_wait": trial.suggest_int("od_wait", 10, 50),
        "early_stopping_rounds": trial.suggest_int("early_stopping_rounds", 10, 50),
    }

    model = CatBoostRegressor(**params)

    pruning_callback = optuna.integration.CatBoostPruningCallback(trial, "RMSE")

    try:
        model.fit(
            train_pool,
            eval_set=val_pool,
            early_stopping_rounds=params["early_stopping_rounds"],
            callbacks=[pruning_callback],
            verbose=0
        )
    except optuna.exceptions.TrialPruned:
        raise

    val_predictions = model.predict(X_val)
    rmse = np.sqrt(mean_squared_error(y_val, val_predictions))

    return rmse

# --- 3. Set up and Run Optuna Study ---
if __name__ == "__main__":
    optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))

    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=42),
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
    )

    print("\n--- Starting Optuna optimization ---")
    study.optimize(objective, n_trials=50, timeout=600) # Run 50 trials or for 10 minutes

    print("\n--- Optimization finished ---")
    print(f"Number of finished trials: {len(study.trials)}")
    print(f"Number of pruned trials: {len(study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.PRUNED]))}")

    print("\nBest trial:")
    trial = study.best_trial

    print(f"  Value (RMSE on validation set): {trial.value:.4f}")
    print("  Best Hyperparameters:")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    # --- 4. Train the final model with the best hyperparameters ---
    print("\n--- Training final model with best hyperparameters ---")
    best_params = trial.params
    best_params_for_final_model = {k: v for k, v in best_params.items() if k not in ["od_wait", "early_stopping_rounds"]}
    best_params_for_final_model["iterations"] = 10000 # Set a high number, rely on early stopping
    best_params_for_final_model["verbose"] = 100 # Show some progress
    best_params_for_final_model["early_stopping_rounds"] = 50 # Re-add for final training

    final_cat_regressor = CatBoostRegressor(**best_params_for_final_model)

    final_cat_regressor.fit(
        train_pool,
        eval_set=val_pool,
        early_stopping_rounds=best_params_for_final_model["early_stopping_rounds"],
        verbose=best_params_for_final_model["verbose"],
    )

    print(f"Best iteration of final model: {final_cat_regressor.get_best_iteration()}")

    # --- 5. Evaluate the final model on the unseen test set ---
    final_predictions = final_cat_regressor.predict(test_pool)

    final_mse = mean_squared_error(y_test, final_predictions)
    final_rmse = np.sqrt(final_mse)
    final_r2 = r2_score(y_test, final_predictions)

    print(f"\n--- Final Model Performance on Test Set ---")
    print(f"  MSE: {final_mse:.4f}")
    print(f"  RMSE: {final_rmse:.4f}")
    print(f"  R-squared: {final_r2:.4f}")

    # Optionally, visualize the optimization process
    try:
        from optuna.visualization import plot_optimization_history, plot_parallel_coordinate, plot_slice
        print("\n--- Generating Optuna Visualizations (requires plotly and kaleido) ---")
        fig_history = plot_optimization_history(study)
        fig_history.show()

        fig_parallel = plot_parallel_coordinate(study)
        fig_parallel.show()

        fig_slice = plot_slice(study)
        fig_slice.show()
    except ImportError:
        print("\nInstall plotly and kaleido for Optuna visualizations: pip install plotly kaleido")


Features (X) shape after encoding: (56379, 4620)
Target (y) shape: (56379,)

Data splitting complete.
X_train shape: (39465, 4620), y_train shape: (39465,)
X_val shape: (8457, 4620), y_val shape: (8457,)
X_test shape: (8457, 4620), y_test shape: (8457,)


[I 2025-06-30 17:20:24,186] A new study created in memory with name: no-name-bd89960a-8217-4837-86e4-23de06852684


--- Data Preprocessing Complete ---
A new study created in memory with name: no-name-bd89960a-8217-4837-86e4-23de06852684
A new study created in memory with name: no-name-bd89960a-8217-4837-86e4-23de06852684
A new study created in memory with name: no-name-bd89960a-8217-4837-86e4-23de06852684
A new study created in memory with name: no-name-bd89960a-8217-4837-86e4-23de06852684

--- Starting Optuna optimization ---


[I 2025-06-30 17:20:51,264] Trial 0 finished with value: 193164.08466508074 and parameters: {'iterations': 437, 'learning_rate': 0.22648248189516848, 'depth': 9, 'l2_leaf_reg': 0.6251373574521749, 'subsample': 0.6624074561769746, 'colsample_bylevel': 0.662397808134481, 'min_data_in_leaf': 2, 'early_stopping_rounds': 45}. Best is trial 0 with value: 193164.08466508074.


Trial 0 finished with value: 193164.08466508074 and parameters: {'iterations': 437, 'learning_rate': 0.22648248189516848, 'depth': 9, 'l2_leaf_reg': 0.6251373574521749, 'subsample': 0.6624074561769746, 'colsample_bylevel': 0.662397808134481, 'min_data_in_leaf': 2, 'early_stopping_rounds': 45}. Best is trial 0 with value: 193164.08466508074.
Trial 0 finished with value: 193164.08466508074 and parameters: {'iterations': 437, 'learning_rate': 0.22648248189516848, 'depth': 9, 'l2_leaf_reg': 0.6251373574521749, 'subsample': 0.6624074561769746, 'colsample_bylevel': 0.662397808134481, 'min_data_in_leaf': 2, 'early_stopping_rounds': 45}. Best is trial 0 with value: 193164.08466508074.
Trial 0 finished with value: 193164.08466508074 and parameters: {'iterations': 437, 'learning_rate': 0.22648248189516848, 'depth': 9, 'l2_leaf_reg': 0.6251373574521749, 'subsample': 0.6624074561769746, 'colsample_bylevel': 0.662397808134481, 'min_data_in_leaf': 2, 'early_stopping_rounds': 45}. Best is trial 0 wit

[I 2025-06-30 17:21:01,402] Trial 1 finished with value: 213233.10502586202 and parameters: {'iterations': 641, 'learning_rate': 0.05675206026988748, 'depth': 4, 'l2_leaf_reg': 8.123245085588687, 'subsample': 0.9329770563201687, 'colsample_bylevel': 0.6849356442713105, 'min_data_in_leaf': 6, 'early_stopping_rounds': 17}. Best is trial 0 with value: 193164.08466508074.


Trial 1 finished with value: 213233.10502586202 and parameters: {'iterations': 641, 'learning_rate': 0.05675206026988748, 'depth': 4, 'l2_leaf_reg': 8.123245085588687, 'subsample': 0.9329770563201687, 'colsample_bylevel': 0.6849356442713105, 'min_data_in_leaf': 6, 'early_stopping_rounds': 17}. Best is trial 0 with value: 193164.08466508074.
Trial 1 finished with value: 213233.10502586202 and parameters: {'iterations': 641, 'learning_rate': 0.05675206026988748, 'depth': 4, 'l2_leaf_reg': 8.123245085588687, 'subsample': 0.9329770563201687, 'colsample_bylevel': 0.6849356442713105, 'min_data_in_leaf': 6, 'early_stopping_rounds': 17}. Best is trial 0 with value: 193164.08466508074.
Trial 1 finished with value: 213233.10502586202 and parameters: {'iterations': 641, 'learning_rate': 0.05675206026988748, 'depth': 4, 'l2_leaf_reg': 8.123245085588687, 'subsample': 0.9329770563201687, 'colsample_bylevel': 0.6849356442713105, 'min_data_in_leaf': 6, 'early_stopping_rounds': 17}. Best is trial 0 wit

[I 2025-06-30 17:21:12,473] Trial 2 finished with value: 219006.47559242797 and parameters: {'iterations': 374, 'learning_rate': 0.0199473547030745, 'depth': 7, 'l2_leaf_reg': 0.07476312062252301, 'subsample': 0.8447411578889518, 'colsample_bylevel': 0.6557975442608167, 'min_data_in_leaf': 9, 'early_stopping_rounds': 25}. Best is trial 0 with value: 193164.08466508074.


Trial 2 finished with value: 219006.47559242797 and parameters: {'iterations': 374, 'learning_rate': 0.0199473547030745, 'depth': 7, 'l2_leaf_reg': 0.07476312062252301, 'subsample': 0.8447411578889518, 'colsample_bylevel': 0.6557975442608167, 'min_data_in_leaf': 9, 'early_stopping_rounds': 25}. Best is trial 0 with value: 193164.08466508074.
Trial 2 finished with value: 219006.47559242797 and parameters: {'iterations': 374, 'learning_rate': 0.0199473547030745, 'depth': 7, 'l2_leaf_reg': 0.07476312062252301, 'subsample': 0.8447411578889518, 'colsample_bylevel': 0.6557975442608167, 'min_data_in_leaf': 9, 'early_stopping_rounds': 25}. Best is trial 0 with value: 193164.08466508074.
Trial 2 finished with value: 219006.47559242797 and parameters: {'iterations': 374, 'learning_rate': 0.0199473547030745, 'depth': 7, 'l2_leaf_reg': 0.07476312062252301, 'subsample': 0.8447411578889518, 'colsample_bylevel': 0.6557975442608167, 'min_data_in_leaf': 9, 'early_stopping_rounds': 25}. Best is trial 0 

[I 2025-06-30 17:21:22,589] Trial 3 finished with value: 202730.51843127175 and parameters: {'iterations': 510, 'learning_rate': 0.08810003129071789, 'depth': 5, 'l2_leaf_reg': 0.34890188454913873, 'subsample': 0.836965827544817, 'colsample_bylevel': 0.6185801650879991, 'min_data_in_leaf': 19, 'early_stopping_rounds': 16}. Best is trial 0 with value: 193164.08466508074.


Trial 3 finished with value: 202730.51843127175 and parameters: {'iterations': 510, 'learning_rate': 0.08810003129071789, 'depth': 5, 'l2_leaf_reg': 0.34890188454913873, 'subsample': 0.836965827544817, 'colsample_bylevel': 0.6185801650879991, 'min_data_in_leaf': 19, 'early_stopping_rounds': 16}. Best is trial 0 with value: 193164.08466508074.
Trial 3 finished with value: 202730.51843127175 and parameters: {'iterations': 510, 'learning_rate': 0.08810003129071789, 'depth': 5, 'l2_leaf_reg': 0.34890188454913873, 'subsample': 0.836965827544817, 'colsample_bylevel': 0.6185801650879991, 'min_data_in_leaf': 19, 'early_stopping_rounds': 16}. Best is trial 0 with value: 193164.08466508074.
Trial 3 finished with value: 202730.51843127175 and parameters: {'iterations': 510, 'learning_rate': 0.08810003129071789, 'depth': 5, 'l2_leaf_reg': 0.34890188454913873, 'subsample': 0.836965827544817, 'colsample_bylevel': 0.6185801650879991, 'min_data_in_leaf': 19, 'early_stopping_rounds': 16}. Best is trial

[I 2025-06-30 17:21:38,466] Trial 4 finished with value: 200398.3155203107 and parameters: {'iterations': 158, 'learning_rate': 0.22413234378101138, 'depth': 10, 'l2_leaf_reg': 2.6619018884890564, 'subsample': 0.7218455076693483, 'colsample_bylevel': 0.6390688456025535, 'min_data_in_leaf': 21, 'early_stopping_rounds': 28}. Best is trial 0 with value: 193164.08466508074.


Trial 4 finished with value: 200398.3155203107 and parameters: {'iterations': 158, 'learning_rate': 0.22413234378101138, 'depth': 10, 'l2_leaf_reg': 2.6619018884890564, 'subsample': 0.7218455076693483, 'colsample_bylevel': 0.6390688456025535, 'min_data_in_leaf': 21, 'early_stopping_rounds': 28}. Best is trial 0 with value: 193164.08466508074.
Trial 4 finished with value: 200398.3155203107 and parameters: {'iterations': 158, 'learning_rate': 0.22413234378101138, 'depth': 10, 'l2_leaf_reg': 2.6619018884890564, 'subsample': 0.7218455076693483, 'colsample_bylevel': 0.6390688456025535, 'min_data_in_leaf': 21, 'early_stopping_rounds': 28}. Best is trial 0 with value: 193164.08466508074.
Trial 4 finished with value: 200398.3155203107 and parameters: {'iterations': 158, 'learning_rate': 0.22413234378101138, 'depth': 10, 'l2_leaf_reg': 2.6619018884890564, 'subsample': 0.7218455076693483, 'colsample_bylevel': 0.6390688456025535, 'min_data_in_leaf': 21, 'early_stopping_rounds': 28}. Best is trial

[I 2025-06-30 17:21:40,159] Trial 5 finished with value: 394421.5608890197 and parameters: {'iterations': 209, 'learning_rate': 0.016850517723339092, 'depth': 4, 'l2_leaf_reg': 5.345166110646819, 'subsample': 0.7035119926400067, 'colsample_bylevel': 0.8650089137415928, 'min_data_in_leaf': 10, 'early_stopping_rounds': 31}. Best is trial 0 with value: 193164.08466508074.


Trial 5 finished with value: 394421.5608890197 and parameters: {'iterations': 209, 'learning_rate': 0.016850517723339092, 'depth': 4, 'l2_leaf_reg': 5.345166110646819, 'subsample': 0.7035119926400067, 'colsample_bylevel': 0.8650089137415928, 'min_data_in_leaf': 10, 'early_stopping_rounds': 31}. Best is trial 0 with value: 193164.08466508074.
Trial 5 finished with value: 394421.5608890197 and parameters: {'iterations': 209, 'learning_rate': 0.016850517723339092, 'depth': 4, 'l2_leaf_reg': 5.345166110646819, 'subsample': 0.7035119926400067, 'colsample_bylevel': 0.8650089137415928, 'min_data_in_leaf': 10, 'early_stopping_rounds': 31}. Best is trial 0 with value: 193164.08466508074.
Trial 5 finished with value: 394421.5608890197 and parameters: {'iterations': 209, 'learning_rate': 0.016850517723339092, 'depth': 4, 'l2_leaf_reg': 5.345166110646819, 'subsample': 0.7035119926400067, 'colsample_bylevel': 0.8650089137415928, 'min_data_in_leaf': 10, 'early_stopping_rounds': 31}. Best is trial 0 

[I 2025-06-30 17:21:42,902] Trial 6 finished with value: 419852.1247820057 and parameters: {'iterations': 592, 'learning_rate': 0.002870165242185818, 'depth': 10, 'l2_leaf_reg': 2.1154290797261215, 'subsample': 0.9757995766256756, 'colsample_bylevel': 0.9579309401710595, 'min_data_in_leaf': 18, 'early_stopping_rounds': 47}. Best is trial 0 with value: 193164.08466508074.


Trial 6 finished with value: 419852.1247820057 and parameters: {'iterations': 592, 'learning_rate': 0.002870165242185818, 'depth': 10, 'l2_leaf_reg': 2.1154290797261215, 'subsample': 0.9757995766256756, 'colsample_bylevel': 0.9579309401710595, 'min_data_in_leaf': 18, 'early_stopping_rounds': 47}. Best is trial 0 with value: 193164.08466508074.
Trial 6 finished with value: 419852.1247820057 and parameters: {'iterations': 592, 'learning_rate': 0.002870165242185818, 'depth': 10, 'l2_leaf_reg': 2.1154290797261215, 'subsample': 0.9757995766256756, 'colsample_bylevel': 0.9579309401710595, 'min_data_in_leaf': 18, 'early_stopping_rounds': 47}. Best is trial 0 with value: 193164.08466508074.
Trial 6 finished with value: 419852.1247820057 and parameters: {'iterations': 592, 'learning_rate': 0.002870165242185818, 'depth': 10, 'l2_leaf_reg': 2.1154290797261215, 'subsample': 0.9757995766256756, 'colsample_bylevel': 0.9579309401710595, 'min_data_in_leaf': 18, 'early_stopping_rounds': 47}. Best is tr

[I 2025-06-30 17:21:44,541] Trial 7 finished with value: 420620.724283256 and parameters: {'iterations': 179, 'learning_rate': 0.0030582523213789677, 'depth': 4, 'l2_leaf_reg': 0.09462175356461491, 'subsample': 0.7554709158757928, 'colsample_bylevel': 0.7085396127095583, 'min_data_in_leaf': 25, 'early_stopping_rounds': 24}. Best is trial 0 with value: 193164.08466508074.


Trial 7 finished with value: 420620.724283256 and parameters: {'iterations': 179, 'learning_rate': 0.0030582523213789677, 'depth': 4, 'l2_leaf_reg': 0.09462175356461491, 'subsample': 0.7554709158757928, 'colsample_bylevel': 0.7085396127095583, 'min_data_in_leaf': 25, 'early_stopping_rounds': 24}. Best is trial 0 with value: 193164.08466508074.
Trial 7 finished with value: 420620.724283256 and parameters: {'iterations': 179, 'learning_rate': 0.0030582523213789677, 'depth': 4, 'l2_leaf_reg': 0.09462175356461491, 'subsample': 0.7554709158757928, 'colsample_bylevel': 0.7085396127095583, 'min_data_in_leaf': 25, 'early_stopping_rounds': 24}. Best is trial 0 with value: 193164.08466508074.
Trial 7 finished with value: 420620.724283256 and parameters: {'iterations': 179, 'learning_rate': 0.0030582523213789677, 'depth': 4, 'l2_leaf_reg': 0.09462175356461491, 'subsample': 0.7554709158757928, 'colsample_bylevel': 0.7085396127095583, 'min_data_in_leaf': 25, 'early_stopping_rounds': 24}. Best is tr

[I 2025-06-30 17:21:46,410] Trial 8 finished with value: 383816.238887606 and parameters: {'iterations': 353, 'learning_rate': 0.022096526145513846, 'depth': 4, 'l2_leaf_reg': 2.5502980701628926, 'subsample': 0.6298202574719083, 'colsample_bylevel': 0.9947547746402069, 'min_data_in_leaf': 24, 'early_stopping_rounds': 18}. Best is trial 0 with value: 193164.08466508074.


Trial 8 finished with value: 383816.238887606 and parameters: {'iterations': 353, 'learning_rate': 0.022096526145513846, 'depth': 4, 'l2_leaf_reg': 2.5502980701628926, 'subsample': 0.6298202574719083, 'colsample_bylevel': 0.9947547746402069, 'min_data_in_leaf': 24, 'early_stopping_rounds': 18}. Best is trial 0 with value: 193164.08466508074.
Trial 8 finished with value: 383816.238887606 and parameters: {'iterations': 353, 'learning_rate': 0.022096526145513846, 'depth': 4, 'l2_leaf_reg': 2.5502980701628926, 'subsample': 0.6298202574719083, 'colsample_bylevel': 0.9947547746402069, 'min_data_in_leaf': 24, 'early_stopping_rounds': 18}. Best is trial 0 with value: 193164.08466508074.
Trial 8 finished with value: 383816.238887606 and parameters: {'iterations': 353, 'learning_rate': 0.022096526145513846, 'depth': 4, 'l2_leaf_reg': 2.5502980701628926, 'subsample': 0.6298202574719083, 'colsample_bylevel': 0.9947547746402069, 'min_data_in_leaf': 24, 'early_stopping_rounds': 18}. Best is trial 0 

[I 2025-06-30 17:21:52,495] Trial 9 finished with value: 212370.7638043208 and parameters: {'iterations': 104, 'learning_rate': 0.10471209213501693, 'depth': 8, 'l2_leaf_reg': 1.5382308040278996, 'subsample': 0.9085081386743783, 'colsample_bylevel': 0.6296178606936361, 'min_data_in_leaf': 11, 'early_stopping_rounds': 14}. Best is trial 0 with value: 193164.08466508074.


Trial 9 finished with value: 212370.7638043208 and parameters: {'iterations': 104, 'learning_rate': 0.10471209213501693, 'depth': 8, 'l2_leaf_reg': 1.5382308040278996, 'subsample': 0.9085081386743783, 'colsample_bylevel': 0.6296178606936361, 'min_data_in_leaf': 11, 'early_stopping_rounds': 14}. Best is trial 0 with value: 193164.08466508074.
Trial 9 finished with value: 212370.7638043208 and parameters: {'iterations': 104, 'learning_rate': 0.10471209213501693, 'depth': 8, 'l2_leaf_reg': 1.5382308040278996, 'subsample': 0.9085081386743783, 'colsample_bylevel': 0.6296178606936361, 'min_data_in_leaf': 11, 'early_stopping_rounds': 14}. Best is trial 0 with value: 193164.08466508074.
Trial 9 finished with value: 212370.7638043208 and parameters: {'iterations': 104, 'learning_rate': 0.10471209213501693, 'depth': 8, 'l2_leaf_reg': 1.5382308040278996, 'subsample': 0.9085081386743783, 'colsample_bylevel': 0.6296178606936361, 'min_data_in_leaf': 11, 'early_stopping_rounds': 14}. Best is trial 0 

[I 2025-06-30 17:21:54,660] Trial 10 finished with value: 424416.6124620543 and parameters: {'iterations': 906, 'learning_rate': 0.0010347984019709355, 'depth': 8, 'l2_leaf_reg': 0.01132102180423038, 'subsample': 0.6061470949312417, 'colsample_bylevel': 0.775232370984732, 'min_data_in_leaf': 1, 'early_stopping_rounds': 50}. Best is trial 0 with value: 193164.08466508074.


Trial 10 finished with value: 424416.6124620543 and parameters: {'iterations': 906, 'learning_rate': 0.0010347984019709355, 'depth': 8, 'l2_leaf_reg': 0.01132102180423038, 'subsample': 0.6061470949312417, 'colsample_bylevel': 0.775232370984732, 'min_data_in_leaf': 1, 'early_stopping_rounds': 50}. Best is trial 0 with value: 193164.08466508074.
Trial 10 finished with value: 424416.6124620543 and parameters: {'iterations': 906, 'learning_rate': 0.0010347984019709355, 'depth': 8, 'l2_leaf_reg': 0.01132102180423038, 'subsample': 0.6061470949312417, 'colsample_bylevel': 0.775232370984732, 'min_data_in_leaf': 1, 'early_stopping_rounds': 50}. Best is trial 0 with value: 193164.08466508074.
Trial 10 finished with value: 424416.6124620543 and parameters: {'iterations': 906, 'learning_rate': 0.0010347984019709355, 'depth': 8, 'l2_leaf_reg': 0.01132102180423038, 'subsample': 0.6061470949312417, 'colsample_bylevel': 0.775232370984732, 'min_data_in_leaf': 1, 'early_stopping_rounds': 50}. Best is tr

[I 2025-06-30 17:22:43,648] Trial 11 finished with value: 197127.6718143651 and parameters: {'iterations': 806, 'learning_rate': 0.2928900386183503, 'depth': 10, 'l2_leaf_reg': 0.5990937895571765, 'subsample': 0.693378857415877, 'colsample_bylevel': 0.7580270214839643, 'min_data_in_leaf': 29, 'early_stopping_rounds': 39}. Best is trial 0 with value: 193164.08466508074.


Trial 11 finished with value: 197127.6718143651 and parameters: {'iterations': 806, 'learning_rate': 0.2928900386183503, 'depth': 10, 'l2_leaf_reg': 0.5990937895571765, 'subsample': 0.693378857415877, 'colsample_bylevel': 0.7580270214839643, 'min_data_in_leaf': 29, 'early_stopping_rounds': 39}. Best is trial 0 with value: 193164.08466508074.
Trial 11 finished with value: 197127.6718143651 and parameters: {'iterations': 806, 'learning_rate': 0.2928900386183503, 'depth': 10, 'l2_leaf_reg': 0.5990937895571765, 'subsample': 0.693378857415877, 'colsample_bylevel': 0.7580270214839643, 'min_data_in_leaf': 29, 'early_stopping_rounds': 39}. Best is trial 0 with value: 193164.08466508074.
Trial 11 finished with value: 197127.6718143651 and parameters: {'iterations': 806, 'learning_rate': 0.2928900386183503, 'depth': 10, 'l2_leaf_reg': 0.5990937895571765, 'subsample': 0.693378857415877, 'colsample_bylevel': 0.7580270214839643, 'min_data_in_leaf': 29, 'early_stopping_rounds': 39}. Best is trial 0 

[I 2025-06-30 17:23:36,049] Trial 12 finished with value: 193778.8699970946 and parameters: {'iterations': 853, 'learning_rate': 0.29830637260334764, 'depth': 9, 'l2_leaf_reg': 0.4529796795685664, 'subsample': 0.669200985181719, 'colsample_bylevel': 0.7457769212953824, 'min_data_in_leaf': 30, 'early_stopping_rounds': 39}. Best is trial 0 with value: 193164.08466508074.


Trial 12 finished with value: 193778.8699970946 and parameters: {'iterations': 853, 'learning_rate': 0.29830637260334764, 'depth': 9, 'l2_leaf_reg': 0.4529796795685664, 'subsample': 0.669200985181719, 'colsample_bylevel': 0.7457769212953824, 'min_data_in_leaf': 30, 'early_stopping_rounds': 39}. Best is trial 0 with value: 193164.08466508074.
Trial 12 finished with value: 193778.8699970946 and parameters: {'iterations': 853, 'learning_rate': 0.29830637260334764, 'depth': 9, 'l2_leaf_reg': 0.4529796795685664, 'subsample': 0.669200985181719, 'colsample_bylevel': 0.7457769212953824, 'min_data_in_leaf': 30, 'early_stopping_rounds': 39}. Best is trial 0 with value: 193164.08466508074.
Trial 12 finished with value: 193778.8699970946 and parameters: {'iterations': 853, 'learning_rate': 0.29830637260334764, 'depth': 9, 'l2_leaf_reg': 0.4529796795685664, 'subsample': 0.669200985181719, 'colsample_bylevel': 0.7457769212953824, 'min_data_in_leaf': 30, 'early_stopping_rounds': 39}. Best is trial 0 

[I 2025-06-30 17:23:44,289] Trial 13 finished with value: 205917.74996166502 and parameters: {'iterations': 985, 'learning_rate': 0.15735554323803275, 'depth': 8, 'l2_leaf_reg': 0.1268936930890743, 'subsample': 0.6572788586594467, 'colsample_bylevel': 0.8567997340268858, 'min_data_in_leaf': 1, 'early_stopping_rounds': 40}. Best is trial 0 with value: 193164.08466508074.


Trial 13 finished with value: 205917.74996166502 and parameters: {'iterations': 985, 'learning_rate': 0.15735554323803275, 'depth': 8, 'l2_leaf_reg': 0.1268936930890743, 'subsample': 0.6572788586594467, 'colsample_bylevel': 0.8567997340268858, 'min_data_in_leaf': 1, 'early_stopping_rounds': 40}. Best is trial 0 with value: 193164.08466508074.
Trial 13 finished with value: 205917.74996166502 and parameters: {'iterations': 985, 'learning_rate': 0.15735554323803275, 'depth': 8, 'l2_leaf_reg': 0.1268936930890743, 'subsample': 0.6572788586594467, 'colsample_bylevel': 0.8567997340268858, 'min_data_in_leaf': 1, 'early_stopping_rounds': 40}. Best is trial 0 with value: 193164.08466508074.
Trial 13 finished with value: 205917.74996166502 and parameters: {'iterations': 985, 'learning_rate': 0.15735554323803275, 'depth': 8, 'l2_leaf_reg': 0.1268936930890743, 'subsample': 0.6572788586594467, 'colsample_bylevel': 0.8567997340268858, 'min_data_in_leaf': 1, 'early_stopping_rounds': 40}. Best is trial

[I 2025-06-30 17:23:47,150] Trial 14 finished with value: 336010.49078546284 and parameters: {'iterations': 706, 'learning_rate': 0.044084653694835274, 'depth': 9, 'l2_leaf_reg': 0.7756818983394322, 'subsample': 0.7826974821185181, 'colsample_bylevel': 0.7219922567707062, 'min_data_in_leaf': 14, 'early_stopping_rounds': 40}. Best is trial 0 with value: 193164.08466508074.


Trial 14 finished with value: 336010.49078546284 and parameters: {'iterations': 706, 'learning_rate': 0.044084653694835274, 'depth': 9, 'l2_leaf_reg': 0.7756818983394322, 'subsample': 0.7826974821185181, 'colsample_bylevel': 0.7219922567707062, 'min_data_in_leaf': 14, 'early_stopping_rounds': 40}. Best is trial 0 with value: 193164.08466508074.
Trial 14 finished with value: 336010.49078546284 and parameters: {'iterations': 706, 'learning_rate': 0.044084653694835274, 'depth': 9, 'l2_leaf_reg': 0.7756818983394322, 'subsample': 0.7826974821185181, 'colsample_bylevel': 0.7219922567707062, 'min_data_in_leaf': 14, 'early_stopping_rounds': 40}. Best is trial 0 with value: 193164.08466508074.
Trial 14 finished with value: 336010.49078546284 and parameters: {'iterations': 706, 'learning_rate': 0.044084653694835274, 'depth': 9, 'l2_leaf_reg': 0.7756818983394322, 'subsample': 0.7826974821185181, 'colsample_bylevel': 0.7219922567707062, 'min_data_in_leaf': 14, 'early_stopping_rounds': 40}. Best is

[I 2025-06-30 17:23:49,986] Trial 15 finished with value: 226753.61021385095 and parameters: {'iterations': 463, 'learning_rate': 0.2588385652483153, 'depth': 6, 'l2_leaf_reg': 0.2594228347889954, 'subsample': 0.6795895602142985, 'colsample_bylevel': 0.8297365118287333, 'min_data_in_leaf': 30, 'early_stopping_rounds': 34}. Best is trial 0 with value: 193164.08466508074.


Trial 15 finished with value: 226753.61021385095 and parameters: {'iterations': 463, 'learning_rate': 0.2588385652483153, 'depth': 6, 'l2_leaf_reg': 0.2594228347889954, 'subsample': 0.6795895602142985, 'colsample_bylevel': 0.8297365118287333, 'min_data_in_leaf': 30, 'early_stopping_rounds': 34}. Best is trial 0 with value: 193164.08466508074.
Trial 15 finished with value: 226753.61021385095 and parameters: {'iterations': 463, 'learning_rate': 0.2588385652483153, 'depth': 6, 'l2_leaf_reg': 0.2594228347889954, 'subsample': 0.6795895602142985, 'colsample_bylevel': 0.8297365118287333, 'min_data_in_leaf': 30, 'early_stopping_rounds': 34}. Best is trial 0 with value: 193164.08466508074.
Trial 15 finished with value: 226753.61021385095 and parameters: {'iterations': 463, 'learning_rate': 0.2588385652483153, 'depth': 6, 'l2_leaf_reg': 0.2594228347889954, 'subsample': 0.6795895602142985, 'colsample_bylevel': 0.8297365118287333, 'min_data_in_leaf': 30, 'early_stopping_rounds': 34}. Best is trial

[I 2025-06-30 17:23:53,075] Trial 16 finished with value: 407801.4653134753 and parameters: {'iterations': 744, 'learning_rate': 0.007426116718087666, 'depth': 9, 'l2_leaf_reg': 0.2468877195053605, 'subsample': 0.7492431962333567, 'colsample_bylevel': 0.74246023582073, 'min_data_in_leaf': 15, 'early_stopping_rounds': 46}. Best is trial 0 with value: 193164.08466508074.


Trial 16 finished with value: 407801.4653134753 and parameters: {'iterations': 744, 'learning_rate': 0.007426116718087666, 'depth': 9, 'l2_leaf_reg': 0.2468877195053605, 'subsample': 0.7492431962333567, 'colsample_bylevel': 0.74246023582073, 'min_data_in_leaf': 15, 'early_stopping_rounds': 46}. Best is trial 0 with value: 193164.08466508074.
Trial 16 finished with value: 407801.4653134753 and parameters: {'iterations': 744, 'learning_rate': 0.007426116718087666, 'depth': 9, 'l2_leaf_reg': 0.2468877195053605, 'subsample': 0.7492431962333567, 'colsample_bylevel': 0.74246023582073, 'min_data_in_leaf': 15, 'early_stopping_rounds': 46}. Best is trial 0 with value: 193164.08466508074.
Trial 16 finished with value: 407801.4653134753 and parameters: {'iterations': 744, 'learning_rate': 0.007426116718087666, 'depth': 9, 'l2_leaf_reg': 0.2468877195053605, 'subsample': 0.7492431962333567, 'colsample_bylevel': 0.74246023582073, 'min_data_in_leaf': 15, 'early_stopping_rounds': 46}. Best is trial 0 

[I 2025-06-30 17:23:56,623] Trial 17 finished with value: 261743.75264054636 and parameters: {'iterations': 329, 'learning_rate': 0.11777517005248574, 'depth': 9, 'l2_leaf_reg': 0.03333924313244961, 'subsample': 0.6003864820323297, 'colsample_bylevel': 0.8037521393242067, 'min_data_in_leaf': 6, 'early_stopping_rounds': 43}. Best is trial 0 with value: 193164.08466508074.


Trial 17 finished with value: 261743.75264054636 and parameters: {'iterations': 329, 'learning_rate': 0.11777517005248574, 'depth': 9, 'l2_leaf_reg': 0.03333924313244961, 'subsample': 0.6003864820323297, 'colsample_bylevel': 0.8037521393242067, 'min_data_in_leaf': 6, 'early_stopping_rounds': 43}. Best is trial 0 with value: 193164.08466508074.
Trial 17 finished with value: 261743.75264054636 and parameters: {'iterations': 329, 'learning_rate': 0.11777517005248574, 'depth': 9, 'l2_leaf_reg': 0.03333924313244961, 'subsample': 0.6003864820323297, 'colsample_bylevel': 0.8037521393242067, 'min_data_in_leaf': 6, 'early_stopping_rounds': 43}. Best is trial 0 with value: 193164.08466508074.
Trial 17 finished with value: 261743.75264054636 and parameters: {'iterations': 329, 'learning_rate': 0.11777517005248574, 'depth': 9, 'l2_leaf_reg': 0.03333924313244961, 'subsample': 0.6003864820323297, 'colsample_bylevel': 0.8037521393242067, 'min_data_in_leaf': 6, 'early_stopping_rounds': 43}. Best is tr

[I 2025-06-30 17:24:01,227] Trial 18 finished with value: 331557.2575971118 and parameters: {'iterations': 847, 'learning_rate': 0.05134970681560025, 'depth': 7, 'l2_leaf_reg': 0.8707603686462977, 'subsample': 0.6532788044008354, 'colsample_bylevel': 0.6811148233933808, 'min_data_in_leaf': 26, 'early_stopping_rounds': 35}. Best is trial 0 with value: 193164.08466508074.


Trial 18 finished with value: 331557.2575971118 and parameters: {'iterations': 847, 'learning_rate': 0.05134970681560025, 'depth': 7, 'l2_leaf_reg': 0.8707603686462977, 'subsample': 0.6532788044008354, 'colsample_bylevel': 0.6811148233933808, 'min_data_in_leaf': 26, 'early_stopping_rounds': 35}. Best is trial 0 with value: 193164.08466508074.
Trial 18 finished with value: 331557.2575971118 and parameters: {'iterations': 847, 'learning_rate': 0.05134970681560025, 'depth': 7, 'l2_leaf_reg': 0.8707603686462977, 'subsample': 0.6532788044008354, 'colsample_bylevel': 0.6811148233933808, 'min_data_in_leaf': 26, 'early_stopping_rounds': 35}. Best is trial 0 with value: 193164.08466508074.
Trial 18 finished with value: 331557.2575971118 and parameters: {'iterations': 847, 'learning_rate': 0.05134970681560025, 'depth': 7, 'l2_leaf_reg': 0.8707603686462977, 'subsample': 0.6532788044008354, 'colsample_bylevel': 0.6811148233933808, 'min_data_in_leaf': 26, 'early_stopping_rounds': 35}. Best is trial

[I 2025-06-30 17:24:03,919] Trial 19 finished with value: 244082.83250688456 and parameters: {'iterations': 999, 'learning_rate': 0.18947684534767115, 'depth': 7, 'l2_leaf_reg': 0.4933157849953325, 'subsample': 0.8246753253615013, 'colsample_bylevel': 0.8984553239225072, 'min_data_in_leaf': 4, 'early_stopping_rounds': 35}. Best is trial 0 with value: 193164.08466508074.


Trial 19 finished with value: 244082.83250688456 and parameters: {'iterations': 999, 'learning_rate': 0.18947684534767115, 'depth': 7, 'l2_leaf_reg': 0.4933157849953325, 'subsample': 0.8246753253615013, 'colsample_bylevel': 0.8984553239225072, 'min_data_in_leaf': 4, 'early_stopping_rounds': 35}. Best is trial 0 with value: 193164.08466508074.
Trial 19 finished with value: 244082.83250688456 and parameters: {'iterations': 999, 'learning_rate': 0.18947684534767115, 'depth': 7, 'l2_leaf_reg': 0.4933157849953325, 'subsample': 0.8246753253615013, 'colsample_bylevel': 0.8984553239225072, 'min_data_in_leaf': 4, 'early_stopping_rounds': 35}. Best is trial 0 with value: 193164.08466508074.
Trial 19 finished with value: 244082.83250688456 and parameters: {'iterations': 999, 'learning_rate': 0.18947684534767115, 'depth': 7, 'l2_leaf_reg': 0.4933157849953325, 'subsample': 0.8246753253615013, 'colsample_bylevel': 0.8984553239225072, 'min_data_in_leaf': 4, 'early_stopping_rounds': 35}. Best is trial

[I 2025-06-30 17:24:06,376] Trial 20 finished with value: 352919.17107680393 and parameters: {'iterations': 459, 'learning_rate': 0.0326559345206202, 'depth': 9, 'l2_leaf_reg': 0.040187488201804136, 'subsample': 0.7204121973301069, 'colsample_bylevel': 0.7970940974893838, 'min_data_in_leaf': 22, 'early_stopping_rounds': 50}. Best is trial 0 with value: 193164.08466508074.


Trial 20 finished with value: 352919.17107680393 and parameters: {'iterations': 459, 'learning_rate': 0.0326559345206202, 'depth': 9, 'l2_leaf_reg': 0.040187488201804136, 'subsample': 0.7204121973301069, 'colsample_bylevel': 0.7970940974893838, 'min_data_in_leaf': 22, 'early_stopping_rounds': 50}. Best is trial 0 with value: 193164.08466508074.
Trial 20 finished with value: 352919.17107680393 and parameters: {'iterations': 459, 'learning_rate': 0.0326559345206202, 'depth': 9, 'l2_leaf_reg': 0.040187488201804136, 'subsample': 0.7204121973301069, 'colsample_bylevel': 0.7970940974893838, 'min_data_in_leaf': 22, 'early_stopping_rounds': 50}. Best is trial 0 with value: 193164.08466508074.
Trial 20 finished with value: 352919.17107680393 and parameters: {'iterations': 459, 'learning_rate': 0.0326559345206202, 'depth': 9, 'l2_leaf_reg': 0.040187488201804136, 'subsample': 0.7204121973301069, 'colsample_bylevel': 0.7970940974893838, 'min_data_in_leaf': 22, 'early_stopping_rounds': 50}. Best is

[I 2025-06-30 17:24:50,880] Trial 21 finished with value: 193869.37837896758 and parameters: {'iterations': 809, 'learning_rate': 0.2877420636662859, 'depth': 10, 'l2_leaf_reg': 0.9813453667931464, 'subsample': 0.6827783923992456, 'colsample_bylevel': 0.7547782653977205, 'min_data_in_leaf': 30, 'early_stopping_rounds': 40}. Best is trial 0 with value: 193164.08466508074.


Trial 21 finished with value: 193869.37837896758 and parameters: {'iterations': 809, 'learning_rate': 0.2877420636662859, 'depth': 10, 'l2_leaf_reg': 0.9813453667931464, 'subsample': 0.6827783923992456, 'colsample_bylevel': 0.7547782653977205, 'min_data_in_leaf': 30, 'early_stopping_rounds': 40}. Best is trial 0 with value: 193164.08466508074.
Trial 21 finished with value: 193869.37837896758 and parameters: {'iterations': 809, 'learning_rate': 0.2877420636662859, 'depth': 10, 'l2_leaf_reg': 0.9813453667931464, 'subsample': 0.6827783923992456, 'colsample_bylevel': 0.7547782653977205, 'min_data_in_leaf': 30, 'early_stopping_rounds': 40}. Best is trial 0 with value: 193164.08466508074.
Trial 21 finished with value: 193869.37837896758 and parameters: {'iterations': 809, 'learning_rate': 0.2877420636662859, 'depth': 10, 'l2_leaf_reg': 0.9813453667931464, 'subsample': 0.6827783923992456, 'colsample_bylevel': 0.7547782653977205, 'min_data_in_leaf': 30, 'early_stopping_rounds': 40}. Best is tr

[I 2025-06-30 17:24:53,969] Trial 22 finished with value: 287039.7045608816 and parameters: {'iterations': 653, 'learning_rate': 0.0787611816301395, 'depth': 10, 'l2_leaf_reg': 0.8853068880055763, 'subsample': 0.6649920756193268, 'colsample_bylevel': 0.7121201545013595, 'min_data_in_leaf': 27, 'early_stopping_rounds': 43}. Best is trial 0 with value: 193164.08466508074.


Trial 22 finished with value: 287039.7045608816 and parameters: {'iterations': 653, 'learning_rate': 0.0787611816301395, 'depth': 10, 'l2_leaf_reg': 0.8853068880055763, 'subsample': 0.6649920756193268, 'colsample_bylevel': 0.7121201545013595, 'min_data_in_leaf': 27, 'early_stopping_rounds': 43}. Best is trial 0 with value: 193164.08466508074.
Trial 22 finished with value: 287039.7045608816 and parameters: {'iterations': 653, 'learning_rate': 0.0787611816301395, 'depth': 10, 'l2_leaf_reg': 0.8853068880055763, 'subsample': 0.6649920756193268, 'colsample_bylevel': 0.7121201545013595, 'min_data_in_leaf': 27, 'early_stopping_rounds': 43}. Best is trial 0 with value: 193164.08466508074.
Trial 22 finished with value: 287039.7045608816 and parameters: {'iterations': 653, 'learning_rate': 0.0787611816301395, 'depth': 10, 'l2_leaf_reg': 0.8853068880055763, 'subsample': 0.6649920756193268, 'colsample_bylevel': 0.7121201545013595, 'min_data_in_leaf': 27, 'early_stopping_rounds': 43}. Best is trial

[I 2025-06-30 17:24:56,556] Trial 23 finished with value: 248864.47635146996 and parameters: {'iterations': 866, 'learning_rate': 0.15213334123354433, 'depth': 9, 'l2_leaf_reg': 1.3350554959592684, 'subsample': 0.6337909883971027, 'colsample_bylevel': 0.6802407359630315, 'min_data_in_leaf': 28, 'early_stopping_rounds': 44}. Best is trial 0 with value: 193164.08466508074.


Trial 23 finished with value: 248864.47635146996 and parameters: {'iterations': 866, 'learning_rate': 0.15213334123354433, 'depth': 9, 'l2_leaf_reg': 1.3350554959592684, 'subsample': 0.6337909883971027, 'colsample_bylevel': 0.6802407359630315, 'min_data_in_leaf': 28, 'early_stopping_rounds': 44}. Best is trial 0 with value: 193164.08466508074.
Trial 23 finished with value: 248864.47635146996 and parameters: {'iterations': 866, 'learning_rate': 0.15213334123354433, 'depth': 9, 'l2_leaf_reg': 1.3350554959592684, 'subsample': 0.6337909883971027, 'colsample_bylevel': 0.6802407359630315, 'min_data_in_leaf': 28, 'early_stopping_rounds': 44}. Best is trial 0 with value: 193164.08466508074.
Trial 23 finished with value: 248864.47635146996 and parameters: {'iterations': 866, 'learning_rate': 0.15213334123354433, 'depth': 9, 'l2_leaf_reg': 1.3350554959592684, 'subsample': 0.6337909883971027, 'colsample_bylevel': 0.6802407359630315, 'min_data_in_leaf': 28, 'early_stopping_rounds': 44}. Best is tr

[I 2025-06-30 17:25:19,097] Trial 24 finished with value: 191651.15446141362 and parameters: {'iterations': 748, 'learning_rate': 0.2897824765207468, 'depth': 8, 'l2_leaf_reg': 0.17705520918925982, 'subsample': 0.746108781457228, 'colsample_bylevel': 0.7507458766924474, 'min_data_in_leaf': 30, 'early_stopping_rounds': 38}. Best is trial 24 with value: 191651.15446141362.


Trial 24 finished with value: 191651.15446141362 and parameters: {'iterations': 748, 'learning_rate': 0.2897824765207468, 'depth': 8, 'l2_leaf_reg': 0.17705520918925982, 'subsample': 0.746108781457228, 'colsample_bylevel': 0.7507458766924474, 'min_data_in_leaf': 30, 'early_stopping_rounds': 38}. Best is trial 24 with value: 191651.15446141362.
Trial 24 finished with value: 191651.15446141362 and parameters: {'iterations': 748, 'learning_rate': 0.2897824765207468, 'depth': 8, 'l2_leaf_reg': 0.17705520918925982, 'subsample': 0.746108781457228, 'colsample_bylevel': 0.7507458766924474, 'min_data_in_leaf': 30, 'early_stopping_rounds': 38}. Best is trial 24 with value: 191651.15446141362.
Trial 24 finished with value: 191651.15446141362 and parameters: {'iterations': 748, 'learning_rate': 0.2897824765207468, 'depth': 8, 'l2_leaf_reg': 0.17705520918925982, 'subsample': 0.746108781457228, 'colsample_bylevel': 0.7507458766924474, 'min_data_in_leaf': 30, 'early_stopping_rounds': 38}. Best is tri

[I 2025-06-30 17:25:21,783] Trial 25 finished with value: 251713.44822104333 and parameters: {'iterations': 724, 'learning_rate': 0.15008884402716377, 'depth': 8, 'l2_leaf_reg': 0.17529731728332265, 'subsample': 0.7642586473542406, 'colsample_bylevel': 0.7923054641818716, 'min_data_in_leaf': 23, 'early_stopping_rounds': 37}. Best is trial 24 with value: 191651.15446141362.


Trial 25 finished with value: 251713.44822104333 and parameters: {'iterations': 724, 'learning_rate': 0.15008884402716377, 'depth': 8, 'l2_leaf_reg': 0.17529731728332265, 'subsample': 0.7642586473542406, 'colsample_bylevel': 0.7923054641818716, 'min_data_in_leaf': 23, 'early_stopping_rounds': 37}. Best is trial 24 with value: 191651.15446141362.
Trial 25 finished with value: 251713.44822104333 and parameters: {'iterations': 724, 'learning_rate': 0.15008884402716377, 'depth': 8, 'l2_leaf_reg': 0.17529731728332265, 'subsample': 0.7642586473542406, 'colsample_bylevel': 0.7923054641818716, 'min_data_in_leaf': 23, 'early_stopping_rounds': 37}. Best is trial 24 with value: 191651.15446141362.
Trial 25 finished with value: 251713.44822104333 and parameters: {'iterations': 724, 'learning_rate': 0.15008884402716377, 'depth': 8, 'l2_leaf_reg': 0.17529731728332265, 'subsample': 0.7642586473542406, 'colsample_bylevel': 0.7923054641818716, 'min_data_in_leaf': 23, 'early_stopping_rounds': 37}. Best 

[I 2025-06-30 17:25:24,062] Trial 26 finished with value: 400852.2781417089 and parameters: {'iterations': 574, 'learning_rate': 0.010709697515312202, 'depth': 8, 'l2_leaf_reg': 0.39755344928802744, 'subsample': 0.7290562036590932, 'colsample_bylevel': 0.7332956679369176, 'min_data_in_leaf': 13, 'early_stopping_rounds': 32}. Best is trial 24 with value: 191651.15446141362.


Trial 26 finished with value: 400852.2781417089 and parameters: {'iterations': 574, 'learning_rate': 0.010709697515312202, 'depth': 8, 'l2_leaf_reg': 0.39755344928802744, 'subsample': 0.7290562036590932, 'colsample_bylevel': 0.7332956679369176, 'min_data_in_leaf': 13, 'early_stopping_rounds': 32}. Best is trial 24 with value: 191651.15446141362.
Trial 26 finished with value: 400852.2781417089 and parameters: {'iterations': 574, 'learning_rate': 0.010709697515312202, 'depth': 8, 'l2_leaf_reg': 0.39755344928802744, 'subsample': 0.7290562036590932, 'colsample_bylevel': 0.7332956679369176, 'min_data_in_leaf': 13, 'early_stopping_rounds': 32}. Best is trial 24 with value: 191651.15446141362.
Trial 26 finished with value: 400852.2781417089 and parameters: {'iterations': 574, 'learning_rate': 0.010709697515312202, 'depth': 8, 'l2_leaf_reg': 0.39755344928802744, 'subsample': 0.7290562036590932, 'colsample_bylevel': 0.7332956679369176, 'min_data_in_leaf': 13, 'early_stopping_rounds': 32}. Best 

[I 2025-06-30 17:25:26,478] Trial 27 finished with value: 311164.37992117874 and parameters: {'iterations': 272, 'learning_rate': 0.06924696589563263, 'depth': 7, 'l2_leaf_reg': 0.16424863759692604, 'subsample': 0.7912108321984976, 'colsample_bylevel': 0.6055590232716066, 'min_data_in_leaf': 20, 'early_stopping_rounds': 47}. Best is trial 24 with value: 191651.15446141362.


Trial 27 finished with value: 311164.37992117874 and parameters: {'iterations': 272, 'learning_rate': 0.06924696589563263, 'depth': 7, 'l2_leaf_reg': 0.16424863759692604, 'subsample': 0.7912108321984976, 'colsample_bylevel': 0.6055590232716066, 'min_data_in_leaf': 20, 'early_stopping_rounds': 47}. Best is trial 24 with value: 191651.15446141362.
Trial 27 finished with value: 311164.37992117874 and parameters: {'iterations': 272, 'learning_rate': 0.06924696589563263, 'depth': 7, 'l2_leaf_reg': 0.16424863759692604, 'subsample': 0.7912108321984976, 'colsample_bylevel': 0.6055590232716066, 'min_data_in_leaf': 20, 'early_stopping_rounds': 47}. Best is trial 24 with value: 191651.15446141362.
Trial 27 finished with value: 311164.37992117874 and parameters: {'iterations': 272, 'learning_rate': 0.06924696589563263, 'depth': 7, 'l2_leaf_reg': 0.16424863759692604, 'subsample': 0.7912108321984976, 'colsample_bylevel': 0.6055590232716066, 'min_data_in_leaf': 20, 'early_stopping_rounds': 47}. Best 

[I 2025-06-30 17:25:28,654] Trial 28 finished with value: 269533.18270147825 and parameters: {'iterations': 923, 'learning_rate': 0.12996736870231101, 'depth': 6, 'l2_leaf_reg': 0.059206430572784914, 'subsample': 0.8660698517703785, 'colsample_bylevel': 0.6648447237835107, 'min_data_in_leaf': 17, 'early_stopping_rounds': 28}. Best is trial 24 with value: 191651.15446141362.


Trial 28 finished with value: 269533.18270147825 and parameters: {'iterations': 923, 'learning_rate': 0.12996736870231101, 'depth': 6, 'l2_leaf_reg': 0.059206430572784914, 'subsample': 0.8660698517703785, 'colsample_bylevel': 0.6648447237835107, 'min_data_in_leaf': 17, 'early_stopping_rounds': 28}. Best is trial 24 with value: 191651.15446141362.
Trial 28 finished with value: 269533.18270147825 and parameters: {'iterations': 923, 'learning_rate': 0.12996736870231101, 'depth': 6, 'l2_leaf_reg': 0.059206430572784914, 'subsample': 0.8660698517703785, 'colsample_bylevel': 0.6648447237835107, 'min_data_in_leaf': 17, 'early_stopping_rounds': 28}. Best is trial 24 with value: 191651.15446141362.
Trial 28 finished with value: 269533.18270147825 and parameters: {'iterations': 923, 'learning_rate': 0.12996736870231101, 'depth': 6, 'l2_leaf_reg': 0.059206430572784914, 'subsample': 0.8660698517703785, 'colsample_bylevel': 0.6648447237835107, 'min_data_in_leaf': 17, 'early_stopping_rounds': 28}. Be

[I 2025-06-30 17:25:31,456] Trial 29 finished with value: 236763.83625969017 and parameters: {'iterations': 669, 'learning_rate': 0.18789327248041002, 'depth': 9, 'l2_leaf_reg': 4.3639837346239405, 'subsample': 0.6248611510517292, 'colsample_bylevel': 0.686510507186421, 'min_data_in_leaf': 5, 'early_stopping_rounds': 37}. Best is trial 24 with value: 191651.15446141362.


Trial 29 finished with value: 236763.83625969017 and parameters: {'iterations': 669, 'learning_rate': 0.18789327248041002, 'depth': 9, 'l2_leaf_reg': 4.3639837346239405, 'subsample': 0.6248611510517292, 'colsample_bylevel': 0.686510507186421, 'min_data_in_leaf': 5, 'early_stopping_rounds': 37}. Best is trial 24 with value: 191651.15446141362.
Trial 29 finished with value: 236763.83625969017 and parameters: {'iterations': 669, 'learning_rate': 0.18789327248041002, 'depth': 9, 'l2_leaf_reg': 4.3639837346239405, 'subsample': 0.6248611510517292, 'colsample_bylevel': 0.686510507186421, 'min_data_in_leaf': 5, 'early_stopping_rounds': 37}. Best is trial 24 with value: 191651.15446141362.
Trial 29 finished with value: 236763.83625969017 and parameters: {'iterations': 669, 'learning_rate': 0.18789327248041002, 'depth': 9, 'l2_leaf_reg': 4.3639837346239405, 'subsample': 0.6248611510517292, 'colsample_bylevel': 0.686510507186421, 'min_data_in_leaf': 5, 'early_stopping_rounds': 37}. Best is trial 

[I 2025-06-30 17:25:34,385] Trial 30 finished with value: 360726.37271938997 and parameters: {'iterations': 777, 'learning_rate': 0.03234366496474334, 'depth': 6, 'l2_leaf_reg': 0.019989211752082094, 'subsample': 0.7026984528096858, 'colsample_bylevel': 0.8278042860212953, 'min_data_in_leaf': 7, 'early_stopping_rounds': 43}. Best is trial 24 with value: 191651.15446141362.


Trial 30 finished with value: 360726.37271938997 and parameters: {'iterations': 777, 'learning_rate': 0.03234366496474334, 'depth': 6, 'l2_leaf_reg': 0.019989211752082094, 'subsample': 0.7026984528096858, 'colsample_bylevel': 0.8278042860212953, 'min_data_in_leaf': 7, 'early_stopping_rounds': 43}. Best is trial 24 with value: 191651.15446141362.
Trial 30 finished with value: 360726.37271938997 and parameters: {'iterations': 777, 'learning_rate': 0.03234366496474334, 'depth': 6, 'l2_leaf_reg': 0.019989211752082094, 'subsample': 0.7026984528096858, 'colsample_bylevel': 0.8278042860212953, 'min_data_in_leaf': 7, 'early_stopping_rounds': 43}. Best is trial 24 with value: 191651.15446141362.
Trial 30 finished with value: 360726.37271938997 and parameters: {'iterations': 777, 'learning_rate': 0.03234366496474334, 'depth': 6, 'l2_leaf_reg': 0.019989211752082094, 'subsample': 0.7026984528096858, 'colsample_bylevel': 0.8278042860212953, 'min_data_in_leaf': 7, 'early_stopping_rounds': 43}. Best 

[I 2025-06-30 17:26:36,082] Trial 31 finished with value: 192449.43755492102 and parameters: {'iterations': 819, 'learning_rate': 0.2869798748467506, 'depth': 10, 'l2_leaf_reg': 1.1656735858275957, 'subsample': 0.6785875889638864, 'colsample_bylevel': 0.7607644789358028, 'min_data_in_leaf': 30, 'early_stopping_rounds': 40}. Best is trial 24 with value: 191651.15446141362.


Trial 31 finished with value: 192449.43755492102 and parameters: {'iterations': 819, 'learning_rate': 0.2869798748467506, 'depth': 10, 'l2_leaf_reg': 1.1656735858275957, 'subsample': 0.6785875889638864, 'colsample_bylevel': 0.7607644789358028, 'min_data_in_leaf': 30, 'early_stopping_rounds': 40}. Best is trial 24 with value: 191651.15446141362.
Trial 31 finished with value: 192449.43755492102 and parameters: {'iterations': 819, 'learning_rate': 0.2869798748467506, 'depth': 10, 'l2_leaf_reg': 1.1656735858275957, 'subsample': 0.6785875889638864, 'colsample_bylevel': 0.7607644789358028, 'min_data_in_leaf': 30, 'early_stopping_rounds': 40}. Best is trial 24 with value: 191651.15446141362.
Trial 31 finished with value: 192449.43755492102 and parameters: {'iterations': 819, 'learning_rate': 0.2869798748467506, 'depth': 10, 'l2_leaf_reg': 1.1656735858275957, 'subsample': 0.6785875889638864, 'colsample_bylevel': 0.7607644789358028, 'min_data_in_leaf': 30, 'early_stopping_rounds': 40}. Best is 

[I 2025-06-30 17:27:02,142] Trial 32 finished with value: 192799.30223112373 and parameters: {'iterations': 931, 'learning_rate': 0.22065733366088336, 'depth': 9, 'l2_leaf_reg': 0.546384482735498, 'subsample': 0.6515215790189713, 'colsample_bylevel': 0.7013370737909197, 'min_data_in_leaf': 28, 'early_stopping_rounds': 38}. Best is trial 24 with value: 191651.15446141362.


Trial 32 finished with value: 192799.30223112373 and parameters: {'iterations': 931, 'learning_rate': 0.22065733366088336, 'depth': 9, 'l2_leaf_reg': 0.546384482735498, 'subsample': 0.6515215790189713, 'colsample_bylevel': 0.7013370737909197, 'min_data_in_leaf': 28, 'early_stopping_rounds': 38}. Best is trial 24 with value: 191651.15446141362.
Trial 32 finished with value: 192799.30223112373 and parameters: {'iterations': 931, 'learning_rate': 0.22065733366088336, 'depth': 9, 'l2_leaf_reg': 0.546384482735498, 'subsample': 0.6515215790189713, 'colsample_bylevel': 0.7013370737909197, 'min_data_in_leaf': 28, 'early_stopping_rounds': 38}. Best is trial 24 with value: 191651.15446141362.
Trial 32 finished with value: 192799.30223112373 and parameters: {'iterations': 931, 'learning_rate': 0.22065733366088336, 'depth': 9, 'l2_leaf_reg': 0.546384482735498, 'subsample': 0.6515215790189713, 'colsample_bylevel': 0.7013370737909197, 'min_data_in_leaf': 28, 'early_stopping_rounds': 38}. Best is tri

[I 2025-06-30 17:27:04,765] Trial 33 finished with value: 264877.4136321198 and parameters: {'iterations': 928, 'learning_rate': 0.107620188885818, 'depth': 10, 'l2_leaf_reg': 0.2983431233189096, 'subsample': 0.6461440694363485, 'colsample_bylevel': 0.7006117412055323, 'min_data_in_leaf': 27, 'early_stopping_rounds': 37}. Best is trial 24 with value: 191651.15446141362.


Trial 33 finished with value: 264877.4136321198 and parameters: {'iterations': 928, 'learning_rate': 0.107620188885818, 'depth': 10, 'l2_leaf_reg': 0.2983431233189096, 'subsample': 0.6461440694363485, 'colsample_bylevel': 0.7006117412055323, 'min_data_in_leaf': 27, 'early_stopping_rounds': 37}. Best is trial 24 with value: 191651.15446141362.
Trial 33 finished with value: 264877.4136321198 and parameters: {'iterations': 928, 'learning_rate': 0.107620188885818, 'depth': 10, 'l2_leaf_reg': 0.2983431233189096, 'subsample': 0.6461440694363485, 'colsample_bylevel': 0.7006117412055323, 'min_data_in_leaf': 27, 'early_stopping_rounds': 37}. Best is trial 24 with value: 191651.15446141362.
Trial 33 finished with value: 264877.4136321198 and parameters: {'iterations': 928, 'learning_rate': 0.107620188885818, 'depth': 10, 'l2_leaf_reg': 0.2983431233189096, 'subsample': 0.6461440694363485, 'colsample_bylevel': 0.7006117412055323, 'min_data_in_leaf': 27, 'early_stopping_rounds': 37}. Best is trial 

[I 2025-06-30 17:27:06,600] Trial 34 finished with value: 247909.29572866237 and parameters: {'iterations': 422, 'learning_rate': 0.20145250365448275, 'depth': 8, 'l2_leaf_reg': 9.8966921839684, 'subsample': 0.7317879282909067, 'colsample_bylevel': 0.6423292054011528, 'min_data_in_leaf': 25, 'early_stopping_rounds': 42}. Best is trial 24 with value: 191651.15446141362.


Trial 34 finished with value: 247909.29572866237 and parameters: {'iterations': 422, 'learning_rate': 0.20145250365448275, 'depth': 8, 'l2_leaf_reg': 9.8966921839684, 'subsample': 0.7317879282909067, 'colsample_bylevel': 0.6423292054011528, 'min_data_in_leaf': 25, 'early_stopping_rounds': 42}. Best is trial 24 with value: 191651.15446141362.
Trial 34 finished with value: 247909.29572866237 and parameters: {'iterations': 422, 'learning_rate': 0.20145250365448275, 'depth': 8, 'l2_leaf_reg': 9.8966921839684, 'subsample': 0.7317879282909067, 'colsample_bylevel': 0.6423292054011528, 'min_data_in_leaf': 25, 'early_stopping_rounds': 42}. Best is trial 24 with value: 191651.15446141362.
Trial 34 finished with value: 247909.29572866237 and parameters: {'iterations': 422, 'learning_rate': 0.20145250365448275, 'depth': 8, 'l2_leaf_reg': 9.8966921839684, 'subsample': 0.7317879282909067, 'colsample_bylevel': 0.6423292054011528, 'min_data_in_leaf': 25, 'early_stopping_rounds': 42}. Best is trial 24 

[I 2025-06-30 17:27:08,930] Trial 35 finished with value: 297063.44828331994 and parameters: {'iterations': 506, 'learning_rate': 0.07731295771222205, 'depth': 9, 'l2_leaf_reg': 1.421664527640886, 'subsample': 0.7090586177904379, 'colsample_bylevel': 0.6561707039150337, 'min_data_in_leaf': 28, 'early_stopping_rounds': 46}. Best is trial 24 with value: 191651.15446141362.


Trial 35 finished with value: 297063.44828331994 and parameters: {'iterations': 506, 'learning_rate': 0.07731295771222205, 'depth': 9, 'l2_leaf_reg': 1.421664527640886, 'subsample': 0.7090586177904379, 'colsample_bylevel': 0.6561707039150337, 'min_data_in_leaf': 28, 'early_stopping_rounds': 46}. Best is trial 24 with value: 191651.15446141362.
Trial 35 finished with value: 297063.44828331994 and parameters: {'iterations': 506, 'learning_rate': 0.07731295771222205, 'depth': 9, 'l2_leaf_reg': 1.421664527640886, 'subsample': 0.7090586177904379, 'colsample_bylevel': 0.6561707039150337, 'min_data_in_leaf': 28, 'early_stopping_rounds': 46}. Best is trial 24 with value: 191651.15446141362.
Trial 35 finished with value: 297063.44828331994 and parameters: {'iterations': 506, 'learning_rate': 0.07731295771222205, 'depth': 9, 'l2_leaf_reg': 1.421664527640886, 'subsample': 0.7090586177904379, 'colsample_bylevel': 0.6561707039150337, 'min_data_in_leaf': 28, 'early_stopping_rounds': 46}. Best is tri

[I 2025-06-30 17:27:12,301] Trial 36 finished with value: 232703.14723365277 and parameters: {'iterations': 532, 'learning_rate': 0.20937878849954358, 'depth': 10, 'l2_leaf_reg': 0.5722294511033635, 'subsample': 0.6842688901983758, 'colsample_bylevel': 0.7801769492941018, 'min_data_in_leaf': 28, 'early_stopping_rounds': 25}. Best is trial 24 with value: 191651.15446141362.


Trial 36 finished with value: 232703.14723365277 and parameters: {'iterations': 532, 'learning_rate': 0.20937878849954358, 'depth': 10, 'l2_leaf_reg': 0.5722294511033635, 'subsample': 0.6842688901983758, 'colsample_bylevel': 0.7801769492941018, 'min_data_in_leaf': 28, 'early_stopping_rounds': 25}. Best is trial 24 with value: 191651.15446141362.
Trial 36 finished with value: 232703.14723365277 and parameters: {'iterations': 532, 'learning_rate': 0.20937878849954358, 'depth': 10, 'l2_leaf_reg': 0.5722294511033635, 'subsample': 0.6842688901983758, 'colsample_bylevel': 0.7801769492941018, 'min_data_in_leaf': 28, 'early_stopping_rounds': 25}. Best is trial 24 with value: 191651.15446141362.
Trial 36 finished with value: 232703.14723365277 and parameters: {'iterations': 532, 'learning_rate': 0.20937878849954358, 'depth': 10, 'l2_leaf_reg': 0.5722294511033635, 'subsample': 0.6842688901983758, 'colsample_bylevel': 0.7801769492941018, 'min_data_in_leaf': 28, 'early_stopping_rounds': 25}. Best 

[I 2025-06-30 17:27:14,951] Trial 37 finished with value: 239292.14964538466 and parameters: {'iterations': 610, 'learning_rate': 0.21407871078206217, 'depth': 8, 'l2_leaf_reg': 4.440998416566279, 'subsample': 0.7659325196641714, 'colsample_bylevel': 0.7001100940963497, 'min_data_in_leaf': 21, 'early_stopping_rounds': 31}. Best is trial 24 with value: 191651.15446141362.


Trial 37 finished with value: 239292.14964538466 and parameters: {'iterations': 610, 'learning_rate': 0.21407871078206217, 'depth': 8, 'l2_leaf_reg': 4.440998416566279, 'subsample': 0.7659325196641714, 'colsample_bylevel': 0.7001100940963497, 'min_data_in_leaf': 21, 'early_stopping_rounds': 31}. Best is trial 24 with value: 191651.15446141362.
Trial 37 finished with value: 239292.14964538466 and parameters: {'iterations': 610, 'learning_rate': 0.21407871078206217, 'depth': 8, 'l2_leaf_reg': 4.440998416566279, 'subsample': 0.7659325196641714, 'colsample_bylevel': 0.7001100940963497, 'min_data_in_leaf': 21, 'early_stopping_rounds': 31}. Best is trial 24 with value: 191651.15446141362.
Trial 37 finished with value: 239292.14964538466 and parameters: {'iterations': 610, 'learning_rate': 0.21407871078206217, 'depth': 8, 'l2_leaf_reg': 4.440998416566279, 'subsample': 0.7659325196641714, 'colsample_bylevel': 0.7001100940963497, 'min_data_in_leaf': 21, 'early_stopping_rounds': 31}. Best is tri

[I 2025-06-30 17:27:17,595] Trial 38 finished with value: 275728.59798840724 and parameters: {'iterations': 767, 'learning_rate': 0.09944967659279592, 'depth': 10, 'l2_leaf_reg': 0.1998214614108932, 'subsample': 0.7448798581966226, 'colsample_bylevel': 0.6010671845109126, 'min_data_in_leaf': 24, 'early_stopping_rounds': 33}. Best is trial 24 with value: 191651.15446141362.


Trial 38 finished with value: 275728.59798840724 and parameters: {'iterations': 767, 'learning_rate': 0.09944967659279592, 'depth': 10, 'l2_leaf_reg': 0.1998214614108932, 'subsample': 0.7448798581966226, 'colsample_bylevel': 0.6010671845109126, 'min_data_in_leaf': 24, 'early_stopping_rounds': 33}. Best is trial 24 with value: 191651.15446141362.
Trial 38 finished with value: 275728.59798840724 and parameters: {'iterations': 767, 'learning_rate': 0.09944967659279592, 'depth': 10, 'l2_leaf_reg': 0.1998214614108932, 'subsample': 0.7448798581966226, 'colsample_bylevel': 0.6010671845109126, 'min_data_in_leaf': 24, 'early_stopping_rounds': 33}. Best is trial 24 with value: 191651.15446141362.
Trial 38 finished with value: 275728.59798840724 and parameters: {'iterations': 767, 'learning_rate': 0.09944967659279592, 'depth': 10, 'l2_leaf_reg': 0.1998214614108932, 'subsample': 0.7448798581966226, 'colsample_bylevel': 0.6010671845109126, 'min_data_in_leaf': 24, 'early_stopping_rounds': 33}. Best 

[I 2025-06-30 17:27:19,798] Trial 39 finished with value: 311771.9051539933 and parameters: {'iterations': 956, 'learning_rate': 0.06228491504630561, 'depth': 9, 'l2_leaf_reg': 0.11188485004435174, 'subsample': 0.9919381085597447, 'colsample_bylevel': 0.7664083609866857, 'min_data_in_leaf': 26, 'early_stopping_rounds': 45}. Best is trial 24 with value: 191651.15446141362.


Trial 39 finished with value: 311771.9051539933 and parameters: {'iterations': 956, 'learning_rate': 0.06228491504630561, 'depth': 9, 'l2_leaf_reg': 0.11188485004435174, 'subsample': 0.9919381085597447, 'colsample_bylevel': 0.7664083609866857, 'min_data_in_leaf': 26, 'early_stopping_rounds': 45}. Best is trial 24 with value: 191651.15446141362.
Trial 39 finished with value: 311771.9051539933 and parameters: {'iterations': 956, 'learning_rate': 0.06228491504630561, 'depth': 9, 'l2_leaf_reg': 0.11188485004435174, 'subsample': 0.9919381085597447, 'colsample_bylevel': 0.7664083609866857, 'min_data_in_leaf': 26, 'early_stopping_rounds': 45}. Best is trial 24 with value: 191651.15446141362.
Trial 39 finished with value: 311771.9051539933 and parameters: {'iterations': 956, 'learning_rate': 0.06228491504630561, 'depth': 9, 'l2_leaf_reg': 0.11188485004435174, 'subsample': 0.9919381085597447, 'colsample_bylevel': 0.7664083609866857, 'min_data_in_leaf': 26, 'early_stopping_rounds': 45}. Best is 

[I 2025-06-30 17:27:21,923] Trial 40 finished with value: 255680.75589863322 and parameters: {'iterations': 678, 'learning_rate': 0.14417883555192706, 'depth': 8, 'l2_leaf_reg': 2.0557025251149685, 'subsample': 0.806142746929753, 'colsample_bylevel': 0.7296731116104603, 'min_data_in_leaf': 12, 'early_stopping_rounds': 20}. Best is trial 24 with value: 191651.15446141362.


Trial 40 finished with value: 255680.75589863322 and parameters: {'iterations': 678, 'learning_rate': 0.14417883555192706, 'depth': 8, 'l2_leaf_reg': 2.0557025251149685, 'subsample': 0.806142746929753, 'colsample_bylevel': 0.7296731116104603, 'min_data_in_leaf': 12, 'early_stopping_rounds': 20}. Best is trial 24 with value: 191651.15446141362.
Trial 40 finished with value: 255680.75589863322 and parameters: {'iterations': 678, 'learning_rate': 0.14417883555192706, 'depth': 8, 'l2_leaf_reg': 2.0557025251149685, 'subsample': 0.806142746929753, 'colsample_bylevel': 0.7296731116104603, 'min_data_in_leaf': 12, 'early_stopping_rounds': 20}. Best is trial 24 with value: 191651.15446141362.
Trial 40 finished with value: 255680.75589863322 and parameters: {'iterations': 678, 'learning_rate': 0.14417883555192706, 'depth': 8, 'l2_leaf_reg': 2.0557025251149685, 'subsample': 0.806142746929753, 'colsample_bylevel': 0.7296731116104603, 'min_data_in_leaf': 12, 'early_stopping_rounds': 20}. Best is tri

[I 2025-06-30 17:27:53,558] Trial 41 finished with value: 192215.1141401687 and parameters: {'iterations': 867, 'learning_rate': 0.2909605123637478, 'depth': 9, 'l2_leaf_reg': 0.35639530229324357, 'subsample': 0.6212658144187087, 'colsample_bylevel': 0.7408682523315459, 'min_data_in_leaf': 30, 'early_stopping_rounds': 38}. Best is trial 24 with value: 191651.15446141362.


Trial 41 finished with value: 192215.1141401687 and parameters: {'iterations': 867, 'learning_rate': 0.2909605123637478, 'depth': 9, 'l2_leaf_reg': 0.35639530229324357, 'subsample': 0.6212658144187087, 'colsample_bylevel': 0.7408682523315459, 'min_data_in_leaf': 30, 'early_stopping_rounds': 38}. Best is trial 24 with value: 191651.15446141362.
Trial 41 finished with value: 192215.1141401687 and parameters: {'iterations': 867, 'learning_rate': 0.2909605123637478, 'depth': 9, 'l2_leaf_reg': 0.35639530229324357, 'subsample': 0.6212658144187087, 'colsample_bylevel': 0.7408682523315459, 'min_data_in_leaf': 30, 'early_stopping_rounds': 38}. Best is trial 24 with value: 191651.15446141362.
Trial 41 finished with value: 192215.1141401687 and parameters: {'iterations': 867, 'learning_rate': 0.2909605123637478, 'depth': 9, 'l2_leaf_reg': 0.35639530229324357, 'subsample': 0.6212658144187087, 'colsample_bylevel': 0.7408682523315459, 'min_data_in_leaf': 30, 'early_stopping_rounds': 38}. Best is tri

[I 2025-06-30 17:27:56,625] Trial 42 finished with value: 219302.74619005053 and parameters: {'iterations': 895, 'learning_rate': 0.23600853748172307, 'depth': 9, 'l2_leaf_reg': 0.35847709814917617, 'subsample': 0.6190945643557945, 'colsample_bylevel': 0.6635191982237458, 'min_data_in_leaf': 9, 'early_stopping_rounds': 41}. Best is trial 24 with value: 191651.15446141362.


Trial 42 finished with value: 219302.74619005053 and parameters: {'iterations': 895, 'learning_rate': 0.23600853748172307, 'depth': 9, 'l2_leaf_reg': 0.35847709814917617, 'subsample': 0.6190945643557945, 'colsample_bylevel': 0.6635191982237458, 'min_data_in_leaf': 9, 'early_stopping_rounds': 41}. Best is trial 24 with value: 191651.15446141362.
Trial 42 finished with value: 219302.74619005053 and parameters: {'iterations': 895, 'learning_rate': 0.23600853748172307, 'depth': 9, 'l2_leaf_reg': 0.35847709814917617, 'subsample': 0.6190945643557945, 'colsample_bylevel': 0.6635191982237458, 'min_data_in_leaf': 9, 'early_stopping_rounds': 41}. Best is trial 24 with value: 191651.15446141362.
Trial 42 finished with value: 219302.74619005053 and parameters: {'iterations': 895, 'learning_rate': 0.23600853748172307, 'depth': 9, 'l2_leaf_reg': 0.35847709814917617, 'subsample': 0.6190945643557945, 'colsample_bylevel': 0.6635191982237458, 'min_data_in_leaf': 9, 'early_stopping_rounds': 41}. Best is 

[I 2025-06-30 17:28:41,506] Trial 43 finished with value: 191104.37863729885 and parameters: {'iterations': 821, 'learning_rate': 0.2801695218255082, 'depth': 10, 'l2_leaf_reg': 0.6670438837278988, 'subsample': 0.6378212262786854, 'colsample_bylevel': 0.7203717250461328, 'min_data_in_leaf': 29, 'early_stopping_rounds': 37}. Best is trial 43 with value: 191104.37863729885.


Trial 43 finished with value: 191104.37863729885 and parameters: {'iterations': 821, 'learning_rate': 0.2801695218255082, 'depth': 10, 'l2_leaf_reg': 0.6670438837278988, 'subsample': 0.6378212262786854, 'colsample_bylevel': 0.7203717250461328, 'min_data_in_leaf': 29, 'early_stopping_rounds': 37}. Best is trial 43 with value: 191104.37863729885.
Trial 43 finished with value: 191104.37863729885 and parameters: {'iterations': 821, 'learning_rate': 0.2801695218255082, 'depth': 10, 'l2_leaf_reg': 0.6670438837278988, 'subsample': 0.6378212262786854, 'colsample_bylevel': 0.7203717250461328, 'min_data_in_leaf': 29, 'early_stopping_rounds': 37}. Best is trial 43 with value: 191104.37863729885.
Trial 43 finished with value: 191104.37863729885 and parameters: {'iterations': 821, 'learning_rate': 0.2801695218255082, 'depth': 10, 'l2_leaf_reg': 0.6670438837278988, 'subsample': 0.6378212262786854, 'colsample_bylevel': 0.7203717250461328, 'min_data_in_leaf': 29, 'early_stopping_rounds': 37}. Best is 

[I 2025-06-30 17:28:53,634] Trial 44 finished with value: 201086.33980749716 and parameters: {'iterations': 814, 'learning_rate': 0.2984420500623529, 'depth': 10, 'l2_leaf_reg': 1.150352803105226, 'subsample': 0.6432535628230747, 'colsample_bylevel': 0.8160275061798552, 'min_data_in_leaf': 29, 'early_stopping_rounds': 37}. Best is trial 43 with value: 191104.37863729885.


Trial 44 finished with value: 201086.33980749716 and parameters: {'iterations': 814, 'learning_rate': 0.2984420500623529, 'depth': 10, 'l2_leaf_reg': 1.150352803105226, 'subsample': 0.6432535628230747, 'colsample_bylevel': 0.8160275061798552, 'min_data_in_leaf': 29, 'early_stopping_rounds': 37}. Best is trial 43 with value: 191104.37863729885.
Trial 44 finished with value: 201086.33980749716 and parameters: {'iterations': 814, 'learning_rate': 0.2984420500623529, 'depth': 10, 'l2_leaf_reg': 1.150352803105226, 'subsample': 0.6432535628230747, 'colsample_bylevel': 0.8160275061798552, 'min_data_in_leaf': 29, 'early_stopping_rounds': 37}. Best is trial 43 with value: 191104.37863729885.
Trial 44 finished with value: 201086.33980749716 and parameters: {'iterations': 814, 'learning_rate': 0.2984420500623529, 'depth': 10, 'l2_leaf_reg': 1.150352803105226, 'subsample': 0.6432535628230747, 'colsample_bylevel': 0.8160275061798552, 'min_data_in_leaf': 29, 'early_stopping_rounds': 37}. Best is tri

[I 2025-06-30 17:28:56,412] Trial 45 finished with value: 236175.32035587236 and parameters: {'iterations': 869, 'learning_rate': 0.18141561242178256, 'depth': 10, 'l2_leaf_reg': 0.682608647678517, 'subsample': 0.6191272150084564, 'colsample_bylevel': 0.7173246772847339, 'min_data_in_leaf': 30, 'early_stopping_rounds': 30}. Best is trial 43 with value: 191104.37863729885.


Trial 45 finished with value: 236175.32035587236 and parameters: {'iterations': 869, 'learning_rate': 0.18141561242178256, 'depth': 10, 'l2_leaf_reg': 0.682608647678517, 'subsample': 0.6191272150084564, 'colsample_bylevel': 0.7173246772847339, 'min_data_in_leaf': 30, 'early_stopping_rounds': 30}. Best is trial 43 with value: 191104.37863729885.
Trial 45 finished with value: 236175.32035587236 and parameters: {'iterations': 869, 'learning_rate': 0.18141561242178256, 'depth': 10, 'l2_leaf_reg': 0.682608647678517, 'subsample': 0.6191272150084564, 'colsample_bylevel': 0.7173246772847339, 'min_data_in_leaf': 30, 'early_stopping_rounds': 30}. Best is trial 43 with value: 191104.37863729885.
Trial 45 finished with value: 236175.32035587236 and parameters: {'iterations': 869, 'learning_rate': 0.18141561242178256, 'depth': 10, 'l2_leaf_reg': 0.682608647678517, 'subsample': 0.6191272150084564, 'colsample_bylevel': 0.7173246772847339, 'min_data_in_leaf': 30, 'early_stopping_rounds': 30}. Best is 

[I 2025-06-30 17:28:59,039] Trial 46 finished with value: 416681.3967162321 and parameters: {'iterations': 825, 'learning_rate': 0.0039016007699125463, 'depth': 10, 'l2_leaf_reg': 0.08506476591569744, 'subsample': 0.6647463959061239, 'colsample_bylevel': 0.7754500454372192, 'min_data_in_leaf': 26, 'early_stopping_rounds': 35}. Best is trial 43 with value: 191104.37863729885.


Trial 46 finished with value: 416681.3967162321 and parameters: {'iterations': 825, 'learning_rate': 0.0039016007699125463, 'depth': 10, 'l2_leaf_reg': 0.08506476591569744, 'subsample': 0.6647463959061239, 'colsample_bylevel': 0.7754500454372192, 'min_data_in_leaf': 26, 'early_stopping_rounds': 35}. Best is trial 43 with value: 191104.37863729885.
Trial 46 finished with value: 416681.3967162321 and parameters: {'iterations': 825, 'learning_rate': 0.0039016007699125463, 'depth': 10, 'l2_leaf_reg': 0.08506476591569744, 'subsample': 0.6647463959061239, 'colsample_bylevel': 0.7754500454372192, 'min_data_in_leaf': 26, 'early_stopping_rounds': 35}. Best is trial 43 with value: 191104.37863729885.
Trial 46 finished with value: 416681.3967162321 and parameters: {'iterations': 825, 'learning_rate': 0.0039016007699125463, 'depth': 10, 'l2_leaf_reg': 0.08506476591569744, 'subsample': 0.6647463959061239, 'colsample_bylevel': 0.7754500454372192, 'min_data_in_leaf': 26, 'early_stopping_rounds': 35}.

[I 2025-06-30 17:29:01,154] Trial 47 finished with value: 424164.5951591037 and parameters: {'iterations': 950, 'learning_rate': 0.0011455971996089888, 'depth': 9, 'l2_leaf_reg': 1.9792772454519436, 'subsample': 0.6400316993833451, 'colsample_bylevel': 0.7578869068699705, 'min_data_in_leaf': 29, 'early_stopping_rounds': 38}. Best is trial 43 with value: 191104.37863729885.


Trial 47 finished with value: 424164.5951591037 and parameters: {'iterations': 950, 'learning_rate': 0.0011455971996089888, 'depth': 9, 'l2_leaf_reg': 1.9792772454519436, 'subsample': 0.6400316993833451, 'colsample_bylevel': 0.7578869068699705, 'min_data_in_leaf': 29, 'early_stopping_rounds': 38}. Best is trial 43 with value: 191104.37863729885.
Trial 47 finished with value: 424164.5951591037 and parameters: {'iterations': 950, 'learning_rate': 0.0011455971996089888, 'depth': 9, 'l2_leaf_reg': 1.9792772454519436, 'subsample': 0.6400316993833451, 'colsample_bylevel': 0.7578869068699705, 'min_data_in_leaf': 29, 'early_stopping_rounds': 38}. Best is trial 43 with value: 191104.37863729885.
Trial 47 finished with value: 424164.5951591037 and parameters: {'iterations': 950, 'learning_rate': 0.0011455971996089888, 'depth': 9, 'l2_leaf_reg': 1.9792772454519436, 'subsample': 0.6400316993833451, 'colsample_bylevel': 0.7578869068699705, 'min_data_in_leaf': 29, 'early_stopping_rounds': 38}. Best 

[I 2025-06-30 17:29:12,954] Trial 48 finished with value: 202144.32793141753 and parameters: {'iterations': 780, 'learning_rate': 0.24056800772739648, 'depth': 10, 'l2_leaf_reg': 2.9151208029052, 'subsample': 0.6003847334191734, 'colsample_bylevel': 0.7395838685537687, 'min_data_in_leaf': 27, 'early_stopping_rounds': 28}. Best is trial 43 with value: 191104.37863729885.


Trial 48 finished with value: 202144.32793141753 and parameters: {'iterations': 780, 'learning_rate': 0.24056800772739648, 'depth': 10, 'l2_leaf_reg': 2.9151208029052, 'subsample': 0.6003847334191734, 'colsample_bylevel': 0.7395838685537687, 'min_data_in_leaf': 27, 'early_stopping_rounds': 28}. Best is trial 43 with value: 191104.37863729885.
Trial 48 finished with value: 202144.32793141753 and parameters: {'iterations': 780, 'learning_rate': 0.24056800772739648, 'depth': 10, 'l2_leaf_reg': 2.9151208029052, 'subsample': 0.6003847334191734, 'colsample_bylevel': 0.7395838685537687, 'min_data_in_leaf': 27, 'early_stopping_rounds': 28}. Best is trial 43 with value: 191104.37863729885.
Trial 48 finished with value: 202144.32793141753 and parameters: {'iterations': 780, 'learning_rate': 0.24056800772739648, 'depth': 10, 'l2_leaf_reg': 2.9151208029052, 'subsample': 0.6003847334191734, 'colsample_bylevel': 0.7395838685537687, 'min_data_in_leaf': 27, 'early_stopping_rounds': 28}. Best is trial 

[I 2025-06-30 17:29:14,958] Trial 49 finished with value: 247581.05166034037 and parameters: {'iterations': 891, 'learning_rate': 0.1649062241974827, 'depth': 8, 'l2_leaf_reg': 0.22802987601361496, 'subsample': 0.696435625302284, 'colsample_bylevel': 0.9100222982935362, 'min_data_in_leaf': 24, 'early_stopping_rounds': 41}. Best is trial 43 with value: 191104.37863729885.


Trial 49 finished with value: 247581.05166034037 and parameters: {'iterations': 891, 'learning_rate': 0.1649062241974827, 'depth': 8, 'l2_leaf_reg': 0.22802987601361496, 'subsample': 0.696435625302284, 'colsample_bylevel': 0.9100222982935362, 'min_data_in_leaf': 24, 'early_stopping_rounds': 41}. Best is trial 43 with value: 191104.37863729885.
Trial 49 finished with value: 247581.05166034037 and parameters: {'iterations': 891, 'learning_rate': 0.1649062241974827, 'depth': 8, 'l2_leaf_reg': 0.22802987601361496, 'subsample': 0.696435625302284, 'colsample_bylevel': 0.9100222982935362, 'min_data_in_leaf': 24, 'early_stopping_rounds': 41}. Best is trial 43 with value: 191104.37863729885.
Trial 49 finished with value: 247581.05166034037 and parameters: {'iterations': 891, 'learning_rate': 0.1649062241974827, 'depth': 8, 'l2_leaf_reg': 0.22802987601361496, 'subsample': 0.696435625302284, 'colsample_bylevel': 0.9100222982935362, 'min_data_in_leaf': 24, 'early_stopping_rounds': 41}. Best is tri

In [None]:
##  BEST PARAMS OF CATBOOST OPTUNA
Best trial:
  Value (RMSE on validation set): 191104.3786
  Best Hyperparameters:
    iterations: 821
    learning_rate: 0.2801695218255082
    depth: 10
    l2_leaf_reg: 0.6670438837278988
    subsample: 0.6378212262786854
    colsample_bylevel: 0.7203717250461328
    min_data_in_leaf: 29
    early_stopping_rounds: 37

In [None]:
###CATBOOSTOPTUNA WITHOUT ONEHOT 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from catboost import CatBoostRegressor, Pool
import optuna
import logging
import sys

# --- 0. Data Loading and Preprocessing ---
print("--- Starting Data Loading and Preprocessing ---")
df = pd.read_csv("data/nonull_smalldata.csv")

# Print columns and dtypes for verification
print("\nOriginal DataFrame Columns:", df.columns.tolist())
print("\nOriginal DataFrame dtypes:\n", df.dtypes)

numeric_columns = ["bedroomCount", "toilet_and_bath", "habitableSurface", "facedeCount", "hasTerrace", "totalParkingCount"]
categorical_columns = ["type", "subtype", "province", "locality", "postCode", "buildingCondition", "epcScore"]

# Ensure all specified columns exist in the DataFrame
for col in numeric_columns + categorical_columns:
    if col not in df.columns:
        raise ValueError(f"Column '{col}' not found in DataFrame.")

# Define X and y BEFORE any column modifications for clear separation
X = df[numeric_columns + categorical_columns].copy() # Features
y = df["price"].copy() # Target

# Identify categorical feature indices for CatBoost
# We need the actual integer indices of these columns in the `X` DataFrame
cat_features_indices = [X.columns.get_loc(col) for col in categorical_columns]

print(f"\nIdentified categorical feature indices: {cat_features_indices}")
print(f"Corresponding categorical column names: {[X.columns[i] for i in cat_features_indices]}")

print("\nFeatures (X) shape:", X.shape)
print("Target (y) shape:", y.shape)

# Split data into training, validation, and test sets
# Ensure random_state is used for reproducibility
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=1234)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=1234)

print("\nData splitting complete.")
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

# Create CatBoost Pool objects, passing the categorical feature indices
train_pool = Pool(X_train, y_train, cat_features=cat_features_indices)
val_pool = Pool(X_val, y_val, cat_features=cat_features_indices)
test_pool = Pool(X_test, y_test, cat_features=cat_features_indices)

print("--- Data Preprocessing Complete ---")

# --- 2. Define the Optuna Objective Function ---
def objective(trial):
    """
    Objective function for Optuna to optimize CatBoost hyperparameters.
    It returns the validation RMSE for a given set of hyperparameters.
    """
    params = {
        "iterations": trial.suggest_int("iterations", 100, 1000),
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-3, 0.3),
        "depth": trial.suggest_int("depth", 4, 10),
        "l2_leaf_reg": trial.suggest_loguniform("l2_leaf_reg", 1e-2, 10),
        "subsample": trial.suggest_uniform("subsample", 0.6, 1.0),
        "colsample_bylevel": trial.suggest_uniform("colsample_bylevel", 0.6, 1.0),
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 30),
        "loss_function": "RMSE",
        "eval_metric": "RMSE",
        "random_seed": 42,
        "verbose": 0,
        "early_stopping_rounds": trial.suggest_int("early_stopping_rounds", 10, 50),
    }

    model = CatBoostRegressor(**params)

    pruning_callback = optuna.integration.CatBoostPruningCallback(trial, "RMSE")

    try:
        model.fit(
            train_pool,
            eval_set=val_pool,
            early_stopping_rounds=params["early_stopping_rounds"],
            callbacks=[pruning_callback],
            verbose=0 # Suppress verbose output during tuning
        )
    except optuna.exceptions.TrialPruned:
        raise

    val_predictions = model.predict(X_val) # Predict on X_val directly for evaluation
    rmse = np.sqrt(mean_squared_error(y_val, val_predictions))

    return rmse

# --- 3. Set up and Run Optuna Study ---
if __name__ == "__main__":
    optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))

    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=42),
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=10)
    )

    print("\n--- Starting Optuna optimization ---")
    study.optimize(objective, n_trials=50, timeout=600) # Run 50 trials or for 10 minutes

    print("\n--- Optimization finished ---")
    print(f"Number of finished trials: {len(study.trials)}")
    print(f"Number of pruned trials: {len(study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.PRUNED]))}")

    print("\nBest trial:")
    trial = study.best_trial

    print(f"  Value (RMSE on validation set): {trial.value:.4f}")
    print("  Best Hyperparameters:")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    # --- 4. Train the final model with the best hyperparameters ---
    print("\n--- Training final model with best hyperparameters ---")
    best_params = trial.params
    best_params_for_final_model = {k: v for k, v in best_params.items() if k != "early_stopping_rounds"}

    best_params_for_final_model["iterations"] = 10000 # Set a high number, rely on early stopping
    best_params_for_final_model["verbose"] = 100 # Show some progress for the final training

    final_cat_regressor = CatBoostRegressor(**best_params_for_final_model)

    final_cat_regressor.fit(
        train_pool,
        eval_set=val_pool,
        early_stopping_rounds=best_params["early_stopping_rounds"], # Use the best found early stopping
        verbose=best_params_for_final_model["verbose"],
    )

    print(f"Best iteration of final model: {final_cat_regressor.get_best_iteration()}")

    # --- 5. Evaluate the final model on the unseen test set ---
    final_predictions = final_cat_regressor.predict(test_pool)

    final_mse = mean_squared_error(y_test, final_predictions)
    final_rmse = np.sqrt(final_mse)
    final_r2 = r2_score(y_test, final_predictions)

    print(f"\n--- Final Model Performance on Test Set ---")
    print(f"  MSE: {final_mse:.4f}")
    print(f"  RMSE: {final_rmse:.4f}")
    print(f"  R-squared: {final_r2:.4f}")

    # Optionally, visualize the optimization process
    try:
        from optuna.visualization import plot_optimization_history, plot_parallel_coordinate, plot_slice
        print("\n--- Generating Optuna Visualizations (requires plotly and kaleido) ---")
        fig_history = plot_optimization_history(study)
        fig_history.show()

        fig_parallel = plot_parallel_coordinate(study)
        fig_parallel.show()

        fig_slice = plot_slice(study)
        fig_slice.show()
    except ImportError:
        print("\nInstall plotly and kaleido for Optuna visualizations: pip install plotly kaleido")

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor
import optuna
import logging
import sys

# Import the specific callbacks from their correct locations for XGBoost 3.0.2
from optuna.integration.xgboost import XGBoostPruningCallback
from xgboost.callback import EarlyStopping # Directly from xgboost.callback


# --- 0. Data Loading and Preprocessing ---
print("--- Starting Data Loading and Preprocessing ---")
df = pd.read_csv("data/nonull_smalldata.csv")

# Print columns and dtypes for verification
print("\nOriginal DataFrame Columns:", df.columns.tolist())
print("\nOriginal DataFrame dtypes:\n", df.dtypes)

numeric_columns = ["bedroomCount", "toilet_and_bath", "habitableSurface", "facedeCount", "hasTerrace", "totalParkingCount"]
categorical_columns = ["type", "subtype", "province", "locality", "postCode", "buildingCondition", "epcScore"]

# Ensure all specified categorical columns actually exist in the DataFrame
for col in categorical_columns:
    if col not in df.columns:
        raise ValueError(f"Categorical column '{col}' not found in DataFrame.")

# Store 'price' column before potentially modifying df
price_column_data = df['price']

# One-Hot Encode categorical columns
encoder = OneHotEncoder(sparse_output=False, drop="first")
one_hot_encoded = encoder.fit_transform(df[categorical_columns])
one_hot_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out(categorical_columns))

# Concatenate one-hot encoded features with numeric features
df_numeric = df[numeric_columns]
X = pd.concat([df_numeric.reset_index(drop=True), one_hot_df.reset_index(drop=True)], axis=1)

y = price_column_data.reset_index(drop=True)

print("\nFeatures (X) shape after encoding:", X.shape)
print("Target (y) shape:", y.shape)

# Split data into training, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=1234)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=1234)

print("\nData splitting complete.")
print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

print("--- Data Preprocessing Complete ---")

# --- 2. Define the Optuna Objective Function for XGBoost (Correct for XGBoost 3.0.2) ---
def objective(trial):
    """
    Objective function for Optuna to optimize XGBoost hyperparameters.
    It returns the validation RMSE for a given set of hyperparameters.
    """
    # 2.1. Suggest hyperparameters for XGBoost
    params = {
        "objective": "reg:squarederror",  # For regression tasks (MSE)
        "eval_metric": "rmse",            # Metric for early stopping
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000), # Max number of boosting rounds
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-3, 0.3),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "subsample": trial.suggest_uniform("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.6, 1.0),
        "gamma": trial.suggest_loguniform("gamma", 1e-8, 1.0),
        "min_child_weight": trial.suggest_loguniform("min_child_weight", 1e-2, 10),
        "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 1.0),
        "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 1.0),
        "random_state": 42,
        "n_jobs": -1, # Use all available cores
    }

    # 2.2. Initialize and Train XGBoost Regressor with suggested parameters
    model = XGBRegressor(**params)

    # Optuna's XGBoostPruningCallback for pruning (requires 'callbacks' argument in fit)
    pruning_callback = XGBoostPruningCallback(trial, "validation_0-rmse")

    # XGBoost's native EarlyStopping callback (requires 'callbacks' argument in fit)
    early_stopping_rounds_for_trial = 50 # A fixed value for early stopping in Optuna trials
    early_stopping_callback = EarlyStopping(
        rounds=early_stopping_rounds_for_trial,
        metric_name="validation_0-rmse",
        data_name="validation_0",
        maximize=False # Minimize RMSE
        # 'verbose' argument is not used here for newer XGBoost versions
    )

    try:
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)], # Pass eval_set as a list of tuples
           # callbacks=[pruning_callback, early_stopping_callback], # Pass all callbacks as a list
            verbose=False # Suppress training output for cleaner Optuna logs during tuning
        )
    except optuna.exceptions.TrialPruned:
        raise

    # 2.3. Make predictions on the validation set
    # XGBoost's `predict` method automatically uses the best iteration if early stopping occurred.
    val_predictions = model.predict(X_val)

    # 2.4. Calculate the evaluation metric (RMSE)
    rmse = np.sqrt(mean_squared_error(y_val, val_predictions))

    return rmse

# --- 3. Set up and Run Optuna Study ---
if __name__ == "__main__":
    # Set up Optuna logging (optional, but good for debugging)
    optuna.logging.get_logger("optuna").addHandler(logging.StreamHandler(sys.stdout))

    study = optuna.create_study(
        direction="minimize",  # We want to minimize RMSE
        sampler=optuna.samplers.TPESampler(seed=42), # Tree-structured Parzen Estimator (TPE) sampler
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=10) # Pruner for early stopping unpromising trials
    )

    print(f"Starting Optuna optimization for XGBoost (Version {xgb.__version__})...")
    # Optimize the objective function
    study.optimize(objective, n_trials=50, timeout=600) # Run 50 trials or for 600 seconds (10 minutes)

    print("\nOptimization finished.")
    print(f"Number of finished trials: {len(study.trials)}")
    print(f"Number of pruned trials: {len(study.get_trials(deepcopy=False, states=[optuna.trial.TrialState.PRUNED]))}")

    print("\nBest trial:")
    trial = study.best_trial

    print(f"  Value (RMSE on validation set): {trial.value:.4f}")
    print("  Best Hyperparameters:")
    for key, value in trial.params.items():
        print(f"    {key}: {value}")

    # --- 4. Train the final model with the best hyperparameters ---
    print("\nTraining final model with best hyperparameters...")
    best_params = trial.params
    # Create a copy and ensure n_estimators is high, letting early stopping handle the actual rounds
    final_model_params = {k: v for k, v in best_params.items()}
    final_model_params["n_estimators"] = 10000 # Set a high number, rely on early stopping
    final_model_params["verbose"] = False # Suppress verbose output for final training, or set to an integer

    final_xgb_regressor = XGBRegressor(**final_model_params)

    # Define early stopping callback for the final model training
    final_early_stopping_rounds = 50 # You can keep this fixed or use a value from tuning
    final_early_stopping_callback = EarlyStopping(
        rounds=final_early_stopping_rounds,
        metric_name="validation_0-rmse",
        data_name="validation_0",
        maximize=False,
        verbose=True # Set to True to see final model training progress
    )

    final_xgb_regressor.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)], # Use validation set for early stopping
        #callbacks=[final_early_stopping_callback], # Pass the callback here
       # verbose=False # Verbosity of fitting is controlled by the callback's verbose setting
    )

    # The best iteration from early stopping is automatically used for predictions
    print(f"Best iteration of final model: {final_xgb_regressor.best_iteration}")

    # --- 5. Evaluate the final model on the unseen test set ---
    final_predictions = final_xgb_regressor.predict(X_test)

    final_mse = mean_squared_error(y_test, final_predictions)
    final_rmse = np.sqrt(final_mse)
    final_r2 = r2_score(y_test, final_predictions)

    print(f"\n--- Final Model Performance on Test Set ---")
    print(f"  MSE: {final_mse:.4f}")
    print(f"  RMSE: {final_rmse:.4f}")
    print(f"  R-squared: {final_r2:.4f}")

    # Optionally, visualize the optimization process
    try:
        from optuna.visualization import plot_optimization_history, plot_parallel_coordinate, plot_slice
        print("\n--- Generating Optuna Visualizations (requires plotly and kaleido) ---")
        fig_history = plot_optimization_history(study)
        fig_history.show()

        fig_parallel = plot_parallel_coordinate(study)
        fig_parallel.show()

        fig_slice = plot_slice(study)
        fig_slice.show()
    except ImportError:
        print("\nInstall plotly and kaleido for Optuna visualizations: pip install plotly kaleido")

--- Starting Data Loading and Preprocessing ---

Original DataFrame Columns: ['type', 'subtype', 'bedroomCount', 'toilet_and_bath', 'province', 'locality', 'postCode', 'habitableSurface', 'buildingCondition', 'facedeCount', 'hasTerrace', 'epcScore', 'price', 'totalParkingCount']

Original DataFrame dtypes:
 type                  object
subtype               object
bedroomCount         float64
toilet_and_bath      float64
province              object
locality              object
postCode               int64
habitableSurface     float64
buildingCondition     object
facedeCount          float64
hasTerrace              bool
epcScore              object
price                float64
totalParkingCount    float64
dtype: object

Features (X) shape after encoding: (56005, 4620)
Target (y) shape: (56005,)


[I 2025-06-30 18:36:20,821] A new study created in memory with name: no-name-2fe33898-63d0-45f2-8536-0ec8089acdfe



Data splitting complete.
X_train shape: (39203, 4620), y_train shape: (39203,)
X_val shape: (8401, 4620), y_val shape: (8401,)
X_test shape: (8401, 4620), y_test shape: (8401,)
--- Data Preprocessing Complete ---
A new study created in memory with name: no-name-2fe33898-63d0-45f2-8536-0ec8089acdfe
A new study created in memory with name: no-name-2fe33898-63d0-45f2-8536-0ec8089acdfe
A new study created in memory with name: no-name-2fe33898-63d0-45f2-8536-0ec8089acdfe
A new study created in memory with name: no-name-2fe33898-63d0-45f2-8536-0ec8089acdfe
Starting Optuna optimization for XGBoost (Version 3.0.2)...


  "learning_rate": trial.suggest_loguniform("learning_rate", 1e-3, 0.3),
  "subsample": trial.suggest_uniform("subsample", 0.6, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.6, 1.0),
  "gamma": trial.suggest_loguniform("gamma", 1e-8, 1.0),
  "min_child_weight": trial.suggest_loguniform("min_child_weight", 1e-2, 10),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 1.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 1.0),
[I 2025-06-30 18:39:55,712] Trial 0 finished with value: 259665.94784065822 and parameters: {'n_estimators': 437, 'learning_rate': 0.22648248189516848, 'max_depth': 8, 'subsample': 0.8394633936788146, 'colsample_bytree': 0.6624074561769746, 'gamma': 1.7699302940633311e-07, 'min_child_weight': 0.014936568554617643, 'reg_alpha': 0.08499808989182997, 'reg_lambda': 0.0006440507553993703}. Best is trial 0 with value: 259665.94784065822.


Trial 0 finished with value: 259665.94784065822 and parameters: {'n_estimators': 437, 'learning_rate': 0.22648248189516848, 'max_depth': 8, 'subsample': 0.8394633936788146, 'colsample_bytree': 0.6624074561769746, 'gamma': 1.7699302940633311e-07, 'min_child_weight': 0.014936568554617643, 'reg_alpha': 0.08499808989182997, 'reg_lambda': 0.0006440507553993703}. Best is trial 0 with value: 259665.94784065822.
Trial 0 finished with value: 259665.94784065822 and parameters: {'n_estimators': 437, 'learning_rate': 0.22648248189516848, 'max_depth': 8, 'subsample': 0.8394633936788146, 'colsample_bytree': 0.6624074561769746, 'gamma': 1.7699302940633311e-07, 'min_child_weight': 0.014936568554617643, 'reg_alpha': 0.08499808989182997, 'reg_lambda': 0.0006440507553993703}. Best is trial 0 with value: 259665.94784065822.
Trial 0 finished with value: 259665.94784065822 and parameters: {'n_estimators': 437, 'learning_rate': 0.22648248189516848, 'max_depth': 8, 'subsample': 0.8394633936788146, 'colsample_

  "learning_rate": trial.suggest_loguniform("learning_rate", 1e-3, 0.3),
  "subsample": trial.suggest_uniform("subsample", 0.6, 1.0),
  "colsample_bytree": trial.suggest_uniform("colsample_bytree", 0.6, 1.0),
  "gamma": trial.suggest_loguniform("gamma", 1e-8, 1.0),
  "min_child_weight": trial.suggest_loguniform("min_child_weight", 1e-2, 10),
  "reg_alpha": trial.suggest_loguniform("reg_alpha", 1e-8, 1.0),
  "reg_lambda": trial.suggest_loguniform("reg_lambda", 1e-8, 1.0),
[I 2025-06-30 18:49:16,213] Trial 1 finished with value: 346406.8839108318 and parameters: {'n_estimators': 737, 'learning_rate': 0.001124579825911934, 'max_depth': 10, 'subsample': 0.9329770563201687, 'colsample_bytree': 0.6849356442713105, 'gamma': 2.8483918709107956e-07, 'min_child_weight': 0.03549878832196503, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995}. Best is trial 0 with value: 259665.94784065822.


Trial 1 finished with value: 346406.8839108318 and parameters: {'n_estimators': 737, 'learning_rate': 0.001124579825911934, 'max_depth': 10, 'subsample': 0.9329770563201687, 'colsample_bytree': 0.6849356442713105, 'gamma': 2.8483918709107956e-07, 'min_child_weight': 0.03549878832196503, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995}. Best is trial 0 with value: 259665.94784065822.
Trial 1 finished with value: 346406.8839108318 and parameters: {'n_estimators': 737, 'learning_rate': 0.001124579825911934, 'max_depth': 10, 'subsample': 0.9329770563201687, 'colsample_bytree': 0.6849356442713105, 'gamma': 2.8483918709107956e-07, 'min_child_weight': 0.03549878832196503, 'reg_alpha': 2.716051144654844e-06, 'reg_lambda': 0.00015777981883364995}. Best is trial 0 with value: 259665.94784065822.
Trial 1 finished with value: 346406.8839108318 and parameters: {'n_estimators': 737, 'learning_rate': 0.001124579825911934, 'max_depth': 10, 'subsample': 0.9329770563201687, 'col

TypeError: EarlyStopping.__init__() got an unexpected keyword argument 'verbose'

In [3]:
import xgboost as xgb
print(f"Your XGBoost version: {xgb.__version__}")

Your XGBoost version: 3.0.2


In [None]:
# BEST PARAMS OF XGBOOSTOPTUNA
Best trial:
  Value (RMSE on validation set): 259665.9478
  Best Hyperparameters:
    n_estimators: 437
    learning_rate: 0.22648248189516848
    max_depth: 8
    subsample: 0.8394633936788146
    colsample_bytree: 0.6624074561769746
    gamma: 1.7699302940633311e-07
    min_child_weight: 0.014936568554617643
    reg_alpha: 0.08499808989182997
    reg_lambda: 0.0006440507553993703

In [None]:
xgb_regressor = XGBRegressor(
    objective='reg:squarederror', # For regression tasks, specify the objective
    n_estimators=437,             # Number of boosting rounds
    learning_rate=0.22648248189516848,            # Step size shrinkage
    max_depth=8,                  # Maximum depth of a tree
    subsample=0.8394633936788146,                # Subsample ratio of the training instance
    colsample_bytree=0.6624074561769746,         # Subsample ratio of columns
    random_state=42,              # For reproducibility
    n_jobs=-1 ,
)
'''gamma=1.7699302940633311e-07,
min_child_weight= 0.014936568554617643,
reg_alpha= 0.08499808989182997,
reg_lambda= 0.0006440507553993703,'''
# Use all available CPU cores


# Fit the model to the training data
xgb_regressor.fit(X_train, y_train)

# Make predictions on the test data
xgb_predictions = xgb_regressor.predict(X_test)

# Evaluate the XGBoost model
xgb_mse = mean_squared_error(y_test, xgb_predictions)
xgb_r2 = r2_score(y_test, xgb_predictions)

print(f"XGBoost MSE: {xgb_mse}")
predictions = xgb_regressor.predict(X_test)
accu = r2_score(y_test, predictions)
print("Accuracy of test:", accu)

pred = xgb_regressor.predict(X_train)
accu = r2_score(y_train, pred)
print("Accuracy of train:", accu)

NameError: name 'X_train' is not defined