In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import seaborn as sns

In [3]:
%pip install gdown



In [4]:
RANDOM_STATE = 42

In [5]:
CARS_FILE_ID = '1liFEe1-yFISPSpRSvbv1wIH_avYNGmBI'

random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

!gdown --id {CARS_FILE_ID}

Downloading...
From (original): https://drive.google.com/uc?id=1liFEe1-yFISPSpRSvbv1wIH_avYNGmBI
From (redirected): https://drive.google.com/uc?id=1liFEe1-yFISPSpRSvbv1wIH_avYNGmBI&confirm=t&uuid=27256348-6c25-4b37-aa9b-68da2bc445db
To: /content/dataset.csv
100% 1.01G/1.01G [00:17<00:00, 57.7MB/s]


In [6]:
df = pd.read_csv('dataset.csv')
print(df.shape)

(604047, 24)


In [7]:
df_with_na_column = df[df['engine_displacement'].isna()]
df_with_na_column

Unnamed: 0,production_year,mileage,condition,owners_number,pts_original,horse_power,accidents_resolution,region,seller_type,brand,...,engine_displacement,engine_power,fuel_rate,steering_wheel,price,price_segment,tags,auto_class,equipment,complectation_available_options
210905,2003,175000,CONDITION_OK,0,True,,,Чебаркуль,PRIVATE,Hyundai,...,,,,LEFT,380000,MEDIUM,available_for_checkup;pts_original;real_photo;...,,seats-5,


In [8]:
df.drop(210905, inplace=True)

In [9]:
df['fuel_rate'].fillna(df['fuel_rate'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['fuel_rate'].fillna(df['fuel_rate'].median(), inplace=True)


In [10]:
df['pts_original'].fillna(True, inplace=True)
df['accidents_resolution'].fillna('OK', inplace=True)
df['auto_class'].fillna('NOT SPECIFIED', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['pts_original'].fillna(True, inplace=True)
  df['pts_original'].fillna(True, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['accidents_resolution'].fillna('OK', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because 

In [11]:
df.drop('horse_power', axis=1, inplace=True)

In [12]:
def get_unique_values(series, sep=';'):
    uniq = set()
    for cell in series.dropna():
        for piece in cell.split(sep):
            s = piece.strip()
            if s:
                uniq.add(s)
    return np.array(list(uniq))

all_tags = get_unique_values(df['tags'])
all_options = get_unique_values(df['complectation_available_options'])
all_equipments = get_unique_values(df['equipment'])

In [13]:
def create_binary_features(df, column, unique_values, sep=';'):
    return (
        df[column]
        .str.get_dummies(sep=sep)
        .reindex(columns=unique_values, fill_value=0)
        .astype('int8')
    )

tags_dummies = create_binary_features(df, 'tags', all_tags)
options_dummies = create_binary_features(df, 'complectation_available_options', all_options)
equipment_dummies = create_binary_features(df, 'equipment', all_equipments)

In [14]:
tags_dummies = tags_dummies.astype('Sparse[int]')
options_dummies = options_dummies.astype('Sparse[int]')
equipment_dummies = equipment_dummies.astype('Sparse[int]')

In [15]:
full_df = pd.concat([
    df.drop(columns=['tags', 'complectation_available_options', 'equipment']),
    tags_dummies,
    options_dummies.drop(columns=['condition']),
    equipment_dummies.drop(columns=['condition'])
], axis=1)

In [16]:
duplicated_cols = full_df.columns[full_df.columns.duplicated()].unique()
for col in duplicated_cols:
    full_df[col] = full_df[col].astype("int8")

In [17]:
import numpy as np

names = full_df.columns[full_df.columns.duplicated()].unique()
new_cols = {}

for name in names:
    cols_i = [col for col in full_df.columns if col == name]
    sub_df = full_df.loc[:, cols_i].astype('int8')   # DataFrame shape (n_rows, k)

    # берём максимум по строкам → Series shape (n_rows,)
    max_series = sub_df.max(axis=1).astype('int8')

    new_cols[name] = max_series

    # удаляем *все* старые колонки с этим именем
    full_df.drop(columns=cols_i, inplace=True)

# теперь присваиваем новые колонки–Series (они одномерны!)
for name, s in new_cols.items():
    full_df[name] = s


  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s


In [18]:
full_df.sample(10)

Unnamed: 0,production_year,mileage,condition,owners_number,accidents_resolution,region,seller_type,brand,model,body_type,...,steel-wheels,ptf,central-airbag,esp,body-kit,mirrors-heat,immo,lane-keeping-assist,drl,projection-display
436947,2024,0,CONDITION_OK,0,OK,Санкт-Петербург,COMMERCIAL,Skoda,Karoq,ALLROAD_5_DOORS,...,0,0,0,1,0,1,1,0,1,0
329729,2020,40460,CONDITION_OK,1,ERROR,Москва,PRIVATE,Mercedes-Benz,E-Класс,COUPE_HARDTOP,...,0,0,0,1,0,1,1,0,0,0
343497,2000,380000,CONDITION_OK,4,OK,Москва,PRIVATE,Mercedes-Benz,M-Класс,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0
439784,2019,170000,CONDITION_OK,1,ERROR,Москва,PRIVATE,Skoda,Octavia,LIFTBACK,...,0,1,0,1,0,0,1,0,1,0
550845,2023,107,CONDITION_OK,1,OK,Краснодар,PRIVATE,Lada (ВАЗ),Largus,WAGON_5_DOORS,...,1,0,0,0,0,1,1,0,1,0
431808,2015,110727,CONDITION_OK,1,OK,Уфа,COMMERCIAL,Renault,Sandero,HATCHBACK_5_DOORS,...,1,1,0,0,0,1,1,0,0,0
167063,2024,0,CONDITION_OK,0,OK,Химки,COMMERCIAL,Genesis,GV80 Coupe,ALLROAD_5_DOORS,...,0,0,0,1,0,1,1,1,1,1
391523,2022,13000,CONDITION_OK,0,OK,Владивосток,COMMERCIAL,Nissan,X-Trail,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0
210052,2024,100,CONDITION_OK,1,OK,Новосибирск,PRIVATE,Hyundai,Elantra,SEDAN,...,0,0,0,0,0,0,0,0,0,0
46816,2024,34,CONDITION_OK,1,OK,Москва,COMMERCIAL,BMW,X5,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0


In [19]:
import pandas as pd
import numpy as np
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

numeric_df = full_df.select_dtypes(include=[np.number])
numeric_df = numeric_df.loc[:, numeric_df.nunique() > 1]

corr = numeric_df.corr().abs().fillna(0)

dist = 1 - corr

dist = (dist + dist.T) / 2
np.fill_diagonal(dist.values, 0)

dist = dist.clip(lower=0)

dist_vect = squareform(dist)
Z = hierarchy.linkage(dist_vect, method='average')

max_d = 0.4
clusters = hierarchy.fcluster(Z, t=max_d, criterion='distance')

cluster_df = pd.DataFrame({'feature': numeric_df.columns, 'cluster': clusters})
to_keep = ['production_year', 'mileage', 'owners_number', 'doors_count', 'seats', 'engine_displacement', 'engine_power', 'fuel_rate']
for c in cluster_df['cluster'].unique():
    members = cluster_df.loc[cluster_df['cluster'] == c, 'feature']
    avg_corr = corr.loc[members, members].mean().sort_values(ascending=False)
    to_keep.append(avg_corr.index[0])
to_keep = list(dict.fromkeys(to_keep))

to_drop = [col for col in numeric_df.columns if col not in to_keep]
df_reduced = full_df.drop(columns=to_drop)

print(f"Удалено числовых коррелирующих признаков: {len(to_drop)}")
print(f"Осталось признаков в df_reduced: {df_reduced.shape[1]} из {full_df.shape[1]}")

Удалено числовых коррелирующих признаков: 112
Осталось признаков в df_reduced: 304 из 416


In [20]:
df_reduced.sample(10)

Unnamed: 0,production_year,mileage,condition,owners_number,accidents_resolution,region,seller_type,brand,model,body_type,...,sport-pedals,tja,isofix-front,21-inch-wheels,leather-gear-stick,steel-wheels,ptf,central-airbag,body-kit,projection-display
518746,2007,33000,CONDITION_OK,1,ERROR,Аксай,PRIVATE,Lada (ВАЗ),2107,SEDAN,...,0,0,0,0,0,0,0,0,0,0
224119,2021,23211,CONDITION_OK,1,OK,Москва,PRIVATE,Hyundai,Solaris,SEDAN,...,0,0,0,0,0,0,1,0,0,0
351020,2021,6122,CONDITION_OK,1,OK,Уссурийск,COMMERCIAL,Mini,Countryman,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0
424138,2016,4500,CONDITION_OK,1,ERROR,Погар,PRIVATE,Renault,Kaptur,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0
89776,2011,245600,CONDITION_OK,3,OK,Тверь,PRIVATE,Chevrolet,Aveo,SEDAN,...,0,0,0,0,0,0,0,0,0,0
297634,2023,18300,CONDITION_OK,1,OK,Москва,PRIVATE,Lexus,RX,ALLROAD_5_DOORS,...,0,0,0,1,1,0,1,0,0,0
449720,2024,0,CONDITION_OK,0,OK,Москва,COMMERCIAL,Soueast,S07,ALLROAD_5_DOORS,...,0,1,0,0,0,0,1,0,0,0
49847,2020,121000,CONDITION_OK,1,OK,Москва,PRIVATE,BMW,X6,ALLROAD_5_DOORS,...,0,0,0,1,1,0,1,0,1,1
48872,2024,0,CONDITION_OK,0,OK,Краснодар,COMMERCIAL,BMW,X6,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0
846,1999,363500,CONDITION_OK,3,OK,Уфа,PRIVATE,Alfa Romeo,166,SEDAN,...,0,0,0,0,0,0,1,0,0,0


### Добавим новые признаки

In [21]:
from sklearn.preprocessing import PolynomialFeatures

In [22]:
df_mod = df_reduced.copy()

In [23]:
df_mod["age"] = 2025 - df_mod["production_year"]

In [24]:
numeric_cols = ['mileage', 'engine_displacement', 'engine_power', 'age']

In [25]:
df_mod['log_price'] = np.log1p(df['price'])

In [26]:
df_mod.drop(columns=['price'], inplace=True)

Прологарифмируем некоторые признаки

In [27]:
for col in numeric_cols:
    df_mod[f'log_{col}'] = np.log1p(df_mod[col])

Возьмем корни от числовых признаков

In [28]:
for col in numeric_cols:
    df_mod[f'square_{col}'] = df_mod[col]**2

Полиномиальные преобразования

In [29]:
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_array = poly.fit_transform(df_mod[numeric_cols].fillna(0))
poly_feature_names = poly.get_feature_names_out(numeric_cols)
poly_df = pd.DataFrame(poly_array, columns=poly_feature_names, index=df_mod.index)

In [30]:
poly_new = poly_df.drop(columns=[c for c in poly_df.columns if c in numeric_cols])
df_mod = pd.concat([df_mod, poly_new.add_prefix('poly_')], axis=1)

Базовые отношения двигателя

In [31]:
df_mod['power_to_disp'] = df_mod['engine_power'] / df_mod['engine_displacement'].clip(lower=0.01)

Арифметичесике преобразования

In [32]:
# Произведения признаков
df_mod['mileage_x_power'] = df_mod['mileage'] * df_mod['engine_power']
df_mod['disp_x_power'] = df_mod['engine_displacement'] * df_mod['engine_power']
df_mod['owners_x_power'] = df_mod['owners_number'] * df_mod['engine_power']
df_mod["age_x_power"]    = df_mod["age"] * df_mod["engine_power"]
df_mod["age_x_mileage"]  = df_mod["age"] * df_mod["mileage"]

# Частные признаков
df_mod['power_div_mileage'] = df_mod['engine_power'] / df_mod['mileage'].clip(lower=0.01)
df_mod['disp_per_mile'] = df_mod['engine_displacement'] / df_mod['mileage'].clip(lower=0.01)
df_mod['mileage_per_hp'] = df_mod['mileage'] / df_mod['engine_power'].clip(lower=0.01)
df_mod['milage_per_year'] = df_mod['mileage'] / df_mod['age'].clip(lower=0.01)
df_mod['power_div_owners'] = df_mod['engine_power'] / df_mod['owners_number'].clip(lower=0.01)
df_mod['disp_div_owners'] = df_mod['engine_displacement'] / df_mod['owners_number'].clip(lower=0.01)
df_mod['disp_div_fuel'] = df_mod['engine_displacement'] / df_mod['fuel_rate'].clip(lower=0.01)
df_mod['fuel_div_disp'] = df_mod['fuel_rate'] / df_mod['engine_displacement'].clip(lower=0.01)
df_mod['power_div_fuel'] = df_mod['engine_power'] / df_mod['fuel_rate'].clip(lower=0.01)
df_mod['fuel_div_power'] = df_mod['fuel_rate'] / df_mod['engine_power'].clip(lower=0.01)

# Лог-взаимодействие
df_mod['log_mileage_x_log_power'] = df_mod['log_mileage'] * df_mod['log_engine_power']
df_mod['log_age_x_log_power'] = df_mod['log_age'] * df_mod['log_engine_power']

# Разности признаков
df_mod['power_minus_disp'] = df_mod['engine_power'] - df_mod['engine_displacement']

In [33]:
from pandas.api.types import is_numeric_dtype

# Отношение дверей к местам
df_mod['seats_num'] = df_mod['seats'].str.extract(r'(\d+)').astype(float)
df_mod['doors_to_seats_ratio'] = df_mod['doors_count'] / df_mod['seats_num'].clip(lower=0.01)

# Индикатор аварий
df_mod['has_accident'] = (
    ~df_mod['accidents_resolution'].fillna('none').str.lower().isin(['none', 'no', 'unknown'])
).astype(int)

binary_cols = [
    col for col in df.columns
    if is_numeric_dtype(df[col]) and
       set(df[col].dropna().unique()).issubset({0, 1})
]
df_mod['num_binary_features'] = df[binary_cols] \
    .astype(bool).sum(axis=1)

# Счетчики безопасности и комфорта
safety_keywords = ['airbag', 'detection', 'vsm', 'ldw', 'tja', 'rcta', 'ptf']
safety_cols = [c for c in df_mod.columns if any(k in c.lower() for k in safety_keywords)]
df_mod['safety_feature_count'] = df_mod[safety_cols] \
    .astype(bool).sum(axis=1)

comfort_keywords = ['heat', 'leather', 'massage', 'climate', 'vent', 'seat']
comfort_cols = [c for c in df_mod.columns if any(k in c.lower() for k in comfort_keywords)]
df_mod['comfort_feature_count'] = df_mod[comfort_cols] \
    .astype(bool).sum(axis=1)

# Количество preset-тегов
preset_cols = [c for c in df_mod.columns if c.startswith('preset')]
df_mod['num_presets'] = df_mod[preset_cols] \
    .astype(bool).sum(axis=1)

# Является ли машина новой
df_mod['is_new'] = ((df_mod['mileage'] < 1000) & (df_mod['owners_number'] <= 1)).astype(int)
df_mod['is_very_old'] = (df_mod['age'] > 20).astype(int)

df_mod['is_one_owner'] = (df_mod['owners_number'] == 1).astype(int)

Проверим, что нет дубликатов

In [34]:
df_mod.columns.duplicated().sum()

np.int64(0)

Проверим, что не появилось пустых значений

In [35]:
cols_with_na = df_mod.columns[df_mod.isna().any()].tolist()
print("Колонки с пропусками:", cols_with_na)

Колонки с пропусками: []


Сохраним датасет с новыми признаками

In [36]:
df_mod.to_csv('dataset_with_new_features.csv', index=False)

### Обучим LightGBM

In [37]:
!pip install lightgbm --install-option=--gpu --install-option="--opencl-include-dir=/usr/local/cuda/include/" --install-option="--opencl-library=/usr/local/cuda/lib64/libOpenCL.so"


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: --install-option


In [38]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.1-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.1-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.1 colorlog-6.9.0 optuna-4.3.0


In [44]:
import optuna
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import r2_score
from lightgbm import LGBMRegressor
import joblib

In [40]:
RANDOM_STATE = 42

In [45]:
df_shuffled = df_mod.sample(frac=1, random_state=RANDOM_STATE).reset_index(drop=True)

n_total   = len(df_shuffled)
n_train   = int(0.2 * n_total)

train_df  = df_shuffled.iloc[:n_train] # 20%
test_df   = df_shuffled.iloc[n_train:] # 80%

print(train_df.shape, test_df.shape)

(120809, 352) (483237, 352)


In [46]:
X = train_df.drop('log_price', axis=1)
y = train_df['log_price']
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

cut_test_df = test_df.sample(frac=0.3, random_state=RANDOM_STATE)
X_test = cut_test_df.drop('log_price', axis=1)
y_test = cut_test_df['log_price']

num_features = X.select_dtypes(include=['int64','float64','int8']).columns.tolist()
cat_features = X.select_dtypes(include=['object']).columns.tolist()

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_features),
    ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), cat_features),
])

In [47]:
X_train_proc = preprocessor.fit_transform(X_train)
X_valid_proc = preprocessor.transform(X_valid)



In [48]:
import lightgbm as lgb

N_TRIALS = 50

def objective(trial):
    params = {
        'device':           'gpu',
        'n_estimators':     trial.suggest_int('n_estimators', 100, 2000),
        'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
        'num_leaves':       trial.suggest_int('num_leaves', 31, 256),
        'max_depth':        trial.suggest_int('max_depth', 5, 30),
        'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
        'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
        'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 50),
        'random_state':     RANDOM_STATE,
        'verbose':          -1,
    }
    model = LGBMRegressor(**params)
    model.fit(
        X_train_proc, y_train,
        eval_set=[(X_valid_proc, y_valid)],
        callbacks=[lgb.early_stopping(stopping_rounds=N_TRIALS)]
    )
    preds = model.predict(X_valid_proc)
    return r2_score(y_valid, preds)

In [None]:
study = optuna.create_study(direction='maximize', study_name='lgbm_opt_r2')
study.optimize(
    objective,
    n_trials=N_TRIALS,
    show_progress_bar=True,
)

best = study.best_params
best.update({'device':'gpu','random_seed':RANDOM_STATE,'verbose':-1})
final_model = LGBMRegressor(**best)
final_model.fit(X_train_proc, y_train, eval_set=(X_valid_proc, y_valid), early_stopping_rounds=N_TRIALS)

joblib.dump(final_model, 'drive/MyDrive/ML_project/best_lightgbm_model_with_new_feature.pkl')
joblib.dump(preprocessor, 'drive/MyDrive/ML_project/lightgbm_preprocessor_with_new_feature.pkl')
print("Models saved to best_lightgbm_model_with_new_feature.pkl and lightgbm_preprocessor_with_new_feature.pkl")

print("Best R²:", study.best_value)
print("Best params:", study.best_params)

[I 2025-06-10 14:28:42,277] A new study created in memory with name: lgbm_opt_r2


  0%|          | 0/50 [00:00<?, ?it/s]

  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1250]	valid_0's l2: 0.0439678




[I 2025-06-10 14:30:52,964] Trial 0 finished with value: 0.9628395708512109 and parameters: {'n_estimators': 1250, 'learning_rate': 0.02768416881258849, 'num_leaves': 189, 'max_depth': 5, 'subsample': 0.8819833107478683, 'colsample_bytree': 0.5522544765504203, 'reg_alpha': 0.22969850953294937, 'reg_lambda': 0.0020622044528671494, 'min_child_weight': 25}. Best is trial 0 with value: 0.9628395708512109.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1337]	valid_0's l2: 0.0386905




[I 2025-06-10 14:34:39,410] Trial 1 finished with value: 0.9672998665338179 and parameters: {'n_estimators': 1337, 'learning_rate': 0.004053169038638184, 'num_leaves': 235, 'max_depth': 21, 'subsample': 0.848203333187671, 'colsample_bytree': 0.7688004653886942, 'reg_alpha': 0.008626635308856583, 'reg_lambda': 0.08650452248545897, 'min_child_weight': 39}. Best is trial 1 with value: 0.9672998665338179.
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1291]	valid_0's l2: 0.107087




[I 2025-06-10 14:36:42,438] Trial 2 finished with value: 0.9094933021984043 and parameters: {'n_estimators': 1291, 'learning_rate': 0.001354280238954755, 'num_leaves': 94, 'max_depth': 12, 'subsample': 0.8362987527423973, 'colsample_bytree': 0.9465424765349354, 'reg_alpha': 1.8136278350193088e-05, 'reg_lambda': 0.002801415591693803, 'min_child_weight': 39}. Best is trial 1 with value: 0.9672998665338179.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1669]	valid_0's l2: 0.0302461




[I 2025-06-10 14:40:15,233] Trial 3 finished with value: 0.9744367700426058 and parameters: {'n_estimators': 1669, 'learning_rate': 0.014863744400503958, 'num_leaves': 176, 'max_depth': 29, 'subsample': 0.5959597308256337, 'colsample_bytree': 0.5817907049525022, 'reg_alpha': 0.10587365966435719, 'reg_lambda': 0.0007948160489008346, 'min_child_weight': 11}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[137]	valid_0's l2: 0.28957




[I 2025-06-10 14:40:41,322] Trial 4 finished with value: 0.7552634916058645 and parameters: {'n_estimators': 137, 'learning_rate': 0.006463767467512745, 'num_leaves': 61, 'max_depth': 14, 'subsample': 0.7954803820130633, 'colsample_bytree': 0.5005992637859088, 'reg_alpha': 0.006290973230786103, 'reg_lambda': 0.2657738942068641, 'min_child_weight': 37}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[137]	valid_0's l2: 0.741521




[I 2025-06-10 14:41:00,051] Trial 5 finished with value: 0.37328639498841576 and parameters: {'n_estimators': 137, 'learning_rate': 0.0019506647960740643, 'num_leaves': 89, 'max_depth': 20, 'subsample': 0.5731829044192291, 'colsample_bytree': 0.6669872522805402, 'reg_alpha': 1.4478814260728283, 'reg_lambda': 1.6999224966500373e-06, 'min_child_weight': 31}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1501]	valid_0's l2: 0.0366339




[I 2025-06-10 14:43:31,203] Trial 6 finished with value: 0.9690379824907399 and parameters: {'n_estimators': 1501, 'learning_rate': 0.0050203216306429835, 'num_leaves': 185, 'max_depth': 25, 'subsample': 0.7350135178457897, 'colsample_bytree': 0.7110263250130701, 'reg_alpha': 3.173968588596351e-08, 'reg_lambda': 0.001120823033034608, 'min_child_weight': 33}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[467]	valid_0's l2: 0.0314996




[I 2025-06-10 14:44:06,409] Trial 7 finished with value: 0.9733773887713301 and parameters: {'n_estimators': 467, 'learning_rate': 0.07752345405375237, 'num_leaves': 92, 'max_depth': 30, 'subsample': 0.750768602824706, 'colsample_bytree': 0.7484494737336789, 'reg_alpha': 0.002443544324459361, 'reg_lambda': 0.003573177164665662, 'min_child_weight': 21}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1206]	valid_0's l2: 0.0521809




[I 2025-06-10 14:46:15,389] Trial 8 finished with value: 0.9558980966248987 and parameters: {'n_estimators': 1206, 'learning_rate': 0.002542595014203553, 'num_leaves': 157, 'max_depth': 30, 'subsample': 0.9056643504762527, 'colsample_bytree': 0.9743095536644397, 'reg_alpha': 5.175810879499376e-07, 'reg_lambda': 2.690980996502111e-07, 'min_child_weight': 17}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[713]	valid_0's l2: 0.032319




[I 2025-06-10 14:46:59,922] Trial 9 finished with value: 0.972684807612994 and parameters: {'n_estimators': 713, 'learning_rate': 0.044788228459184024, 'num_leaves': 127, 'max_depth': 14, 'subsample': 0.6632889870534344, 'colsample_bytree': 0.7340216642492492, 'reg_alpha': 0.17698280431010535, 'reg_lambda': 1.551748010002799e-08, 'min_child_weight': 19}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1975]	valid_0's l2: 0.0323013




[I 2025-06-10 14:51:06,158] Trial 10 finished with value: 0.972699778839684 and parameters: {'n_estimators': 1975, 'learning_rate': 0.017135193789104677, 'num_leaves': 256, 'max_depth': 24, 'subsample': 0.5061556124915063, 'colsample_bytree': 0.6047578182152099, 'reg_alpha': 6.499800375160383, 'reg_lambda': 8.436367128488897, 'min_child_weight': 2}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[720]	valid_0's l2: 0.0330848




[I 2025-06-10 14:51:36,747] Trial 11 finished with value: 0.9720376387406138 and parameters: {'n_estimators': 720, 'learning_rate': 0.09382993133302175, 'num_leaves': 36, 'max_depth': 30, 'subsample': 0.6692471888592, 'colsample_bytree': 0.8475098173222425, 'reg_alpha': 0.00031622759164924363, 'reg_lambda': 2.4091600096219722e-05, 'min_child_weight': 6}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1843]	valid_0's l2: 0.0318257




[I 2025-06-10 14:53:55,198] Trial 12 finished with value: 0.9731017306634596 and parameters: {'n_estimators': 1843, 'learning_rate': 0.013600029188101987, 'num_leaves': 130, 'max_depth': 26, 'subsample': 0.682214691522408, 'colsample_bytree': 0.8382619120400989, 'reg_alpha': 0.0020957900417323655, 'reg_lambda': 6.139997175871064e-05, 'min_child_weight': 49}. Best is trial 3 with value: 0.9744367700426058.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[723]	valid_0's l2: 0.0289312




[I 2025-06-10 14:55:03,759] Trial 13 finished with value: 0.9755481138562964 and parameters: {'n_estimators': 723, 'learning_rate': 0.08542023427478641, 'num_leaves': 187, 'max_depth': 30, 'subsample': 0.9813275105267472, 'colsample_bytree': 0.6356011159511896, 'reg_alpha': 9.87302618180761e-05, 'reg_lambda': 0.05593598228510109, 'min_child_weight': 12}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[891]	valid_0's l2: 0.0297427




[I 2025-06-10 14:56:37,781] Trial 14 finished with value: 0.9748622696895052 and parameters: {'n_estimators': 891, 'learning_rate': 0.03725795155405071, 'num_leaves': 206, 'max_depth': 26, 'subsample': 0.9398530083078894, 'colsample_bytree': 0.6217169427728223, 'reg_alpha': 3.331844032192053e-05, 'reg_lambda': 0.08139915563389308, 'min_child_weight': 10}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[890]	valid_0's l2: 0.0296216




[I 2025-06-10 14:57:57,805] Trial 15 finished with value: 0.9749646464793652 and parameters: {'n_estimators': 890, 'learning_rate': 0.04381075737153361, 'num_leaves': 214, 'max_depth': 22, 'subsample': 0.9998485415395538, 'colsample_bytree': 0.6418826633490962, 'reg_alpha': 1.2437787046636419e-05, 'reg_lambda': 0.11929267088042537, 'min_child_weight': 11}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[471]	valid_0's l2: 0.0315548




[I 2025-06-10 14:58:39,274] Trial 16 finished with value: 0.9733307001589185 and parameters: {'n_estimators': 471, 'learning_rate': 0.062195889994119374, 'num_leaves': 216, 'max_depth': 18, 'subsample': 0.9970469564752815, 'colsample_bytree': 0.6539875102379029, 'reg_alpha': 2.7708184684231496e-06, 'reg_lambda': 4.600751582881133, 'min_child_weight': 12}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1002]	valid_0's l2: 0.0312132




[I 2025-06-10 15:00:54,333] Trial 17 finished with value: 0.9736194263987785 and parameters: {'n_estimators': 1004, 'learning_rate': 0.024284964365500365, 'num_leaves': 160, 'max_depth': 22, 'subsample': 0.9727074123990586, 'colsample_bytree': 0.535353459032049, 'reg_alpha': 1.883485435708504e-07, 'reg_lambda': 0.43805601313151576, 'min_child_weight': 3}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[519]	valid_0's l2: 0.0364097




[I 2025-06-10 15:01:22,592] Trial 18 finished with value: 0.9692275245968921 and parameters: {'n_estimators': 519, 'learning_rate': 0.049547701872147076, 'num_leaves': 221, 'max_depth': 8, 'subsample': 0.9367676555508806, 'colsample_bytree': 0.6597110775618025, 'reg_alpha': 5.046107593632263e-05, 'reg_lambda': 0.01636969078958114, 'min_child_weight': 15}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[795]	valid_0's l2: 0.0317455




[I 2025-06-10 15:02:28,942] Trial 19 finished with value: 0.9731695532504687 and parameters: {'n_estimators': 795, 'learning_rate': 0.032256759417391236, 'num_leaves': 256, 'max_depth': 17, 'subsample': 0.9962421048819413, 'colsample_bytree': 0.8067367192310371, 'reg_alpha': 2.2661797086011207e-06, 'reg_lambda': 1.5164648943847518, 'min_child_weight': 25}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1027]	valid_0's l2: 0.0345026




[I 2025-06-10 15:04:25,529] Trial 20 finished with value: 0.9708393078490946 and parameters: {'n_estimators': 1027, 'learning_rate': 0.008908865774298582, 'num_leaves': 206, 'max_depth': 27, 'subsample': 0.8954201355947429, 'colsample_bytree': 0.8944658940639106, 'reg_alpha': 0.00023941271269757496, 'reg_lambda': 0.013543752274122139, 'min_child_weight': 6}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[893]	valid_0's l2: 0.0295851




[I 2025-06-10 15:05:55,018] Trial 21 finished with value: 0.9749954893204756 and parameters: {'n_estimators': 893, 'learning_rate': 0.04748410700073156, 'num_leaves': 197, 'max_depth': 23, 'subsample': 0.94265367738507, 'colsample_bytree': 0.618882890281446, 'reg_alpha': 1.8045724845963024e-05, 'reg_lambda': 0.05970080490364696, 'min_child_weight': 10}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[588]	valid_0's l2: 0.0299813




[I 2025-06-10 15:06:50,419] Trial 22 finished with value: 0.9746605862707889 and parameters: {'n_estimators': 602, 'learning_rate': 0.09955694604326693, 'num_leaves': 234, 'max_depth': 23, 'subsample': 0.9596509080821475, 'colsample_bytree': 0.7078482126416195, 'reg_alpha': 7.731369728474712e-06, 'reg_lambda': 0.05139387840721919, 'min_child_weight': 8}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[940]	valid_0's l2: 0.0295825




[I 2025-06-10 15:08:08,387] Trial 23 finished with value: 0.9749976689028361 and parameters: {'n_estimators': 941, 'learning_rate': 0.05834865271301928, 'num_leaves': 171, 'max_depth': 19, 'subsample': 0.9251954339576647, 'colsample_bytree': 0.6254312545738251, 'reg_alpha': 0.00010346296274784716, 'reg_lambda': 0.00016213989100974269, 'min_child_weight': 15}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1074]	valid_0's l2: 0.0292816




[I 2025-06-10 15:10:11,096] Trial 24 finished with value: 0.9752519737581392 and parameters: {'n_estimators': 1074, 'learning_rate': 0.06490194670341574, 'num_leaves': 162, 'max_depth': 17, 'subsample': 0.8559342012907967, 'colsample_bytree': 0.5626013802913276, 'reg_alpha': 0.00015695944245758207, 'reg_lambda': 0.0001463122199181873, 'min_child_weight': 14}. Best is trial 13 with value: 0.9755481138562964.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1152]	valid_0's l2: 0.0287338




[I 2025-06-10 15:12:29,045] Trial 25 finished with value: 0.975714953550876 and parameters: {'n_estimators': 1165, 'learning_rate': 0.06555964500796287, 'num_leaves': 162, 'max_depth': 18, 'subsample': 0.8642338819071332, 'colsample_bytree': 0.5520540488251534, 'reg_alpha': 0.00015346592557518072, 'reg_lambda': 9.94216540977995e-05, 'min_child_weight': 16}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1139]	valid_0's l2: 0.0325263




[I 2025-06-10 15:14:46,668] Trial 26 finished with value: 0.9725096516808814 and parameters: {'n_estimators': 1139, 'learning_rate': 0.02018160885159541, 'num_leaves': 144, 'max_depth': 16, 'subsample': 0.8432470064482128, 'colsample_bytree': 0.5023885812065118, 'reg_alpha': 0.0010756464950973455, 'reg_lambda': 5.197700721560577e-06, 'min_child_weight': 22}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1533]	valid_0's l2: 0.0293729




[I 2025-06-10 15:17:15,302] Trial 27 finished with value: 0.9751747887297041 and parameters: {'n_estimators': 1533, 'learning_rate': 0.07301040450663224, 'num_leaves': 134, 'max_depth': 11, 'subsample': 0.7965807989705596, 'colsample_bytree': 0.5608197885935229, 'reg_alpha': 0.022719475735294488, 'reg_lambda': 1.2980263486213473e-05, 'min_child_weight': 15}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[335]	valid_0's l2: 0.0329382




[I 2025-06-10 15:17:57,771] Trial 28 finished with value: 0.9721614959789052 and parameters: {'n_estimators': 335, 'learning_rate': 0.07167202329118455, 'num_leaves': 149, 'max_depth': 15, 'subsample': 0.789836287473247, 'colsample_bytree': 0.5808961486951845, 'reg_alpha': 0.0006476687418952387, 'reg_lambda': 9.299449186635758e-05, 'min_child_weight': 22}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1128]	valid_0's l2: 0.039971




[I 2025-06-10 15:19:39,619] Trial 29 finished with value: 0.9662176024877582 and parameters: {'n_estimators': 1128, 'learning_rate': 0.03257863239298225, 'num_leaves': 109, 'max_depth': 6, 'subsample': 0.8587252791065387, 'colsample_bytree': 0.5366204641853624, 'reg_alpha': 0.00018038606700355908, 'reg_lambda': 4.770762759939028e-07, 'min_child_weight': 27}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1417]	valid_0's l2: 0.0327617




[I 2025-06-10 15:20:45,774] Trial 30 finished with value: 0.9723106612609601 and parameters: {'n_estimators': 1417, 'learning_rate': 0.025266130349020727, 'num_leaves': 187, 'max_depth': 11, 'subsample': 0.8679147005236828, 'colsample_bytree': 0.6920796505497929, 'reg_alpha': 0.03858489274656208, 'reg_lambda': 0.0002606086704225046, 'min_child_weight': 18}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1541]	valid_0's l2: 0.0307108




[I 2025-06-10 15:22:55,505] Trial 31 finished with value: 0.974044083741396 and parameters: {'n_estimators': 1541, 'learning_rate': 0.07514156667761047, 'num_leaves': 166, 'max_depth': 9, 'subsample': 0.8083441217372147, 'colsample_bytree': 0.5695862061172816, 'reg_alpha': 0.02169544865548764, 'reg_lambda': 1.4717545769063865e-05, 'min_child_weight': 14}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1684]	valid_0's l2: 0.0365017




[I 2025-06-10 15:25:14,939] Trial 32 finished with value: 0.9691497362516229 and parameters: {'n_estimators': 1684, 'learning_rate': 0.06056195296862603, 'num_leaves': 130, 'max_depth': 5, 'subsample': 0.7443495049430634, 'colsample_bytree': 0.5421430163701441, 'reg_alpha': 0.8551959208854375, 'reg_lambda': 8.22920398669779e-06, 'min_child_weight': 26}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1341]	valid_0's l2: 0.0300148




[I 2025-06-10 15:27:15,217] Trial 33 finished with value: 0.9746323021708625 and parameters: {'n_estimators': 1341, 'learning_rate': 0.0791116586081056, 'num_leaves': 138, 'max_depth': 11, 'subsample': 0.82414663818274, 'colsample_bytree': 0.5765789149753575, 'reg_alpha': 0.010424174615904308, 'reg_lambda': 4.395651740413678e-05, 'min_child_weight': 14}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1241]	valid_0's l2: 0.0292096




[I 2025-06-10 15:29:29,074] Trial 34 finished with value: 0.9753127959041097 and parameters: {'n_estimators': 1241, 'learning_rate': 0.09786470232718406, 'num_leaves': 118, 'max_depth': 13, 'subsample': 0.8816452288423318, 'colsample_bytree': 0.524835645590946, 'reg_alpha': 9.223248549902108e-05, 'reg_lambda': 0.0005514504846578206, 'min_child_weight': 19}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1221]	valid_0's l2: 0.0291707




[I 2025-06-10 15:31:42,833] Trial 35 finished with value: 0.9753456681525895 and parameters: {'n_estimators': 1241, 'learning_rate': 0.09404736795532033, 'num_leaves': 119, 'max_depth': 13, 'subsample': 0.8926920320537143, 'colsample_bytree': 0.5212463817340548, 'reg_alpha': 7.348719866583087e-05, 'reg_lambda': 0.0005916123975449103, 'min_child_weight': 18}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1250]	valid_0's l2: 0.0294706




[I 2025-06-10 15:33:56,314] Trial 36 finished with value: 0.9750922458344032 and parameters: {'n_estimators': 1250, 'learning_rate': 0.08588969547781004, 'num_leaves': 116, 'max_depth': 13, 'subsample': 0.89613337484841, 'colsample_bytree': 0.5136989897929211, 'reg_alpha': 3.129814084940632e-06, 'reg_lambda': 0.0033118317231061465, 'min_child_weight': 20}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1357]	valid_0's l2: 0.0311273




[I 2025-06-10 15:35:33,358] Trial 37 finished with value: 0.9736920353997245 and parameters: {'n_estimators': 1358, 'learning_rate': 0.09759950419776565, 'num_leaves': 81, 'max_depth': 9, 'subsample': 0.91654903363764, 'colsample_bytree': 0.5940699196128714, 'reg_alpha': 7.091017939199255e-05, 'reg_lambda': 0.0006360128258407138, 'min_child_weight': 30}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1209]	valid_0's l2: 0.162154




[I 2025-06-10 15:37:55,861] Trial 38 finished with value: 0.8629517005071156 and parameters: {'n_estimators': 1209, 'learning_rate': 0.0011156643900973615, 'num_leaves': 74, 'max_depth': 13, 'subsample': 0.8795813131669259, 'colsample_bytree': 0.5274588796332399, 'reg_alpha': 0.0007465395218403415, 'reg_lambda': 0.006292999106469397, 'min_child_weight': 24}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1439]	valid_0's l2: 0.0294901




[I 2025-06-10 15:40:39,128] Trial 39 finished with value: 0.9750757507973677 and parameters: {'n_estimators': 1439, 'learning_rate': 0.03834902798151307, 'num_leaves': 105, 'max_depth': 20, 'subsample': 0.9618475232081902, 'colsample_bytree': 0.5020420516158597, 'reg_alpha': 5.602373813925191e-07, 'reg_lambda': 0.0007161584267639055, 'min_child_weight': 17}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1627]	valid_0's l2: 0.0291733




[I 2025-06-10 15:44:12,153] Trial 40 finished with value: 0.9753435076400634 and parameters: {'n_estimators': 1646, 'learning_rate': 0.0531309734289047, 'num_leaves': 179, 'max_depth': 28, 'subsample': 0.7771591606952618, 'colsample_bytree': 0.5547352856537513, 'reg_alpha': 0.0027603883106227544, 'reg_lambda': 2.7873285422727746e-06, 'min_child_weight': 35}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1637]	valid_0's l2: 0.0292389




[I 2025-06-10 15:47:46,791] Trial 41 finished with value: 0.9752881006136965 and parameters: {'n_estimators': 1637, 'learning_rate': 0.05461678915555905, 'num_leaves': 180, 'max_depth': 27, 'subsample': 0.7730703485150965, 'colsample_bytree': 0.5536757137309738, 'reg_alpha': 0.006025751659127732, 'reg_lambda': 1.2334842087676255e-06, 'min_child_weight': 47}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1663]	valid_0's l2: 0.0292043




[I 2025-06-10 15:50:35,047] Trial 42 finished with value: 0.9753172961443995 and parameters: {'n_estimators': 1858, 'learning_rate': 0.053447083328389364, 'num_leaves': 147, 'max_depth': 28, 'subsample': 0.7142834215495243, 'colsample_bytree': 0.6022486030389324, 'reg_alpha': 0.0026444025073028613, 'reg_lambda': 1.3106203199148684e-07, 'min_child_weight': 43}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1867]	valid_0's l2: 0.0293821




[I 2025-06-10 15:53:47,849] Trial 43 finished with value: 0.9751670623748205 and parameters: {'n_estimators': 1867, 'learning_rate': 0.03180563505183482, 'num_leaves': 149, 'max_depth': 28, 'subsample': 0.7106913599826337, 'colsample_bytree': 0.5973262326276219, 'reg_alpha': 0.0036669950376233135, 'reg_lambda': 1.7966673134104754e-08, 'min_child_weight': 45}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1991]	valid_0's l2: 0.0368983




[I 2025-06-10 15:56:59,826] Trial 44 finished with value: 0.9688145064575516 and parameters: {'n_estimators': 1991, 'learning_rate': 0.0036149738682994077, 'num_leaves': 193, 'max_depth': 29, 'subsample': 0.7092202045338285, 'colsample_bytree': 0.6796398052218585, 'reg_alpha': 0.0014025156248740935, 'reg_lambda': 1.1628519141906803e-07, 'min_child_weight': 42}. Best is trial 25 with value: 0.975714953550876.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds


In [None]:
X_test_proc = preprocessor.transform(X_test)
y_pred = final_model.predict(X_test_proc)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

print(f"Test MAE: {mean_absolute_error(y_test, y_pred):.4f}")
print(f"Test MSE: {mean_squared_error(y_test, y_pred):.4f}")
print(f"Test R^2: {r2_score(y_test, y_pred):.4f}")

### Выводы

$R^2 = 0.9757$

In [None]:
param = {
    'n_estimators': 1165, 
    'learning_rate': 0.06555964500796287, 
    'num_leaves': 162, 
    'max_depth': 18, 
    'subsample': 0.8642338819071332, 
    'colsample_bytree': 0.5520540488251534, 
    'reg_alpha': 0.00015346592557518072, 
    'reg_lambda': 9.94216540977995e-05, 
    'min_child_weight': 16
}