In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import seaborn as sns

In [2]:
%pip install gdown



In [3]:
RANDOM_STATE = 42

In [4]:
CARS_FILE_ID = '1liFEe1-yFISPSpRSvbv1wIH_avYNGmBI'

random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

!gdown --id {CARS_FILE_ID}

Downloading...
From (original): https://drive.google.com/uc?id=1liFEe1-yFISPSpRSvbv1wIH_avYNGmBI
From (redirected): https://drive.google.com/uc?id=1liFEe1-yFISPSpRSvbv1wIH_avYNGmBI&confirm=t&uuid=b490bfb6-9aa9-4de3-8c93-7080a603ef2b
To: /content/dataset.csv
100% 1.01G/1.01G [00:09<00:00, 108MB/s]


In [5]:
df = pd.read_csv('dataset.csv')
print(df.shape)

(604047, 24)


In [6]:
df_with_na_column = df[df['engine_displacement'].isna()]
df_with_na_column

Unnamed: 0,production_year,mileage,condition,owners_number,pts_original,horse_power,accidents_resolution,region,seller_type,brand,...,engine_displacement,engine_power,fuel_rate,steering_wheel,price,price_segment,tags,auto_class,equipment,complectation_available_options
210905,2003,175000,CONDITION_OK,0,True,,,Чебаркуль,PRIVATE,Hyundai,...,,,,LEFT,380000,MEDIUM,available_for_checkup;pts_original;real_photo;...,,seats-5,


In [7]:
df.drop(210905, inplace=True)

In [8]:
df['fuel_rate'].fillna(df['fuel_rate'].median(), inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['fuel_rate'].fillna(df['fuel_rate'].median(), inplace=True)


In [9]:
df['pts_original'].fillna(True, inplace=True)
df['accidents_resolution'].fillna('OK', inplace=True)
df['auto_class'].fillna('NOT SPECIFIED', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['pts_original'].fillna(True, inplace=True)
  df['pts_original'].fillna(True, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['accidents_resolution'].fillna('OK', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because 

In [10]:
df.drop('horse_power', axis=1, inplace=True)

In [11]:
def get_unique_values(series, sep=';'):
    uniq = set()
    for cell in series.dropna():
        for piece in cell.split(sep):
            s = piece.strip()
            if s:
                uniq.add(s)
    return np.array(list(uniq))

all_tags = get_unique_values(df['tags'])
all_options = get_unique_values(df['complectation_available_options'])
all_equipments = get_unique_values(df['equipment'])

In [12]:
def create_binary_features(df, column, unique_values, sep=';'):
    return (
        df[column]
        .str.get_dummies(sep=sep)
        .reindex(columns=unique_values, fill_value=0)
        .astype('int8')
    )

tags_dummies = create_binary_features(df, 'tags', all_tags)
options_dummies = create_binary_features(df, 'complectation_available_options', all_options)
equipment_dummies = create_binary_features(df, 'equipment', all_equipments)

In [13]:
tags_dummies = tags_dummies.astype('Sparse[int]')
options_dummies = options_dummies.astype('Sparse[int]')
equipment_dummies = equipment_dummies.astype('Sparse[int]')

In [14]:
full_df = pd.concat([
    df.drop(columns=['tags', 'complectation_available_options', 'equipment']),
    tags_dummies,
    options_dummies.drop(columns=['condition']),
    equipment_dummies.drop(columns=['condition'])
], axis=1)

In [15]:
duplicated_cols = full_df.columns[full_df.columns.duplicated()].unique()
for col in duplicated_cols:
    full_df[col] = full_df[col].astype("int8")

In [16]:
import numpy as np

names = full_df.columns[full_df.columns.duplicated()].unique()
new_cols = {}

for name in names:
    cols_i = [col for col in full_df.columns if col == name]
    sub_df = full_df.loc[:, cols_i].astype('int8')   # DataFrame shape (n_rows, k)

    # берём максимум по строкам → Series shape (n_rows,)
    max_series = sub_df.max(axis=1).astype('int8')

    new_cols[name] = max_series

    # удаляем *все* старые колонки с этим именем
    full_df.drop(columns=cols_i, inplace=True)

# теперь присваиваем новые колонки–Series (они одномерны!)
for name, s in new_cols.items():
    full_df[name] = s


  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s
  full_df[name] = s


In [17]:
full_df.sample(10)

Unnamed: 0,production_year,mileage,condition,owners_number,accidents_resolution,region,seller_type,brand,model,body_type,...,e-adjustment-wheel,fcw,climate-control-2,wheel-power,electro-window-back,isofix-front,android-auto,wheel-leather,remote-engine-start,glonass
436947,2024,0,CONDITION_OK,0,OK,Санкт-Петербург,COMMERCIAL,Skoda,Karoq,ALLROAD_5_DOORS,...,0,0,1,1,1,0,0,0,0,1
329729,2020,40460,CONDITION_OK,1,ERROR,Москва,PRIVATE,Mercedes-Benz,E-Класс,COUPE_HARDTOP,...,0,0,0,0,1,0,0,1,0,0
343497,2000,380000,CONDITION_OK,4,OK,Москва,PRIVATE,Mercedes-Benz,M-Класс,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0
439784,2019,170000,CONDITION_OK,1,ERROR,Москва,PRIVATE,Skoda,Octavia,LIFTBACK,...,0,0,1,0,1,0,1,1,0,1
550845,2023,107,CONDITION_OK,1,OK,Краснодар,PRIVATE,Lada (ВАЗ),Largus,WAGON_5_DOORS,...,0,0,0,1,0,0,0,0,0,1
431808,2015,110727,CONDITION_OK,1,OK,Уфа,COMMERCIAL,Renault,Sandero,HATCHBACK_5_DOORS,...,0,0,0,1,0,0,0,0,0,0
167063,2024,0,CONDITION_OK,0,OK,Химки,COMMERCIAL,Genesis,GV80 Coupe,ALLROAD_5_DOORS,...,1,1,1,1,1,0,1,1,0,0
391523,2022,13000,CONDITION_OK,0,OK,Владивосток,COMMERCIAL,Nissan,X-Trail,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0
210052,2024,100,CONDITION_OK,1,OK,Новосибирск,PRIVATE,Hyundai,Elantra,SEDAN,...,0,0,0,0,0,0,0,0,0,0
46816,2024,34,CONDITION_OK,1,OK,Москва,COMMERCIAL,BMW,X5,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0


In [18]:
import pandas as pd
import numpy as np
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

numeric_df = full_df.select_dtypes(include=[np.number])
numeric_df = numeric_df.loc[:, numeric_df.nunique() > 1]

corr = numeric_df.corr().abs().fillna(0)

dist = 1 - corr

dist = (dist + dist.T) / 2
np.fill_diagonal(dist.values, 0)

dist = dist.clip(lower=0)

dist_vect = squareform(dist)
Z = hierarchy.linkage(dist_vect, method='average')

max_d = 0.4
clusters = hierarchy.fcluster(Z, t=max_d, criterion='distance')

cluster_df = pd.DataFrame({'feature': numeric_df.columns, 'cluster': clusters})
to_keep = ['production_year', 'mileage', 'owners_number', 'doors_count', 'seats', 'engine_displacement', 'engine_power', 'fuel_rate']
for c in cluster_df['cluster'].unique():
    members = cluster_df.loc[cluster_df['cluster'] == c, 'feature']
    avg_corr = corr.loc[members, members].mean().sort_values(ascending=False)
    to_keep.append(avg_corr.index[0])
to_keep = list(dict.fromkeys(to_keep))

to_drop = [col for col in numeric_df.columns if col not in to_keep]
df_reduced = full_df.drop(columns=to_drop)

print(f"Удалено числовых коррелирующих признаков: {len(to_drop)}")
print(f"Осталось признаков в df_reduced: {df_reduced.shape[1]} из {full_df.shape[1]}")

Удалено числовых коррелирующих признаков: 112
Осталось признаков в df_reduced: 304 из 416


In [19]:
df_reduced.sample(10)

Unnamed: 0,production_year,mileage,condition,owners_number,accidents_resolution,region,seller_type,brand,model,body_type,...,tja,massage-seats,fabric-seats,eco-leather,reduce-spare-wheel,easy-trunk-opening,wheel-power,isofix-front,remote-engine-start,glonass
518746,2007,33000,CONDITION_OK,1,ERROR,Аксай,PRIVATE,Lada (ВАЗ),2107,SEDAN,...,0,0,0,0,0,0,0,0,0,0
224119,2021,23211,CONDITION_OK,1,OK,Москва,PRIVATE,Hyundai,Solaris,SEDAN,...,0,0,1,0,0,0,1,0,1,1
351020,2021,6122,CONDITION_OK,1,OK,Уссурийск,COMMERCIAL,Mini,Countryman,ALLROAD_5_DOORS,...,0,0,0,0,0,0,0,0,0,0
424138,2016,4500,CONDITION_OK,1,ERROR,Погар,PRIVATE,Renault,Kaptur,ALLROAD_5_DOORS,...,0,0,1,0,0,0,1,0,0,1
89776,2011,245600,CONDITION_OK,3,OK,Тверь,PRIVATE,Chevrolet,Aveo,SEDAN,...,0,0,1,0,0,0,0,0,0,0
297634,2023,18300,CONDITION_OK,1,OK,Москва,PRIVATE,Lexus,RX,ALLROAD_5_DOORS,...,0,0,0,0,0,1,1,0,0,0
449720,2024,0,CONDITION_OK,0,OK,Москва,COMMERCIAL,Soueast,S07,ALLROAD_5_DOORS,...,1,0,0,1,0,0,1,0,1,1
49847,2020,121000,CONDITION_OK,1,OK,Москва,PRIVATE,BMW,X6,ALLROAD_5_DOORS,...,0,0,0,0,0,1,0,0,1,1
48872,2024,0,CONDITION_OK,0,OK,Краснодар,COMMERCIAL,BMW,X6,ALLROAD_5_DOORS,...,0,0,0,0,0,0,1,0,0,0
846,1999,363500,CONDITION_OK,3,OK,Уфа,PRIVATE,Alfa Romeo,166,SEDAN,...,0,0,0,0,0,0,1,0,0,0


### Добавим новые признаки

In [20]:
from sklearn.preprocessing import PolynomialFeatures

In [21]:
df_mod = df_reduced.copy()

In [22]:
df_mod["age"] = 2025 - df_mod["production_year"]

In [23]:
numeric_cols = ['mileage', 'engine_displacement', 'engine_power', 'age']

In [24]:
df_mod['log_price'] = np.log1p(df['price'])

In [25]:
df_mod.drop(columns=['price'], inplace=True)

Прологарифмируем некоторые признаки

In [26]:
for col in numeric_cols:
    df_mod[f'log_{col}'] = np.log1p(df_mod[col])

Возьмем корни от числовых признаков

In [27]:
for col in numeric_cols:
    df_mod[f'square_{col}'] = df_mod[col]**2

Полиномиальные преобразования

In [28]:
poly = PolynomialFeatures(degree=2, include_bias=False)
poly_array = poly.fit_transform(df_mod[numeric_cols].fillna(0))
poly_feature_names = poly.get_feature_names_out(numeric_cols)
poly_df = pd.DataFrame(poly_array, columns=poly_feature_names, index=df_mod.index)

In [29]:
poly_new = poly_df.drop(columns=[c for c in poly_df.columns if c in numeric_cols])
df_mod = pd.concat([df_mod, poly_new.add_prefix('poly_')], axis=1)

Базовые отношения двигателя

In [30]:
df_mod['power_to_disp'] = df_mod['engine_power'] / df_mod['engine_displacement'].clip(lower=0.01)

Арифметичесике преобразования

In [31]:
# Произведения признаков
df_mod['mileage_x_power'] = df_mod['mileage'] * df_mod['engine_power']
df_mod['disp_x_power'] = df_mod['engine_displacement'] * df_mod['engine_power']
df_mod['owners_x_power'] = df_mod['owners_number'] * df_mod['engine_power']
df_mod["age_x_power"]    = df_mod["age"] * df_mod["engine_power"]
df_mod["age_x_mileage"]  = df_mod["age"] * df_mod["mileage"]

# Частные признаков
df_mod['power_div_mileage'] = df_mod['engine_power'] / df_mod['mileage'].clip(lower=0.01)
df_mod['disp_per_mile'] = df_mod['engine_displacement'] / df_mod['mileage'].clip(lower=0.01)
df_mod['mileage_per_hp'] = df_mod['mileage'] / df_mod['engine_power'].clip(lower=0.01)
df_mod['milage_per_year'] = df_mod['mileage'] / df_mod['age'].clip(lower=0.01)
df_mod['power_div_owners'] = df_mod['engine_power'] / df_mod['owners_number'].clip(lower=0.01)
df_mod['disp_div_owners'] = df_mod['engine_displacement'] / df_mod['owners_number'].clip(lower=0.01)
df_mod['disp_div_fuel'] = df_mod['engine_displacement'] / df_mod['fuel_rate'].clip(lower=0.01)
df_mod['fuel_div_disp'] = df_mod['fuel_rate'] / df_mod['engine_displacement'].clip(lower=0.01)
df_mod['power_div_fuel'] = df_mod['engine_power'] / df_mod['fuel_rate'].clip(lower=0.01)
df_mod['fuel_div_power'] = df_mod['fuel_rate'] / df_mod['engine_power'].clip(lower=0.01)

# Лог-взаимодействие
df_mod['log_mileage_x_log_power'] = df_mod['log_mileage'] * df_mod['log_engine_power']
df_mod['log_age_x_log_power'] = df_mod['log_age'] * df_mod['log_engine_power']

# Разности признаков
df_mod['power_minus_disp'] = df_mod['engine_power'] - df_mod['engine_displacement']

In [32]:
from pandas.api.types import is_numeric_dtype

# Отношение дверей к местам
df_mod['seats_num'] = df_mod['seats'].str.extract(r'(\d+)').astype(float)
df_mod['doors_to_seats_ratio'] = df_mod['doors_count'] / df_mod['seats_num'].clip(lower=0.01)

# Индикатор аварий
df_mod['has_accident'] = (
    ~df_mod['accidents_resolution'].fillna('none').str.lower().isin(['none', 'no', 'unknown'])
).astype(int)

binary_cols = [
    col for col in df.columns
    if is_numeric_dtype(df[col]) and
       set(df[col].dropna().unique()).issubset({0, 1})
]
df_mod['num_binary_features'] = df[binary_cols] \
    .astype(bool).sum(axis=1)

# Счетчики безопасности и комфорта
safety_keywords = ['airbag', 'detection', 'vsm', 'ldw', 'tja', 'rcta', 'ptf']
safety_cols = [c for c in df_mod.columns if any(k in c.lower() for k in safety_keywords)]
df_mod['safety_feature_count'] = df_mod[safety_cols] \
    .astype(bool).sum(axis=1)

comfort_keywords = ['heat', 'leather', 'massage', 'climate', 'vent', 'seat']
comfort_cols = [c for c in df_mod.columns if any(k in c.lower() for k in comfort_keywords)]
df_mod['comfort_feature_count'] = df_mod[comfort_cols] \
    .astype(bool).sum(axis=1)

# Количество preset-тегов
preset_cols = [c for c in df_mod.columns if c.startswith('preset')]
df_mod['num_presets'] = df_mod[preset_cols] \
    .astype(bool).sum(axis=1)

# Является ли машина новой
df_mod['is_new'] = ((df_mod['mileage'] < 1000) & (df_mod['owners_number'] <= 1)).astype(int)
df_mod['is_very_old'] = (df_mod['age'] > 20).astype(int)

df_mod['is_one_owner'] = (df_mod['owners_number'] == 1).astype(int)

Проверим, что нет дубликатов

In [33]:
df_mod.columns.duplicated().sum()

np.int64(0)

Проверим, что не появилось пустых значений

In [34]:
cols_with_na = df_mod.columns[df_mod.isna().any()].tolist()
print("Колонки с пропусками:", cols_with_na)

Колонки с пропусками: []


### Обучим LightGBM

In [35]:
!pip install lightgbm --install-option=--gpu --install-option="--opencl-include-dir=/usr/local/cuda/include/" --install-option="--opencl-library=/usr/local/cuda/lib64/libOpenCL.so"


Usage:   
  pip3 install [options] <requirement specifier> [package-index-options] ...
  pip3 install [options] -r <requirements file> [package-index-options] ...
  pip3 install [options] [-e] <vcs project url> ...
  pip3 install [options] [-e] <local project path> ...
  pip3 install [options] <archive url/path> ...

no such option: --install-option


In [36]:
!pip install optuna



In [37]:
import optuna
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.metrics import r2_score
from lightgbm import LGBMRegressor
import joblib

In [38]:
RANDOM_STATE = 42

In [39]:
X = df_mod.drop('log_price', axis=1)
y = df_mod['log_price']
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

num_features = X.select_dtypes(include=['int64','float64','int8']).columns.tolist()
cat_features = X.select_dtypes(include=['object']).columns.tolist()

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), num_features),
    ('cat', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1), cat_features),
])

In [40]:
X_train_proc = preprocessor.fit_transform(X_train)
X_valid_proc = preprocessor.transform(X_valid)



In [45]:
import lightgbm as lgb

def objective(trial):
    params = {
        'device':           'gpu',
        'n_estimators':     trial.suggest_int('n_estimators', 100, 2000),
        'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
        'num_leaves':       trial.suggest_int('num_leaves', 31, 256),
        'max_depth':        trial.suggest_int('max_depth', 5, 30),
        'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
        'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
        'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 50),
        'random_state':     RANDOM_STATE,
        'verbose':          -1,
    }
    model = LGBMRegressor(**params)
    model.fit(
        X_train_proc, y_train,
        eval_set=[(X_valid_proc, y_valid)],
        callbacks=[lgb.early_stopping(stopping_rounds=50)]
    )
    preds = model.predict(X_valid_proc)
    return r2_score(y_valid, preds)

In [None]:
study = optuna.create_study(direction='maximize', study_name='lgbm_opt_r2')
study.optimize(
    objective,
    n_trials=50,
    show_progress_bar=True,
)

best = study.best_params
best.update({'device':'gpu','random_seed':RANDOM_STATE,'verbose':-1})
final_model = LGBMRegressor(**best)
final_model.fit(X_train_proc, y_train, eval_set=(X_valid_proc, y_valid), early_stopping_rounds=50)

joblib.dump(final_model, 'drive/MyDrive/ML_project/best_lightgbm_model_with_new_feature.pkl')
joblib.dump(preprocessor, 'drive/MyDrive/ML_project/best_lightgbm_model_with_new_feature.pkl')
print("Models saved to best_lightgbm_model_with_new_feature.pkl and best_lightgbm_model_with_new_feature.pkl")

print("Best R²:", study.best_value)
print("Best params:", study.best_params)

[I 2025-06-09 22:42:14,388] A new study created in memory with name: lgbm_opt_r2


  0%|          | 0/50 [00:00<?, ?it/s]

  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1734]	valid_0's l2: 0.0227504




[I 2025-06-09 22:47:01,750] Trial 0 finished with value: 0.9808908702546419 and parameters: {'n_estimators': 1734, 'learning_rate': 0.039764739449760694, 'num_leaves': 198, 'max_depth': 13, 'subsample': 0.5313214787231733, 'colsample_bytree': 0.6663976200821805, 'reg_alpha': 2.2031717012226766, 'reg_lambda': 8.370183284297949e-06, 'min_child_weight': 36}. Best is trial 0 with value: 0.9808908702546419.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[526]	valid_0's l2: 0.222497




[I 2025-06-09 22:50:09,414] Trial 1 finished with value: 0.8131141102463411 and parameters: {'n_estimators': 526, 'learning_rate': 0.0019420284471277202, 'num_leaves': 149, 'max_depth': 30, 'subsample': 0.7671349087266339, 'colsample_bytree': 0.5262759924672935, 'reg_alpha': 1.2968665144467302e-06, 'reg_lambda': 0.013969216039788094, 'min_child_weight': 4}. Best is trial 0 with value: 0.9808908702546419.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[962]	valid_0's l2: 0.0234927




[I 2025-06-09 22:52:27,347] Trial 2 finished with value: 0.9802673178634327 and parameters: {'n_estimators': 962, 'learning_rate': 0.06788709327340395, 'num_leaves': 147, 'max_depth': 30, 'subsample': 0.7370022528761622, 'colsample_bytree': 0.8459842307890253, 'reg_alpha': 0.005805351602618627, 'reg_lambda': 7.290536657098525e-08, 'min_child_weight': 29}. Best is trial 0 with value: 0.9808908702546419.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1005]	valid_0's l2: 0.0574623




[I 2025-06-09 22:54:44,901] Trial 3 finished with value: 0.9517346241241728 and parameters: {'n_estimators': 1005, 'learning_rate': 0.0034875115011724768, 'num_leaves': 179, 'max_depth': 6, 'subsample': 0.5515839840146407, 'colsample_bytree': 0.8273723472104563, 'reg_alpha': 1.1687297377055622e-08, 'reg_lambda': 0.0017297618335890574, 'min_child_weight': 38}. Best is trial 0 with value: 0.9808908702546419.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1887]	valid_0's l2: 0.0223293




[I 2025-06-09 22:59:40,255] Trial 4 finished with value: 0.981244571968194 and parameters: {'n_estimators': 1887, 'learning_rate': 0.047731425749493184, 'num_leaves': 244, 'max_depth': 26, 'subsample': 0.9524842540639102, 'colsample_bytree': 0.8422517303640855, 'reg_alpha': 0.036845866263152886, 'reg_lambda': 0.23745557056584296, 'min_child_weight': 24}. Best is trial 4 with value: 0.981244571968194.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1665]	valid_0's l2: 0.0393134




[I 2025-06-09 23:05:45,737] Trial 5 finished with value: 0.9669787895882387 and parameters: {'n_estimators': 1665, 'learning_rate': 0.0025730517754693066, 'num_leaves': 168, 'max_depth': 10, 'subsample': 0.7226651043870045, 'colsample_bytree': 0.7519720823332477, 'reg_alpha': 0.0021366836267833326, 'reg_lambda': 1.1029229678883727e-06, 'min_child_weight': 27}. Best is trial 4 with value: 0.981244571968194.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1895]	valid_0's l2: 0.0363186




[I 2025-06-09 23:10:19,135] Trial 6 finished with value: 0.9694942696204574 and parameters: {'n_estimators': 1895, 'learning_rate': 0.005521517414147863, 'num_leaves': 61, 'max_depth': 22, 'subsample': 0.8830308605767738, 'colsample_bytree': 0.8573292777953355, 'reg_alpha': 1.6697978655414868e-06, 'reg_lambda': 0.5400381569612341, 'min_child_weight': 18}. Best is trial 4 with value: 0.981244571968194.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1429]	valid_0's l2: 0.022415




[I 2025-06-09 23:14:09,403] Trial 7 finished with value: 0.981172564113691 and parameters: {'n_estimators': 1430, 'learning_rate': 0.09670122694365622, 'num_leaves': 252, 'max_depth': 27, 'subsample': 0.6478000101220562, 'colsample_bytree': 0.635561070015204, 'reg_alpha': 5.899145373977128e-07, 'reg_lambda': 2.859832939183072e-07, 'min_child_weight': 12}. Best is trial 4 with value: 0.981244571968194.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[913]	valid_0's l2: 0.028576




[I 2025-06-09 23:19:24,321] Trial 8 finished with value: 0.9759976279465793 and parameters: {'n_estimators': 913, 'learning_rate': 0.013100632604997987, 'num_leaves': 194, 'max_depth': 12, 'subsample': 0.6328162159623563, 'colsample_bytree': 0.5237255874226683, 'reg_alpha': 2.6030386393106693e-07, 'reg_lambda': 3.7866838315979616e-05, 'min_child_weight': 12}. Best is trial 4 with value: 0.981244571968194.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[398]	valid_0's l2: 0.043425




[I 2025-06-09 23:21:38,152] Trial 9 finished with value: 0.9635252629243032 and parameters: {'n_estimators': 398, 'learning_rate': 0.009912791877166807, 'num_leaves': 123, 'max_depth': 16, 'subsample': 0.6539601081160837, 'colsample_bytree': 0.5546589459977815, 'reg_alpha': 5.159089300087872e-08, 'reg_lambda': 0.00041426218275354476, 'min_child_weight': 9}. Best is trial 4 with value: 0.981244571968194.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1333]	valid_0's l2: 0.0233515




[I 2025-06-09 23:26:24,981] Trial 10 finished with value: 0.980385981886512 and parameters: {'n_estimators': 1333, 'learning_rate': 0.02943379165107319, 'num_leaves': 256, 'max_depth': 22, 'subsample': 0.9854743365590246, 'colsample_bytree': 0.925963770558529, 'reg_alpha': 0.6468068913372033, 'reg_lambda': 6.589656770943343, 'min_child_weight': 44}. Best is trial 4 with value: 0.981244571968194.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1287]	valid_0's l2: 0.0226591




[I 2025-06-09 23:29:47,292] Trial 11 finished with value: 0.9809675519638775 and parameters: {'n_estimators': 1407, 'learning_rate': 0.09894436425235979, 'num_leaves': 256, 'max_depth': 24, 'subsample': 0.8393253496237036, 'colsample_bytree': 0.6617540173178702, 'reg_alpha': 6.497996559279317e-05, 'reg_lambda': 1.564462261521813e-08, 'min_child_weight': 20}. Best is trial 4 with value: 0.981244571968194.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1431]	valid_0's l2: 0.0230579




[I 2025-06-09 23:33:59,131] Trial 12 finished with value: 0.980632524350401 and parameters: {'n_estimators': 1431, 'learning_rate': 0.03113445870003604, 'num_leaves': 228, 'max_depth': 26, 'subsample': 0.9995370494911567, 'colsample_bytree': 0.6538581711437207, 'reg_alpha': 0.04622831926673852, 'reg_lambda': 0.031906937881175206, 'min_child_weight': 18}. Best is trial 4 with value: 0.981244571968194.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1958]	valid_0's l2: 0.0222591




[I 2025-06-09 23:38:36,274] Trial 13 finished with value: 0.981303513877254 and parameters: {'n_estimators': 1958, 'learning_rate': 0.0596303562306618, 'num_leaves': 225, 'max_depth': 19, 'subsample': 0.8591891889270902, 'colsample_bytree': 0.7391237942583291, 'reg_alpha': 2.498641005603984e-05, 'reg_lambda': 5.184942859473355e-07, 'min_child_weight': 2}. Best is trial 13 with value: 0.981303513877254.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1978]	valid_0's l2: 0.0237423




[I 2025-06-09 23:44:54,183] Trial 14 finished with value: 0.9800576684837846 and parameters: {'n_estimators': 1978, 'learning_rate': 0.017441721965798185, 'num_leaves': 223, 'max_depth': 19, 'subsample': 0.9040612278665029, 'colsample_bytree': 0.999095632906269, 'reg_alpha': 4.934512996792572e-05, 'reg_lambda': 2.220573777546379e-05, 'min_child_weight': 1}. Best is trial 13 with value: 0.981303513877254.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1743]	valid_0's l2: 0.023471




[I 2025-06-09 23:48:00,493] Trial 15 finished with value: 0.9802855731794871 and parameters: {'n_estimators': 1743, 'learning_rate': 0.05381929781961742, 'num_leaves': 94, 'max_depth': 18, 'subsample': 0.9262104964292763, 'colsample_bytree': 0.7465585303325974, 'reg_alpha': 0.04038341259324037, 'reg_lambda': 0.2270488621949578, 'min_child_weight': 34}. Best is trial 13 with value: 0.981303513877254.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[607]	valid_0's l2: 0.0270657




[I 2025-06-09 23:50:33,900] Trial 16 finished with value: 0.9772661892332821 and parameters: {'n_estimators': 607, 'learning_rate': 0.02083811694684566, 'num_leaves': 225, 'max_depth': 21, 'subsample': 0.807597603446009, 'colsample_bytree': 0.7662002914876843, 'reg_alpha': 0.00027791684145591337, 'reg_lambda': 0.0038921652312631817, 'min_child_weight': 48}. Best is trial 13 with value: 0.981303513877254.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1595]	valid_0's l2: 0.0264433




[I 2025-06-09 23:52:52,646] Trial 17 finished with value: 0.9777889758046876 and parameters: {'n_estimators': 1595, 'learning_rate': 0.05051644913315422, 'num_leaves': 41, 'max_depth': 15, 'subsample': 0.9386981301528066, 'colsample_bytree': 0.9298818445933154, 'reg_alpha': 1.8244536713721637e-05, 'reg_lambda': 3.024088797877163, 'min_child_weight': 23}. Best is trial 13 with value: 0.981303513877254.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1972]	valid_0's l2: 0.0266047




[I 2025-06-10 00:00:09,003] Trial 18 finished with value: 0.9776533951902517 and parameters: {'n_estimators': 1972, 'learning_rate': 0.007504821879514712, 'num_leaves': 209, 'max_depth': 25, 'subsample': 0.8585374158930894, 'colsample_bytree': 0.7820701944334806, 'reg_alpha': 0.22165001352378932, 'reg_lambda': 1.7793651938685416e-06, 'min_child_weight': 6}. Best is trial 13 with value: 0.981303513877254.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1183]	valid_0's l2: 0.146544




[I 2025-06-10 00:04:05,872] Trial 19 finished with value: 0.8769110037711783 and parameters: {'n_estimators': 1183, 'learning_rate': 0.0011617090234345507, 'num_leaves': 111, 'max_depth': 28, 'subsample': 0.8168342739145515, 'colsample_bytree': 0.7083957109478359, 'reg_alpha': 0.0012717361013541908, 'reg_lambda': 6.66668782683295e-05, 'min_child_weight': 14}. Best is trial 13 with value: 0.981303513877254.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[162]	valid_0's l2: 0.0663142




[I 2025-06-10 00:04:33,651] Trial 20 finished with value: 0.9442994945824702 and parameters: {'n_estimators': 162, 'learning_rate': 0.022148128953800333, 'num_leaves': 170, 'max_depth': 5, 'subsample': 0.9550444955281393, 'colsample_bytree': 0.8960348437673802, 'reg_alpha': 7.383483922761171e-06, 'reg_lambda': 0.06244083231080839, 'min_child_weight': 31}. Best is trial 13 with value: 0.981303513877254.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1547]	valid_0's l2: 0.0225348




[I 2025-06-10 00:10:18,581] Trial 21 finished with value: 0.9810719136762495 and parameters: {'n_estimators': 1547, 'learning_rate': 0.09175551350149114, 'num_leaves': 243, 'max_depth': 27, 'subsample': 0.667548002804305, 'colsample_bytree': 0.5939438108065311, 'reg_alpha': 1.8407434181932556e-06, 'reg_lambda': 7.705019343858053e-08, 'min_child_weight': 13}. Best is trial 13 with value: 0.981303513877254.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1768]	valid_0's l2: 0.0222571




[I 2025-06-10 00:15:38,876] Trial 22 finished with value: 0.9813051458229151 and parameters: {'n_estimators': 1768, 'learning_rate': 0.06772007602645826, 'num_leaves': 233, 'max_depth': 20, 'subsample': 0.6985173167736177, 'colsample_bytree': 0.6183315447128277, 'reg_alpha': 1.9457879592590022e-07, 'reg_lambda': 9.372267643655139e-07, 'min_child_weight': 1}. Best is trial 22 with value: 0.9813051458229151.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1828]	valid_0's l2: 0.0221483




[I 2025-06-10 00:19:56,493] Trial 23 finished with value: 0.9813965631718042 and parameters: {'n_estimators': 1828, 'learning_rate': 0.05930396196813868, 'num_leaves': 231, 'max_depth': 20, 'subsample': 0.5848815693903193, 'colsample_bytree': 0.7082366853043143, 'reg_alpha': 0.00027775655371692815, 'reg_lambda': 3.7302584109152244e-06, 'min_child_weight': 1}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1747]	valid_0's l2: 0.022295




[I 2025-06-10 00:23:46,898] Trial 24 finished with value: 0.9812733318718408 and parameters: {'n_estimators': 1747, 'learning_rate': 0.0677335301894653, 'num_leaves': 209, 'max_depth': 19, 'subsample': 0.5885550410665091, 'colsample_bytree': 0.7019266698464575, 'reg_alpha': 0.00014657461582585137, 'reg_lambda': 3.4046670658808876e-06, 'min_child_weight': 3}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1163]	valid_0's l2: 0.023754




[I 2025-06-10 00:28:20,840] Trial 25 finished with value: 0.9800479049115598 and parameters: {'n_estimators': 1164, 'learning_rate': 0.03284735207646808, 'num_leaves': 187, 'max_depth': 20, 'subsample': 0.7103762416612366, 'colsample_bytree': 0.5986050784055019, 'reg_alpha': 7.68866770381174e-08, 'reg_lambda': 5.94797489403134e-07, 'min_child_weight': 7}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1854]	valid_0's l2: 0.0221515




[I 2025-06-10 00:32:39,923] Trial 26 finished with value: 0.981393906867548 and parameters: {'n_estimators': 1859, 'learning_rate': 0.06479521328370286, 'num_leaves': 229, 'max_depth': 16, 'subsample': 0.5048266037924718, 'colsample_bytree': 0.7003698336859494, 'reg_alpha': 8.722883679431325e-06, 'reg_lambda': 0.00015287860748819615, 'min_child_weight': 8}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1808]	valid_0's l2: 0.0225112




[I 2025-06-10 00:39:06,577] Trial 27 finished with value: 0.9810917198094936 and parameters: {'n_estimators': 1808, 'learning_rate': 0.03915746004518299, 'num_leaves': 208, 'max_depth': 16, 'subsample': 0.5084380635915631, 'colsample_bytree': 0.6030501701168788, 'reg_alpha': 6.909982422674999e-06, 'reg_lambda': 0.00017588903931811987, 'min_child_weight': 9}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1561]	valid_0's l2: 0.0222825




[I 2025-06-10 00:42:44,754] Trial 28 finished with value: 0.9812838500603559 and parameters: {'n_estimators': 1561, 'learning_rate': 0.07745435844149275, 'num_leaves': 236, 'max_depth': 23, 'subsample': 0.5890186164137421, 'colsample_bytree': 0.6989480530064395, 'reg_alpha': 0.000890660714069173, 'reg_lambda': 8.126927250459639e-06, 'min_child_weight': 6}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[802]	valid_0's l2: 0.0256382




[I 2025-06-10 00:45:30,652] Trial 29 finished with value: 0.9784652698695979 and parameters: {'n_estimators': 802, 'learning_rate': 0.024531415942860905, 'num_leaves': 203, 'max_depth': 14, 'subsample': 0.557186802899945, 'colsample_bytree': 0.6842934127835485, 'reg_alpha': 2.5227073392221187e-07, 'reg_lambda': 9.323423130009838e-06, 'min_child_weight': 1}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1240]	valid_0's l2: 0.0237294




[I 2025-06-10 00:49:10,644] Trial 30 finished with value: 0.9800685680805065 and parameters: {'n_estimators': 1240, 'learning_rate': 0.0401177522120749, 'num_leaves': 163, 'max_depth': 10, 'subsample': 0.5039698214774779, 'colsample_bytree': 0.6320494584708634, 'reg_alpha': 1.3316489154848606e-08, 'reg_lambda': 0.0005506741113274508, 'min_child_weight': 9}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1848]	valid_0's l2: 0.0222333




[I 2025-06-10 00:53:26,960] Trial 31 finished with value: 0.9813251857188939 and parameters: {'n_estimators': 1848, 'learning_rate': 0.0617766406734663, 'num_leaves': 222, 'max_depth': 17, 'subsample': 0.6025560678221622, 'colsample_bytree': 0.72746578942239, 'reg_alpha': 9.238041857909476e-06, 'reg_lambda': 1.9222435430767593e-07, 'min_child_weight': 1}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1800]	valid_0's l2: 0.0224682




[I 2025-06-10 00:57:55,173] Trial 32 finished with value: 0.9811278719678665 and parameters: {'n_estimators': 1800, 'learning_rate': 0.04212096372249541, 'num_leaves': 218, 'max_depth': 17, 'subsample': 0.6050498504602413, 'colsample_bytree': 0.7893185994482284, 'reg_alpha': 4.731902936998092e-06, 'reg_lambda': 1.0058138060362666e-08, 'min_child_weight': 5}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1632]	valid_0's l2: 0.0227865




[I 2025-06-10 01:03:00,696] Trial 33 finished with value: 0.9808605347599211 and parameters: {'n_estimators': 1632, 'learning_rate': 0.06841803647639379, 'num_leaves': 236, 'max_depth': 12, 'subsample': 0.5417586962502327, 'colsample_bytree': 0.7241622993235752, 'reg_alpha': 7.490983917126046, 'reg_lambda': 1.59799276634946e-07, 'min_child_weight': 1}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1817]	valid_0's l2: 0.0243798




[I 2025-06-10 01:11:52,149] Trial 34 finished with value: 0.9795222282634637 and parameters: {'n_estimators': 1817, 'learning_rate': 0.016098080704630487, 'num_leaves': 192, 'max_depth': 14, 'subsample': 0.6897845852829345, 'colsample_bytree': 0.5604087750522249, 'reg_alpha': 0.00014348347912632504, 'reg_lambda': 5.340247037848304e-06, 'min_child_weight': 4}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1670]	valid_0's l2: 0.0227105




[I 2025-06-10 01:15:01,998] Trial 35 finished with value: 0.9809243816132336 and parameters: {'n_estimators': 1670, 'learning_rate': 0.07293122747709668, 'num_leaves': 133, 'max_depth': 17, 'subsample': 0.7658817119020991, 'colsample_bytree': 0.8148183480211909, 'reg_alpha': 7.036760322598734e-07, 'reg_lambda': 4.862085004066106e-08, 'min_child_weight': 8}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1875]	valid_0's l2: 0.0224663




[I 2025-06-10 01:18:59,746] Trial 36 finished with value: 0.981129481576308 and parameters: {'n_estimators': 1875, 'learning_rate': 0.05343874327264794, 'num_leaves': 180, 'max_depth': 21, 'subsample': 0.6173842766423141, 'colsample_bytree': 0.6858327467694533, 'reg_alpha': 0.004079413702004698, 'reg_lambda': 1.672723044829346e-06, 'min_child_weight': 4}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1535]	valid_0's l2: 0.0325648




[I 2025-06-10 01:24:52,689] Trial 37 finished with value: 0.9726472421797879 and parameters: {'n_estimators': 1535, 'learning_rate': 0.0049070909395064755, 'num_leaves': 156, 'max_depth': 18, 'subsample': 0.5665501436560832, 'colsample_bytree': 0.6339217622682981, 'reg_alpha': 1.4661513959536266e-05, 'reg_lambda': 0.00287060373573651, 'min_child_weight': 16}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1693]	valid_0's l2: 0.0230946




[I 2025-06-10 01:29:35,551] Trial 38 finished with value: 0.9806016909141034 and parameters: {'n_estimators': 1693, 'learning_rate': 0.03407869075389578, 'num_leaves': 240, 'max_depth': 10, 'subsample': 0.5306404918112734, 'colsample_bytree': 0.7210811866885108, 'reg_alpha': 2.302092262536721e-06, 'reg_lambda': 1.6364862633841804e-07, 'min_child_weight': 4}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1880]	valid_0's l2: 0.0222359




[I 2025-06-10 01:33:39,006] Trial 39 finished with value: 0.981322961005912 and parameters: {'n_estimators': 1883, 'learning_rate': 0.08360865551793967, 'num_leaves': 216, 'max_depth': 20, 'subsample': 0.6776019394922901, 'colsample_bytree': 0.6730799248591713, 'reg_alpha': 0.01073043029435399, 'reg_lambda': 2.584579489785273e-05, 'min_child_weight': 11}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1864]	valid_0's l2: 0.0223959




[I 2025-06-10 01:38:13,041] Trial 40 finished with value: 0.98118861922618 and parameters: {'n_estimators': 1864, 'learning_rate': 0.048525885042233714, 'num_leaves': 215, 'max_depth': 16, 'subsample': 0.5805991252697545, 'colsample_bytree': 0.8059148725773343, 'reg_alpha': 0.012236824484432885, 'reg_lambda': 0.00019345600365959267, 'min_child_weight': 11}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1982]	valid_0's l2: 0.0222839




[I 2025-06-10 01:42:17,868] Trial 41 finished with value: 0.9812826613641811 and parameters: {'n_estimators': 1983, 'learning_rate': 0.08181828468022663, 'num_leaves': 199, 'max_depth': 21, 'subsample': 0.6844310200017443, 'colsample_bytree': 0.6740793703703856, 'reg_alpha': 0.0006010939134732825, 'reg_lambda': 1.644425912903386e-05, 'min_child_weight': 7}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[1739]	valid_0's l2: 0.022157




[I 2025-06-10 01:49:46,977] Trial 42 finished with value: 0.9813892800254611 and parameters: {'n_estimators': 1739, 'learning_rate': 0.06191760327318874, 'num_leaves': 236, 'max_depth': 23, 'subsample': 0.6213099299210503, 'colsample_bytree': 0.5673901503005567, 'reg_alpha': 0.010609218182470981, 'reg_lambda': 4.0344521498802894e-05, 'min_child_weight': 10}. Best is trial 23 with value: 0.9813965631718042.


  'learning_rate':    trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
  'subsample':        trial.suggest_uniform('subsample', 0.5, 1.0),
  'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1.0),
  'reg_alpha':        trial.suggest_loguniform('reg_alpha', 1e-8, 10.0),
  'reg_lambda':       trial.suggest_loguniform('reg_lambda', 1e-8, 10.0),


Training until validation scores don't improve for 50 rounds


### Выводы

$R^2 = 0.9814$

In [None]:
param = {
    'n_estimators': 1828, 
    'learning_rate': 0.05930396196813868, 
    'num_leaves': 231, 
    'max_depth': 20, 
    'subsample': 0.5848815693903193, 
    'colsample_bytree': 0.7082366853043143, 
    'reg_alpha': 0.00027775655371692815, 
    'reg_lambda': 3.7302584109152244e-06, 
    'min_child_weight': 1
}