In [1]:
import pandas as pd

# Define the filenames and variable names
dataset_names = [
    "X_train", 
    "X_validate",
    "X_test", 
    "y_train", "y_validate", "train_df", "test_df"
]

# Load each dataset from CSV
datasets = {name: pd.read_csv(f"{name}.csv") for name in dataset_names}

# Unpack variables for direct usage
X_train = datasets["X_train"]
X_validate = datasets["X_validate"]
X_test = datasets["X_test"]
y_train = datasets["y_train"]
y_validate = datasets["y_validate"]
train_df = datasets["train_df"]
test_df = datasets["test_df"]

print("All datasets loaded successfully! 🎉")


All datasets loaded successfully! 🎉


In [81]:
train_df

Unnamed: 0,Item_Identifier,Item_Weight,Item_Fat_Content,Item_Visibility,Item_Type,Item_MRP,Outlet_Identifier,Outlet_Establishment_Year,Outlet_Size,Outlet_Location_Type,Outlet_Type,Item_Outlet_Sales,Item_Group,MRP_per_unit_Weight,Outlet_Age,Item_Category,Outlet_Total_Sales,Outlet_Combined,Item_MRP_Per_Unit_Weight,Outlet_Age_Binned
0,FDA15,9.300,Low Fat,0.016047,Dairy,249.8092,OUT049,1999,Medium,Tier 1,Supermarket Type1,3735.1380,FD,26.861204,10,Food,2.183970e+06,Supermarket Type1_Medium,26.861204,New
1,DRC01,5.920,Regular,0.019278,Soft Drinks,48.2692,OUT018,2009,Medium,Tier 3,Supermarket Type2,443.4228,DR,8.153581,0,Drinks,1.851823e+06,Supermarket Type2_Medium,8.153581,New
2,FDN15,17.500,Low Fat,0.016760,Meat,141.6180,OUT049,1999,Medium,Tier 1,Supermarket Type1,2097.2700,FD,8.092457,10,Food,2.183970e+06,Supermarket Type1_Medium,8.092457,New
3,FDX07,19.200,Regular,0.022861,Fruits and Vegetables,182.0950,OUT010,1998,Small,Tier 3,Grocery Store,732.3800,FD,9.484115,11,Food,1.883402e+05,Grocery Store_Small,9.484115,Moderate
4,NCD19,8.930,Low Fat,0.006590,Household,53.8614,OUT013,1987,High,Tier 3,Supermarket Type1,994.7052,NC,6.031512,22,Non-Consumable,2.142664e+06,Supermarket Type1_High,6.031512,Old
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8518,FDF22,6.865,Low Fat,0.056783,Snack Foods,214.5218,OUT013,1987,High,Tier 3,Supermarket Type1,2778.3834,FD,31.248623,22,Food,2.142664e+06,Supermarket Type1_High,31.248623,Old
8519,FDS36,8.380,Regular,0.046982,Baking Goods,108.1570,OUT045,2002,Small,Tier 2,Supermarket Type1,549.2850,FD,12.906563,7,Food,2.036725e+06,Supermarket Type1_Small,12.906563,New
8520,NCJ29,10.600,Low Fat,0.035186,Health and Hygiene,85.1224,OUT035,2004,Small,Tier 2,Supermarket Type1,1193.1136,NC,8.030415,5,Non-Consumable,2.268123e+06,Supermarket Type1_Small,8.030415,New
8521,FDN46,7.210,Regular,0.145221,Snack Foods,103.1332,OUT018,2009,Medium,Tier 3,Supermarket Type2,1845.5976,FD,14.304189,0,Food,1.851823e+06,Supermarket Type2_Medium,14.304189,New


In [4]:
import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Select features and target
categorical_cols = ['Item_Fat_Content', 'Item_Category', 'Outlet_Location_Type', 'Outlet_Type', 'Outlet_Size']
numerical_cols = ['Item_MRP', 'Outlet_Total_Sales', 'Item_Visibility', 'Item_Weight', 'Outlet_Age']

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.05),
        'num_leaves': trial.suggest_int('num_leaves', 20, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 10.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 10.0),
        'n_estimators': trial.suggest_int('n_estimators', 300, 1500)
        
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(50, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-17 18:17:51,198] A new study created in memory with name: no-name-4eafbe10-1cbe-49a1-b1d1-180a6fa65825


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001505 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:17:51,781] Trial 0 finished with value: 1019.1008238999981 and parameters: {'learning_rate': 0.01069483256791398, 'num_leaves': 126, 'max_depth': 7, 'min_child_samples': 34, 'colsample_bytree': 0.6682917997588329, 'subsample': 0.5549494367258951, 'reg_lambda': 5.042103297374779, 'reg_alpha': 2.9205707667120144, 'n_estimators': 1376}. Best is trial 0 with value: 1019.1008238999981.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:17:52,187] Trial 1 finished with value: 1023.736016508303 and parameters: {'learning_rate': 0.01671286504044731, 'num_leaves': 90, 'max_depth': 10, 'min_child_samples': 28, 'colsample_bytree': 0.6986895518224319, 'subsample': 0.8085346963004922, 'reg_lambda': 9.531268219167666, 'reg_alpha': 8.809972555792275, 'n_estimators': 1199}. Best is trial 0 with value: 1019.1008238999981.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000239 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:52,476] Trial 2 finished with value: 1017.318442045297 and parameters: {'learning_rate': 0.021774886534699906, 'num_leaves': 59, 'max_depth': 3, 'min_child_samples': 39, 'colsample_bytree': 0.532130630753864, 'subsample': 0.8416055382656498, 'reg_lambda': 8.482065610248434, 'reg_alpha': 4.6154425548640505, 'n_estimators': 481}. Best is trial 2 with value: 1017.318442045297.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000207 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:52,732] Trial 3 finished with value: 1014.6173153441779 and parameters: {'learning_rate': 0.023456412868893453, 'num_leaves': 161, 'max_depth': 4, 'min_child_samples': 64, 'colsample_bytree': 0.5344698177667919, 'subsample': 0.6912087799530441, 'reg_lambda': 4.252235653916639, 'reg_alpha': 0.35356058895601894, 'n_estimators': 874}. Best is trial 3 with value: 1014.6173153441779.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000268 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232








[I 2025-03-17 18:17:53,469] Trial 4 finished with value: 1022.2135154593451 and parameters: {'learning_rate': 0.008042772752528632, 'num_leaves': 126, 'max_depth': 14, 'min_child_samples': 64, 'colsample_bytree': 0.8953515165491404, 'subsample': 0.7809644304944034, 'reg_lambda': 1.989876636669156, 'reg_alpha': 3.9726487257532916, 'n_estimators': 1213}. Best is trial 3 with value: 1014.6173153441779.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000210 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:17:54,049] Trial 5 finished with value: 1024.5003116165171 and parameters: {'learning_rate': 0.014235128583446395, 'num_leaves': 104, 'max_depth': 13, 'min_child_samples': 81, 'colsample_bytree': 0.5096579916618624, 'subsample': 0.6141365316708243, 'reg_lambda': 6.145022225292938, 'reg_alpha': 1.889334928490337, 'n_estimators': 954}. Best is trial 3 with value: 1014.6173153441779.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000227 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:54,300] Trial 6 finished with value: 1020.5331147166168 and parameters: {'learning_rate': 0.03453749065139964, 'num_leaves': 107, 'max_depth': 10, 'min_child_samples': 69, 'colsample_bytree': 0.6336407151336304, 'subsample': 0.6063671596210016, 'reg_lambda': 7.884717293326199, 'reg_alpha': 6.894388735121991, 'n_estimators': 1275}. Best is trial 3 with value: 1014.6173153441779.
[I 2025-03-17 18:17:54,432] Trial 7 finished with value: 1017.8560073935545 and parameters: {'learning_rate': 0.04292865211018047, 'num_leaves': 26, 'max_depth': 3, 'min_child_samples': 10, 'colsample_bytree': 0.9859528617219819, 'subsample': 0.9364565205319471, 'reg_lambda': 1.630409419014259, 'reg_alpha': 1.7665478370994645, 'n_estimators': 975}. Best is trial 3 with value: 1014.6173153441779.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000262 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000225 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:54,651] Trial 8 finished with value: 1025.8157416357374 and parameters: {'learning_rate': 0.029379859074238027, 'num_leaves': 81, 'max_depth': 14, 'min_child_samples': 21, 'colsample_bytree': 0.9477395355931963, 'subsample': 0.5327998273106809, 'reg_lambda': 9.782588621538753, 'reg_alpha': 9.190230410879009, 'n_estimators': 1337}. Best is trial 3 with value: 1014.6173153441779.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:54,922] Trial 9 finished with value: 1017.8388803405542 and parameters: {'learning_rate': 0.0210246172523408, 'num_leaves': 64, 'max_depth': 6, 'min_child_samples': 61, 'colsample_bytree': 0.6651264061890548, 'subsample': 0.9420766497065928, 'reg_lambda': 1.7445628321803166, 'reg_alpha': 0.2878394164676723, 'n_estimators': 664}. Best is trial 3 with value: 1014.6173153441779.




[I 2025-03-17 18:17:55,083] Trial 10 finished with value: 1016.1708804248358 and parameters: {'learning_rate': 0.04919822733390464, 'num_leaves': 185, 'max_depth': 6, 'min_child_samples': 99, 'colsample_bytree': 0.8184262769663808, 'subsample': 0.6700298501528309, 'reg_lambda': 3.691656070727408, 'reg_alpha': 0.026416777298854832, 'n_estimators': 312}. Best is trial 3 with value: 1014.6173153441779.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000414 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000255 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:55,276] Trial 11 finished with value: 1015.2550301084304 and parameters: {'learning_rate': 0.04959048050087922, 'num_leaves': 188, 'max_depth': 6, 'min_child_samples': 89, 'colsample_bytree': 0.8197237685454295, 'subsample': 0.7009641007716002, 'reg_lambda': 3.576690311522026, 'reg_alpha': 0.033370654783390394, 'n_estimators': 307}. Best is trial 3 with value: 1014.6173153441779.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000227 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:55,499] Trial 12 finished with value: 1016.0669884599065 and parameters: {'learning_rate': 0.03668093410683278, 'num_leaves': 197, 'max_depth': 5, 'min_child_samples': 94, 'colsample_bytree': 0.796534701972875, 'subsample': 0.6987043642437699, 'reg_lambda': 3.72831169144423, 'reg_alpha': 1.0878018693244451, 'n_estimators': 721}. Best is trial 3 with value: 1014.6173153441779.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000299 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:17:55,834] Trial 13 finished with value: 1019.778241099147 and parameters: {'learning_rate': 0.027950129283571462, 'num_leaves': 162, 'max_depth': 8, 'min_child_samples': 47, 'colsample_bytree': 0.8538757586203312, 'subsample': 0.7174649741957823, 'reg_lambda': 0.37160113613356227, 'reg_alpha': 6.338188866467027, 'n_estimators': 732}. Best is trial 3 with value: 1014.6173153441779.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000511 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:56,013] Trial 14 finished with value: 1013.5748973601336 and parameters: {'learning_rate': 0.04755905409410803, 'num_leaves': 162, 'max_depth': 4, 'min_child_samples': 81, 'colsample_bytree': 0.7501890663732351, 'subsample': 0.8820771362253207, 'reg_lambda': 6.1429570792151225, 'reg_alpha': 3.106237830271349, 'n_estimators': 370}. Best is trial 14 with value: 1013.5748973601336.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000179 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:56,222] Trial 15 finished with value: 1013.4300516725166 and parameters: {'learning_rate': 0.03797824105379016, 'num_leaves': 149, 'max_depth': 4, 'min_child_samples': 76, 'colsample_bytree': 0.5900290136010845, 'subsample': 0.8817058886479368, 'reg_lambda': 6.4202981389682705, 'reg_alpha': 3.119744454778996, 'n_estimators': 545}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000187 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:56,496] Trial 16 finished with value: 1018.8271700346368 and parameters: {'learning_rate': 0.04224947282123197, 'num_leaves': 151, 'max_depth': 9, 'min_child_samples': 77, 'colsample_bytree': 0.5844975086757102, 'subsample': 0.9062353407502284, 'reg_lambda': 6.605652252914368, 'reg_alpha': 3.2669714700892323, 'n_estimators': 546}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000228 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:56,662] Trial 17 finished with value: 1014.6577466651963 and parameters: {'learning_rate': 0.04157350561907729, 'num_leaves': 141, 'max_depth': 4, 'min_child_samples': 78, 'colsample_bytree': 0.7559804818172063, 'subsample': 0.8689873863707863, 'reg_lambda': 6.749960112068473, 'reg_alpha': 6.015013445486232, 'n_estimators': 483}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000231 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:56,959] Trial 18 finished with value: 1023.0262694210775 and parameters: {'learning_rate': 0.03558487929272886, 'num_leaves': 173, 'max_depth': 12, 'min_child_samples': 50, 'colsample_bytree': 0.7195663434179896, 'subsample': 0.99673562859182, 'reg_lambda': 5.392244759745054, 'reg_alpha': 2.770090954278882, 'n_estimators': 429}. Best is trial 15 with value: 1013.4300516725166.




[I 2025-03-17 18:17:57,170] Trial 19 finished with value: 1015.6507311076168 and parameters: {'learning_rate': 0.04447329087875908, 'num_leaves': 134, 'max_depth': 5, 'min_child_samples': 86, 'colsample_bytree': 0.6252381852155118, 'subsample': 0.8719706503769897, 'reg_lambda': 7.82694876408977, 'reg_alpha': 5.088492918010905, 'n_estimators': 609}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000187 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000260 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:57,451] Trial 20 finished with value: 1017.8593684547003 and parameters: {'learning_rate': 0.032196153164083495, 'num_leaves': 172, 'max_depth': 8, 'min_child_samples': 69, 'colsample_bytree': 0.7536126794176883, 'subsample': 0.9942189550944617, 'reg_lambda': 5.803148947304969, 'reg_alpha': 7.411260377762943, 'n_estimators': 1065}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000277 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:57,717] Trial 21 finished with value: 1013.9132296915219 and parameters: {'learning_rate': 0.02281959359292586, 'num_leaves': 155, 'max_depth': 4, 'min_child_samples': 57, 'colsample_bytree': 0.5743451892488431, 'subsample': 0.7644190800648237, 'reg_lambda': 4.545254589154304, 'reg_alpha': 1.9258018536653008, 'n_estimators': 821}. Best is trial 15 with value: 1013.4300516725166.
[I 2025-03-17 18:17:57,926] Trial 22 finished with value: 1013.9196411665824 and parameters: {'learning_rate': 0.03847480208888334, 'num_leaves': 149, 'max_depth': 4, 'min_child_samples': 59, 'colsample_bytree': 0.5910546464684212, 'subsample': 0.7682996979950771, 'reg_lambda': 7.0050631700851875, 'reg_alpha': 1.9489466437566878, 'n_estimators': 804}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000183 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000213 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10


[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:58,205] Trial 23 finished with value: 1014.9737667443231 and parameters: {'learning_rate': 0.026038129621105962, 'num_leaves': 120, 'max_depth': 3, 'min_child_samples': 72, 'colsample_bytree': 0.5694697641861162, 'subsample': 0.804299331056733, 'reg_lambda': 4.886408729877441, 'reg_alpha': 3.8135553901245727, 'n_estimators': 375}. Best is trial 15 with value: 1013.4300516725166.
[I 2025-03-17 18:17:58,412] Trial 24 finished with value: 1015.2444776297519 and parameters: {'learning_rate': 0.04501527303778612, 'num_leaves': 170, 'max_depth': 5, 'min_child_samples': 55, 'colsample_bytree': 0.6205373456722358, 'subsample': 0.838999724728415, 'reg_lambda': 2.846312366613623, 'reg_alpha': 2.5843431598641993, 'n_estimators': 561}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000221 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:58,638] Trial 25 finished with value: 1019.487584377706 and parameters: {'learning_rate': 0.03972198501884025, 'num_leaves': 147, 'max_depth': 7, 'min_child_samples': 86, 'colsample_bytree': 0.7069685240934942, 'subsample': 0.9135760259843402, 'reg_lambda': 4.474257894639562, 'reg_alpha': 4.927250468761546, 'n_estimators': 811}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000271 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:58,981] Trial 26 finished with value: 1013.7385095913484 and parameters: {'learning_rate': 0.017538296116725586, 'num_leaves': 183, 'max_depth': 4, 'min_child_samples': 44, 'colsample_bytree': 0.5564607877586425, 'subsample': 0.7538913087472774, 'reg_lambda': 7.258006618290415, 'reg_alpha': 1.1441787016082974, 'n_estimators': 625}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001030 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:17:59,397] Trial 27 finished with value: 1016.4376958655051 and parameters: {'learning_rate': 0.01640925406593073, 'num_leaves': 195, 'max_depth': 5, 'min_child_samples': 44, 'colsample_bytree': 0.5069039306927041, 'subsample': 0.7350527779474356, 'reg_lambda': 7.551462530313879, 'reg_alpha': 1.257672527612746, 'n_estimators': 399}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000229 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:59,595] Trial 28 finished with value: 1014.9088627512567 and parameters: {'learning_rate': 0.04688182115431913, 'num_leaves': 181, 'max_depth': 3, 'min_child_samples': 52, 'colsample_bytree': 0.6657288784303843, 'subsample': 0.6424827911168479, 'reg_lambda': 8.882973893578765, 'reg_alpha': 3.752553545296102, 'n_estimators': 1482}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000384 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:17:59,861] Trial 29 finished with value: 1016.8622013984464 and parameters: {'learning_rate': 0.03168958113288659, 'num_leaves': 135, 'max_depth': 7, 'min_child_samples': 31, 'colsample_bytree': 0.7883305827042371, 'subsample': 0.9544761869797095, 'reg_lambda': 5.855441656615621, 'reg_alpha': 3.1057249797353124, 'n_estimators': 588}. Best is trial 15 with value: 1013.4300516725166.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000239 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:18:00,452] Trial 30 finished with value: 1019.4332399567321 and parameters: {'learning_rate': 0.012699656342314574, 'num_leaves': 199, 'max_depth': 11, 'min_child_samples': 39, 'colsample_bytree': 0.8684449060413122, 'subsample': 0.8837736597302657, 'reg_lambda': 8.549718050754933, 'reg_alpha': 0.9137326629877982, 'n_estimators': 483}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:00,798] Trial 31 finished with value: 1013.8653217663217 and parameters: {'learning_rate': 0.018185810317273336, 'num_leaves': 161, 'max_depth': 4, 'min_child_samples': 75, 'colsample_bytree': 0.5528695033461369, 'subsample': 0.7471238696673738, 'reg_lambda': 5.324707900551988, 'reg_alpha': 2.5417243327213663, 'n_estimators': 686}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232






[I 2025-03-17 18:18:01,336] Trial 32 finished with value: 1027.509495988746 and parameters: {'learning_rate': 0.005410012103453465, 'num_leaves': 168, 'max_depth': 4, 'min_child_samples': 73, 'colsample_bytree': 0.5342342833765176, 'subsample': 0.8254491929308552, 'reg_lambda': 7.223845776627247, 'reg_alpha': 2.3828333307999507, 'n_estimators': 668}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:01,769] Trial 33 finished with value: 1018.6474620401564 and parameters: {'learning_rate': 0.0172080355489174, 'num_leaves': 179, 'max_depth': 6, 'min_child_samples': 81, 'colsample_bytree': 0.5471859417739109, 'subsample': 0.8005161656509856, 'reg_lambda': 6.202805396554174, 'reg_alpha': 4.3497738136656805, 'n_estimators': 515}. Best is trial 15 with value: 1013.4300516725166.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:02,086] Trial 34 finished with value: 1016.6643999336825 and parameters: {'learning_rate': 0.019169754562959794, 'num_leaves': 120, 'max_depth': 3, 'min_child_samples': 93, 'colsample_bytree': 0.6836110310528912, 'subsample': 0.8548073009281447, 'reg_lambda': 5.298435502198773, 'reg_alpha': 3.00942631871064, 'n_estimators': 657}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000228 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:02,339] Trial 35 finished with value: 1015.1722690086201 and parameters: {'learning_rate': 0.025747756129670375, 'num_leaves': 160, 'max_depth': 5, 'min_child_samples': 66, 'colsample_bytree': 0.6067676248482323, 'subsample': 0.7461670188136628, 'reg_lambda': 8.991171316777354, 'reg_alpha': 1.0230124453963736, 'n_estimators': 419}. Best is trial 15 with value: 1013.4300516725166.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:18:02,760] Trial 36 finished with value: 1013.3546257424019 and parameters: {'learning_rate': 0.011179724645381979, 'num_leaves': 138, 'max_depth': 4, 'min_child_samples': 38, 'colsample_bytree': 0.7250122201859769, 'subsample': 0.7970504118662582, 'reg_lambda': 6.533668047242595, 'reg_alpha': 3.5601476325484214, 'n_estimators': 723}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000292 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232






[I 2025-03-17 18:18:03,456] Trial 37 finished with value: 1026.4017612609314 and parameters: {'learning_rate': 0.010814982397484367, 'num_leaves': 96, 'max_depth': 15, 'min_child_samples': 35, 'colsample_bytree': 0.7311468471040389, 'subsample': 0.823314865731123, 'reg_lambda': 6.490137956326679, 'reg_alpha': 5.304651535433935, 'n_estimators': 759}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000208 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:18:03,960] Trial 38 finished with value: 1023.6536038143065 and parameters: {'learning_rate': 0.005862016295061289, 'num_leaves': 132, 'max_depth': 3, 'min_child_samples': 21, 'colsample_bytree': 0.6549569899826206, 'subsample': 0.7828417978544903, 'reg_lambda': 8.304611159967763, 'reg_alpha': 3.4435158645577033, 'n_estimators': 604}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000308 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232






[I 2025-03-17 18:18:04,641] Trial 39 finished with value: 1016.954353494024 and parameters: {'learning_rate': 0.009485239467898948, 'num_leaves': 116, 'max_depth': 8, 'min_child_samples': 42, 'colsample_bytree': 0.7726714248333812, 'subsample': 0.8991376928687429, 'reg_lambda': 7.630239931984627, 'reg_alpha': 4.252741150536665, 'n_estimators': 908}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000221 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:05,057] Trial 40 finished with value: 1015.0409615648307 and parameters: {'learning_rate': 0.013609288250875854, 'num_leaves': 140, 'max_depth': 4, 'min_child_samples': 27, 'colsample_bytree': 0.6448365621941038, 'subsample': 0.7948175513381193, 'reg_lambda': 7.066090966362446, 'reg_alpha': 9.933769481322454, 'n_estimators': 1051}. Best is trial 36 with value: 1013.3546257424019.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000207 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:05,477] Trial 41 finished with value: 1015.7278847968676 and parameters: {'learning_rate': 0.01600943866365767, 'num_leaves': 162, 'max_depth': 5, 'min_child_samples': 75, 'colsample_bytree': 0.5520580173256409, 'subsample': 0.7334064643281902, 'reg_lambda': 6.0995865591198255, 'reg_alpha': 2.2160301766316, 'n_estimators': 677}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000182 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10


[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:05,802] Trial 42 finished with value: 1014.1732269880464 and parameters: {'learning_rate': 0.020289826485152145, 'num_leaves': 157, 'max_depth': 4, 'min_child_samples': 85, 'colsample_bytree': 0.5007712837576397, 'subsample': 0.848060647764007, 'reg_lambda': 5.402906582174287, 'reg_alpha': 1.557223701086547, 'n_estimators': 525}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000209 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:06,052] Trial 43 finished with value: 1019.1617387069425 and parameters: {'learning_rate': 0.01067186983285355, 'num_leaves': 20, 'max_depth': 7, 'min_child_samples': 37, 'colsample_bytree': 0.687059654421743, 'subsample': 0.5012313780370671, 'reg_lambda': 4.967054943083428, 'reg_alpha': 3.613819690278416, 'n_estimators': 346}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000190 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:06,391] Trial 44 finished with value: 1015.4650426432077 and parameters: {'learning_rate': 0.019269916366333684, 'num_leaves': 188, 'max_depth': 3, 'min_child_samples': 63, 'colsample_bytree': 0.5234535539589423, 'subsample': 0.6674429146722098, 'reg_lambda': 5.647692239667787, 'reg_alpha': 2.7091486747128832, 'n_estimators': 466}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232








[I 2025-03-17 18:18:07,172] Trial 45 finished with value: 1015.0975170347256 and parameters: {'learning_rate': 0.0078025905170093, 'num_leaves': 127, 'max_depth': 6, 'min_child_samples': 44, 'colsample_bytree': 0.6041631160329008, 'subsample': 0.7562992689539259, 'reg_lambda': 6.287063659344733, 'reg_alpha': 4.612540503414392, 'n_estimators': 882}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000239 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:07,467] Trial 46 finished with value: 1016.3960512565757 and parameters: {'learning_rate': 0.023863538520760327, 'num_leaves': 147, 'max_depth': 5, 'min_child_samples': 81, 'colsample_bytree': 0.5679977140568836, 'subsample': 0.9268901463260658, 'reg_lambda': 7.327820046976854, 'reg_alpha': 0.5506497146642295, 'n_estimators': 621}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000233 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:07,668] Trial 47 finished with value: 1015.2866830649837 and parameters: {'learning_rate': 0.031014724920046033, 'num_leaves': 44, 'max_depth': 4, 'min_child_samples': 26, 'colsample_bytree': 0.9262006130882492, 'subsample': 0.601189200837667, 'reg_lambda': 6.710147030021555, 'reg_alpha': 1.4656254756962017, 'n_estimators': 711}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000283 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:18:08,063] Trial 48 finished with value: 1015.635588612399 and parameters: {'learning_rate': 0.014553875821184607, 'num_leaves': 177, 'max_depth': 6, 'min_child_samples': 93, 'colsample_bytree': 0.8319485620194461, 'subsample': 0.7131690884442063, 'reg_lambda': 4.0050013794013894, 'reg_alpha': 4.21578818355245, 'n_estimators': 772}. Best is trial 36 with value: 1013.3546257424019.
[I 2025-03-17 18:18:08,298] Trial 49 finished with value: 1015.8804201279881 and parameters: {'learning_rate': 0.03379783251508026, 'num_leaves': 165, 'max_depth': 3, 'min_child_samples': 99, 'colsample_bytree': 0.7249303084645398, 'subsample': 0.9608215056260477, 'reg_lambda': 8.12145251356516, 'reg_alpha': 2.312420052450768, 'n_estimators': 556}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:08,503] Trial 50 finished with value: 1013.9155597573672 and parameters: {'learning_rate': 0.047394076328623906, 'num_leaves': 142, 'max_depth': 4, 'min_child_samples': 67, 'colsample_bytree': 0.5566661751454182, 'subsample': 0.6778102078839037, 'reg_lambda': 6.911977234162878, 'reg_alpha': 5.611002746218306, 'n_estimators': 986}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000176 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000210 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10


[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:08,806] Trial 51 finished with value: 1013.7898894518777 and parameters: {'learning_rate': 0.02237679533796033, 'num_leaves': 155, 'max_depth': 4, 'min_child_samples': 59, 'colsample_bytree': 0.5804538155717914, 'subsample': 0.7702046020008623, 'reg_lambda': 4.770114341927133, 'reg_alpha': 2.1922461849503847, 'n_estimators': 852}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:09,156] Trial 52 finished with value: 1015.5575147974581 and parameters: {'learning_rate': 0.01825299330193362, 'num_leaves': 154, 'max_depth': 5, 'min_child_samples': 48, 'colsample_bytree': 0.5966106300812641, 'subsample': 0.8193214207198916, 'reg_lambda': 4.771265821472779, 'reg_alpha': 3.2447965845934905, 'n_estimators': 839}. Best is trial 36 with value: 1013.3546257424019.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000182 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:09,493] Trial 53 finished with value: 1014.5210278606457 and parameters: {'learning_rate': 0.022472340751121422, 'num_leaves': 191, 'max_depth': 4, 'min_child_samples': 80, 'colsample_bytree': 0.520973593698233, 'subsample': 0.7774963360376909, 'reg_lambda': 3.2913561698735148, 'reg_alpha': 0.6948225604881486, 'n_estimators': 695}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:09,738] Trial 54 finished with value: 1015.3107266307273 and parameters: {'learning_rate': 0.02871788726071931, 'num_leaves': 183, 'max_depth': 3, 'min_child_samples': 53, 'colsample_bytree': 0.611768347946469, 'subsample': 0.8836601844474319, 'reg_lambda': 5.895032638564665, 'reg_alpha': 1.9413696516021908, 'n_estimators': 642}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:10,003] Trial 55 finished with value: 1013.7777383998365 and parameters: {'learning_rate': 0.025488509573423114, 'num_leaves': 166, 'max_depth': 4, 'min_child_samples': 70, 'colsample_bytree': 0.58791557638195, 'subsample': 0.7164661670403905, 'reg_lambda': 5.5134898987720184, 'reg_alpha': 2.8192450801117666, 'n_estimators': 921}. Best is trial 36 with value: 1013.3546257424019.




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000255 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:10,517] Trial 56 finished with value: 1016.4823340383339 and parameters: {'learning_rate': 0.02487483186829069, 'num_leaves': 171, 'max_depth': 5, 'min_child_samples': 61, 'colsample_bytree': 0.6405423557238501, 'subsample': 0.7168366339316655, 'reg_lambda': 6.4140799549861685, 'reg_alpha': 7.892820623846654, 'n_estimators': 1101}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000184 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:18:10,860] Trial 57 finished with value: 1022.4477808028655 and parameters: {'learning_rate': 0.02684531692436464, 'num_leaves': 143, 'max_depth': 10, 'min_child_samples': 70, 'colsample_bytree': 0.5827736888759413, 'subsample': 0.6511969147265572, 'reg_lambda': 4.156269360563288, 'reg_alpha': 2.7782354739854083, 'n_estimators': 938}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000224 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:11,205] Trial 58 finished with value: 1016.9154201010281 and parameters: {'learning_rate': 0.020915943939842956, 'num_leaves': 75, 'max_depth': 6, 'min_child_samples': 57, 'colsample_bytree': 0.626086398844996, 'subsample': 0.8610464134666451, 'reg_lambda': 5.682242828179654, 'reg_alpha': 2.110600806708456, 'n_estimators': 1002}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:11,383] Trial 59 finished with value: 1016.2606469338959 and parameters: {'learning_rate': 0.03755679077347824, 'num_leaves': 104, 'max_depth': 3, 'min_child_samples': 31, 'colsample_bytree': 0.6883299428980199, 'subsample': 0.6970910946082444, 'reg_lambda': 0.3297507232831025, 'reg_alpha': 1.5610452757318465, 'n_estimators': 1191}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000215 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:11,616] Trial 60 finished with value: 1015.2699770170466 and parameters: {'learning_rate': 0.023517006245678023, 'num_leaves': 176, 'max_depth': 5, 'min_child_samples': 63, 'colsample_bytree': 0.7669291922398391, 'subsample': 0.8349418805006311, 'reg_lambda': 3.0483936169861328, 'reg_alpha': 3.4662899145366426, 'n_estimators': 351}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000182 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:18:11,985] Trial 61 finished with value: 1014.9519031319492 and parameters: {'learning_rate': 0.015315899317483627, 'num_leaves': 153, 'max_depth': 4, 'min_child_samples': 76, 'colsample_bytree': 0.5396799859617716, 'subsample': 0.7433926098704607, 'reg_lambda': 5.282238612199082, 'reg_alpha': 2.5249855822542644, 'n_estimators': 752}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000184 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:12,253] Trial 62 finished with value: 1013.8203048748567 and parameters: {'learning_rate': 0.02139392645183462, 'num_leaves': 165, 'max_depth': 4, 'min_child_samples': 73, 'colsample_bytree': 0.5646586908491067, 'subsample': 0.7223189705731663, 'reg_lambda': 4.758976139659496, 'reg_alpha': 2.959485424883319, 'n_estimators': 851}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:12,443] Trial 63 finished with value: 1013.7794532870704 and parameters: {'learning_rate': 0.04054887135598435, 'num_leaves': 166, 'max_depth': 4, 'min_child_samples': 71, 'colsample_bytree': 0.5703029602727989, 'subsample': 0.7279864802638324, 'reg_lambda': 4.509560074137044, 'reg_alpha': 3.059898339080545, 'n_estimators': 904}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:12,638] Trial 64 finished with value: 1014.944965906127 and parameters: {'learning_rate': 0.04527883216561276, 'num_leaves': 148, 'max_depth': 3, 'min_child_samples': 69, 'colsample_bytree': 0.5855979482205318, 'subsample': 0.7768070298373173, 'reg_lambda': 3.9046971368621803, 'reg_alpha': 3.81441015443202, 'n_estimators': 914}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000205 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:12,806] Trial 65 finished with value: 1014.8074951735329 and parameters: {'learning_rate': 0.039998908073311856, 'num_leaves': 137, 'max_depth': 5, 'min_child_samples': 84, 'colsample_bytree': 0.9947838143335102, 'subsample': 0.6825794496669897, 'reg_lambda': 4.411478440787674, 'reg_alpha': 3.97157484500591, 'n_estimators': 780}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000174 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:13,021] Trial 66 finished with value: 1018.4216479940252 and parameters: {'learning_rate': 0.04348953722713859, 'num_leaves': 157, 'max_depth': 6, 'min_child_samples': 78, 'colsample_bytree': 0.5234551349286943, 'subsample': 0.7587562748447302, 'reg_lambda': 6.038604190330942, 'reg_alpha': 3.280881105877432, 'n_estimators': 868}. Best is trial 36 with value: 1013.3546257424019.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:13,232] Trial 67 finished with value: 1012.8259212004847 and parameters: {'learning_rate': 0.03026252561189196, 'num_leaves': 128, 'max_depth': 4, 'min_child_samples': 88, 'colsample_bytree': 0.7370653755274906, 'subsample': 0.7965196935874491, 'reg_lambda': 6.6960459649275625, 'reg_alpha': 4.584303030379563, 'n_estimators': 933}. Best is trial 67 with value: 1012.8259212004847.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:13,535] Trial 68 finished with value: 1019.7565582964854 and parameters: {'learning_rate': 0.0341659150922081, 'num_leaves': 129, 'max_depth': 13, 'min_child_samples': 89, 'colsample_bytree': 0.7385107117123079, 'subsample': 0.7906179567572755, 'reg_lambda': 6.696416807717632, 'reg_alpha': 4.750892409848342, 'n_estimators': 1015}. Best is trial 67 with value: 1012.8259212004847.




[I 2025-03-17 18:18:13,742] Trial 69 finished with value: 1016.212918736842 and parameters: {'learning_rate': 0.041014849303875736, 'num_leaves': 121, 'max_depth': 3, 'min_child_samples': 91, 'colsample_bytree': 0.8001117821002817, 'subsample': 0.8143183829559959, 'reg_lambda': 7.412271318354238, 'reg_alpha': 4.489682873649444, 'n_estimators': 1106}. Best is trial 67 with value: 1012.8259212004847.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000217 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:13,954] Trial 70 finished with value: 1013.5833935070333 and parameters: {'learning_rate': 0.030464808512267023, 'num_leaves': 111, 'max_depth': 4, 'min_child_samples': 88, 'colsample_bytree': 0.7467686245504486, 'subsample': 0.8054680726355936, 'reg_lambda': 7.785818986015844, 'reg_alpha': 5.299170372454278, 'n_estimators': 947}. Best is trial 67 with value: 1012.8259212004847.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:14,174] Trial 71 finished with value: 1013.9139454827304 and parameters: {'learning_rate': 0.029350468018910926, 'num_leaves': 101, 'max_depth': 4, 'min_child_samples': 88, 'colsample_bytree': 0.7113065350270508, 'subsample': 0.8066195546428464, 'reg_lambda': 7.721350827149548, 'reg_alpha': 5.660714391276212, 'n_estimators': 921}. Best is trial 67 with value: 1012.8259212004847.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:14,385] Trial 72 finished with value: 1015.546777895935 and parameters: {'learning_rate': 0.03269105766223336, 'num_leaves': 90, 'max_depth': 5, 'min_child_samples': 99, 'colsample_bytree': 0.7513478498040173, 'subsample': 0.8745698737894816, 'reg_lambda': 6.978679324834862, 'reg_alpha': 4.994219322889333, 'n_estimators': 949}. Best is trial 67 with value: 1012.8259212004847.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000234 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:14,605] Trial 73 finished with value: 1015.4108333144967 and parameters: {'learning_rate': 0.03604840079792207, 'num_leaves': 108, 'max_depth': 3, 'min_child_samples': 83, 'colsample_bytree': 0.7408781954560413, 'subsample': 0.8467411467077808, 'reg_lambda': 8.03622884769356, 'reg_alpha': 6.462660306589504, 'n_estimators': 1037}. Best is trial 67 with value: 1012.8259212004847.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:14,800] Trial 74 finished with value: 1014.0506316689006 and parameters: {'learning_rate': 0.03069526110004842, 'num_leaves': 167, 'max_depth': 4, 'min_child_samples': 79, 'colsample_bytree': 0.7795820002299918, 'subsample': 0.7324656236247323, 'reg_lambda': 2.3083383073782677, 'reg_alpha': 4.114717917483805, 'n_estimators': 961}. Best is trial 67 with value: 1012.8259212004847.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000218 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000216 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:14,959] Trial 75 finished with value: 1013.3860809991235 and parameters: {'learning_rate': 0.04995001940300235, 'num_leaves': 114, 'max_depth': 4, 'min_child_samples': 40, 'colsample_bytree': 0.8102178995658356, 'subsample': 0.7048602118140993, 'reg_lambda': 6.4913224000066005, 'reg_alpha': 5.1783166017102165, 'n_estimators': 1094}. Best is trial 67 with value: 1012.8259212004847.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000239 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:15,150] Trial 76 finished with value: 1012.8199157667087 and parameters: {'learning_rate': 0.04969270356698379, 'num_leaves': 114, 'max_depth': 5, 'min_child_samples': 39, 'colsample_bytree': 0.81365391049395, 'subsample': 0.8979177195168643, 'reg_lambda': 7.1469999120296, 'reg_alpha': 5.210160385847204, 'n_estimators': 1162}. Best is trial 76 with value: 1012.8199157667087.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000217 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:15,355] Trial 77 finished with value: 1013.020597438173 and parameters: {'learning_rate': 0.04991497636067921, 'num_leaves': 117, 'max_depth': 6, 'min_child_samples': 41, 'colsample_bytree': 0.8095848183835965, 'subsample': 0.9666204018310747, 'reg_lambda': 8.678858354974633, 'reg_alpha': 5.519528709151884, 'n_estimators': 1231}. Best is trial 76 with value: 1012.8199157667087.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000279 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:15,609] Trial 78 finished with value: 1019.3196263262456 and parameters: {'learning_rate': 0.04899646560891055, 'num_leaves': 112, 'max_depth': 9, 'min_child_samples': 39, 'colsample_bytree': 0.8406304529229692, 'subsample': 0.9664590827369706, 'reg_lambda': 9.40562690460212, 'reg_alpha': 5.2946748642426655, 'n_estimators': 1252}. Best is trial 76 with value: 1012.8199157667087.




[I 2025-03-17 18:18:15,802] Trial 79 finished with value: 1015.8674400671538 and parameters: {'learning_rate': 0.04998019635981637, 'num_leaves': 114, 'max_depth': 5, 'min_child_samples': 96, 'colsample_bytree': 0.8070645634906244, 'subsample': 0.9762472065125392, 'reg_lambda': 8.708605584242283, 'reg_alpha': 6.2645737314425265, 'n_estimators': 1155}. Best is trial 76 with value: 1012.8199157667087.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000218 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10


[I 2025-03-17 18:18:16,020] Trial 80 finished with value: 1011.370498722554 and parameters: {'learning_rate': 0.04780732159947716, 'num_leaves': 122, 'max_depth': 6, 'min_child_samples': 32, 'colsample_bytree': 0.8817927721417727, 'subsample': 0.9157606719548699, 'reg_lambda': 7.883879159201173, 'reg_alpha': 5.77458194410136, 'n_estimators': 1305}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:16,248] Trial 81 finished with value: 1013.0068533720261 and parameters: {'learning_rate': 0.04790979315909027, 'num_leaves': 122, 'max_depth': 6, 'min_child_samples': 32, 'colsample_bytree': 0.8805727711652496, 'subsample': 0.9154248334522809, 'reg_lambda': 9.338210954452641, 'reg_alpha': 5.685673721730252, 'n_estimators': 1320}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000227 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:16,478] Trial 82 finished with value: 1014.8209849599928 and parameters: {'learning_rate': 0.047532295021826695, 'num_leaves': 120, 'max_depth': 7, 'min_child_samples': 33, 'colsample_bytree': 0.873577378509245, 'subsample': 0.922591633764091, 'reg_lambda': 9.364111081435297, 'reg_alpha': 5.748452668543268, 'n_estimators': 1312}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000423 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:16,694] Trial 83 finished with value: 1013.2190052680278 and parameters: {'learning_rate': 0.046228942670112176, 'num_leaves': 124, 'max_depth': 6, 'min_child_samples': 21, 'colsample_bytree': 0.9032042753785483, 'subsample': 0.9025839276341161, 'reg_lambda': 9.566881473128088, 'reg_alpha': 6.844844793585431, 'n_estimators': 1453}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000217 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:16,928] Trial 84 finished with value: 1012.8062807423877 and parameters: {'learning_rate': 0.04584893365189517, 'num_leaves': 123, 'max_depth': 6, 'min_child_samples': 29, 'colsample_bytree': 0.9052839990056363, 'subsample': 0.9450567631219448, 'reg_lambda': 9.11878299305084, 'reg_alpha': 7.436005833926003, 'n_estimators': 1455}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000249 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:17,190] Trial 85 finished with value: 1017.1502441449146 and parameters: {'learning_rate': 0.04583058678480382, 'num_leaves': 124, 'max_depth': 8, 'min_child_samples': 16, 'colsample_bytree': 0.9163472909001678, 'subsample': 0.9382562245752851, 'reg_lambda': 9.848663558001808, 'reg_alpha': 7.117785633920359, 'n_estimators': 1394}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000264 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:17,395] Trial 86 finished with value: 1013.732274682446 and parameters: {'learning_rate': 0.048508636374909514, 'num_leaves': 131, 'max_depth': 6, 'min_child_samples': 30, 'colsample_bytree': 0.9635258960658724, 'subsample': 0.9064900411316819, 'reg_lambda': 9.15527013117671, 'reg_alpha': 7.755416806422156, 'n_estimators': 1452}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000341 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000213 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10


[I 2025-03-17 18:18:17,621] Trial 87 finished with value: 1012.4221882389159 and parameters: {'learning_rate': 0.04656036418713401, 'num_leaves': 125, 'max_depth': 7, 'min_child_samples': 23, 'colsample_bytree': 0.8908020457413455, 'subsample': 0.9813465881483514, 'reg_lambda': 9.954519712966203, 'reg_alpha': 6.001999043036174, 'n_estimators': 1356}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:17,854] Trial 88 finished with value: 1013.8289681630354 and parameters: {'learning_rate': 0.04643976464801105, 'num_leaves': 125, 'max_depth': 7, 'min_child_samples': 24, 'colsample_bytree': 0.8914809660215276, 'subsample': 0.9477895194205095, 'reg_lambda': 8.313450098884736, 'reg_alpha': 6.681518219361644, 'n_estimators': 1389}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000230 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:18,065] Trial 89 finished with value: 1013.1282302443961 and parameters: {'learning_rate': 0.04408644135845261, 'num_leaves': 100, 'max_depth': 6, 'min_child_samples': 18, 'colsample_bytree': 0.9162075641945313, 'subsample': 0.9842745276601208, 'reg_lambda': 9.694596784675332, 'reg_alpha': 6.022725223520419, 'n_estimators': 1352}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000249 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:18,327] Trial 90 finished with value: 1013.7619494799341 and parameters: {'learning_rate': 0.04440032759411826, 'num_leaves': 100, 'max_depth': 7, 'min_child_samples': 15, 'colsample_bytree': 0.9096978556583852, 'subsample': 0.9872460606363542, 'reg_lambda': 9.989049722347666, 'reg_alpha': 5.9455630221450315, 'n_estimators': 1358}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000237 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:18,543] Trial 91 finished with value: 1012.9961744496205 and parameters: {'learning_rate': 0.04828910426459287, 'num_leaves': 118, 'max_depth': 6, 'min_child_samples': 21, 'colsample_bytree': 0.9435666735385333, 'subsample': 0.9830688969611208, 'reg_lambda': 9.657147575736547, 'reg_alpha': 7.069442852193592, 'n_estimators': 1439}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000230 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:18,768] Trial 92 finished with value: 1012.5619344001076 and parameters: {'learning_rate': 0.048126479849340556, 'num_leaves': 118, 'max_depth': 6, 'min_child_samples': 21, 'colsample_bytree': 0.943283429494439, 'subsample': 0.9813673660284471, 'reg_lambda': 9.613144296758781, 'reg_alpha': 6.8239534185006825, 'n_estimators': 1431}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000267 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:18,986] Trial 93 finished with value: 1014.8145322296407 and parameters: {'learning_rate': 0.0484115178634424, 'num_leaves': 106, 'max_depth': 6, 'min_child_samples': 18, 'colsample_bytree': 0.9421661964569749, 'subsample': 0.9762997571688039, 'reg_lambda': 9.15120532543274, 'reg_alpha': 7.3992492626478406, 'n_estimators': 1298}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:19,222] Trial 94 finished with value: 1016.1779333191996 and parameters: {'learning_rate': 0.04290131730640742, 'num_leaves': 117, 'max_depth': 7, 'min_child_samples': 25, 'colsample_bytree': 0.9708869085083205, 'subsample': 0.9829376780350357, 'reg_lambda': 9.613423724032407, 'reg_alpha': 6.172022301420239, 'n_estimators': 1500}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000212 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:19,445] Trial 95 finished with value: 1013.5171177205063 and parameters: {'learning_rate': 0.044049592619150695, 'num_leaves': 93, 'max_depth': 6, 'min_child_samples': 11, 'colsample_bytree': 0.8843668470289798, 'subsample': 0.9971790553914577, 'reg_lambda': 8.677920246333896, 'reg_alpha': 6.5311079888177215, 'n_estimators': 1414}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:19,647] Trial 96 finished with value: 1013.1852696600323 and parameters: {'learning_rate': 0.0488980335216489, 'num_leaves': 129, 'max_depth': 6, 'min_child_samples': 23, 'colsample_bytree': 0.9379855503951602, 'subsample': 0.9502760059873394, 'reg_lambda': 8.997067218005308, 'reg_alpha': 6.01051690133678, 'n_estimators': 1263}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000241 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:19,884] Trial 97 finished with value: 1015.5411553430832 and parameters: {'learning_rate': 0.047917939384843976, 'num_leaves': 99, 'max_depth': 7, 'min_child_samples': 28, 'colsample_bytree': 0.8552423344067465, 'subsample': 0.9241498445984972, 'reg_lambda': 9.766263133456068, 'reg_alpha': 8.376617235204144, 'n_estimators': 1341}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000249 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:18:20,210] Trial 98 finished with value: 1015.9311633963123 and parameters: {'learning_rate': 0.04547723991149968, 'num_leaves': 84, 'max_depth': 8, 'min_child_samples': 34, 'colsample_bytree': 0.9240845157525267, 'subsample': 0.9695297348524359, 'reg_lambda': 9.283983651186686, 'reg_alpha': 5.500602030166416, 'n_estimators': 1233}. Best is trial 80 with value: 1011.370498722554.
[I 2025-03-17 18:18:20,458] Trial 99 finished with value: 1014.1680721482697 and parameters: {'learning_rate': 0.046935832034996926, 'num_leaves': 134, 'max_depth': 6, 'min_child_samples': 36, 'colsample_bytree': 0.955186085700521, 'subsample': 0.9602562907199849, 'reg_lambda': 8.792453949549815, 'reg_alpha': 7.251814700430523, 'n_estimators': 1432}. Best is trial 80 with value: 1011.370498722554.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000291 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232
Best Parameters: {'learning_rate': 0.04780732159947716, 'num_leaves': 122, 'max_depth': 6, 'min_child_samples': 32, 'colsample_bytree': 0.8817927721417727, 'subsample': 0.9157606719548699, 'reg_lambda': 7.883879159201173, 'reg_alpha': 5.77458194410136, 'n_estimators': 1305}


In [5]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate': 0.04780732159947716,
    'num_leaves': 122,
    'max_depth': 6,
    'min_child_samples': 32,
    'subsample': 0.9157606719548699,
    'colsample_bytree': 0.8817927721417727,
    'n_estimators': 1305,
    'reg_lambda': 7.883879159201173,
    'reg_alpha': 5.77458194410136,
}


# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: ../submission/submission_lgb.csv")



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000290 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 782
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 10
[LightGBM] [Info] Start training from score 2202.365232

✅ Model Performance on Training Set:
MAE: 726.5744
MSE: 1066629.6451
RMSE: 1032.7776
R² Score: 0.6394

✅ Model Performance on Validation Set:
MAE: 708.7873
MSE: 1022870.2857
RMSE: 1011.3705
R² Score: 0.6237
Download your submission file: ../submission/submission_lgb.csv


In [6]:
import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Select features and target
categorical_cols = ['Item_Fat_Content', 'Item_Category', 'Outlet_Location_Type']
numerical_cols = ['Item_MRP', 'Outlet_Total_Sales', 'Item_Visibility', 'Item_Weight', 'Outlet_Age']

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.05),
        'num_leaves': trial.suggest_int('num_leaves', 20, 200),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.01, 10.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.01, 10.0),
        'n_estimators': trial.suggest_int('n_estimators', 300, 1500)
        
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(50, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-17 18:22:08,483] A new study created in memory with name: no-name-2e1efab5-ef2e-4d01-92d3-692b5685d2fc


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000266 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:08,797] Trial 0 finished with value: 1020.2342744635603 and parameters: {'learning_rate': 0.04932724915767727, 'num_leaves': 189, 'max_depth': 3, 'min_child_samples': 70, 'colsample_bytree': 0.5122445869595658, 'subsample': 0.6208506653987521, 'reg_lambda': 9.474753723381452, 'reg_alpha': 8.33180634602795, 'n_estimators': 1297}. Best is trial 0 with value: 1020.2342744635603.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000221 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:09,186] Trial 1 finished with value: 1017.9251530546893 and parameters: {'learning_rate': 0.012908784127561333, 'num_leaves': 68, 'max_depth': 11, 'min_child_samples': 19, 'colsample_bytree': 0.972190445741395, 'subsample': 0.7176069060278654, 'reg_lambda': 1.213713665073071, 'reg_alpha': 7.466222592464927, 'n_estimators': 560}. Best is trial 1 with value: 1017.9251530546893.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000241 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:09,447] Trial 2 finished with value: 1022.7015949389959 and parameters: {'learning_rate': 0.028145808075078053, 'num_leaves': 69, 'max_depth': 13, 'min_child_samples': 39, 'colsample_bytree': 0.8582162558881465, 'subsample': 0.5822920346160785, 'reg_lambda': 2.6081793078433124, 'reg_alpha': 9.828105658916877, 'n_estimators': 562}. Best is trial 1 with value: 1017.9251530546893.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000184 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:09,789] Trial 3 finished with value: 1025.0996109240327 and parameters: {'learning_rate': 0.02758878879940594, 'num_leaves': 130, 'max_depth': 10, 'min_child_samples': 70, 'colsample_bytree': 0.6540328165404541, 'subsample': 0.6326335581175426, 'reg_lambda': 0.48699176846597236, 'reg_alpha': 0.2656233015637726, 'n_estimators': 1274}. Best is trial 1 with value: 1017.9251530546893.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000251 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:10,010] Trial 4 finished with value: 1016.5930215700616 and parameters: {'learning_rate': 0.03516280766783821, 'num_leaves': 139, 'max_depth': 3, 'min_child_samples': 48, 'colsample_bytree': 0.5632344498577926, 'subsample': 0.5262092210294858, 'reg_lambda': 9.080469006570096, 'reg_alpha': 7.335548294566749, 'n_estimators': 884}. Best is trial 4 with value: 1016.5930215700616.




[I 2025-03-17 18:22:10,175] Trial 5 finished with value: 1014.3760316185135 and parameters: {'learning_rate': 0.029059588551123935, 'num_leaves': 31, 'max_depth': 12, 'min_child_samples': 15, 'colsample_bytree': 0.9692283711862584, 'subsample': 0.8085548657963666, 'reg_lambda': 1.5809279479096872, 'reg_alpha': 6.576540174562192, 'n_estimators': 545}. Best is trial 5 with value: 1014.3760316185135.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000456 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000209 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:10,463] Trial 6 finished with value: 1028.372920132632 and parameters: {'learning_rate': 0.03326130356963096, 'num_leaves': 90, 'max_depth': 13, 'min_child_samples': 10, 'colsample_bytree': 0.7874264332822296, 'subsample': 0.6257631258890742, 'reg_lambda': 7.647699229543907, 'reg_alpha': 9.272618113603242, 'n_estimators': 363}. Best is trial 5 with value: 1014.3760316185135.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000192 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8


[I 2025-03-17 18:22:10,608] Trial 7 finished with value: 1015.9351959005224 and parameters: {'learning_rate': 0.03565448528012364, 'num_leaves': 38, 'max_depth': 4, 'min_child_samples': 40, 'colsample_bytree': 0.7312360336872259, 'subsample': 0.6745659435841392, 'reg_lambda': 3.708689612878934, 'reg_alpha': 2.1094029475801856, 'n_estimators': 1451}. Best is trial 5 with value: 1014.3760316185135.


[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:10,776] Trial 8 finished with value: 1016.0277871918937 and parameters: {'learning_rate': 0.048657129366671575, 'num_leaves': 144, 'max_depth': 3, 'min_child_samples': 48, 'colsample_bytree': 0.6054378418491806, 'subsample': 0.7694102953011811, 'reg_lambda': 8.568535847320133, 'reg_alpha': 1.8411079616173576, 'n_estimators': 305}. Best is trial 5 with value: 1014.3760316185135.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000218 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:10,928] Trial 9 finished with value: 1014.245937631734 and parameters: {'learning_rate': 0.04762051262215285, 'num_leaves': 106, 'max_depth': 4, 'min_child_samples': 32, 'colsample_bytree': 0.90135729234382, 'subsample': 0.6992477686768157, 'reg_lambda': 8.55252803108424, 'reg_alpha': 6.685898994688685, 'n_estimators': 1073}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000347 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:22:11,396] Trial 10 finished with value: 1017.556407181201 and parameters: {'learning_rate': 0.012774910320970188, 'num_leaves': 173, 'max_depth': 7, 'min_child_samples': 95, 'colsample_bytree': 0.8709532585766738, 'subsample': 0.9422019380664504, 'reg_lambda': 6.606996396008095, 'reg_alpha': 5.0194630619524565, 'n_estimators': 934}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000263 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:11,972] Trial 11 finished with value: 1017.0697471333409 and parameters: {'learning_rate': 0.005172194949960105, 'num_leaves': 27, 'max_depth': 7, 'min_child_samples': 26, 'colsample_bytree': 0.9978512380959215, 'subsample': 0.8290507832193856, 'reg_lambda': 5.528429848389907, 'reg_alpha': 5.358126252238661, 'n_estimators': 949}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000871 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:12,788] Trial 12 finished with value: 1026.2566389922238 and parameters: {'learning_rate': 0.04339632837826563, 'num_leaves': 101, 'max_depth': 15, 'min_child_samples': 27, 'colsample_bytree': 0.8959477005703885, 'subsample': 0.8498541772892602, 'reg_lambda': 4.327317788006968, 'reg_alpha': 6.113062078062983, 'n_estimators': 750}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001136 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:13,207] Trial 13 finished with value: 1017.6374407480996 and parameters: {'learning_rate': 0.018552117057849517, 'num_leaves': 53, 'max_depth': 7, 'min_child_samples': 30, 'colsample_bytree': 0.9157795442680364, 'subsample': 0.7950654660843193, 'reg_lambda': 2.3302402312413415, 'reg_alpha': 3.1174855038825315, 'n_estimators': 1123}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:13,373] Trial 14 finished with value: 1022.0561225403982 and parameters: {'learning_rate': 0.04161624030605003, 'num_leaves': 22, 'max_depth': 9, 'min_child_samples': 10, 'colsample_bytree': 0.782338632519189, 'subsample': 0.9064833361480686, 'reg_lambda': 6.242911310649296, 'reg_alpha': 6.579128515375626, 'n_estimators': 675}. Best is trial 9 with value: 1014.245937631734.
[I 2025-03-17 18:22:13,632] Trial 15 finished with value: 1015.4983245262234 and parameters: {'learning_rate': 0.020898856866568935, 'num_leaves': 118, 'max_depth': 5, 'min_child_samples': 63, 'colsample_bytree': 0.9440804701391962, 'subsample': 0.7264166931578586, 'reg_lambda': 2.9205672031098397, 'reg_alpha': 3.699979957108087, 'n_estimators': 1157}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000219 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000308 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:13,916] Trial 16 finished with value: 1027.3876754454159 and parameters: {'learning_rate': 0.04105100743450474, 'num_leaves': 86, 'max_depth': 13, 'min_child_samples': 20, 'colsample_bytree': 0.8212571437778989, 'subsample': 0.997362515100928, 'reg_lambda': 7.664735210172756, 'reg_alpha': 4.099101540102652, 'n_estimators': 473}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:22:14,428] Trial 17 finished with value: 1025.6885175556401 and parameters: {'learning_rate': 0.022952060203236133, 'num_leaves': 169, 'max_depth': 11, 'min_child_samples': 36, 'colsample_bytree': 0.7039927542115189, 'subsample': 0.8795594413459754, 'reg_lambda': 9.97241524035282, 'reg_alpha': 7.7736038338701166, 'n_estimators': 815}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000221 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:14,711] Trial 18 finished with value: 1018.2878432429038 and parameters: {'learning_rate': 0.032283760694091015, 'num_leaves': 53, 'max_depth': 15, 'min_child_samples': 97, 'colsample_bytree': 0.9519172975234372, 'subsample': 0.6867721926690293, 'reg_lambda': 5.078314271399702, 'reg_alpha': 6.194224488239614, 'n_estimators': 1010}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000212 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:15,078] Trial 19 finished with value: 1022.8043375233307 and parameters: {'learning_rate': 0.038903045337569594, 'num_leaves': 160, 'max_depth': 9, 'min_child_samples': 19, 'colsample_bytree': 0.8429217506575926, 'subsample': 0.8000604248748104, 'reg_lambda': 0.10927888931585161, 'reg_alpha': 8.789441381971972, 'n_estimators': 675}. Best is trial 9 with value: 1014.245937631734.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000213 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:15,270] Trial 20 finished with value: 1014.1940349063094 and parameters: {'learning_rate': 0.04540734467882021, 'num_leaves': 109, 'max_depth': 5, 'min_child_samples': 54, 'colsample_bytree': 0.8989220517887341, 'subsample': 0.5144825453573552, 'reg_lambda': 1.7966227492049092, 'reg_alpha': 6.8032900960995075, 'n_estimators': 1076}. Best is trial 20 with value: 1014.1940349063094.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000212 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:15,471] Trial 21 finished with value: 1015.2855457320835 and parameters: {'learning_rate': 0.045345637283962, 'num_leaves': 111, 'max_depth': 5, 'min_child_samples': 84, 'colsample_bytree': 0.9165188777115248, 'subsample': 0.5143901574384867, 'reg_lambda': 1.2668064938793953, 'reg_alpha': 6.74163901080977, 'n_estimators': 1091}. Best is trial 20 with value: 1014.1940349063094.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:15,685] Trial 22 finished with value: 1014.9813569295972 and parameters: {'learning_rate': 0.04463562442936545, 'num_leaves': 83, 'max_depth': 6, 'min_child_samples': 52, 'colsample_bytree': 0.9953452074488425, 'subsample': 0.5672044492209382, 'reg_lambda': 1.7626794655060207, 'reg_alpha': 5.710828824461671, 'n_estimators': 1234}. Best is trial 20 with value: 1014.1940349063094.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000197 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:15,878] Trial 23 finished with value: 1013.283336356111 and parameters: {'learning_rate': 0.04972425757538224, 'num_leaves': 121, 'max_depth': 5, 'min_child_samples': 57, 'colsample_bytree': 0.8937743658228886, 'subsample': 0.6762669388627878, 'reg_lambda': 3.7132854954909025, 'reg_alpha': 4.4005615903146005, 'n_estimators': 1421}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000225 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:16,087] Trial 24 finished with value: 1016.0782747043211 and parameters: {'learning_rate': 0.04984424779550523, 'num_leaves': 121, 'max_depth': 5, 'min_child_samples': 60, 'colsample_bytree': 0.7917619623181256, 'subsample': 0.6726686334628484, 'reg_lambda': 3.6510245529422365, 'reg_alpha': 4.521374132544084, 'n_estimators': 1483}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:16,322] Trial 25 finished with value: 1021.6943047158536 and parameters: {'learning_rate': 0.04589548061279956, 'num_leaves': 101, 'max_depth': 8, 'min_child_samples': 74, 'colsample_bytree': 0.8867899454960427, 'subsample': 0.5810914413205767, 'reg_lambda': 3.9257560064234114, 'reg_alpha': 3.1206367570584455, 'n_estimators': 1377}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8


[I 2025-03-17 18:22:16,510] Trial 26 finished with value: 1015.2006244876637 and parameters: {'learning_rate': 0.03857169098008973, 'num_leaves': 149, 'max_depth': 4, 'min_child_samples': 59, 'colsample_bytree': 0.8201768741746902, 'subsample': 0.7600900454196843, 'reg_lambda': 3.0146282437287146, 'reg_alpha': 5.108949070186495, 'n_estimators': 1029}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000202 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:16,724] Trial 27 finished with value: 1015.3582791061482 and parameters: {'learning_rate': 0.04689410419328483, 'num_leaves': 128, 'max_depth': 6, 'min_child_samples': 44, 'colsample_bytree': 0.9249955944719713, 'subsample': 0.5029704175037042, 'reg_lambda': 4.629661614445577, 'reg_alpha': 7.200108725309607, 'n_estimators': 1375}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000186 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:16,906] Trial 28 finished with value: 1016.0080902048181 and parameters: {'learning_rate': 0.03817542735072408, 'num_leaves': 104, 'max_depth': 4, 'min_child_samples': 83, 'colsample_bytree': 0.758124691055467, 'subsample': 0.550290341048286, 'reg_lambda': 5.693535338753648, 'reg_alpha': 8.203108145410301, 'n_estimators': 1175}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000187 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:17,088] Trial 29 finished with value: 1016.6287669328392 and parameters: {'learning_rate': 0.048903440628961764, 'num_leaves': 196, 'max_depth': 6, 'min_child_samples': 67, 'colsample_bytree': 0.8908860857220625, 'subsample': 0.6093691320725675, 'reg_lambda': 7.9216210597468875, 'reg_alpha': 8.247613628098655, 'n_estimators': 1357}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000180 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:22:17,401] Trial 30 finished with value: 1021.465687915832 and parameters: {'learning_rate': 0.0431454558246179, 'num_leaves': 73, 'max_depth': 3, 'min_child_samples': 55, 'colsample_bytree': 0.5087972356579598, 'subsample': 0.6460394354670417, 'reg_lambda': 6.80655269538865, 'reg_alpha': 5.644360247649862, 'n_estimators': 1227}. Best is trial 23 with value: 1013.283336356111.
[I 2025-03-17 18:22:17,638] Trial 31 finished with value: 1019.9916525392256 and parameters: {'learning_rate': 0.04712533016282802, 'num_leaves': 134, 'max_depth': 12, 'min_child_samples': 79, 'colsample_bytree': 0.9462236197454666, 'subsample': 0.7153200873062496, 'reg_lambda': 1.913146434529422, 'reg_alpha': 6.74738920864932, 'n_estimators': 867}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000209 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:17,911] Trial 32 finished with value: 1021.2045981085242 and parameters: {'learning_rate': 0.030717341366387946, 'num_leaves': 54, 'max_depth': 8, 'min_child_samples': 34, 'colsample_bytree': 0.9538953576780875, 'subsample': 0.7351381774212813, 'reg_lambda': 0.8941881451407607, 'reg_alpha': 4.459304238200873, 'n_estimators': 781}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000288 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000220 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:18,192] Trial 33 finished with value: 1016.1560983162318 and parameters: {'learning_rate': 0.02335996907170799, 'num_leaves': 152, 'max_depth': 4, 'min_child_samples': 19, 'colsample_bytree': 0.8436777194306342, 'subsample': 0.7010417423878638, 'reg_lambda': 1.708110081393538, 'reg_alpha': 6.152274683520851, 'n_estimators': 1012}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:18,503] Trial 34 finished with value: 1018.691026092536 and parameters: {'learning_rate': 0.04113702110238254, 'num_leaves': 112, 'max_depth': 10, 'min_child_samples': 45, 'colsample_bytree': 0.9875050077913191, 'subsample': 0.8049264778067988, 'reg_lambda': 3.1280592230057094, 'reg_alpha': 7.472994038320318, 'n_estimators': 487}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:18,994] Trial 35 finished with value: 1026.4948415807735 and parameters: {'learning_rate': 0.016515188004141975, 'num_leaves': 94, 'max_depth': 14, 'min_child_samples': 24, 'colsample_bytree': 0.9677061274728511, 'subsample': 0.6560850809583137, 'reg_lambda': 1.0873067343360003, 'reg_alpha': 9.946098311734708, 'n_estimators': 1308}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000763 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:22:19,563] Trial 36 finished with value: 1016.8723026370988 and parameters: {'learning_rate': 0.010020519551212425, 'num_leaves': 74, 'max_depth': 5, 'min_child_samples': 15, 'colsample_bytree': 0.8611129102990642, 'subsample': 0.6109259218878955, 'reg_lambda': 2.305419429366978, 'reg_alpha': 9.053443704969661, 'n_estimators': 1078}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000373 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:20,303] Trial 37 finished with value: 1022.935825030981 and parameters: {'learning_rate': 0.028966214112200243, 'num_leaves': 181, 'max_depth': 11, 'min_child_samples': 53, 'colsample_bytree': 0.9242935629871155, 'subsample': 0.7698873040612494, 'reg_lambda': 0.7360828772147685, 'reg_alpha': 6.974748184363315, 'n_estimators': 715}. Best is trial 23 with value: 1013.283336356111.
[I 2025-03-17 18:22:20,506] Trial 38 finished with value: 1017.442803540311 and parameters: {'learning_rate': 0.04987778150158424, 'num_leaves': 126, 'max_depth': 3, 'min_child_samples': 40, 'colsample_bytree': 0.701818978442522, 'subsample': 0.5378050154453304, 'reg_lambda': 0.163851750667799, 'reg_alpha': 4.684682699939842, 'n_estimators': 627}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000214 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000247 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2025-03-17 18:22:20,768] Trial 39 finished with value: 1016.681682011033 and parameters: {'learning_rate': 0.04666156664906034, 'num_leaves': 62, 'max_depth': 8, 'min_child_samples': 34, 'colsample_bytree': 0.8180879697433391, 'subsample': 0.8559169355076273, 'reg_lambda': 8.936532414167953, 'reg_alpha': 7.845739206883205, 'n_estimators': 555}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:21,025] Trial 40 finished with value: 1017.7876695045964 and parameters: {'learning_rate': 0.03692501912614923, 'num_leaves': 34, 'max_depth': 6, 'min_child_samples': 49, 'colsample_bytree': 0.5687154477310719, 'subsample': 0.746980614858601, 'reg_lambda': 3.3856270288698287, 'reg_alpha': 0.6175567966440827, 'n_estimators': 850}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:21,265] Trial 41 finished with value: 1015.166948775461 and parameters: {'learning_rate': 0.04430260913283329, 'num_leaves': 90, 'max_depth': 6, 'min_child_samples': 68, 'colsample_bytree': 0.9840327838111854, 'subsample': 0.5634787293342186, 'reg_lambda': 1.8343752636303574, 'reg_alpha': 5.3965627293587195, 'n_estimators': 1234}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000232 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:21,467] Trial 42 finished with value: 1013.6304108386738 and parameters: {'learning_rate': 0.04227443588898403, 'num_leaves': 80, 'max_depth': 4, 'min_child_samples': 52, 'colsample_bytree': 0.9696670867334112, 'subsample': 0.5847104143434684, 'reg_lambda': 1.7062737353737474, 'reg_alpha': 5.79151726497714, 'n_estimators': 1422}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000220 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:21,668] Trial 43 finished with value: 1016.5747216706808 and parameters: {'learning_rate': 0.0422904483512931, 'num_leaves': 42, 'max_depth': 3, 'min_child_samples': 61, 'colsample_bytree': 0.8992827057381285, 'subsample': 0.5954813670024832, 'reg_lambda': 2.6283949414812176, 'reg_alpha': 6.3052033788397175, 'n_estimators': 1441}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000222 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000451 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8


[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:21,948] Trial 44 finished with value: 1014.7838822503081 and parameters: {'learning_rate': 0.02546438386125073, 'num_leaves': 137, 'max_depth': 4, 'min_child_samples': 44, 'colsample_bytree': 0.9672277752138227, 'subsample': 0.6450376408801857, 'reg_lambda': 1.3291992531035879, 'reg_alpha': 4.023485366497676, 'n_estimators': 1309}. Best is trial 23 with value: 1013.283336356111.
[I 2025-03-17 18:22:22,191] Trial 45 finished with value: 1014.4619505443134 and parameters: {'learning_rate': 0.03398362379232303, 'num_leaves': 115, 'max_depth': 5, 'min_child_samples': 30, 'colsample_bytree': 0.8803082514377625, 'subsample': 0.5431767613978302, 'reg_lambda': 4.269262988007914, 'reg_alpha': 5.746315413195586, 'n_estimators': 946}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000206 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:22,367] Trial 46 finished with value: 1016.1541200239217 and parameters: {'learning_rate': 0.040501887558456205, 'num_leaves': 100, 'max_depth': 4, 'min_child_samples': 13, 'colsample_bytree': 0.9315879833607006, 'subsample': 0.6232450623894561, 'reg_lambda': 2.303078411027566, 'reg_alpha': 7.192822367574809, 'n_estimators': 1401}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000296 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:22,557] Trial 47 finished with value: 1019.7098343983454 and parameters: {'learning_rate': 0.04795810876307732, 'num_leaves': 76, 'max_depth': 7, 'min_child_samples': 65, 'colsample_bytree': 0.9706914307757053, 'subsample': 0.6919456601112581, 'reg_lambda': 0.5540837754616603, 'reg_alpha': 4.86667016361355, 'n_estimators': 387}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:22,792] Trial 48 finished with value: 1015.1210821525256 and parameters: {'learning_rate': 0.036496965619666116, 'num_leaves': 64, 'max_depth': 3, 'min_child_samples': 72, 'colsample_bytree': 0.8544585031339749, 'subsample': 0.7802579406208547, 'reg_lambda': 1.482611487797647, 'reg_alpha': 2.8777549772491056, 'n_estimators': 1499}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000268 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8


[I 2025-03-17 18:22:23,046] Trial 49 finished with value: 1022.4902904080247 and parameters: {'learning_rate': 0.04354747287863374, 'num_leaves': 122, 'max_depth': 10, 'min_child_samples': 58, 'colsample_bytree': 0.8992946789398821, 'subsample': 0.5236169846584682, 'reg_lambda': 2.475465856590416, 'reg_alpha': 6.561744236140436, 'n_estimators': 1188}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:23,286] Trial 50 finished with value: 1016.2849003829987 and parameters: {'learning_rate': 0.04783772377448536, 'num_leaves': 83, 'max_depth': 12, 'min_child_samples': 49, 'colsample_bytree': 0.9363043342485674, 'subsample': 0.8377012593150754, 'reg_lambda': 5.219956259119515, 'reg_alpha': 5.867209047401046, 'n_estimators': 1430}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000195 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:23,488] Trial 51 finished with value: 1014.0594524927784 and parameters: {'learning_rate': 0.03415653920851844, 'num_leaves': 116, 'max_depth': 5, 'min_child_samples': 38, 'colsample_bytree': 0.8734976622581178, 'subsample': 0.5475885043017961, 'reg_lambda': 4.503531684549934, 'reg_alpha': 5.264874267609672, 'n_estimators': 955}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000208 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8


[I 2025-03-17 18:22:23,710] Trial 52 finished with value: 1015.0905918329768 and parameters: {'learning_rate': 0.03132338813911946, 'num_leaves': 105, 'max_depth': 5, 'min_child_samples': 37, 'colsample_bytree': 0.9085485603048074, 'subsample': 0.5885687760392779, 'reg_lambda': 5.962947986010306, 'reg_alpha': 5.30862894504772, 'n_estimators': 979}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:23,991] Trial 53 finished with value: 1015.7842435406791 and parameters: {'learning_rate': 0.028317868194186732, 'num_leaves': 109, 'max_depth': 4, 'min_child_samples': 41, 'colsample_bytree': 0.6453968874760101, 'subsample': 0.6707183355357449, 'reg_lambda': 4.732758638514495, 'reg_alpha': 7.658014153759047, 'n_estimators': 1123}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000267 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000208 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:24,329] Trial 54 finished with value: 1016.0934650247705 and parameters: {'learning_rate': 0.02601622153201038, 'num_leaves': 94, 'max_depth': 7, 'min_child_samples': 23, 'colsample_bytree': 0.8705324411069811, 'subsample': 0.5653679065042786, 'reg_lambda': 7.000650965190333, 'reg_alpha': 3.93865432296122, 'n_estimators': 1059}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000235 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:24,560] Trial 55 finished with value: 1014.4352094295022 and parameters: {'learning_rate': 0.034803498224361, 'num_leaves': 122, 'max_depth': 5, 'min_child_samples': 55, 'colsample_bytree': 0.8353514084334065, 'subsample': 0.5090065584633254, 'reg_lambda': 4.011485102279995, 'reg_alpha': 6.387656251884776, 'n_estimators': 941}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232




[I 2025-03-17 18:22:25,029] Trial 56 finished with value: 1029.5212667144099 and parameters: {'learning_rate': 0.040119916195688846, 'num_leaves': 143, 'max_depth': 14, 'min_child_samples': 30, 'colsample_bytree': 0.800684367025509, 'subsample': 0.5299420672663315, 'reg_lambda': 3.612162396390593, 'reg_alpha': 5.942679496767634, 'n_estimators': 902}. Best is trial 23 with value: 1013.283336356111.




[I 2025-03-17 18:22:25,230] Trial 57 finished with value: 1014.169659092816 and parameters: {'learning_rate': 0.04575243213638514, 'num_leaves': 131, 'max_depth': 6, 'min_child_samples': 52, 'colsample_bytree': 0.9586482932418994, 'subsample': 0.7135049974301391, 'reg_lambda': 8.246268405742919, 'reg_alpha': 6.9135089006167005, 'n_estimators': 1322}. Best is trial 23 with value: 1013.283336356111.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000227 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8


[I 2025-03-17 18:22:25,443] Trial 58 finished with value: 1012.744209201659 and parameters: {'learning_rate': 0.046008777313723596, 'num_leaves': 132, 'max_depth': 6, 'min_child_samples': 51, 'colsample_bytree': 0.9133800915544624, 'subsample': 0.7135667543386197, 'reg_lambda': 8.310251218815315, 'reg_alpha': 4.35964945673663, 'n_estimators': 1273}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:25,650] Trial 59 finished with value: 1015.3301937975331 and parameters: {'learning_rate': 0.045587412589373606, 'num_leaves': 131, 'max_depth': 6, 'min_child_samples': 51, 'colsample_bytree': 0.9549344203777979, 'subsample': 0.7106745880931562, 'reg_lambda': 8.275774498702942, 'reg_alpha': 3.3470082855367225, 'n_estimators': 1329}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000231 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000223 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8


[I 2025-03-17 18:22:25,845] Trial 60 finished with value: 1014.1462091941905 and parameters: {'learning_rate': 0.042845900376909656, 'num_leaves': 155, 'max_depth': 5, 'min_child_samples': 46, 'colsample_bytree': 0.8727576264753834, 'subsample': 0.7384682429109247, 'reg_lambda': 9.678535675104918, 'reg_alpha': 3.573592108036728, 'n_estimators': 1243}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000191 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:26,078] Trial 61 finished with value: 1014.1166494026895 and parameters: {'learning_rate': 0.04258373861523794, 'num_leaves': 152, 'max_depth': 5, 'min_child_samples': 46, 'colsample_bytree': 0.9128775553846651, 'subsample': 0.7407967816916621, 'reg_lambda': 9.603578558413787, 'reg_alpha': 2.572325850980744, 'n_estimators': 1271}. Best is trial 58 with value: 1012.744209201659.




[I 2025-03-17 18:22:26,315] Trial 62 finished with value: 1014.3576982994917 and parameters: {'learning_rate': 0.04233934320946328, 'num_leaves': 162, 'max_depth': 6, 'min_child_samples': 47, 'colsample_bytree': 0.9132573943203677, 'subsample': 0.7385239785557008, 'reg_lambda': 9.657449234392539, 'reg_alpha': 2.418116648566037, 'n_estimators': 1265}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000201 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:26,536] Trial 63 finished with value: 1015.2130037699372 and parameters: {'learning_rate': 0.03936129264609329, 'num_leaves': 151, 'max_depth': 5, 'min_child_samples': 42, 'colsample_bytree': 0.8744437895624034, 'subsample': 0.727132016467739, 'reg_lambda': 9.175488703335095, 'reg_alpha': 1.4045241149843406, 'n_estimators': 1271}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000227 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:27,427] Trial 64 finished with value: 1015.981503611895 and parameters: {'learning_rate': 0.04451560657479464, 'num_leaves': 146, 'max_depth': 7, 'min_child_samples': 58, 'colsample_bytree': 0.9356662757012991, 'subsample': 0.7570736046915443, 'reg_lambda': 9.815195833040988, 'reg_alpha': 3.599406038014768, 'n_estimators': 1352}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000198 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:27,620] Trial 65 finished with value: 1014.4598918628759 and parameters: {'learning_rate': 0.03780466357401554, 'num_leaves': 157, 'max_depth': 4, 'min_child_samples': 46, 'colsample_bytree': 0.8589404660676269, 'subsample': 0.6703570239993354, 'reg_lambda': 9.345635590971998, 'reg_alpha': 2.54307651166967, 'n_estimators': 1411}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000208 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:27,806] Trial 66 finished with value: 1014.2931262785323 and parameters: {'learning_rate': 0.048689978948057815, 'num_leaves': 140, 'max_depth': 5, 'min_child_samples': 51, 'colsample_bytree': 0.9973587126454555, 'subsample': 0.7894944215387982, 'reg_lambda': 7.193003160542468, 'reg_alpha': 4.409995201286076, 'n_estimators': 1474}. Best is trial 58 with value: 1012.744209201659.
[I 2025-03-17 18:22:28,045] Trial 67 finished with value: 1017.8038888774462 and parameters: {'learning_rate': 0.04350479788643292, 'num_leaves': 177, 'max_depth': 7, 'min_child_samples': 38, 'colsample_bytree': 0.9149957929715768, 'subsample': 0.7082231616529692, 'reg_lambda': 8.73417986754779, 'reg_alpha': 4.189348505668552, 'n_estimators': 1282}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000216 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:28,274] Trial 68 finished with value: 1019.5198399360073 and parameters: {'learning_rate': 0.04659732625815149, 'num_leaves': 167, 'max_depth': 6, 'min_child_samples': 63, 'colsample_bytree': 0.7637381136481176, 'subsample': 0.7255440718792353, 'reg_lambda': 8.200859046634267, 'reg_alpha': 3.6970399623265218, 'n_estimators': 1195}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:28,468] Trial 69 finished with value: 1014.7399404083313 and parameters: {'learning_rate': 0.0421363835465117, 'num_leaves': 134, 'max_depth': 4, 'min_child_samples': 57, 'colsample_bytree': 0.9549935785559708, 'subsample': 0.6871407234682857, 'reg_lambda': 7.521813959178216, 'reg_alpha': 1.8721336382702862, 'n_estimators': 1331}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000239 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000225 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:28,682] Trial 70 finished with value: 1014.3743556243629 and parameters: {'learning_rate': 0.04536537098441188, 'num_leaves': 128, 'max_depth': 6, 'min_child_samples': 50, 'colsample_bytree': 0.976862227403746, 'subsample': 0.6331000392125379, 'reg_lambda': 9.655252071181387, 'reg_alpha': 4.925838618898519, 'n_estimators': 1149}. Best is trial 58 with value: 1012.744209201659.




[I 2025-03-17 18:22:28,892] Trial 71 finished with value: 1015.0840402941051 and parameters: {'learning_rate': 0.04579718198647441, 'num_leaves': 115, 'max_depth': 5, 'min_child_samples': 54, 'colsample_bytree': 0.8848233066490357, 'subsample': 0.7505319406922069, 'reg_lambda': 6.343979622035599, 'reg_alpha': 5.473693441463148, 'n_estimators': 1389}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000209 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:29,122] Trial 72 finished with value: 1013.0044916251578 and parameters: {'learning_rate': 0.04991216064577723, 'num_leaves': 141, 'max_depth': 5, 'min_child_samples': 43, 'colsample_bytree': 0.9033819876176308, 'subsample': 0.7652926781860301, 'reg_lambda': 9.370234038653438, 'reg_alpha': 2.8596912894582767, 'n_estimators': 1211}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000347 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:29,334] Trial 73 finished with value: 1014.1483560384779 and parameters: {'learning_rate': 0.04881680005904102, 'num_leaves': 155, 'max_depth': 5, 'min_child_samples': 44, 'colsample_bytree': 0.9276812319830856, 'subsample': 0.8156224939414497, 'reg_lambda': 8.834478019080047, 'reg_alpha': 2.786854496901009, 'n_estimators': 1254}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000537 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:29,553] Trial 74 finished with value: 1013.9855038405848 and parameters: {'learning_rate': 0.049901702054579965, 'num_leaves': 163, 'max_depth': 4, 'min_child_samples': 42, 'colsample_bytree': 0.9270996163474937, 'subsample': 0.8142864447272391, 'reg_lambda': 9.023807490848046, 'reg_alpha': 2.772805434785402, 'n_estimators': 1221}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000202 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:29,760] Trial 75 finished with value: 1015.7748932020297 and parameters: {'learning_rate': 0.04962417818859815, 'num_leaves': 165, 'max_depth': 3, 'min_child_samples': 41, 'colsample_bytree': 0.9412361668001108, 'subsample': 0.778000581742936, 'reg_lambda': 9.348652982513808, 'reg_alpha': 1.4163918849975514, 'n_estimators': 1219}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000823 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000286 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773


[I 2025-03-17 18:22:29,983] Trial 76 finished with value: 1013.3473924752147 and parameters: {'learning_rate': 0.04732366914487588, 'num_leaves': 172, 'max_depth': 4, 'min_child_samples': 35, 'colsample_bytree': 0.8365229958634247, 'subsample': 0.8210248959654776, 'reg_lambda': 9.080815801084045, 'reg_alpha': 2.272810890715856, 'n_estimators': 1121}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:30,208] Trial 77 finished with value: 1014.3874489379859 and parameters: {'learning_rate': 0.04776918631987791, 'num_leaves': 175, 'max_depth': 4, 'min_child_samples': 36, 'colsample_bytree': 0.8327891209743263, 'subsample': 0.8942779672924823, 'reg_lambda': 9.098919898073422, 'reg_alpha': 2.1010968261842473, 'n_estimators': 1122}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000221 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:30,398] Trial 78 finished with value: 1016.3214306500992 and parameters: {'learning_rate': 0.04985646078035618, 'num_leaves': 142, 'max_depth': 3, 'min_child_samples': 33, 'colsample_bytree': 0.8911863771806846, 'subsample': 0.8197659073214032, 'reg_lambda': 8.70651324039347, 'reg_alpha': 1.3218375384025907, 'n_estimators': 1136}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000201 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:30,610] Trial 79 finished with value: 1013.740585584384 and parameters: {'learning_rate': 0.04701111861340526, 'num_leaves': 172, 'max_depth': 4, 'min_child_samples': 39, 'colsample_bytree': 0.848861311997075, 'subsample': 0.8446267452229691, 'reg_lambda': 9.962295243800575, 'reg_alpha': 1.0304468612280147, 'n_estimators': 1203}. Best is trial 58 with value: 1012.744209201659.




[I 2025-03-17 18:22:30,824] Trial 80 finished with value: 1016.3174702200636 and parameters: {'learning_rate': 0.04732740730053288, 'num_leaves': 181, 'max_depth': 4, 'min_child_samples': 39, 'colsample_bytree': 0.8051384215587691, 'subsample': 0.8606022441230841, 'reg_lambda': 9.985381024091954, 'reg_alpha': 0.8550605809791383, 'n_estimators': 1196}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000198 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.


[I 2025-03-17 18:22:31,012] Trial 81 finished with value: 1013.5540026465808 and parameters: {'learning_rate': 0.04853742202404552, 'num_leaves': 161, 'max_depth': 4, 'min_child_samples': 42, 'colsample_bytree': 0.9077492280506763, 'subsample': 0.8240548555036593, 'reg_lambda': 8.501939232723316, 'reg_alpha': 3.1277571476160997, 'n_estimators': 1161}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000246 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8


[I 2025-03-17 18:22:31,203] Trial 82 finished with value: 1013.9205155438618 and parameters: {'learning_rate': 0.04845895547417307, 'num_leaves': 195, 'max_depth': 4, 'min_child_samples': 27, 'colsample_bytree': 0.8506818594873006, 'subsample': 0.8321447692478338, 'reg_lambda': 8.039417026132098, 'reg_alpha': 0.111092442558407, 'n_estimators': 1207}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Start training from score 2202.365232
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000229 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:31,409] Trial 83 finished with value: 1013.4892267430096 and parameters: {'learning_rate': 0.04889178211018796, 'num_leaves': 185, 'max_depth': 4, 'min_child_samples': 28, 'colsample_bytree': 0.8499006526804623, 'subsample': 0.8285194508963749, 'reg_lambda': 8.45654361007165, 'reg_alpha': 0.5651681442234638, 'n_estimators': 1164}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:31,581] Trial 84 finished with value: 1016.9087933197713 and parameters: {'learning_rate': 0.048368713391052094, 'num_leaves': 194, 'max_depth': 3, 'min_child_samples': 27, 'colsample_bytree': 0.8504485672472251, 'subsample': 0.8331429965330643, 'reg_lambda': 7.989724493812035, 'reg_alpha': 0.2659481777926462, 'n_estimators': 1167}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000184 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:31,795] Trial 85 finished with value: 1016.8248063039681 and parameters: {'learning_rate': 0.046752793534804604, 'num_leaves': 187, 'max_depth': 3, 'min_child_samples': 30, 'colsample_bytree': 0.7790764266416604, 'subsample': 0.8811881986586266, 'reg_lambda': 8.425710234445916, 'reg_alpha': 0.0740709084971681, 'n_estimators': 1096}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:31,977] Trial 86 finished with value: 1014.6114863070659 and parameters: {'learning_rate': 0.04853564363252704, 'num_leaves': 199, 'max_depth': 4, 'min_child_samples': 22, 'colsample_bytree': 0.8277067187448771, 'subsample': 0.8661616434197774, 'reg_lambda': 7.5554600609553795, 'reg_alpha': 1.0060947689546755, 'n_estimators': 1049}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:32,154] Trial 87 finished with value: 1016.0028546597044 and parameters: {'learning_rate': 0.043957925810040294, 'num_leaves': 188, 'max_depth': 3, 'min_child_samples': 35, 'colsample_bytree': 0.8441585801315042, 'subsample': 0.937742920010789, 'reg_lambda': 8.491527008539416, 'reg_alpha': 1.6755423362258606, 'n_estimators': 1096}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:32,348] Trial 88 finished with value: 1016.1491435004921 and parameters: {'learning_rate': 0.04647095816805003, 'num_leaves': 183, 'max_depth': 4, 'min_child_samples': 26, 'colsample_bytree': 0.8104277382141356, 'subsample': 0.852156986195089, 'reg_lambda': 7.834580530969967, 'reg_alpha': 0.4513806384970135, 'n_estimators': 1354}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000186 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:32,539] Trial 89 finished with value: 1013.1371215460127 and parameters: {'learning_rate': 0.04490075834942993, 'num_leaves': 194, 'max_depth': 4, 'min_child_samples': 31, 'colsample_bytree': 0.8569866359334559, 'subsample': 0.835876097483471, 'reg_lambda': 9.400137212932952, 'reg_alpha': 0.8865145684888434, 'n_estimators': 1295}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000186 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:32,745] Trial 90 finished with value: 1016.5456728440705 and parameters: {'learning_rate': 0.04466174190627935, 'num_leaves': 171, 'max_depth': 3, 'min_child_samples': 31, 'colsample_bytree': 0.8644577549712456, 'subsample': 0.8432894197176186, 'reg_lambda': 9.41706538357069, 'reg_alpha': 1.0350068883265293, 'n_estimators': 1297}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000190 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:32,930] Trial 91 finished with value: 1014.4296816968135 and parameters: {'learning_rate': 0.04756620117596619, 'num_leaves': 195, 'max_depth': 4, 'min_child_samples': 32, 'colsample_bytree': 0.903089476634339, 'subsample': 0.8249187662271382, 'reg_lambda': 8.707082681735816, 'reg_alpha': 0.5935505304743908, 'n_estimators': 1199}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000189 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:33,127] Trial 92 finished with value: 1013.7996424863242 and parameters: {'learning_rate': 0.04890791477022601, 'num_leaves': 191, 'max_depth': 4, 'min_child_samples': 28, 'colsample_bytree': 0.845518936421729, 'subsample': 0.7972803982118601, 'reg_lambda': 9.087013495474432, 'reg_alpha': 0.05968245130675565, 'n_estimators': 1144}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000234 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:33,331] Trial 93 finished with value: 1015.6311329374151 and parameters: {'learning_rate': 0.04503627098942748, 'num_leaves': 179, 'max_depth': 4, 'min_child_samples': 36, 'colsample_bytree': 0.735769871648732, 'subsample': 0.8052661968007734, 'reg_lambda': 9.238514488666524, 'reg_alpha': 0.8199979650681308, 'n_estimators': 1158}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000189 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:33,549] Trial 94 finished with value: 1014.0380238567436 and parameters: {'learning_rate': 0.04682545462377002, 'num_leaves': 173, 'max_depth': 5, 'min_child_samples': 28, 'colsample_bytree': 0.8883175568714253, 'subsample': 0.7974081068588117, 'reg_lambda': 8.969934099869205, 'reg_alpha': 3.290523655334224, 'n_estimators': 1461}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000192 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:33,750] Trial 95 finished with value: 1013.9420098757888 and parameters: {'learning_rate': 0.04918886630675749, 'num_leaves': 191, 'max_depth': 4, 'min_child_samples': 34, 'colsample_bytree': 0.8384090503722498, 'subsample': 0.8742904636955277, 'reg_lambda': 9.995146383940506, 'reg_alpha': 2.124033183228798, 'n_estimators': 1175}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000793 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:33,965] Trial 96 finished with value: 1015.0780627220142 and parameters: {'learning_rate': 0.04757212449796594, 'num_leaves': 185, 'max_depth': 4, 'min_child_samples': 25, 'colsample_bytree': 0.8269492933940089, 'subsample': 0.7859012354282886, 'reg_lambda': 8.481806336877112, 'reg_alpha': 0.3292428512649961, 'n_estimators': 1414}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000243 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:34,159] Trial 97 finished with value: 1017.6951326219094 and parameters: {'learning_rate': 0.04615575748040386, 'num_leaves': 168, 'max_depth': 3, 'min_child_samples': 17, 'colsample_bytree': 0.8647152196267648, 'subsample': 0.9054583889921882, 'reg_lambda': 9.440143470959562, 'reg_alpha': 1.7775655564109036, 'n_estimators': 1113}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000232 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:34,391] Trial 98 finished with value: 1017.6562574200352 and parameters: {'learning_rate': 0.049030961345047176, 'num_leaves': 177, 'max_depth': 5, 'min_child_samples': 21, 'colsample_bytree': 0.7938964573063568, 'subsample': 0.8034194175862028, 'reg_lambda': 8.87729481805543, 'reg_alpha': 1.2125947278952491, 'n_estimators': 1011}. Best is trial 58 with value: 1012.744209201659.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000222 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232


[I 2025-03-17 18:22:34,604] Trial 99 finished with value: 1013.9751690751573 and parameters: {'learning_rate': 0.04495054493404202, 'num_leaves': 192, 'max_depth': 5, 'min_child_samples': 48, 'colsample_bytree': 0.8174913313107846, 'subsample': 0.7744291234513374, 'reg_lambda': 9.096704097694102, 'reg_alpha': 0.6895148714604982, 'n_estimators': 1373}. Best is trial 58 with value: 1012.744209201659.


Best Parameters: {'learning_rate': 0.046008777313723596, 'num_leaves': 132, 'max_depth': 6, 'min_child_samples': 51, 'colsample_bytree': 0.9133800915544624, 'subsample': 0.7135667543386197, 'reg_lambda': 8.310251218815315, 'reg_alpha': 4.35964945673663, 'n_estimators': 1273}


In [7]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate': 0.046008777313723596,
    'num_leaves': 132,
    'max_depth': 6,
    'min_child_samples': 51,
    'subsample': 0.7135667543386197,
    'colsample_bytree': 0.9133800915544624,
    'n_estimators': 1305,
    'reg_lambda': 8.310251218815315,
    'reg_alpha': 4.35964945673663,
}


# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: ../submission/submission_lgb.csv")



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001262 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 773
[LightGBM] [Info] Number of data points in the train set: 6818, number of used features: 8
[LightGBM] [Info] Start training from score 2202.365232

✅ Model Performance on Training Set:
MAE: 733.7870
MSE: 1085477.9888
RMSE: 1041.8627
R² Score: 0.6330

✅ Model Performance on Validation Set:
MAE: 710.0586
MSE: 1025650.8333
RMSE: 1012.7442
R² Score: 0.6226


Download your submission file: ../submission/submission_lgb.csv


In [25]:

import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Create a new combined categorical feature
train_df['Outlet_Combined'] = train_df['Outlet_Location_Type'].astype(str) + "_" + \
                              train_df['Outlet_Type'].astype(str) 
#+ "_" + train_df['Outlet_Size'].astype(str)

test_df['Outlet_Combined'] = test_df['Outlet_Location_Type'].astype(str) + "_" + \
                             test_df['Outlet_Type'].astype(str) 
#+ "_" + test_df['Outlet_Size'].astype(str)

# Select features and target
categorical_cols = [ 'Outlet_Combined']
numerical_cols = ['Item_MRP', 'Outlet_Total_Sales', 'Item_Visibility']

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.03),  # Wider range
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),  # More flexibility
        'max_depth': trial.suggest_int('max_depth', 3, 7),  # Slightly deeper trees
        'min_child_samples': trial.suggest_int('min_child_samples', 30, 70),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.85),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'reg_lambda': trial.suggest_float('reg_lambda', 5, 12),  # Stronger L2 regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 5, 12),  # Stronger L1 regularization
        'n_estimators': 2000,  # Increased for better convergence
        'verbosity': -1  # Reduce logs for speed
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(50, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-17 18:45:45,523] A new study created in memory with name: no-name-2934e974-e2fe-499c-97fa-f881c42ca75f
[I 2025-03-17 18:45:45,614] Trial 0 finished with value: 1018.0355169142222 and parameters: {'learning_rate': 0.02994395892611274, 'num_leaves': 97, 'max_depth': 3, 'min_child_samples': 30, 'colsample_bytree': 0.6952578876810936, 'subsample': 0.7589213499014307, 'reg_lambda': 7.4773952556202286, 'reg_alpha': 10.8623486471046}. Best is trial 0 with value: 1018.0355169142222.
[I 2025-03-17 18:45:45,772] Trial 1 finished with value: 1014.688306792813 and parameters: {'learning_rate': 0.011033018679916705, 'num_leaves': 20, 'max_depth': 4, 'min_child_samples': 67, 'colsample_bytree': 0.8363138237921335, 'subsample': 0.8801384559849327, 'reg_lambda': 9.14341063469826, 'reg_alpha': 5.061369609849074}. Best is trial 1 with value: 1014.688306792813.
[I 2025-03-17 18:45:45,907] Trial 2 finished with value: 1016.6291676282628 and parameters: {'learning_rate': 0.024792064826346525, 'n

[I 2025-03-17 18:45:49,728] Trial 22 finished with value: 1013.8607972400425 and parameters: {'learning_rate': 0.015380994021272918, 'num_leaves': 68, 'max_depth': 4, 'min_child_samples': 63, 'colsample_bytree': 0.6387647474637363, 'subsample': 0.8214060620150229, 'reg_lambda': 9.71769231674056, 'reg_alpha': 10.131564183562702}. Best is trial 18 with value: 1013.0155755741049.
[I 2025-03-17 18:45:49,930] Trial 23 finished with value: 1015.5526229023314 and parameters: {'learning_rate': 0.015701688755469703, 'num_leaves': 70, 'max_depth': 5, 'min_child_samples': 63, 'colsample_bytree': 0.6388396357876576, 'subsample': 0.8165505942515625, 'reg_lambda': 9.93808892663816, 'reg_alpha': 10.26624304676076}. Best is trial 18 with value: 1013.0155755741049.
[I 2025-03-17 18:45:50,052] Trial 24 finished with value: 1016.68355706173 and parameters: {'learning_rate': 0.023582902509868102, 'num_leaves': 85, 'max_depth': 3, 'min_child_samples': 59, 'colsample_bytree': 0.6495955454112402, 'subsample'

[I 2025-03-17 18:45:54,515] Trial 44 finished with value: 1016.3917557927916 and parameters: {'learning_rate': 0.011146942918703304, 'num_leaves': 67, 'max_depth': 4, 'min_child_samples': 54, 'colsample_bytree': 0.6168684347754688, 'subsample': 0.8835885242582276, 'reg_lambda': 5.605926779841074, 'reg_alpha': 10.40296844785581}. Best is trial 18 with value: 1013.0155755741049.
[I 2025-03-17 18:45:54,694] Trial 45 finished with value: 1014.6447279222969 and parameters: {'learning_rate': 0.014574417613815468, 'num_leaves': 82, 'max_depth': 4, 'min_child_samples': 66, 'colsample_bytree': 0.6744483102332407, 'subsample': 0.8601126662339396, 'reg_lambda': 9.266440740967962, 'reg_alpha': 9.817365234375577}. Best is trial 18 with value: 1013.0155755741049.
[I 2025-03-17 18:45:54,870] Trial 46 finished with value: 1016.4505760784001 and parameters: {'learning_rate': 0.013682467484044726, 'num_leaves': 89, 'max_depth': 3, 'min_child_samples': 61, 'colsample_bytree': 0.7302457468029122, 'subsamp

Best Parameters: {'learning_rate': 0.013679293785671084, 'num_leaves': 76, 'max_depth': 4, 'min_child_samples': 63, 'colsample_bytree': 0.625938766116602, 'subsample': 0.8097337683385393, 'reg_lambda': 9.933505007901022, 'reg_alpha': 8.53193027196974}


In [26]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate': 0.013679293785671084,  
    'num_leaves': 76,  # Slightly more complex trees
    'max_depth': 4,  
    'min_child_samples': 63,  # Allows a bit more flexibility in splits
    'subsample': 0.8097337683385393,  
    'colsample_bytree': 0.625938766116602,  
    'n_estimators': 3000,  
    'reg_lambda': 9.933505007901022,  # Reduce regularization slightly
    'reg_alpha': 8.53193027196974,  # Reduce regularization slightly
}



# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: /mnt/data/submission_xgboost.csv")
#1151.9072619464 rank #1115



✅ Model Performance on Training Set:
MAE: 744.4006
MSE: 1123366.7073
RMSE: 1059.8900
R² Score: 0.6202

✅ Model Performance on Validation Set:
MAE: 711.3731
MSE: 1026200.5564
RMSE: 1013.0156
R² Score: 0.6224
Download your submission file: /mnt/data/submission_xgboost.csv


In [30]:

import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Create a new combined categorical feature
train_df['Outlet_Combined'] = train_df['Outlet_Type'].astype(str) \
                                + "_" + train_df['Outlet_Size'].astype(str)
#train_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \
                                

test_df['Outlet_Combined'] =    test_df['Outlet_Type'].astype(str) \
                                + "_" + test_df['Outlet_Size'].astype(str)
#test_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \

# Select features and target
categorical_cols = [ 'Outlet_Combined']
numerical_cols = ['Item_MRP', 'Outlet_Total_Sales', 'Item_Visibility']

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.03),  # Wider range
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),  # More flexibility
        'max_depth': trial.suggest_int('max_depth', 3, 7),  # Slightly deeper trees
        'min_child_samples': trial.suggest_int('min_child_samples', 30, 70),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.85),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'reg_lambda': trial.suggest_float('reg_lambda', 5, 12),  # Stronger L2 regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 5, 12),  # Stronger L1 regularization
        'n_estimators': 2000,  # Increased for better convergence
        'verbosity': -1  # Reduce logs for speed
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(100, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-17 19:29:24,484] A new study created in memory with name: no-name-aa5b69d7-1cd5-4819-af8a-7b347802bc3a
[I 2025-03-17 19:29:24,625] Trial 0 finished with value: 1017.5585524555383 and parameters: {'learning_rate': 0.02824773334976492, 'num_leaves': 60, 'max_depth': 5, 'min_child_samples': 38, 'colsample_bytree': 0.6789423026558109, 'subsample': 0.7239224319201033, 'reg_lambda': 5.864815071444314, 'reg_alpha': 7.6088163755029}. Best is trial 0 with value: 1017.5585524555383.
[I 2025-03-17 19:29:25,033] Trial 1 finished with value: 1015.6236199327619 and parameters: {'learning_rate': 0.007715645956689507, 'num_leaves': 49, 'max_depth': 6, 'min_child_samples': 69, 'colsample_bytree': 0.6647299768033469, 'subsample': 0.7499523242435641, 'reg_lambda': 6.926376952329442, 'reg_alpha': 10.120382373657769}. Best is trial 1 with value: 1015.6236199327619.
[I 2025-03-17 19:29:25,297] Trial 2 finished with value: 1016.7322905198513 and parameters: {'learning_rate': 0.009612767520747835, 

[I 2025-03-17 19:29:29,915] Trial 22 finished with value: 1014.4235520481785 and parameters: {'learning_rate': 0.02587886572549046, 'num_leaves': 64, 'max_depth': 4, 'min_child_samples': 65, 'colsample_bytree': 0.8324005236637303, 'subsample': 0.7341608446498762, 'reg_lambda': 5.000360037383532, 'reg_alpha': 6.185967406954181}. Best is trial 15 with value: 1013.4773974672255.
[I 2025-03-17 19:29:30,048] Trial 23 finished with value: 1016.5194423632778 and parameters: {'learning_rate': 0.02269633501875821, 'num_leaves': 60, 'max_depth': 3, 'min_child_samples': 61, 'colsample_bytree': 0.8491606851840044, 'subsample': 0.764307372052811, 'reg_lambda': 6.359651060523591, 'reg_alpha': 5.0916408657331935}. Best is trial 15 with value: 1013.4773974672255.
[I 2025-03-17 19:29:30,175] Trial 24 finished with value: 1015.0664505247672 and parameters: {'learning_rate': 0.029530008030412735, 'num_leaves': 88, 'max_depth': 4, 'min_child_samples': 66, 'colsample_bytree': 0.7955430268630491, 'subsample

[I 2025-03-17 19:29:33,520] Trial 44 finished with value: 1013.6739604952787 and parameters: {'learning_rate': 0.010744378354682714, 'num_leaves': 100, 'max_depth': 4, 'min_child_samples': 60, 'colsample_bytree': 0.7381491703950612, 'subsample': 0.8296723320045064, 'reg_lambda': 5.616860699450754, 'reg_alpha': 8.17902606196397}. Best is trial 15 with value: 1013.4773974672255.
[I 2025-03-17 19:29:33,792] Trial 45 finished with value: 1015.9614566522367 and parameters: {'learning_rate': 0.009742171353162462, 'num_leaves': 100, 'max_depth': 5, 'min_child_samples': 60, 'colsample_bytree': 0.7149465941486752, 'subsample': 0.8657500243050565, 'reg_lambda': 6.6605554035343095, 'reg_alpha': 8.033561595495616}. Best is trial 15 with value: 1013.4773974672255.
[I 2025-03-17 19:29:34,042] Trial 46 finished with value: 1014.0983629589455 and parameters: {'learning_rate': 0.011505416319232258, 'num_leaves': 97, 'max_depth': 4, 'min_child_samples': 57, 'colsample_bytree': 0.7394355804772909, 'subsa

Best Parameters: {'learning_rate': 0.022843340298252773, 'num_leaves': 77, 'max_depth': 4, 'min_child_samples': 61, 'colsample_bytree': 0.8281427291607215, 'subsample': 0.765113035542883, 'reg_lambda': 6.880241474132783, 'reg_alpha': 7.438342652709071}


In [31]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate': 0.022843340298252773,  
    'num_leaves': 77,  # Slightly more complex trees
    'max_depth': 4,  
    'min_child_samples': 61,  # Allows a bit more flexibility in splits
    'subsample': 0.765113035542883,  
    'colsample_bytree': 0.8281427291607215,  
    'n_estimators': 3000,  
    'reg_lambda': 6.880241474132783,  # Reduce regularization slightly
    'reg_alpha': 7.438342652709071,  # Reduce regularization slightly
}



# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: /mnt/data/submission_xgboost.csv")
#1151.2782174827 rank #986



✅ Model Performance on Training Set:
MAE: 746.0255
MSE: 1128399.2446
RMSE: 1062.2614
R² Score: 0.6185

✅ Model Performance on Validation Set:
MAE: 711.9326
MSE: 1027136.4352
RMSE: 1013.4774
R² Score: 0.6221
Download your submission file: /mnt/data/submission_xgboost.csv


In [46]:

import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Create a new combined categorical feature
train_df['Outlet_Combined'] = train_df['Outlet_Type'].astype(str) \
                                + "_" + train_df['Outlet_Size'].astype(str)
#train_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \
                                

test_df['Outlet_Combined'] =    test_df['Outlet_Type'].astype(str) \
                                + "_" + test_df['Outlet_Size'].astype(str)
#test_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \

train_df['Item_MRP_Per_Unit_Weight'] = train_df['Item_MRP']/train_df['Item_Weight']
test_df['Item_MRP_Per_Unit_Weight'] = test_df['Item_MRP']/test_df['Item_Weight']
    
    
# Select features and target
categorical_cols = [ 'Outlet_Combined']
numerical_cols = ['Item_MRP', 'Outlet_Total_Sales', 'Outlet_Age']

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.03),  # Wider range
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),  # More flexibility
        'max_depth': trial.suggest_int('max_depth', 3, 7),  # Slightly deeper trees
        'min_child_samples': trial.suggest_int('min_child_samples', 30, 70),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.85),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'reg_lambda': trial.suggest_float('reg_lambda', 5, 12),  # Stronger L2 regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 5, 12),  # Stronger L1 regularization
        'n_estimators': 2000,  # Increased for better convergence
        'verbosity': -1  # Reduce logs for speed
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(100, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-17 23:17:51,459] A new study created in memory with name: no-name-bf07e4a4-8bd8-4356-9dc7-c56b1661c030
[I 2025-03-17 23:17:51,605] Trial 0 finished with value: 1019.9969747111944 and parameters: {'learning_rate': 0.025520738438971523, 'num_leaves': 97, 'max_depth': 6, 'min_child_samples': 37, 'colsample_bytree': 0.7208679212291934, 'subsample': 0.7036444249821978, 'reg_lambda': 7.379507860929351, 'reg_alpha': 9.545054071885456}. Best is trial 0 with value: 1019.9969747111944.
[I 2025-03-17 23:17:51,793] Trial 1 finished with value: 1017.0367688003355 and parameters: {'learning_rate': 0.007898004284722666, 'num_leaves': 42, 'max_depth': 3, 'min_child_samples': 46, 'colsample_bytree': 0.7844175633404089, 'subsample': 0.8575323899121057, 'reg_lambda': 10.482140895484118, 'reg_alpha': 10.297146610643315}. Best is trial 1 with value: 1017.0367688003355.
[I 2025-03-17 23:17:51,920] Trial 2 finished with value: 1016.5248514329436 and parameters: {'learning_rate': 0.0205480735140283

[I 2025-03-17 23:17:55,690] Trial 22 finished with value: 1016.438258817468 and parameters: {'learning_rate': 0.005317966528236167, 'num_leaves': 29, 'max_depth': 4, 'min_child_samples': 68, 'colsample_bytree': 0.8456280875089224, 'subsample': 0.8436469729881206, 'reg_lambda': 11.136965107541496, 'reg_alpha': 7.7615373335271745}. Best is trial 16 with value: 1016.3080023630292.
[I 2025-03-17 23:17:55,867] Trial 23 finished with value: 1017.8157317458954 and parameters: {'learning_rate': 0.016338067828764465, 'num_leaves': 26, 'max_depth': 5, 'min_child_samples': 69, 'colsample_bytree': 0.8486134003525411, 'subsample': 0.8587038448364743, 'reg_lambda': 10.1070359825578, 'reg_alpha': 7.7655817012585215}. Best is trial 16 with value: 1016.3080023630292.
[I 2025-03-17 23:17:56,111] Trial 24 finished with value: 1016.6064896379651 and parameters: {'learning_rate': 0.009509723383188732, 'num_leaves': 44, 'max_depth': 4, 'min_child_samples': 63, 'colsample_bytree': 0.8228467075758786, 'subsam

[I 2025-03-17 23:18:00,768] Trial 44 finished with value: 1018.7140616634932 and parameters: {'learning_rate': 0.01014598141277328, 'num_leaves': 49, 'max_depth': 5, 'min_child_samples': 68, 'colsample_bytree': 0.8326251367025296, 'subsample': 0.8232866541004148, 'reg_lambda': 11.37755941270404, 'reg_alpha': 8.260635501232146}. Best is trial 16 with value: 1016.3080023630292.
[I 2025-03-17 23:18:00,996] Trial 45 finished with value: 1016.7416722653154 and parameters: {'learning_rate': 0.008567151497347433, 'num_leaves': 23, 'max_depth': 4, 'min_child_samples': 61, 'colsample_bytree': 0.7905717866962261, 'subsample': 0.8934790296134133, 'reg_lambda': 10.40995192052265, 'reg_alpha': 9.119253017803517}. Best is trial 16 with value: 1016.3080023630292.
[I 2025-03-17 23:18:01,155] Trial 46 finished with value: 1017.1358296912608 and parameters: {'learning_rate': 0.01626233018777472, 'num_leaves': 29, 'max_depth': 3, 'min_child_samples': 58, 'colsample_bytree': 0.809834303512462, 'subsample'

Best Parameters: {'learning_rate': 0.011665974909561828, 'num_leaves': 40, 'max_depth': 4, 'min_child_samples': 66, 'colsample_bytree': 0.8042619634209697, 'subsample': 0.8447182011205896, 'reg_lambda': 9.674946890801323, 'reg_alpha': 8.352998066662282}


In [41]:
study.best_params

{'learning_rate': 0.007974090162682806,
 'num_leaves': 68,
 'max_depth': 4,
 'min_child_samples': 60,
 'colsample_bytree': 0.6839413130786991,
 'subsample': 0.8318953425430651,
 'reg_lambda': 10.422835186967905,
 'reg_alpha': 7.573546409151058}

In [47]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = study.best_params.copy()  # Copy the original dictionary
best_params.update({  
    'objective': 'regression',  
    'metric': 'rmse',  
    'n_estimators': 2000  
})



# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: /mnt/data/submission_xgboost.csv")
#1150.9645886406 Rank #931



✅ Model Performance on Training Set:
MAE: 755.3883
MSE: 1152884.3301
RMSE: 1073.7245
R² Score: 0.6103

✅ Model Performance on Validation Set:
MAE: 714.2423
MSE: 1032881.9557
RMSE: 1016.3080
R² Score: 0.6200
Download your submission file: /mnt/data/submission_xgboost.csv


In [80]:

import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Define age bins (including 0 properly)
bins = [-1, 10, 20, float('inf')]  # -1 ensures 0 is included in "New"
labels = ['New', 'Moderate', 'Old']

# Apply binning
train_df['Outlet_Age_Binned'] = pd.cut(train_df['Outlet_Age'], bins=bins, labels=labels)
# Apply the same binning to test_df
test_df['Outlet_Age_Binned'] = pd.cut(test_df['Outlet_Age'], bins=bins, labels=labels)



# Create a new combined categorical feature
train_df['Outlet_Combined'] = train_df['Outlet_Type'].astype(str) \
+ "_" + train_df['Outlet_Size'].astype(str)
#+ "_" + train_df['Outlet_Age_Binned'].astype(str)
#+ "_" + train_df['Outlet_Location_Type'].astype(str)
                              
                                

test_df['Outlet_Combined'] =    test_df['Outlet_Type'].astype(str) \
+ "_" + test_df['Outlet_Size'].astype(str)
#+ "_" + test_df['Outlet_Age_Binned'].astype(str)
#+ "_" + test_df['Outlet_Location_Type'].astype(str)


train_df['Item_MRP_Per_Unit_Weight'] = train_df['Item_MRP']/train_df['Item_Weight']
test_df['Item_MRP_Per_Unit_Weight'] = test_df['Item_MRP']/test_df['Item_Weight']
    
    
# Select features and target
categorical_cols = [ 'Outlet_Combined']
numerical_cols = ['Item_MRP', 'Outlet_Total_Sales', 'Item_Visibility', 'Outlet_Age' ]

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.03),  # Wider range
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),  # More flexibility
        'max_depth': trial.suggest_int('max_depth', 3, 7),  # Slightly deeper trees
        'min_child_samples': trial.suggest_int('min_child_samples', 30, 70),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.85),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'reg_lambda': trial.suggest_float('reg_lambda', 5, 12),  # Stronger L2 regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 5, 12),  # Stronger L1 regularization
        'n_estimators': 2000,  # Increased for better convergence
        'verbosity': -1  # Reduce logs for speed
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(100, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-17 23:46:30,176] A new study created in memory with name: no-name-b6b9ed56-ffd0-4399-8937-c4f7eb278ded
[I 2025-03-17 23:46:30,373] Trial 0 finished with value: 1013.8863422536131 and parameters: {'learning_rate': 0.023312650180224208, 'num_leaves': 26, 'max_depth': 7, 'min_child_samples': 51, 'colsample_bytree': 0.8305800829393507, 'subsample': 0.7957933426890674, 'reg_lambda': 11.077838440567294, 'reg_alpha': 11.629281257572512}. Best is trial 0 with value: 1013.8863422536131.
[I 2025-03-17 23:46:30,516] Trial 1 finished with value: 1016.6531742166126 and parameters: {'learning_rate': 0.028455559321691824, 'num_leaves': 88, 'max_depth': 5, 'min_child_samples': 63, 'colsample_bytree': 0.6530460300667424, 'subsample': 0.8736825121214028, 'reg_lambda': 6.734001183222088, 'reg_alpha': 7.97278012006481}. Best is trial 0 with value: 1013.8863422536131.
[I 2025-03-17 23:46:30,797] Trial 2 finished with value: 1014.2798108652606 and parameters: {'learning_rate': 0.01252891998451368

[I 2025-03-17 23:46:34,936] Trial 22 finished with value: 1014.5501707597149 and parameters: {'learning_rate': 0.029849281930100446, 'num_leaves': 29, 'max_depth': 6, 'min_child_samples': 40, 'colsample_bytree': 0.8256056370610484, 'subsample': 0.7767759719588074, 'reg_lambda': 11.5439573386696, 'reg_alpha': 9.845293195651141}. Best is trial 11 with value: 1012.3248369436131.
[I 2025-03-17 23:46:35,105] Trial 23 finished with value: 1012.2780147061791 and parameters: {'learning_rate': 0.026928692206680278, 'num_leaves': 21, 'max_depth': 7, 'min_child_samples': 48, 'colsample_bytree': 0.7819276372978132, 'subsample': 0.745248666361766, 'reg_lambda': 10.590042512887734, 'reg_alpha': 8.518583571926653}. Best is trial 23 with value: 1012.2780147061791.
[I 2025-03-17 23:46:35,307] Trial 24 finished with value: 1013.4762347342282 and parameters: {'learning_rate': 0.02130178158430693, 'num_leaves': 21, 'max_depth': 7, 'min_child_samples': 48, 'colsample_bytree': 0.7410672389097448, 'subsample

[I 2025-03-17 23:46:39,689] Trial 44 finished with value: 1016.0096916791596 and parameters: {'learning_rate': 0.02414736544531328, 'num_leaves': 30, 'max_depth': 7, 'min_child_samples': 41, 'colsample_bytree': 0.7090134100970663, 'subsample': 0.7366121638198301, 'reg_lambda': 11.702088281494126, 'reg_alpha': 9.509710754399512}. Best is trial 23 with value: 1012.2780147061791.
[I 2025-03-17 23:46:39,896] Trial 45 finished with value: 1013.2729879309178 and parameters: {'learning_rate': 0.0251473438330294, 'num_leaves': 24, 'max_depth': 6, 'min_child_samples': 43, 'colsample_bytree': 0.7254131851439182, 'subsample': 0.7273582555654383, 'reg_lambda': 5.002192413175663, 'reg_alpha': 10.474990599199199}. Best is trial 23 with value: 1012.2780147061791.
[I 2025-03-17 23:46:40,240] Trial 46 finished with value: 1019.5715405349076 and parameters: {'learning_rate': 0.013710834801559369, 'num_leaves': 28, 'max_depth': 7, 'min_child_samples': 61, 'colsample_bytree': 0.667037497894872, 'subsample

Best Parameters: {'learning_rate': 0.026928692206680278, 'num_leaves': 21, 'max_depth': 7, 'min_child_samples': 48, 'colsample_bytree': 0.7819276372978132, 'subsample': 0.745248666361766, 'reg_lambda': 10.590042512887734, 'reg_alpha': 8.518583571926653}


In [79]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = study.best_params.copy()  # Copy the original dictionary
best_params.update({  
    'objective': 'regression',  
    'metric': 'rmse',  
    'n_estimators': 2000  
})



# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: /mnt/data/submission_xgboost.csv")



✅ Model Performance on Training Set:
MAE: 739.7439
MSE: 1111898.5934
RMSE: 1054.4660
R² Score: 0.6241

✅ Model Performance on Validation Set:
MAE: 711.4432
MSE: 1025056.9816
RMSE: 1012.4510
R² Score: 0.6229
Download your submission file: /mnt/data/submission_xgboost.csv


In [95]:

import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Define age bins (including 0 properly)
bins = [-1, 10, 20, float('inf')]  # -1 ensures 0 is included in "New"
labels = ['New', 'Moderate', 'Old']

# Apply binning
train_df['Outlet_Age_Binned'] = pd.cut(train_df['Outlet_Age'], bins=bins, labels=labels)
# Apply the same binning to test_df
test_df['Outlet_Age_Binned'] = pd.cut(test_df['Outlet_Age'], bins=bins, labels=labels)



# Create a new combined categorical feature
train_df['Outlet_Combined'] = train_df['Outlet_Size'].astype(str) \
+ "_" + train_df['Outlet_Type'].astype(str)
#+ "_" + train_df['Outlet_Age_Binned'].astype(str)
#+ "_" + train_df['Outlet_Location_Type'].astype(str)
                              
                                

test_df['Outlet_Combined'] =    test_df['Outlet_Size'].astype(str) \
+ "_" + test_df['Outlet_Type'].astype(str)
#+ "_" + test_df['Outlet_Age_Binned'].astype(str)
#+ "_" + test_df['Outlet_Location_Type'].astype(str)


train_df['Item_MRP_Per_Unit_Weight'] = train_df['Item_MRP']/train_df['Item_Weight']
test_df['Item_MRP_Per_Unit_Weight'] = test_df['Item_MRP']/test_df['Item_Weight']
    
    
# Select features and target
categorical_cols = [  'Outlet_Combined', ]
numerical_cols = ['Item_MRP','Outlet_Total_Sales' ]

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.03),  # Wider range
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),  # More flexibility
        'max_depth': trial.suggest_int('max_depth', 3, 7),  # Slightly deeper trees
        'min_child_samples': trial.suggest_int('min_child_samples', 30, 70),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.85),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'reg_lambda': trial.suggest_float('reg_lambda', 5, 12),  # Stronger L2 regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 5, 12),  # Stronger L1 regularization
        'n_estimators': 2000,  # Increased for better convergence
        'verbosity': -1  # Reduce logs for speed
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(100, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-18 00:09:21,132] A new study created in memory with name: no-name-23422226-80b2-462f-ba0f-5fa752d43896
[I 2025-03-18 00:09:21,453] Trial 0 finished with value: 1021.1271965497546 and parameters: {'learning_rate': 0.014497099121579565, 'num_leaves': 62, 'max_depth': 6, 'min_child_samples': 32, 'colsample_bytree': 0.631051077575546, 'subsample': 0.8432251483502375, 'reg_lambda': 5.490493121960747, 'reg_alpha': 6.8638947336909935}. Best is trial 0 with value: 1021.1271965497546.
[I 2025-03-18 00:09:21,648] Trial 1 finished with value: 1018.4864503367753 and parameters: {'learning_rate': 0.013496828786359136, 'num_leaves': 67, 'max_depth': 6, 'min_child_samples': 66, 'colsample_bytree': 0.8373494050774316, 'subsample': 0.8554168805210023, 'reg_lambda': 8.796965232346773, 'reg_alpha': 6.908922762180443}. Best is trial 1 with value: 1018.4864503367753.
[I 2025-03-18 00:09:21,813] Trial 2 finished with value: 1017.9183067196623 and parameters: {'learning_rate': 0.028360819335070418

[I 2025-03-18 00:09:26,649] Trial 22 finished with value: 1016.6003162428235 and parameters: {'learning_rate': 0.02330403431223816, 'num_leaves': 43, 'max_depth': 3, 'min_child_samples': 35, 'colsample_bytree': 0.7296885162701975, 'subsample': 0.7908850998144753, 'reg_lambda': 11.128629303805079, 'reg_alpha': 9.894746052661041}. Best is trial 12 with value: 1016.4778142873837.
[I 2025-03-18 00:09:26,820] Trial 23 finished with value: 1018.3288410339326 and parameters: {'learning_rate': 0.025088013541285176, 'num_leaves': 42, 'max_depth': 4, 'min_child_samples': 30, 'colsample_bytree': 0.7902876801387441, 'subsample': 0.7824394673663031, 'reg_lambda': 11.248884823375217, 'reg_alpha': 9.97987904727653}. Best is trial 12 with value: 1016.4778142873837.
[I 2025-03-18 00:09:27,048] Trial 24 finished with value: 1016.6129186051228 and parameters: {'learning_rate': 0.017153617366572477, 'num_leaves': 53, 'max_depth': 3, 'min_child_samples': 34, 'colsample_bytree': 0.7523675598197134, 'subsamp

[I 2025-03-18 00:09:32,210] Trial 44 finished with value: 1016.5340451162508 and parameters: {'learning_rate': 0.024456574341018222, 'num_leaves': 40, 'max_depth': 3, 'min_child_samples': 30, 'colsample_bytree': 0.691410434189735, 'subsample': 0.7880861041505499, 'reg_lambda': 11.487721624624102, 'reg_alpha': 10.499562523488294}. Best is trial 12 with value: 1016.4778142873837.
[I 2025-03-18 00:09:32,399] Trial 45 finished with value: 1018.9867624124903 and parameters: {'learning_rate': 0.028072362623674033, 'num_leaves': 41, 'max_depth': 4, 'min_child_samples': 33, 'colsample_bytree': 0.6829442186987819, 'subsample': 0.7621207529458582, 'reg_lambda': 11.61574829707777, 'reg_alpha': 6.407408384429846}. Best is trial 12 with value: 1016.4778142873837.
[I 2025-03-18 00:09:32,593] Trial 46 finished with value: 1018.5490114221017 and parameters: {'learning_rate': 0.02625878523137902, 'num_leaves': 62, 'max_depth': 4, 'min_child_samples': 36, 'colsample_bytree': 0.7023225290447428, 'subsamp

Best Parameters: {'learning_rate': 0.0214956903520105, 'num_leaves': 29, 'max_depth': 3, 'min_child_samples': 34, 'colsample_bytree': 0.6799166532295509, 'subsample': 0.7692353996939044, 'reg_lambda': 10.326012198865957, 'reg_alpha': 10.22346257287167}


In [96]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = study.best_params.copy()  # Copy the original dictionary
best_params.update({  
    'objective': 'regression',  
    'metric': 'rmse',  
    'n_estimators': 2000  
})



# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: /mnt/data/submission_xgboost.csv")



✅ Model Performance on Training Set:
MAE: 758.0353
MSE: 1157039.9712
RMSE: 1075.6579
R² Score: 0.6089

✅ Model Performance on Validation Set:
MAE: 715.8103
MSE: 1033227.1469
RMSE: 1016.4778
R² Score: 0.6199
Download your submission file: /mnt/data/submission_xgboost.csv


In [98]:

import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Create a new combined categorical feature
train_df['Outlet_Combined'] = train_df['Outlet_Type'].astype(str) \
                                + "_" + train_df['Outlet_Size'].astype(str)
#train_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \
                                

test_df['Outlet_Combined'] =    test_df['Outlet_Type'].astype(str) \
                                + "_" + test_df['Outlet_Size'].astype(str)
#test_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \

train_df['Item_MRP_Per_Unit_Weight'] = train_df['Item_MRP']/train_df['Item_Weight']
test_df['Item_MRP_Per_Unit_Weight'] = test_df['Item_MRP']/test_df['Item_Weight']

Outlet_Sales_Mean = train_df.groupby('Outlet_Identifier')['Item_Outlet_Sales'].mean()
train_df['Outlet_Sales_Mean'] = train_df['Outlet_Identifier'].map(Outlet_Sales_Mean)
test_df['Outlet_Sales_Mean'] = test_df['Outlet_Identifier'].map(Outlet_Sales_Mean)

    
# Select features and target
categorical_cols = [ 'Outlet_Combined']
numerical_cols = ['Item_MRP', 'Outlet_Sales_Mean', 'Outlet_Age']

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.03),  # Wider range
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),  # More flexibility
        'max_depth': trial.suggest_int('max_depth', 3, 7),  # Slightly deeper trees
        'min_child_samples': trial.suggest_int('min_child_samples', 30, 70),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.85),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'reg_lambda': trial.suggest_float('reg_lambda', 5, 12),  # Stronger L2 regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 5, 12),  # Stronger L1 regularization
        'n_estimators': 2000,  # Increased for better convergence
        'verbosity': -1  # Reduce logs for speed
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(100, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-18 00:19:00,390] A new study created in memory with name: no-name-bed3b0d6-f0eb-4ed4-8944-204d7dc385a2
[I 2025-03-18 00:19:00,536] Trial 0 finished with value: 1018.3713719756884 and parameters: {'learning_rate': 0.016305293441401962, 'num_leaves': 58, 'max_depth': 4, 'min_child_samples': 69, 'colsample_bytree': 0.6345682367086382, 'subsample': 0.7361808520073196, 'reg_lambda': 10.344080875299174, 'reg_alpha': 7.438539052577884}. Best is trial 0 with value: 1018.3713719756884.
[I 2025-03-18 00:19:00,694] Trial 1 finished with value: 1023.5569342394767 and parameters: {'learning_rate': 0.022096043350414887, 'num_leaves': 45, 'max_depth': 7, 'min_child_samples': 52, 'colsample_bytree': 0.7423174068718611, 'subsample': 0.749444167628759, 'reg_lambda': 7.978629498371472, 'reg_alpha': 8.298437392429168}. Best is trial 0 with value: 1018.3713719756884.
[I 2025-03-18 00:19:00,938] Trial 2 finished with value: 1018.7204004912826 and parameters: {'learning_rate': 0.009629845505575732

[I 2025-03-18 00:19:05,779] Trial 22 finished with value: 1018.1805981356682 and parameters: {'learning_rate': 0.007608023053262707, 'num_leaves': 44, 'max_depth': 3, 'min_child_samples': 65, 'colsample_bytree': 0.6534777905487521, 'subsample': 0.8181421717289074, 'reg_lambda': 7.322960285230865, 'reg_alpha': 9.780361486512692}. Best is trial 5 with value: 1017.8943266052809.
[I 2025-03-18 00:19:05,997] Trial 23 finished with value: 1018.522363195462 and parameters: {'learning_rate': 0.011443127957367754, 'num_leaves': 31, 'max_depth': 4, 'min_child_samples': 59, 'colsample_bytree': 0.6343777238237003, 'subsample': 0.8399026583946707, 'reg_lambda': 5.915883746956855, 'reg_alpha': 11.077927483710086}. Best is trial 5 with value: 1017.8943266052809.
[I 2025-03-18 00:19:06,192] Trial 24 finished with value: 1018.1279032852259 and parameters: {'learning_rate': 0.014767688128621772, 'num_leaves': 26, 'max_depth': 3, 'min_child_samples': 66, 'colsample_bytree': 0.7039140804570116, 'subsample

[I 2025-03-18 00:19:10,715] Trial 44 finished with value: 1017.8872646932706 and parameters: {'learning_rate': 0.010835947599748093, 'num_leaves': 56, 'max_depth': 3, 'min_child_samples': 68, 'colsample_bytree': 0.7091276247457078, 'subsample': 0.7269998270417688, 'reg_lambda': 7.447288163932868, 'reg_alpha': 6.461940046055087}. Best is trial 33 with value: 1017.7862845678826.
[I 2025-03-18 00:19:10,909] Trial 45 finished with value: 1018.2317210314893 and parameters: {'learning_rate': 0.01399116290566624, 'num_leaves': 73, 'max_depth': 3, 'min_child_samples': 68, 'colsample_bytree': 0.7135442577653077, 'subsample': 0.7255563570667785, 'reg_lambda': 7.477650842546769, 'reg_alpha': 6.482544428038504}. Best is trial 33 with value: 1017.7862845678826.
[I 2025-03-18 00:19:11,300] Trial 46 finished with value: 1021.4136009949879 and parameters: {'learning_rate': 0.008632595956419816, 'num_leaves': 62, 'max_depth': 6, 'min_child_samples': 39, 'colsample_bytree': 0.7527757856242238, 'subsampl

Best Parameters: {'learning_rate': 0.01112450477916466, 'num_leaves': 47, 'max_depth': 3, 'min_child_samples': 68, 'colsample_bytree': 0.8047939619276595, 'subsample': 0.7511958570053101, 'reg_lambda': 7.5278930435842, 'reg_alpha': 6.307137354889451}


In [99]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = study.best_params.copy()  # Copy the original dictionary
best_params.update({  
    'objective': 'regression',  
    'metric': 'rmse',  
    'n_estimators': 2000  
})



# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: /mnt/data/submission_xgboost.csv")



✅ Model Performance on Training Set:
MAE: 756.3971
MSE: 1155221.6873
RMSE: 1074.8124
R² Score: 0.6095

✅ Model Performance on Validation Set:
MAE: 715.9820
MSE: 1035115.7040
RMSE: 1017.4064
R² Score: 0.6192
Download your submission file: /mnt/data/submission_xgboost.csv


In [102]:
import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Create a new combined categorical feature
train_df['Outlet_Combined'] = train_df['Outlet_Type'].astype(str) \
                                + "_" + train_df['Outlet_Size'].astype(str)
#train_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \
                                

test_df['Outlet_Combined'] =    test_df['Outlet_Type'].astype(str) \
                                + "_" + test_df['Outlet_Size'].astype(str)
#test_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \

train_df['Item_MRP_Per_Unit_Weight'] = train_df['Item_MRP']/train_df['Item_Weight']
test_df['Item_MRP_Per_Unit_Weight'] = test_df['Item_MRP']/test_df['Item_Weight']
    
    
# Select features and target
categorical_cols = [ 'Outlet_Combined']
numerical_cols = ['Item_MRP', 'Outlet_Total_Sales', 'Outlet_Age']

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.03),  # Wider range
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),  # More flexibility
        'max_depth': trial.suggest_int('max_depth', 3, 7),  # Slightly deeper trees
        'min_child_samples': trial.suggest_int('min_child_samples', 30, 70),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.85),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'reg_lambda': trial.suggest_float('reg_lambda', 5, 12),  # Stronger L2 regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 5, 12),  # Stronger L1 regularization
        'n_estimators': 3000,  # Increased for better convergence
        'verbosity': -1,  # Reduce logs for speed
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 50),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 0.9),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 0.9),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10)
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(100, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-18 00:32:29,683] A new study created in memory with name: no-name-1558c699-da4d-4f66-b5c7-8a06352cfe8d
[I 2025-03-18 00:32:31,099] Trial 0 finished with value: 1017.2518599090918 and parameters: {'learning_rate': 0.008976101143419974, 'num_leaves': 69, 'max_depth': 5, 'min_child_samples': 37, 'colsample_bytree': 0.6684982089962654, 'subsample': 0.8732247213179906, 'reg_lambda': 7.509068985363829, 'reg_alpha': 11.675302461207481, 'min_data_in_leaf': 37, 'feature_fraction': 0.7496719981360618, 'bagging_fraction': 0.7289691758001717, 'bagging_freq': 5}. Best is trial 0 with value: 1017.2518599090918.
[I 2025-03-18 00:32:31,723] Trial 1 finished with value: 1018.5305828836014 and parameters: {'learning_rate': 0.013426828801937216, 'num_leaves': 32, 'max_depth': 3, 'min_child_samples': 62, 'colsample_bytree': 0.6929484863618205, 'subsample': 0.7146613122553258, 'reg_lambda': 5.971114023196622, 'reg_alpha': 8.345636118243068, 'min_data_in_leaf': 50, 'feature_fraction': 0.554976435

[I 2025-03-18 00:32:42,036] Trial 17 finished with value: 1015.993584484429 and parameters: {'learning_rate': 0.012528019018013476, 'num_leaves': 100, 'max_depth': 3, 'min_child_samples': 49, 'colsample_bytree': 0.6772204485190181, 'subsample': 0.7957538118743296, 'reg_lambda': 10.602066552915772, 'reg_alpha': 6.0684272960501735, 'min_data_in_leaf': 40, 'feature_fraction': 0.817961420086961, 'bagging_fraction': 0.435144363615096, 'bagging_freq': 4}. Best is trial 17 with value: 1015.993584484429.
[I 2025-03-18 00:32:42,505] Trial 18 finished with value: 1016.2872563067546 and parameters: {'learning_rate': 0.01191407449359935, 'num_leaves': 99, 'max_depth': 3, 'min_child_samples': 51, 'colsample_bytree': 0.7572482531101908, 'subsample': 0.7903805545820425, 'reg_lambda': 10.794122073054698, 'reg_alpha': 6.082574670622332, 'min_data_in_leaf': 43, 'feature_fraction': 0.8345202932041105, 'bagging_fraction': 0.41084911667054286, 'bagging_freq': 3}. Best is trial 17 with value: 1015.993584484

[I 2025-03-18 00:32:49,584] Trial 34 finished with value: 1017.8591672748385 and parameters: {'learning_rate': 0.024424754069456697, 'num_leaves': 83, 'max_depth': 5, 'min_child_samples': 39, 'colsample_bytree': 0.8365314242735327, 'subsample': 0.7638910508783491, 'reg_lambda': 8.255373611215571, 'reg_alpha': 9.641143849148243, 'min_data_in_leaf': 37, 'feature_fraction': 0.7476865312616355, 'bagging_fraction': 0.5305922662626651, 'bagging_freq': 4}. Best is trial 29 with value: 1014.725848225669.
[I 2025-03-18 00:32:50,149] Trial 35 finished with value: 1016.5187341951267 and parameters: {'learning_rate': 0.021609611824044368, 'num_leaves': 77, 'max_depth': 6, 'min_child_samples': 34, 'colsample_bytree': 0.8210440258219694, 'subsample': 0.746483660986114, 'reg_lambda': 6.293673271201009, 'reg_alpha': 8.23036252849243, 'min_data_in_leaf': 49, 'feature_fraction': 0.8604583736403055, 'bagging_fraction': 0.4521076699531965, 'bagging_freq': 6}. Best is trial 29 with value: 1014.725848225669

Best Parameters: {'learning_rate': 0.02796894428379391, 'num_leaves': 62, 'max_depth': 3, 'min_child_samples': 37, 'colsample_bytree': 0.8314750865217698, 'subsample': 0.7735485016722371, 'reg_lambda': 11.373906300512306, 'reg_alpha': 7.089588914108556, 'min_data_in_leaf': 29, 'feature_fraction': 0.8008771789769791, 'bagging_fraction': 0.4198048096703424, 'bagging_freq': 6}


In [103]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = study.best_params.copy()  # Copy the original dictionary
best_params.update({  
    'objective': 'regression',  
    'metric': 'rmse',  
    'n_estimators': 2000  
})



# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: /mnt/data/submission_xgboost.csv")
#1150.5794325247 Rank #831



✅ Model Performance on Training Set:
MAE: 762.7064
MSE: 1164127.5549
RMSE: 1078.9474
R² Score: 0.6065

✅ Model Performance on Validation Set:
MAE: 717.4974
MSE: 1031873.7988
RMSE: 1015.8119
R² Score: 0.6204
Download your submission file: /mnt/data/submission_xgboost.csv


In [108]:
import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Create a new combined categorical feature
train_df['Outlet_Combined'] = train_df['Outlet_Type'].astype(str) \
                                + "_" + train_df['Outlet_Size'].astype(str)
#train_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \
                                

test_df['Outlet_Combined'] =    test_df['Outlet_Type'].astype(str) \
                                + "_" + test_df['Outlet_Size'].astype(str)
#test_df['Outlet_Location_Type'].astype(str) \
                              #+ "_" + \

train_df['Item_MRP_Per_Unit_Weight'] = train_df['Item_MRP']/train_df['Item_Weight']
test_df['Item_MRP_Per_Unit_Weight'] = test_df['Item_MRP']/test_df['Item_Weight']
    
    
# Select features and target
categorical_cols = [ 'Outlet_Combined']
numerical_cols = ['Item_MRP', 'Outlet_Total_Sales', 'Outlet_Age']

# Ensure categorical columns are treated as category dtype
for col in categorical_cols:
    train_df[col] = train_df[col].astype('category')
    test_df[col] = test_df[col].astype('category')

# Select X and y
X = train_df[numerical_cols + categorical_cols]
y = train_df['Item_Outlet_Sales']
X_test = test_df[numerical_cols + categorical_cols]

# Split data
X_train, X_validate, y_train, y_validate = train_test_split(X, y, test_size=0.2, random_state=42)


def objective(trial):
    params = {
        'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.05),  # Wider range
        'num_leaves': trial.suggest_int('num_leaves', 50, 200),  # More flexibility
        'max_depth': trial.suggest_int('max_depth', 3, 10),  # Slightly deeper trees
        'min_child_samples': trial.suggest_int('min_child_samples', 10, 100),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'subsample': trial.suggest_float('subsample', 0.7, 0.9),
        'reg_lambda': trial.suggest_float('reg_lambda', 5, 12),  # Stronger L2 regularization
        'reg_alpha': trial.suggest_float('reg_alpha', 5, 12),  # Stronger L1 regularization
        'n_estimators': 5000,  # Increased for better convergence
        'verbosity': -1,  # Reduce logs for speed
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 50),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 0.9),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 0.9),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10)
    }

    model = lgb.LGBMRegressor(**params)

    eval_result = {}
    model.fit(
        X_train, y_train, 
        eval_set=[(X_validate, y_validate)], 
        eval_metric='rmse',
        categorical_feature=categorical_cols,  # Pass categorical features!
        callbacks=[lgb.early_stopping(100, verbose=False), lgb.record_evaluation(eval_result)]
    )
    
    preds = model.predict(X_validate)
    rmse = mean_squared_error(y_validate, preds, squared=False)
    return rmse

# Run Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)  # Increase trials for better tuning

print("Best Parameters:", study.best_params)


[I 2025-03-18 00:50:39,772] A new study created in memory with name: no-name-ace8932b-8b35-4e4d-b4b4-165a907829f7
[I 2025-03-18 00:50:39,918] Trial 0 finished with value: 1018.2005109846273 and parameters: {'learning_rate': 0.03536606799946188, 'num_leaves': 162, 'max_depth': 6, 'min_child_samples': 37, 'colsample_bytree': 0.6708243338349053, 'subsample': 0.7260553181160913, 'reg_lambda': 5.814468070119192, 'reg_alpha': 6.450629242654854, 'min_data_in_leaf': 27, 'feature_fraction': 0.8144948601808867, 'bagging_fraction': 0.4607273729286345, 'bagging_freq': 2}. Best is trial 0 with value: 1018.2005109846273.
[I 2025-03-18 00:50:41,993] Trial 1 finished with value: 1025.368636652261 and parameters: {'learning_rate': 0.0015206997475358374, 'num_leaves': 71, 'max_depth': 8, 'min_child_samples': 93, 'colsample_bytree': 0.8237730462138798, 'subsample': 0.8979623851663447, 'reg_lambda': 10.691190700867864, 'reg_alpha': 10.731866000851939, 'min_data_in_leaf': 14, 'feature_fraction': 0.78067255

[I 2025-03-18 00:50:45,871] Trial 17 finished with value: 1017.4832487046822 and parameters: {'learning_rate': 0.011567129368547693, 'num_leaves': 76, 'max_depth': 4, 'min_child_samples': 15, 'colsample_bytree': 0.6157857219235067, 'subsample': 0.8114681790445855, 'reg_lambda': 8.009074804015357, 'reg_alpha': 8.125912085396953, 'min_data_in_leaf': 38, 'feature_fraction': 0.5506004804559441, 'bagging_fraction': 0.5343842141099407, 'bagging_freq': 5}. Best is trial 8 with value: 1016.3542985858571.
[I 2025-03-18 00:50:47,787] Trial 18 finished with value: 1018.1338159372181 and parameters: {'learning_rate': 0.0011314132236276177, 'num_leaves': 93, 'max_depth': 5, 'min_child_samples': 70, 'colsample_bytree': 0.7650843975767153, 'subsample': 0.7869029347125733, 'reg_lambda': 6.395033449083058, 'reg_alpha': 6.971274893460405, 'min_data_in_leaf': 32, 'feature_fraction': 0.8428055285917246, 'bagging_fraction': 0.6181371804783076, 'bagging_freq': 8}. Best is trial 8 with value: 1016.3542985858

[I 2025-03-18 00:50:50,225] Trial 34 finished with value: 1014.8714035155638 and parameters: {'learning_rate': 0.04881458025112454, 'num_leaves': 141, 'max_depth': 4, 'min_child_samples': 46, 'colsample_bytree': 0.7867748677120606, 'subsample': 0.8944626078308132, 'reg_lambda': 10.493555843189272, 'reg_alpha': 10.272441874517941, 'min_data_in_leaf': 40, 'feature_fraction': 0.6568051713593874, 'bagging_fraction': 0.4022036412715186, 'bagging_freq': 2}. Best is trial 22 with value: 1014.158488506265.
[I 2025-03-18 00:50:50,369] Trial 35 finished with value: 1016.8013599340173 and parameters: {'learning_rate': 0.04403696256749852, 'num_leaves': 138, 'max_depth': 4, 'min_child_samples': 40, 'colsample_bytree': 0.7863070209097736, 'subsample': 0.8938182616189533, 'reg_lambda': 10.533456820087311, 'reg_alpha': 10.301074525113641, 'min_data_in_leaf': 40, 'feature_fraction': 0.5903126061063407, 'bagging_fraction': 0.450320700649765, 'bagging_freq': 2}. Best is trial 22 with value: 1014.1584885

Best Parameters: {'learning_rate': 0.047673695659454375, 'num_leaves': 142, 'max_depth': 3, 'min_child_samples': 51, 'colsample_bytree': 0.8388305149694122, 'subsample': 0.8470246164321973, 'reg_lambda': 9.33772086438144, 'reg_alpha': 11.948450889637312, 'min_data_in_leaf': 46, 'feature_fraction': 0.7309646028654864, 'bagging_fraction': 0.49858443214156756, 'bagging_freq': 3}


In [109]:
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Best parameters from Optuna
best_params = study.best_params.copy()  # Copy the original dictionary
best_params.update({  
    'objective': 'regression',  
    'metric': 'rmse',  
    'n_estimators': 2000  
})



# Train model
model = LGBMRegressor(**best_params)
model.fit(X_train, y_train, 
          eval_set=[(X_validate, y_validate)], 
          eval_metric='rmse', 
          callbacks=[lgb.early_stopping(50, verbose=False)])

# Predict on training & validation sets
y_train_pred = model.predict(X_train)
y_valid_pred = model.predict(X_validate)

# Evaluate performance
def evaluate(y_true, y_pred, dataset):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = mse ** 0.5
    r2 = r2_score(y_true, y_pred)
    
    print(f"\n✅ Model Performance on {dataset}:")
    print(f"MAE: {mae:.4f}")
    print(f"MSE: {mse:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R² Score: {r2:.4f}")

# Show results
evaluate(y_train, y_train_pred, "Training Set")
evaluate(y_validate, y_valid_pred, "Validation Set")

# Predict on test set
y_test_preds = model.predict(X_test)

# Create submission file
submission = test_df[['Item_Identifier', 'Outlet_Identifier']].copy()
submission['Item_Outlet_Sales'] = y_test_preds

submission['Item_Outlet_Sales'] = y_test_preds.clip(min=0)

# Save CSV
submission.to_csv('../submission/submission_lgb.csv', index=False)

print("Download your submission file: /mnt/data/submission_lgb.csv")
#1150.2858411057 rank#790



✅ Model Performance on Training Set:
MAE: 763.8165
MSE: 1168085.2705
RMSE: 1080.7799
R² Score: 0.6051

✅ Model Performance on Validation Set:
MAE: 716.6091
MSE: 1028456.0442
RMSE: 1014.1282
R² Score: 0.6216
Download your submission file: /mnt/data/submission_xgboost.csv
