# Exp 04: lgb agrupando por periodo-product_id

Usando distribución tweedy y max_bins=500 y optimizacion bayesiana

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gc

##### Merge: periodos + clientes + productos

In [2]:
df = pd.read_csv("../../data/preprocessed/base.csv", sep=',')
df["periodo_dt"] = pd.to_datetime(df["periodo"].astype(str), format="%Y%m")


periodos = pd.date_range(start=df['periodo_dt'].min(), end=df['periodo_dt'].max(), freq="MS")
productos = df['product_id'].unique()


idx = pd.MultiIndex.from_product([productos, periodos], names=['product_id', 'periodo'])
completo = idx.to_frame(index=False)
completo["periodo"] = completo["periodo"].dt.strftime("%Y%m").astype(int)

del periodos, productos, 
gc.collect()
completo

Unnamed: 0,product_id,periodo
0,20524,201701
1,20524,201702
2,20524,201703
3,20524,201704
4,20524,201705
...,...,...
44383,20770,201908
44384,20770,201909
44385,20770,201910
44386,20770,201911


##### Cruzamos con productos

In [3]:
productos = pd.read_csv("../../data/raw/tb_productos.csv", sep='\t')
productos = productos.drop_duplicates(subset=['product_id'], keep='first')
completo = completo.merge(productos, how='left', on="product_id")
del productos
gc.collect()

20

##### Cruzamos con stock

In [4]:
stocks = pd.read_csv("../../data/raw/tb_stocks.csv", sep='\t')
stocks = stocks.groupby(by=["periodo", "product_id"]).agg({"stock_final": "sum"}).reset_index()
completo = completo.merge(stocks, how='left', on=['periodo', 'product_id'])
del stocks
gc.collect()
completo.head()

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,


##### Cruzamos con ventas

In [5]:
sellin = pd.read_csv("../../data/raw/sell-in.csv", sep='\t')
# Agrupar ventas por periodo, cliente y producto
dt = sellin.groupby(by=["periodo","product_id"]).agg({"tn":"sum"}).reset_index()
df_completo = completo.merge(dt, how='left', on=['periodo', 'product_id'])
df_completo['tn'] = df_completo['tn'].fillna(0)
del sellin, dt, completo
gc.collect()
df_completo

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final,tn
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,,6.48085
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,,3.99755
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,,7.14711
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,,6.82163
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,,9.25949
...,...,...,...,...,...,...,...,...,...
44383,20770,201908,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44384,20770,201909,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44385,20770,201910,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44386,20770,201911,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000


##### Target

In [6]:
# Asegurarte de tener 'periodo_dt' (datetime) en completo
df_completo['periodo_dt'] = pd.to_datetime(df_completo['periodo'], format='%Y%m')

# Crear DataFrame auxiliar con tn como target y fecha adelantada
ventas_futuras = df_completo[['periodo_dt', 'product_id', 'tn']].copy()
ventas_futuras['periodo_target_dt'] = ventas_futuras['periodo_dt'] - pd.DateOffset(months=2)
ventas_futuras = ventas_futuras.rename(columns={'tn': 'target'})

# Merge con completo usando periodo adelantado
df_completo = df_completo.merge(
    ventas_futuras[['periodo_target_dt', 'product_id', 'target']],
    how='left',
    left_on=['periodo_dt', 'product_id'],
    right_on=['periodo_target_dt', 'product_id']
)

# Eliminar columna auxiliar
df_completo = df_completo.drop(columns=['periodo_target_dt'])
del ventas_futuras
gc.collect()
print(f"✅ Target generado. Filas con target no nulo: {df_completo['target'].notna().sum()}")

✅ Target generado. Filas con target no nulo: 41922


In [7]:
df_completo

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final,tn,periodo_dt,target
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,,6.48085,2017-01-01,7.14711
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,,3.99755,2017-02-01,6.82163
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,,7.14711,2017-03-01,9.25949
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,,6.82163,2017-04-01,7.04113
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,,9.25949,2017-05-01,5.92819
...,...,...,...,...,...,...,...,...,...,...,...
44383,20770,201908,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-08-01,0.00000
44384,20770,201909,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-09-01,0.00000
44385,20770,201910,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-10-01,3.18500
44386,20770,201911,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-11-01,


##### Verifico las NaN en el target: Existen porque hay clientes que solo compraron 2 veces.

In [13]:
nan_count = df_completo['target'].isna().sum()
print(f"🔍 Total de NaN en target: {nan_count}")
del nan_count
gc.collect()

🔍 Total de NaN en target: 2466


49

##### Generación de IDs

In [8]:
df_completo = df_completo.sort_values(['periodo', 'product_id'])
df_completo['id'] = df_completo.groupby(['product_id']).cumcount() + 1

##### Periodo 

In [9]:
df_completo["periodo_dt"] = pd.to_datetime(df_completo["periodo"].astype(str), format="%Y%m")

##### Eliminar productos que no nacieron

In [10]:
nacimiento_producto = df.groupby("product_id")["periodo_dt"].agg(["min"]).reset_index()
# Renombrar columna max a muerte_cliente_dt
nacimiento_producto = nacimiento_producto.rename(columns={'min': 'nacimiento_producto'})


# Unir con df_final para traer fecha de muerte del cliente
df_completo = df_completo.merge(nacimiento_producto, on='product_id', how='left')

# Filtrar filas donde periodo_dt > muerte_cliente_dt
df_completo = df_completo[df_completo['periodo_dt'] >= df_completo['nacimiento_producto']]

# Opcional: eliminar columna auxiliar
# df_final = df_final.drop(columns=['muerte_cliente_dt'])
del nacimiento_producto
gc.collect()
print(f"✅ Dataset filtrado con {len(df_completo):,} filas.")

✅ Dataset filtrado con 35,888 filas.


##### Correlograma

In [21]:
# Supongamos que tu DataFrame se llama df
cor_matrix = df_completo.corr(numeric_only=True)

# Tomamos solo la parte superior de la matriz (sin la diagonal)
upper = cor_matrix.where(np.triu(np.ones(cor_matrix.shape), k=1).astype(bool))

# Filtramos correlaciones fuertes (valor absoluto mayor a 0.7)
high_corr = upper.stack().reset_index()
high_corr.columns = ['Variable 1', 'Variable 2', 'Correlación']
high_corr_filtrada = high_corr[high_corr['Correlación'].abs() > 0.7]

print(high_corr_filtrada)

del high_corr_filtrada, cor_matrix, upper, high_corr
gc.collect()

   Variable 1 Variable 2  Correlación
10    periodo         id     0.955974
18         tn     target     0.930409


0

##### Elimino variables muy correlacionadas

In [None]:
df_completo.drop(columns=['periodo'], inplace=True)

##### Extracción de componentes temporales

In [11]:
df_completo['year'] = df_completo['periodo_dt'].dt.year
df_completo['month'] = df_completo['periodo_dt'].dt.month
# Variables dummy estacionales
df_completo['quarter'] = df_completo['periodo_dt'].dt.quarter
df_completo['semester'] = np.where(df_completo['month'] <= 6, 1, 2)
# Efectos de fin de año
df_completo['year_end'] = np.where(df_completo['month'].isin([11, 12]), 1, 0)
df_completo['year_start'] = np.where(df_completo['month'].isin([1, 2]), 1, 0)
# Indicadores estacionales
df_completo['season'] = df_completo['month'] % 12 // 3 + 1  # 1:Invierno, 2:Primavera, etc.
# Variables cíclicas (para capturar patrones estacionales)
df_completo['month_sin'] = np.sin(2 * np.pi * df_completo['month']/12)
df_completo['month_cos'] = np.cos(2 * np.pi * df_completo['month']/12)

#####  Lags, diferencias, medias móviles y otras yerbas

In [12]:
# Ordenamos por fecha para asegurar consistencia
df_completo = df_completo.sort_values('periodo_dt')

## 1. Lags (rezagos) de 1 a 12 meses
for i in range(1, 15):
    df_completo[f'lag_{i}'] = df_completo['tn'].shift(i)

## 2. Diferencias (deltas) - cambio respecto al mes anterior
for i in range(1, 13):
    df_completo[f'delta_{i}'] = df_completo['tn'].diff(i)

## 3. Diferencias porcentuales
for i in range(1, 13):
    df_completo[f'pct_change_{i}'] = df_completo['tn'].pct_change(i)

## 4. Medias móviles (promedios móviles)
windows = [2, 3, 6, 9, 12]  # También puedes incluir [2,4,5,7] según necesidad
for w in windows:
    df_completo[f'rolling_mean_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).mean()
    df_completo[f'rolling_std_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).std()
    df_completo[f'rolling_min_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).min()
    df_completo[f'rolling_max_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).max()
    df_completo[f'rolling_median_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).median()

## 5. Características de tendencia y estacionalidad
df_completo['expanding_mean'] = df_completo['tn'].expanding().mean()
df_completo['cumulative_sum'] = df_completo['tn'].cumsum()

## 6. Características de diferencia estacional (12 meses para datos mensuales)
df_completo['seasonal_diff_12'] = df_completo['tn'].diff(12)

## 7. Estadísticas anuales comparativas
df_completo['vs_prev_year'] = df_completo['tn'] / df_completo['lag_12'] - 1  # Crecimiento interanual

## 8. Componentes de descomposición (simplificada)
# Tendencia (usando media móvil de 12 meses)
df_completo['trend'] = df_completo['tn'].rolling(window=12, min_periods=1).mean()
# Estacionalidad (diferencia entre valor real y tendencia)
df_completo['seasonality'] = df_completo['tn'] - df_completo['trend']

## 9. Variables booleanas para eventos especiales
df_completo['new_high'] = (df_completo['tn'] == df_completo['rolling_max_12']).astype(int)
df_completo['new_low'] = (df_completo['tn'] == df_completo['rolling_min_12']).astype(int)

## 10. Características de aceleración/deceleración
df_completo['acceleration'] = df_completo['delta_1'].diff(1)  # Cambio en la tasa de cambio

##### Estadísticas de Ventana Dinámica

In [13]:
# Medias móviles exponenciales
df_completo['ewm_alpha_0.3'] = df_completo['tn'].ewm(alpha=0.3, adjust=False).mean()
df_completo['ewm_alpha_0.5'] = df_completo['tn'].ewm(alpha=0.5, adjust=False).mean()

# Medias móviles centradas
df_completo['rolling_center_mean_3'] = df_completo['tn'].rolling(window=3, center=True).mean()

# Sumas acumuladas por año
df_completo['ytd_sum'] = df_completo.groupby(df_completo['periodo_dt'].dt.year)['tn'].cumsum()

##### Características de Tendencia y Ciclo

In [14]:
# Modelado de tendencia polinomial
df_completo['time_index'] = range(len(df_completo))
df_completo['trend_linear'] = np.poly1d(np.polyfit(df_completo['time_index'], df_completo['tn'], 1))(df_completo['time_index'])
df_completo['trend_quadratic'] = np.poly1d(np.polyfit(df_completo['time_index'], df_completo['tn'], 2))(df_completo['time_index'])

# Residuales de tendencia
df_completo['residual_trend'] = df_completo['tn'] - df_completo['trend_linear']

##### Características de Cambio de Régimen

In [15]:
# Z-Score respecto a ventana móvil
df_completo['zscore_6'] = (df_completo['tn'] - df_completo['rolling_mean_6']) / df_completo['rolling_std_6']

# Detección de outliers
df_completo['is_outlier_3sigma'] = np.where(np.abs(df_completo['zscore_6']) > 3, 1, 0)

# Cambios bruscos (spikes)
df_completo['spike_up'] = np.where(df_completo['delta_1'] > df_completo['rolling_std_3'], 1, 0)
df_completo['spike_down'] = np.where(df_completo['delta_1'] < -df_completo['rolling_std_3'], 1, 0)

##### Características de Patrones Temporales

In [16]:
# Autocorrelaciones parciales
from statsmodels.tsa.stattools import pacf
pacf_values = pacf(df_completo['tn'].dropna(), nlags=12)
for i in range(1, 6):
    df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]

# Estacionalidad múltiple (si hay patrones semestrales)
df_completo['semester_mean'] = df_completo.groupby(['year', 'semester'])['tn'].transform('mean')

  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo['semester_mean'] = df_completo.groupby(['year', 'semester'])['tn'].transform('mean')


##### Características de Forecast Ingenieriles

In [17]:
# Método ingenuo (último valor)
df_completo['naive_forecast'] = df_completo['tn'].shift(1)

# Seasonal naive (valor del mismo período año anterior)
df_completo['seasonal_naive'] = df_completo['tn'].shift(12)

# Promedio móvil como forecast
df_completo['ma_forecast_3'] = df_completo['rolling_mean_3'].shift(1)

  df_completo['naive_forecast'] = df_completo['tn'].shift(1)
  df_completo['seasonal_naive'] = df_completo['tn'].shift(12)
  df_completo['ma_forecast_3'] = df_completo['rolling_mean_3'].shift(1)


##### Características de Decomposición Temporal

In [18]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Descomposición clásica (additiva o multiplicativa)
result = seasonal_decompose(df_completo['tn'].dropna(), model='additive', period=12)
df_completo['trend_decomposed'] = result.trend
df_completo['seasonal_decomposed'] = result.seasonal
df_completo['residual_decomposed'] = result.resid

  df_completo['trend_decomposed'] = result.trend
  df_completo['seasonal_decomposed'] = result.seasonal
  df_completo['residual_decomposed'] = result.resid


##### Características de Ventanas Asimétricas

In [19]:
# Mejor mes histórico
df_completo['best_month_rank'] = df_completo.groupby('month')['tn'].rank(ascending=False)

# Comparación con mismo mes año anterior
df_completo['vs_last_year_same_month'] = df_completo['tn'] / df_completo['lag_12'] - 1

# Acumulado últimos 3 vs mismos 3 meses año anterior
df_completo['last3_vs_ly3'] = (df_completo['tn'] + df_completo['lag_1'] + df_completo['lag_2']) / (df_completo['lag_12'] + df_completo['lag_13'] + df_completo['lag_14']) - 1

  df_completo['best_month_rank'] = df_completo.groupby('month')['tn'].rank(ascending=False)
  df_completo['vs_last_year_same_month'] = df_completo['tn'] / df_completo['lag_12'] - 1
  df_completo['last3_vs_ly3'] = (df_completo['tn'] + df_completo['lag_1'] + df_completo['lag_2']) / (df_completo['lag_12'] + df_completo['lag_13'] + df_completo['lag_14']) - 1


##### Transformaciones Matemáticas

In [20]:
from scipy import stats
from scipy.special import boxcox1p

# Transformaciones clásicas
df_completo['log_tn'] = np.log1p(df_completo['tn'])
df_completo['sqrt_tn'] = np.sqrt(df_completo['tn'])

# Box-Cox (solo si tn > 0)
mask = df_completo['tn'] > 0
df_completo['boxcox_tn'] = np.nan
df_completo.loc[mask, 'boxcox_tn'], _ = stats.boxcox(df_completo.loc[mask, 'tn'])

# Diferenciación
df_completo['diff1_log'] = df_completo['log_tn'].diff(1)


  df_completo['log_tn'] = np.log1p(df_completo['tn'])
  df_completo['sqrt_tn'] = np.sqrt(df_completo['tn'])
  df_completo['boxcox_tn'] = np.nan
  df_completo['diff1_log'] = df_completo['log_tn'].diff(1)


##### Características de Interacción

In [21]:
# Interacción entre tendencia y estacionalidad
df_completo['trend_season_interaction'] = df_completo['trend'] * df_completo['seasonal_decomposed']

# Interacción lags con estacionalidad
for i in [1, 2, 3, 12]:
    df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']

  df_completo['trend_season_interaction'] = df_completo['trend'] * df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']


##### Completamos NaN del target con ceros

In [22]:
df_completo['target'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_completo['target'].fillna(0, inplace=True)


##### Dividimos el dataset

In [23]:
dt_kgl = df_completo[df_completo["periodo"].isin([201912])]
ts = df_completo.drop(df_completo[df_completo["periodo"].isin([201911,201912])].index,axis=0)

In [24]:
# Asegurate de que 'periodo' sea datetime si no lo es
# df_completo['periodo_dt'] = pd.to_datetime(df_completo['periodo'].astype(str), format='%Y%m')

# features
feature_columns = [col for col in ts.columns if col not in ['periodo_dt', 'tn_target', 'nacimiento_producto', 'target']]

# Definimos los límites
train_cutoff = '2019-09-01'
valid_cutoff = '2019-11-01'

# Split temporal
df_train = df_completo[df_completo['periodo_dt'] < train_cutoff]
df_valid = df_completo[(df_completo['periodo_dt'] >= train_cutoff) & (df_completo['periodo_dt'] < valid_cutoff)]

# Separás features y target
X_train = df_train.drop(columns=feature_columns)
y_train = df_train['target']

X_valid = df_valid.drop(columns=feature_columns)
y_valid = df_valid['target']


##### Productos a predecir

In [25]:
X_kgl = dt_kgl[feature_columns]
productos_a_predecir = pd.read_csv("../../data/raw/product_id_apredecir201912.csv")
# Filtrar filas
productos_filtrados = productos_a_predecir['product_id'].unique()
X_kgl = X_kgl[X_kgl['product_id'].isin(productos_filtrados)]
X_kgl['product_id'].nunique()

780

In [52]:
df_completo.info()

<class 'pandas.core.frame.DataFrame'>
Index: 35888 entries, 0 to 44387
Columns: 130 entries, product_id to lag_12_season_adj
dtypes: datetime64[ns](2), float64(108), int32(13), int64(3), object(4)
memory usage: 35.1+ MB


##### Optimización de Hiperparámetros con Optuna

In [26]:
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import optuna
import numpy as np

# Preparar datos
feature_columns = [col for col in ts.columns if col not in ['periodo_dt', 'tn_target', 'nacimiento_producto', 'target']]
X = ts[feature_columns]
y = ts['target']

for col in ['cat1', 'cat2', 'cat3', 'brand']:
    X[col] = X[col].astype('category')

# Eliminar NaNs en el target
if y.isnull().any():
    print("⚠️ Target tiene NaN, se eliminarán.")
    mask = ~y.isnull()
    X = X[mask]
    y = y[mask]

# Función objetivo para Optuna
def objective(trial):
    params = {
        'objective': 'tweedie',
        'tweedie_variance_power': trial.suggest_float('tweedie_variance_power', 1.1, 1.9),
        # 'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'random_state': 12345,
        'max_bin': 500,
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True)
    }

    tscv = TimeSeriesSplit(n_splits=3)
    rmses = []

    for train_idx, valid_idx in tscv.split(X):
        X_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

        model = lgb.LGBMRegressor(**params)
        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            callbacks=[
                lgb.early_stopping(50),
                lgb.log_evaluation(0)
            ]
        )

        preds = model.predict(X_valid)
        rmse = mean_squared_error(y_valid, preds, squared=False)
        rmses.append(rmse)

    return np.mean(rmses)

# Crear estudio Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Mostrar mejores parámetros
print("Mejores parámetros encontrados:", study.best_params)

# Entrenar modelo final
best_params = study.best_params
best_model = lgb.LGBMRegressor(**best_params, objective='tweedie', max_bin=500, random_state=12345)
best_model.fit(X, y)

print("✅ Modelo LightGBM optimizado y entrenado con éxito.")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_in

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,008872 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[115]	valid_0's rmse: 36.17




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,039192 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[143]	valid_0's rmse: 28.9454




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,021628 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[143]	valid_0's rmse: 35.0716


[I 2025-06-08 21:50:09,094] Trial 0 finished with value: 33.395654223337765 and parameters: {'tweedie_variance_power': 1.8639183298766162, 'num_leaves': 144, 'max_depth': 7, 'learning_rate': 0.0649306724428819, 'n_estimators': 309, 'min_child_samples': 6, 'subsample': 0.6611672805056656, 'colsample_bytree': 0.8733270073221602, 'reg_alpha': 0.0002797105661816685, 'reg_lambda': 5.85739445693905e-07}. Best is trial 0 with value: 33.395654223337765.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,042022 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[713]	valid_0's rmse: 67.7167




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,092394 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[713]	valid_0's rmse: 68.846




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,031563 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[713]	valid_0's rmse: 65.7595


[I 2025-06-08 21:52:48,746] Trial 1 finished with value: 67.44072774260471 and parameters: {'tweedie_variance_power': 1.50104894724364, 'num_leaves': 108, 'max_depth': 10, 'learning_rate': 0.0015840305875293409, 'n_estimators': 713, 'min_child_samples': 46, 'subsample': 0.8225774055917646, 'colsample_bytree': 0.9834704312379041, 'reg_alpha': 4.1108522125063915e-05, 'reg_lambda': 0.06416937706015159}. Best is trial 0 with value: 33.395654223337765.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,043587 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[307]	valid_0's rmse: 33.2654




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,075917 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[436]	valid_0's rmse: 28.5527




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,034948 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[380]	valid_0's rmse: 31.4442


[I 2025-06-08 21:53:35,380] Trial 2 finished with value: 31.087433590340012 and parameters: {'tweedie_variance_power': 1.8705258433360887, 'num_leaves': 53, 'max_depth': 14, 'learning_rate': 0.021688819831119525, 'n_estimators': 438, 'min_child_samples': 16, 'subsample': 0.9252243058443479, 'colsample_bytree': 0.7216642144835137, 'reg_alpha': 0.002219148123588775, 'reg_lambda': 0.03287740831585806}. Best is trial 2 with value: 31.087433590340012.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,013894 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[72]	valid_0's rmse: 34.9193




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,062215 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[221]	valid_0's rmse: 28.23




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,065367 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[205]	valid_0's rmse: 34.8864


[I 2025-06-08 21:54:34,797] Trial 3 finished with value: 32.67858987279089 and parameters: {'tweedie_variance_power': 1.2399846774567278, 'num_leaves': 99, 'max_depth': 13, 'learning_rate': 0.03401925984438925, 'n_estimators': 409, 'min_child_samples': 11, 'subsample': 0.774764938072558, 'colsample_bytree': 0.9016479325776282, 'reg_alpha': 0.013401581394926127, 'reg_lambda': 1.0103282341280367e-05}. Best is trial 2 with value: 31.087433590340012.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,023233 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[55]	valid_0's rmse: 33.6345




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,103033 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[121]	valid_0's rmse: 27.0154




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,032263 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[138]	valid_0's rmse: 31.7955


[I 2025-06-08 21:55:05,244] Trial 4 finished with value: 30.815121191016633 and parameters: {'tweedie_variance_power': 1.4745319327090798, 'num_leaves': 93, 'max_depth': 10, 'learning_rate': 0.06179249101989406, 'n_estimators': 797, 'min_child_samples': 21, 'subsample': 0.854131331670269, 'colsample_bytree': 0.8256145811921656, 'reg_alpha': 0.0011553848778790017, 'reg_lambda': 1.1113827851237962e-05}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,010315 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[402]	valid_0's rmse: 36.0299




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,074287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[402]	valid_0's rmse: 36.8213




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,022279 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[402]	valid_0's rmse: 38.6633


[I 2025-06-08 21:56:10,603] Trial 5 finished with value: 37.171501650154234 and parameters: {'tweedie_variance_power': 1.5986221634286717, 'num_leaves': 90, 'max_depth': 8, 'learning_rate': 0.007361243998289328, 'n_estimators': 402, 'min_child_samples': 23, 'subsample': 0.6475714416278657, 'colsample_bytree': 0.788302025201823, 'reg_alpha': 0.0035792658704399123, 'reg_lambda': 4.5200465456512186e-08}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,008727 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[616]	valid_0's rmse: 32.623




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,106440 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[647]	valid_0's rmse: 28.7462




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,072358 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[646]	valid_0's rmse: 31.8388


[I 2025-06-08 21:56:55,100] Trial 6 finished with value: 31.06934595892315 and parameters: {'tweedie_variance_power': 1.5005550188885755, 'num_leaves': 80, 'max_depth': 5, 'learning_rate': 0.009453358416477844, 'n_estimators': 647, 'min_child_samples': 43, 'subsample': 0.840754049628819, 'colsample_bytree': 0.8510339785893667, 'reg_alpha': 4.32123910700409e-05, 'reg_lambda': 6.529634039328078e-08}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,025889 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[615]	valid_0's rmse: 33.0324




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,042228 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[941]	valid_0's rmse: 27.6501




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,059310 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[724]	valid_0's rmse: 31.8731


[I 2025-06-08 21:59:32,359] Trial 7 finished with value: 30.85189012976387 and parameters: {'tweedie_variance_power': 1.586372612191766, 'num_leaves': 86, 'max_depth': 13, 'learning_rate': 0.008523289657462992, 'n_estimators': 959, 'min_child_samples': 26, 'subsample': 0.941409041314018, 'colsample_bytree': 0.8912709973514223, 'reg_alpha': 0.06814852356246079, 'reg_lambda': 1.9110690017435403}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,006900 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[39]	valid_0's rmse: 33.5115




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,058993 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[96]	valid_0's rmse: 27.3876




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,022002 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 21:59:45,749] Trial 8 finished with value: 31.370842611180752 and parameters: {'tweedie_variance_power': 1.2239929741829703, 'num_leaves': 39, 'max_depth': 7, 'learning_rate': 0.07304745038798569, 'n_estimators': 810, 'min_child_samples': 27, 'subsample': 0.65814962110891, 'colsample_bytree': 0.8437239776845177, 'reg_alpha': 3.4074730825233304e-06, 'reg_lambda': 1.175482309428784e-06}. Best is trial 4 with value: 30.815121191016633.


Early stopping, best iteration is:
[97]	valid_0's rmse: 33.2134
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,121617 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[850]	valid_0's rmse: 52.5303




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,028941 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[850]	valid_0's rmse: 55.5675




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,030361 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[850]	valid_0's rmse: 54.5083


[I 2025-06-08 22:03:33,072] Trial 9 finished with value: 54.2019981042435 and parameters: {'tweedie_variance_power': 1.6094692277310774, 'num_leaves': 132, 'max_depth': 11, 'learning_rate': 0.00219935971714466, 'n_estimators': 850, 'min_child_samples': 8, 'subsample': 0.7993780264623962, 'colsample_bytree': 0.7443200505826008, 'reg_alpha': 0.06372337583480117, 'reg_lambda': 8.042041933046793e-07}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,048669 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[116]	valid_0's rmse: 36.7693




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,093783 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[116]	valid_0's rmse: 38.0341




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,058379 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[116]	valid_0's rmse: 36.7157


[I 2025-06-08 22:03:38,344] Trial 10 finished with value: 37.173042308783586 and parameters: {'tweedie_variance_power': 1.3399704628530933, 'num_leaves': 21, 'max_depth': 3, 'learning_rate': 0.026752819722260908, 'n_estimators': 116, 'min_child_samples': 37, 'subsample': 0.9974023209837894, 'colsample_bytree': 0.6412395356899876, 'reg_alpha': 5.725746963838781, 'reg_lambda': 0.00015757532941752426}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,010357 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[956]	valid_0's rmse: 34.732




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,045138 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[956]	valid_0's rmse: 32.9193




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,029496 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[956]	valid_0's rmse: 35.5416


[I 2025-06-08 22:06:12,068] Trial 11 finished with value: 34.397659273675025 and parameters: {'tweedie_variance_power': 1.71141954652316, 'num_leaves': 76, 'max_depth': 12, 'learning_rate': 0.004175486485558755, 'n_estimators': 956, 'min_child_samples': 33, 'subsample': 0.9073565425136079, 'colsample_bytree': 0.9533675841785685, 'reg_alpha': 1.2843520262512463e-08, 'reg_lambda': 9.221458876953003}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,007289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[199]	valid_0's rmse: 33.7593




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,042700 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[499]	valid_0's rmse: 27.0997




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,045403 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[482]	valid_0's rmse: 32.1584


[I 2025-06-08 22:08:12,474] Trial 12 finished with value: 31.005787790373972 and parameters: {'tweedie_variance_power': 1.38647453347922, 'num_leaves': 119, 'max_depth': 15, 'learning_rate': 0.0157353200789445, 'n_estimators': 961, 'min_child_samples': 20, 'subsample': 0.9126849351630719, 'colsample_bytree': 0.9133642073986459, 'reg_alpha': 0.817542025898551, 'reg_lambda': 0.004552993339419774}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,011651 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[805]	valid_0's rmse: 35.5603




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,066198 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[805]	valid_0's rmse: 34.7467




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,030162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[805]	valid_0's rmse: 36.8341


[I 2025-06-08 22:10:05,313] Trial 13 finished with value: 35.71368545349523 and parameters: {'tweedie_variance_power': 1.7098321533145608, 'num_leaves': 65, 'max_depth': 10, 'learning_rate': 0.004457471177431986, 'n_estimators': 805, 'min_child_samples': 31, 'subsample': 0.9861906290842586, 'colsample_bytree': 0.7992970385549523, 'reg_alpha': 0.13881223878002796, 'reg_lambda': 1.4775452473691415}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,006827 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[35]	valid_0's rmse: 34.2137




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,045740 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[82]	valid_0's rmse: 27.7279




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,022189 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:10:26,686] Trial 14 finished with value: 31.484901980362054 and parameters: {'tweedie_variance_power': 1.4186434074210064, 'num_leaves': 105, 'max_depth': 12, 'learning_rate': 0.09714293303331259, 'n_estimators': 628, 'min_child_samples': 18, 'subsample': 0.7476041831435678, 'colsample_bytree': 0.6945520785757116, 'reg_alpha': 3.23497617604734e-07, 'reg_lambda': 7.66462864878744e-05}. Best is trial 4 with value: 30.815121191016633.


Early stopping, best iteration is:
[66]	valid_0's rmse: 32.5131
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,030252 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[995]	valid_0's rmse: 49.8794




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,051731 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[995]	valid_0's rmse: 51.0562




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,043872 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[995]	valid_0's rmse: 52.0603


[I 2025-06-08 22:12:55,320] Trial 15 finished with value: 50.998627760428576 and parameters: {'tweedie_variance_power': 1.1253919396860927, 'num_leaves': 70, 'max_depth': 9, 'learning_rate': 0.0010107921773682583, 'n_estimators': 995, 'min_child_samples': 26, 'subsample': 0.8687551521875166, 'colsample_bytree': 0.9360724662492911, 'reg_alpha': 8.347580865397827, 'reg_lambda': 0.0018034920573032265}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,055215 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[128]	valid_0's rmse: 33.2411




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,130687 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[174]	valid_0's rmse: 28.0714




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,027201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[268]	valid_0's rmse: 31.7824


[I 2025-06-08 22:13:47,957] Trial 16 finished with value: 31.031640848769158 and parameters: {'tweedie_variance_power': 1.6354258568663471, 'num_leaves': 125, 'max_depth': 15, 'learning_rate': 0.04376970281850757, 'n_estimators': 853, 'min_child_samples': 38, 'subsample': 0.7316825396297667, 'colsample_bytree': 0.8294668599107, 'reg_alpha': 0.0004056263306641814, 'reg_lambda': 0.3807554250720366}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,057299 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[419]	valid_0's rmse: 33.7427




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,144335 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[700]	valid_0's rmse: 27.979




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,067244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[571]	valid_0's rmse: 32.6735


[I 2025-06-08 22:15:43,274] Trial 17 finished with value: 31.46508876105604 and parameters: {'tweedie_variance_power': 1.746272154144785, 'num_leaves': 93, 'max_depth': 13, 'learning_rate': 0.01281704366671632, 'n_estimators': 737, 'min_child_samples': 16, 'subsample': 0.9513109596167917, 'colsample_bytree': 0.7645968669223264, 'reg_alpha': 0.1961945830756222, 'reg_lambda': 2.153412576665647e-05}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,010779 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[543]	valid_0's rmse: 34.4229




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,035896 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[543]	valid_0's rmse: 34.3622




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,027389 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[543]	valid_0's rmse: 36.7355


[I 2025-06-08 22:17:16,209] Trial 18 finished with value: 35.17352467728073 and parameters: {'tweedie_variance_power': 1.5513927882234184, 'num_leaves': 53, 'max_depth': 11, 'learning_rate': 0.005770207609578585, 'n_estimators': 543, 'min_child_samples': 12, 'subsample': 0.8731029011063788, 'colsample_bytree': 0.9925027781447592, 'reg_alpha': 0.009428831845240015, 'reg_lambda': 0.0013717123234611422}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,006989 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[910]	valid_0's rmse: 34.4021




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,046848 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[910]	valid_0's rmse: 34.5912




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,021572 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[910]	valid_0's rmse: 37.7393


[I 2025-06-08 22:18:15,594] Trial 19 finished with value: 35.57753913079952 and parameters: {'tweedie_variance_power': 1.4167994198800398, 'num_leaves': 109, 'max_depth': 5, 'learning_rate': 0.002939829826641694, 'n_estimators': 910, 'min_child_samples': 23, 'subsample': 0.9537456507231417, 'colsample_bytree': 0.8860698713045754, 'reg_alpha': 0.00022861973906073, 'reg_lambda': 0.027197834209308144}. Best is trial 4 with value: 30.815121191016633.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,011925 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[299]	valid_0's rmse: 33.0702




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,022309 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[514]	valid_0's rmse: 26.9928




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,027734 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[506]	valid_0's rmse: 31.6211


[I 2025-06-08 22:19:05,710] Trial 20 finished with value: 30.5613867467403 and parameters: {'tweedie_variance_power': 1.3204248419104057, 'num_leaves': 57, 'max_depth': 9, 'learning_rate': 0.01643977179861915, 'n_estimators': 737, 'min_child_samples': 30, 'subsample': 0.8731326082568397, 'colsample_bytree': 0.6178166746764627, 'reg_alpha': 0.9532164101992833, 'reg_lambda': 0.00042852061759573155}. Best is trial 20 with value: 30.5613867467403.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,007120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[312]	valid_0's rmse: 33.0515




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,065343 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[515]	valid_0's rmse: 26.9794




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,086222 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[479]	valid_0's rmse: 31.7935


[I 2025-06-08 22:19:56,189] Trial 21 finished with value: 30.608146196776605 and parameters: {'tweedie_variance_power': 1.3010427978780184, 'num_leaves': 58, 'max_depth': 9, 'learning_rate': 0.016811092351378083, 'n_estimators': 737, 'min_child_samples': 31, 'subsample': 0.8707904884689595, 'colsample_bytree': 0.689768256113098, 'reg_alpha': 0.6638938262635954, 'reg_lambda': 8.882858517816792e-06}. Best is trial 20 with value: 30.5613867467403.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,008128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[296]	valid_0's rmse: 32.8643




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,065552 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[527]	valid_0's rmse: 26.9169




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,059237 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[462]	valid_0's rmse: 31.6546


[I 2025-06-08 22:20:36,672] Trial 22 finished with value: 30.478574675800754 and parameters: {'tweedie_variance_power': 1.307040619429954, 'num_leaves': 53, 'max_depth': 8, 'learning_rate': 0.015672588961174683, 'n_estimators': 699, 'min_child_samples': 32, 'subsample': 0.8720349454701395, 'colsample_bytree': 0.6049237270293225, 'reg_alpha': 1.5254717045613715, 'reg_lambda': 9.053825302956717e-06}. Best is trial 22 with value: 30.478574675800754.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,012367 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[265]	valid_0's rmse: 32.4337




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,047909 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[377]	valid_0's rmse: 27.0999




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,022695 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[405]	valid_0's rmse: 31.857


[I 2025-06-08 22:21:06,654] Trial 23 finished with value: 30.463526879636845 and parameters: {'tweedie_variance_power': 1.2814295427756344, 'num_leaves': 39, 'max_depth': 8, 'learning_rate': 0.017471631310045116, 'n_estimators': 573, 'min_child_samples': 31, 'subsample': 0.8933441634335864, 'colsample_bytree': 0.6134634844901318, 'reg_alpha': 1.1750206686136702, 'reg_lambda': 0.0002868319509561711}. Best is trial 23 with value: 30.463526879636845.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,008476 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[368]	valid_0's rmse: 32.733




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,062575 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[559]	valid_0's rmse: 27.5182




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,026674 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[558]	valid_0's rmse: 32.346


[I 2025-06-08 22:21:39,106] Trial 24 finished with value: 30.865742202862887 and parameters: {'tweedie_variance_power': 1.1391395363195014, 'num_leaves': 36, 'max_depth': 7, 'learning_rate': 0.012345899986057134, 'n_estimators': 560, 'min_child_samples': 38, 'subsample': 0.9024265143036121, 'colsample_bytree': 0.604812219238804, 'reg_alpha': 0.6300257894868377, 'reg_lambda': 0.0007664165288146398}. Best is trial 23 with value: 30.463526879636845.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,006162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[230]	valid_0's rmse: 32.7719




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,032169 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[353]	valid_0's rmse: 27.4118




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,024116 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[510]	valid_0's rmse: 32.1718


[I 2025-06-08 22:22:03,818] Trial 25 finished with value: 30.78515921887139 and parameters: {'tweedie_variance_power': 1.2244129270604036, 'num_leaves': 40, 'max_depth': 6, 'learning_rate': 0.022288490140543408, 'n_estimators': 542, 'min_child_samples': 35, 'subsample': 0.8105587345700627, 'colsample_bytree': 0.6001083654262587, 'reg_alpha': 3.391141144695502, 'reg_lambda': 0.00011232287479921066}. Best is trial 23 with value: 30.463526879636845.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,028725 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[154]	valid_0's rmse: 32.4958




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,025763 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[173]	valid_0's rmse: 27.3094




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,031836 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:22:18,934] Trial 26 finished with value: 30.58277837977019 and parameters: {'tweedie_variance_power': 1.3091701397789224, 'num_leaves': 25, 'max_depth': 8, 'learning_rate': 0.04230632189421475, 'n_estimators': 630, 'min_child_samples': 42, 'subsample': 0.8909745709659364, 'colsample_bytree': 0.6415464689261339, 'reg_alpha': 1.1181769742167995, 'reg_lambda': 0.0056571235806617244}. Best is trial 23 with value: 30.463526879636845.


Early stopping, best iteration is:
[204]	valid_0's rmse: 31.9432
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,016732 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[143]	valid_0's rmse: 32.9961




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,019265 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[269]	valid_0's rmse: 26.9135




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,028854 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[274]	valid_0's rmse: 31.8298


[I 2025-06-08 22:22:42,049] Trial 27 finished with value: 30.579797617394878 and parameters: {'tweedie_variance_power': 1.2749015630282647, 'num_leaves': 47, 'max_depth': 8, 'learning_rate': 0.030951218815724456, 'n_estimators': 485, 'min_child_samples': 29, 'subsample': 0.8375448177277125, 'colsample_bytree': 0.6436888040077695, 'reg_alpha': 0.021262310882975696, 'reg_lambda': 0.0004531470392844813}. Best is trial 23 with value: 30.463526879636845.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,012682 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[294]	valid_0's rmse: 32.2954




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,046564 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[294]	valid_0's rmse: 28.3988




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,026930 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[292]	valid_0's rmse: 32.6618


[I 2025-06-08 22:22:57,648] Trial 28 finished with value: 31.11865317510966 and parameters: {'tweedie_variance_power': 1.178122971891572, 'num_leaves': 32, 'max_depth': 5, 'learning_rate': 0.01679243244607886, 'n_estimators': 294, 'min_child_samples': 50, 'subsample': 0.7756508964978179, 'colsample_bytree': 0.6690103280360906, 'reg_alpha': 1.8920097680460375, 'reg_lambda': 5.537390816336856e-05}. Best is trial 23 with value: 30.463526879636845.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,006758 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[344]	valid_0's rmse: 33.4661




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,070251 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[682]	valid_0's rmse: 26.7992




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,082124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[682]	valid_0's rmse: 31.9803


[I 2025-06-08 22:23:52,989] Trial 29 finished with value: 30.748542671293407 and parameters: {'tweedie_variance_power': 1.359441803108971, 'num_leaves': 63, 'max_depth': 8, 'learning_rate': 0.011952227598871969, 'n_estimators': 682, 'min_child_samples': 34, 'subsample': 0.7219340780049162, 'colsample_bytree': 0.6217146100393279, 'reg_alpha': 0.26333279985220404, 'reg_lambda': 2.4736680909126287e-06}. Best is trial 23 with value: 30.463526879636845.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,017366 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[314]	valid_0's rmse: 38.3865




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,061760 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[314]	valid_0's rmse: 40.6418




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,039140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:24:05,454] Trial 30 finished with value: 39.51803864135626 and parameters: {'tweedie_variance_power': 1.1784193539430825, 'num_leaves': 46, 'max_depth': 3, 'learning_rate': 0.006303756315589805, 'n_estimators': 314, 'min_child_samples': 41, 'subsample': 0.6902730191555704, 'colsample_bytree': 0.6709921899972286, 'reg_alpha': 8.889937697386681, 'reg_lambda': 3.306295243924752e-06}. Best is trial 23 with value: 30.463526879636845.


Did not meet early stopping. Best iteration is:
[314]	valid_0's rmse: 39.5258
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,020733 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[164]	valid_0's rmse: 32.8499




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,088850 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[340]	valid_0's rmse: 26.6423




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,028354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[308]	valid_0's rmse: 31.6716


[I 2025-06-08 22:24:26,834] Trial 31 finished with value: 30.387952576640256 and parameters: {'tweedie_variance_power': 1.280144365524078, 'num_leaves': 47, 'max_depth': 6, 'learning_rate': 0.029087978678239448, 'n_estimators': 513, 'min_child_samples': 30, 'subsample': 0.8381014242357521, 'colsample_bytree': 0.6345578016162069, 'reg_alpha': 0.029350484965655864, 'reg_lambda': 0.0006084299273813127}. Best is trial 31 with value: 30.387952576640256.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,028235 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[264]	valid_0's rmse: 32.6316




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,066975 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[437]	valid_0's rmse: 27.0028




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,101620 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[367]	valid_0's rmse: 31.2008


[I 2025-06-08 22:24:49,906] Trial 32 finished with value: 30.27839423767873 and parameters: {'tweedie_variance_power': 1.258323074099909, 'num_leaves': 30, 'max_depth': 6, 'learning_rate': 0.02172018677056537, 'n_estimators': 592, 'min_child_samples': 30, 'subsample': 0.6072968037461758, 'colsample_bytree': 0.6332361395231594, 'reg_alpha': 0.06055026912931478, 'reg_lambda': 2.495840443831366e-07}. Best is trial 32 with value: 30.27839423767873.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,006802 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[149]	valid_0's rmse: 32.5168




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,072915 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[297]	valid_0's rmse: 27.634




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,031008 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:25:06,631] Trial 33 finished with value: 30.50390193777177 and parameters: {'tweedie_variance_power': 1.2609065932617192, 'num_leaves': 29, 'max_depth': 6, 'learning_rate': 0.04076981081779402, 'n_estimators': 587, 'min_child_samples': 33, 'subsample': 0.6258922104723535, 'colsample_bytree': 0.6994248819253648, 'reg_alpha': 0.0329892162410512, 'reg_lambda': 1.7319119855145782e-07}. Best is trial 32 with value: 30.27839423767873.


Early stopping, best iteration is:
[215]	valid_0's rmse: 31.3609
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,014714 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[152]	valid_0's rmse: 33.3172




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,097968 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[302]	valid_0's rmse: 28.1




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,086387 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[328]	valid_0's rmse: 32.9192


[I 2025-06-08 22:25:28,501] Trial 34 finished with value: 31.44550823094508 and parameters: {'tweedie_variance_power': 1.1770432170615646, 'num_leaves': 46, 'max_depth': 6, 'learning_rate': 0.022997633067979643, 'n_estimators': 450, 'min_child_samples': 25, 'subsample': 0.7881398332017564, 'colsample_bytree': 0.6638244842742339, 'reg_alpha': 0.00439286920268667, 'reg_lambda': 1.8829409384213633e-08}. Best is trial 32 with value: 30.27839423767873.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,012643 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[120]	valid_0's rmse: 32.5753




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,045222 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[116]	valid_0's rmse: 28.1769




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,021969 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:25:40,446] Trial 35 finished with value: 30.556306075480958 and parameters: {'tweedie_variance_power': 1.4555069815427457, 'num_leaves': 22, 'max_depth': 7, 'learning_rate': 0.05924933747986304, 'n_estimators': 499, 'min_child_samples': 36, 'subsample': 0.8182465514787596, 'colsample_bytree': 0.7208324819549004, 'reg_alpha': 0.2348603227189597, 'reg_lambda': 3.104514858603579e-07}. Best is trial 32 with value: 30.27839423767873.


Early stopping, best iteration is:
[140]	valid_0's rmse: 30.9166
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,023810 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[199]	valid_0's rmse: 32.1591




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,037354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[337]	valid_0's rmse: 27.7203




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,024526 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:25:51,059] Trial 36 finished with value: 29.98750007498789 and parameters: {'tweedie_variance_power': 1.217977091145177, 'num_leaves': 30, 'max_depth': 4, 'learning_rate': 0.03131947119944806, 'n_estimators': 338, 'min_child_samples': 28, 'subsample': 0.8488421360345316, 'colsample_bytree': 0.6304232217350535, 'reg_alpha': 0.0672554412829043, 'reg_lambda': 0.00885675126787329}. Best is trial 36 with value: 29.98750007498789.


Did not meet early stopping. Best iteration is:
[300]	valid_0's rmse: 30.0831
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,033939 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[176]	valid_0's rmse: 31.5892




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,028092 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[356]	valid_0's rmse: 27.5362




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,021188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[274]	valid_0's rmse: 30.9684


[I 2025-06-08 22:26:03,657] Trial 37 finished with value: 30.031271515641848 and parameters: {'tweedie_variance_power': 1.106434358572169, 'num_leaves': 32, 'max_depth': 4, 'learning_rate': 0.03254635784710182, 'n_estimators': 357, 'min_child_samples': 28, 'subsample': 0.6079542767421696, 'colsample_bytree': 0.6317852189318288, 'reg_alpha': 0.0007649667466348628, 'reg_lambda': 0.07675927139373978}. Best is trial 36 with value: 29.98750007498789.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,007768 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[201]	valid_0's rmse: 31.9604




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,043091 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[325]	valid_0's rmse: 28.3924




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,028322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[250]	valid_0's rmse: 30.8715


[I 2025-06-08 22:26:14,816] Trial 38 finished with value: 30.408107757368626 and parameters: {'tweedie_variance_power': 1.1141587636284402, 'num_leaves': 30, 'max_depth': 4, 'learning_rate': 0.031013718109314338, 'n_estimators': 325, 'min_child_samples': 23, 'subsample': 0.6115665080067729, 'colsample_bytree': 0.71615313236198, 'reg_alpha': 0.0008163899100093536, 'reg_lambda': 0.07995326557709286}. Best is trial 36 with value: 29.98750007498789.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,012677 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[158]	valid_0's rmse: 32.4052




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,039661 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[206]	valid_0's rmse: 27.8908




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,030283 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[187]	valid_0's rmse: 30.9139


[I 2025-06-08 22:26:25,344] Trial 39 finished with value: 30.40329139480704 and parameters: {'tweedie_variance_power': 1.2128171827762377, 'num_leaves': 20, 'max_depth': 4, 'learning_rate': 0.0555067118768528, 'n_estimators': 231, 'min_child_samples': 28, 'subsample': 0.6022771266060618, 'colsample_bytree': 0.650877469083409, 'reg_alpha': 8.801994833311998e-05, 'reg_lambda': 0.021399087290078746}. Best is trial 36 with value: 29.98750007498789.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,012725 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[86]	valid_0's rmse: 31.8024




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,037208 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[168]	valid_0's rmse: 27.6999




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,026294 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:26:33,139] Trial 40 finished with value: 29.949415665803723 and parameters: {'tweedie_variance_power': 1.1576602490833243, 'num_leaves': 33, 'max_depth': 4, 'learning_rate': 0.0798056539421128, 'n_estimators': 387, 'min_child_samples': 25, 'subsample': 0.638364413530272, 'colsample_bytree': 0.7520654565394608, 'reg_alpha': 5.519020207650274e-06, 'reg_lambda': 0.13069033029873162}. Best is trial 40 with value: 29.949415665803723.


Early stopping, best iteration is:
[109]	valid_0's rmse: 30.346
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,094974 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[59]	valid_0's rmse: 32.3773




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,073844 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[187]	valid_0's rmse: 27.9896




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,017162 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[71]	valid_0's rmse: 30.4214


[I 2025-06-08 22:26:41,727] Trial 41 finished with value: 30.262756694462922 and parameters: {'tweedie_variance_power': 1.1780293108770883, 'num_leaves': 33, 'max_depth': 4, 'learning_rate': 0.09991111076736325, 'n_estimators': 369, 'min_child_samples': 24, 'subsample': 0.6435461256980796, 'colsample_bytree': 0.7597275524953726, 'reg_alpha': 6.8263647891482965e-06, 'reg_lambda': 0.06224702433330896}. Best is trial 40 with value: 29.949415665803723.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,010725 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[68]	valid_0's rmse: 32.0411




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,048491 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[120]	valid_0's rmse: 27.97




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,070260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:26:48,909] Trial 42 finished with value: 30.32400998056543 and parameters: {'tweedie_variance_power': 1.158663355687741, 'num_leaves': 34, 'max_depth': 4, 'learning_rate': 0.08363184011643912, 'n_estimators': 367, 'min_child_samples': 21, 'subsample': 0.6832878405034823, 'colsample_bytree': 0.7663675913078063, 'reg_alpha': 4.790773419295727e-06, 'reg_lambda': 0.24385315171415378}. Best is trial 40 with value: 29.949415665803723.


Early stopping, best iteration is:
[92]	valid_0's rmse: 30.9609
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,050621 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[178]	valid_0's rmse: 32.3496




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,151918 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[222]	valid_0's rmse: 28.669




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,067321 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:26:56,081] Trial 43 finished with value: 30.871005406235852 and parameters: {'tweedie_variance_power': 1.2055955772498872, 'num_leaves': 27, 'max_depth': 3, 'learning_rate': 0.05040527352576598, 'n_estimators': 229, 'min_child_samples': 25, 'subsample': 0.6369112739990992, 'colsample_bytree': 0.7468336833245885, 'reg_alpha': 5.626691664541462e-06, 'reg_lambda': 0.11414510245739991}. Best is trial 40 with value: 29.949415665803723.


Did not meet early stopping. Best iteration is:
[213]	valid_0's rmse: 31.5945
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,022632 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[58]	valid_0's rmse: 32.2139




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,050831 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[227]	valid_0's rmse: 27.7453




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,030313 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:27:04,104] Trial 44 finished with value: 30.207941026980027 and parameters: {'tweedie_variance_power': 1.124192391912287, 'num_leaves': 42, 'max_depth': 4, 'learning_rate': 0.07612710761705006, 'n_estimators': 398, 'min_child_samples': 19, 'subsample': 0.6753751336147961, 'colsample_bytree': 0.7468485091024962, 'reg_alpha': 1.2736507629499375e-05, 'reg_lambda': 0.009834233733372208}. Best is trial 40 with value: 29.949415665803723.


Early stopping, best iteration is:
[111]	valid_0's rmse: 30.6647
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,045595 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[88]	valid_0's rmse: 32.0954




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,030051 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[218]	valid_0's rmse: 27.9085




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,057784 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:27:12,704] Trial 45 finished with value: 30.353452771278313 and parameters: {'tweedie_variance_power': 1.1024160494731226, 'num_leaves': 40, 'max_depth': 4, 'learning_rate': 0.07282524146098726, 'n_estimators': 391, 'min_child_samples': 18, 'subsample': 0.6749463701654714, 'colsample_bytree': 0.7814533369015138, 'reg_alpha': 7.186398227688268e-07, 'reg_lambda': 0.009557558618743587}. Best is trial 40 with value: 29.949415665803723.


Early stopping, best iteration is:
[125]	valid_0's rmse: 31.0565
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,024626 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[27]	valid_0's rmse: 33.806




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,064080 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[148]	valid_0's rmse: 27.2087




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,030981 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[87]	valid_0's rmse: 32.8252


[I 2025-06-08 22:27:22,004] Trial 46 finished with value: 31.279980210535765 and parameters: {'tweedie_variance_power': 1.1490359592095505, 'num_leaves': 145, 'max_depth': 5, 'learning_rate': 0.09341643096426441, 'n_estimators': 255, 'min_child_samples': 13, 'subsample': 0.6510873261372676, 'colsample_bytree': 0.7516975457162908, 'reg_alpha': 1.7149345926313783e-05, 'reg_lambda': 1.0078078512814048}. Best is trial 40 with value: 29.949415665803723.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,009948 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[104]	valid_0's rmse: 32.8023




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,085116 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[173]	valid_0's rmse: 28.3711




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,092515 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[125]	valid_0's rmse: 31.8936


[I 2025-06-08 22:27:28,686] Trial 47 finished with value: 31.022345569130803 and parameters: {'tweedie_variance_power': 1.1012420107734497, 'num_leaves': 45, 'max_depth': 3, 'learning_rate': 0.0754088137530819, 'n_estimators': 179, 'min_child_samples': 19, 'subsample': 0.706356485583306, 'colsample_bytree': 0.7293382693466005, 'reg_alpha': 9.499313555494108e-07, 'reg_lambda': 0.014091272662696025}. Best is trial 40 with value: 29.949415665803723.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,007192 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[89]	valid_0's rmse: 32.7808




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,047116 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[116]	valid_0's rmse: 28.4299




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,029572 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds


[I 2025-06-08 22:27:36,373] Trial 48 finished with value: 30.694992354298734 and parameters: {'tweedie_variance_power': 1.196118250956856, 'num_leaves': 26, 'max_depth': 4, 'learning_rate': 0.06757603651114702, 'n_estimators': 455, 'min_child_samples': 16, 'subsample': 0.6295704409730212, 'colsample_bytree': 0.7857690222562932, 'reg_alpha': 1.030116056136688e-07, 'reg_lambda': 0.041966350583822344}. Best is trial 40 with value: 29.949415665803723.


Early stopping, best iteration is:
[117]	valid_0's rmse: 30.8742
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,035289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51738
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 123
[LightGBM] [Info] Start training from score 3,932825
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[236]	valid_0's rmse: 33.2713




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,057232 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 51812
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 125
[LightGBM] [Info] Start training from score 3,810372
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[371]	valid_0's rmse: 28.8357




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,027196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52345
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 126
[LightGBM] [Info] Start training from score 3,703885
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[274]	valid_0's rmse: 31.2386


[I 2025-06-08 22:27:46,244] Trial 49 finished with value: 31.115186828003672 and parameters: {'tweedie_variance_power': 1.23573966650001, 'num_leaves': 35, 'max_depth': 3, 'learning_rate': 0.0379856610845924, 'n_estimators': 373, 'min_child_samples': 6, 'subsample': 0.6671549398316798, 'colsample_bytree': 0.8137990797773667, 'reg_alpha': 3.601720569279621e-05, 'reg_lambda': 4.356756838596813}. Best is trial 40 with value: 29.949415665803723.


Mejores parámetros encontrados: {'tweedie_variance_power': 1.1576602490833243, 'num_leaves': 33, 'max_depth': 4, 'learning_rate': 0.0798056539421128, 'n_estimators': 387, 'min_child_samples': 25, 'subsample': 0.638364413530272, 'colsample_bytree': 0.7520654565394608, 'reg_alpha': 5.519020207650274e-06, 'reg_lambda': 0.13069033029873162}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0,050587 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52375
[LightGBM] [Info] Number of data points in the train set: 33425, number of used features: 126
[LightGBM] [Info] Start training from score 3,615695
✅ Modelo LightGBM optimizado y entrenado con éxito.


In [28]:
X_kgl = X_kgl[feature_columns]
for col in ['cat1', 'cat2', 'cat3', 'brand']:
    X_kgl[col] = X_kgl[col].astype('category')

y_pred = best_model.predict(X_kgl)


In [29]:
productos_ok = pd.read_csv("https://storage.googleapis.com/open-courses/austral2025-af91/labo3v/product_id_apredecir201912.txt", sep="\t")
result = pd.DataFrame({"product_id": X_kgl["product_id"],  "tn": y_pred})
result = result[result["product_id"].isin(productos_ok["product_id"])]
result = result.groupby("product_id").agg({"tn":"sum"}).reset_index()
result

Unnamed: 0,product_id,tn
0,20001,1162.157907
1,20002,947.178217
2,20003,609.345221
3,20004,574.815572
4,20005,587.670192
...,...,...
775,21263,0.020072
776,21265,0.056652
777,21266,0.055726
778,21267,0.049554


In [30]:
result.to_csv("../../outputs/lgb_exp04_con_bayesiana_tweedy.csv", index=False, sep=',')