# Exp 04: lgb agrupando por periodo-product_id

Usando distribución tweedy y max_bins=500 y optimizacion bayesiana

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gc

##### Merge: periodos + clientes + productos

In [4]:
df = pd.read_csv("../../data/preprocessed/base.csv", sep=',')
df["periodo_dt"] = pd.to_datetime(df["periodo"].astype(str), format="%Y%m")


periodos = pd.date_range(start=df['periodo_dt'].min(), end=df['periodo_dt'].max(), freq="MS")
productos = df['product_id'].unique()


idx = pd.MultiIndex.from_product([productos, periodos], names=['product_id', 'periodo'])
completo = idx.to_frame(index=False)
completo["periodo"] = completo["periodo"].dt.strftime("%Y%m").astype(int)

del periodos, productos, 
gc.collect()
completo

Unnamed: 0,product_id,periodo
0,20524,201701
1,20524,201702
2,20524,201703
3,20524,201704
4,20524,201705
...,...,...
44383,20770,201908
44384,20770,201909
44385,20770,201910
44386,20770,201911


##### Cruzamos con productos

In [5]:
productos = pd.read_csv("../../data/raw/tb_productos.csv", sep='\t')
productos = productos.drop_duplicates(subset=['product_id'], keep='first')
completo = completo.merge(productos, how='left', on="product_id")
del productos
gc.collect()

0

##### Cruzamos con stock

In [6]:
stocks = pd.read_csv("../../data/raw/tb_stocks.csv", sep='\t')
stocks = stocks.groupby(by=["periodo", "product_id"]).agg({"stock_final": "sum"}).reset_index()
completo = completo.merge(stocks, how='left', on=['periodo', 'product_id'])
del stocks
gc.collect()
completo.head()

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,


##### Cruzamos con ventas

In [7]:
sellin = pd.read_csv("../../data/raw/sell-in.csv", sep='\t')
# Agrupar ventas por periodo, cliente y producto
dt = sellin.groupby(by=["periodo","product_id"]).agg({"tn":"sum"}).reset_index()
df_completo = completo.merge(dt, how='left', on=['periodo', 'product_id'])
df_completo['tn'] = df_completo['tn'].fillna(0)
del sellin, dt, completo
gc.collect()
df_completo

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final,tn
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,,6.48085
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,,3.99755
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,,7.14711
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,,6.82163
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,,9.25949
...,...,...,...,...,...,...,...,...,...
44383,20770,201908,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44384,20770,201909,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44385,20770,201910,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44386,20770,201911,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000


##### Target

In [10]:
# Asegurarte de tener 'periodo_dt' (datetime) en completo
df_completo['periodo_dt'] = pd.to_datetime(df_completo['periodo'], format='%Y%m')

# Crear DataFrame auxiliar con tn como target y fecha adelantada
ventas_futuras = df_completo[['periodo_dt', 'product_id', 'tn']].copy()
ventas_futuras['periodo_target_dt'] = ventas_futuras['periodo_dt'] - pd.DateOffset(months=2)
ventas_futuras = ventas_futuras.rename(columns={'tn': 'target'})

# Merge con completo usando periodo adelantado
df_completo = df_completo.merge(
    ventas_futuras[['periodo_target_dt', 'product_id', 'target']],
    how='left',
    left_on=['periodo_dt', 'product_id'],
    right_on=['periodo_target_dt', 'product_id']
)

# Eliminar columna auxiliar
df_completo = df_completo.drop(columns=['periodo_target_dt'])
del ventas_futuras
gc.collect()
print(f"✅ Target generado. Filas con target no nulo: {df_completo['target'].notna().sum()}")

✅ Target generado. Filas con target no nulo: 41922


In [11]:
df_completo

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final,tn,periodo_dt,target_x,target_y,target
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,,6.48085,2017-01-01,7.14711,7.14711,7.14711
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,,3.99755,2017-02-01,6.82163,6.82163,6.82163
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,,7.14711,2017-03-01,9.25949,9.25949,9.25949
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,,6.82163,2017-04-01,7.04113,7.04113,7.04113
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,,9.25949,2017-05-01,5.92819,5.92819,5.92819
...,...,...,...,...,...,...,...,...,...,...,...,...,...
44383,20770,201908,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-08-01,0.00000,0.00000,0.00000
44384,20770,201909,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-09-01,0.00000,0.00000,0.00000
44385,20770,201910,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-10-01,3.18500,3.18500,3.18500
44386,20770,201911,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-11-01,,,


##### Verifico las NaN en el target: Existen porque hay clientes que solo compraron 2 veces.

In [12]:
nan_count = df_completo['target'].isna().sum()
print(f"🔍 Total de NaN en target: {nan_count}")
del nan_count
gc.collect()

🔍 Total de NaN en target: 2466


0

##### Generación de IDs

In [13]:
df_completo = df_completo.sort_values(['periodo', 'product_id'])
df_completo['id'] = df_completo.groupby(['product_id']).cumcount() + 1

##### Periodo 

In [14]:
df_completo["periodo_dt"] = pd.to_datetime(df_completo["periodo"].astype(str), format="%Y%m")

##### Eliminar productos que no nacieron

In [15]:
nacimiento_producto = df.groupby("product_id")["periodo_dt"].agg(["min"]).reset_index()
# Renombrar columna max a muerte_cliente_dt
nacimiento_producto = nacimiento_producto.rename(columns={'min': 'nacimiento_producto'})


# Unir con df_final para traer fecha de muerte del cliente
df_completo = df_completo.merge(nacimiento_producto, on='product_id', how='left')

# Filtrar filas donde periodo_dt > muerte_cliente_dt
df_completo = df_completo[df_completo['periodo_dt'] >= df_completo['nacimiento_producto']]

# Opcional: eliminar columna auxiliar
# df_final = df_final.drop(columns=['muerte_cliente_dt'])
del nacimiento_producto
gc.collect()
print(f"✅ Dataset filtrado con {len(df_completo):,} filas.")

✅ Dataset filtrado con 35,888 filas.


##### Correlograma

In [16]:
# Supongamos que tu DataFrame se llama df
cor_matrix = df_completo.corr(numeric_only=True)

# Tomamos solo la parte superior de la matriz (sin la diagonal)
upper = cor_matrix.where(np.triu(np.ones(cor_matrix.shape), k=1).astype(bool))

# Filtramos correlaciones fuertes (valor absoluto mayor a 0.7)
high_corr = upper.stack().reset_index()
high_corr.columns = ['Variable 1', 'Variable 2', 'Correlación']
high_corr_filtrada = high_corr[high_corr['Correlación'].abs() > 0.7]

print(high_corr_filtrada)

del high_corr_filtrada, cor_matrix, upper, high_corr
gc.collect()

   Variable 1 Variable 2  Correlación
14    periodo         id     0.955974
26         tn   target_x     0.930409
27         tn   target_y     0.930409
28         tn     target     0.930409
30   target_x   target_y     1.000000
31   target_x     target     1.000000
33   target_y     target     1.000000


0

##### Elimino variables muy correlacionadas

In [None]:
df_completo.drop(columns=['periodo'], inplace=True)

##### Extracción de componentes temporales

In [17]:
df_completo['year'] = df_completo['periodo_dt'].dt.year
df_completo['month'] = df_completo['periodo_dt'].dt.month
# Variables dummy estacionales
df_completo['quarter'] = df_completo['periodo_dt'].dt.quarter
df_completo['semester'] = np.where(df_completo['month'] <= 6, 1, 2)
# Efectos de fin de año
df_completo['year_end'] = np.where(df_completo['month'].isin([11, 12]), 1, 0)
df_completo['year_start'] = np.where(df_completo['month'].isin([1, 2]), 1, 0)
# Indicadores estacionales
df_completo['season'] = df_completo['month'] % 12 // 3 + 1  # 1:Invierno, 2:Primavera, etc.
# Variables cíclicas (para capturar patrones estacionales)
df_completo['month_sin'] = np.sin(2 * np.pi * df_completo['month']/12)
df_completo['month_cos'] = np.cos(2 * np.pi * df_completo['month']/12)

#####  Lags, diferencias, medias móviles y otras yerbas

In [18]:
# Ordenamos por fecha para asegurar consistencia
df_completo = df_completo.sort_values('periodo_dt')

## 1. Lags (rezagos) de 1 a 12 meses
for i in range(1, 15):
    df_completo[f'lag_{i}'] = df_completo['tn'].shift(i)

## 2. Diferencias (deltas) - cambio respecto al mes anterior
for i in range(1, 13):
    df_completo[f'delta_{i}'] = df_completo['tn'].diff(i)

## 3. Diferencias porcentuales
for i in range(1, 13):
    df_completo[f'pct_change_{i}'] = df_completo['tn'].pct_change(i)

## 4. Medias móviles (promedios móviles)
windows = [2, 3, 6, 9, 12]  # También puedes incluir [2,4,5,7] según necesidad
for w in windows:
    df_completo[f'rolling_mean_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).mean()
    df_completo[f'rolling_std_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).std()
    df_completo[f'rolling_min_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).min()
    df_completo[f'rolling_max_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).max()
    df_completo[f'rolling_median_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).median()

## 5. Características de tendencia y estacionalidad
df_completo['expanding_mean'] = df_completo['tn'].expanding().mean()
df_completo['cumulative_sum'] = df_completo['tn'].cumsum()

## 6. Características de diferencia estacional (12 meses para datos mensuales)
df_completo['seasonal_diff_12'] = df_completo['tn'].diff(12)

## 7. Estadísticas anuales comparativas
df_completo['vs_prev_year'] = df_completo['tn'] / df_completo['lag_12'] - 1  # Crecimiento interanual

## 8. Componentes de descomposición (simplificada)
# Tendencia (usando media móvil de 12 meses)
df_completo['trend'] = df_completo['tn'].rolling(window=12, min_periods=1).mean()
# Estacionalidad (diferencia entre valor real y tendencia)
df_completo['seasonality'] = df_completo['tn'] - df_completo['trend']

## 9. Variables booleanas para eventos especiales
df_completo['new_high'] = (df_completo['tn'] == df_completo['rolling_max_12']).astype(int)
df_completo['new_low'] = (df_completo['tn'] == df_completo['rolling_min_12']).astype(int)

## 10. Características de aceleración/deceleración
df_completo['acceleration'] = df_completo['delta_1'].diff(1)  # Cambio en la tasa de cambio

##### Estadísticas de Ventana Dinámica

In [19]:
# Medias móviles exponenciales
df_completo['ewm_alpha_0.3'] = df_completo['tn'].ewm(alpha=0.3, adjust=False).mean()
df_completo['ewm_alpha_0.5'] = df_completo['tn'].ewm(alpha=0.5, adjust=False).mean()

# Medias móviles centradas
df_completo['rolling_center_mean_3'] = df_completo['tn'].rolling(window=3, center=True).mean()

# Sumas acumuladas por año
df_completo['ytd_sum'] = df_completo.groupby(df_completo['periodo_dt'].dt.year)['tn'].cumsum()

##### Características de Tendencia y Ciclo

In [20]:
# Modelado de tendencia polinomial
df_completo['time_index'] = range(len(df_completo))
df_completo['trend_linear'] = np.poly1d(np.polyfit(df_completo['time_index'], df_completo['tn'], 1))(df_completo['time_index'])
df_completo['trend_quadratic'] = np.poly1d(np.polyfit(df_completo['time_index'], df_completo['tn'], 2))(df_completo['time_index'])

# Residuales de tendencia
df_completo['residual_trend'] = df_completo['tn'] - df_completo['trend_linear']

##### Características de Cambio de Régimen

In [21]:
# Z-Score respecto a ventana móvil
df_completo['zscore_6'] = (df_completo['tn'] - df_completo['rolling_mean_6']) / df_completo['rolling_std_6']

# Detección de outliers
df_completo['is_outlier_3sigma'] = np.where(np.abs(df_completo['zscore_6']) > 3, 1, 0)

# Cambios bruscos (spikes)
df_completo['spike_up'] = np.where(df_completo['delta_1'] > df_completo['rolling_std_3'], 1, 0)
df_completo['spike_down'] = np.where(df_completo['delta_1'] < -df_completo['rolling_std_3'], 1, 0)

##### Características de Patrones Temporales

In [22]:
# Autocorrelaciones parciales
from statsmodels.tsa.stattools import pacf
pacf_values = pacf(df_completo['tn'].dropna(), nlags=12)
for i in range(1, 6):
    df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]

# Estacionalidad múltiple (si hay patrones semestrales)
df_completo['semester_mean'] = df_completo.groupby(['year', 'semester'])['tn'].transform('mean')

  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo['semester_mean'] = df_completo.groupby(['year', 'semester'])['tn'].transform('mean')


##### Características de Forecast Ingenieriles

In [23]:
# Método ingenuo (último valor)
df_completo['naive_forecast'] = df_completo['tn'].shift(1)

# Seasonal naive (valor del mismo período año anterior)
df_completo['seasonal_naive'] = df_completo['tn'].shift(12)

# Promedio móvil como forecast
df_completo['ma_forecast_3'] = df_completo['rolling_mean_3'].shift(1)

  df_completo['naive_forecast'] = df_completo['tn'].shift(1)
  df_completo['seasonal_naive'] = df_completo['tn'].shift(12)
  df_completo['ma_forecast_3'] = df_completo['rolling_mean_3'].shift(1)


##### Características de Decomposición Temporal

In [24]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Descomposición clásica (additiva o multiplicativa)
result = seasonal_decompose(df_completo['tn'].dropna(), model='additive', period=12)
df_completo['trend_decomposed'] = result.trend
df_completo['seasonal_decomposed'] = result.seasonal
df_completo['residual_decomposed'] = result.resid

  df_completo['trend_decomposed'] = result.trend
  df_completo['seasonal_decomposed'] = result.seasonal
  df_completo['residual_decomposed'] = result.resid


##### Características de Ventanas Asimétricas

In [25]:
# Mejor mes histórico
df_completo['best_month_rank'] = df_completo.groupby('month')['tn'].rank(ascending=False)

# Comparación con mismo mes año anterior
df_completo['vs_last_year_same_month'] = df_completo['tn'] / df_completo['lag_12'] - 1

# Acumulado últimos 3 vs mismos 3 meses año anterior
df_completo['last3_vs_ly3'] = (df_completo['tn'] + df_completo['lag_1'] + df_completo['lag_2']) / (df_completo['lag_12'] + df_completo['lag_13'] + df_completo['lag_14']) - 1

  df_completo['best_month_rank'] = df_completo.groupby('month')['tn'].rank(ascending=False)
  df_completo['vs_last_year_same_month'] = df_completo['tn'] / df_completo['lag_12'] - 1
  df_completo['last3_vs_ly3'] = (df_completo['tn'] + df_completo['lag_1'] + df_completo['lag_2']) / (df_completo['lag_12'] + df_completo['lag_13'] + df_completo['lag_14']) - 1


##### Transformaciones Matemáticas

In [26]:
from scipy import stats
from scipy.special import boxcox1p

# Transformaciones clásicas
df_completo['log_tn'] = np.log1p(df_completo['tn'])
df_completo['sqrt_tn'] = np.sqrt(df_completo['tn'])

# Box-Cox (solo si tn > 0)
mask = df_completo['tn'] > 0
df_completo['boxcox_tn'] = np.nan
df_completo.loc[mask, 'boxcox_tn'], _ = stats.boxcox(df_completo.loc[mask, 'tn'])

# Diferenciación
df_completo['diff1_log'] = df_completo['log_tn'].diff(1)


  df_completo['log_tn'] = np.log1p(df_completo['tn'])
  df_completo['sqrt_tn'] = np.sqrt(df_completo['tn'])
  df_completo['boxcox_tn'] = np.nan
  df_completo['diff1_log'] = df_completo['log_tn'].diff(1)


##### Características de Interacción

In [27]:
# Interacción entre tendencia y estacionalidad
df_completo['trend_season_interaction'] = df_completo['trend'] * df_completo['seasonal_decomposed']

# Interacción lags con estacionalidad
for i in [1, 2, 3, 12]:
    df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']

  df_completo['trend_season_interaction'] = df_completo['trend'] * df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']


##### Nuevos lags cruzados y acumulados

In [28]:
# Lags de rolling_mean
for i in [1, 2, 3]:
    df_completo[f'lag_mean6_{i}'] = df_completo['rolling_mean_6'].shift(i)

  df_completo[f'lag_mean6_{i}'] = df_completo['rolling_mean_6'].shift(i)
  df_completo[f'lag_mean6_{i}'] = df_completo['rolling_mean_6'].shift(i)
  df_completo[f'lag_mean6_{i}'] = df_completo['rolling_mean_6'].shift(i)


##### Slope (pendiente) de la tendencia local

In [29]:
df_completo['trend_slope_6'] = df_completo['rolling_mean_6'].diff(1)

  df_completo['trend_slope_6'] = df_completo['rolling_mean_6'].diff(1)


##### Cambios acumulados

In [30]:
df_completo['cumulative_change_3'] = df_completo['delta_1'] + df_completo['delta_2'] + df_completo['delta_3']

  df_completo['cumulative_change_3'] = df_completo['delta_1'] + df_completo['delta_2'] + df_completo['delta_3']


##### Razones entre ventanas

In [31]:
df_completo['mean_ratio_3_6'] = df_completo['rolling_mean_3'] / (df_completo['rolling_mean_6'] + 1e-6)

  df_completo['mean_ratio_3_6'] = df_completo['rolling_mean_3'] / (df_completo['rolling_mean_6'] + 1e-6)


##### Coeficiente de variación

In [32]:
df_completo['cv_6'] = df_completo['rolling_std_6'] / (df_completo['rolling_mean_6'] + 1e-6)

  df_completo['cv_6'] = df_completo['rolling_std_6'] / (df_completo['rolling_mean_6'] + 1e-6)


##### Ratio entre último valor y media móvil

In [33]:
df_completo['tn_vs_mean_3'] = df_completo['tn'] / (df_completo['rolling_mean_3'] + 1e-6)

  df_completo['tn_vs_mean_3'] = df_completo['tn'] / (df_completo['rolling_mean_3'] + 1e-6)


##### Amplitud de la serie

In [34]:
df_completo['rolling_amplitude_6'] = df_completo['rolling_max_6'] - df_completo['rolling_min_6']

  df_completo['rolling_amplitude_6'] = df_completo['rolling_max_6'] - df_completo['rolling_min_6']


##### Count positivo/negativo en ventana

In [35]:
df_completo['positive_count_6'] = df_completo['tn'].rolling(6).apply(lambda x: (x > 0).sum())

  df_completo['positive_count_6'] = df_completo['tn'].rolling(6).apply(lambda x: (x > 0).sum())


##### Media de deltas

In [36]:
df_completo['delta_mean_3'] = df_completo['delta_1'] + df_completo['delta_2'] + df_completo['delta_3']

  df_completo['delta_mean_3'] = df_completo['delta_1'] + df_completo['delta_2'] + df_completo['delta_3']


##### Rolling skewness y kurtosis

In [37]:
df_completo['skew_6'] = df_completo['tn'].rolling(6).skew()
df_completo['kurt_6'] = df_completo['tn'].rolling(6).kurt()

  df_completo['skew_6'] = df_completo['tn'].rolling(6).skew()
  df_completo['kurt_6'] = df_completo['tn'].rolling(6).kurt()


##### Cambio en media móvil

In [38]:
df_completo['mean_change_6'] = df_completo['rolling_mean_6'].diff()

  df_completo['mean_change_6'] = df_completo['rolling_mean_6'].diff()


#####  Slope entre dos puntos (último vs. anterior)

In [39]:
df_completo['slope_last2'] = df_completo['tn'] - df_completo['lag_1']

  df_completo['slope_last2'] = df_completo['tn'] - df_completo['lag_1']


##### Momentum

In [40]:
df_completo['momentum_3'] = df_completo['tn'] - df_completo['lag_3']

  df_completo['momentum_3'] = df_completo['tn'] - df_completo['lag_3']


##### Rolling quantiles

In [41]:
df_completo['quantile_25_6'] = df_completo['tn'].rolling(6).quantile(0.25)
df_completo['quantile_75_6'] = df_completo['tn'].rolling(6).quantile(0.75)

  df_completo['quantile_25_6'] = df_completo['tn'].rolling(6).quantile(0.25)
  df_completo['quantile_75_6'] = df_completo['tn'].rolling(6).quantile(0.75)


##### Categoría como frecuencia histórica

In [43]:
df_completo['freq_cat1'] = df_completo.groupby('cat1')['tn'].transform('count')

##### Cantidad de meses sin ventas en los últimos 6

In [44]:
df_completo['zeros_6'] = df_completo['tn'].rolling(6).apply(lambda x: (x == 0).sum())

  df_completo['zeros_6'] = df_completo['tn'].rolling(6).apply(lambda x: (x == 0).sum())


#####  Ratio de spikes

In [45]:
df_completo['spike_ratio_3'] = (df_completo['spike_up'] + df_completo['spike_down']) / 3

  df_completo['spike_ratio_3'] = (df_completo['spike_up'] + df_completo['spike_down']) / 3


##### Ratio entre tn y su z-score

In [46]:
df_completo['tn_zscore_ratio'] = df_completo['tn'] / (df_completo['zscore_6'] + 1e-6)

  df_completo['tn_zscore_ratio'] = df_completo['tn'] / (df_completo['zscore_6'] + 1e-6)


##### Cambio en residual

In [47]:
df_completo['residual_change'] = df_completo['residual_decomposed'].diff()

  df_completo['residual_change'] = df_completo['residual_decomposed'].diff()


##### Valor relativo respecto al rango local

In [48]:
df_completo['position_in_range_6'] = (df_completo['tn'] - df_completo['rolling_min_6']) / (
    df_completo['rolling_max_6'] - df_completo['rolling_min_6'] + 1e-6)

  df_completo['position_in_range_6'] = (df_completo['tn'] - df_completo['rolling_min_6']) / (


##### Correlaciones

In [50]:
# Paso 1: Pivot para tener productos como columnas
pivot = df_completo.pivot(index='periodo', columns='product_id', values='tn')

# Paso 2: Top 15 productos más vendidos (en todo el histórico)
top_15_productos = (
    df_completo.groupby('product_id')['tn']
    .sum()
    .sort_values(ascending=False)
    .head(15)
    .index.tolist()
)

# Paso 3: Calcular la media de correlación de cada producto con los top 15
correlations = []

for prod in pivot.columns:
    if prod in top_15_productos:
        # No calculamos autocorrelación
        continue
    # Obtener serie del producto actual
    serie_prod = pivot[prod]
    
    # Calcular correlaciones con cada uno de los top 15
    corrs = []
    for top_prod in top_15_productos:
        if top_prod in pivot.columns:
            corr = serie_prod.corr(pivot[top_prod])
            if pd.notnull(corr):
                corrs.append(corr)
    if corrs:
        correlations.append((prod, np.mean(corrs)))

# Paso 4: Convertir a DataFrame y mapear
df_corrs = pd.DataFrame(correlations, columns=['product_id', 'correlacion_top15'])
df_completo = df_completo.merge(df_corrs, on='product_id', how='left')


  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


##### Completamos NaN del target con ceros

In [52]:
df_completo['target'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_completo['target'].fillna(0, inplace=True)


##### Dividimos el dataset

In [53]:
dt_kgl = df_completo[df_completo["periodo"].isin([201912])]
ts = df_completo.drop(df_completo[df_completo["periodo"].isin([201911,201912])].index,axis=0)

In [54]:
# Asegurate de que 'periodo' sea datetime si no lo es
# df_completo['periodo_dt'] = pd.to_datetime(df_completo['periodo'].astype(str), format='%Y%m')

# features
feature_columns = [col for col in ts.columns if col not in ['periodo_dt', 'tn_target', 'nacimiento_producto', 'target']]

# Definimos los límites
train_cutoff = '2019-09-01'
valid_cutoff = '2019-11-01'

# Split temporal
df_train = df_completo[df_completo['periodo_dt'] < train_cutoff]
df_valid = df_completo[(df_completo['periodo_dt'] >= train_cutoff) & (df_completo['periodo_dt'] < valid_cutoff)]

# Separás features y target
X_train = df_train.drop(columns=feature_columns)
y_train = df_train['target']

X_valid = df_valid.drop(columns=feature_columns)
y_valid = df_valid['target']


##### Productos a predecir

In [55]:
X_kgl = dt_kgl[feature_columns]
productos_a_predecir = pd.read_csv("../../data/raw/product_id_apredecir201912.csv")
# Filtrar filas
productos_filtrados = productos_a_predecir['product_id'].unique()
X_kgl = X_kgl[X_kgl['product_id'].isin(productos_filtrados)]
X_kgl['product_id'].nunique()

780

In [None]:
df_completo.info()

<class 'pandas.core.frame.DataFrame'>
Index: 35888 entries, 0 to 44387
Columns: 130 entries, product_id to lag_12_season_adj
dtypes: datetime64[ns](2), float64(108), int32(13), int64(3), object(4)
memory usage: 35.1+ MB


##### Optimización de Hiperparámetros con Optuna

In [59]:
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import optuna
import numpy as np

# Preparar datos
feature_columns = [col for col in ts.columns if col not in ['periodo_dt', 'tn_target', 'nacimiento_producto', 'target']]
X = ts[feature_columns]
y = ts['target']

for col in ['cat1', 'cat2', 'cat3', 'brand']:
    X[col] = X[col].astype('category')

# Eliminar NaNs en el target
if y.isnull().any():
    print("⚠️ Target tiene NaN, se eliminarán.")
    mask = ~y.isnull()
    X = X[mask]
    y = y[mask]

# Función objetivo para Optuna
def objective(trial):
    params = {
        'objective': 'regression',
        # 'tweedie_variance_power': trial.suggest_float('tweedie_variance_power', 1.1, 1.9),
        # 'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'random_state': 12345,
        'max_bin': 500,
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True)
    }

    tscv = TimeSeriesSplit(n_splits=3)
    rmses = []

    for train_idx, valid_idx in tscv.split(X):
        X_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

        model = lgb.LGBMRegressor(**params)
        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            callbacks=[
                lgb.early_stopping(50),
                lgb.log_evaluation(0)
            ]
        )

        preds = model.predict(X_valid)
        rmse = mean_squared_error(y_valid, preds, squared=False)
        rmses.append(rmse)

    return np.mean(rmses)

# Crear estudio Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Mostrar mejores parámetros
print("Mejores parámetros encontrados:", study.best_params)

# Entrenar modelo final
best_params = study.best_params
best_model = lgb.LGBMRegressor(**best_params, objective='regression', max_bin=500, random_state=12345)
best_model.fit(X, y)

print("✅ Modelo LightGBM optimizado y entrenado con éxito.")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_in

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006722 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[547]	valid_0's rmse: 26.7418
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007987 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219




Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[547]	valid_0's rmse: 28.589




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009639 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:14,999] Trial 0 finished with value: 26.48447930536942 and parameters: {'num_leaves': 96, 'max_depth': 10, 'learning_rate': 0.0026716586898084447, 'n_estimators': 547, 'min_child_samples': 23, 'subsample': 0.9957854230131103, 'colsample_bytree': 0.8785596522705639, 'reg_alpha': 1.5791251606704067e-08, 'reg_lambda': 2.643085170524024e-08}. Best is trial 0 with value: 26.48447930536942.


Did not meet early stopping. Best iteration is:
[547]	valid_0's rmse: 24.1225
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007478 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[110]	valid_0's rmse: 69.9585
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007566 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Did not meet early stopping. Best iteration is:
[110]	valid_0's rmse: 67.4
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008167 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:17,570] Trial 1 finished with value: 65.90432967788031 and parameters: {'num_leaves': 46, 'max_depth': 10, 'learning_rate': 0.0037386367138065404, 'n_estimators': 110, 'min_child_samples': 42, 'subsample': 0.8486532111894338, 'colsample_bytree': 0.9459769552880898, 'reg_alpha': 1.0764240966161965e-05, 'reg_lambda': 1.306477465310722e-08}. Best is trial 0 with value: 26.48447930536942.


Did not meet early stopping. Best iteration is:
[110]	valid_0's rmse: 60.3544
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005917 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[254]	valid_0's rmse: 67.2354
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006738 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152




[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[254]	valid_0's rmse: 65.5514
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008096 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864




[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:18,684] Trial 2 finished with value: 64.11249858050026 and parameters: {'num_leaves': 119, 'max_depth': 3, 'learning_rate': 0.0018484601889865973, 'n_estimators': 254, 'min_child_samples': 47, 'subsample': 0.8301167831518592, 'colsample_bytree': 0.828032609096175, 'reg_alpha': 0.00014282421978385876, 'reg_lambda': 0.07576119221233767}. Best is trial 0 with value: 26.48447930536942.


Did not meet early stopping. Best iteration is:
[254]	valid_0's rmse: 59.5507
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006498 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[106]	valid_0's rmse: 15.7335
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Did not meet early stopping. Best iteration is:
[106]	valid_0's rmse: 20.0388
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009926 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:20,254] Trial 3 finished with value: 16.207101694830186 and parameters: {'num_leaves': 25, 'max_depth': 12, 'learning_rate': 0.028066739582405624, 'n_estimators': 106, 'min_child_samples': 43, 'subsample': 0.6844797152977357, 'colsample_bytree': 0.9374525930143663, 'reg_alpha': 1.7446508321932195e-07, 'reg_lambda': 0.0025035130609118213}. Best is trial 3 with value: 16.207101694830186.


Did not meet early stopping. Best iteration is:
[106]	valid_0's rmse: 12.849
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005370 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[258]	valid_0's rmse: 9.6884
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006974 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[391]	valid_0's rmse: 13.7932




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010099 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:24,057] Trial 4 finished with value: 12.338033291630936 and parameters: {'num_leaves': 25, 'max_depth': 9, 'learning_rate': 0.014703472686255938, 'n_estimators': 472, 'min_child_samples': 15, 'subsample': 0.814769735151777, 'colsample_bytree': 0.8395249415461857, 'reg_alpha': 0.003596502661327771, 'reg_lambda': 0.0002990562788057367}. Best is trial 4 with value: 12.338033291630936.


Early stopping, best iteration is:
[287]	valid_0's rmse: 13.5325
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006903 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[217]	valid_0's rmse: 13.0816




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008176 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[186]	valid_0's rmse: 14.1777
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011369 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864




[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:28,093] Trial 5 finished with value: 13.334316506923637 and parameters: {'num_leaves': 122, 'max_depth': 8, 'learning_rate': 0.029943851593168147, 'n_estimators': 284, 'min_child_samples': 29, 'subsample': 0.7664846689657423, 'colsample_bytree': 0.6326884982312841, 'reg_alpha': 0.008670915130490392, 'reg_lambda': 5.547370607246951e-05}. Best is trial 4 with value: 12.338033291630936.


Early stopping, best iteration is:
[149]	valid_0's rmse: 12.7436
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006619 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[298]	valid_0's rmse: 76.0181
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006773 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Did not meet early stopping. Best iteration is:
[298]	valid_0's rmse: 73.3969




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008612 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:30,495] Trial 6 finished with value: 71.8878541811318 and parameters: {'num_leaves': 45, 'max_depth': 8, 'learning_rate': 0.0012009565132998176, 'n_estimators': 298, 'min_child_samples': 46, 'subsample': 0.9383977894973005, 'colsample_bytree': 0.9107382457332969, 'reg_alpha': 0.4616527555211521, 'reg_lambda': 8.895492680595131}. Best is trial 4 with value: 12.338033291630936.


Did not meet early stopping. Best iteration is:
[298]	valid_0's rmse: 66.2486
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005858 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[383]	valid_0's rmse: 15.9586




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008177 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[449]	valid_0's rmse: 17.5646




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009739 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:37,365] Trial 7 finished with value: 16.070138902660872 and parameters: {'num_leaves': 61, 'max_depth': 10, 'learning_rate': 0.01939831044049239, 'n_estimators': 449, 'min_child_samples': 46, 'subsample': 0.691597376867415, 'colsample_bytree': 0.805690527241995, 'reg_alpha': 6.846853498051515e-07, 'reg_lambda': 1.3899469188446897e-08}. Best is trial 4 with value: 12.338033291630936.


Early stopping, best iteration is:
[343]	valid_0's rmse: 14.6872
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006794 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[187]	valid_0's rmse: 13.4342
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008727 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219




Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[272]	valid_0's rmse: 16.9212




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010458 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:40,019] Trial 8 finished with value: 14.23771563003414 and parameters: {'num_leaves': 109, 'max_depth': 8, 'learning_rate': 0.05249349804034687, 'n_estimators': 540, 'min_child_samples': 42, 'subsample': 0.7353855082611174, 'colsample_bytree': 0.7922911283497522, 'reg_alpha': 0.8897167726409524, 'reg_lambda': 2.216337286603451e-07}. Best is trial 4 with value: 12.338033291630936.


Early stopping, best iteration is:
[90]	valid_0's rmse: 12.3577
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006505 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[339]	valid_0's rmse: 44.645
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007749 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Did not meet early stopping. Best iteration is:
[339]	valid_0's rmse: 44.4609
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011052 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864




[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:49:53,065] Trial 9 finished with value: 43.07431374266553 and parameters: {'num_leaves': 75, 'max_depth': 12, 'learning_rate': 0.002487551646974148, 'n_estimators': 339, 'min_child_samples': 12, 'subsample': 0.761648314416377, 'colsample_bytree': 0.896617248228247, 'reg_alpha': 0.08244801224031995, 'reg_lambda': 2.4290728199934674e-05}. Best is trial 4 with value: 12.338033291630936.


Did not meet early stopping. Best iteration is:
[339]	valid_0's rmse: 40.117
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006130 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[433]	valid_0's rmse: 11.3583
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009542 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Did not meet early stopping. Best iteration is:
[842]	valid_0's rmse: 13.4332




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012585 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:50:00,776] Trial 10 finished with value: 12.533155735150316 and parameters: {'num_leaves': 29, 'max_depth': 15, 'learning_rate': 0.007507929613915941, 'n_estimators': 856, 'min_child_samples': 8, 'subsample': 0.6088416401407801, 'colsample_bytree': 0.6730708890970138, 'reg_alpha': 0.0019059655098903573, 'reg_lambda': 0.004291268342944297}. Best is trial 4 with value: 12.338033291630936.


Early stopping, best iteration is:
[568]	valid_0's rmse: 12.808
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008501 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[489]	valid_0's rmse: 10.6539
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009858 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[655]	valid_0's rmse: 14.3355




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:50:06,725] Trial 11 finished with value: 12.459273545364384 and parameters: {'num_leaves': 23, 'max_depth': 15, 'learning_rate': 0.007630461413844083, 'n_estimators': 867, 'min_child_samples': 5, 'subsample': 0.6052800941894051, 'colsample_bytree': 0.6949187233947384, 'reg_alpha': 0.0009219151598849378, 'reg_lambda': 0.007320417133965412}. Best is trial 4 with value: 12.338033291630936.


Early stopping, best iteration is:
[559]	valid_0's rmse: 12.3884
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[346]	valid_0's rmse: 13.3728




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009642 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[437]	valid_0's rmse: 14.1336




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012745 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:50:10,562] Trial 12 finished with value: 13.639200639320478 and parameters: {'num_leaves': 150, 'max_depth': 5, 'learning_rate': 0.009366595552370682, 'n_estimators': 814, 'min_child_samples': 16, 'subsample': 0.8770099267564224, 'colsample_bytree': 0.719263287037496, 'reg_alpha': 0.00015212162449722714, 'reg_lambda': 0.08483022097710652}. Best is trial 4 with value: 12.338033291630936.


Early stopping, best iteration is:
[468]	valid_0's rmse: 13.4112
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006586 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[636]	valid_0's rmse: 10.2866
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Did not meet early stopping. Best iteration is:
[705]	valid_0's rmse: 13.7394




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011096 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:50:17,322] Trial 13 finished with value: 12.06606458086028 and parameters: {'num_leaves': 22, 'max_depth': 15, 'learning_rate': 0.005677633089306113, 'n_estimators': 705, 'min_child_samples': 6, 'subsample': 0.6049173999103752, 'colsample_bytree': 0.7460131857587301, 'reg_alpha': 0.01299932623236467, 'reg_lambda': 1.7399888915012284e-05}. Best is trial 13 with value: 12.06606458086028.


Did not meet early stopping. Best iteration is:
[705]	valid_0's rmse: 12.1723
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006754 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[456]	valid_0's rmse: 12.6819




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008795 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[290]	valid_0's rmse: 14.3009
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153




[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:50:24,178] Trial 14 finished with value: 13.370345495507001 and parameters: {'num_leaves': 45, 'max_depth': 13, 'learning_rate': 0.015648484880287854, 'n_estimators': 704, 'min_child_samples': 20, 'subsample': 0.8958053201267004, 'colsample_bytree': 0.7658888538672737, 'reg_alpha': 0.03374007232064283, 'reg_lambda': 2.0896523631395192e-06}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[266]	valid_0's rmse: 13.1282
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007026 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[700]	valid_0's rmse: 13.4593
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007089 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152




[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[700]	valid_0's rmse: 15.4076




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009078 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[700]	valid_0's rmse: 12.7677


[I 2025-06-10 09:50:33,469] Trial 15 finished with value: 13.878160483353659 and parameters: {'num_leaves': 67, 'max_depth': 6, 'learning_rate': 0.004667973834951402, 'n_estimators': 700, 'min_child_samples': 30, 'subsample': 0.6759552101209442, 'colsample_bytree': 0.8436830211127679, 'reg_alpha': 9.611361829708793e-06, 'reg_lambda': 8.26712376273735e-06}. Best is trial 13 with value: 12.06606458086028.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005905 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[46]	valid_0's rmse: 10.2641
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008974 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[60]	valid_0's rmse: 13.6389




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009958 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:50:35,172] Trial 16 finished with value: 12.26489660039214 and parameters: {'num_leaves': 37, 'max_depth': 13, 'learning_rate': 0.07887595723245319, 'n_estimators': 688, 'min_child_samples': 11, 'subsample': 0.7757248014216936, 'colsample_bytree': 0.9878185896141183, 'reg_alpha': 9.108277224870104, 'reg_lambda': 0.00046162139663303664}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[49]	valid_0's rmse: 12.8917
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006180 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[60]	valid_0's rmse: 10.1506
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008495 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[83]	valid_0's rmse: 13.6754




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010812 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:50:37,509] Trial 17 finished with value: 12.1000551365153 and parameters: {'num_leaves': 54, 'max_depth': 14, 'learning_rate': 0.06052258028571644, 'n_estimators': 985, 'min_child_samples': 10, 'subsample': 0.6328667541448032, 'colsample_bytree': 0.9843573995712906, 'reg_alpha': 5.726353057503194, 'reg_lambda': 8.330065668219256e-07}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[63]	valid_0's rmse: 12.4742
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005959 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[165]	valid_0's rmse: 13.3419




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008740 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[201]	valid_0's rmse: 16.961




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:50:40,305] Trial 18 finished with value: 14.207383892072144 and parameters: {'num_leaves': 55, 'max_depth': 14, 'learning_rate': 0.04606503332129903, 'n_estimators': 984, 'min_child_samples': 35, 'subsample': 0.6391855171566403, 'colsample_bytree': 0.7605597194839194, 'reg_alpha': 5.278633587713086, 'reg_lambda': 8.474320318128043e-07}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[120]	valid_0's rmse: 12.3193
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006581 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[37]	valid_0's rmse: 10.1495
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007865 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152




[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[47]	valid_0's rmse: 14.366




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010610 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:50:43,801] Trial 19 finished with value: 12.40503723994015 and parameters: {'num_leaves': 85, 'max_depth': 12, 'learning_rate': 0.09747974909469817, 'n_estimators': 973, 'min_child_samples': 5, 'subsample': 0.6520767171986733, 'colsample_bytree': 0.9940444959453619, 'reg_alpha': 0.3238475849787472, 'reg_lambda': 2.4424778665306663e-07}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[38]	valid_0's rmse: 12.6996
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007537 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[674]	valid_0's rmse: 13.5626




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[791]	valid_0's rmse: 14.733




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011219 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:51:05,435] Trial 20 finished with value: 13.94190413188251 and parameters: {'num_leaves': 77, 'max_depth': 15, 'learning_rate': 0.004854848775403811, 'n_estimators': 791, 'min_child_samples': 21, 'subsample': 0.7125895227255059, 'colsample_bytree': 0.6260078971473299, 'reg_alpha': 0.03477263378845133, 'reg_lambda': 7.086730272354471e-06}. Best is trial 13 with value: 12.06606458086028.


Did not meet early stopping. Best iteration is:
[791]	valid_0's rmse: 13.5301
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[40]	valid_0's rmse: 10.2243




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008973 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[63]	valid_0's rmse: 13.4091




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011041 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:51:07,196] Trial 21 finished with value: 12.152189562522992 and parameters: {'num_leaves': 33, 'max_depth': 13, 'learning_rate': 0.0844303439351546, 'n_estimators': 661, 'min_child_samples': 11, 'subsample': 0.6342565158939746, 'colsample_bytree': 0.9893186377183354, 'reg_alpha': 5.566231813031233, 'reg_lambda': 0.00016948279472847044}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[44]	valid_0's rmse: 12.8231
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006521 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[57]	valid_0's rmse: 10.1861
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008581 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[103]	valid_0's rmse: 13.2796




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016994 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:51:09,646] Trial 22 finished with value: 12.196236023669798 and parameters: {'num_leaves': 36, 'max_depth': 14, 'learning_rate': 0.058625825481134264, 'n_estimators': 629, 'min_child_samples': 9, 'subsample': 0.6377432801051347, 'colsample_bytree': 0.9685932766396611, 'reg_alpha': 2.1281658063510593, 'reg_lambda': 0.00014677725484154916}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[68]	valid_0's rmse: 13.123
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006119 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[83]	valid_0's rmse: 13.4755
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007489 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[147]	valid_0's rmse: 13.7759




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009770 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[112]	valid_0's rmse: 13.3427


[I 2025-06-10 09:51:13,415] Trial 23 finished with value: 13.531361834058371 and parameters: {'num_leaves': 53, 'max_depth': 13, 'learning_rate': 0.03952352965544902, 'n_estimators': 898, 'min_child_samples': 16, 'subsample': 0.6064249356326412, 'colsample_bytree': 0.7288591540530915, 'reg_alpha': 0.2214649379168449, 'reg_lambda': 3.1247934587538978e-06}. Best is trial 13 with value: 12.06606458086028.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009770 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[48]	valid_0's rmse: 10.2146
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008067 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[83]	valid_0's rmse: 13.4126




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009998 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:51:15,144] Trial 24 finished with value: 12.225301789079879 and parameters: {'num_leaves': 36, 'max_depth': 14, 'learning_rate': 0.07305289727763564, 'n_estimators': 760, 'min_child_samples': 13, 'subsample': 0.6599650247979233, 'colsample_bytree': 0.9397434052404642, 'reg_alpha': 9.378233859353548, 'reg_lambda': 1.9035027378843292e-07}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[57]	valid_0's rmse: 13.0487
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006427 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[118]	valid_0's rmse: 10.9821
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007779 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[224]	valid_0's rmse: 13.1366
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008792 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:51:17,376] Trial 25 finished with value: 12.328553133419028 and parameters: {'num_leaves': 20, 'max_depth': 12, 'learning_rate': 0.029084708240457745, 'n_estimators': 624, 'min_child_samples': 8, 'subsample': 0.7170854417107223, 'colsample_bytree': 0.8609100518820183, 'reg_alpha': 0.012886569402283592, 'reg_lambda': 1.9017544406439723e-05}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[146]	valid_0's rmse: 12.8669
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006901 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[496]	valid_0's rmse: 12.5502




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008051 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[247]	valid_0's rmse: 14.3215
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010741 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153




[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[371]	valid_0's rmse: 12.6507


[I 2025-06-10 09:51:22,666] Trial 26 finished with value: 13.174119456389226 and parameters: {'num_leaves': 37, 'max_depth': 11, 'learning_rate': 0.018161848338083406, 'n_estimators': 932, 'min_child_samples': 25, 'subsample': 0.6369238373390828, 'colsample_bytree': 0.9151576395226693, 'reg_alpha': 1.742260823492027, 'reg_lambda': 0.0007530559339978449}. Best is trial 13 with value: 12.06606458086028.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006600 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[35]	valid_0's rmse: 13.2988




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008626 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[61]	valid_0's rmse: 14.234




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008884 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:51:25,262] Trial 27 finished with value: 13.71516338040442 and parameters: {'num_leaves': 64, 'max_depth': 14, 'learning_rate': 0.09030007657078955, 'n_estimators': 440, 'min_child_samples': 17, 'subsample': 0.6235505677521745, 'colsample_bytree': 0.9622674461188664, 'reg_alpha': 0.11765851304490046, 'reg_lambda': 0.03726885990644919}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[83]	valid_0's rmse: 13.6127
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007138 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[328]	valid_0's rmse: 10.9379
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007396 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[519]	valid_0's rmse: 14.0861




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010519 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:51:34,159] Trial 28 finished with value: 12.620766172645522 and parameters: {'num_leaves': 51, 'max_depth': 15, 'learning_rate': 0.011087043088541524, 'n_estimators': 621, 'min_child_samples': 5, 'subsample': 0.6696024862498939, 'colsample_bytree': 0.6633451209067411, 'reg_alpha': 1.2742078466732183, 'reg_lambda': 8.821081034586898e-05}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[366]	valid_0's rmse: 12.8383
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008643 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[541]	valid_0's rmse: 12.851
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008582 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329




[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[541]	valid_0's rmse: 15.1439




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:51:58,352] Trial 29 finished with value: 13.970327155172528 and parameters: {'num_leaves': 92, 'max_depth': 11, 'learning_rate': 0.005859040119136409, 'n_estimators': 541, 'min_child_samples': 19, 'subsample': 0.9577923608496581, 'colsample_bytree': 0.8832694170791976, 'reg_alpha': 0.0006126154188953804, 'reg_lambda': 5.683100174248293e-08}. Best is trial 13 with value: 12.06606458086028.


Did not meet early stopping. Best iteration is:
[541]	valid_0's rmse: 13.9161
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006222 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[744]	valid_0's rmse: 17.9657
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009583 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152




[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[744]	valid_0's rmse: 20.1812




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013300 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:10,718] Trial 30 finished with value: 18.40171609826917 and parameters: {'num_leaves': 32, 'max_depth': 13, 'learning_rate': 0.002804236025882847, 'n_estimators': 744, 'min_child_samples': 23, 'subsample': 0.7213297474923135, 'colsample_bytree': 0.9991192106579332, 'reg_alpha': 5.0793921698778105e-05, 'reg_lambda': 8.055796345574201e-07}. Best is trial 13 with value: 12.06606458086028.


Did not meet early stopping. Best iteration is:
[744]	valid_0's rmse: 17.0582
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008438 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[55]	valid_0's rmse: 10.141
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[72]	valid_0's rmse: 13.4737
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010849 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746




Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:12,996] Trial 31 finished with value: 12.174331706754778 and parameters: {'num_leaves': 41, 'max_depth': 14, 'learning_rate': 0.06398539540678345, 'n_estimators': 617, 'min_child_samples': 9, 'subsample': 0.628636892806052, 'colsample_bytree': 0.9672756684813689, 'reg_alpha': 2.4706793823608852, 'reg_lambda': 0.00012223804659143478}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[62]	valid_0's rmse: 12.9083
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007685 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[55]	valid_0's rmse: 10.1632
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010121 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152




[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[86]	valid_0's rmse: 13.3726




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011769 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:15,280] Trial 32 finished with value: 12.093511120091344 and parameters: {'num_leaves': 42, 'max_depth': 14, 'learning_rate': 0.06738401103405947, 'n_estimators': 576, 'min_child_samples': 10, 'subsample': 0.69603929211189, 'colsample_bytree': 0.9634516977295998, 'reg_alpha': 2.8431301939068976, 'reg_lambda': 3.3006663617520245e-05}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[57]	valid_0's rmse: 12.7448
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008911 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[105]	valid_0's rmse: 10.0402
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008402 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[130]	valid_0's rmse: 13.5962
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008727 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153




[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:19,820] Trial 33 finished with value: 12.160499759258004 and parameters: {'num_leaves': 56, 'max_depth': 15, 'learning_rate': 0.03428540227510848, 'n_estimators': 505, 'min_child_samples': 13, 'subsample': 0.6934827346087449, 'colsample_bytree': 0.9287227586459147, 'reg_alpha': 0.507361007602089, 'reg_lambda': 1.1503834803747918e-05}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[140]	valid_0's rmse: 12.845
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006296 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[157]	valid_0's rmse: 10.25
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007925 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[255]	valid_0's rmse: 13.4461
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:22,442] Trial 34 finished with value: 12.161996967704013 and parameters: {'num_leaves': 20, 'max_depth': 11, 'learning_rate': 0.023115214606423327, 'n_estimators': 413, 'min_child_samples': 10, 'subsample': 0.6010296820587108, 'colsample_bytree': 0.9551461128824762, 'reg_alpha': 0.0652056980672421, 'reg_lambda': 2.4069915599109708e-06}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[165]	valid_0's rmse: 12.7899
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006951 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[91]	valid_0's rmse: 10.9622
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009720 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Did not meet early stopping. Best iteration is:
[185]	valid_0's rmse: 13.1265
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014172 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864




[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:25,094] Trial 35 finished with value: 12.189882699263437 and parameters: {'num_leaves': 30, 'max_depth': 13, 'learning_rate': 0.040114317489724174, 'n_estimators': 213, 'min_child_samples': 7, 'subsample': 0.6533105289953658, 'colsample_bytree': 0.9773986903670845, 'reg_alpha': 1.2029581632095283e-08, 'reg_lambda': 0.0013499194866645767}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[108]	valid_0's rmse: 12.481
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[32]	valid_0's rmse: 10.5009
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007456 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[52]	valid_0's rmse: 13.5327




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009424 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:26,901] Trial 36 finished with value: 12.22232799790246 and parameters: {'num_leaves': 48, 'max_depth': 14, 'learning_rate': 0.0996809089040554, 'n_estimators': 579, 'min_child_samples': 14, 'subsample': 0.6964220597740269, 'colsample_bytree': 0.7531559634210971, 'reg_alpha': 3.3341709902031873, 'reg_lambda': 3.951847668407529e-05}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[50]	valid_0's rmse: 12.6334
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006777 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[100]	valid_0's rmse: 13.1805




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008611 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[98]	valid_0's rmse: 17.561




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010598 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:30,242] Trial 37 finished with value: 14.376288820545483 and parameters: {'num_leaves': 72, 'max_depth': 12, 'learning_rate': 0.06490195175211931, 'n_estimators': 368, 'min_child_samples': 33, 'subsample': 0.7374999281893125, 'colsample_bytree': 0.9283362923487495, 'reg_alpha': 0.0096509719999721, 'reg_lambda': 6.614722747828436e-08}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[66]	valid_0's rmse: 12.3874
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006748 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[68]	valid_0's rmse: 12.8614
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007711 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329




[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[126]	valid_0's rmse: 14.1092




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010443 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:32,292] Trial 38 finished with value: 13.4290324933821 and parameters: {'num_leaves': 43, 'max_depth': 10, 'learning_rate': 0.04952503105014574, 'n_estimators': 834, 'min_child_samples': 18, 'subsample': 0.8041978524799412, 'colsample_bytree': 0.8782457363324073, 'reg_alpha': 0.2015560527549033, 'reg_lambda': 1.5237050407999901}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[83]	valid_0's rmse: 13.3165
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006251 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[184]	valid_0's rmse: 14.8117




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008211 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[184]	valid_0's rmse: 19.3493




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010107 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:33,325] Trial 39 finished with value: 16.915320504215302 and parameters: {'num_leaves': 27, 'max_depth': 3, 'learning_rate': 0.011522294680432692, 'n_estimators': 184, 'min_child_samples': 11, 'subsample': 0.6766380433391026, 'colsample_bytree': 0.80939782967593, 'reg_alpha': 0.7109450089919888, 'reg_lambda': 8.421463801865896e-07}. Best is trial 13 with value: 12.06606458086028.


Did not meet early stopping. Best iteration is:
[184]	valid_0's rmse: 16.5849
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006699 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[511]	valid_0's rmse: 46.7553
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007713 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Did not meet early stopping. Best iteration is:
[511]	valid_0's rmse: 46.8444




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011057 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:43,827] Trial 40 finished with value: 45.58860869212887 and parameters: {'num_leaves': 58, 'max_depth': 9, 'learning_rate': 0.001544783394514524, 'n_estimators': 511, 'min_child_samples': 14, 'subsample': 0.6238692987252821, 'colsample_bytree': 0.6006010719624675, 'reg_alpha': 1.3108926415845686e-05, 'reg_lambda': 0.00021679263526985465}. Best is trial 13 with value: 12.06606458086028.


Did not meet early stopping. Best iteration is:
[511]	valid_0's rmse: 43.1662
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008261 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[105]	valid_0's rmse: 10.0731
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007723 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[159]	valid_0's rmse: 13.5248




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009698 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:47,754] Trial 41 finished with value: 12.127598328076735 and parameters: {'num_leaves': 49, 'max_depth': 15, 'learning_rate': 0.034580790449462065, 'n_estimators': 510, 'min_child_samples': 12, 'subsample': 0.6857439263421222, 'colsample_bytree': 0.9471543474294127, 'reg_alpha': 0.4657345133966574, 'reg_lambda': 2.8768628124026238e-05}. Best is trial 13 with value: 12.06606458086028.


Early stopping, best iteration is:
[111]	valid_0's rmse: 12.7849
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006759 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[47]	valid_0's rmse: 10.1618
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008046 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[64]	valid_0's rmse: 13.5517
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010348 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153




[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:49,694] Trial 42 finished with value: 11.944390252362345 and parameters: {'num_leaves': 43, 'max_depth': 15, 'learning_rate': 0.07470268920691919, 'n_estimators': 665, 'min_child_samples': 6, 'subsample': 0.6529293855578678, 'colsample_bytree': 0.9559073683911707, 'reg_alpha': 4.222773115116315, 'reg_lambda': 2.850391716968007e-05}. Best is trial 42 with value: 11.944390252362345.


Early stopping, best iteration is:
[52]	valid_0's rmse: 12.1197
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007018 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[66]	valid_0's rmse: 10.8236
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009029 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[141]	valid_0's rmse: 13.132




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010828 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:52:52,951] Trial 43 finished with value: 12.145442457916355 and parameters: {'num_leaves': 49, 'max_depth': 15, 'learning_rate': 0.052090354925292896, 'n_estimators': 562, 'min_child_samples': 7, 'subsample': 0.7421666453901337, 'colsample_bytree': 0.9467154772034038, 'reg_alpha': 0.8642261149021221, 'reg_lambda': 3.61472237674714e-05}. Best is trial 42 with value: 11.944390252362345.


Early stopping, best iteration is:
[84]	valid_0's rmse: 12.4808
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006071 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[155]	valid_0's rmse: 10.0642
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007927 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[188]	valid_0's rmse: 13.662
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009425 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153




[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:53:00,405] Trial 44 finished with value: 11.943959259349013 and parameters: {'num_leaves': 69, 'max_depth': 15, 'learning_rate': 0.024669661727690494, 'n_estimators': 481, 'min_child_samples': 6, 'subsample': 0.6967056448183867, 'colsample_bytree': 0.9118594763391731, 'reg_alpha': 0.0032362427678406624, 'reg_lambda': 4.967577465599254e-06}. Best is trial 44 with value: 11.943959259349013.


Early stopping, best iteration is:
[161]	valid_0's rmse: 12.1057
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007011 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[150]	valid_0's rmse: 10.2402
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[217]	valid_0's rmse: 14.4054
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009245 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153




[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:53:08,402] Trial 45 finished with value: 12.501931633734584 and parameters: {'num_leaves': 71, 'max_depth': 14, 'learning_rate': 0.02490377103315264, 'n_estimators': 396, 'min_child_samples': 5, 'subsample': 0.7846930308094014, 'colsample_bytree': 0.9086027623855706, 'reg_alpha': 0.0015442605927420685, 'reg_lambda': 2.713219029105807e-06}. Best is trial 44 with value: 11.943959259349013.


Early stopping, best iteration is:
[160]	valid_0's rmse: 12.8601
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[50]	valid_0's rmse: 11.0596
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[83]	valid_0's rmse: 13.2562




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009680 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:53:12,538] Trial 46 finished with value: 12.21820099012806 and parameters: {'num_leaves': 80, 'max_depth': 15, 'learning_rate': 0.06947513620117658, 'n_estimators': 752, 'min_child_samples': 7, 'subsample': 0.8316468857476024, 'colsample_bytree': 0.7835430413311597, 'reg_alpha': 0.005301375548937097, 'reg_lambda': 6.427603300282644e-06}. Best is trial 44 with value: 11.943959259349013.


Early stopping, best iteration is:
[65]	valid_0's rmse: 12.3389
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006262 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[461]	valid_0's rmse: 29.9045
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007490 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152




[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[461]	valid_0's rmse: 32.7768




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010578 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:53:24,337] Trial 47 finished with value: 29.873766832077223 and parameters: {'num_leaves': 62, 'max_depth': 14, 'learning_rate': 0.0031932209449001265, 'n_estimators': 461, 'min_child_samples': 50, 'subsample': 0.7037997261271036, 'colsample_bytree': 0.8245673668383031, 'reg_alpha': 6.293084029783906e-07, 'reg_lambda': 5.338377251685957e-07}. Best is trial 44 with value: 11.943959259349013.


Did not meet early stopping. Best iteration is:
[461]	valid_0's rmse: 26.94
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006772 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[165]	valid_0's rmse: 10.8956
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007606 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[206]	valid_0's rmse: 13.3696




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009775 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:53:28,111] Trial 48 finished with value: 12.308044727192952 and parameters: {'num_leaves': 67, 'max_depth': 6, 'learning_rate': 0.022377818348649936, 'n_estimators': 580, 'min_child_samples': 7, 'subsample': 0.6590259258499066, 'colsample_bytree': 0.8942429406827628, 'reg_alpha': 0.021616133718160683, 'reg_lambda': 1.5777273304984995e-05}. Best is trial 44 with value: 11.943959259349013.


Early stopping, best iteration is:
[187]	valid_0's rmse: 12.6589
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006951 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63254
[LightGBM] [Info] Number of data points in the train set: 8357, number of used features: 150
[LightGBM] [Info] Start training from score 51.051001
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[610]	valid_0's rmse: 13.1392




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007741 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63329
[LightGBM] [Info] Number of data points in the train set: 16713, number of used features: 152
[LightGBM] [Info] Start training from score 45.167219
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[649]	valid_0's rmse: 16.971




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009320 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63864
[LightGBM] [Info] Number of data points in the train set: 25069, number of used features: 153
[LightGBM] [Info] Start training from score 40.604746
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 09:53:49,484] Trial 49 finished with value: 14.111929943405372 and parameters: {'num_leaves': 116, 'max_depth': 15, 'learning_rate': 0.014294906544823247, 'n_estimators': 651, 'min_child_samples': 40, 'subsample': 0.6166250564863196, 'colsample_bytree': 0.976478607628599, 'reg_alpha': 0.0002708078697149976, 'reg_lambda': 5.306969468808936e-05}. Best is trial 44 with value: 11.943959259349013.


Early stopping, best iteration is:
[318]	valid_0's rmse: 12.2256
Mejores parámetros encontrados: {'num_leaves': 69, 'max_depth': 15, 'learning_rate': 0.024669661727690494, 'n_estimators': 481, 'min_child_samples': 6, 'subsample': 0.6967056448183867, 'colsample_bytree': 0.9118594763391731, 'reg_alpha': 0.0032362427678406624, 'reg_lambda': 4.967577465599254e-06}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010257 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63899
[LightGBM] [Info] Number of data points in the train set: 33425, number of used features: 153
[LightGBM] [Info] Start training from score 37.177162
✅ Modelo LightGBM optimizado y entrenado con éxito.


In [60]:
X_kgl = X_kgl[feature_columns]
for col in ['cat1', 'cat2', 'cat3', 'brand']:
    X_kgl[col] = X_kgl[col].astype('category')

y_pred = best_model.predict(X_kgl)


In [61]:
productos_ok = pd.read_csv("https://storage.googleapis.com/open-courses/austral2025-af91/labo3v/product_id_apredecir201912.txt", sep="\t")
result = pd.DataFrame({"product_id": X_kgl["product_id"],  "tn": y_pred})
result = result[result["product_id"].isin(productos_ok["product_id"])]
result = result.groupby("product_id").agg({"tn":"sum"}).reset_index()
result

Unnamed: 0,product_id,tn
0,20001,-7.541191
1,20002,5.919637
2,20003,1.057103
3,20004,0.387169
4,20005,0.148572
...,...,...
775,21263,0.012097
776,21265,0.012097
777,21266,0.012097
778,21267,0.012097


In [73]:
result.to_csv("../../outputs/lgb_exp04_con_bayesiana_tweeede_v3.csv", index=False, sep=',')

In [74]:
pd.set_option('display.max_rows', None)
# Supongamos que usaste un DataFrame para entrenar:
feature_names = X.columns  # Si X_train es un DataFrame

# Obtener importancias
importances = best_model.feature_importances_

# Combinar en un DataFrame para visualizar mejor
importancia_df = pd.DataFrame({
    'Variable': feature_names,
    'Importancia': importances
}).sort_values(by='Importancia', ascending=False)

importancia_df


Unnamed: 0,Variable,Importancia
4,cat3,1681
5,brand,1202
0,product_id,899
7,stock_final,677
94,ytd_sum,636
6,sku_size,497
115,best_month_rank,435
82,expanding_mean,434
138,skew_6,407
83,cumulative_sum,374
