# Exp 04: lgb agrupando por periodo-product_id

Usando distribución tweedy y max_bins=500 y optimizacion bayesiana

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gc

##### Merge: periodos + clientes + productos

In [21]:
df = pd.read_csv("../../data/preprocessed/base.csv", sep=',')
df["periodo_dt"] = pd.to_datetime(df["periodo"].astype(str), format="%Y%m")


periodos = pd.date_range(start=df['periodo_dt'].min(), end=df['periodo_dt'].max(), freq="MS")
productos = df['product_id'].unique()


idx = pd.MultiIndex.from_product([productos, periodos], names=['product_id', 'periodo'])
completo = idx.to_frame(index=False)
completo["periodo"] = completo["periodo"].dt.strftime("%Y%m").astype(int)

del periodos, productos, 
gc.collect()
completo

Unnamed: 0,product_id,periodo
0,20524,201701
1,20524,201702
2,20524,201703
3,20524,201704
4,20524,201705
...,...,...
44383,20770,201908
44384,20770,201909
44385,20770,201910
44386,20770,201911


##### Cruzamos con productos

In [22]:
productos = pd.read_csv("../../data/raw/tb_productos.csv", sep='\t')
productos = productos.drop_duplicates(subset=['product_id'], keep='first')
completo = completo.merge(productos, how='left', on="product_id")
del productos
gc.collect()

0

##### Cruzamos con stock

In [23]:
stocks = pd.read_csv("../../data/raw/tb_stocks.csv", sep='\t')
stocks = stocks.groupby(by=["periodo", "product_id"]).agg({"stock_final": "sum"}).reset_index()
completo = completo.merge(stocks, how='left', on=['periodo', 'product_id'])
del stocks
gc.collect()
completo.head()

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,


##### Cruzamos con ventas

In [24]:
sellin = pd.read_csv("../../data/raw/sell-in.csv", sep='\t')
# Agrupar ventas por periodo, cliente y producto
dt = sellin.groupby(by=["periodo","product_id"]).agg({"tn":"sum"}).reset_index()
df_completo = completo.merge(dt, how='left', on=['periodo', 'product_id'])
df_completo['tn'] = df_completo['tn'].fillna(0)
del sellin, dt, completo
gc.collect()
df_completo

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final,tn
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,,6.48085
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,,3.99755
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,,7.14711
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,,6.82163
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,,9.25949
...,...,...,...,...,...,...,...,...,...
44383,20770,201908,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44384,20770,201909,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44385,20770,201910,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44386,20770,201911,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000


##### Target

In [25]:
# Asegurarte de tener 'periodo_dt' (datetime) en completo
df_completo['periodo_dt'] = pd.to_datetime(df_completo['periodo'], format='%Y%m')

# Crear DataFrame auxiliar con tn como target y fecha adelantada
ventas_futuras = df_completo[['periodo_dt', 'product_id', 'tn']].copy()
ventas_futuras['periodo_target_dt'] = ventas_futuras['periodo_dt'] - pd.DateOffset(months=2)
ventas_futuras = ventas_futuras.rename(columns={'tn': 'target'})

# Merge con completo usando periodo adelantado
df_completo = df_completo.merge(
    ventas_futuras[['periodo_target_dt', 'product_id', 'target']],
    how='left',
    left_on=['periodo_dt', 'product_id'],
    right_on=['periodo_target_dt', 'product_id']
)

# Eliminar columna auxiliar
df_completo = df_completo.drop(columns=['periodo_target_dt'])
del ventas_futuras
gc.collect()
print(f"✅ Target generado. Filas con target no nulo: {df_completo['target'].notna().sum()}")

✅ Target generado. Filas con target no nulo: 41922


In [9]:
df_completo

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final,tn,periodo_dt,target
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,,6.48085,2017-01-01,7.14711
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,,3.99755,2017-02-01,6.82163
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,,7.14711,2017-03-01,9.25949
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,,6.82163,2017-04-01,7.04113
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,,9.25949,2017-05-01,5.92819
...,...,...,...,...,...,...,...,...,...,...,...
10039,20770,201908,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-08-01,0.00000
10040,20770,201909,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-09-01,0.00000
10041,20770,201910,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-10-01,3.18500
10042,20770,201911,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-11-01,


##### Verifico las NaN en el target: Existen porque hay clientes que solo compraron 2 veces.

In [10]:
nan_count = df_completo['target'].isna().sum()
print(f"🔍 Total de NaN en target: {nan_count}")
del nan_count
gc.collect()

🔍 Total de NaN en target: 558


0

##### Generación de IDs

In [26]:
df_completo = df_completo.sort_values(['periodo', 'product_id'])
df_completo['id'] = df_completo.groupby(['product_id']).cumcount() + 1

##### Periodo 

In [27]:
df_completo["periodo_dt"] = pd.to_datetime(df_completo["periodo"].astype(str), format="%Y%m")

##### Eliminar productos que no nacieron

In [28]:
nacimiento_producto = df.groupby("product_id")["periodo_dt"].agg(["min"]).reset_index()
# Renombrar columna max a muerte_cliente_dt
nacimiento_producto = nacimiento_producto.rename(columns={'min': 'nacimiento_producto'})


# Unir con df_final para traer fecha de muerte del cliente
df_completo = df_completo.merge(nacimiento_producto, on='product_id', how='left')

# Filtrar filas donde periodo_dt > muerte_cliente_dt
df_completo = df_completo[df_completo['periodo_dt'] >= df_completo['nacimiento_producto']]

# Opcional: eliminar columna auxiliar
# df_final = df_final.drop(columns=['muerte_cliente_dt'])
del nacimiento_producto
gc.collect()
print(f"✅ Dataset filtrado con {len(df_completo):,} filas.")

✅ Dataset filtrado con 35,888 filas.


##### Correlograma

In [13]:
# Supongamos que tu DataFrame se llama df
cor_matrix = df_completo.corr(numeric_only=True)

# Tomamos solo la parte superior de la matriz (sin la diagonal)
upper = cor_matrix.where(np.triu(np.ones(cor_matrix.shape), k=1).astype(bool))

# Filtramos correlaciones fuertes (valor absoluto mayor a 0.7)
high_corr = upper.stack().reset_index()
high_corr.columns = ['Variable 1', 'Variable 2', 'Correlación']
high_corr_filtrada = high_corr[high_corr['Correlación'].abs() > 0.7]

print(high_corr_filtrada)

del high_corr_filtrada, cor_matrix, upper, high_corr
gc.collect()

   Variable 1 Variable 2  Correlación
10    periodo         id     0.955566
18         tn     target     0.927145


0

##### Elimino variables muy correlacionadas

In [14]:
df_completo.drop(columns=['periodo'], inplace=True)

##### Extracción de componentes temporales

In [29]:
df_completo['year'] = df_completo['periodo_dt'].dt.year
df_completo['month'] = df_completo['periodo_dt'].dt.month
# Variables dummy estacionales
df_completo['quarter'] = df_completo['periodo_dt'].dt.quarter
df_completo['semester'] = np.where(df_completo['month'] <= 6, 1, 2)
# Efectos de fin de año
df_completo['year_end'] = np.where(df_completo['month'].isin([11, 12]), 1, 0)
df_completo['year_start'] = np.where(df_completo['month'].isin([1, 2]), 1, 0)
# Indicadores estacionales
df_completo['season'] = df_completo['month'] % 12 // 3 + 1  # 1:Invierno, 2:Primavera, etc.
# Variables cíclicas (para capturar patrones estacionales)
df_completo['month_sin'] = np.sin(2 * np.pi * df_completo['month']/12)
df_completo['month_cos'] = np.cos(2 * np.pi * df_completo['month']/12)

#####  Lags, diferencias, medias móviles y otras yerbas

In [30]:
# Ordenamos por fecha para asegurar consistencia
df_completo = df_completo.sort_values('periodo_dt')

## 1. Lags (rezagos) de 1 a 12 meses
for i in range(1, 15):
    df_completo[f'lag_{i}'] = df_completo['tn'].shift(i)

## 2. Diferencias (deltas) - cambio respecto al mes anterior
for i in range(1, 13):
    df_completo[f'delta_{i}'] = df_completo['tn'].diff(i)

## 3. Diferencias porcentuales
for i in range(1, 13):
    df_completo[f'pct_change_{i}'] = df_completo['tn'].pct_change(i)

## 4. Medias móviles (promedios móviles)
windows = [2, 3, 6, 9, 12]  # También puedes incluir [2,4,5,7] según necesidad
for w in windows:
    df_completo[f'rolling_mean_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).mean()
    df_completo[f'rolling_std_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).std()
    df_completo[f'rolling_min_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).min()
    df_completo[f'rolling_max_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).max()
    df_completo[f'rolling_median_{w}'] = df_completo['tn'].rolling(window=w, min_periods=1).median()

## 5. Características de tendencia y estacionalidad
df_completo['expanding_mean'] = df_completo['tn'].expanding().mean()
df_completo['cumulative_sum'] = df_completo['tn'].cumsum()

## 6. Características de diferencia estacional (12 meses para datos mensuales)
df_completo['seasonal_diff_12'] = df_completo['tn'].diff(12)

## 7. Estadísticas anuales comparativas
df_completo['vs_prev_year'] = df_completo['tn'] / df_completo['lag_12'] - 1  # Crecimiento interanual

## 8. Componentes de descomposición (simplificada)
# Tendencia (usando media móvil de 12 meses)
df_completo['trend'] = df_completo['tn'].rolling(window=12, min_periods=1).mean()
# Estacionalidad (diferencia entre valor real y tendencia)
df_completo['seasonality'] = df_completo['tn'] - df_completo['trend']

## 9. Variables booleanas para eventos especiales
df_completo['new_high'] = (df_completo['tn'] == df_completo['rolling_max_12']).astype(int)
df_completo['new_low'] = (df_completo['tn'] == df_completo['rolling_min_12']).astype(int)

## 10. Características de aceleración/deceleración
df_completo['acceleration'] = df_completo['delta_1'].diff(1)  # Cambio en la tasa de cambio

##### Estadísticas de Ventana Dinámica

In [31]:
# Medias móviles exponenciales
df_completo['ewm_alpha_0.3'] = df_completo['tn'].ewm(alpha=0.3, adjust=False).mean()
df_completo['ewm_alpha_0.5'] = df_completo['tn'].ewm(alpha=0.5, adjust=False).mean()

# Medias móviles centradas
df_completo['rolling_center_mean_3'] = df_completo['tn'].rolling(window=3, center=True).mean()

# Sumas acumuladas por año
df_completo['ytd_sum'] = df_completo.groupby(df_completo['periodo_dt'].dt.year)['tn'].cumsum()

##### Características de Tendencia y Ciclo

In [32]:
# Modelado de tendencia polinomial
df_completo['time_index'] = range(len(df_completo))
df_completo['trend_linear'] = np.poly1d(np.polyfit(df_completo['time_index'], df_completo['tn'], 1))(df_completo['time_index'])
df_completo['trend_quadratic'] = np.poly1d(np.polyfit(df_completo['time_index'], df_completo['tn'], 2))(df_completo['time_index'])

# Residuales de tendencia
df_completo['residual_trend'] = df_completo['tn'] - df_completo['trend_linear']

##### Características de Cambio de Régimen

In [34]:
# Z-Score respecto a ventana móvil
df_completo['zscore_6'] = (df_completo['tn'] - df_completo['rolling_mean_6']) / df_completo['rolling_std_6']

# Detección de outliers
df_completo['is_outlier_3sigma'] = np.where(np.abs(df_completo['zscore_6']) > 3, 1, 0)

# Cambios bruscos (spikes)
df_completo['spike_up'] = np.where(df_completo['delta_1'] > df_completo['rolling_std_3'], 1, 0)
df_completo['spike_down'] = np.where(df_completo['delta_1'] < -df_completo['rolling_std_3'], 1, 0)

##### Características de Patrones Temporales

In [35]:
# Autocorrelaciones parciales
from statsmodels.tsa.stattools import pacf
pacf_values = pacf(df_completo['tn'].dropna(), nlags=12)
for i in range(1, 6):
    df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]

# Estacionalidad múltiple (si hay patrones semestrales)
df_completo['semester_mean'] = df_completo.groupby(['year', 'semester'])['tn'].transform('mean')

  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['tn'].shift(i) * pacf_values[i]
  df_completo['semester_mean'] = df_completo.groupby(['year', 'semester'])['tn'].transform('mean')


##### Características de Forecast Ingenieriles

In [36]:
# Método ingenuo (último valor)
df_completo['naive_forecast'] = df_completo['tn'].shift(1)

# Seasonal naive (valor del mismo período año anterior)
df_completo['seasonal_naive'] = df_completo['tn'].shift(12)

# Promedio móvil como forecast
df_completo['ma_forecast_3'] = df_completo['rolling_mean_3'].shift(1)

  df_completo['naive_forecast'] = df_completo['tn'].shift(1)
  df_completo['seasonal_naive'] = df_completo['tn'].shift(12)
  df_completo['ma_forecast_3'] = df_completo['rolling_mean_3'].shift(1)


##### Características de Decomposición Temporal

In [37]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Descomposición clásica (additiva o multiplicativa)
result = seasonal_decompose(df_completo['tn'].dropna(), model='additive', period=12)
df_completo['trend_decomposed'] = result.trend
df_completo['seasonal_decomposed'] = result.seasonal
df_completo['residual_decomposed'] = result.resid

  df_completo['trend_decomposed'] = result.trend
  df_completo['seasonal_decomposed'] = result.seasonal
  df_completo['residual_decomposed'] = result.resid


##### Características de Ventanas Asimétricas

In [38]:
# Mejor mes histórico
df_completo['best_month_rank'] = df_completo.groupby('month')['tn'].rank(ascending=False)

# Comparación con mismo mes año anterior
df_completo['vs_last_year_same_month'] = df_completo['tn'] / df_completo['lag_12'] - 1

# Acumulado últimos 3 vs mismos 3 meses año anterior
df_completo['last3_vs_ly3'] = (df_completo['tn'] + df_completo['lag_1'] + df_completo['lag_2']) / (df_completo['lag_12'] + df_completo['lag_13'] + df_completo['lag_14']) - 1

  df_completo['best_month_rank'] = df_completo.groupby('month')['tn'].rank(ascending=False)
  df_completo['vs_last_year_same_month'] = df_completo['tn'] / df_completo['lag_12'] - 1
  df_completo['last3_vs_ly3'] = (df_completo['tn'] + df_completo['lag_1'] + df_completo['lag_2']) / (df_completo['lag_12'] + df_completo['lag_13'] + df_completo['lag_14']) - 1


##### Transformaciones Matemáticas

In [39]:
from scipy import stats
from scipy.special import boxcox1p

# Transformaciones clásicas
df_completo['log_tn'] = np.log1p(df_completo['tn'])
df_completo['sqrt_tn'] = np.sqrt(df_completo['tn'])

# Box-Cox (solo si tn > 0)
mask = df_completo['tn'] > 0
df_completo['boxcox_tn'] = np.nan
df_completo.loc[mask, 'boxcox_tn'], _ = stats.boxcox(df_completo.loc[mask, 'tn'])

# Diferenciación
df_completo['diff1_log'] = df_completo['log_tn'].diff(1)


  df_completo['log_tn'] = np.log1p(df_completo['tn'])
  df_completo['sqrt_tn'] = np.sqrt(df_completo['tn'])
  df_completo['boxcox_tn'] = np.nan
  df_completo['diff1_log'] = df_completo['log_tn'].diff(1)


##### Características de Interacción

In [40]:
# Interacción entre tendencia y estacionalidad
df_completo['trend_season_interaction'] = df_completo['trend'] * df_completo['seasonal_decomposed']

# Interacción lags con estacionalidad
for i in [1, 2, 3, 12]:
    df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']

  df_completo['trend_season_interaction'] = df_completo['trend'] * df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']


##### Nuevos lags cruzados y acumulados

In [41]:
# Lags de rolling_mean
for i in [1, 2, 3]:
    df_completo[f'lag_mean6_{i}'] = df_completo['rolling_mean_6'].shift(i)

  df_completo[f'lag_mean6_{i}'] = df_completo['rolling_mean_6'].shift(i)
  df_completo[f'lag_mean6_{i}'] = df_completo['rolling_mean_6'].shift(i)
  df_completo[f'lag_mean6_{i}'] = df_completo['rolling_mean_6'].shift(i)


##### Slope (pendiente) de la tendencia local

In [42]:
df_completo['trend_slope_6'] = df_completo['rolling_mean_6'].diff(1)

  df_completo['trend_slope_6'] = df_completo['rolling_mean_6'].diff(1)


##### Cambios acumulados

In [43]:
df_completo['cumulative_change_3'] = df_completo['delta_1'] + df_completo['delta_2'] + df_completo['delta_3']

  df_completo['cumulative_change_3'] = df_completo['delta_1'] + df_completo['delta_2'] + df_completo['delta_3']


##### Razones entre ventanas

In [44]:
df_completo['mean_ratio_3_6'] = df_completo['rolling_mean_3'] / (df_completo['rolling_mean_6'] + 1e-6)

  df_completo['mean_ratio_3_6'] = df_completo['rolling_mean_3'] / (df_completo['rolling_mean_6'] + 1e-6)


##### Coeficiente de variación

In [45]:
df_completo['cv_6'] = df_completo['rolling_std_6'] / (df_completo['rolling_mean_6'] + 1e-6)

  df_completo['cv_6'] = df_completo['rolling_std_6'] / (df_completo['rolling_mean_6'] + 1e-6)


##### Ratio entre último valor y media móvil

In [46]:
df_completo['tn_vs_mean_3'] = df_completo['tn'] / (df_completo['rolling_mean_3'] + 1e-6)

  df_completo['tn_vs_mean_3'] = df_completo['tn'] / (df_completo['rolling_mean_3'] + 1e-6)


##### Amplitud de la serie

In [47]:
df_completo['rolling_amplitude_6'] = df_completo['rolling_max_6'] - df_completo['rolling_min_6']

  df_completo['rolling_amplitude_6'] = df_completo['rolling_max_6'] - df_completo['rolling_min_6']


##### Count positivo/negativo en ventana

In [48]:
df_completo['positive_count_6'] = df_completo['tn'].rolling(6).apply(lambda x: (x > 0).sum())

  df_completo['positive_count_6'] = df_completo['tn'].rolling(6).apply(lambda x: (x > 0).sum())


##### Media de deltas

In [49]:
df_completo['delta_mean_3'] = df_completo['delta_1'] + df_completo['delta_2'] + df_completo['delta_3']

  df_completo['delta_mean_3'] = df_completo['delta_1'] + df_completo['delta_2'] + df_completo['delta_3']


##### Rolling skewness y kurtosis

In [50]:
df_completo['skew_6'] = df_completo['tn'].rolling(6).skew()
df_completo['kurt_6'] = df_completo['tn'].rolling(6).kurt()

  df_completo['skew_6'] = df_completo['tn'].rolling(6).skew()
  df_completo['kurt_6'] = df_completo['tn'].rolling(6).kurt()


##### Cambio en media móvil

In [51]:
df_completo['mean_change_6'] = df_completo['rolling_mean_6'].diff()

  df_completo['mean_change_6'] = df_completo['rolling_mean_6'].diff()


#####  Slope entre dos puntos (último vs. anterior)

In [52]:
df_completo['slope_last2'] = df_completo['tn'] - df_completo['lag_1']

  df_completo['slope_last2'] = df_completo['tn'] - df_completo['lag_1']


##### Momentum

In [53]:
df_completo['momentum_3'] = df_completo['tn'] - df_completo['lag_3']

  df_completo['momentum_3'] = df_completo['tn'] - df_completo['lag_3']


##### Rolling quantiles

In [54]:
df_completo['quantile_25_6'] = df_completo['tn'].rolling(6).quantile(0.25)
df_completo['quantile_75_6'] = df_completo['tn'].rolling(6).quantile(0.75)

  df_completo['quantile_25_6'] = df_completo['tn'].rolling(6).quantile(0.25)
  df_completo['quantile_75_6'] = df_completo['tn'].rolling(6).quantile(0.75)


##### Categoría como frecuencia histórica

In [55]:
df_completo['freq_cat1'] = df_completo.groupby('cat1')['tn'].transform('count')

  df_completo['freq_cat1'] = df_completo.groupby('cat1')['tn'].transform('count')


##### Cantidad de meses sin ventas en los últimos 6

In [56]:
df_completo['zeros_6'] = df_completo['tn'].rolling(6).apply(lambda x: (x == 0).sum())

  df_completo['zeros_6'] = df_completo['tn'].rolling(6).apply(lambda x: (x == 0).sum())


#####  Ratio de spikes

In [57]:
df_completo['spike_ratio_3'] = (df_completo['spike_up'] + df_completo['spike_down']) / 3

  df_completo['spike_ratio_3'] = (df_completo['spike_up'] + df_completo['spike_down']) / 3


##### Ratio entre tn y su z-score

In [58]:
df_completo['tn_zscore_ratio'] = df_completo['tn'] / (df_completo['zscore_6'] + 1e-6)

  df_completo['tn_zscore_ratio'] = df_completo['tn'] / (df_completo['zscore_6'] + 1e-6)


##### Cambio en residual

In [59]:
df_completo['residual_change'] = df_completo['residual_decomposed'].diff()

  df_completo['residual_change'] = df_completo['residual_decomposed'].diff()


##### Valor relativo respecto al rango local

In [60]:
df_completo['position_in_range_6'] = (df_completo['tn'] - df_completo['rolling_min_6']) / (
    df_completo['rolling_max_6'] - df_completo['rolling_min_6'] + 1e-6)

  df_completo['position_in_range_6'] = (df_completo['tn'] - df_completo['rolling_min_6']) / (


##### Completamos NaN del target con ceros

In [61]:
df_completo['target'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_completo['target'].fillna(0, inplace=True)


##### Dividimos el dataset

In [62]:
df_completo_copy = df_completo.copy()

In [72]:
df_completo_copy['cat1'].unique()

array(['HC', 'PC', 'FOODS', nan, 'REF'], dtype=object)

In [90]:
df_completo = df_completo_copy.copy()
df_completo = df_completo[df_completo['cat1'] == 'REF']

In [91]:
dt_kgl = df_completo[df_completo["periodo"].isin([201912])]
ts = df_completo.drop(df_completo[df_completo["periodo"].isin([201911,201912])].index,axis=0)

In [92]:
# Asegurate de que 'periodo' sea datetime si no lo es
# df_completo['periodo_dt'] = pd.to_datetime(df_completo['periodo'].astype(str), format='%Y%m')

# features
feature_columns = [col for col in ts.columns if col not in ['periodo_dt', 'tn_target', 'nacimiento_producto', 'target']]

# Definimos los límites
train_cutoff = '2019-09-01'
valid_cutoff = '2019-11-01'

# Split temporal
df_train = df_completo[df_completo['periodo_dt'] < train_cutoff]
df_valid = df_completo[(df_completo['periodo_dt'] >= train_cutoff) & (df_completo['periodo_dt'] < valid_cutoff)]

# Separás features y target
X_train = df_train.drop(columns=feature_columns)
y_train = df_train['target']

X_valid = df_valid.drop(columns=feature_columns)
y_valid = df_valid['target']


##### Productos a predecir

In [93]:
X_kgl = dt_kgl[feature_columns]
productos_a_predecir = pd.read_csv("../../data/raw/product_id_apredecir201912.csv")
# Filtrar filas
productos_filtrados = productos_a_predecir['product_id'].unique()
X_kgl = X_kgl[X_kgl['product_id'].isin(productos_filtrados)]
X_kgl['product_id'].nunique()

6

In [94]:
df_completo.info()

<class 'pandas.core.frame.DataFrame'>
Index: 456 entries, 1113 to 44114
Columns: 154 entries, product_id to position_in_range_6
dtypes: datetime64[ns](2), float64(132), int32(13), int64(3), object(4)
memory usage: 545.2+ KB


In [95]:
df_completo.shape

(456, 154)

##### Optimización de Hiperparámetros con Optuna

In [96]:
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import optuna
import numpy as np

# Preparar datos
feature_columns = [col for col in ts.columns if col not in ['periodo_dt', 'tn_target', 'nacimiento_producto', 'target']]
X = ts[feature_columns]
y = ts['target']

for col in ['cat1', 'cat2', 'cat3', 'brand']:
    X[col] = X[col].astype('category')

# Eliminar NaNs en el target
if y.isnull().any():
    print("⚠️ Target tiene NaN, se eliminarán.")
    mask = ~y.isnull()
    X = X[mask]
    y = y[mask]

# Función objetivo para Optuna
def objective(trial):
    params = {
        'objective': 'regression',
        # 'tweedie_variance_power': trial.suggest_float('tweedie_variance_power', 1.1, 1.9),
        # 'objective': 'regression',
        'metric': 'rmse',
        'boosting_type': 'gbdt',
        'random_state': 12345,
        'max_bin': 500,
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True)
    }

    tscv = TimeSeriesSplit(n_splits=3)
    rmses = []

    for train_idx, valid_idx in tscv.split(X):
        X_train, X_valid = X.iloc[train_idx], X.iloc[valid_idx]
        y_train, y_valid = y.iloc[train_idx], y.iloc[valid_idx]

        model = lgb.LGBMRegressor(**params)
        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            callbacks=[
                lgb.early_stopping(50),
                lgb.log_evaluation(0)
            ]
        )

        preds = model.predict(X_valid)
        rmse = mean_squared_error(y_valid, preds, squared=False)
        rmses.append(rmse)

    return np.mean(rmses)

# Crear estudio Optuna
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Mostrar mejores parámetros
print("Mejores parámetros encontrados:", study.best_params)

# Entrenar modelo final
best_params = study.best_params
best_model = lgb.LGBMRegressor(**best_params, objective='regression', max_bin=500, random_state=12345)
best_model.fit(X, y)

print("✅ Modelo LightGBM optimizado y entrenado con éxito.")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_in

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000359 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4285
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 140
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[380]	valid_0's rmse: 0.278302
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000909 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[380]	valid_0's rmse: 0.213447
[LightGBM] [Info] Auto-choosing col-wi

[I 2025-06-10 10:20:30,840] Trial 0 finished with value: 0.24155139163293007 and parameters: {'num_leaves': 56, 'max_depth': 11, 'learning_rate': 0.006888890084703839, 'n_estimators': 380, 'min_child_samples': 26, 'subsample': 0.6487603293428262, 'colsample_bytree': 0.6698610196929371, 'reg_alpha': 0.1991032374722444, 'reg_lambda': 2.282664769538421}. Best is trial 0 with value: 0.24155139163293007.
[I 2025-06-10 10:20:31,022] Trial 1 finished with value: 0.2191521990516073 and parameters: {'num_leaves': 104, 'max_depth': 9, 'learning_rate': 0.03749505982674794, 'n_estimators': 807, 'min_child_samples': 13, 'subsample': 0.7689384964260408, 'colsample_bytree': 0.7553137487500419, 'reg_alpha': 0.00019506119626515076, 'reg_lambda': 7.127352287111836e-07}. Best is trial 1 with value: 0.2191521990516073.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000410 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[144]	valid_0's rmse: 0.231368
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000600 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[55]	valid_0's rmse: 0.199524
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ove

[I 2025-06-10 10:20:31,176] Trial 2 finished with value: 0.2214908993343939 and parameters: {'num_leaves': 122, 'max_depth': 6, 'learning_rate': 0.04152308535342565, 'n_estimators': 624, 'min_child_samples': 24, 'subsample': 0.9809863591515247, 'colsample_bytree': 0.8380933895953855, 'reg_alpha': 0.00024486898749603874, 'reg_lambda': 0.08172478801240578}. Best is trial 1 with value: 0.2191521990516073.


Early stopping, best iteration is:
[275]	valid_0's rmse: 0.242932
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000566 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[109]	valid_0's rmse: 0.20707
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001065 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[62]	valid_0's rmse: 0.21447


[I 2025-06-10 10:20:31,270] Trial 3 finished with value: 0.22804440014154012 and parameters: {'num_leaves': 21, 'max_depth': 4, 'learning_rate': 0.0777838940832749, 'n_estimators': 990, 'min_child_samples': 50, 'subsample': 0.7387576668604137, 'colsample_bytree': 0.6415893677135948, 'reg_alpha': 0.00012483442378815687, 'reg_lambda': 1.672391641621867e-07}. Best is trial 1 with value: 0.2191521990516073.


Early stopping, best iteration is:
[70]	valid_0's rmse: 0.224021
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000384 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[290]	valid_0's rmse: 0.323264
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000597 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[290

[I 2025-06-10 10:20:31,511] Trial 4 finished with value: 0.2865279652761484 and parameters: {'num_leaves': 115, 'max_depth': 8, 'learning_rate': 0.0011066020984592483, 'n_estimators': 290, 'min_child_samples': 19, 'subsample': 0.7097889925768512, 'colsample_bytree': 0.768172912564845, 'reg_alpha': 0.0007483018778069443, 'reg_lambda': 0.6373572711991558}. Best is trial 1 with value: 0.2191521990516073.


Did not meet early stopping. Best iteration is:
[290]	valid_0's rmse: 0.277669
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000409 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[376]	valid_0's rmse: 0.217753
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000341 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[131]	valid_0's 

[I 2025-06-10 10:20:31,770] Trial 5 finished with value: 0.2134063110125206 and parameters: {'num_leaves': 85, 'max_depth': 9, 'learning_rate': 0.014472294910411616, 'n_estimators': 467, 'min_child_samples': 11, 'subsample': 0.7361133788372275, 'colsample_bytree': 0.6285141946378682, 'reg_alpha': 2.4266626189586235e-05, 'reg_lambda': 2.333729406748122e-06}. Best is trial 5 with value: 0.2134063110125206.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001069 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[166]	valid_0's rmse: 0.231393
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000379 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[246]	valid_0's rmse: 0.207306
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the o

[I 2025-06-10 10:20:32,004] Trial 6 finished with value: 0.21460735102324371 and parameters: {'num_leaves': 31, 'max_depth': 10, 'learning_rate': 0.015125342628763623, 'n_estimators': 525, 'min_child_samples': 8, 'subsample': 0.6198556112192405, 'colsample_bytree': 0.9302389785821511, 'reg_alpha': 2.3534988613376045e-08, 'reg_lambda': 2.8136043689748846}. Best is trial 5 with value: 0.2134063110125206.


Early stopping, best iteration is:
[135]	valid_0's rmse: 0.201932
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000819 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[140]	valid_0's rmse: 0.234584
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000309 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4285
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 140
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds




Did not meet early stopping. Best iteration is:
[901]	valid_0's rmse: 0.249735
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000809 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[560]	valid_0's rmse: 0.207687


[I 2025-06-10 10:20:32,382] Trial 7 finished with value: 0.22394212579007874 and parameters: {'num_leaves': 117, 'max_depth': 5, 'learning_rate': 0.00783830126876357, 'n_estimators': 901, 'min_child_samples': 24, 'subsample': 0.7803853178487351, 'colsample_bytree': 0.7183483769809642, 'reg_alpha': 0.019273253247194366, 'reg_lambda': 0.7863832377873806}. Best is trial 5 with value: 0.2134063110125206.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000802 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[318]	valid_0's rmse: 0.214404
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000209 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4268
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 136
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's rmse: 0.351741
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ove

[I 2025-06-10 10:20:32,672] Trial 8 finished with value: 0.27375658642917106 and parameters: {'num_leaves': 117, 'max_depth': 11, 'learning_rate': 0.0092876777691397, 'n_estimators': 893, 'min_child_samples': 38, 'subsample': 0.751704702301335, 'colsample_bytree': 0.8474484455460957, 'reg_alpha': 7.970225885945869, 'reg_lambda': 0.006611006559633859}. Best is trial 5 with value: 0.2134063110125206.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000890 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11151
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 143
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[893]	valid_0's rmse: 0.23994
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000478 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4270
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 137
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 10:20:32,835] Trial 9 finished with value: 0.23044064014654667 and parameters: {'num_leaves': 110, 'max_depth': 5, 'learning_rate': 0.046428474101669545, 'n_estimators': 654, 'min_child_samples': 32, 'subsample': 0.6535510879719729, 'colsample_bytree': 0.8155808318102237, 'reg_alpha': 1.219554674192252, 'reg_lambda': 0.00021325384862260865}. Best is trial 5 with value: 0.2134063110125206.


Early stopping, best iteration is:
[420]	valid_0's rmse: 0.253529
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000572 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7666
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 142
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[81]	valid_0's rmse: 0.213151
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000727 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[178]	valid_0's rmse: 0.22464



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000549 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7672
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 145
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[132]	valid_0's rmse: 0.233519
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000753 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 10:20:33,166] Trial 10 finished with value: 0.26927363007111166 and parameters: {'num_leaves': 147, 'max_depth': 15, 'learning_rate': 0.0025361745505938955, 'n_estimators': 132, 'min_child_samples': 5, 'subsample': 0.8808113115795495, 'colsample_bytree': 0.6020794864246366, 'reg_alpha': 4.542059220236152e-07, 'reg_lambda': 2.6579681068009e-05}. Best is trial 5 with value: 0.2134063110125206.


Did not meet early stopping. Best iteration is:
[132]	valid_0's rmse: 0.275739
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000243 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4289
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 142
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[110]	valid_0's rmse: 0.219956
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000453 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7672
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 145
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[77]	valid_0's r





[I 2025-06-10 10:20:33,649] Trial 11 finished with value: 0.21373724626394133 and parameters: {'num_leaves': 64, 'max_depth': 12, 'learning_rate': 0.02184629252847343, 'n_estimators': 450, 'min_child_samples': 6, 'subsample': 0.604620468774415, 'colsample_bytree': 0.9680593135300966, 'reg_alpha': 5.316172045300878e-08, 'reg_lambda': 5.800040178887559e-06}. Best is trial 5 with value: 0.2134063110125206.


Early stopping, best iteration is:
[341]	valid_0's rmse: 0.22859
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000393 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[239]	valid_0's rmse: 0.231518




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000737 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[100]	valid_0's rmse: 0.201975
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001207 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 10:20:33,972] Trial 12 finished with value: 0.2213288768114002 and parameters: {'num_leaves': 71, 'max_depth': 14, 'learning_rate': 0.019910546187220844, 'n_estimators': 447, 'min_child_samples': 14, 'subsample': 0.8632436942080861, 'colsample_bytree': 0.9890114313257798, 'reg_alpha': 1.5496693497374043e-06, 'reg_lambda': 1.3328871348381788e-08}. Best is trial 5 with value: 0.2134063110125206.


Early stopping, best iteration is:
[111]	valid_0's rmse: 0.230494
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000278 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[257]	valid_0's rmse: 0.255424
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000556 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[25

[I 2025-06-10 10:20:34,270] Trial 13 finished with value: 0.23662309049907504 and parameters: {'num_leaves': 84, 'max_depth': 13, 'learning_rate': 0.00418739255000869, 'n_estimators': 257, 'min_child_samples': 12, 'subsample': 0.6904674498698393, 'colsample_bytree': 0.898031740456069, 'reg_alpha': 1.049465793655477e-08, 'reg_lambda': 8.708907544330478e-06}. Best is trial 5 with value: 0.2134063110125206.


Did not meet early stopping. Best iteration is:
[257]	valid_0's rmse: 0.246612
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000301 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4289
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 142
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[121]	valid_0's rmse: 0.218106
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000436 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7672
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 145
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[31]	valid_0's r

[I 2025-06-10 10:20:34,544] Trial 14 finished with value: 0.2248818219335941 and parameters: {'num_leaves': 48, 'max_depth': 7, 'learning_rate': 0.022290798688510506, 'n_estimators': 702, 'min_child_samples': 5, 'subsample': 0.8411374531232056, 'colsample_bytree': 0.9971024839189603, 'reg_alpha': 2.3647099395669244e-06, 'reg_lambda': 1.4551202496657805e-06}. Best is trial 5 with value: 0.2134063110125206.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000896 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[290]	valid_0's rmse: 0.226765
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000367 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 10:20:34,708] Trial 15 finished with value: 0.2159949719375985 and parameters: {'num_leaves': 86, 'max_depth': 12, 'learning_rate': 0.0935561303136741, 'n_estimators': 481, 'min_child_samples': 18, 'subsample': 0.6098168731308828, 'colsample_bytree': 0.9012722557311597, 'reg_alpha': 1.3188136734257207e-05, 'reg_lambda': 0.0017003853843252311}. Best is trial 5 with value: 0.2134063110125206.


Early stopping, best iteration is:
[114]	valid_0's rmse: 0.238534
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000434 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[25]	valid_0's rmse: 0.188696
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[51]	valid_0's rmse: 0.220754

[I 2025-06-10 10:20:35,046] Trial 16 finished with value: 0.21421642283930775 and parameters: {'num_leaves': 63, 'max_depth': 9, 'learning_rate': 0.014396755731643207, 'n_estimators': 347, 'min_child_samples': 10, 'subsample': 0.9365794611255672, 'colsample_bytree': 0.7020245144273409, 'reg_alpha': 1.5295878066037315e-07, 'reg_lambda': 7.704276940229598e-05}. Best is trial 5 with value: 0.2134063110125206.
[I 2025-06-10 10:20:35,208] Trial 17 finished with value: 0.27344281387851904 and parameters: {'num_leaves': 92, 'max_depth': 13, 'learning_rate': 0.0038755753638356704, 'n_estimators': 181, 'min_child_samples': 33, 'subsample': 0.6840915641511536, 'colsample_bytree': 0.9346195138152641, 'reg_alpha': 0.006705581523212757, 'reg_lambda': 1.7940567709759822e-08}. Best is trial 5 with value: 0.2134063110125206.


Early stopping, best iteration is:
[176]	valid_0's rmse: 0.227148
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000304 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4270
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 137
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[181]	valid_0's rmse: 0.312502
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000707 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7666
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 142
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[18

[I 2025-06-10 10:20:35,434] Trial 18 finished with value: 0.2270091490560763 and parameters: {'num_leaves': 42, 'max_depth': 3, 'learning_rate': 0.0292591078232525, 'n_estimators': 592, 'min_child_samples': 44, 'subsample': 0.8117825808793584, 'colsample_bytree': 0.7794264729109801, 'reg_alpha': 1.2848050110371776e-05, 'reg_lambda': 4.4702227931791695e-06}. Best is trial 5 with value: 0.2134063110125206.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4244
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 132
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[577]	valid_0's rmse: 0.260358
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000585 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7654
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 141
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[178]	valid_0's rmse: 0.199701
[LightGBM] [Info] Auto-choosing col-wise multi-thre



Did not meet early stopping. Best iteration is:
[410]	valid_0's rmse: 0.238935
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000573 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[199]	valid_0's rmse: 0.218461
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001040 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 10:20:35,736] Trial 19 finished with value: 0.22683881119828317 and parameters: {'num_leaves': 69, 'max_depth': 10, 'learning_rate': 0.013195117612394986, 'n_estimators': 410, 'min_child_samples': 19, 'subsample': 0.7215774613355881, 'colsample_bytree': 0.6117673866910652, 'reg_alpha': 1.0232115419689442e-05, 'reg_lambda': 8.571711648237012e-08}. Best is trial 5 with value: 0.2134063110125206.


Early stopping, best iteration is:
[341]	valid_0's rmse: 0.223121
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000305 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[756]	valid_0's rmse: 0.230796




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000817 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[675]	valid_0's rmse: 0.19029
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000861 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[752]	valid_0's rmse: 0.224712


[I 2025-06-10 10:20:36,313] Trial 20 finished with value: 0.21526620289972986 and parameters: {'num_leaves': 137, 'max_depth': 7, 'learning_rate': 0.005250525651026206, 'n_estimators': 758, 'min_child_samples': 16, 'subsample': 0.6538560736585852, 'colsample_bytree': 0.7279350365655725, 'reg_alpha': 9.308417049244157e-08, 'reg_lambda': 0.00157481003173407}. Best is trial 5 with value: 0.2134063110125206.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000528 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[212]	valid_0's rmse: 0.204164
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000585 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[145]	valid_0's rmse: 0.193826
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ov

[I 2025-06-10 10:20:36,655] Trial 21 finished with value: 0.2113646591524285 and parameters: {'num_leaves': 66, 'max_depth': 8, 'learning_rate': 0.013689586949103822, 'n_estimators': 332, 'min_child_samples': 9, 'subsample': 0.9300241271051011, 'colsample_bytree': 0.684893897972831, 'reg_alpha': 1.5066007609760097e-07, 'reg_lambda': 5.105387638703255e-05}. Best is trial 21 with value: 0.2113646591524285.


Did not meet early stopping. Best iteration is:
[327]	valid_0's rmse: 0.236104
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000230 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[121]	valid_0's rmse: 0.205664
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000574 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 10:20:36,933] Trial 22 finished with value: 0.20931533260731516 and parameters: {'num_leaves': 77, 'max_depth': 8, 'learning_rate': 0.022179472859175414, 'n_estimators': 523, 'min_child_samples': 8, 'subsample': 0.9095999669112728, 'colsample_bytree': 0.6624250906516778, 'reg_alpha': 1.546216773055904e-07, 'reg_lambda': 2.600507587529526e-05}. Best is trial 22 with value: 0.20931533260731516.


Early stopping, best iteration is:
[76]	valid_0's rmse: 0.188234
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000953 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[254]	valid_0's rmse: 0.234049
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000382 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds




Early stopping, best iteration is:
[114]	valid_0's rmse: 0.202091
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000586 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[147]	valid_0's rmse: 0.187512
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000779 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[71]	valid_0's rmse: 0.23001

[I 2025-06-10 10:20:37,176] Trial 23 finished with value: 0.2065396143875685 and parameters: {'num_leaves': 81, 'max_depth': 8, 'learning_rate': 0.057239272015954266, 'n_estimators': 277, 'min_child_samples': 9, 'subsample': 0.9156642050645557, 'colsample_bytree': 0.6559655142702285, 'reg_alpha': 6.595740574767763e-07, 'reg_lambda': 7.357014886092592e-05}. Best is trial 23 with value: 0.2065396143875685.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000314 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[63]	valid_0's rmse: 0.204668
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000601 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[174]	valid_0's rmse: 0.188532
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ove

[I 2025-06-10 10:20:37,413] Trial 24 finished with value: 0.20706685450867277 and parameters: {'num_leaves': 79, 'max_depth': 7, 'learning_rate': 0.056318514055498196, 'n_estimators': 248, 'min_child_samples': 9, 'subsample': 0.9180861642088748, 'colsample_bytree': 0.6705786979691831, 'reg_alpha': 9.403993236815816e-07, 'reg_lambda': 5.020268364157369e-05}. Best is trial 23 with value: 0.2065396143875685.
[I 2025-06-10 10:20:37,567] Trial 25 finished with value: 0.22450697367085862 and parameters: {'num_leaves': 95, 'max_depth': 7, 'learning_rate': 0.05972067770085811, 'n_estimators': 229, 'min_child_samples': 21, 'subsample': 0.9124583356202038, 'colsample_bytree': 0.6596654893246322, 'reg_alpha': 1.896424494931657e-06, 'reg_lambda': 0.0006238435161035629}. Best is trial 23 with value: 0.2065396143875685.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000332 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[149]	valid_0's rmse: 0.241946
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000589 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[41]	valid_0's rmse: 0.206897
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ove

[I 2025-06-10 10:20:37,702] Trial 26 finished with value: 0.21782739700762047 and parameters: {'num_leaves': 76, 'max_depth': 6, 'learning_rate': 0.06361418010217508, 'n_estimators': 160, 'min_child_samples': 15, 'subsample': 0.9948545036778603, 'colsample_bytree': 0.7319871695575659, 'reg_alpha': 4.4706322885544853e-07, 'reg_lambda': 0.009420883438212778}. Best is trial 23 with value: 0.2065396143875685.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001160 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[75]	valid_0's rmse: 0.223796
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000409 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[85]	valid_0's rmse: 0.20599
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the over

[I 2025-06-10 10:20:37,961] Trial 27 finished with value: 0.21001940491553586 and parameters: {'num_leaves': 99, 'max_depth': 8, 'learning_rate': 0.032156603834825145, 'n_estimators': 214, 'min_child_samples': 9, 'subsample': 0.9617141823985003, 'colsample_bytree': 0.647823536707772, 'reg_alpha': 6.92786265141371e-07, 'reg_lambda': 0.00019958247003619325}. Best is trial 23 with value: 0.2065396143875685.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[177]	valid_0's rmse: 0.228408
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000415 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4270
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 137
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[97]	valid_0's rmse: 0.263326
[LightGBM] [Info] Auto-choosing col-wi

[I 2025-06-10 10:20:38,098] Trial 28 finished with value: 0.23479034752892694 and parameters: {'num_leaves': 78, 'max_depth': 6, 'learning_rate': 0.052641312902219156, 'n_estimators': 101, 'min_child_samples': 29, 'subsample': 0.892957440248334, 'colsample_bytree': 0.678023011511245, 'reg_alpha': 5.299688745574155e-06, 'reg_lambda': 2.0172662245195713e-05}. Best is trial 23 with value: 0.2065396143875685.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000760 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[74]	valid_0's rmse: 0.231547
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000673 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[53]	valid_0's rmse: 0.23042
[LightGBM] [Info] Auto-choosing col-wise multi-thread

[I 2025-06-10 10:20:38,287] Trial 29 finished with value: 0.21094367642584735 and parameters: {'num_leaves': 53, 'max_depth': 10, 'learning_rate': 0.0917562195110067, 'n_estimators': 376, 'min_child_samples': 16, 'subsample': 0.8370345374510104, 'colsample_bytree': 0.6941028698606959, 'reg_alpha': 1.1575692959962953e-08, 'reg_lambda': 4.8169580484013e-07}. Best is trial 23 with value: 0.2065396143875685.


Early stopping, best iteration is:
[45]	valid_0's rmse: 0.219696
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000295 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[295]	valid_0's rmse: 0.237925
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000598 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[235]	valid_0's r

[I 2025-06-10 10:20:38,519] Trial 30 finished with value: 0.22215145269070158 and parameters: {'num_leaves': 59, 'max_depth': 5, 'learning_rate': 0.025959880363132813, 'n_estimators': 295, 'min_child_samples': 22, 'subsample': 0.9485892879265586, 'colsample_bytree': 0.6682803730924792, 'reg_alpha': 0.004692626106612815, 'reg_lambda': 0.027833974766027707}. Best is trial 23 with value: 0.2065396143875685.


Did not meet early stopping. Best iteration is:
[254]	valid_0's rmse: 0.215884
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000462 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[105]	valid_0's rmse: 0.202389
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000414 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[18

[I 2025-06-10 10:20:38,775] Trial 31 finished with value: 0.20603044262196082 and parameters: {'num_leaves': 102, 'max_depth': 8, 'learning_rate': 0.03489161393616217, 'n_estimators': 215, 'min_child_samples': 8, 'subsample': 0.95927222493681, 'colsample_bytree': 0.6379621730188431, 'reg_alpha': 8.96621091980059e-07, 'reg_lambda': 0.00025617198734391775}. Best is trial 31 with value: 0.20603044262196082.


Did not meet early stopping. Best iteration is:
[183]	valid_0's rmse: 0.232214
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000446 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[151]	valid_0's rmse: 0.234163
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000406 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[77]	valid_0's r

[I 2025-06-10 10:20:38,960] Trial 32 finished with value: 0.21780163000444638 and parameters: {'num_leaves': 106, 'max_depth': 8, 'learning_rate': 0.035261162554952666, 'n_estimators': 269, 'min_child_samples': 13, 'subsample': 0.908705425009117, 'colsample_bytree': 0.6286313006136721, 'reg_alpha': 3.8826207764264726e-07, 'reg_lambda': 0.00011349185443844936}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001131 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[62]	valid_0's rmse: 0.226616
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000568 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[77]	valid_0's rmse: 0.203254
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ove

[I 2025-06-10 10:20:39,226] Trial 33 finished with value: 0.20709402011787245 and parameters: {'num_leaves': 128, 'max_depth': 9, 'learning_rate': 0.043967510984812545, 'n_estimators': 195, 'min_child_samples': 7, 'subsample': 0.975786276909722, 'colsample_bytree': 0.7466676150702576, 'reg_alpha': 3.512854456254934e-05, 'reg_lambda': 0.0006901377584766184}. Best is trial 31 with value: 0.20603044262196082.


Early stopping, best iteration is:
[40]	valid_0's rmse: 0.190532
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001171 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[128]	valid_0's rmse: 0.227496
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[41]	valid_0's rmse: 0.203991

[I 2025-06-10 10:20:39,409] Trial 34 finished with value: 0.21054894567878266 and parameters: {'num_leaves': 140, 'max_depth': 9, 'learning_rate': 0.06948899605425826, 'n_estimators': 189, 'min_child_samples': 7, 'subsample': 0.9709034364858476, 'colsample_bytree': 0.7501688690408057, 'reg_alpha': 5.7453113219159044e-05, 'reg_lambda': 0.0017090326753884904}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000583 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7670
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 144
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[24]	valid_0's rmse: 0.193607
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000982 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[99]	valid_0's rmse: 0.234049
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ove

[I 2025-06-10 10:20:39,567] Trial 35 finished with value: 0.21474815449089202 and parameters: {'num_leaves': 126, 'max_depth': 7, 'learning_rate': 0.0492076946376333, 'n_estimators': 100, 'min_child_samples': 11, 'subsample': 0.9878810562891727, 'colsample_bytree': 0.709658491889913, 'reg_alpha': 0.0009748967514177023, 'reg_lambda': 0.0004384025217313078}. Best is trial 31 with value: 0.20603044262196082.


Early stopping, best iteration is:
[41]	valid_0's rmse: 0.196201
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[87]	valid_0's rmse: 0.229772
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000554 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[109]	valid_0's r

[I 2025-06-10 10:20:39,829] Trial 36 finished with value: 0.21737670869089376 and parameters: {'num_leaves': 132, 'max_depth': 11, 'learning_rate': 0.04404002170562224, 'n_estimators': 320, 'min_child_samples': 12, 'subsample': 0.95404990828721, 'colsample_bytree': 0.7955248362138516, 'reg_alpha': 5.5085725494118305e-05, 'reg_lambda': 0.007930915971308457}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000500 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[58]	valid_0's rmse: 0.1986
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[126]	valid_0's rmse: 0.231285
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the over

[I 2025-06-10 10:20:40,081] Trial 37 finished with value: 0.2107599894348642 and parameters: {'num_leaves': 101, 'max_depth': 10, 'learning_rate': 0.07867118009391263, 'n_estimators': 236, 'min_child_samples': 5, 'subsample': 0.9996857509564561, 'colsample_bytree': 0.6301667479385504, 'reg_alpha': 4.591444544586527e-06, 'reg_lambda': 0.0008103985592035678}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[33]	valid_0's rmse: 0.222227
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000391 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7672
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 145
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[14]	valid_0's rmse: 0.196407
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000827 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training unt



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000395 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[64]	valid_0's rmse: 0.204109
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000591 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[93]	valid_0's rmse: 0.189972
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the over

[I 2025-06-10 10:20:40,341] Trial 38 finished with value: 0.20839199673686118 and parameters: {'num_leaves': 125, 'max_depth': 6, 'learning_rate': 0.04069200490632748, 'n_estimators': 391, 'min_child_samples': 8, 'subsample': 0.9297410318217809, 'colsample_bytree': 0.7624676337907647, 'reg_alpha': 0.0003973963991709698, 'reg_lambda': 0.10039154315536912}. Best is trial 31 with value: 0.20603044262196082.
[I 2025-06-10 10:20:40,469] Trial 39 finished with value: 0.22908878069111874 and parameters: {'num_leaves': 90, 'max_depth': 9, 'learning_rate': 0.09916302104112419, 'n_estimators': 163, 'min_child_samples': 26, 'subsample': 0.9755522250434112, 'colsample_bytree': 0.7396608628225602, 'reg_alpha': 4.389292877511961e-05, 'reg_lambda': 0.004646602456617501}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000293 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4285
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 140
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[133]	valid_0's rmse: 0.247227
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000608 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[26]	valid_0's rmse: 0.208303
[LightGBM] [Info] Auto-choosing col-wise multi-threa

[I 2025-06-10 10:20:40,676] Trial 40 finished with value: 0.217369908131314 and parameters: {'num_leaves': 110, 'max_depth': 4, 'learning_rate': 0.037692925560113576, 'n_estimators': 219, 'min_child_samples': 17, 'subsample': 0.8684320829226726, 'colsample_bytree': 0.6404062568723492, 'reg_alpha': 3.757484939146871e-08, 'reg_lambda': 0.038246995770663574}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[122]	valid_0's rmse: 0.220143
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000235 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[58]	valid_0's rmse: 0.205719
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000592 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[

[I 2025-06-10 10:20:40,871] Trial 41 finished with value: 0.20873064982508527 and parameters: {'num_leaves': 127, 'max_depth': 6, 'learning_rate': 0.0569313870485893, 'n_estimators': 288, 'min_child_samples': 8, 'subsample': 0.9299277656238224, 'colsample_bytree': 0.7672851027701578, 'reg_alpha': 0.0011992535066656198, 'reg_lambda': 0.5816512518266836}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[120]	valid_0's rmse: 0.236474
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000380 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[139]	valid_0's rmse: 0.236055
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the o

[I 2025-06-10 10:20:41,048] Trial 42 finished with value: 0.2213707005059662 and parameters: {'num_leaves': 121, 'max_depth': 7, 'learning_rate': 0.03886099783120749, 'n_estimators': 387, 'min_child_samples': 13, 'subsample': 0.9478699398311718, 'colsample_bytree': 0.8353858975314526, 'reg_alpha': 0.0002800390219138211, 'reg_lambda': 0.135068902877333}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001570 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11163
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 144
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[48]	valid_0's rmse: 0.228319
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000645 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[74]	valid_0's rmse: 0.223869
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ove

[I 2025-06-10 10:20:41,254] Trial 43 finished with value: 0.21712245718187373 and parameters: {'num_leaves': 140, 'max_depth': 6, 'learning_rate': 0.07351528522881048, 'n_estimators': 357, 'min_child_samples': 10, 'subsample': 0.9236270755717697, 'colsample_bytree': 0.7824136342775093, 'reg_alpha': 0.00010930898811097914, 'reg_lambda': 0.00021251169777415744}. Best is trial 31 with value: 0.20603044262196082.


Early stopping, best iteration is:
[39]	valid_0's rmse: 0.19667
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001176 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11165
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 145
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[50]	valid_0's rmse: 0.230829
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[306]	valid_0's rm

[I 2025-06-10 10:20:41,581] Trial 44 finished with value: 0.2795150169774231 and parameters: {'num_leaves': 111, 'max_depth': 8, 'learning_rate': 0.001335230799469873, 'n_estimators': 306, 'min_child_samples': 7, 'subsample': 0.8784130510852604, 'colsample_bytree': 0.7168887495181584, 'reg_alpha': 0.2543413931570073, 'reg_lambda': 7.9318637117416575}. Best is trial 31 with value: 0.20603044262196082.


Did not meet early stopping. Best iteration is:
[306]	valid_0's rmse: 0.250292
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001205 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[306]	valid_0's rmse: 0.276941




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000615 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[208]	valid_0's rmse: 0.215627
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[69]	valid_0's rmse: 0.198813
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the ove

[I 2025-06-10 10:20:41,875] Trial 45 finished with value: 0.216952205969396 and parameters: {'num_leaves': 147, 'max_depth': 5, 'learning_rate': 0.027456090663061726, 'n_estimators': 424, 'min_child_samples': 11, 'subsample': 0.9706035799954243, 'colsample_bytree': 0.813969250105064, 'reg_alpha': 1.1369651751373767e-06, 'reg_lambda': 1.1420029341374607e-05}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000305 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4287
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 141
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Did not meet early stopping. Best iteration is:
[256]	valid_0's rmse: 0.233458
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000594 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7668
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 143
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[110]	valid_0's rmse: 0.198961
[LightGBM] [Info] Auto-choosing col-wise multi-thre

[I 2025-06-10 10:20:42,118] Trial 46 finished with value: 0.22011133209331976 and parameters: {'num_leaves': 104, 'max_depth': 9, 'learning_rate': 0.018449832272636867, 'n_estimators': 261, 'min_child_samples': 14, 'subsample': 0.8949487827464466, 'colsample_bytree': 0.757446441392682, 'reg_alpha': 0.0004329619242320798, 'reg_lambda': 0.20749184469398244}. Best is trial 31 with value: 0.20603044262196082.
[I 2025-06-10 10:20:42,326] Trial 47 finished with value: 0.21315719641368683 and parameters: {'num_leaves': 121, 'max_depth': 7, 'learning_rate': 0.0445963535358778, 'n_estimators': 142, 'min_child_samples': 5, 'subsample': 0.8569139871425547, 'colsample_bytree': 0.6094549238126695, 'reg_alpha': 5.318022729441864e-06, 'reg_lambda': 6.685190330369308e-05}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000414 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4289
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 142
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[65]	valid_0's rmse: 0.222229
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000331 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7672
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 145
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[26]	valid_0's rmse: 0.196804
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the over



Did not meet early stopping. Best iteration is:
[213]	valid_0's rmse: 0.208581
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000589 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7670
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 144
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[157]	valid_0's rmse: 0.190162
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001032 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11167
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 146
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds


[I 2025-06-10 10:20:42,687] Trial 48 finished with value: 0.21128411201442931 and parameters: {'num_leaves': 81, 'max_depth': 8, 'learning_rate': 0.010189661662161691, 'n_estimators': 213, 'min_child_samples': 7, 'subsample': 0.8088820879284976, 'colsample_bytree': 0.6952061555456709, 'reg_alpha': 0.0019292228243103593, 'reg_lambda': 1.6447707531962137e-06}. Best is trial 31 with value: 0.20603044262196082.


Did not meet early stopping. Best iteration is:
[211]	valid_0's rmse: 0.235109
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000217 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4183
[LightGBM] [Info] Number of data points in the train set: 109, number of used features: 129
[LightGBM] [Info] Start training from score 0.253199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[416]	valid_0's rmse: 0.266556


[I 2025-06-10 10:20:42,901] Trial 49 finished with value: 0.23017609162589103 and parameters: {'num_leaves': 128, 'max_depth': 5, 'learning_rate': 0.032883168981031455, 'n_estimators': 497, 'min_child_samples': 50, 'subsample': 0.940733864327079, 'colsample_bytree': 0.6839858804079064, 'reg_alpha': 0.03251102473698905, 'reg_lambda': 0.003348227672950287}. Best is trial 31 with value: 0.20603044262196082.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000825 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7636
[LightGBM] [Info] Number of data points in the train set: 216, number of used features: 139
[LightGBM] [Info] Start training from score 0.230502
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[212]	valid_0's rmse: 0.199053
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000959 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11135
[LightGBM] [Info] Number of data points in the train set: 323, number of used features: 142
[LightGBM] [Info] Start training from score 0.212284
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[220]	valid_0's rmse: 0.224919
Mejores parámetros encontrados: {'num_leaves': 102, 'max_depth'

In [97]:
X_kgl = X_kgl[feature_columns]
for col in ['cat1', 'cat2', 'cat3', 'brand']:
    X_kgl[col] = X_kgl[col].astype('category')

y_pred = best_model.predict(X_kgl)


In [98]:
productos_ok = pd.read_csv("https://storage.googleapis.com/open-courses/austral2025-af91/labo3v/product_id_apredecir201912.txt", sep="\t")
result = pd.DataFrame({"product_id": X_kgl["product_id"],  "tn": y_pred})
result = result[result["product_id"].isin(productos_ok["product_id"])]
result = result.groupby("product_id").agg({"tn":"sum"}).reset_index()
result

Unnamed: 0,product_id,tn
0,20982,0.304826
1,21170,0.027379
2,21192,0.062962
3,21202,0.03237
4,21218,0.04253
5,21222,0.022821


In [99]:
result.to_csv("../../outputs/lgb_cat1_REF.csv", index=False, sep=',')

In [100]:
REF =  pd.read_csv("../../outputs/lgb_cat1_REF.csv", sep=',')
HC =  pd.read_csv("../../outputs/lgb_cat1_HC.csv", sep=',')
PC =  pd.read_csv("../../outputs/lgb_cat1_PC.csv", sep=',')
FOODS =  pd.read_csv("../../outputs/lgb_cat1_FOODS.csv", sep=',')

In [101]:
df_concatenado = pd.concat([REF, HC, PC, FOODS], ignore_index=True)
df_concatenado.shape

(780, 2)

In [105]:
df_201912 = df_completo_copy[df_completo_copy['periodo'] == 201912][['product_id', 'tn']]
df_201912

Unnamed: 0,product_id,tn
43975,20830,1.11598
43980,20836,2.26427
43979,20835,0.97552
43978,20833,0.00000
43977,20832,0.23123
...,...,...
43562,20410,10.14597
43561,20409,2.40926
43560,20408,11.77663
43567,20415,4.85906


In [106]:

# Suponiendo que la columna con valores numéricos es 'target' (modificar si se llama distinto)
columna_objetivo = 'tn'

# Buscar valores negativos
negativos_mask = df_concatenado[columna_objetivo] < 0

# Reemplazar con valores de df_201912
df_concatenado.loc[negativos_mask, columna_objetivo] = (
    df_concatenado.loc[negativos_mask, 'product_id']
    .map(df_201912.set_index('product_id')[columna_objetivo])
)

# Verificar resultado
print(f"✔️ Valores negativos corregidos: {negativos_mask.sum()}")

✔️ Valores negativos corregidos: 94


In [107]:
df_concatenado.to_csv("../../outputs/lgb_x_cat1.csv", index=False, sep=',')

In [74]:
pd.set_option('display.max_rows', None)
# Supongamos que usaste un DataFrame para entrenar:
feature_names = X.columns  # Si X_train es un DataFrame

# Obtener importancias
importances = best_model.feature_importances_

# Combinar en un DataFrame para visualizar mejor
importancia_df = pd.DataFrame({
    'Variable': feature_names,
    'Importancia': importances
}).sort_values(by='Importancia', ascending=False)

importancia_df


Unnamed: 0,Variable,Importancia
4,cat3,1681
5,brand,1202
0,product_id,899
7,stock_final,677
94,ytd_sum,636
6,sku_size,497
115,best_month_rank,435
82,expanding_mean,434
138,skew_6,407
83,cumulative_sum,374
