# Exp 04: lgb agrupando por periodo-product_id

Con FE sin optimización bayesiana

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import gc

##### Merge: periodos + clientes + productos

In [2]:
df = pd.read_csv("../../data/preprocessed/base.csv", sep=',')
df["periodo_dt"] = pd.to_datetime(df["periodo"].astype(str), format="%Y%m")


periodos = pd.date_range(start=df['periodo_dt'].min(), end=df['periodo_dt'].max(), freq="MS")
productos = df['product_id'].unique()


idx = pd.MultiIndex.from_product([productos, periodos], names=['product_id', 'periodo'])
completo = idx.to_frame(index=False)
completo["periodo"] = completo["periodo"].dt.strftime("%Y%m").astype(int)

del periodos, productos, 
gc.collect()
completo

Unnamed: 0,product_id,periodo
0,20524,201701
1,20524,201702
2,20524,201703
3,20524,201704
4,20524,201705
...,...,...
44383,20770,201908
44384,20770,201909
44385,20770,201910
44386,20770,201911


##### Cruzamos con productos

In [3]:
productos = pd.read_csv("../../data/raw/tb_productos.csv", sep='\t')
productos = productos.drop_duplicates(subset=['product_id'], keep='first')
completo = completo.merge(productos, how='left', on="product_id")
del productos
gc.collect()

20

##### Cruzamos con stock

In [4]:
stocks = pd.read_csv("../../data/raw/tb_stocks.csv", sep='\t')
stocks = stocks.groupby(by=["periodo", "product_id"]).agg({"stock_final": "sum"}).reset_index()
completo = completo.merge(stocks, how='left', on=['periodo', 'product_id'])
del stocks
gc.collect()
completo.head()

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,


##### Cruzamos con ventas

In [5]:
sellin = pd.read_csv("../../data/raw/sell-in.csv", sep='\t')
# Agrupar ventas por periodo, cliente y producto
dt = sellin.groupby(by=["periodo","product_id"]).agg({"tn":"sum"}).reset_index()
df_completo = completo.merge(dt, how='left', on=['periodo', 'product_id'])
df_completo['tn'] = df_completo['tn'].fillna(0)
del sellin, dt, completo
gc.collect()
df_completo

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final,tn
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,,6.48085
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,,3.99755
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,,7.14711
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,,6.82163
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,,9.25949
...,...,...,...,...,...,...,...,...,...
44383,20770,201908,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44384,20770,201909,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44385,20770,201910,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000
44386,20770,201911,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000


##### Target

In [6]:
# Asegurarte de tener 'periodo_dt' (datetime) en completo
df_completo['periodo_dt'] = pd.to_datetime(df_completo['periodo'], format='%Y%m')

# Crear DataFrame auxiliar con tn como target y fecha adelantada
ventas_futuras = df_completo[['periodo_dt', 'product_id', 'tn']].copy()
ventas_futuras['periodo_target_dt'] = ventas_futuras['periodo_dt'] - pd.DateOffset(months=2)
ventas_futuras = ventas_futuras.rename(columns={'tn': 'target'})

# Merge con completo usando periodo adelantado
df_completo = df_completo.merge(
    ventas_futuras[['periodo_target_dt', 'product_id', 'target']],
    how='left',
    left_on=['periodo_dt', 'product_id'],
    right_on=['periodo_target_dt', 'product_id']
)

# Eliminar columna auxiliar
df_completo = df_completo.drop(columns=['periodo_target_dt'])
del ventas_futuras
gc.collect()
print(f"✅ Target generado. Filas con target no nulo: {df_completo['target'].notna().sum()}")

✅ Target generado. Filas con target no nulo: 41922


In [12]:
df_completo

Unnamed: 0,product_id,periodo,cat1,cat2,cat3,brand,sku_size,stock_final,tn,periodo_dt,target
0,20524,201701,HC,VAJILLA,Cristalino,Importado,500.0,,6.48085,2017-01-01,7.14711
1,20524,201702,HC,VAJILLA,Cristalino,Importado,500.0,,3.99755,2017-02-01,6.82163
2,20524,201703,HC,VAJILLA,Cristalino,Importado,500.0,,7.14711,2017-03-01,9.25949
3,20524,201704,HC,VAJILLA,Cristalino,Importado,500.0,,6.82163,2017-04-01,7.04113
4,20524,201705,HC,VAJILLA,Cristalino,Importado,500.0,,9.25949,2017-05-01,5.92819
...,...,...,...,...,...,...,...,...,...,...,...
44383,20770,201908,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-08-01,0.00000
44384,20770,201909,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-09-01,0.00000
44385,20770,201910,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-10-01,3.18500
44386,20770,201911,HC,PROFESIONAL,LV ROPA POLVO,INDUSTRIAL,25.0,,0.00000,2019-11-01,


##### Verifico las NaN en el target: Existen porque hay clientes que solo compraron 2 veces.

In [13]:
nan_count = df_completo['target'].isna().sum()
print(f"🔍 Total de NaN en target: {nan_count}")
del nan_count
gc.collect()

🔍 Total de NaN en target: 2466


49

##### Generación de IDs

In [7]:
df_completo = df_completo.sort_values(['periodo', 'product_id'])
df_completo['id'] = df_completo.groupby(['product_id']).cumcount() + 1

##### Periodo 

In [8]:
df_completo["periodo_dt"] = pd.to_datetime(df_completo["periodo"].astype(str), format="%Y%m")

##### Eliminar productos que no nacieron

In [9]:
nacimiento_producto = df.groupby("product_id")["periodo_dt"].agg(["min"]).reset_index()
# Renombrar columna max a muerte_cliente_dt
nacimiento_producto = nacimiento_producto.rename(columns={'min': 'nacimiento_producto'})


# Unir con df_final para traer fecha de muerte del cliente
df_completo = df_completo.merge(nacimiento_producto, on='product_id', how='left')

# Filtrar filas donde periodo_dt > muerte_cliente_dt
df_completo = df_completo[df_completo['periodo_dt'] >= df_completo['nacimiento_producto']]

# Opcional: eliminar columna auxiliar
# df_final = df_final.drop(columns=['muerte_cliente_dt'])
del nacimiento_producto
gc.collect()
print(f"✅ Dataset filtrado con {len(df_completo):,} filas.")

✅ Dataset filtrado con 35,888 filas.


##### Correlograma

In [10]:
# Supongamos que tu DataFrame se llama df
cor_matrix = df_completo.corr(numeric_only=True)

# Tomamos solo la parte superior de la matriz (sin la diagonal)
upper = cor_matrix.where(np.triu(np.ones(cor_matrix.shape), k=1).astype(bool))

# Filtramos correlaciones fuertes (valor absoluto mayor a 0.7)
high_corr = upper.stack().reset_index()
high_corr.columns = ['Variable 1', 'Variable 2', 'Correlación']
high_corr_filtrada = high_corr[high_corr['Correlación'].abs() > 0.7]

print(high_corr_filtrada)

del high_corr_filtrada, cor_matrix, upper, high_corr
gc.collect()

   Variable 1 Variable 2  Correlación
10    periodo         id     0.955974
18         tn     target     0.930409


0

##### Elimino variables muy correlacionadas

In [11]:
df_completo.drop(columns=['periodo'], inplace=True)

##### Extracción de componentes temporales

In [12]:
df_completo['year'] = df_completo['periodo_dt'].dt.year
df_completo['month'] = df_completo['periodo_dt'].dt.month
# Variables dummy estacionales
df_completo['quarter'] = df_completo['periodo_dt'].dt.quarter
df_completo['semester'] = np.where(df_completo['month'] <= 6, 1, 2)
# Efectos de fin de año
df_completo['year_end'] = np.where(df_completo['month'].isin([11, 12]), 1, 0)
df_completo['year_start'] = np.where(df_completo['month'].isin([1, 2]), 1, 0)
# Indicadores estacionales
df_completo['season'] = df_completo['month'] % 12 // 3 + 1  # 1:Invierno, 2:Primavera, etc.
# Variables cíclicas (para capturar patrones estacionales)
df_completo['month_sin'] = np.sin(2 * np.pi * df_completo['month']/12)
df_completo['month_cos'] = np.cos(2 * np.pi * df_completo['month']/12)

#####  Lags, diferencias, medias móviles y otras yerbas

In [13]:
# Ordenamos por fecha para asegurar consistencia
df_completo = df_completo.sort_values('periodo_dt')

## 1. Lags (rezagos) de 1 a 12 meses
for i in range(1, 15):
    df_completo[f'lag_{i}'] = df_completo['target'].shift(i)

## 2. Diferencias (deltas) - cambio respecto al mes anterior
for i in range(1, 13):
    df_completo[f'delta_{i}'] = df_completo['target'].diff(i)

## 3. Diferencias porcentuales
for i in range(1, 13):
    df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)

## 4. Medias móviles (promedios móviles)
windows = [2, 3, 6, 9, 12]  # También puedes incluir [2,4,5,7] según necesidad
for w in windows:
    df_completo[f'rolling_mean_{w}'] = df_completo['target'].rolling(window=w, min_periods=1).mean()
    df_completo[f'rolling_std_{w}'] = df_completo['target'].rolling(window=w, min_periods=1).std()
    df_completo[f'rolling_min_{w}'] = df_completo['target'].rolling(window=w, min_periods=1).min()
    df_completo[f'rolling_max_{w}'] = df_completo['target'].rolling(window=w, min_periods=1).max()
    df_completo[f'rolling_median_{w}'] = df_completo['target'].rolling(window=w, min_periods=1).median()

## 5. Características de tendencia y estacionalidad
df_completo['expanding_mean'] = df_completo['target'].expanding().mean()
df_completo['cumulative_sum'] = df_completo['target'].cumsum()

## 6. Características de diferencia estacional (12 meses para datos mensuales)
df_completo['seasonal_diff_12'] = df_completo['target'].diff(12)

## 7. Estadísticas anuales comparativas
df_completo['vs_prev_year'] = df_completo['target'] / df_completo['lag_12'] - 1  # Crecimiento interanual

## 8. Componentes de descomposición (simplificada)
# Tendencia (usando media móvil de 12 meses)
df_completo['trend'] = df_completo['target'].rolling(window=12, min_periods=1).mean()
# Estacionalidad (diferencia entre valor real y tendencia)
df_completo['seasonality'] = df_completo['target'] - df_completo['trend']

## 9. Variables booleanas para eventos especiales
df_completo['new_high'] = (df_completo['target'] == df_completo['rolling_max_12']).astype(int)
df_completo['new_low'] = (df_completo['target'] == df_completo['rolling_min_12']).astype(int)

## 10. Características de aceleración/deceleración
df_completo['acceleration'] = df_completo['delta_1'].diff(1)  # Cambio en la tasa de cambio

  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)
  df_completo[f'pct_change_{i}'] = df_completo['target'].pct_change(i)


##### Estadísticas de Ventana Dinámica

In [14]:
# Medias móviles exponenciales
df_completo['ewm_alpha_0.3'] = df_completo['target'].ewm(alpha=0.3, adjust=False).mean()
df_completo['ewm_alpha_0.5'] = df_completo['target'].ewm(alpha=0.5, adjust=False).mean()

# Medias móviles centradas
df_completo['rolling_center_mean_3'] = df_completo['target'].rolling(window=3, center=True).mean()

# Sumas acumuladas por año
df_completo['ytd_sum'] = df_completo.groupby(df_completo['periodo_dt'].dt.year)['target'].cumsum()

##### Características de Tendencia y Ciclo

In [15]:
# Modelado de tendencia polinomial
df_completo['time_index'] = range(len(df_completo))
df_completo['trend_linear'] = np.poly1d(np.polyfit(df_completo['time_index'], df_completo['target'], 1))(df_completo['time_index'])
df_completo['trend_quadratic'] = np.poly1d(np.polyfit(df_completo['time_index'], df_completo['target'], 2))(df_completo['time_index'])

# Residuales de tendencia
df_completo['residual_trend'] = df_completo['target'] - df_completo['trend_linear']

##### Características de Cambio de Régimen

In [16]:
# Z-Score respecto a ventana móvil
df_completo['zscore_6'] = (df_completo['target'] - df_completo['rolling_mean_6']) / df_completo['rolling_std_6']

# Detección de outliers
df_completo['is_outlier_3sigma'] = np.where(np.abs(df_completo['zscore_6']) > 3, 1, 0)

# Cambios bruscos (spikes)
df_completo['spike_up'] = np.where(df_completo['delta_1'] > df_completo['rolling_std_3'], 1, 0)
df_completo['spike_down'] = np.where(df_completo['delta_1'] < -df_completo['rolling_std_3'], 1, 0)

##### Características de Patrones Temporales

In [17]:
# Autocorrelaciones parciales
from statsmodels.tsa.stattools import pacf
pacf_values = pacf(df_completo['target'].dropna(), nlags=12)
for i in range(1, 6):
    df_completo[f'pacf_{i}'] = df_completo['target'].shift(i) * pacf_values[i]

# Estacionalidad múltiple (si hay patrones semestrales)
df_completo['semester_mean'] = df_completo.groupby(['year', 'semester'])['target'].transform('mean')

  df_completo[f'pacf_{i}'] = df_completo['target'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['target'].shift(i) * pacf_values[i]
  df_completo[f'pacf_{i}'] = df_completo['target'].shift(i) * pacf_values[i]
  df_completo['semester_mean'] = df_completo.groupby(['year', 'semester'])['target'].transform('mean')


##### Características de Forecast Ingenieriles

In [18]:
# Método ingenuo (último valor)
df_completo['naive_forecast'] = df_completo['target'].shift(1)

# Seasonal naive (valor del mismo período año anterior)
df_completo['seasonal_naive'] = df_completo['target'].shift(12)

# Promedio móvil como forecast
df_completo['ma_forecast_3'] = df_completo['rolling_mean_3'].shift(1)

  df_completo['naive_forecast'] = df_completo['target'].shift(1)
  df_completo['seasonal_naive'] = df_completo['target'].shift(12)
  df_completo['ma_forecast_3'] = df_completo['rolling_mean_3'].shift(1)


##### Características de Decomposición Temporal

In [19]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Descomposición clásica (additiva o multiplicativa)
result = seasonal_decompose(df_completo['target'].dropna(), model='additive', period=12)
df_completo['trend_decomposed'] = result.trend
df_completo['seasonal_decomposed'] = result.seasonal
df_completo['residual_decomposed'] = result.resid

  df_completo['trend_decomposed'] = result.trend
  df_completo['seasonal_decomposed'] = result.seasonal
  df_completo['residual_decomposed'] = result.resid


##### Características de Ventanas Asimétricas

In [20]:
# Mejor mes histórico
df_completo['best_month_rank'] = df_completo.groupby('month')['target'].rank(ascending=False)

# Comparación con mismo mes año anterior
df_completo['vs_last_year_same_month'] = df_completo['target'] / df_completo['lag_12'] - 1

# Acumulado últimos 3 vs mismos 3 meses año anterior
df_completo['last3_vs_ly3'] = (df_completo['target'] + df_completo['lag_1'] + df_completo['lag_2']) / (df_completo['lag_12'] + df_completo['lag_13'] + df_completo['lag_14']) - 1

  df_completo['best_month_rank'] = df_completo.groupby('month')['target'].rank(ascending=False)
  df_completo['vs_last_year_same_month'] = df_completo['target'] / df_completo['lag_12'] - 1
  df_completo['last3_vs_ly3'] = (df_completo['target'] + df_completo['lag_1'] + df_completo['lag_2']) / (df_completo['lag_12'] + df_completo['lag_13'] + df_completo['lag_14']) - 1


##### Transformaciones Matemáticas

In [21]:
from scipy import stats
from scipy.special import boxcox1p

# Transformaciones clásicas
df_completo['log_target'] = np.log1p(df_completo['target'])
df_completo['sqrt_target'] = np.sqrt(df_completo['target'])

# Box-Cox (solo si target > 0)
mask = df_completo['target'] > 0
df_completo['boxcox_target'] = np.nan
df_completo.loc[mask, 'boxcox_target'], _ = stats.boxcox(df_completo.loc[mask, 'target'])

# Diferenciación
df_completo['diff1_log'] = df_completo['log_target'].diff(1)


  df_completo['log_target'] = np.log1p(df_completo['target'])
  df_completo['sqrt_target'] = np.sqrt(df_completo['target'])
  df_completo['boxcox_target'] = np.nan
  df_completo['diff1_log'] = df_completo['log_target'].diff(1)


##### Características de Interacción

In [22]:
# Interacción entre tendencia y estacionalidad
df_completo['trend_season_interaction'] = df_completo['trend'] * df_completo['seasonal_decomposed']

# Interacción lags con estacionalidad
for i in [1, 2, 3, 12]:
    df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']

  df_completo['trend_season_interaction'] = df_completo['trend'] * df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']
  df_completo[f'lag_{i}_season_adj'] = df_completo[f'lag_{i}'] / df_completo['seasonal_decomposed']


##### Completamos NaN del target con ceros

In [23]:
df_completo['target'].fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_completo['target'].fillna(0, inplace=True)


##### Dividimos el dataset

In [26]:
df_completo["periodo"] = df_completo["periodo_dt"].dt.year * 100 + df_completo["periodo_dt"].dt.month
dt_kgl = df_completo[df_completo["periodo"].isin([201912])]
ts = df_completo.drop(df_completo[df_completo["periodo"].isin([201911,201912])].index,axis=0)

  df_completo["periodo"] = df_completo["periodo_dt"].dt.year * 100 + df_completo["periodo_dt"].dt.month


##### Productos a predecir

In [29]:
feature_columns = [col for col in ts.columns if col not in ['periodo_dt', 'tn_target', 'periodo', 'nacimiento_producto', 'target']]
X_kgl = dt_kgl[feature_columns]
productos_a_predecir = pd.read_csv("../../data/raw/product_id_apredecir201912.csv")
# Filtrar filas
productos_filtrados = productos_a_predecir['product_id'].unique()
X_kgl = X_kgl[X_kgl['product_id'].isin(productos_filtrados)]
X_kgl['product_id'].nunique()

780

##### Optimización de Hiperparámetros con Optuna

In [27]:
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error
import optuna
import numpy as np

# Preparar datos
feature_columns = [col for col in ts.columns if col not in ['periodo_dt', 'tn_target', 'periodo', 'nacimiento_producto', 'target']]
X = ts[feature_columns]
y = ts['target']

for col in ['cat1', 'cat2', 'cat3', 'brand']:
    X[col] = X[col].astype('category')


# Dividir los datos en conjuntos de entrenamiento y prueba (opcional, pero recomendado)
# Para este caso, vamos a entrenar con todos los datos disponibles antes de 201911
# Si quisieras validación, podrías usar un periodo anterior como validación

# Definir el modelo LightGBM
lgb_reg = lgb.LGBMRegressor(random_state=12345)

# Entrenar el modelo
lgb_reg.fit(X, y)

print("Modelo LightGBM entrenado con éxito.")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = X[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_in

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0,009662 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 26096
[LightGBM] [Info] Number of data points in the train set: 33425, number of used features: 122
[LightGBM] [Info] Start training from score 37,177162
Modelo LightGBM entrenado con éxito.


In [30]:
for col in ['cat1', 'cat2', 'cat3', 'brand']:
    X_kgl[col] = X_kgl[col].astype('category')


y_pred = lgb_reg.predict(X_kgl)


In [31]:
productos_ok = pd.read_csv("https://storage.googleapis.com/open-courses/austral2025-af91/labo3v/product_id_apredecir201912.txt", sep="\t")
result = pd.DataFrame({"product_id": X_kgl["product_id"],  "tn": y_pred})
result = result[result["product_id"].isin(productos_ok["product_id"])]
result = result.groupby("product_id").agg({"tn":"sum"}).reset_index()
result

Unnamed: 0,product_id,tn
0,20001,1.118452
1,20002,1.136887
2,20003,1.136887
3,20004,6.732289
4,20005,6.900514
...,...,...
775,21263,7.719531
776,21265,7.719531
777,21266,7.719531
778,21267,7.719531


In [32]:
result.to_csv("../../outputs/lgb_exp04_sin_bayesiana_con_fe.csv", index=False, sep=',')