In [3]:
import glob
import os
from datetime import datetime

import lightgbm as lgb
import numpy as np
import pandas as pd

from config.config import (
    FE_PATH,
    MES_TEST_FINAL,
    MODELOS_PATH,
    RESULTADOS_PREDICCION_PATH,
    NOMBRE_EXPERIMENTO,
)
from src.data_load_preparation import cargar_datos, preparar_test_final


In [None]:
# Load the engineered-features dataset from the location configured in FE_PATH.
data = cargar_datos(FE_PATH)

# Build the final test split. The original note records that the project
# config sets MES_TEST_FINAL = [202108] -- confirm against config/config.py.
X_test, clientes_test = preparar_test_final(data)

# Optional: keep every column for the rows whose `foto_mes` is one of the
# final-test months, as an independent copy of `data`.
es_mes_test = data["foto_mes"].isin(MES_TEST_FINAL)
data_test = data[es_mes_test].copy()

# Show the size of the three objects side by side as a quick sanity check.
tuple(len(obj) for obj in (X_test, clientes_test, data_test))


2025-11-16 22:20:16,607 - INFO - 📥 Cargando dataset desde: /home/sanmartinofacundo/buckets/b1/features/fe_v3/competencia_02_fe_v3.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [None]:
# Find the saved model files for this experiment: every file in MODELOS_PATH
# named "<NOMBRE_EXPERIMENTO>_seed*_final.txt".
model_pattern = os.path.join(MODELOS_PATH, f"{NOMBRE_EXPERIMENTO}_seed*_final.txt")

# glob returns matches in arbitrary order; sorting keeps the listing (and the
# order in which models are later loaded) the same on every run.
model_files = sorted(glob.glob(model_pattern))

# Stop here when nothing matched: otherwise the failure only shows up later,
# when the predictions are stacked, with an error unrelated to the real cause.
if not model_files:
    raise FileNotFoundError(
        f"No se encontraron modelos que coincidan con: {model_pattern}"
    )

print("Modelos encontrados:")
for f in model_files:
    print(" -", os.path.basename(f))


In [None]:
# np.vstack([]) fails with "need at least one array to concatenate", which
# says nothing about the actual problem, so check for models up front.
if not model_files:
    raise ValueError(
        "No hay modelos en model_files; no se puede armar el ensemble"
    )

# Predictions are collected in their own list so that `all_preds` is only ever
# bound to the final stacked array (no list -> ndarray rebinding).
preds_por_modelo = []

for model_path in model_files:
    print(f"ðŸ“‚ Cargando modelo: {os.path.basename(model_path)}")
    # Load the saved LightGBM booster from its text file and score the test set.
    model = lgb.Booster(model_file=model_path)
    y_pred = model.predict(X_test)
    preds_por_modelo.append(y_pred)

# One row per model (assuming each predict call returns a 1-D array, this is
# (n_models, n_rows_in_X_test)).
all_preds = np.vstack(preds_por_modelo)

# Ensemble score: plain average across models for every test row.
prob_ensemble = all_preds.mean(axis=0)

prob_ensemble.shape


In [None]:
# Number of highest-scored clients to select. The original notebook marks this
# value as an example ("por ejemplo"), so treat it as a tunable.
N_optimo = 10500

# Position i of prob_ensemble is used to pick position i of clientes_test.
# If the two differ in length, the selection would silently be wrong.
if len(prob_ensemble) != len(clientes_test):
    raise ValueError(
        f"prob_ensemble ({len(prob_ensemble)}) y clientes_test "
        f"({len(clientes_test)}) no tienen la misma longitud"
    )

# Positions sorted by ensemble probability, highest first.
indices_ordenados = np.argsort(prob_ensemble)[::-1]

# Keep the first N positions; the slice yields fewer when N_optimo exceeds the
# number of scored rows.
topN_idx = indices_ordenados[:N_optimo]

# Select the clients to target by POSITION. Plain `clientes_test[topN_idx]`
# is label-based on a pandas Series with an integer index, which would return
# the wrong clients (or raise KeyError) when that index is not 0..n-1.
if hasattr(clientes_test, "iloc"):
    clientes_incentivar = clientes_test.iloc[topN_idx]
else:
    clientes_incentivar = np.asarray(clientes_test)[topN_idx]


In [None]:
# File name embeds the experiment name, the chosen N and a minute-resolution
# timestamp of when this cell ran.
timestamp = datetime.now().strftime("%Y%m%d_%H%M")
filename = f"{NOMBRE_EXPERIMENTO}_topN_{N_optimo}_{timestamp}.csv"
output_path = os.path.join(RESULTADOS_PREDICCION_PATH, filename)

# Make sure the results directory exists; a no-op when it is already there.
os.makedirs(RESULTADOS_PREDICCION_PATH, exist_ok=True)

# Write the selected clients as a bare CSV: no header row and no index column.
pd.DataFrame(clientes_incentivar).to_csv(output_path, header=False, index=False)

# Display the path of the file that was just written.
output_path