In [1]:
import glob
import os
from datetime import datetime

import lightgbm as lgb
import numpy as np
import pandas as pd

from config.config import (
    FE_PATH,
    MES_TEST_FINAL,
    MODELOS_PATH,
    RESULTADOS_PREDICCION_PATH,
    NOMBRE_EXPERIMENTO,
)
from src.data_load_preparation import cargar_datos, preparar_test_final


In [2]:
# Load the features dataset from the configured path.
data = cargar_datos(FE_PATH)

# Final test split as defined by the config (MES_TEST_FINAL = [202108]).
X_test, clientes_test = preparar_test_final(data)

# Full rows (every column) for the test month(s), kept in case they are
# needed alongside the model matrix.
data_test = data.loc[data["foto_mes"].isin(MES_TEST_FINAL)].copy()

# Row counts of the three objects, shown side by side as a quick sanity check.
tuple(map(len, (X_test, clientes_test, data_test)))


2025-11-16 00:47:57,856 - INFO - 📥 Cargando dataset desde: /home/sanmartinofacundo/buckets/b1/features/fe_v3/competencia_02_fe_v3.parquet


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

2025-11-16 00:58:08,180 - INFO - ✅ Dataset cargado con 4,717,958 filas y 673 columnas
2025-11-16 00:58:36,947 - INFO - 📊 Test final: 164,822 registros de meses [202108]
2025-11-16 00:58:37,475 - INFO - ✅ X_test: (164822, 672)


(164822, 164822, 164822)

In [3]:
# Locate every per-seed "final" model file saved for this experiment.
model_pattern = os.path.join(MODELOS_PATH, f"{NOMBRE_EXPERIMENTO}_seed*_final.txt")
# Sorted so the models are always listed (and later loaded) in the same order.
model_files = sorted(glob.glob(model_pattern))

# Fail here, with the pattern in the message, rather than letting a later
# cell crash on an empty list of models.
if not model_files:
    raise FileNotFoundError(
        f"No se encontraron modelos que coincidan con: {model_pattern}"
    )

print("Modelos encontrados:")
for f in model_files:
    print(" -", os.path.basename(f))


Modelos encontrados:
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100003_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100019_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100043_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100049_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100057_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed182009_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed182011_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed182027_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed200003_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed200017_final.txt
 - lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_te

In [4]:
# Ensemble by simple averaging: score X_test with every model file found
# and take the per-row mean of the predictions.
if not model_files:
    # np.vstack([]) would otherwise fail with an unhelpful concatenation error.
    raise ValueError("model_files esta vacio: no hay modelos para el ensemble")

# One prediction vector per model; kept separate from `all_preds` so that
# name always refers to the stacked array.
preds_por_modelo = []

for model_path in model_files:
    print(f"📂 Cargando modelo: {os.path.basename(model_path)}")
    model = lgb.Booster(model_file=model_path)
    # NOTE(review): presumably X_test must carry the same columns, in the same
    # order, as the data the models were trained on — confirm against the
    # training pipeline.
    y_pred = model.predict(X_test)
    preds_por_modelo.append(y_pred)

all_preds = np.vstack(preds_por_modelo)   # shape = (n_models, n_clients)
prob_ensemble = all_preds.mean(axis=0)

prob_ensemble.shape


📂 Cargando modelo: lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100003_final.txt
📂 Cargando modelo: lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100019_final.txt
📂 Cargando modelo: lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100043_final.txt
📂 Cargando modelo: lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100049_final.txt
📂 Cargando modelo: lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed100057_final.txt
📂 Cargando modelo: lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed182009_final.txt
📂 Cargando modelo: lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed182011_final.txt
📂 Cargando modelo: lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed182027_final.txt
📂 Cargando modelo: lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_seed200003_final.txt
ð

(164822,)

In [5]:
# Decision threshold applied to the averaged probability. The value was
# copied from a log (the original note does not say which one).
UMBRAL_ENSEMBLE = 0.044330

# Rows whose ensemble probability reaches the threshold are labelled 1.
supera_umbral = prob_ensemble >= UMBRAL_ENSEMBLE
pred_binaria = supera_umbral.astype(int)

# Number of rows labelled 1, as a plain Python int.
N_enviados = int(pred_binaria.sum())

# Same count expressed as a percentage of all predictions.
pct_enviados = N_enviados / len(pred_binaria) * 100

(N_enviados, pct_enviados)


(8541, 5.181953865382048)

In [6]:
# Write the binary predictions as a CSV with one row per client id.

# Check before touching the filesystem: if earlier cells were re-run
# selectively, the ids and the predictions may no longer describe the same rows.
if len(clientes_test) != len(pred_binaria):
    raise ValueError(
        f"clientes_test ({len(clientes_test)}) y pred_binaria "
        f"({len(pred_binaria)}) tienen distinta longitud"
    )

os.makedirs(RESULTADOS_PREDICCION_PATH, exist_ok=True)

# The file name records the experiment, the threshold, the number of
# positives and a minute-resolution timestamp.
timestamp = datetime.now().strftime("%Y%m%d_%H%M")
filename = (
    f"{NOMBRE_EXPERIMENTO}_ensemble_desde_modelos_"
    f"U{UMBRAL_ENSEMBLE:.6f}_N{N_enviados}_{timestamp}.csv"
)
output_path = os.path.join(RESULTADOS_PREDICCION_PATH, filename)

submission_202108 = pd.DataFrame({
    "numero_de_cliente": clientes_test,
    "Predicted": pred_binaria
})

# index=False: only the two named columns are written.
submission_202108.to_csv(output_path, index=False)

output_path


'/home/sanmartinofacundo/buckets/b1/competencia_02/lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7/resultados_prediccion/lgbm_fe_v3_us010_tr201901-202102_val202104_vext202106_test202108_s7_ensemble_desde_modelos_U0.044330_N8541_20251116_0101.csv'