In [1]:
import pandas as pd
import polars as pl
import numpy as np
import lightgbm as lgb
import optuna
import os

In [2]:
# Root folder of the project. Swap with the commented line to use the alternative location.
base_path = 'C:/Eugenio/Maestria/DMEyF/'
# base_path = '/home/eanegrin/buckets/b1/'

# Sub-folders and input file, all derived from base_path.
dataset_path = f'{base_path}datasets/'
modelos_path = f'{base_path}modelos/'
db_path = f'{base_path}db/'
dataset_file = 'competencia_03_fe_v05_predict.parquet'

# Business constants.
# NOTE(review): presumably gain per correctly targeted client and cost per contact — confirm.
ganancia_acierto = 273_000
costo_estimulo = 7_000

# Seeds: each one selects a model file name and labels a prediction column further down.
semillas = [
    1149, 4836, 9443, 7483, 3492,
    6287, 830, 3294, 5932, 13,
]

In [3]:
# Lazily scan the parquet file and materialise only the rows whose foto_mes is 202109.
solo_mes_a_predecir = pl.col("foto_mes") == 202109
data = (
    pl.scan_parquet(f'{dataset_path}{dataset_file}')
    .filter(solo_mes_a_predecir)
    .collect()
)

In [4]:
# Convert the polars frame to a pandas DataFrame.
# The isinstance guard makes this cell safe to re-run: once `data` is already a
# pandas DataFrame, calling `.to_pandas()` again would raise AttributeError.
if isinstance(data, pl.DataFrame):
    data = data.to_pandas()

In [5]:
# Row weights: 1.0 by default, with a slightly larger value for each BAJA class.
data['clase_peso'] = 1.0

for clase, peso in (('BAJA+2', 1.00002), ('BAJA+1', 1.00001)):
    data.loc[data['clase_ternaria'] == clase, 'clase_peso'] = peso

In [6]:
# Binary target: 1 where clase_ternaria is 'BAJA+2', 0 for every other row.
# A single assignment is enough; np.where already produces a value for every row.
data['clase_binaria'] = np.where(data['clase_ternaria'] == 'BAJA+2', 1, 0)

In [7]:
# Feature frame used for prediction: every column except the class label and
# the two helper columns derived from it.
columnas_excluidas = ['clase_ternaria', 'clase_peso', 'clase_binaria']
X_test = data.drop(columns=columnas_excluidas)

### Predicciones

In [8]:
version = 'v005'  # UPDATE: version tag used in model file names, prediction columns and the output file


def _nombre_modelo(semilla):
    """Return the model file name for one seed under the current `version`."""
    return f'lgb_competencia3_{version}_s{semilla}_final.txt'


# One model file name per seed, in the same order as `semillas`.
modelos = [_nombre_modelo(semilla) for semilla in semillas]
modelos

['lgb_competencia3_v005_s1149_final.txt',
 'lgb_competencia3_v005_s4836_final.txt',
 'lgb_competencia3_v005_s9443_final.txt',
 'lgb_competencia3_v005_s7483_final.txt',
 'lgb_competencia3_v005_s3492_final.txt',
 'lgb_competencia3_v005_s6287_final.txt',
 'lgb_competencia3_v005_s830_final.txt',
 'lgb_competencia3_v005_s3294_final.txt',
 'lgb_competencia3_v005_s5932_final.txt',
 'lgb_competencia3_v005_s13_final.txt']

In [10]:
# Parallel lists: one prediction vector and one column label per model.
predicciones = []
headers = []

for numero, (semilla, modelo) in enumerate(zip(semillas, modelos), start=1):
    # Load the LightGBM model file saved for this seed.
    ruta_modelo = f'{modelos_path}v009/{modelo}'
    model = lgb.Booster(model_file=ruta_modelo)

    # Score the test rows and store the result next to its column label,
    # so both lists stay in the same order.
    predicciones.append(model.predict(X_test))
    headers.append(f'pred_lgm_{version}_{semilla}')

    print(f'{numero}. Predicciones del modelo: {modelo} DONE')

1. Predicciones del modelo: lgb_competencia3_v005_s1149_final.txt DONE
2. Predicciones del modelo: lgb_competencia3_v005_s4836_final.txt DONE
3. Predicciones del modelo: lgb_competencia3_v005_s9443_final.txt DONE
4. Predicciones del modelo: lgb_competencia3_v005_s7483_final.txt DONE
5. Predicciones del modelo: lgb_competencia3_v005_s3492_final.txt DONE
6. Predicciones del modelo: lgb_competencia3_v005_s6287_final.txt DONE
7. Predicciones del modelo: lgb_competencia3_v005_s830_final.txt DONE
8. Predicciones del modelo: lgb_competencia3_v005_s3294_final.txt DONE
9. Predicciones del modelo: lgb_competencia3_v005_s5932_final.txt DONE
10. Predicciones del modelo: lgb_competencia3_v005_s13_final.txt DONE


In [11]:
# Combine all predictions into one frame (one column per model) and attach the client id.
# The prediction frame reuses X_test's index: pd.concat(axis=1) aligns rows by index
# label, so a fresh 0..n-1 index would silently misalign (and pad with NaN) whenever
# X_test does not carry the default RangeIndex.
df_predicciones = pd.DataFrame(
    np.column_stack(predicciones),
    columns=headers,
    index=X_test.index,
)

output = pd.concat([X_test['numero_de_cliente'], df_predicciones], axis=1)

In [12]:
# Preview the first rows: the client id followed by one prediction column per model.
output.head()

Unnamed: 0,numero_de_cliente,pred_lgm_v005_1149,pred_lgm_v005_4836,pred_lgm_v005_9443,pred_lgm_v005_7483,pred_lgm_v005_3492,pred_lgm_v005_6287,pred_lgm_v005_830,pred_lgm_v005_3294,pred_lgm_v005_5932,pred_lgm_v005_13
0,249237079,0.004417,0.003415,0.004795,0.004022,0.004498,0.005703,0.006816,0.005234,0.005087,0.003362
1,249267267,0.106608,0.095619,0.079132,0.102648,0.095516,0.076424,0.076872,0.118647,0.079417,0.068065
2,249318906,0.006937,0.006395,0.006441,0.005542,0.004629,0.005774,0.006502,0.005613,0.004826,0.005049
3,249905603,0.001372,0.001487,0.001893,0.001678,0.001901,0.002119,0.001846,0.001719,0.001654,0.002054
4,250008430,0.151576,0.176921,0.147263,0.149408,0.187239,0.149539,0.148836,0.177249,0.170504,0.12723


In [16]:
# Write the combined predictions to a CSV named after the model version.
file_name = f'predicciones_modelos_{version}.csv'

# Experiment folder that receives the predictions file; created if it does not exist yet.
output_dir = f'{base_path}exp/competencia_3/v009/'
os.makedirs(output_dir, exist_ok=True)

output_path = f'{output_dir}{file_name}'
output.to_csv(output_path, index=False)