In [1]:
import pandas as pd
import polars as pl
import numpy as np
import lightgbm as lgb
import optuna
import os

In [2]:
# Project root folder. It can be overridden without editing this cell by setting the
# DMEYF_BASE_PATH environment variable (e.g. to the alternative root
# '/home/eanegrin/buckets/b1/'); otherwise the local default below is used.
base_path = os.environ.get('DMEYF_BASE_PATH', 'C:/Eugenio/Maestria/DMEyF/')
if not base_path.endswith('/'):
    # Every path below is built by plain string concatenation, so the root
    # must carry its trailing separator.
    base_path += '/'

# Sub-folders and input file, all relative to the project root.
dataset_path = base_path + 'datasets/'
modelos_path = base_path + 'modelos/'
db_path = base_path + 'db/'
dataset_file = 'competencia_03_fe_v08_predict.parquet'

# Business constants. They are not referenced by the visible cells of this notebook.
# NOTE(review): presumably gain per correctly targeted client and cost per contact - confirm.
ganancia_acierto = 273000
costo_estimulo = 7000

# One entry per saved model; each seed appears in a model file name and a prediction column name.
semillas = [1149, 4836, 9443, 7483, 3492, 6287, 830, 3294, 5932, 13]

In [3]:
# Scan the parquet file lazily and materialise only the rows whose foto_mes is 202109.
data = (
    pl.scan_parquet(dataset_path + dataset_file)
    .filter(pl.col("foto_mes") == 202109)
    .collect()
)

In [4]:
# Convert the polars frame into a pandas DataFrame.
# The type check keeps this cell safe to re-run: once `data` is already a pandas
# DataFrame there is nothing left to convert (it has no .to_pandas() method).
if isinstance(data, pl.DataFrame):
    data = data.to_pandas()

In [5]:
# Per-row weight: 1.0 by default, slightly higher for the two BAJA labels.
data['clase_peso'] = 1.0

for etiqueta, peso in (('BAJA+2', 1.00002), ('BAJA+1', 1.00001)):
    # Overwrite the default only on the rows carrying this label.
    data.loc[data['clase_ternaria'] == etiqueta, 'clase_peso'] = peso

In [6]:
# Binary target: 0 where clase_ternaria is 'CONTINUA', 1 for every other row.
# (The previous zero-initialisation was a dead store: this assignment replaces the whole column.)
data['clase_binaria'] = np.where(data['clase_ternaria'] == 'CONTINUA', 0, 1)

In [7]:
# Feature matrix for scoring: everything except the target and the helper columns built above.
X_test = data.drop(columns=['clase_ternaria', 'clase_peso', 'clase_binaria'])

### Predicciones

In [None]:
version = 'v008'  # UPDATE when scoring a different model version

# One saved-model file name per seed, in the same order as `semillas`.
modelos = ['lgb_competencia3_' + version + '_s' + str(semilla) + '_final.txt' for semilla in semillas]
modelos

['lgb_competencia3_v008_s1149_final.txt',
 'lgb_competencia3_v008_s4836_final.txt',
 'lgb_competencia3_v008_s9443_final.txt',
 'lgb_competencia3_v008_s7483_final.txt',
 'lgb_competencia3_v008_s3492_final.txt',
 'lgb_competencia3_v008_s6287_final.txt',
 'lgb_competencia3_v008_s830_final.txt',
 'lgb_competencia3_v008_s3294_final.txt',
 'lgb_competencia3_v008_s5932_final.txt',
 'lgb_competencia3_v008_s13_final.txt']

In [None]:
# Score X_test with every per-seed model, collecting the predictions together
# with the column name each one will receive later.
predicciones = []
headers = []

# `modelos` was built from `semillas`, so zipping them pairs each file with its seed.
for posicion, (semilla, modelo) in enumerate(zip(semillas, modelos), start=1):

    # Load the saved booster for this seed.
    # NOTE(review): the model folder is hard-coded to 'v010' while `version` is 'v008' - confirm this is intended.
    booster = lgb.Booster(model_file=f'{modelos_path}v010/{modelo}')

    # Keep the prediction and its column label at the same position in both lists.
    predicciones.append(booster.predict(X_test))
    headers.append(f'pred_lgm_{version}_{semilla}')

    print(f'{posicion}. Predicciones del modelo: {modelo} DONE')

1. Predicciones del modelo: lgb_competencia3_v008_s1149_final.txt DONE
2. Predicciones del modelo: lgb_competencia3_v008_s4836_final.txt DONE
3. Predicciones del modelo: lgb_competencia3_v008_s9443_final.txt DONE
4. Predicciones del modelo: lgb_competencia3_v008_s7483_final.txt DONE
5. Predicciones del modelo: lgb_competencia3_v008_s3492_final.txt DONE
6. Predicciones del modelo: lgb_competencia3_v008_s6287_final.txt DONE
7. Predicciones del modelo: lgb_competencia3_v008_s830_final.txt DONE
8. Predicciones del modelo: lgb_competencia3_v008_s3294_final.txt DONE
9. Predicciones del modelo: lgb_competencia3_v008_s5932_final.txt DONE
10. Predicciones del modelo: lgb_competencia3_v008_s13_final.txt DONE


In [10]:
# combinamos todas las predicciones en un mismo df y mergeamos
df_predicciones = pd.DataFrame(np.column_stack(predicciones), columns=headers)

output = pd.concat([X_test['numero_de_cliente'], df_predicciones], axis=1)

In [11]:
# Preview the first five rows of the combined predictions.
output.head(n=5)

Unnamed: 0,numero_de_cliente,pred_lgm_v008_1149,pred_lgm_v008_4836,pred_lgm_v008_9443,pred_lgm_v008_7483,pred_lgm_v008_3492,pred_lgm_v008_6287,pred_lgm_v008_830,pred_lgm_v008_3294,pred_lgm_v008_5932,pred_lgm_v008_13
0,249237079,1e-06,4e-06,8e-06,3e-06,2e-06,6.331278e-07,1e-06,4e-06,3e-06,3e-06
1,249267267,0.046741,0.041009,0.010568,0.100386,0.013505,0.04267765,0.016903,0.060872,0.050984,0.017199
2,249318906,2.1e-05,1.1e-05,1.4e-05,1.7e-05,7e-06,1.457622e-05,1.1e-05,8e-06,1e-05,1.2e-05
3,249905603,3e-06,1e-06,3e-06,3e-06,3e-06,1.170099e-06,4e-06,5e-06,1e-06,2e-06
4,250008430,0.086113,0.107617,0.075205,0.037136,0.073272,0.05051494,0.043327,0.044919,0.060405,0.028684


In [None]:
# Write the combined predictions to a CSV named after the model version.
file_name = f'predicciones_modelos_{version}.csv'

# Destination folder, created on demand. The original note describes it as the folder
# holding what the ensemble will use.
# NOTE(review): the folder is hard-coded to 'v010' while `version` is 'v008' - confirm this is intended.
carpeta_salida = base_path + 'exp/competencia_3/v010/'
os.makedirs(carpeta_salida, exist_ok=True)

output_path = carpeta_salida + file_name
output.to_csv(output_path, index=False)