In [1]:
import pandas as pd
import polars as pl
import numpy as np
import lightgbm as lgb
import optuna
import os

In [2]:
# Root folder of the project. Defaults to the local Windows checkout, but can be
# overridden without editing the notebook by setting the DMEYF_BASE_PATH
# environment variable (e.g. to '/home/eanegrin/buckets/b1/', the alternative
# location this cell previously kept as a commented-out line).
base_path = os.environ.get('DMEYF_BASE_PATH', 'C:/Eugenio/Maestria/DMEyF/')
if not base_path.endswith('/'):
    # Every sub-path below is built by plain string concatenation,
    # so the root must end with a separator.
    base_path += '/'

# Sub-folders and input file, all derived from base_path.
dataset_path = base_path + 'datasets/'
modelos_path = base_path + 'modelos/'
db_path = base_path + 'db/'
dataset_file = 'competencia_03_fe_v07_predict.parquet'

# Business constants; presumably the gain per correct hit and the cost per
# stimulus sent -- they are not used in the cells visible here, confirm usage.
ganancia_acierto = 273000
costo_estimulo = 7000

# One seed per trained model; used to build the model file names and the
# prediction column names.
semillas = [1149, 4836, 9443, 7483, 3492, 6287, 830, 3294, 5932, 13]

In [3]:
# Lazily scan the parquet file and materialise only the rows whose
# foto_mes equals 202109.
ruta_dataset = dataset_path + dataset_file
solo_mes_objetivo = pl.col("foto_mes") == 202109
data = pl.scan_parquet(ruta_dataset).filter(solo_mes_objetivo).collect()

In [4]:
# Convert the polars frame to a pandas DataFrame.
# The guard keeps this cell idempotent: once `data` is already a pandas frame,
# re-running the cell would otherwise fail because pandas DataFrames have no
# `to_pandas` method.
if isinstance(data, pl.DataFrame):
    data = data.to_pandas()

In [5]:
# Per-row weight: 1.0 by default, with a marginally larger value for the two
# 'BAJA' labels so they can be told apart by weight.
pesos_por_clase = {'BAJA+2': 1.00002, 'BAJA+1': 1.00001}

data['clase_peso'] = 1.0
for etiqueta, peso in pesos_por_clase.items():
    data.loc[data['clase_ternaria'] == etiqueta, 'clase_peso'] = peso

In [6]:
# Binary target: 0 where clase_ternaria is 'CONTINUA', 1 for every other value.
# Assigning the full np.where result creates the column directly, so no
# separate zero-initialisation is required.
data['clase_binaria'] = np.where(data['clase_ternaria'] == 'CONTINUA', 0, 1)

In [7]:
# Feature matrix for scoring: everything except the target-related columns.
columnas_no_predictoras = ['clase_ternaria', 'clase_peso', 'clase_binaria']
X_test = data.drop(columns=columnas_no_predictoras)

### Predicciones

In [8]:
version = 'v007'  # UPDATE

# One model file name per seed, following a fixed naming template.
plantilla_modelo = 'lgb_competencia3_{}_s{}_final.txt'
modelos = [plantilla_modelo.format(version, semilla) for semilla in semillas]
modelos

['lgb_competencia3_v007_s1149_final.txt',
 'lgb_competencia3_v007_s4836_final.txt',
 'lgb_competencia3_v007_s9443_final.txt',
 'lgb_competencia3_v007_s7483_final.txt',
 'lgb_competencia3_v007_s3492_final.txt',
 'lgb_competencia3_v007_s6287_final.txt',
 'lgb_competencia3_v007_s830_final.txt',
 'lgb_competencia3_v007_s3294_final.txt',
 'lgb_competencia3_v007_s5932_final.txt',
 'lgb_competencia3_v007_s13_final.txt']

In [None]:
# Accumulators: one prediction vector and one column name per model.
predicciones = []
headers = []

# `modelos` is built from `semillas` in the same order, so zipping them pairs
# each model file with the seed it was named after.
# NOTE(review): models are read from the 'v010' folder while the file names
# carry `version` -- confirm this mismatch is intended.
for counter, (modelo, semilla) in enumerate(zip(modelos, semillas)):

    # Load the trained booster from disk.
    ruta_modelo = f'{modelos_path}v010/{modelo}'
    model = lgb.Booster(model_file=ruta_modelo)

    # Score the test rows with this model.
    y_pred_lgm = model.predict(X_test)

    # Keep the prediction together with the column name it will get later.
    predicciones.append(y_pred_lgm)
    headers.append(f'pred_lgm_{version}_{semilla}')

    print(f'{counter + 1}. Predicciones del modelo: {modelo} DONE')

1. Predicciones del modelo: lgb_competencia3_v007_s1149_final.txt DONE
2. Predicciones del modelo: lgb_competencia3_v007_s4836_final.txt DONE
3. Predicciones del modelo: lgb_competencia3_v007_s9443_final.txt DONE
4. Predicciones del modelo: lgb_competencia3_v007_s7483_final.txt DONE
5. Predicciones del modelo: lgb_competencia3_v007_s3492_final.txt DONE
6. Predicciones del modelo: lgb_competencia3_v007_s6287_final.txt DONE
7. Predicciones del modelo: lgb_competencia3_v007_s830_final.txt DONE
8. Predicciones del modelo: lgb_competencia3_v007_s3294_final.txt DONE
9. Predicciones del modelo: lgb_competencia3_v007_s5932_final.txt DONE
10. Predicciones del modelo: lgb_competencia3_v007_s13_final.txt DONE


In [10]:
# Stack the per-model prediction vectors side by side, one column per header.
# The frame reuses X_test's index on purpose: pd.concat(axis=1) aligns rows on
# index labels, so a freshly numbered 0..n-1 index would silently yield NaN
# rows whenever X_test does not carry a default index.
df_predicciones = pd.DataFrame(
    np.column_stack(predicciones),
    columns=headers,
    index=X_test.index,
)

# Attach the customer id so every row of predictions can be traced back.
output = pd.concat([X_test['numero_de_cliente'], df_predicciones], axis=1)

In [11]:
# Show the first five rows as a quick sanity check of the merged predictions.
output.head(n=5)

Unnamed: 0,numero_de_cliente,pred_lgm_v007_1149,pred_lgm_v007_4836,pred_lgm_v007_9443,pred_lgm_v007_7483,pred_lgm_v007_3492,pred_lgm_v007_6287,pred_lgm_v007_830,pred_lgm_v007_3294,pred_lgm_v007_5932,pred_lgm_v007_13
0,249237079,5.2e-05,5.5e-05,0.000243,0.000165,5.5e-05,0.0001,0.000148,0.000138,7.1e-05,0.000138
1,249267267,0.312953,0.382251,0.427619,0.400496,0.189855,0.234532,0.275285,0.265668,0.261475,0.376014
2,249318906,0.000505,0.000305,0.000294,0.000595,0.000208,0.000463,0.000673,0.000757,0.000554,0.000314
3,249905603,5.2e-05,9.7e-05,8.7e-05,2.1e-05,4.5e-05,5.1e-05,3.5e-05,0.00011,4.7e-05,4e-05
4,250008430,0.348698,0.497278,0.486627,0.492228,0.399408,0.261102,0.28098,0.387853,0.201863,0.404646


In [None]:
file_name = f'predicciones_modelos_{version}.csv'

# Destination folder for the prediction file (per the original note, this is
# where the inputs for the ensemble are kept); created if it does not exist.
output_dir = base_path + 'exp/competencia_3/v010/'
os.makedirs(output_dir, exist_ok=True)

# Write the predictions without the pandas index column.
output_path = output_dir + file_name
output.to_csv(output_path, index=False)