In [1]:
import pandas as pd
import numpy as np
# !pip install pulp
import pulp
import copy
import random
from datetime import datetime
from dateutil.relativedelta import relativedelta
from pathlib import Path
import warnings
warnings.filterwarnings("ignore")

In [2]:
def opt_port(df_sample,epsilon):
    """Choose exactly one scenario per client so that total ``van_e`` is maximal.

    A binary program is built and solved with PuLP:

    * one binary variable per (``llave``, ``escenario``) row,
    * objective: maximise the sum of ``van_e`` over the selected rows,
    * one constraint per ``llave`` forcing exactly one selected scenario,
    * one portfolio constraint forcing the selected ``mto_e`` total to be at
      least ``epsilon`` times the ``mto_e`` total of the rows whose
      ``flg_max_vane`` equals 1.

    The solver status and the objective value are printed.

    Parameters
    ----------
    df_sample : pandas.DataFrame
        Must contain the columns ``llave``, ``escenario``, ``van_e``,
        ``mto_e`` and ``flg_max_vane``, with at most one row per
        (``llave``, ``escenario``) pair. The frame is NOT modified.
    epsilon : float
        Multiplier applied to the reference ``mto_e`` total to obtain the
        lower bound of the portfolio constraint.

    Returns
    -------
    pandas.DataFrame
        One record per decision variable with the columns ``idc`` (the
        ``llave``), ``escenario``, ``binario`` (value assigned by the solver)
        and ``value`` (the ``van_e`` of that pair).

    Raises
    ------
    ValueError
        If a (``llave``, ``escenario``) pair appears more than once.
    """
    # Index a NEW frame: callers keep using (and concatenating) the frame they
    # passed in, so it must not be re-indexed behind their back.
    indexed = df_sample.set_index(keys=['llave','escenario'])
    if indexed.index.has_duplicates:
        raise ValueError("opt_port requires unique (llave, escenario) pairs")

    # Plain dictionaries make every coefficient lookup O(1) instead of a
    # MultiIndex .loc call per row.
    pairs = list(indexed.index)
    van_by_pair = indexed['van_e'].to_dict()
    mto_by_pair = indexed['mto_e'].to_dict()

    # Reference amount: mto_e of the flagged rows, scaled by epsilon.
    mto_van_opt = df_sample.loc[df_sample['flg_max_vane'] == 1, 'mto_e'].sum()
    mto_opt_port = mto_van_opt*epsilon

    esc_opt = pulp.LpVariable.dicts("esc_opt", pairs, cat='Binary')

    # Model definition
    model_opt = pulp.LpProblem("Optimización_del_Van_Esperado", pulp.LpMaximize)

    # Objective function
    model_opt += pulp.lpSum(esc_opt[pair] * van_by_pair[pair] for pair in pairs)

    # Group the pairs by client in a single pass (first-appearance order is
    # preserved) instead of filtering the whole frame once per client.
    pairs_by_client = {}
    for pair in pairs:
        pairs_by_client.setdefault(pair[0], []).append(pair)

    # Constraints: exactly one scenario per client ...
    for client_pairs in pairs_by_client.values():
        model_opt += pulp.lpSum(esc_opt[pair] for pair in client_pairs) == 1

    # ... and the selected amount must reach the scaled reference amount.
    model_opt += pulp.lpSum(esc_opt[pair] * mto_by_pair[pair] for pair in pairs) >= mto_opt_port

    model_opt.solve()

    print(pulp.LpStatus[model_opt.status])
    print(model_opt.objective.value())

    output = [
        {
            'idc': idc,
            'escenario': esc,
            'binario': esc_opt[(idc, esc)].varValue,
            'value': van_by_pair[(idc, esc)],
        }
        for idc, esc in pairs
    ]

    output_df = pd.DataFrame.from_records(output)
    return output_df

In [3]:
# route_bases = Path('.').resolve() / 'data' / 'bases'
# route_output = Path('.').resolve() / 'data' / 'optimizacion'

# input_filename = "wtp_escenarios_ind_20231229.csv"
# output_filename_5 = f"salida_opt_port_campania_{(datetime.now() + relativedelta(months=1)).strftime('%Y%m')}_5_new.csv"
# output_filename_10 = f"salida_opt_port_campania_{(datetime.now() + relativedelta(months=1)).strftime('%Y%m')}_10_new.csv"


# output_filename_5_final, extension = output_filename_5.rsplit('.', 1)  
# output_filename_5_final = output_filename_5_final.replace("new", "escenario_new")  
# output_filename_5_final = f"{output_filename_5_final}.{extension}" 

# output_filename_10_final, extension = output_filename_10.rsplit('.', 1)  
# output_filename_10_final = output_filename_10_final.replace("new", "escenario_new")  
# output_filename_10_final = f"{output_filename_10_final}.{extension}" 


# full_filename_input = route_bases / input_filename
# full_filename_output_5 = route_output/ output_filename_5
# full_filename_output_10 = route_output/ output_filename_10
# full_filename_output_5_final = route_output/ output_filename_5_final

# full_filename_output_10_final = route_output/ output_filename_10_final

In [4]:
# The scenario base is read from the directory the notebook is run from.
route_bases = Path('.').resolve().joinpath('CEN_CEF_ADAPTACION_5MILL_Base_para_probar_CENOPT.csv')
df = pd.read_csv(route_bases)
# Work with lower-case headers from here on.
df = df.set_axis(df.columns.str.lower(), axis=1)
df.columns

Index(['codmes', 'seg_wtp_cef', 'numscoreriesgo', 'codsubsegmento',
       'codclavecic', 'llave', 'mtofinalofertadosol', 'rng_prob', 'num_sen',
       'iter', 'tasa_propuesta', 'prob_adj', 'prob', 'van', 'van_estimado',
       'mto_estimado', 'ec_efect_i', 'b0', 'b1', 'prob_adj_ef',
       'flg_subsegmento_consumo'],
      dtype='object')

In [5]:
# Map the headers of the raw base onto the short names used in the rest of
# the notebook (old name -> new name).
df = df.rename(columns=dict([
    ('iter', 'escenario'),
    ('tasa_propuesta', 'tea'),
    ('van_estimado', 'van_e'),
    ('mto_estimado', 'mto_e'),
    ('prob_adj', 'tir'),
    ('prob_adj_ef', 'efec'),
]))

In [6]:
# Keep an independent snapshot of the renamed base with all of its columns.
df_input = df.copy(deep=True)
df_input.columns

Index(['codmes', 'seg_wtp_cef', 'numscoreriesgo', 'codsubsegmento',
       'codclavecic', 'llave', 'mtofinalofertadosol', 'rng_prob', 'num_sen',
       'escenario', 'tea', 'tir', 'prob', 'van', 'van_e', 'mto_e',
       'ec_efect_i', 'b0', 'b1', 'efec', 'flg_subsegmento_consumo'],
      dtype='object')

In [7]:
# Keep only the columns used below, ordered by client and then by scenario.
df = (
    df.loc[:, ['llave','escenario','tea','van_e', 'mto_e','tir','efec']]
      .sort_values(by=['llave','escenario'])
)

In [8]:
# Rows of scenario 0 get flag 1, every other row gets 0. opt_port sums the
# mto_e of the flagged rows to build its reference amount.
df['flg_max_vane'] = df['escenario'].eq(0).astype(int)


In [14]:
# For each candidate group size, record the size and how many groups of that
# size are needed to cover every distinct client key.
clientes = set(df['llave'])
tmo_m_list = [1000,3000,5000,10000]  # candidate group sizes (clients per group)
# muestra_list = []
tamano_m = []  # group size tried on each iteration
n_group = []   # ceil(number of clients / group size)
# output_list = []
# tiempo = []
porc = 1.06

for i in tmo_m_list:
    mc_time = []
    tamano_m.append(i)
    n_group.append(np.ceil(len(clientes)/(i)))    
        
    # Disabled benchmark: five random samples of `i` clients, each solved with
    # opt_port and timed. While it stays commented out, `mc_time` remains
    # empty and `tiempo` is not filled by this cell.
    # for x in range(5):
    #     clientes_sample = random.sample(list(clientes), i)
    #     dfc = df[df.llave.isin(clientes_sample)].reset_index(drop=True)
    #     start_time = datetime.now() 

    #     output_df = opt_port(dfc,porc)
        
    #     time_elapsed = datetime.now() - start_time  
    #     mc_time.append(time_elapsed)
        
    # tiempo.append(np.mean(mc_time))
    
    # With `mc_time` empty, np.mean returns nan, which is what gets printed.
    print('Time elapsed (hh:mm:ss.ms) {time_elapsed}, TMO_M: {grupo}'.format(time_elapsed=np.mean(mc_time), grupo=i),"\n")
    
    
    
    # muestra_list.append(dfc)
    

    


Time elapsed (hh:mm:ss.ms) nan, TMO_M: 1000 

Time elapsed (hh:mm:ss.ms) nan, TMO_M: 3000 

Time elapsed (hh:mm:ss.ms) nan, TMO_M: 5000 

Time elapsed (hh:mm:ss.ms) nan, TMO_M: 10000 



In [None]:
# tamano_m
# n_group
# tiempo

In [18]:
# Summary table: one row per group size with the number of groups, the time
# per group and the projected total time (groups x time per group).
# NOTE(review): the cell that builds `tamano_m` / `n_group` has its
# `tiempo.append(...)` line commented out, so `tiempo` must come from
# elsewhere and have the same length. Confirm before running.
dicc = dict(zip(('tamano_m', 'n_group', 'Tiempo_c_grp'), (tamano_m, n_group, tiempo)))
prueba_t = pd.DataFrame(dicc)
prueba_t = prueba_t.assign(Time_Total=prueba_t['n_group'] * prueba_t['Tiempo_c_grp'])
prueba_t

Unnamed: 0,tamano_m,n_group,Tiempo_c_grp,Time_Total
0,1000,517.0,0 days 00:00:10.275002,0 days 01:28:32.176034
1,3000,173.0,0 days 00:00:42.705131,0 days 02:03:07.987663
2,5000,104.0,0 days 00:01:14.775691,0 days 02:09:36.671864
3,10000,52.0,0 days 00:02:52.368427,0 days 02:29:23.158204


In [None]:
# Mean of the elapsed times currently stored in `tiempo`.
np.mean(tiempo)

In [None]:
# Split the clients into `samples_num` disjoint random groups and run the
# portfolio optimisation on each group, timing every run.

# Set of client keys still waiting to be assigned to a group
clientes = set(df['llave'])

# Number of groups to generate
samples_num = 10

# Clients per group; the last group takes whatever is left over
sample_size = int(len(clientes)/samples_num)

# Per-group input frames, optimisation outputs and elapsed times
df_list = []
output_list = []
tiempo = []

porc = 1.06

print("Clientes totales del DataFrame:",len(df.llave.unique()),"\n")

# `df` is consumed group by group below; keep a snapshot so it can be restored.
dfc = df.copy()
try:
    # Generate the groups without repeating clients
    for i in range(1,samples_num+1):
        if i<samples_num:
            clientes_sample = random.sample(list(clientes), sample_size)
            # .copy() so that adding `sample_num` writes to an independent
            # frame rather than to a filtered slice of `df`.
            df_sample = df[df.llave.isin(clientes_sample)].copy()
            df_sample["sample_num"] = i
            df = df.drop(df_sample.index)

            print("Clientes del DataFrame del grupo",i,":",len(df_sample.llave.unique()))
            print("Clientes que quedan en el DataFrame:", len(df.llave.unique()))

            clientes = clientes - set(clientes_sample)
        else:
            # Last group: every row not assigned so far
            df_sample = df.copy()
            df_sample["sample_num"] = i

            print("Clientes del último sample:",len(df_sample.llave.unique()))

        df_list.append(df_sample)

        start_time = datetime.now()

        # opt_port may re-index the frame it receives in place; hand it a copy
        # so the frame stored in df_list keeps its columns.
        output_df = opt_port(df_sample.copy(),porc)
        output_list.append(output_df)

        time_elapsed = datetime.now() - start_time
        tiempo.append(time_elapsed)
        print('Time elapsed (hh:mm:ss.ms) {time_elapsed}, grupo:{grupo}'.format(time_elapsed=time_elapsed, grupo=i),"\n")
finally:
    # Restore the full frame even if a group fails part-way through, so the
    # cell can be re-run without reloading the data.
    df = dfc.copy()

df_tot = pd.concat(df_list)
df_tot_opt = pd.concat(output_list)

In [None]:
# How many times each scenario was the one selected (binario == 1).
df_tot_opt.loc[df_tot_opt['binario'] == 1, 'escenario'].value_counts()

In [None]:
# df_tot_opt.to_csv(full_filename_output_5,index=False)

In [None]:
# NOTE(review): in this notebook `full_filename_output_5`, `full_filename_input`
# and `full_filename_output_5_final` are only assigned inside a commented-out
# cell. Confirm they are defined before running.
df_tot_opt = pd.read_csv(full_filename_output_5)

# Output of the individual WTP scenarios
df_wtp_esc = pd.read_csv(full_filename_input)

# Selected (binario == 1) scenario of every lead, keyed as `codclavecic`.
# NOTE(review): opt_port fills `idc` from the `llave` column. Verify that those
# values match `codclavecic` in the scenario file.
df_opt_port_group = (
    df_tot_opt
    .loc[df_tot_opt['binario'] == 1, ['idc','escenario']]
    .rename(columns={'idc':'codclavecic'})
)

# Attach the TEA (plus cluster and tir) of the selected scenario
df_tea_opt_port_by_group = df_opt_port_group.merge(
    df_wtp_esc[['codclavecic','escenario','tea', 'cluster','tir']],
    on=['codclavecic','escenario'],
    how='left',
)

df_tea_opt_port_by_group.to_csv(full_filename_output_5_final, index=False)

In [None]:
# (rows, columns) of the merged selection table.
df_tea_opt_port_by_group.shape

In [None]:
# Split the clients of the scenario file into `samples_num` disjoint random
# groups and run the portfolio optimisation on each group.

# Load DataFrame
df = pd.read_csv(full_filename_input)

# Set of client keys still waiting to be assigned to a group
clientes = set(df['codclavecic'])

# Number of groups to generate
samples_num = 10

# Clients per group; the last group takes whatever is left over
sample_size = int(len(clientes)/samples_num)

# Per-group input frames and optimisation outputs
df_list = []
output_list = []

porc = 1.061

print("Clientes totales del DataFrame:",len(df.codclavecic.unique()),"\n")

# NOTE(review): opt_port reads the columns `llave` and `flg_max_vane`; confirm
# that the file loaded above provides them.

# Generate the groups without repeating clients
for i in range(1,samples_num+1):
    if i<samples_num:
        # random.sample needs a sequence: passing a set raises TypeError on
        # Python >= 3.11 (deprecated since 3.9).
        clientes_sample = random.sample(list(clientes), sample_size)
        # .copy() so that adding `sample_num` writes to an independent frame
        # rather than to a filtered slice of `df`.
        df_sample = df[df.codclavecic.isin(clientes_sample)].copy()
        df_sample["sample_num"] = i
        df = df.drop(df_sample.index)

        print("Clientes del DataFrame del grupo",i,":",len(df_sample.codclavecic.unique()))
        print("Clientes que quedan en el DataFrame:", len(df.codclavecic.unique()))

        clientes = clientes - set(clientes_sample)
    else:
        # Last group: every row not assigned so far
        df_sample = df.copy()
        df_sample["sample_num"] = i

        print("Clientes del último sample:",len(df_sample.codclavecic.unique()))

    df_list.append(df_sample)

    start_time = datetime.now()

    # opt_port may re-index the frame it receives in place; hand it a copy so
    # the frame stored in df_list keeps its columns.
    output_df = opt_port(df_sample.copy(),porc)
    output_list.append(output_df)

    time_elapsed = datetime.now() - start_time
    print('Time elapsed (hh:mm:ss.ms) {time_elapsed}, grupo:{grupo}'.format(time_elapsed=time_elapsed, grupo=i),"\n")

df_tot_10 = pd.concat(df_list)
df_tot_opt_10 = pd.concat(output_list)

In [None]:
# Write the concatenated optimisation output to CSV, without the index column.
df_tot_opt_10.to_csv(full_filename_output_10,index=False)

In [None]:
# (rows, columns) of the concatenated optimisation output.
df_tot_opt_10.shape

In [None]:
# NOTE(review): in this notebook `full_filename_output_10`, `full_filename_input`
# and `full_filename_output_10_final` are only assigned inside a commented-out
# cell. Confirm they are defined before running.
df_tot_opt_10 = pd.read_csv(full_filename_output_10)

# Output of the individual WTP scenarios
df_wtp_esc = pd.read_csv(full_filename_input)

# Selected (binario == 1) scenario of every lead, keyed as `codclavecic`
df_opt_port_group = (
    df_tot_opt_10
    .loc[df_tot_opt_10['binario'] == 1, ['idc','escenario']]
    .rename(columns={'idc':'codclavecic'})
)

# Attach the TEA (plus cluster and tir) of the selected scenario
df_tea_opt_port_by_group = df_opt_port_group.merge(
    df_wtp_esc[['codclavecic','escenario','tea', 'cluster','tir']],
    on=['codclavecic','escenario'],
    how='left',
)

df_tea_opt_port_by_group.to_csv(full_filename_output_10_final, index=False)

In [None]:
# (rows, columns) of the merged selection table.
df_tea_opt_port_by_group.shape