In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import pymysql
import matplotlib.pyplot as plt

**Pérdida de cosecha definida por el grupo de riesgo**

In [10]:
"""
Estos son los datos que debería de llenar.
cosechas_estimadas es la pérdida de cosecha media en cada grupo que esperamos de cada cluster
corteS_probabilidad son las probabilidades asociadas a cada grupo, es decir,
grupo 1 - 1.54% de pérdida de cosecha y probabilidad entre 100% - 81.09%
grupo 2 - 3.4% de pérdida de cosecha y probabilidad entre 81.09% - 72.06%
...
"""
cosechas_estimadas = np.array([0.00598185, 0.01829813, 0.0332206 , 0.0311605 , 0.03374105,
       0.03977056, 0.05243911, 0.05828372, 0.07900052, 0.1122694 ])

cortes_probabilidad = np.array([ 2.84212986, 21.80003665, 30.190946  , 35.27074295, 41.23305635,
       46.81750016, 51.63921278, 56.13693519, 61.86523739, 68.68459193,
       94.03795644])

In [11]:
num_grupos_riesgo = 10

**Carga de información de redes**

In [12]:
data_redes_personales = pd.read_csv("./data_redes_personales.csv")
data_redes = pd.read_csv("./data_redes.csv")

**Carga de datos de prospectos**

Aquí es donde suben el output del pipeline

In [18]:
prospectos = pd.read_csv("../../modelo_riesgo_volana/data/evaluation/prospectos_evaluados_modelo_volana.csv")
prospectos_dist = prospectos.id_distribuidor.unique()
selection = np.array([id_ in prospectos.id_distribuidor.unique() for id_ in data_redes_personales.id_distribuidor.astype(int)])

data_redes_personales = data_redes_personales.loc[selection]

In [19]:
prospectos = prospectos.groupby("id_distribuidor").agg({"monto_autorizado": max, 
                                                  "tasa_sugerida":max,
                                                  "tasa_minima": max,
                                                  "probabilidad" : max,
                                                  "grupo_riesgo": max})

In [20]:
def setcut_prob(p, grupo_ = True):
    p = p*100

    for cut in cortes_probabilidad:
        if cut > p:
            break
            
    grupo = np.where(cortes_probabilidad == cut)[0][0] 
    if grupo_:
        return grupo
    else:
        return cosechas_estimadas[grupo - 1]

In [21]:
prospectos['cosecha_estimada'] = prospectos.probabilidad.apply(setcut_prob, grupo_ = False)
prospectos['grupo_riesgo'] = prospectos.grupo_riesgo.apply(lambda x: int(x.replace("Canasta ","")))

**En caso de que una persona no tenga comportamiento individual, llenar con el comportamiento de la red**

In [22]:
for i, row in data_redes_personales.iterrows():
    if row['total_pagado'] == 0.0:
        
        id_ = row["id_distribuidor"]
        temp = data_redes.query(f'id_distribuidor == {id_}')#.capital_cd.values[0]
        if temp.shape[0] == 0 :
            continue
        data_redes_personales.at[i, "capital_cd"] = temp.capital_cd.values[0]
        data_redes_personales.at[i, "total_pagado"] = temp.total_pagado.values[0]
        data_redes_personales.at[i, "total_pagado_capital"] = temp.total_pagado_capital.values[0]
        data_redes_personales.at[i, "saldo_pendiente_capital"] = temp.saldo_pendiente_capital.values[0]
        data_redes_personales.at[i, "dias_atraso"] = temp.dias_atraso.values[0]

In [23]:
data_redes_personales['perdida_cosecha'] = 1 - data_redes_personales['total_pagado_capital'] / data_redes_personales['capital_cd']

In [24]:
data_redes_personales = data_redes_personales.merge(prospectos, on = 'id_distribuidor')

In [25]:
data_cerrados = data_redes_personales.query("cerrado == 1")

**Matriz de distribución de pérdida de cosecha**

In [26]:
rangos = []
cont = 0
for _ in range(100):
    if cont < 10:
        aum = 1    
    elif cont < 50:
        aum = 5
    else:
        break
        
    rangos.append([cont, int(cont + aum)])
    cont += aum 
    
rangos.append([50, np.inf])

In [27]:
perdida_cosecha_num_personas = pd.DataFrame(rangos, columns = ["inferior", "superior"])

perdida_cosecha_porcentaje_ideal = pd.DataFrame(rangos, columns = ["inferior", "superior"])

In [28]:
for k in range(1, num_grupos_riesgo + 1):
    perdida_cosecha_num_personas[f"{k}"] = 0 
    perdida_cosecha_porcentaje_ideal[f"{k}"] = 0

In [29]:
for grupo_riesgo in range(1, num_grupos_riesgo + 1):
    for cos in data_cerrados.query(f"grupo_riesgo == {grupo_riesgo}").perdida_cosecha:
        for level in perdida_cosecha_num_personas.superior:
            if (cos * 100) < level:
                perdida_cosecha_num_personas.loc[perdida_cosecha_num_personas.superior == level, f"{grupo_riesgo}"] += 1
                break

In [30]:
divided = perdida_cosecha_num_personas.iloc[:, :].sum(0)
divided[0] = 1
divided[1] = 1

perdida_cosecha_porcentaje = perdida_cosecha_num_personas / divided

In [31]:
perdida_cosecha_num_personas

Unnamed: 0,inferior,superior,1,2,3,4,5,6,7,8,9,10
0,0,1.0,2,4,5,4,16,10,8,11,11,11
1,1,2.0,0,0,0,0,0,0,0,0,0,0
2,2,3.0,0,0,0,0,0,0,0,0,0,0
3,3,4.0,0,1,0,0,0,0,0,0,0,0
4,4,5.0,0,0,0,0,0,0,0,0,0,0
5,5,6.0,0,0,0,0,0,0,0,0,0,0
6,6,7.0,0,0,0,0,0,0,0,0,0,0
7,7,8.0,0,0,0,0,0,0,0,0,0,0
8,8,9.0,0,0,0,1,1,2,2,1,0,0
9,9,10.0,0,0,0,0,0,0,0,0,0,0


In [32]:
perdida_cosecha_porcentaje

Unnamed: 0,inferior,superior,1,2,3,4,5,6,7,8,9,10
0,0.0,1.0,0.666667,0.8,0.833333,0.8,0.842105,0.625,0.470588,0.55,0.52381,0.34375
1,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3.0,4.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,6.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,7.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,8.0,9.0,0.0,0.0,0.0,0.2,0.052632,0.125,0.117647,0.05,0.0,0.0
9,9.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**Matriz ideal**

In [33]:
for grupo_riesgo, nivel in enumerate(cosechas_estimadas):
    for superior in perdida_cosecha_porcentaje_ideal.superior:
        if nivel < superior:
            perdida_cosecha_porcentaje_ideal.loc[perdida_cosecha_porcentaje_ideal.superior == superior, f"{grupo_riesgo+1}"] += 1
            break

In [34]:
perdida_cosecha_porcentaje_ideal

Unnamed: 0,inferior,superior,1,2,3,4,5,6,7,8,9,10
0,0,1.0,1,1,1,1,1,1,1,1,1,1
1,1,2.0,0,0,0,0,0,0,0,0,0,0
2,2,3.0,0,0,0,0,0,0,0,0,0,0
3,3,4.0,0,0,0,0,0,0,0,0,0,0
4,4,5.0,0,0,0,0,0,0,0,0,0,0
5,5,6.0,0,0,0,0,0,0,0,0,0,0
6,6,7.0,0,0,0,0,0,0,0,0,0,0
7,7,8.0,0,0,0,0,0,0,0,0,0,0
8,8,9.0,0,0,0,0,0,0,0,0,0,0
9,9,10.0,0,0,0,0,0,0,0,0,0,0


**Pérdidas de cosechas reales por grupo**

In [39]:
perdida_por_grupo = 1 - data_cerrados.groupby("grupo_riesgo").total_pagado_capital.sum() / data_cerrados.groupby("grupo_riesgo").capital_cd.sum()
c = 1
for real, estimada in zip(perdida_por_grupo, cosechas_estimadas):
    print(f"grupo {c}: {real*100:.2f} <-> {estimada*100:.3f}")
    c+=1

grupo 1: 3.19 <-> 0.598
grupo 2: 0.67 <-> 1.830
grupo 3: 4.34 <-> 3.322
grupo 4: 2.06 <-> 3.116
grupo 5: 1.38 <-> 3.374
grupo 6: 8.19 <-> 3.977
grupo 7: 17.54 <-> 5.244
grupo 8: 11.57 <-> 5.828
grupo 9: 19.02 <-> 7.900
grupo 10: 25.06 <-> 11.227


**Relación de id con cosechas**

In [36]:
data_cerrados[['id_distribuidor','id_red', 'perdida_cosecha','cosecha_estimada']].to_csv("./data/cosecha_esperada_vs_obtenida_2023_11_28.csv", index = False)

In [37]:
data_cerrados

Unnamed: 0,id_distribuidor,id_red,capital,nombre_red,distribuidor,tipo_prestamo,tipo_distribuidor,sucursal,ciclo_credito,inicio_credito,...,saldo_atraso,total_pagado,cerrado,perdida_cosecha,monto_autorizado,tasa_sugerida,tasa_minima,probabilidad,grupo_riesgo,cosecha_estimada
1,143960,156523,6000.020020,YULIVAN,LIZ ANEL HERNANDEZ MENDOZA,PRESTAMO PERSONAL RED,red,COATZACOALCOS,1,2023-07-05,...,0.0,42598.0,1,0.235586,10000,147,110,0.714448,10,0.112269
6,147991,153797,6999.919922,DARUMA,ROSA PATRICIA MINGUEZ SOSA,PRESTAMO PERSONAL RED,red,FORTIN,1,2023-06-07,...,0.0,47200.0,1,0.000000,10000,147,110,0.291307,2,0.018298
15,152232,152320,5000.040039,AVATAR,JOSEFINA TREJO ROSAS,PRESTAMO PERSONAL RED,red,FORTIN,1,2023-05-31,...,0.0,79472.0,1,0.000000,10000,147,110,0.287312,2,0.018298
18,152346,152779,8000.009766,FLOR DE LOTO IXT,GUADALUPE GARCIA ROMERO,PRESTAMO PERSONAL RED,red,IXTAPALUCA,1,2023-06-01,...,0.0,144992.0,1,0.000000,10000,147,110,0.542210,7,0.052439
21,152434,152779,9999.990234,FLOR DE LOTO IXT,AIDE SANTIAGO GARCIA,PRESTAMO PERSONAL RED,red,IXTAPALUCA,1,2023-06-01,...,0.0,144992.0,1,0.000000,10000,147,110,0.736853,10,0.112269
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,158265,158657,6000.020020,REYNAS UNO,ROSA NAZARIO GONZALEZ,PRESTAMO PERSONAL RED,red,FORTIN,1,2023-07-26,...,0.0,56080.0,1,0.000000,10000,147,110,0.301201,2,0.018298
452,158292,158615,9999.990234,DIAMANTE 2020 CH,ANABEL CRUZ PEREZ,PRESTAMO PERSONAL RED,red,CHALCO,1,2023-07-26,...,0.0,80944.0,1,0.000000,10000,147,110,0.406059,4,0.031161
454,158354,158632,6000.020020,ARENAL AMK,BEATRIZ ESTRADA CORTES,PRESTAMO PERSONAL RED,red,AMECAMECA,1,2023-07-26,...,0.0,56736.0,1,0.000000,10000,147,110,0.481709,6,0.039771
455,158356,158632,6000.020020,ARENAL AMK,MA CONCEPCION FLORES MARTINEZ,PRESTAMO PERSONAL RED,red,AMECAMECA,1,2023-07-26,...,0.0,56736.0,1,0.000000,10000,147,110,0.422857,5,0.033741


In [19]:
"".lpad(5)

AttributeError: 'str' object has no attribute 'lpad'