In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import pymysql
import matplotlib.pyplot as plt

**Pérdida de cosecha definida por el grupo de riesgo**

In [2]:
"""
Estos son los datos que debería de llenar.
cosechas_estimadas es la pérdida de cosecha media en cada grupo que esperamos de cada cluster
corteS_probabilidad son las probabilidades asociadas a cada grupo, es decir,
grupo 1 - 1.54% de pérdida de cosecha y probabilidad entre 100% - 81.09%
grupo 2 - 3.4% de pérdida de cosecha y probabilidad entre 81.09% - 72.06%
...
"""
cosechas_estimadas = np.array([1.54, 3.4, 4.97, 7.38, 10.69, 11.87, 12.35, 12.68, 20.33, 22.69])
cortes_probabilidad = np.array(sorted([100, 81.09, 72.06, 63.19, 54.27, 45.11, 36.10, 27.47, 18.17, 9.05, 0.00]))

In [3]:
num_grupos_riesgo = 10

**Carga de información de redes**

In [4]:
data_redes_personales = pd.read_csv("./data_redes_personales.csv")
data_redes = pd.read_csv("./data_redes.csv")

**Carga de datos de prospectos**

Aquí es donde suben el output del pipeline

In [9]:
prospectos = pd.read_csv("./prospectos_riesgo_11_28_23.csv")
prospectos_dist = prospectos.id_distribuidor.unique()
selection = np.array([id_ in prospectos.id_distribuidor.unique() for id_ in data_redes_personales.id_distribuidor.astype(int)])

data_redes_personales = data_redes_personales.loc[selection]

In [10]:
prospectos = prospectos.groupby("id_distribuidor").agg({"monto_autorizado": max, 
                                                  "tasa_sugerida":max,
                                                  "tasa_minima": max,
                                                  "probabilidad" : max,
                                                  "grupo_riesgo": max})

In [11]:
def setcut_prob(p, grupo_ = True):
    p = p*100

    for cut in cortes_probabilidad:
        if cut > p:
            break
            
    grupo = np.where(cortes_probabilidad == cut)[0][0] 
    if grupo_:
        return grupo
    else:
        return cosechas_estimadas[grupo - 1]

In [12]:
prospectos['cosecha_estimada'] = prospectos.probabilidad.apply(setcut_prob, grupo_ = False)
prospectos['grupo_riesgo'] = prospectos.grupo_riesgo.apply(lambda x: int(x.replace("Canasta ","")))

**En caso de que una persona no tenga comportamiento individual, llenar con el comportamiento de la red**

In [13]:
for i, row in data_redes_personales.iterrows():
    if row['total_pagado'] == 0.0:
        
        id_ = row["id_distribuidor"]
        temp = data_redes.query(f'id_distribuidor == {id_}')#.capital_cd.values[0]
        if temp.shape[0] == 0 :
            continue
        data_redes_personales.at[i, "capital_cd"] = temp.capital_cd.values[0]
        data_redes_personales.at[i, "total_pagado"] = temp.total_pagado.values[0]
        data_redes_personales.at[i, "total_pagado_capital"] = temp.total_pagado_capital.values[0]
        data_redes_personales.at[i, "saldo_pendiente_capital"] = temp.saldo_pendiente_capital.values[0]
        data_redes_personales.at[i, "dias_atraso"] = temp.dias_atraso.values[0]

In [14]:
data_redes_personales['perdida_cosecha'] = 1 - data_redes_personales['total_pagado_capital'] / data_redes_personales['capital_cd']

In [15]:
data_redes_personales = data_redes_personales.merge(prospectos, on = 'id_distribuidor')

In [16]:
data_cerrados = data_redes_personales.query("cerrado == 1")

**Matriz de distribución de pérdida de cosecha**

In [14]:
rangos = []
cont = 0
for _ in range(100):
    if cont < 10:
        aum = 1    
    elif cont < 50:
        aum = 5
    else:
        break
        
    rangos.append([cont, int(cont + aum)])
    cont += aum 
    
rangos.append([50, np.inf])

In [15]:
perdida_cosecha_num_personas = pd.DataFrame(rangos, columns = ["inferior", "superior"])

perdida_cosecha_porcentaje_ideal = pd.DataFrame(rangos, columns = ["inferior", "superior"])

In [16]:
for k in range(1, num_grupos_riesgo + 1):
    perdida_cosecha_num_personas[f"{k}"] = 0 
    perdida_cosecha_porcentaje_ideal[f"{k}"] = 0

In [17]:
for grupo_riesgo in range(1, num_grupos_riesgo + 1):
    for cos in data_cerrados.query(f"grupo_riesgo == {grupo_riesgo}").perdida_cosecha:
        for level in perdida_cosecha_num_personas.superior:
            if (cos * 100) < level:
                perdida_cosecha_num_personas.loc[perdida_cosecha_num_personas.superior == level, f"{grupo_riesgo}"] += 1
                break

In [18]:
divided = perdida_cosecha_num_personas.iloc[:, :].sum(0)
divided[0] = 1
divided[1] = 1

perdida_cosecha_porcentaje = perdida_cosecha_num_personas / divided

In [22]:
perdida_cosecha_num_personas

Unnamed: 0,inferior,superior,1,2,3,4,5,6,7,8,9,10
0,0,1.0,4,12,18,4,19,14,18,16,17,14
1,1,2.0,0,0,0,0,0,0,0,0,0,0
2,2,3.0,0,0,0,0,0,0,0,0,0,0
3,3,4.0,0,0,0,0,0,0,1,0,0,0
4,4,5.0,0,0,0,0,0,0,0,0,0,0
5,5,6.0,0,0,0,0,0,0,0,0,0,0
6,6,7.0,0,0,0,0,0,0,0,1,3,1
7,7,8.0,0,0,0,0,0,0,0,0,0,0
8,8,9.0,0,1,1,1,0,2,2,0,0,0
9,9,10.0,0,0,0,0,0,0,1,0,0,0


In [18]:
perdida_cosecha_porcentaje

NameError: name 'perdida_cosecha_porcentaje' is not defined

**Matriz ideal**

In [19]:
for grupo_riesgo, nivel in enumerate(cosechas_estimadas):
    for superior in perdida_cosecha_porcentaje_ideal.superior:
        if nivel < superior:
            perdida_cosecha_porcentaje_ideal.loc[perdida_cosecha_porcentaje_ideal.superior == superior, f"{grupo_riesgo+1}"] += 1
            break

In [20]:
perdida_cosecha_porcentaje_ideal

Unnamed: 0,inferior,superior,1,2,3,4,5,6,7,8,9,10
0,0,1.0,0,0,0,0,0,0,0,0,0,0
1,1,2.0,1,0,0,0,0,0,0,0,0,0
2,2,3.0,0,0,0,0,0,0,0,0,0,0
3,3,4.0,0,1,0,0,0,0,0,0,0,0
4,4,5.0,0,0,1,0,0,0,0,0,0,0
5,5,6.0,0,0,0,0,0,0,0,0,0,0
6,6,7.0,0,0,0,0,0,0,0,0,0,0
7,7,8.0,0,0,0,1,0,0,0,0,0,0
8,8,9.0,0,0,0,0,0,0,0,0,0,0
9,9,10.0,0,0,0,0,0,0,0,0,0,0


**Pérdidas de cosechas reales por grupo**

In [21]:
perdida_por_grupo = 1 - data_cerrados.groupby("grupo_riesgo").total_pagado_capital.sum() / data_cerrados.groupby("grupo_riesgo").capital_cd.sum()
c = 1
for real, estimada in zip(perdida_por_grupo, cosechas_estimadas):
    print(f"grupo {c}: {real*100:.2f} <-> {estimada:.2f}")
    c+=1

grupo 1: 24.26 <-> 1.54
grupo 2: 13.96 <-> 3.40
grupo 3: 12.69 <-> 4.97
grupo 4: 32.38 <-> 7.38
grupo 5: 10.94 <-> 10.69
grupo 6: 15.99 <-> 11.87
grupo 7: 10.96 <-> 12.35
grupo 8: 17.15 <-> 12.68
grupo 9: 17.28 <-> 20.33
grupo 10: 17.76 <-> 22.69


**Relación de id con cosechas**

In [30]:
data_cerrados[['id_distribuidor','id_red', 'perdida_cosecha','cosecha_estimada']].to_csv("./data/cosecha_esperada_vs_obtenida_2023_11_28.csv", index = False)

In [17]:
data_cerrados

Unnamed: 0,id_distribuidor,id_red,capital,nombre_red,distribuidor,tipo_prestamo,tipo_distribuidor,sucursal,ciclo_credito,inicio_credito,...,saldo_atraso,total_pagado,cerrado,perdida_cosecha,monto_autorizado,tasa_sugerida,tasa_minima,probabilidad,grupo_riesgo,cosecha_estimada
2,143960,156523,6000.020020,YULIVAN,LIZ ANEL HERNANDEZ MENDOZA,PRESTAMO PERSONAL RED,red,COATZACOALCOS,1,2023-07-05,...,0.0,42598.0,1,0.235586,0,0,0,0.7173,9,12.68
8,147486,155034,7999.990234,MARIPOSAS NEZA,CECILIA DURAN RODRIGUEZ,PRESTAMO PERSONAL RED,red,NEZAHUALCOYOTL 2,1,2023-06-19,...,0.0,54000.0,1,0.000000,8000,147,127,0.1874,3,4.97
11,147490,155034,7999.990234,MARIPOSAS NEZA,ADRIANA ALCANTAR GARCIA,PRESTAMO PERSONAL RED,red,NEZAHUALCOYOTL 2,1,2023-06-19,...,0.0,54000.0,1,0.000000,9000,147,147,0.4916,7,11.87
13,147582,156979,9999.990234,ANISA,JULIA ALBINO MATEO,PRESTAMO PERSONAL RED,red,COATZACOALCOS,1,2023-07-07,...,0.0,118832.0,1,0.000000,10000,147,147,0.4450,7,10.69
15,147991,153797,6999.919922,DARUMA,ROSA PATRICIA MINGUEZ SOSA,PRESTAMO PERSONAL RED,red,FORTIN,1,2023-06-07,...,0.0,47200.0,1,0.000000,0,0,0,0.4288,7,10.69
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
648,158292,158615,9999.990234,DIAMANTE 2020 CH,ANABEL CRUZ PEREZ,PRESTAMO PERSONAL RED,red,CHALCO,1,2023-07-26,...,0.0,80944.0,1,0.000000,10000,147,142,0.3341,5,7.38
650,158354,158632,6000.020020,ARENAL AMK,BEATRIZ ESTRADA CORTES,PRESTAMO PERSONAL RED,red,AMECAMECA,1,2023-07-26,...,0.0,56736.0,1,0.000000,0,0,0,0.8780,10,22.69
651,158356,158632,6000.020020,ARENAL AMK,MA CONCEPCION FLORES MARTINEZ,PRESTAMO PERSONAL RED,red,AMECAMECA,1,2023-07-26,...,0.0,56736.0,1,0.000000,8000,147,127,0.1739,3,3.40
671,158579,158632,6000.020020,ARENAL AMK,ANDREA VERONICA SOLIS SOLIS,PRESTAMO PERSONAL RED,red,AMECAMECA,1,2023-07-26,...,0.0,56736.0,1,0.000000,0,0,0,0.4360,7,10.69


In [19]:
"".lpad(5)

AttributeError: 'str' object has no attribute 'lpad'