In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import pymysql
import matplotlib.pyplot as plt

**Pérdida de cosecha definida por el grupo de riesgo**

In [5]:
"""
Estos son los datos que debería de llenar.
cosechas_estimadas es la pérdida de cosecha media en cada grupo que esperamos de cada cluster
corteS_probabilidad son las probabilidades asociadas a cada grupo, es decir,
grupo 1 - 1.54% de pérdida de cosecha y probabilidad entre 100% - 81.09%
grupo 2 - 3.4% de pérdida de cosecha y probabilidad entre 81.09% - 72.06%
...
"""
cosechas_estimadas = np.array([1, 1.98, 4.06, 6.11, 7.65, 11.8, 14.76])/100

#cosechas_estimadas += np.array([0,0,0,0,0,.1,.1,.1,.1,.1])

cortes_probabilidad = np.array(sorted([  0. ,  50.2,  75.4,  84.1,  93.6,  96.8, 100.]))

In [6]:
num_grupos_riesgo = len(cosechas_estimadas)

**Carga de información de redes**

In [7]:
data_redes_personales = pd.read_csv("./data_redes_personales.csv")
data_redes = pd.read_csv("./data_redes.csv")

**Carga de datos de prospectos**

Aquí es donde suben el output del pipeline

In [8]:
prospectos = pd.read_csv("../evaluacion_modelo_quash/data/data_quash_prospectos_evaluados_formato_aws.csv")
prospectos_dist = prospectos.id_distribuidor.unique()
selection = np.array([id_ in prospectos.id_distribuidor.unique() for id_ in data_redes_personales.id_distribuidor.astype(int)])

data_redes_personales = data_redes_personales.loc[selection]

In [10]:
len(data_redes_personales)

1389

In [53]:
prospectos = prospectos.groupby("id_distribuidor").agg({"monto_autorizado": max, 
                                                  "tasa_sugerida":max,
                                                  "tasa_minima": max,
                                                  "probabilidad" : max,
                                                  "grupo_riesgo": max})

In [54]:
def setcut_prob(p, grupo_ = True):
    p = p*100

    for cut in cortes_probabilidad:
        if cut > p:
            break
            
    grupo = np.where(cortes_probabilidad == cut)[0][0] 
    if grupo_:
        return grupo
    else:
        return cosechas_estimadas[grupo - 1]

In [55]:
prospectos['cosecha_estimada'] = prospectos.probabilidad.apply(setcut_prob, grupo_ = False)
prospectos['grupo_riesgo'] = prospectos.grupo_riesgo.apply(lambda x: int(x.replace("Canasta ","")))

**En caso de que una persona no tenga comportamiento individual, llenar con el comportamiento de la red**

In [56]:
for i, row in data_redes_personales.iterrows():
    if row['total_pagado'] == 0.0:
        
        id_ = row["id_distribuidor"]
        temp = data_redes.query(f'id_distribuidor == {id_}')#.capital_cd.values[0]
        if temp.shape[0] == 0 :
            continue
        data_redes_personales.at[i, "capital_cd"] = temp.capital_cd.values[0]
        data_redes_personales.at[i, "total_pagado"] = temp.total_pagado.values[0]
        data_redes_personales.at[i, "total_pagado_capital"] = temp.total_pagado_capital.values[0]
        data_redes_personales.at[i, "saldo_pendiente_capital"] = temp.saldo_pendiente_capital.values[0]
        data_redes_personales.at[i, "dias_atraso"] = temp.dias_atraso.values[0]

In [57]:
data_redes_personales['perdida_cosecha'] = 1 - data_redes_personales['total_pagado_capital'] / data_redes_personales['capital_cd']

In [58]:
data_redes_personales = data_redes_personales.merge(prospectos, on = 'id_distribuidor')

In [59]:
data_cerrados = data_redes_personales.query("cerrado == 1")

**Matriz de distribución de pérdida de cosecha**

In [60]:
rangos = []
cont = 0
for _ in range(100):
    if cont < 10:
        aum = 1    
    elif cont < 50:
        aum = 5
    else:
        break
        
    rangos.append([cont, int(cont + aum)])
    cont += aum 
    
rangos.append([50, np.inf])

In [61]:
perdida_cosecha_num_personas = pd.DataFrame(rangos, columns = ["inferior", "superior"])

perdida_cosecha_porcentaje_ideal = pd.DataFrame(rangos, columns = ["inferior", "superior"])

In [62]:
for k in range(1, num_grupos_riesgo + 1):
    perdida_cosecha_num_personas[f"{k}"] = 0 
    perdida_cosecha_porcentaje_ideal[f"{k}"] = 0

In [63]:
for grupo_riesgo in range(1, num_grupos_riesgo + 1):
    for cos in data_cerrados.query(f"grupo_riesgo == {grupo_riesgo}").perdida_cosecha:
        for level in perdida_cosecha_num_personas.superior:
            if (cos * 100) < level:
                perdida_cosecha_num_personas.loc[perdida_cosecha_num_personas.superior == level, f"{grupo_riesgo}"] += 1
                break

In [64]:
divided = perdida_cosecha_num_personas.iloc[:, :].sum(0)
divided[0] = 1
divided[1] = 1

perdida_cosecha_porcentaje = perdida_cosecha_num_personas / divided

In [75]:
perdida_cosecha_num_personas.to_csv("./data/resultado_matriz_perdida_cosecha_quash_2024_04_16.csv", index=False)

In [66]:
perdida_cosecha_porcentaje

Unnamed: 0,inferior,superior,1,2,3,4,5,6,7
0,0.0,1.0,0.402174,0.456522,0.475,0.542857,0.6,0.9,
1,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,
2,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,3.0,4.0,0.0,0.0,0.0,0.028571,0.0,0.0,
4,4.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,
5,5.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,
6,6.0,7.0,0.021739,0.01087,0.0,0.0,0.0,0.0,
7,7.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,
8,8.0,9.0,0.0,0.021739,0.075,0.028571,0.066667,0.0,
9,9.0,10.0,0.01087,0.0,0.0,0.0,0.0,0.0,


**Matriz ideal**

In [67]:
for grupo_riesgo, nivel in enumerate(cosechas_estimadas):
    for superior in perdida_cosecha_porcentaje_ideal.superior:
        if nivel < superior:
            perdida_cosecha_porcentaje_ideal.loc[perdida_cosecha_porcentaje_ideal.superior == superior, f"{grupo_riesgo+1}"] += 1
            break

In [68]:
perdida_cosecha_porcentaje_ideal

Unnamed: 0,inferior,superior,1,2,3,4,5,6,7
0,0,1.0,1,1,1,1,1,1,1
1,1,2.0,0,0,0,0,0,0,0
2,2,3.0,0,0,0,0,0,0,0
3,3,4.0,0,0,0,0,0,0,0
4,4,5.0,0,0,0,0,0,0,0
5,5,6.0,0,0,0,0,0,0,0
6,6,7.0,0,0,0,0,0,0,0
7,7,8.0,0,0,0,0,0,0,0
8,8,9.0,0,0,0,0,0,0,0
9,9,10.0,0,0,0,0,0,0,0


**Pérdidas de cosechas reales por grupo**

In [76]:
perdida_por_grupo = 1 - data_cerrados.groupby("grupo_riesgo").total_pagado_capital.sum() / data_cerrados.groupby("grupo_riesgo").capital_cd.sum()
c = 1
distancia_media = 0
for real, estimada in zip(perdida_por_grupo, cosechas_estimadas[::-1]):
    distancia_media += abs(real - estimada) / 6
    print(f"grupo {c}: {real*100:.2f} <-> {estimada*100:.3f}")
    c+=1
print(f"Distancia media absoluta: {distancia_media*100:.2f}")

grupo 1: 19.70 <-> 14.760
grupo 2: 18.96 <-> 11.800
grupo 3: 12.89 <-> 7.650
grupo 4: 15.40 <-> 6.110
grupo 5: 9.39 <-> 4.060
grupo 6: 1.70 <-> 1.980
Distancia media absoluta: 5.37


In [70]:
real

0.01698555187121653

**Relación de id con cosechas**

In [71]:
data_cerrados[['id_distribuidor','id_red', 'perdida_cosecha','cosecha_estimada']].to_csv("./data/cosecha_esperada_vs_obtenida_2023_11_28.csv", index = False)

In [72]:
data_cerrados

Unnamed: 0,id_distribuidor,id_red,capital,nombre_red,distribuidor,tipo_prestamo,tipo_distribuidor,sucursal,ciclo_credito,inicio_credito,...,saldo_atraso,total_pagado,cerrado,perdida_cosecha,monto_autorizado,tasa_sugerida,tasa_minima,probabilidad,grupo_riesgo,cosecha_estimada
1,143960,156523,6000.020020,YULIVAN,LIZ ANEL HERNANDEZ MENDOZA,PRESTAMO PERSONAL RED,red,COATZACOALCOS,1,2023-07-05,...,0.0,42598.0,1,0.235586,0,0,0,0.504965,2,0.0198
7,147486,155034,7999.990234,MARIPOSAS NEZA,CECILIA DURAN RODRIGUEZ,PRESTAMO PERSONAL RED,red,NEZAHUALCOYOTL 2,1,2023-06-19,...,0.0,54000.0,1,0.000000,0,0,0,0.232673,1,0.0100
10,147490,155034,7999.990234,MARIPOSAS NEZA,ADRIANA ALCANTAR GARCIA,PRESTAMO PERSONAL RED,red,NEZAHUALCOYOTL 2,1,2023-06-19,...,0.0,54000.0,1,0.000000,0,0,0,0.447730,1,0.0100
12,147582,156979,9999.990234,ANISA,JULIA ALBINO MATEO,PRESTAMO PERSONAL RED,red,COATZACOALCOS,1,2023-07-07,...,0.0,118832.0,1,0.000000,0,0,0,0.323847,1,0.0100
14,147991,153797,6999.919922,DARUMA,ROSA PATRICIA MINGUEZ SOSA,PRESTAMO PERSONAL RED,red,FORTIN,1,2023-06-07,...,0.0,47200.0,1,0.000000,7000,147,142,0.882711,4,0.0611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
642,158292,158615,9999.990234,DIAMANTE 2020 CH,ANABEL CRUZ PEREZ,PRESTAMO PERSONAL RED,red,CHALCO,1,2023-07-26,...,0.0,80944.0,1,0.000000,0,0,0,0.596398,2,0.0198
644,158354,158632,6000.020020,ARENAL AMK,BEATRIZ ESTRADA CORTES,PRESTAMO PERSONAL RED,red,AMECAMECA,1,2023-07-26,...,0.0,56736.0,1,0.000000,10000,147,142,0.909286,4,0.0611
645,158356,158632,6000.020020,ARENAL AMK,MA CONCEPCION FLORES MARTINEZ,PRESTAMO PERSONAL RED,red,AMECAMECA,1,2023-07-26,...,0.0,56736.0,1,0.000000,0,0,0,0.535136,2,0.0198
665,158579,158632,6000.020020,ARENAL AMK,ANDREA VERONICA SOLIS SOLIS,PRESTAMO PERSONAL RED,red,AMECAMECA,1,2023-07-26,...,0.0,56736.0,1,0.000000,0,0,0,0.588615,2,0.0198


In [73]:
"".lpad(5)

AttributeError: 'str' object has no attribute 'lpad'