In [2]:
import pandas as pd
import numpy as np

### Importación de datos

In [3]:
# Load the 2019 table, keep only the municipality key and the `total` column, and cast both to int.
sv19_df = (
    pd.read_csv('sourceData/total_sv19.csv')
    .loc[:, ['CVE_MUNICIPIO', 'total']]
    .astype(int)
)
sv19_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 393 entries, 0 to 392
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   CVE_MUNICIPIO  393 non-null    int64
 1   total          393 non-null    int64
dtypes: int64(2)
memory usage: 6.3 KB


In [4]:
# Load the 2020 table, keep only the municipality key and the `total` column, and cast both to int.
sv20_df = (
    pd.read_csv('sourceData/total_sv20.csv')
    .loc[:, ['CVE_MUNICIPIO', 'total']]
    .astype(int)
)
sv20_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 981 entries, 0 to 980
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype
---  ------         --------------  -----
 0   CVE_MUNICIPIO  981 non-null    int64
 1   total          981 non-null    int64
dtypes: int64(2)
memory usage: 15.5 KB


In [5]:
# Load the combined 2019-2020 table, keep the key and the total, and give the total a readable label.
sv1920_df = (
    pd.read_csv('sourceData/sv_1920.csv')
    .loc[:, ['ET_ID', 'total1920']]
    .rename(columns={'total1920': 'Total 2019-2020'})
)
sv1920_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2469 entries, 0 to 2468
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   ET_ID            2469 non-null   int64
 1   Total 2019-2020  2469 non-null   int64
dtypes: int64(2)
memory usage: 38.7 KB


### Limpieza de los DataFrames

In [6]:
# Ids listed in the combined 2019-2020 table; any of them absent from the 2019 table is
# appended below with a total of 0.
et_id = sv1920_df['ET_ID'].to_list()
sv_19_cve_municipio = sv19_df['CVE_MUNICIPIO'].to_list()
# Set lookup instead of scanning the whole list once per id.
_sv19_known = set(sv_19_cve_municipio)
sv19_missing = [i for i in et_id if i not in _sv19_known]
# np.zeros yields floats, so the total column becomes float64 after the concat.
sv19_missing_df = pd.DataFrame({'CVE_MUNICIPIO': sv19_missing, 'total': np.zeros(len(sv19_missing))})
# Rename by label (a no-op once already renamed) instead of assigning `.columns` positionally:
# on a second run the positional assignment fails because the concat yields three columns.
sv19_df = pd.concat(
    [frame.rename(columns={'total': 'Total 2019'}) for frame in (sv19_df, sv19_missing_df)],
    ignore_index=True,
)
sv19_df

Unnamed: 0,CVE_MUNICIPIO,Total 2019
0,2001,40000.0
1,2002,5000.0
2,2004,50000.0
3,4002,105000.0
4,4003,935000.0
...,...,...
2467,32054,0.0
2468,32055,0.0
2469,32056,0.0
2470,32057,0.0


In [7]:
# Ids listed in the combined 2019-2020 table; any of them absent from the 2020 table is
# appended below with a total of 0.
et_id = sv1920_df['ET_ID'].to_list()
sv_20_cve_municipio = sv20_df['CVE_MUNICIPIO'].to_list()
# Set lookup instead of scanning the whole list once per id.
_sv20_known = set(sv_20_cve_municipio)
sv20_missing = [i for i in et_id if i not in _sv20_known]
# np.zeros yields floats, so the total column becomes float64 after the concat.
sv20_missing_df = pd.DataFrame({'CVE_MUNICIPIO': sv20_missing, 'total': np.zeros(len(sv20_missing))})
# Rename by label (a no-op once already renamed) instead of assigning `.columns` positionally:
# on a second run the positional assignment fails because the concat yields three columns.
sv20_df = pd.concat(
    [frame.rename(columns={'total': 'Total 2020'}) for frame in (sv20_df, sv20_missing_df)],
    ignore_index=True,
)
sv20_df

Unnamed: 0,CVE_MUNICIPIO,Total 2020
0,2001,10000.0
1,2004,10000.0
2,4001,39565000.0
3,4002,220000.0
4,4003,150000.0
...,...,...
2465,32054,0.0
2466,32055,0.0
2467,32056,0.0
2468,32057,0.0


### División en cuantiles

In [8]:
def quantiles(df: pd.DataFrame, values: str, cvs_muncs: str, variable: str='', n: int=10, groupZeros: bool=False) -> pd.DataFrame :
    """
    Split the rows of a DataFrame into `n` consecutive groups ("quantiles") of near-equal size.

    Rows are sorted by `values` in ascending order and then cut into `n` chunks using the
    sizing rule of `np.array_split`. Chunks are balanced by row count, not by value, so rows
    with tied values may end up in two neighbouring chunks.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame holding the data to split.
    values : str
        Column whose values are compared (sort key).
    cvs_muncs : str
        Column whose elements are collected for each quantile.
    variable : str, optional
        Name reported in the `name` column; defaults to the column name given in `values`.
    n : int, optional
        Number of quantiles, `10` by default.
    groupZeros : bool, optional
        If `True`, rows whose value equals `0` are removed before splitting and reported as an
        extra quantile with bin `0`. `False` by default.

    Returns
    -------
    pd.DataFrame
        One row per quantile with columns `name`, `bin` (1-based; 0 for the zero group),
        `lower` and `upper` (first and last value of the chunk) and `cells` (list of the
        `cvs_muncs` elements in the chunk). An empty chunk (fewer rows than `n`) gets NaN
        bounds and an empty list; an empty zero group gets bounds `0.0` and an empty list.
    """

    variable = values if variable == '' else variable
    df = df.sort_values(by=values, ascending=True)
    # `groupZeros` is a bool, so it contributes one extra row when True.
    names = [variable] * (n + groupZeros)
    bins = []
    lower = []
    upper = []
    muncs = []

    if groupZeros:
        # Select zero rows by mask and by column label, so the result does not depend on the
        # zeros sorting first or on `values`/`cvs_muncs` being the first two columns.
        is_zero = df[values] == 0
        zero_rows = df.loc[is_zero]
        df = df.loc[~is_zero]
        bins.append(0)
        lower.append(np.float64(0.0))
        upper.append(np.float64(0.0))
        muncs.append(list(zero_rows[cvs_muncs]))

    # Split row positions rather than the frame itself; chunk sizes are the same.
    for i, positions in enumerate(np.array_split(np.arange(len(df)), n)):
        chunk = df.iloc[positions]
        bins.append(i + 1)
        if len(chunk):
            lower.append(np.float64(chunk[values].iloc[0]))
            upper.append(np.float64(chunk[values].iloc[-1]))
        else:
            # Fewer rows than quantiles: report the empty chunk instead of raising.
            lower.append(np.float64('nan'))
            upper.append(np.float64('nan'))
        muncs.append(list(chunk[cvs_muncs]))

    return pd.DataFrame({'name':names, 'bin':bins, 'lower': lower, 'upper': upper, 'cells':muncs})


In [9]:
# Split the 2019 totals into 10 groups; zero-valued rows are not separated here.
total_sv19_df = quantiles(df=sv19_df, values='Total 2019', cvs_muncs='CVE_MUNICIPIO', n=10)
total_sv19_df

Unnamed: 0,name,bin,lower,upper,cells
0,Total 2019,1,0.0,0.0,"[13023, 19017, 19018, 19020, 19021, 19022, 190..."
1,Total 2019,2,0.0,0.0,"[20071, 20070, 20069, 20041, 20045, 20048, 200..."
2,Total 2019,3,0.0,0.0,"[15090, 15091, 15093, 15113, 15114, 15115, 160..."
3,Total 2019,4,0.0,0.0,"[26005, 26006, 26007, 26008, 26009, 26010, 250..."
4,Total 2019,5,0.0,0.0,"[20439, 20442, 20374, 20443, 20446, 20447, 204..."
5,Total 2019,6,0.0,0.0,"[20221, 12035, 21183, 31054, 20282, 31094, 200..."
6,Total 2019,7,0.0,0.0,"[20204, 28019, 21030, 20518, 20465, 20341, 290..."
7,Total 2019,8,0.0,0.0,"[5018, 5017, 5027, 7014, 7017, 7020, 7093, 708..."
8,Total 2019,9,0.0,5160000.0,"[1001, 8029, 8027, 4004, 18009, 25017, 30123, ..."
9,Total 2019,10,5275000.0,908500000.0,"[30116, 30034, 30145, 30071, 7088, 21029, 3016..."


In [10]:
# Split the 2020 totals into 10 groups; zero-valued rows are not separated here.
total_sv20_df = quantiles(df=sv20_df, values='Total 2020', cvs_muncs='CVE_MUNICIPIO', n=10)
total_sv20_df

Unnamed: 0,name,bin,lower,upper,cells
0,Total 2020,1,0.0,0.0,"[13024, 19018, 19020, 19021, 19022, 19023, 190..."
1,Total 2020,2,0.0,0.0,"[20072, 20090, 20071, 20069, 20045, 20048, 200..."
2,Total 2020,3,0.0,0.0,"[15090, 15091, 15092, 15094, 15114, 15115, 151..."
3,Total 2020,4,0.0,0.0,"[26004, 26005, 26006, 26007, 26008, 26009, 260..."
4,Total 2020,5,0.0,0.0,"[20438, 20439, 20442, 20374, 20443, 20446, 204..."
5,Total 2020,6,0.0,0.0,"[13023, 7085, 7083, 7078, 7075, 7064, 7058, 70..."
6,Total 2020,7,0.0,1680000.0,"[11003, 11007, 11008, 11009, 11010, 11011, 110..."
7,Total 2020,8,1690000.0,7315000.0,"[8051, 21049, 17002, 21101, 20223, 20288, 2102..."
8,Total 2020,9,7325000.0,21425000.0,"[30157, 31058, 29040, 20394, 29008, 20486, 301..."
9,Total 2020,10,21430000.0,955155000.0,"[30100, 24014, 30195, 10030, 7079, 7069, 30204..."


In [11]:
# Split the combined 2019-2020 totals into 10 groups; zero-valued rows are not separated here.
total_sv1920_df = quantiles(df=sv1920_df, values='Total 2019-2020', cvs_muncs='ET_ID', n=10)
total_sv1920_df

Unnamed: 0,name,bin,lower,upper,cells
0,Total 2019-2020,1,0.0,0.0,"[13027, 19018, 19020, 19021, 19022, 19023, 190..."
1,Total 2019-2020,2,0.0,0.0,"[20072, 20090, 20071, 20069, 20045, 20048, 200..."
2,Total 2019-2020,3,0.0,0.0,"[15090, 15091, 15092, 15094, 15114, 15115, 151..."
3,Total 2019-2020,4,0.0,0.0,"[26004, 26005, 26006, 26007, 26008, 26009, 260..."
4,Total 2019-2020,5,0.0,0.0,"[20438, 20439, 20442, 20374, 20443, 20446, 204..."
5,Total 2019-2020,6,0.0,5000.0,"[13023, 7108, 7106, 7098, 7094, 7093, 7086, 70..."
6,Total 2019-2020,7,5000.0,1835000.0,"[18017, 19038, 16058, 28028, 21140, 21166, 260..."
7,Total 2019-2020,8,1840000.0,8415000.0,"[31003, 20093, 20456, 20304, 31072, 20237, 200..."
8,Total 2019-2020,9,8425000.0,26540000.0,"[10015, 20520, 30150, 20274, 8041, 21025, 2041..."
9,Total 2019-2020,10,27265000.0,1863655000.0,"[29034, 16065, 12057, 7091, 30127, 10019, 1003..."


In [12]:
## |Sembrando vida total 2019, q00, 0.0:0.0|cells

def varByQuantil(df: pd.DataFrame, values: str, cvs_muncs: str, variable: str='', n: int=10, groupZeros: bool=False) -> pd.DataFrame :
    """
    Split the rows of a DataFrame into quantiles, reporting each quantile as its own variable.

    Rows are sorted by `values` in ascending order and cut into `n` consecutive chunks using
    the sizing rule of `np.array_split` (balanced by row count, not by value).

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame holding the data to split.
    values : str
        Column whose values are compared (sort key).
    cvs_muncs : str
        Column whose elements are collected for each quantile.
    variable : str, optional
        Name used as the prefix of every variable; defaults to the column name given in `values`.
    n : int, optional
        Number of quantiles, `10` by default.
    groupZeros : bool, optional
        If `True`, rows whose value equals `0` are removed before splitting and reported as an
        extra quantile labelled `q00`. `False` by default.

    Returns
    -------
    pd.DataFrame
        One row per quantile with columns `name` and `cells`. Each name has the form
        'name, qi, l:u', where `name` is the variable name, `i` the two-digit quantile index,
        `l` the first value of the chunk and `u` its last value. An empty chunk (fewer rows
        than `n`) is reported with 'nan:nan' bounds and an empty list.
    """

    variable = values if variable == '' else variable
    df = df.sort_values(by=values, ascending=True)
    names = []
    muncs = []

    if groupZeros:
        # Select zero rows by mask and by column label, so the result does not depend on the
        # zeros sorting first or on `cvs_muncs` being one of the first two columns.
        is_zero = df[values] == 0
        zero_rows = df.loc[is_zero]
        df = df.loc[~is_zero]
        names.append(variable + ', q00, 0.0:0.0')
        muncs.append(list(zero_rows[cvs_muncs]))

    # Split row positions rather than the frame itself; chunk sizes are the same.
    for i, positions in enumerate(np.array_split(np.arange(len(df)), n)):
        chunk = df.iloc[positions]
        if len(chunk):
            low = np.float64(chunk[values].iloc[0])
            high = np.float64(chunk[values].iloc[-1])
        else:
            # Fewer rows than quantiles: report the empty chunk instead of raising.
            low = high = np.float64('nan')
        names.append(variable + ', q' + '{:02}'.format(i+1) + ', ' + str(low) + ':' + str(high))
        muncs.append(list(chunk[cvs_muncs]))

    return pd.DataFrame({'name':names, 'cells':muncs})


In [13]:
# Same 10-way split of the 2019 totals, with every quantile reported as a separately named variable.
total_sv19_df = varByQuantil(
    df=sv19_df,
    values='Total 2019',
    cvs_muncs='CVE_MUNICIPIO',
    variable='Sembrando vida total 2019',
    n=10,
)
total_sv19_df

Unnamed: 0,name,cells
0,"Sembrando vida total 2019, q01, 0.0:0.0","[13023, 19017, 19018, 19020, 19021, 19022, 190..."
1,"Sembrando vida total 2019, q02, 0.0:0.0","[20071, 20070, 20069, 20041, 20045, 20048, 200..."
2,"Sembrando vida total 2019, q03, 0.0:0.0","[15090, 15091, 15093, 15113, 15114, 15115, 160..."
3,"Sembrando vida total 2019, q04, 0.0:0.0","[26005, 26006, 26007, 26008, 26009, 26010, 250..."
4,"Sembrando vida total 2019, q05, 0.0:0.0","[20439, 20442, 20374, 20443, 20446, 20447, 204..."
5,"Sembrando vida total 2019, q06, 0.0:0.0","[20221, 12035, 21183, 31054, 20282, 31094, 200..."
6,"Sembrando vida total 2019, q07, 0.0:0.0","[20204, 28019, 21030, 20518, 20465, 20341, 290..."
7,"Sembrando vida total 2019, q08, 0.0:0.0","[5018, 5017, 5027, 7014, 7017, 7020, 7093, 708..."
8,"Sembrando vida total 2019, q09, 0.0:5160000.0","[1001, 8029, 8027, 4004, 18009, 25017, 30123, ..."
9,"Sembrando vida total 2019, q10, 5275000.0:9085...","[30116, 30034, 30145, 30071, 7088, 21029, 3016..."


In [14]:
# Same 10-way split of the 2020 totals, with every quantile reported as a separately named variable.
total_sv20_df = varByQuantil(
    df=sv20_df,
    values='Total 2020',
    cvs_muncs='CVE_MUNICIPIO',
    variable='Sembrando vida total 2020',
    n=10,
)
total_sv20_df

Unnamed: 0,name,cells
0,"Sembrando vida total 2020, q01, 0.0:0.0","[13024, 19018, 19020, 19021, 19022, 19023, 190..."
1,"Sembrando vida total 2020, q02, 0.0:0.0","[20072, 20090, 20071, 20069, 20045, 20048, 200..."
2,"Sembrando vida total 2020, q03, 0.0:0.0","[15090, 15091, 15092, 15094, 15114, 15115, 151..."
3,"Sembrando vida total 2020, q04, 0.0:0.0","[26004, 26005, 26006, 26007, 26008, 26009, 260..."
4,"Sembrando vida total 2020, q05, 0.0:0.0","[20438, 20439, 20442, 20374, 20443, 20446, 204..."
5,"Sembrando vida total 2020, q06, 0.0:0.0","[13023, 7085, 7083, 7078, 7075, 7064, 7058, 70..."
6,"Sembrando vida total 2020, q07, 0.0:1680000.0","[11003, 11007, 11008, 11009, 11010, 11011, 110..."
7,"Sembrando vida total 2020, q08, 1690000.0:7315...","[8051, 21049, 17002, 21101, 20223, 20288, 2102..."
8,"Sembrando vida total 2020, q09, 7325000.0:2142...","[30157, 31058, 29040, 20394, 29008, 20486, 301..."
9,"Sembrando vida total 2020, q10, 21430000.0:955...","[30100, 24014, 30195, 10030, 7079, 7069, 30204..."


In [15]:
# Same 10-way split of the 2019-2020 totals, with every quantile reported as a separately named variable.
total_sv1920_df = varByQuantil(
    df=sv1920_df,
    values='Total 2019-2020',
    cvs_muncs='ET_ID',
    variable='Sembrando vida total 2019-2020',
    n=10,
)
total_sv1920_df

Unnamed: 0,name,cells
0,"Sembrando vida total 2019-2020, q01, 0.0:0.0","[13027, 19018, 19020, 19021, 19022, 19023, 190..."
1,"Sembrando vida total 2019-2020, q02, 0.0:0.0","[20072, 20090, 20071, 20069, 20045, 20048, 200..."
2,"Sembrando vida total 2019-2020, q03, 0.0:0.0","[15090, 15091, 15092, 15094, 15114, 15115, 151..."
3,"Sembrando vida total 2019-2020, q04, 0.0:0.0","[26004, 26005, 26006, 26007, 26008, 26009, 260..."
4,"Sembrando vida total 2019-2020, q05, 0.0:0.0","[20438, 20439, 20442, 20374, 20443, 20446, 204..."
5,"Sembrando vida total 2019-2020, q06, 0.0:5000.0","[13023, 7108, 7106, 7098, 7094, 7093, 7086, 70..."
6,"Sembrando vida total 2019-2020, q07, 5000.0:18...","[18017, 19038, 16058, 28028, 21140, 21166, 260..."
7,"Sembrando vida total 2019-2020, q08, 1840000.0...","[31003, 20093, 20456, 20304, 31072, 20237, 200..."
8,"Sembrando vida total 2019-2020, q09, 8425000.0...","[10015, 20520, 30150, 20274, 8041, 21025, 2041..."
9,"Sembrando vida total 2019-2020, q10, 27265000....","[29034, 16065, 12057, 7091, 30127, 10019, 1003..."


### Almacenamiento de los datos en JSON

In [None]:
## total_sv19_df.to_json('generatedData/sembrandoVida/sv19.json', index=False, orient='records')
## total_sv20_df.to_json('generatedData/sembrandoVida/sv20.json', index=False, orient='records')
## total_sv1920_df.to_json('generatedData/sembrandoVida/sv1920.json', index=False, orient='records')

### Comparación con los datos de `covariables_sv.csv`

In [17]:
# First 2469 rows of covariables_sv.csv (used in this notebook as the 2019 block). `.copy()` turns the
# slice into an independent frame, so the column assignment below cannot raise SettingWithCopyWarning.
covariables19_df = pd.read_csv("sourceData/covariables_sv.csv", dtype={'especievalida':str, 'min':str, 'max':str, 'ET_ID':int})[:2469].copy()
# Remove the leading 'Politic01.p' prefix and everything from the first space onwards.
# Raw string: '\A' is not a valid Python string escape, so a plain literal emits an invalid-escape warning.
covariables19_df['especievalida'] = covariables19_df['especievalida'].replace(regex=[r'\APolitic01.p', ' .*'], value='')
covariables19_df = covariables19_df[['especievalida', 'ET_ID']]
covariables19_df

Unnamed: 0,especievalida,ET_ID
0,00,15030
1,00,15069
2,00,20103
3,00,20157
4,00,20215
...,...,...
2464,09,23004
2465,00,4006
2466,10,7059
2467,10,23002


In [18]:
# Rows 2469-4937 of covariables_sv.csv (used in this notebook as the 2020 block). `.copy()` turns the
# slice into an independent frame, so the column assignment below cannot raise SettingWithCopyWarning.
covariables20_df = pd.read_csv("sourceData/covariables_sv.csv", dtype={'especievalida':str, 'min':str, 'max':str, 'ET_ID':int})[2469:4938].copy()
# Remove the leading 'Politic02.p' prefix and everything from the first space onwards.
# Raw string: '\A' is not a valid Python string escape, so a plain literal emits an invalid-escape warning.
covariables20_df['especievalida'] = covariables20_df['especievalida'].replace(regex=[r'\APolitic02.p', ' .*'], value='')
# Reset the index so rows are numbered from 0 again after the slice.
covariables20_df = covariables20_df[['especievalida', 'ET_ID']].reset_index(drop=True)
covariables20_df

Unnamed: 0,especievalida,ET_ID
0,00,15030
1,00,15069
2,00,20103
3,00,20157
4,02,20215
...,...,...
2464,10,23004
2465,10,4006
2466,10,7059
2467,10,23002


In [19]:
# Rows 4938-7406 of covariables_sv.csv (used in this notebook as the 2019-2020 block). `.copy()` turns the
# slice into an independent frame, so the column assignment below cannot raise SettingWithCopyWarning.
covariables1920_df = pd.read_csv("sourceData/covariables_sv.csv", dtype={'especievalida':str, 'min':str, 'max':str, 'ET_ID':int})[4938:7407].copy()
# Remove the leading 'Politic03.p' prefix and everything from the first space onwards.
# Raw string: '\A' is not a valid Python string escape, so a plain literal emits an invalid-escape warning.
covariables1920_df['especievalida'] = covariables1920_df['especievalida'].replace(regex=[r'\APolitic03.p', ' .*'], value='')
# Reset the index so rows are numbered from 0 again after the slice.
covariables1920_df = covariables1920_df[['especievalida', 'ET_ID']].reset_index(drop=True)
covariables1920_df

Unnamed: 0,especievalida,ET_ID
0,00,15030
1,00,15069
2,00,20103
3,00,20157
4,02,20215
...,...,...
2464,10,23004
2465,10,4006
2466,10,7059
2467,10,23002


Distribución de las celdas en cada cuantil por año

In [20]:
def cells_by_quantile(df: pd.DataFrame, variable: str, n: int = 10) -> dict:
    """
    Count how many rows carry each quantile label.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame holding the labelled rows.
    variable : str
        Column containing the two-digit, zero-padded quantile labels ('00', '01', ...).
    n : int, optional
        Highest label to report; labels '00' through `n` (zero-padded) are counted.
        `10` by default, i.e. the eleven labels '00'..'10'.

    Returns
    -------
    dict
        Maps each label, in ascending order, to its number of rows (0 when the label is absent).
    """
    # Count once; the counts do not change between labels.
    counts = df[variable].value_counts()
    labels = ('{:02}'.format(i) for i in range(n + 1))
    return {label: counts.get(label, 0) for label in labels}

# Row counts per quantile label for each of the three covariables blocks.
cells19, cells20, cells1920 = (
    cells_by_quantile(frame, 'especievalida')
    for frame in (covariables19_df, covariables20_df, covariables1920_df)
)
print(cells19)
print(cells20)
print(cells1920)

{'00': 2079, '01': 39, '02': 38, '03': 39, '04': 40, '05': 39, '06': 39, '07': 39, '08': 39, '09': 39, '10': 39}
{'00': 1489, '01': 97, '02': 98, '03': 99, '04': 98, '05': 98, '06': 98, '07': 98, '08': 98, '09': 98, '10': 98}
{'00': 1479, '01': 98, '02': 100, '03': 99, '04': 99, '05': 99, '06': 99, '07': 99, '08': 99, '09': 99, '10': 99}


Obteniendo las celdas de cada cuantil por año

In [21]:
def get_quantile_cells(df: pd.DataFrame, q:int, variable:str, cell:str):
    """
    Collect, for every quantile label, the cells that carry it.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame holding the labelled rows.
    q : int
        Number of labels to collect; labels are the zero-padded strings '00' .. `q - 1`.
    variable : str
        Column containing the quantile labels.
    cell : str
        Column whose values are gathered for each label.

    Returns
    -------
    list of list
        `q` lists; the i-th one holds the `cell` values (as plain Python scalars, in row
        order) of the rows whose label equals '{:02}'.format(i).
    """
    labels = df[variable]
    # Boolean masks select rows by position in the frame, so this works for any index
    # (the label-based `df[col][j]` lookup required a default 0..len-1 index).
    return [df.loc[labels == '{:02}'.format(i), cell].tolist() for i in range(q)]

# Cells per quantile label ('00'..'10') for each of the three covariables blocks.
q_cov19, q_cov20, q_cov1920 = (
    get_quantile_cells(frame, 11, 'especievalida', 'ET_ID')
    for frame in (covariables19_df, covariables20_df, covariables1920_df)
)
print(q_cov19)
print(q_cov20)
print(q_cov1920)

[[15030, 15069, 20103, 20157, 20215, 20389, 20393, 21013, 21038, 21097, 21117, 21118, 21144, 26007, 20411, 20087, 15044, 20192, 11044, 32057, 26016, 20063, 32006, 21151, 20538, 21125, 26070, 29054, 20174, 26060, 20132, 29056, 29060, 20315, 20555, 20013, 15100, 32013, 32005, 20539, 14098, 20069, 21090, 8053, 20542, 20077, 29058, 24049, 20342, 20109, 29048, 20049, 20338, 15058, 29008, 20268, 20403, 20033, 26048, 15108, 29053, 26071, 29057, 20145, 29044, 29042, 10036, 15031, 15022, 15125, 20375, 20387, 20007, 20390, 26026, 20061, 26047, 21181, 15076, 20227, 24025, 16011, 1004, 32037, 15053, 20488, 13069, 32053, 15011, 1011, 15055, 21146, 20452, 32016, 29011, 20301, 32035, 26064, 9014, 21041, 20106, 20238, 29059, 9015, 29045, 20360, 15027, 26025, 20144, 26046, 32054, 21182, 29029, 20241, 15059, 20380, 20493, 32032, 14070, 15073, 20399, 20101, 20556, 29019, 29051, 15012, 21128, 29049, 26056, 21133, 26017, 20096, 1009, 29016, 21171, 32025, 17031, 26004, 32039, 21126, 20165, 8024, 21136, 5010

Dado que en `covariables_sv.csv` hay un cuantil que agrupa los municipios con un total de 0 en cada año, se debe hacer lo mismo con los obtenidos anteriormente para poder compararlos.

In [22]:
# Recompute the 2019 split with zero-valued rows pulled out into bin 0, then 10 groups over the rest.
total_sv19_df = quantiles(
    df=sv19_df,
    values='Total 2019',
    cvs_muncs='CVE_MUNICIPIO',
    n=10,
    groupZeros=True,
)
total_sv19_df

Unnamed: 0,name,bin,lower,upper,cells
0,Total 2019,0,0.0,0.0,"[13023, 19017, 19018, 19020, 19021, 19022, 190..."
1,Total 2019,1,5000.0,50000.0,"[2002, 27999, 7107, 20490, 30999, 7999, 19046,..."
2,Total 2019,2,50000.0,215000.0,"[17020, 30191, 15070, 9009, 15024, 7124, 30090..."
3,Total 2019,3,250000.0,2945000.0,"[21105, 23005, 30118, 30028, 30190, 20044, 100..."
4,Total 2019,4,2945000.0,7040000.0,"[30131, 21077, 30068, 30102, 21210, 30052, 301..."
5,Total 2019,5,7200000.0,10190000.0,"[31014, 31008, 30042, 30157, 31043, 21192, 300..."
6,Total 2019,6,10320000.0,18115000.0,"[31097, 30110, 30039, 21044, 30137, 30020, 211..."
7,Total 2019,7,18565000.0,24765000.0,"[10029, 30189, 30010, 30204, 30160, 7102, 1000..."
8,Total 2019,8,25410000.0,42605000.0,"[30032, 7084, 30134, 30168, 7096, 30089, 7050,..."
9,Total 2019,9,42670000.0,58310000.0,"[7067, 10016, 30047, 7105, 10023, 30043, 30031..."


Los cuantiles generados coinciden con los de `covariables_sv.csv`, a excepción de los municipios que no se encuentran en 2019-2020 pero sí en 2019 (27999, 30999, 7999), y de los municipios 30191, 17020 y 30120, debido a la diferente distribución de los cuantiles en `covariables_sv.csv`.

In [23]:
# Per quantile, list the ids that appear on only one side: first those only in the
# covariables quantile, then those only in the generated quantile.
for i in range(11):
    q_total_sv19 = total_sv19_df['cells'][i]
    cov_ids, gen_ids = set(q_cov19[i]), set(q_total_sv19)
    res = [x for x in q_cov19[i] if x not in gen_ids] + [x for x in q_total_sv19 if x not in cov_ids]
    print("Dif en q", i, ":", res)

Dif en q 0 : []
Dif en q 1 : [30191, 17020, 27999, 30999, 7999]
Dif en q 2 : [17020, 30191]
Dif en q 3 : [30120]
Dif en q 4 : [30120]
Dif en q 5 : []
Dif en q 6 : []
Dif en q 7 : []
Dif en q 8 : []
Dif en q 9 : []
Dif en q 10 : []


In [24]:
# Recompute the 2020 split with zero-valued rows pulled out into bin 0, then 10 groups over the rest.
total_sv20_df = quantiles(
    df=sv20_df,
    values='Total 2020',
    cvs_muncs='CVE_MUNICIPIO',
    n=10,
    groupZeros=True,
)
total_sv20_df

Unnamed: 0,name,bin,lower,upper,cells
0,Total 2020,0,0.0,0.0,"[13024, 19018, 19020, 19021, 19022, 19023, 190..."
1,Total 2020,1,5000.0,45000.0,"[18017, 15058, 15109, 16006, 16058, 16087, 190..."
2,Total 2020,2,45000.0,1160000.0,"[30118, 7099, 30106, 23005, 21173, 8018, 30028..."
3,Total 2020,3,1160000.0,2450000.0,"[20303, 21039, 31101, 20164, 20423, 20230, 202..."
4,Total 2020,4,2545000.0,4460000.0,"[20250, 12022, 30119, 20552, 20262, 20562, 204..."
5,Total 2020,5,4495000.0,7345000.0,"[31054, 21183, 12035, 20221, 21025, 20480, 170..."
6,Total 2020,6,7375000.0,11130000.0,"[29008, 20486, 30196, 20130, 28026, 21208, 100..."
7,Total 2020,7,11195000.0,17370000.0,"[7012, 30099, 21086, 31093, 30137, 30059, 3004..."
8,Total 2020,8,17520000.0,25860000.0,"[12038, 17015, 21064, 30101, 24026, 20036, 707..."
9,Total 2020,9,25895000.0,44670000.0,"[20052, 31102, 16038, 30032, 29034, 7084, 1606..."


Aquí también coinciden los cuantiles generados con los de `covariables_sv.csv`, a excepción del municipio que no se encuentra en 2019-2020 pero sí en 2020 (18999), y de los municipios 21063 y 20440, también debido a la diferente distribución de los cuantiles en `covariables_sv.csv`.

In [25]:
# Per quantile, list the ids that appear on only one side: first those only in the
# covariables quantile, then those only in the generated quantile.
for i in range(11):
    q_total_sv20 = total_sv20_df['cells'][i]
    cov_ids, gen_ids = set(q_cov20[i]), set(q_total_sv20)
    res = [x for x in q_cov20[i] if x not in gen_ids] + [x for x in q_total_sv20 if x not in cov_ids]
    print("Dif en q", i, ":", res)

Dif en q 0 : []
Dif en q 1 : [18999, 21063]
Dif en q 2 : [21063, 20440]
Dif en q 3 : [20440]
Dif en q 4 : []
Dif en q 5 : []
Dif en q 6 : []
Dif en q 7 : []
Dif en q 8 : []
Dif en q 9 : []
Dif en q 10 : []


In [26]:
# Recompute the 2019-2020 split with zero-valued rows pulled out into bin 0, then 10 groups over the rest.
total_sv1920_df = quantiles(
    df=sv1920_df,
    values='Total 2019-2020',
    cvs_muncs='ET_ID',
    n=10,
    groupZeros=True,
)
total_sv1920_df

Unnamed: 0,name,bin,lower,upper,cells
0,Total 2019-2020,0,0.0,0.0,"[13027, 19018, 19020, 19021, 19022, 19023, 190..."
1,Total 2019-2020,1,5000.0,85000.0,"[16006, 20357, 16087, 18017, 19038, 16058, 280..."
2,Total 2019-2020,2,85000.0,1100000.0,"[9016, 13081, 16029, 21126, 7063, 8037, 12055,..."
3,Total 2019-2020,3,1145000.0,2610000.0,"[21102, 21028, 31038, 21039, 20303, 20440, 311..."
4,Total 2019-2020,4,2620000.0,4820000.0,"[20404, 29047, 20128, 17018, 20257, 20173, 804..."
5,Total 2019-2020,5,4855000.0,8225000.0,"[20283, 29052, 17033, 20075, 20270, 12030, 204..."
6,Total 2019-2020,6,8370000.0,14540000.0,"[21159, 30054, 10015, 20520, 30150, 20274, 804..."
7,Total 2019-2020,7,14565000.0,21320000.0,"[4008, 21070, 13026, 31043, 30157, 12074, 1801..."
8,Total 2019-2020,8,21385000.0,36450000.0,"[7012, 24014, 30039, 21044, 21199, 21186, 1605..."
9,Total 2019-2020,9,36485000.0,72860000.0,"[30007, 25005, 8066, 13025, 24046, 30151, 2405..."


El único municipio que no coincide (8019) también se debe a la diferente distribución de los cuantiles en `covariables_sv.csv`.

In [27]:
# Per quantile, list the ids that appear on only one side: first those only in the
# covariables quantile, then those only in the generated quantile.
for i in range(11):
    q_total_sv1920 = total_sv1920_df['cells'][i]
    cov_ids, gen_ids = set(q_cov1920[i]), set(q_total_sv1920)
    res = [x for x in q_cov1920[i] if x not in gen_ids] + [x for x in q_total_sv1920 if x not in cov_ids]
    print("Dif en q", i, ":", res)

Dif en q 0 : []
Dif en q 1 : [8019]
Dif en q 2 : [8019]
Dif en q 3 : []
Dif en q 4 : []
Dif en q 5 : []
Dif en q 6 : []
Dif en q 7 : []
Dif en q 8 : []
Dif en q 9 : []
Dif en q 10 : []
