In [2]:
import pandas as pd
import numpy as np

### Importación de datos

In [3]:
# Load the three source tables from CSV.
# The last record of total_sv19.csv contains no data, so it is dropped on load.
df_total_sv19 = pd.read_csv('sourceData/total_sv19.csv').iloc[:-1]
df_total_sv20 = pd.read_csv('sourceData/total_sv20.csv')
df_sv_1920_dict = pd.read_csv('sourceData/sv_1920.csv')

In [4]:
# Keep only the municipality code and its total, cast both columns to int,
# and order the rows by municipality code (ascending).
df_total_sv19 = (
    df_total_sv19
    .loc[:, ['CVE_MUNICIPIO', 'total']]
    .astype(int)
    .sort_values(by='CVE_MUNICIPIO')
)
df_total_sv19.head()

Unnamed: 0,CVE_MUNICIPIO,total
0,2001,40000
1,2002,5000
2,2004,50000
3,4002,105000
4,4003,935000


In [5]:
# Keep only the municipality code and its total, cast both columns to int,
# and order the rows by municipality code (ascending).
df_total_sv20 = (
    df_total_sv20
    .loc[:, ['CVE_MUNICIPIO', 'total']]
    .astype(int)
    .sort_values(by='CVE_MUNICIPIO')
)
df_total_sv20.head()

Unnamed: 0,CVE_MUNICIPIO,total
0,2001,10000
1,2004,10000
2,4001,39565000
3,4002,220000
4,4003,150000


In [6]:
# Keep the municipality id plus the three total columns used by the checks
# below, ordered by municipality id (ascending). No dtype cast is applied here.
df_sv_1920_dict = (
    df_sv_1920_dict
    .loc[:, ['ET_ID', 'total_sv19_total', 'total_sv_total.1', 'total1920']]
    .sort_values(by='ET_ID')
)
df_sv_1920_dict.head()

Unnamed: 0,ET_ID,total_sv19_total,total_sv_total.1,total1920
990,1001,,,0
991,1002,,,0
992,1003,,,0
993,1004,,,0
994,1005,,,0


### Verificación de datos

Creando diccionarios de la forma `clave municipio: total` para 2019 y 2020, y `clave municipio: (total 2019, total 2020, total 2019-2020)` para 2019-2020.

In [7]:
# Lookup tables keyed by municipality code.
# Single-year tables map: municipality code -> total.
total_sv19_dict: dict[int, int] = dict(zip(df_total_sv19['CVE_MUNICIPIO'], df_total_sv19['total']))
total_sv20_dict: dict[int, int] = dict(zip(df_total_sv20['CVE_MUNICIPIO'], df_total_sv20['total']))

# Combined table maps: municipality code -> (total 2019, total 2020, total 2019-2020).
# The value is a 3-tuple, and the per-year entries are NaN where the source
# column is empty, hence float rather than int in the annotation.
sv_1920_dict: dict[int, tuple[float, float, float]] = dict(
    zip(
        df_sv_1920_dict['ET_ID'],
        zip(
            df_sv_1920_dict['total_sv19_total'],
            df_sv_1920_dict['total_sv_total.1'],
            df_sv_1920_dict['total1920'],
        ),
    )
)

Verificando que cada municipio en 2019 se encuentre también en 2019-2020 y comparando los montos por municipio.

In [8]:

# Compare every 2019 municipality against the combined 2019-2020 table:
#   not_in_1920  -> municipalities missing from the combined table (code -> 2019 total)
#   diff_in_1920 -> municipalities whose 2019 total differs (code -> [2019 total, combined 2019 total])
not_in_1920 = {}
diff_in_1920 = {}
for cve, total_19 in total_sv19_dict.items():
    combined = sv_1920_dict.get(cve)
    if combined is None:
        # Municipality does not appear in the combined table at all.
        not_in_1920[cve] = total_19
    elif combined[0] != total_19:
        # Index 0 of the combined tuple holds the 2019 total.
        diff_in_1920[cve] = [total_19, combined[0]]

print("Diferentes montos totales: ", diff_in_1920)
print("Municipios ausentes en 2019-2020: ", not_in_1920)

Diferentes montos totales:  {}
Municipios ausentes en 2019-2020:  {7999: 20000, 27999: 5000, 30999: 10000}


Realizando lo mismo entre 2020 y 2019-2020.

In [9]:
# Compare every 2020 municipality against the combined 2019-2020 table:
#   not_in_1920  -> municipalities missing from the combined table (code -> 2020 total)
#   diff_in_1920 -> municipalities whose 2020 total differs (code -> [2020 total, combined 2020 total])
not_in_1920 = {}
diff_in_1920 = {}
for k, total_20 in total_sv20_dict.items():
    if k not in sv_1920_dict:
        # Municipality does not appear in the combined table at all.
        not_in_1920[k] = total_20
        continue
    # Index 1 of the combined tuple holds the 2020 total.
    combined_total_20 = sv_1920_dict[k][1]
    if combined_total_20 != total_20:
        # Report the value that was actually compared. Previously index 0
        # (the 2019 total) was stored here, which would show the wrong number.
        diff_in_1920[k] = [total_20, combined_total_20]

print("Diferentes montos totales: ", diff_in_1920)
print("Municipios ausentes en 2019-2020: ", not_in_1920)

Diferentes montos totales:  {}
Municipios ausentes en 2019-2020:  {18999: 30000}


Verificando que la suma de los montos de 2019 y 2020 coincida con el total en 2019-2020 por municipio. El resultado es un diccionario de la forma: `clave municipio: (total 2019, total 2020, total 2019-2020)` con los municipios donde difieren las cantidades.

In [10]:
# For every municipality present in at least one of the three tables, check
# that total 2019 + total 2020 equals the combined 2019-2020 total.
# Result: code -> (total 2019, total 2020, total 2019-2020) for mismatches only.
diff_in_1920 = {}

# Iterate over the keys that actually exist instead of a hard-coded numeric
# range: the previous range(1001, 32042) silently skipped any code >= 32042.
# Sorting keeps the result ordered by municipality code, as before.
all_keys = sorted(set(total_sv19_dict) | set(total_sv20_dict) | set(sv_1920_dict))

for k in all_keys:
    # A municipality missing from a table contributes 0 for that table.
    total_19 = total_sv19_dict.get(k, 0)
    total_20 = total_sv20_dict.get(k, 0)
    # Index 2 of the combined tuple holds the 2019-2020 total.
    total_1920 = sv_1920_dict[k][2] if k in sv_1920_dict else 0

    if (total_19 + total_20) != total_1920:
        diff_in_1920.update({k: (total_19, total_20, total_1920)})

print("Diferente en 2019-2020: ", diff_in_1920)

Diferente en 2019-2020:  {7999: (20000, 0, 0), 18999: (0, 30000, 0), 27999: (5000, 0, 0), 30999: (10000, 0, 0), 31102: (0, 26150000, 50510000)}
