In [1]:
import geopandas as gpd
import pandas as pd
from tqdm import tqdm
import requests
from zipfile import ZipFile 
import time
import os
import shutil

In [3]:
#Funciones de apoyo
def download(url,dir):
    time.sleep(5)
    chunk_size = 1024
    r = requests.get(url, stream = True)
    total_size = int(r.headers['content-length'])
    filename = url.split('/')[-1]
    with open(dir+filename, 'wb') as f:
        for data in tqdm(iterable = r.iter_content(chunk_size = chunk_size), total = total_size/chunk_size, unit = 'KB'):
            f.write(data)
            
def extract_shapefile(states,directory,shp_dir,shape_type):
    for i in range(32):
        estado = states[i]
        file = str(i+1).zfill(2)
        zip_file = directory+estado
        shp_file = f'conjunto_de_datos/{file}{shape_type}.shp'
        cpg_file = f'conjunto_de_datos/{file}{shape_type}.cpg'
        dbf_file = f'conjunto_de_datos/{file}{shape_type}.dbf'
        prj_file = f'conjunto_de_datos/{file}{shape_type}.prj'
        shx_file = f'conjunto_de_datos/{file}{shape_type}.shx'
        with ZipFile(zip_file, 'r') as zip:
            zip.extract(shp_file,shp_dir)
            zip.extract(dbf_file,shp_dir)
            zip.extract(prj_file,shp_dir)
            zip.extract(shx_file,shp_dir)
            try:
                zip.extract(cpg_file,shp_dir)
            except:
                with open(shp_dir+cpg_file, 'w') as out_file:
                    out_file.write("ISO 88591")

In [4]:
# Estructura de Directorios
os.makedirs("./inegi/ccpvagebmza/csv/conjunto_de_datos") 
os.makedirs("./inegi/mgccpv/shp/m/conjunto_de_datos") 
os.makedirs("./inegi/mgccpv/gpkg/")

In [6]:
directory= "./inegi/ccpvagebmza/"
for i in range(32):
    estado = str(i+1).zfill(2)
    ageb_mza = f'https://www.inegi.org.mx/contenidos/programas/ccpv/2020/datosabiertos/ageb_manzana/ageb_mza_urbana_{estado}_cpv2020_csv.zip'
    download(ageb_mza,directory)

directory= "./inegi/mgccpv/"
url_mgccpv = "https://www.inegi.org.mx/contenidos/productos/prod_serv/contenidos/espanol/bvinegi/productos/geografia/marcogeo/889463807469/"
states = ["01_aguascalientes.zip","02_bajacalifornia.zip","03_bajacaliforniasur.zip","04_campeche.zip","05_coahuiladezaragoza.zip","06_colima.zip","07_chiapas.zip","08_chihuahua.zip","09_ciudaddemexico.zip","10_durango.zip","11_guanajuato.zip","12_guerrero.zip","13_hidalgo.zip","14_jalisco.zip","15_mexico.zip","16_michoacandeocampo.zip","17_morelos.zip","18_nayarit.zip","19_nuevoleon.zip","20_oaxaca.zip","21_puebla.zip","22_queretaro.zip","23_quintanaroo.zip","24_sanluispotosi.zip","25_sinaloa.zip","26_sonora.zip","27_tabasco.zip","28_tamaulipas.zip","29_tlaxcala.zip","30_veracruzignaciodelallave.zip","31_yucatan.zip","32_zacatecas.zip"]
for state in states:
    print(state)
    state_file = url_mgccpv+state
    download(state_file,directory)

2558KB [00:00, 3520.41KB/s]                                     
  full_bar = Bar(frac,
100%|██████████| 8625/8624.53125 [00:02<00:00, 3743.11KB/s]
100%|██████████| 2297/2296.5458984375 [00:00<00:00, 3353.83KB/s]
1800KB [00:00, 3398.77KB/s]                                     
100%|██████████| 7879/7878.5419921875 [00:02<00:00, 3691.10KB/s]
2048KB [00:00, 3429.94KB/s]                                    
100%|██████████| 7783/7782.9921875 [00:01<00:00, 4768.08KB/s]
9540KB [00:02, 4543.58KB/s]                                     
100%|██████████| 12743/12742.533203125 [00:03<00:00, 3791.74KB/s]
100%|██████████| 4097/4096.9013671875 [00:00<00:00, 4435.97KB/s]
10006KB [00:02, 4799.98KB/s]                                    
7294KB [00:01, 3758.16KB/s]                                    
5411KB [00:01, 4774.98KB/s]                                     
100%|██████████| 15993/15992.505859375 [00:04<00:00, 3663.36KB/s]
24659KB [00:06, 3816.12KB/s]                                      
100%|███

01_aguascalientes.zip


35903KB [00:09, 3630.58KB/s]                                      


02_bajacalifornia.zip


100%|██████████| 66733/66732.7880859375 [00:17<00:00, 3821.21KB/s]


03_bajacaliforniasur.zip


32043KB [00:07, 4407.48KB/s]                                     


04_campeche.zip


100%|██████████| 32889/32888.828125 [00:07<00:00, 4581.91KB/s]


05_coahuiladezaragoza.zip


100%|██████████| 65732/65731.93359375 [00:14<00:00, 4584.22KB/s]


06_colima.zip


100%|██████████| 28298/28297.5615234375 [00:07<00:00, 3817.40KB/s]


07_chiapas.zip


100%|██████████| 106237/106236.5068359375 [00:27<00:00, 3819.62KB/s]


08_chihuahua.zip


100%|██████████| 103312/103311.6591796875 [00:22<00:00, 4572.40KB/s]


09_ciudaddemexico.zip


81258KB [00:23, 3517.16KB/s]                                      


10_durango.zip


60160KB [00:13, 4332.53KB/s]                                     


11_guanajuato.zip


100%|██████████| 130036/130035.5380859375 [00:27<00:00, 4810.46KB/s]


12_guerrero.zip


122744KB [00:32, 3755.82KB/s]                                       


13_hidalgo.zip


100%|██████████| 107392/107391.822265625 [00:29<00:00, 3655.76KB/s]


14_jalisco.zip


100%|██████████| 139284/139283.65625 [00:29<00:00, 4801.61KB/s]


15_mexico.zip


100%|██████████| 221411/221410.6123046875 [00:45<00:00, 4862.94KB/s]


16_michoacandeocampo.zip


119353KB [00:33, 3602.36KB/s]                                       


17_morelos.zip


61927KB [00:19, 3106.00KB/s]                                     


18_nayarit.zip


100%|██████████| 45301/45300.8037109375 [00:10<00:00, 4310.89KB/s]


19_nuevoleon.zip


89256KB [00:19, 4535.18KB/s]                                      


20_oaxaca.zip


100%|██████████| 170480/170479.783203125 [00:44<00:00, 3850.11KB/s]


21_puebla.zip


100%|██████████| 148512/148511.5595703125 [00:37<00:00, 3919.34KB/s]


22_queretaro.zip


64303KB [00:15, 4272.77KB/s]                                    


23_quintanaroo.zip


100%|██████████| 36973/36972.77734375 [00:07<00:00, 4690.30KB/s]


24_sanluispotosi.zip


100%|██████████| 89046/89045.7939453125 [00:20<00:00, 4302.12KB/s]


25_sinaloa.zip


70221KB [00:15, 4676.56KB/s]                                     


26_sonora.zip


100%|██████████| 72258/72257.6943359375 [00:16<00:00, 4502.12KB/s]


27_tabasco.zip


100%|██████████| 49727/49726.9833984375 [00:13<00:00, 3587.08KB/s]


28_tamaulipas.zip


78114KB [00:16, 4876.94KB/s]                                     


29_tlaxcala.zip


52192KB [00:13, 3865.32KB/s]                                     


30_veracruzignaciodelallave.zip


179524KB [00:38, 4608.84KB/s]                                      


31_yucatan.zip


100%|██████████| 49385/49384.92578125 [00:13<00:00, 3618.82KB/s]


32_zacatecas.zip


72684KB [00:19, 3729.00KB/s]                                    


In [7]:
#Extracción de Datos
# Variables e indicadores del Censo de Población 2020
directory= "./inegi/ccpvagebmza/"
csv_dir = directory+"csv/"
for i in range(32):
    estado = str(i+1).zfill(2)
    zip_file = directory+f'ageb_mza_urbana_{estado}_cpv2020_csv.zip'
    csv_file = f'conjunto_de_datos/conjunto_de_datos_ageb_urbana_{estado}_cpv2020.csv'
    with ZipFile(zip_file, 'r') as zip:
        zip.extract(csv_file,csv_dir)

# Manzanas
shape_type = "m"
directory= "./inegi/mgccpv/"
shp_dir = directory+"shp/m/"
extract_shapefile(states,directory,shp_dir,shape_type)

# Caserío disperso
shape_type = "cd"
directory= "./inegi/mgccpv/"
shp_dir = directory+"shp/m/"
extract_shapefile(states,directory,shp_dir,shape_type)

# Polígono externo de manzana
shape_type = "pem"
directory= "./inegi/mgccpv/"
shp_dir = directory+"shp/m/"
extract_shapefile(states,directory,shp_dir,shape_type)

In [8]:
#Union de Datos
for i in range(32):
    estado = str(i+1).zfill(2)
    print("procesando estado: "+str(estado))
    gpdf = gpd.read_file(f"./inegi/mgccpv/shp/m/conjunto_de_datos/{estado}m.shp")
    cddf = gpd.read_file(f"./inegi/mgccpv/shp/m/conjunto_de_datos/{estado}cd.shp")
    pemdf = gpd.read_file(f"./inegi/mgccpv/shp/m/conjunto_de_datos/{estado}pem.shp")
    df = pd.read_csv(f"./inegi/ccpvagebmza/csv/conjunto_de_datos/conjunto_de_datos_ageb_urbana_{estado}_cpv2020.csv",na_values=['N/A','N/D','*'])
    df['CVEGEO'] = df.apply(lambda row: str(row['ENTIDAD']).zfill(2) + str(row['MUN']).zfill(3)+ str(row['LOC']).zfill(4)+ str(row['AGEB']).zfill(4)+ str(row['MZA']).zfill(3), axis=1)
    df_geo_censo = pd.merge(df, gpdf, how = 'left').drop(["CVE_ENT", "CVE_MUN", "CVE_LOC", "CVE_AGEB", "CVE_MZA"], axis = 1)
    df_geo_censo = df_geo_censo.drop(df[df.MZA == 0].index)
    df_geo_censo = gpd.GeoDataFrame(df_geo_censo, geometry="geometry")
    #df_geo_censo = df_geo_censo.to_crs("EPSG:4326")
    df_full = df_geo_censo[df_geo_censo['geometry'].notnull()]
    df1 = df_geo_censo[df_geo_censo['geometry'].isnull()]
    df1 = df1.drop(["TIPOMZA","AMBITO","geometry"], axis = 1)
    df_geo_dif = pd.merge(df1, cddf, how = 'left').drop(["CVE_ENT", "CVE_MUN", "CVE_LOC", "CVE_AGEB", "CVE_MZA"], axis = 1)
    pemdf = pemdf.rename(columns={'geometry': 'geometry_pem'})
    pemdf = pemdf.drop(["CVE_ENT", "CVE_MUN", "CVE_LOC", "CVE_AGEB", "CVE_MZA"], axis = 1)
    df_geo_dif = pd.merge(df_geo_dif, pemdf, how = 'left')
    df_geo_dif['geometry'] = df_geo_dif.apply(lambda row : row['geometry_pem'] if row['geometry_pem'] != None else row['geometry'], axis = 1)
    df_geo_dif =  df_geo_dif.drop(["geometry_pem"], axis = 1)
    final_shape = pd.concat([df_full,df_geo_dif],ignore_index=True)
    final_shape["AGEB"] = final_shape.AGEB.apply(str)
    print("guardando datos del estado: "+str(estado))
    final_shape.to_file(f"./inegi/mgccpv/gpkg/cpv2020_{estado}.gpkg", layer='cpv2020', driver="GPKG")

procesando estado: 01
guardando datos del estado: 01
procesando estado: 02


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


guardando datos del estado: 02
procesando estado: 03
guardando datos del estado: 03
procesando estado: 04
guardando datos del estado: 04
procesando estado: 05
guardando datos del estado: 05
procesando estado: 06
guardando datos del estado: 06
procesando estado: 07
guardando datos del estado: 07
procesando estado: 08
guardando datos del estado: 08
procesando estado: 09
guardando datos del estado: 09
procesando estado: 10
guardando datos del estado: 10
procesando estado: 11
guardando datos del estado: 11
procesando estado: 12
guardando datos del estado: 12
procesando estado: 13
guardando datos del estado: 13
procesando estado: 14
guardando datos del estado: 14
procesando estado: 15
guardando datos del estado: 15
procesando estado: 16
guardando datos del estado: 16
procesando estado: 17
guardando datos del estado: 17
procesando estado: 18
guardando datos del estado: 18
procesando estado: 19
guardando datos del estado: 19
procesando estado: 20
guardando datos del estado: 20
procesando esta