In [1]:
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import networkx as nx
import osmnx as ox
import pandas as pd
import geopandas as gpd
from IPython.display import IFrame
%matplotlib inline
ox.__version__

'1.0.0'

You can configure OSMnx using `ox.config()`. See the [documentation](https://osmnx.readthedocs.io/en/stable/osmnx.html#osmnx.utils.config) for the settings you can configure. For example, by default OSMnx caches all server responses to prevent repeatedly hitting the server for the same query every time you run it. This both makes our code faster on subsequent runs and helps us be a \"good neighbor\" to the server. But you can turn caching off (or back on again) with the `use_cache` parameter. Any parameters not passed to the config function are (re-)set to their original default values whenever you call it. API responses can be cached locally so OSMnx doesn't have to request the same data from the API multiple times: saving bandwidth, increasing speed, and enabling reproducibility.

In [2]:
# turn response caching on and turn on logging to your terminal window
ox.config(log_console=True, use_cache=True)

### Importing shapefiles containing polygons of Colonias de Mexico using GeoPandas (http://datamx.io/dataset/colonias-mexico)

In [3]:
#read the files from the Colonias folder using geopandas just as if we were using only pandas
colonias = gpd.read_file('input_data/coloniasmexico/Colonias/')

In [4]:
colonias.head()

Unnamed: 0,OBJECTID,POSTALCODE,ST_NAME,MUN_NAME,SETT_NAME,SETT_TYPE,AREA,Shape_Leng,Shape_Area,geometry
0,1,20000,AGUASCALIENTES,AGUASCALIENTES,ZONA CENTRO,COLONIA,722654869.0,0.05796,0.000137,"POLYGON Z ((-102.28709 21.88362 0.00000, -102...."
1,2,20010,AGUASCALIENTES,AGUASCALIENTES,OLIVARES SANTANA,FRACCIONAMIENTO,722654869.0,0.031534,2.3e-05,"POLYGON Z ((-102.31287 21.89787 0.00000, -102...."
2,3,20010,AGUASCALIENTES,AGUASCALIENTES,RAMON ROMO FRANCO,FRACCIONAMIENTO,722654869.0,0.007414,2e-06,"POLYGON Z ((-102.31221 21.89372 0.00000, -102...."
3,4,20010,AGUASCALIENTES,AGUASCALIENTES,SAN CAYETANO,FRACCIONAMIENTO,722654869.0,0.028007,4.4e-05,"POLYGON Z ((-102.30707 21.90146 0.00000, -102...."
4,5,20010,AGUASCALIENTES,AGUASCALIENTES,COLINAS DEL RIO,FRACCIONAMIENTO,722654869.0,0.031292,4.8e-05,"POLYGON Z ((-102.31689 21.89465 0.00000, -102...."


In [5]:
#observe useful info of each column in the dataframe
colonias.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 58227 entries, 0 to 58226
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    58227 non-null  int64   
 1   POSTALCODE  58227 non-null  object  
 2   ST_NAME     58227 non-null  object  
 3   MUN_NAME    58227 non-null  object  
 4   SETT_NAME   58227 non-null  object  
 5   SETT_TYPE   58227 non-null  object  
 6   AREA        58227 non-null  float64 
 7   Shape_Leng  58227 non-null  float64 
 8   Shape_Area  58227 non-null  float64 
 9   geometry    58227 non-null  geometry
dtypes: float64(3), geometry(1), int64(1), object(5)
memory usage: 4.4+ MB


In [6]:
#drop the OBJECTID column from the colonias of mexico dataframe
colonias.drop(columns=['OBJECTID'], inplace=True)

In [7]:
#query to filter only the colonias from the Yucatan state
colonias_yucatan = colonias[(colonias['ST_NAME']=='YUCATAN')]

In [8]:
colonias_yucatan

Unnamed: 0,POSTALCODE,ST_NAME,MUN_NAME,SETT_NAME,SETT_TYPE,AREA,Shape_Leng,Shape_Area,geometry
56426,97825,YUCATAN,ABALÁ,ABALA,PUEBLO,700004445.0,0.049039,0.000064,"POLYGON Z ((-89.68047 20.65040 0.00000, -89.68..."
56427,97380,YUCATAN,ACANCEH,ACANCEH,PUEBLO,700004446.0,0.102653,0.000548,"POLYGON Z ((-89.46891 20.81472 0.00000, -89.46..."
56428,97990,YUCATAN,AKIL,AKIL,PUEBLO,700004447.0,0.198995,0.001248,"POLYGON Z ((-89.37038 20.27926 0.00000, -89.36..."
56429,97990,YUCATAN,AKIL,CENTRO,PUEBLO,700004447.0,0.020045,0.000023,"POLYGON Z ((-89.34468 20.26434 0.00000, -89.34..."
56430,97450,YUCATAN,BACA,BACA,PUEBLO,700004448.0,0.089344,0.000237,"POLYGON Z ((-89.40608 21.10913 0.00000, -89.40..."
...,...,...,...,...,...,...,...,...,...
57215,97922,YUCATAN,YAXCABÁ,YOKDZONOT,PUEBLO,700001791.0,0.019157,0.000020,"POLYGON Z ((-88.73336 20.70711 0.00000, -88.73..."
57216,97923,YUCATAN,YAXCABÁ,LIBRE UNION,PUEBLO,700001791.0,0.038392,0.000041,"POLYGON Z ((-88.81322 20.70681 0.00000, -88.81..."
57217,97924,YUCATAN,YAXCABÁ,YAXUNAH,PUEBLO,700001791.0,0.017072,0.000014,"POLYGON Z ((-88.67712 20.54384 0.00000, -88.67..."
57218,97925,YUCATAN,YAXCABÁ,KANKABDZONOT,PUEBLO,700001791.0,0.021147,0.000019,"POLYGON Z ((-88.70678 20.51053 0.00000, -88.70..."


In [9]:
#query to filter only the colonias from Merida municipality of Yucatan state
colonias_yucatan_merida = colonias[(colonias['ST_NAME']=='YUCATAN') & (colonias['MUN_NAME'] == 'MÉRIDA')]

In [10]:
#query to filter only the colonias from Kanasin municipality of Yucatan state
colonias_yucatan_kanasin = colonias[(colonias['ST_NAME']=='YUCATAN') & (colonias['MUN_NAME'] == 'KANASÍN')]

In [11]:
colonias_yucatan_merida.head()

Unnamed: 0,POSTALCODE,ST_NAME,MUN_NAME,SETT_NAME,SETT_TYPE,AREA,Shape_Leng,Shape_Area,geometry
56515,97000,YUCATAN,MÉRIDA,BARRIO SAN SEBASTIAN,FRACCIONAMIENTO,724836804.0,0.02866,3.8e-05,"POLYGON Z ((-89.63250 20.96072 0.00000, -89.63..."
56516,97000,YUCATAN,MÉRIDA,BARRIO SANTA ANA,FRACCIONAMIENTO,724836804.0,0.031112,5.1e-05,"POLYGON Z ((-89.62424 20.97568 0.00000, -89.62..."
56517,97000,YUCATAN,MÉRIDA,BARRIO SANTIAGO,FRACCIONAMIENTO,724836804.0,0.023338,3.4e-05,"POLYGON Z ((-89.62914 20.96978 0.00000, -89.62..."
56518,97000,YUCATAN,MÉRIDA,FRACC JARDINES DE SAN SEBASTIAN,FRACCIONAMIENTO,724836804.0,0.005479,2e-06,"POLYGON Z ((-89.63250 20.96072 0.00000, -89.63..."
56519,97000,YUCATAN,MÉRIDA,FRACC LA QUINTA,FRACCIONAMIENTO,724836804.0,0.004742,1e-06,"POLYGON Z ((-89.63478 20.97566 0.00000, -89.63..."


In [12]:
colonias_yucatan_merida.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 578 entries, 56515 to 57093
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   POSTALCODE  578 non-null    object  
 1   ST_NAME     578 non-null    object  
 2   MUN_NAME    578 non-null    object  
 3   SETT_NAME   578 non-null    object  
 4   SETT_TYPE   578 non-null    object  
 5   AREA        578 non-null    float64 
 6   Shape_Leng  578 non-null    float64 
 7   Shape_Area  578 non-null    float64 
 8   geometry    578 non-null    geometry
dtypes: float64(3), geometry(1), object(5)
memory usage: 45.2+ KB


In [13]:
colonias_yucatan_merida.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [14]:
#calculate area using geometry
colonias_yucatan_merida.area


  colonias_yucatan_merida.area


56515    0.000038
56516    0.000051
56517    0.000034
56518    0.000002
56519    0.000001
           ...   
57089    0.000040
57090    0.000061
57091    0.000038
57092    0.000004
57093    0.000023
Length: 578, dtype: float64

In [15]:
colonias_yucatan_kanasin.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 24 entries, 56482 to 56505
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   POSTALCODE  24 non-null     object  
 1   ST_NAME     24 non-null     object  
 2   MUN_NAME    24 non-null     object  
 3   SETT_NAME   24 non-null     object  
 4   SETT_TYPE   24 non-null     object  
 5   AREA        24 non-null     float64 
 6   Shape_Leng  24 non-null     float64 
 7   Shape_Area  24 non-null     float64 
 8   geometry    24 non-null     geometry
dtypes: float64(3), geometry(1), object(5)
memory usage: 1.9+ KB


### Import shapefiles containing information of the INEGI 2010

#### Population by municipality 

In [174]:
#municipal_inegi = gpd.read_file('input_data/00_SCINCE_zip/00/municipal.dbf')

In [175]:
#municipal_inegi.head()

In [176]:
#select only the columns that are useful. 
#POB1 is the total population according to the data dictionary
#pop_municipal = municipal_inegi[['NOM_ENT', 'NOM_MUN', 'POB1', 'geometry']]

In [177]:
#pop_municipal

In [178]:
#calculate area using geometry
#merida_area = pop_municipal[(pop_municipal['NOM_ENT']=='Yucatán') & (pop_municipal['NOM_MUN']=='Mérida')]
#merida_area.area

#### Population by state

In [179]:
#state_inegi = gpd.read_file('input_data/00_SCINCE_zip/00/estatal.dbf')

In [180]:
#state_inegi.head()

In [181]:
#pop_state = state_inegi[['NOMBRE', 'POB1', 'geometry']]

In [182]:
#pop_state.head()

### Importing data from Marco geoestadístico 2019 (https://www.inegi.org.mx/temas/mg/#Descargas)

Direct download: (https://www.inegi.org.mx/contenido/productos/prod_serv/contenidos/espanol/bvinegi/productos/geografia/marcogeo/889463776079_s.zip)

For this data we are only using the geometries because it does not contain population info.

Geometries we can use:

<ul>
    <li>Areas geoestadísticas básicas urbanas (AGEB). Filename: ee<strong>a</strong></li>
    <li>Polígonos de manzana. Filename: ee<strong>m</strong></li>
</ul>

Note: **ee** refers to the numeric key of the federal state: 01, 02,...,32

In [25]:
yucatan_ageb = gpd.read_file('input_data/marco_geoestadistico2019/31_yucatan/conjunto_de_datos/31a.dbf')

In [26]:
yucatan_manzana = gpd.read_file('input_data/marco_geoestadistico2019/31_yucatan/conjunto_de_datos/31m.dbf')

In [27]:
yucatan_ageb.head()

Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,geometry
0,3100100010130,31,1,1,130,"POLYGON ((3776847.045 1015576.473, 3776850.565..."
1,3100100010145,31,1,1,145,"POLYGON ((3776745.860 1015499.562, 3776745.439..."
2,3100100010164,31,1,1,164,"POLYGON ((3776136.004 1014614.653, 3776022.228..."
3,3100100010126,31,1,1,126,"POLYGON ((3776247.740 1014594.755, 3776229.182..."
4,3100100010107,31,1,1,107,"POLYGON ((3776613.825 1015270.341, 3776615.972..."


In [28]:
yucatan_manzana.head()

Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,CVE_MZA,AMBITO,TIPOMZA,geometry
0,3100100010126002,31,1,1,0126,2,Urbana,Típica,"POLYGON ((3776317.318 1014695.955, 3776290.436..."
1,310010003008A010,31,1,3,008A,10,Rural,Típica,"POLYGON ((3785255.760 1012979.803, 3785285.454..."
2,310010007008A021,31,1,7,008A,21,Rural,Típica,"POLYGON ((3785798.935 1021132.561, 3785818.269..."
3,310010007008A033,31,1,7,008A,33,Rural,Típica,"POLYGON ((3785159.752 1021511.817, 3785187.686..."
4,3100100010107029,31,1,1,0107,29,Urbana,Típica,"POLYGON ((3776873.117 1014643.985, 3776879.112..."


In [29]:
yucatan_manzana.crs

<Projected CRS: PROJCS["MEXICO_ITRF_2008_LCC",GEOGCS["GCS_ITRF_200 ...>
Name: MEXICO_ITRF_2008_LCC
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- undefined
Coordinate Operation:
- name: unnamed
- method: Lambert Conic Conformal (2SP)
Datum: International Terrestrial Reference Frame 2008
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

### Importing data from Censo y Conteo de Población y Vivienda 2010 > Principales resultados por AGEB y manzana urbana (https://www.inegi.org.mx/servicios/datosabiertos.html) 

Yucatan:(https://www.inegi.org.mx/contenidos/programas/ccpv/2010/datosabiertos/ageb_y_manzana/resageburb_31_2010_csv.zip)


In [149]:
yucatan_censo = pd.read_csv('input_data/resageburb_31_2010_csv/resultados_ageb_urbana_31_cpv2010/conjunto_de_datos/resultados_ageb_urbana_31_cpv2010.csv')

In [150]:
yucatan_censo.head()

Unnamed: 0,entidad,nom_ent,mun,nom_mun,loc,nom_loc,ageb,mza,pobtot,pobmas,...,vph_snbien,vph_radio,vph_tv,vph_refri,vph_lavad,vph_autom,vph_pc,vph_telef,vph_cel,vph_inter
0,31,Yucatán,0,Total de la entidad Yucatán,0,Total de la entidad,0,0,1955577,963333,...,17659,377161,462587,386610,344598,169501,129964,155513,335973,91839
1,31,Yucatán,1,Abalá,0,Total del municipio,0,0,6356,3254,...,67,1086,1461,1009,866,168,31,42,681,7
2,31,Yucatán,1,Abalá,1,Total de la localidad urbana,0,0,1890,954,...,10,358,478,361,311,88,21,14,308,3
3,31,Yucatán,1,Abalá,1,Total AGEB urbana,107,0,779,385,...,0,140,197,150,124,37,9,4,135,0
4,31,Yucatán,1,Abalá,1,Abalá,107,1,44,19,...,0,8,10,7,6,*,0,0,6,0


In [151]:
#yucatan_censo = yucatan_censo[['entidad', 'nom_ent', 'mun', 'nom_mun', 'loc', 'nom_loc', 'ageb', 'mza', 'pobtot']]

In [152]:
yucatan_censo.head()

Unnamed: 0,entidad,nom_ent,mun,nom_mun,loc,nom_loc,ageb,mza,pobtot,pobmas,...,vph_snbien,vph_radio,vph_tv,vph_refri,vph_lavad,vph_autom,vph_pc,vph_telef,vph_cel,vph_inter
0,31,Yucatán,0,Total de la entidad Yucatán,0,Total de la entidad,0,0,1955577,963333,...,17659,377161,462587,386610,344598,169501,129964,155513,335973,91839
1,31,Yucatán,1,Abalá,0,Total del municipio,0,0,6356,3254,...,67,1086,1461,1009,866,168,31,42,681,7
2,31,Yucatán,1,Abalá,1,Total de la localidad urbana,0,0,1890,954,...,10,358,478,361,311,88,21,14,308,3
3,31,Yucatán,1,Abalá,1,Total AGEB urbana,107,0,779,385,...,0,140,197,150,124,37,9,4,135,0
4,31,Yucatán,1,Abalá,1,Abalá,107,1,44,19,...,0,8,10,7,6,*,0,0,6,0


## Preprocessing data

In [153]:
yucatan_censo.head()

Unnamed: 0,entidad,nom_ent,mun,nom_mun,loc,nom_loc,ageb,mza,pobtot,pobmas,...,vph_snbien,vph_radio,vph_tv,vph_refri,vph_lavad,vph_autom,vph_pc,vph_telef,vph_cel,vph_inter
0,31,Yucatán,0,Total de la entidad Yucatán,0,Total de la entidad,0,0,1955577,963333,...,17659,377161,462587,386610,344598,169501,129964,155513,335973,91839
1,31,Yucatán,1,Abalá,0,Total del municipio,0,0,6356,3254,...,67,1086,1461,1009,866,168,31,42,681,7
2,31,Yucatán,1,Abalá,1,Total de la localidad urbana,0,0,1890,954,...,10,358,478,361,311,88,21,14,308,3
3,31,Yucatán,1,Abalá,1,Total AGEB urbana,107,0,779,385,...,0,140,197,150,124,37,9,4,135,0
4,31,Yucatán,1,Abalá,1,Abalá,107,1,44,19,...,0,8,10,7,6,*,0,0,6,0


In [154]:
#rename columns that will be compared for equality condition when trying to match data
#yucatan_censo = yucatan_censo.rename(columns={'mun':'CVE_MUN', 'loc':'CVE_LOC', 'ageb':'CVE_AGEB', 'mza':'CVE_MZA'})

In [155]:
#filter only the totals on a new dataframe
yucatan_censo_totales = yucatan_censo[yucatan_censo['mza'] == 0]

In [156]:
yucatan_censo_totales.head()

Unnamed: 0,entidad,nom_ent,mun,nom_mun,loc,nom_loc,ageb,mza,pobtot,pobmas,...,vph_snbien,vph_radio,vph_tv,vph_refri,vph_lavad,vph_autom,vph_pc,vph_telef,vph_cel,vph_inter
0,31,Yucatán,0,Total de la entidad Yucatán,0,Total de la entidad,0,0,1955577,963333,...,17659,377161,462587,386610,344598,169501,129964,155513,335973,91839
1,31,Yucatán,1,Abalá,0,Total del municipio,0,0,6356,3254,...,67,1086,1461,1009,866,168,31,42,681,7
2,31,Yucatán,1,Abalá,1,Total de la localidad urbana,0,0,1890,954,...,10,358,478,361,311,88,21,14,308,3
3,31,Yucatán,1,Abalá,1,Total AGEB urbana,107,0,779,385,...,0,140,197,150,124,37,9,4,135,0
36,31,Yucatán,1,Abalá,1,Total AGEB urbana,111,0,1066,548,...,7,209,273,205,182,51,12,10,167,3


In [157]:
#filter all rows for which the CVE_MZA is 0 because it represents the totals and drop them from the dataframe
yucatan_censo.drop(yucatan_censo[yucatan_censo['mza'] == 0].index, inplace = True) 

In [158]:
yucatan_censo.head()

Unnamed: 0,entidad,nom_ent,mun,nom_mun,loc,nom_loc,ageb,mza,pobtot,pobmas,...,vph_snbien,vph_radio,vph_tv,vph_refri,vph_lavad,vph_autom,vph_pc,vph_telef,vph_cel,vph_inter
4,31,Yucatán,1,Abalá,1,Abalá,107,1,44,19,...,0,8,10,7,6,*,0,0,6,0
5,31,Yucatán,1,Abalá,1,Abalá,107,2,28,13,...,0,5,8,6,7,*,*,0,5,0
6,31,Yucatán,1,Abalá,1,Abalá,107,3,32,18,...,0,4,7,5,5,*,0,0,4,0
7,31,Yucatán,1,Abalá,1,Abalá,107,4,46,25,...,0,4,11,8,9,5,*,0,8,0
8,31,Yucatán,1,Abalá,1,Abalá,107,5,26,13,...,0,6,6,6,5,*,0,0,6,0


Now, we need to create a new column where we took values from entidad, mun, loc, ageb, and mza that conform the CVEGEO (Clave Geoestadistica Concatenada) so we can concat the dataframes afterwards and insert the pobtot.

In [159]:
print(yucatan_manzana['CVEGEO'].iloc[0])
len(yucatan_manzana['CVEGEO'].iloc[0])

3100100010107001


16

The CVEGEO has a length of 16 characters. We need to add some zeros to the values of the elements that conform the CVEGEO.

Check the lengths of the elements:

In [160]:
print(yucatan_manzana['CVE_MUN'].iloc[0])
len(yucatan_manzana['CVE_MUN'].iloc[0])

001


3

In [161]:
print(yucatan_manzana['CVE_LOC'].iloc[0])
len(yucatan_manzana['CVE_LOC'].iloc[0])

0001


4

In [162]:
print(yucatan_manzana['CVE_AGEB'].iloc[0])
len(yucatan_manzana['CVE_AGEB'].iloc[0])

0107


4

In [163]:
print(yucatan_manzana['CVE_MZA'].iloc[0])
len(yucatan_manzana['CVE_MZA'].iloc[0])

001


3

In [164]:
yucatan_censo['mun'] = yucatan_censo['mun'].apply(lambda x: '{0:0>3}'.format(x))
yucatan_censo['loc'] = yucatan_censo['loc'].apply(lambda x: '{0:0>4}'.format(x))
yucatan_censo['mza'] = yucatan_censo['mza'].apply(lambda x: '{0:0>3}'.format(x))

In [165]:
yucatan_censo

Unnamed: 0,entidad,nom_ent,mun,nom_mun,loc,nom_loc,ageb,mza,pobtot,pobmas,...,vph_snbien,vph_radio,vph_tv,vph_refri,vph_lavad,vph_autom,vph_pc,vph_telef,vph_cel,vph_inter
4,31,Yucatán,001,Abalá,0001,Abalá,0107,001,44,19,...,0,8,10,7,6,*,0,0,6,0
5,31,Yucatán,001,Abalá,0001,Abalá,0107,002,28,13,...,0,5,8,6,7,*,*,0,5,0
6,31,Yucatán,001,Abalá,0001,Abalá,0107,003,32,18,...,0,4,7,5,5,*,0,0,4,0
7,31,Yucatán,001,Abalá,0001,Abalá,0107,004,46,25,...,0,4,11,8,9,5,*,0,8,0
8,31,Yucatán,001,Abalá,0001,Abalá,0107,005,26,13,...,0,6,6,6,5,*,0,0,6,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33951,31,Yucatán,106,Yobaín,0001,Yobaín,0058,034,5,*,...,*,*,*,*,*,*,*,*,*,*
33952,31,Yucatán,106,Yobaín,0001,Yobaín,0058,035,18,9,...,0,*,3,3,3,*,*,*,3,0
33953,31,Yucatán,106,Yobaín,0001,Yobaín,0058,036,22,11,...,0,6,6,*,4,0,0,0,6,0
33954,31,Yucatán,106,Yobaín,0001,Yobaín,0058,037,9,4,...,*,*,*,*,*,0,0,0,0,0


In [166]:
#change data type from int to string so we can manipulate data for equality for CVE_GEO
yucatan_censo['entidad'] = yucatan_censo['entidad'].astype('str')
yucatan_censo['mun'] = yucatan_censo['mun'].astype('str')
yucatan_censo['loc'] = yucatan_censo['loc'].astype('str')
yucatan_censo['mza'] = yucatan_censo['mza'].astype('str')

In [167]:
#concatenate columns to create the CVEGEO columns
yucatan_censo['CVEGEO'] = yucatan_censo['entidad'] + yucatan_censo['mun'] + yucatan_censo['loc'] + yucatan_censo['ageb'] + yucatan_censo['mza']

In [168]:
yucatan_censo.reset_index(inplace=True, drop=True)
yucatan_censo

Unnamed: 0,entidad,nom_ent,mun,nom_mun,loc,nom_loc,ageb,mza,pobtot,pobmas,...,vph_radio,vph_tv,vph_refri,vph_lavad,vph_autom,vph_pc,vph_telef,vph_cel,vph_inter,CVEGEO
0,31,Yucatán,001,Abalá,0001,Abalá,0107,001,44,19,...,8,10,7,6,*,0,0,6,0,3100100010107001
1,31,Yucatán,001,Abalá,0001,Abalá,0107,002,28,13,...,5,8,6,7,*,*,0,5,0,3100100010107002
2,31,Yucatán,001,Abalá,0001,Abalá,0107,003,32,18,...,4,7,5,5,*,0,0,4,0,3100100010107003
3,31,Yucatán,001,Abalá,0001,Abalá,0107,004,46,25,...,4,11,8,9,5,*,0,8,0,3100100010107004
4,31,Yucatán,001,Abalá,0001,Abalá,0107,005,26,13,...,6,6,6,5,*,0,0,6,0,3100100010107005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32278,31,Yucatán,106,Yobaín,0001,Yobaín,0058,034,5,*,...,*,*,*,*,*,*,*,*,*,3110600010058034
32279,31,Yucatán,106,Yobaín,0001,Yobaín,0058,035,18,9,...,*,3,3,3,*,*,*,3,0,3110600010058035
32280,31,Yucatán,106,Yobaín,0001,Yobaín,0058,036,22,11,...,6,6,*,4,0,0,0,6,0,3110600010058036
32281,31,Yucatán,106,Yobaín,0001,Yobaín,0058,037,9,4,...,*,*,*,*,0,0,0,0,0,3110600010058037


#### Concatenate dataframes

In [169]:
yucatan_manzana.sort_values(by=['CVEGEO'], ignore_index=True, inplace=True)

In [170]:
yucatan_censo['pobtot'] = yucatan_censo['pobtot'].astype('str')

In [171]:
#drop columns that are duplicated in the other dataframe
yucatan_censo.drop(columns=['entidad', 'nom_ent', 'mun', 'nom_mun', 'loc', 'nom_loc', 'ageb', 'mza'], inplace = True) 

In [172]:
yucatan = yucatan_manzana.merge(yucatan_censo, on=['CVEGEO'], how='inner')

In [173]:
yucatan.head()

Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,CVE_MZA,AMBITO,TIPOMZA,geometry,pobtot,...,vph_snbien,vph_radio,vph_tv,vph_refri,vph_lavad,vph_autom,vph_pc,vph_telef,vph_cel,vph_inter
0,3100100010107001,31,1,1,107,1,Urbana,Típica,"POLYGON ((3776466.958 1014806.740, 3776406.112...",44,...,0,8,10,7,6,*,0,0,6,0
1,3100100010107002,31,1,1,107,2,Urbana,Típica,"POLYGON ((3776530.575 1014793.851, 3776549.319...",28,...,0,5,8,6,7,*,*,0,5,0
2,3100100010107003,31,1,1,107,3,Urbana,Típica,"POLYGON ((3776356.775 1014813.714, 3776391.659...",32,...,0,4,7,5,5,*,0,0,4,0
3,3100100010107004,31,1,1,107,4,Urbana,Típica,"POLYGON ((3776415.586 1014964.702, 3776399.848...",46,...,0,4,11,8,9,5,*,0,8,0
4,3100100010107005,31,1,1,107,5,Urbana,Típica,"POLYGON ((3776335.436 1015048.289, 3776333.770...",26,...,0,6,6,6,5,*,0,0,6,0
