In [1]:
import matplotlib.cm as cm
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import networkx as nx
import osmnx as ox
import pandas as pd
import geopandas as gpd
from IPython.display import IFrame
%matplotlib inline
ox.__version__

'1.0.0'

You can configure OSMnx using `ox.config()`. See the [documentation](https://osmnx.readthedocs.io/en/stable/osmnx.html#osmnx.utils.config) for the settings you can configure. For example, by default OSMnx caches all server responses to prevent repeatedly hitting the server for the same query every time you run it. This both makes our code faster on subsequent runs and helps us be a \"good neighbor\" to the server. But you can turn caching off (or back on again) with the `use_cache` parameter. Any parameters not passed to the config function are (re-)set to their original default values whenever you call it. API responses can be cached locally so OSMnx doesn't have to request the same data from the API multiple times: saving bandwidth, increasing speed, and enabling reproducibility.

In [2]:
# turn response caching on and turn on logging to your terminal window
ox.config(log_console=True, use_cache=True)

### Importing shapefiles containing polygons of Colonias de Mexico using GeoPandas

In [2]:
#read the files from the Colonias folder using geopandas just as if we were using only pandas
colonias = gpd.read_file('input_data/coloniasmexico/Colonias/')

In [3]:
colonias.head()

Unnamed: 0,OBJECTID,POSTALCODE,ST_NAME,MUN_NAME,SETT_NAME,SETT_TYPE,AREA,Shape_Leng,Shape_Area,geometry
0,1,20000,AGUASCALIENTES,AGUASCALIENTES,ZONA CENTRO,COLONIA,722654869.0,0.05796,0.000137,"POLYGON Z ((-102.28709 21.88362 0.00000, -102...."
1,2,20010,AGUASCALIENTES,AGUASCALIENTES,OLIVARES SANTANA,FRACCIONAMIENTO,722654869.0,0.031534,2.3e-05,"POLYGON Z ((-102.31287 21.89787 0.00000, -102...."
2,3,20010,AGUASCALIENTES,AGUASCALIENTES,RAMON ROMO FRANCO,FRACCIONAMIENTO,722654869.0,0.007414,2e-06,"POLYGON Z ((-102.31221 21.89372 0.00000, -102...."
3,4,20010,AGUASCALIENTES,AGUASCALIENTES,SAN CAYETANO,FRACCIONAMIENTO,722654869.0,0.028007,4.4e-05,"POLYGON Z ((-102.30707 21.90146 0.00000, -102...."
4,5,20010,AGUASCALIENTES,AGUASCALIENTES,COLINAS DEL RIO,FRACCIONAMIENTO,722654869.0,0.031292,4.8e-05,"POLYGON Z ((-102.31689 21.89465 0.00000, -102...."


In [5]:
#observe useful info of each column in the dataframe
colonias.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 58227 entries, 0 to 58226
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   OBJECTID    58227 non-null  int64   
 1   POSTALCODE  58227 non-null  object  
 2   ST_NAME     58227 non-null  object  
 3   MUN_NAME    58227 non-null  object  
 4   SETT_NAME   58227 non-null  object  
 5   SETT_TYPE   58227 non-null  object  
 6   AREA        58227 non-null  float64 
 7   Shape_Leng  58227 non-null  float64 
 8   Shape_Area  58227 non-null  float64 
 9   geometry    58227 non-null  geometry
dtypes: float64(3), geometry(1), int64(1), object(5)
memory usage: 4.4+ MB


In [6]:
#drop the OBJECTID column from the colonias of mexico dataframe
colonias.drop(columns=['OBJECTID'], inplace=True)

In [7]:
#query to filter only the colonias from the Yucatan state
colonias_yucatan = colonias[(colonias['ST_NAME']=='YUCATAN')]

In [8]:
colonias_yucatan

Unnamed: 0,POSTALCODE,ST_NAME,MUN_NAME,SETT_NAME,SETT_TYPE,AREA,Shape_Leng,Shape_Area,geometry
56426,97825,YUCATAN,ABALÁ,ABALA,PUEBLO,700004445.0,0.049039,0.000064,"POLYGON Z ((-89.68047 20.65040 0.00000, -89.68..."
56427,97380,YUCATAN,ACANCEH,ACANCEH,PUEBLO,700004446.0,0.102653,0.000548,"POLYGON Z ((-89.46891 20.81472 0.00000, -89.46..."
56428,97990,YUCATAN,AKIL,AKIL,PUEBLO,700004447.0,0.198995,0.001248,"POLYGON Z ((-89.37038 20.27926 0.00000, -89.36..."
56429,97990,YUCATAN,AKIL,CENTRO,PUEBLO,700004447.0,0.020045,0.000023,"POLYGON Z ((-89.34468 20.26434 0.00000, -89.34..."
56430,97450,YUCATAN,BACA,BACA,PUEBLO,700004448.0,0.089344,0.000237,"POLYGON Z ((-89.40608 21.10913 0.00000, -89.40..."
...,...,...,...,...,...,...,...,...,...
57215,97922,YUCATAN,YAXCABÁ,YOKDZONOT,PUEBLO,700001791.0,0.019157,0.000020,"POLYGON Z ((-88.73336 20.70711 0.00000, -88.73..."
57216,97923,YUCATAN,YAXCABÁ,LIBRE UNION,PUEBLO,700001791.0,0.038392,0.000041,"POLYGON Z ((-88.81322 20.70681 0.00000, -88.81..."
57217,97924,YUCATAN,YAXCABÁ,YAXUNAH,PUEBLO,700001791.0,0.017072,0.000014,"POLYGON Z ((-88.67712 20.54384 0.00000, -88.67..."
57218,97925,YUCATAN,YAXCABÁ,KANKABDZONOT,PUEBLO,700001791.0,0.021147,0.000019,"POLYGON Z ((-88.70678 20.51053 0.00000, -88.70..."


In [10]:
#query to filter only the colonias from Merida municipality of Yucatan state
colonias_yucatan_merida = colonias[(colonias['ST_NAME']=='YUCATAN') & (colonias['MUN_NAME'] == 'MÉRIDA')]

In [11]:
#query to filter only the colonias from Kanasin municipality of Yucatan state
colonias_yucatan_kanasin = colonias[(colonias['ST_NAME']=='YUCATAN') & (colonias['MUN_NAME'] == 'KANASÍN')]

In [12]:
colonias_yucatan_merida.head()

Unnamed: 0,POSTALCODE,ST_NAME,MUN_NAME,SETT_NAME,SETT_TYPE,AREA,Shape_Leng,Shape_Area,geometry
56515,97000,YUCATAN,MÉRIDA,BARRIO SAN SEBASTIAN,FRACCIONAMIENTO,724836804.0,0.02866,3.8e-05,"POLYGON Z ((-89.63250 20.96072 0.00000, -89.63..."
56516,97000,YUCATAN,MÉRIDA,BARRIO SANTA ANA,FRACCIONAMIENTO,724836804.0,0.031112,5.1e-05,"POLYGON Z ((-89.62424 20.97568 0.00000, -89.62..."
56517,97000,YUCATAN,MÉRIDA,BARRIO SANTIAGO,FRACCIONAMIENTO,724836804.0,0.023338,3.4e-05,"POLYGON Z ((-89.62914 20.96978 0.00000, -89.62..."
56518,97000,YUCATAN,MÉRIDA,FRACC JARDINES DE SAN SEBASTIAN,FRACCIONAMIENTO,724836804.0,0.005479,2e-06,"POLYGON Z ((-89.63250 20.96072 0.00000, -89.63..."
56519,97000,YUCATAN,MÉRIDA,FRACC LA QUINTA,FRACCIONAMIENTO,724836804.0,0.004742,1e-06,"POLYGON Z ((-89.63478 20.97566 0.00000, -89.63..."


In [13]:
colonias_yucatan_merida.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 578 entries, 56515 to 57093
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   POSTALCODE  578 non-null    object  
 1   ST_NAME     578 non-null    object  
 2   MUN_NAME    578 non-null    object  
 3   SETT_NAME   578 non-null    object  
 4   SETT_TYPE   578 non-null    object  
 5   AREA        578 non-null    float64 
 6   Shape_Leng  578 non-null    float64 
 7   Shape_Area  578 non-null    float64 
 8   geometry    578 non-null    geometry
dtypes: float64(3), geometry(1), object(5)
memory usage: 45.2+ KB


In [15]:
#calculate area using geometry
colonias_yucatan_merida.area


  colonias_yucatan_merida.area


56515    0.000038
56516    0.000051
56517    0.000034
56518    0.000002
56519    0.000001
           ...   
57089    0.000040
57090    0.000061
57091    0.000038
57092    0.000004
57093    0.000023
Length: 578, dtype: float64

In [14]:
colonias_yucatan_kanasin.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 24 entries, 56482 to 56505
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   POSTALCODE  24 non-null     object  
 1   ST_NAME     24 non-null     object  
 2   MUN_NAME    24 non-null     object  
 3   SETT_NAME   24 non-null     object  
 4   SETT_TYPE   24 non-null     object  
 5   AREA        24 non-null     float64 
 6   Shape_Leng  24 non-null     float64 
 7   Shape_Area  24 non-null     float64 
 8   geometry    24 non-null     geometry
dtypes: float64(3), geometry(1), object(5)
memory usage: 1.9+ KB


### Import shapefiles containing information of the INEGI 2010

#### Population by municipality 

In [16]:
municipal_inegi = gpd.read_file('input_data/00_SCINCE_zip/00/municipal.dbf')

In [17]:
municipal_inegi.head()

Unnamed: 0,CVEGEO,NOM_ENT,NOM_MUN,POB1,POB2,POB2_R,POB3,POB3_R,POB4,POB4_R,...,POB78,POB78_R,POB79,POB79_R,POB80,POB80_R,POB81,POB81_R,OID,geometry
0,1001,Aguascalientes,Aguascalientes,797010.0,44873.0,5.6,76707.0,9.6,47951.0,6.0,...,188761.0,48.8,27248.0,7.1,17571.0,4.5,10818.0,2.8,1,"POLYGON ((2489072.505 1115771.584, 2489352.774..."
1,1002,Aguascalientes,Asientos,45492.0,3206.0,7.0,5285.0,11.6,3175.0,7.0,...,9720.0,43.2,1813.0,8.1,1334.0,5.9,965.0,4.3,2,"POLYGON ((2494680.262 1141224.506, 2494749.947..."
2,1003,Aguascalientes,Calvillo,54136.0,3433.0,6.3,5809.0,10.7,3561.0,6.6,...,12136.0,46.2,2756.0,10.5,1982.0,7.6,1358.0,5.2,3,"POLYGON ((2429607.454 1120262.254, 2429071.900..."
3,1004,Aguascalientes,Cosío,15042.0,967.0,6.4,1651.0,11.0,1033.0,6.9,...,3231.0,44.1,579.0,7.9,418.0,5.7,290.0,4.0,4,"POLYGON ((2470517.824 1155028.587, 2470552.249..."
4,1005,Aguascalientes,Jesús María,99590.0,7044.0,7.1,11982.0,12.0,7377.0,7.4,...,21527.0,43.9,2679.0,5.5,1810.0,3.7,1133.0,2.3,5,"POLYGON ((2465526.729 1114740.466, 2465752.546..."


In [18]:
#select only the columns that are useful. 
#POB1 is the total population according to the data dictionary
pop_municipal = municipal_inegi[['NOM_ENT', 'NOM_MUN', 'POB1', 'geometry']]

In [19]:
pop_municipal

Unnamed: 0,NOM_ENT,NOM_MUN,POB1,geometry
0,Aguascalientes,Aguascalientes,797010.0,"POLYGON ((2489072.505 1115771.584, 2489352.774..."
1,Aguascalientes,Asientos,45492.0,"POLYGON ((2494680.262 1141224.506, 2494749.947..."
2,Aguascalientes,Calvillo,54136.0,"POLYGON ((2429607.454 1120262.254, 2429071.900..."
3,Aguascalientes,Cosío,15042.0,"POLYGON ((2470517.824 1155028.587, 2470552.249..."
4,Aguascalientes,Jesús María,99590.0,"POLYGON ((2465526.729 1114740.466, 2465752.546..."
...,...,...,...,...
2451,Zacatecas,Villa Hidalgo,18490.0,"POLYGON ((2531223.367 1166188.348, 2531242.386..."
2452,Zacatecas,Villanueva,29395.0,"POLYGON ((2420011.779 1183552.394, 2420043.461..."
2453,Zacatecas,Zacatecas,138176.0,"POLYGON ((2421707.751 1202019.972, 2421780.085..."
2454,Zacatecas,Trancoso,16934.0,"POLYGON ((2476976.320 1198274.003, 2476566.139..."


In [25]:
#calculate area using geometry
merida_area = pop_municipal[(pop_municipal['NOM_ENT']=='Yucatán') & (pop_municipal['NOM_MUN']=='Mérida')]
merida_area.area

2341    8.755307e+08
dtype: float64

#### Population by state

In [33]:
state_inegi = gpd.read_file('input_data/00_SCINCE_zip/00/estatal.dbf')

In [39]:
state_inegi.head()

Unnamed: 0,CVEGEO,NOMBRE,POB1,POB2,POB2_R,POB3,POB3_R,POB4,POB4_R,POB5,...,POB78,POB78_R,POB79,POB79_R,POB80,POB80_R,POB81,POB81_R,OID,geometry
0,1,Aguascalientes,1184996,71484,6.0,121557,10.3,75453,6.4,154348,...,272459,47.2,41163,7.1,27477,4.8,17582,3.0,1,"POLYGON ((2470517.824 1155028.587, 2470552.249..."
1,2,Baja California,3155070,170792,5.4,293234,9.3,182211,5.8,367940,...,817531,51.4,101804,6.4,65653,4.1,40567,2.5,2,"MULTIPOLYGON (((1458026.171 1855728.155, 14578..."
2,3,Baja California Sur,637026,36388,5.7,60758,9.5,36255,5.7,72035,...,169932,52.2,20896,6.4,13323,4.1,8098,2.5,3,"MULTIPOLYGON (((1694646.290 1227655.310, 16946..."
3,4,Campeche,822441,45537,5.5,76925,9.4,46836,5.7,95136,...,205395,50.4,34128,8.4,23517,5.8,15922,3.9,4,"MULTIPOLYGON (((3702830.928 1030965.604, 37034..."
4,5,Coahuila de Zaragoza,2748391,151918,5.5,258246,9.4,158570,5.8,328429,...,691959,50.7,111157,8.1,73690,5.4,46863,3.4,5,"POLYGON ((2469954.193 1978522.993, 2469982.807..."


In [40]:
pop_state = state_inegi[['NOMBRE', 'POB1', 'geometry']]

In [42]:
pop_state.head()

Unnamed: 0,NOMBRE,POB1,geometry
0,Aguascalientes,1184996,"POLYGON ((2470517.824 1155028.587, 2470552.249..."
1,Baja California,3155070,"MULTIPOLYGON (((1458026.171 1855728.155, 14578..."
2,Baja California Sur,637026,"MULTIPOLYGON (((1694646.290 1227655.310, 16946..."
3,Campeche,822441,"MULTIPOLYGON (((3702830.928 1030965.604, 37034..."
4,Coahuila de Zaragoza,2748391,"POLYGON ((2469954.193 1978522.993, 2469982.807..."


### Importing data from Marco geoestadístico 2019 (https://www.inegi.org.mx/temas/mg/#Descargas)

For this data we are only using the geometries because it does not contain population info.

Geometries we can use:

<ul>
    <li>Areas geoestadísticas básicas urbanas (AGEB). Filename: ee<strong>a</strong></li>
    <li>Polígonos de manzana. Filename: ee<strong>m</strong></li>
</ul>

Note: **ee** refers to the numeric key of the federal state: 01, 02,...,32

In [3]:
yucatan_ageb = gpd.read_file('input_data/marco_geoestadistico2019/31_yucatan/conjunto_de_datos/31a.dbf')

In [4]:
yucatan_manzana = gpd.read_file('input_data/marco_geoestadistico2019/31_yucatan/conjunto_de_datos/31m.dbf')

In [8]:
yucatan_ageb.head()

Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,geometry
0,3100100010130,31,1,1,130,"POLYGON ((3776847.045 1015576.473, 3776850.565..."
1,3100100010145,31,1,1,145,"POLYGON ((3776745.860 1015499.562, 3776745.439..."
2,3100100010164,31,1,1,164,"POLYGON ((3776136.004 1014614.653, 3776022.228..."
3,3100100010126,31,1,1,126,"POLYGON ((3776247.740 1014594.755, 3776229.182..."
4,3100100010107,31,1,1,107,"POLYGON ((3776613.825 1015270.341, 3776615.972..."


In [7]:
yucatan_manzana.head()

Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,CVE_MZA,AMBITO,TIPOMZA,geometry
0,3100100010126002,31,1,1,0126,2,Urbana,Típica,"POLYGON ((3776317.318 1014695.955, 3776290.436..."
1,310010003008A010,31,1,3,008A,10,Rural,Típica,"POLYGON ((3785255.760 1012979.803, 3785285.454..."
2,310010007008A021,31,1,7,008A,21,Rural,Típica,"POLYGON ((3785798.935 1021132.561, 3785818.269..."
3,310010007008A033,31,1,7,008A,33,Rural,Típica,"POLYGON ((3785159.752 1021511.817, 3785187.686..."
4,3100100010107029,31,1,1,0107,29,Urbana,Típica,"POLYGON ((3776873.117 1014643.985, 3776879.112..."


### Importing data from Censo y Conteo de Población y Vivienda 2010 > Principales resultados por AGEB y manzana urbana (https://www.inegi.org.mx/servicios/datosabiertos.html)


In [12]:
yucatan_censo = pd.read_csv('input_data/resageburb_31_2010_csv/resultados_ageb_urbana_31_cpv2010/conjunto_de_datos/resultados_ageb_urbana_31_cpv2010.csv')

In [17]:
yucatan_censo = yucatan_censo[['entidad', 'nom_ent', 'mun', 'nom_mun', 'loc', 'nom_loc', 'ageb', 'mza', 'pobtot']]

In [18]:
yucatan_censo

Unnamed: 0,entidad,nom_ent,mun,nom_mun,loc,nom_loc,ageb,mza,pobtot
0,31,Yucatán,0,Total de la entidad Yucatán,0,Total de la entidad,0000,0,1955577
1,31,Yucatán,1,Abalá,0,Total del municipio,0000,0,6356
2,31,Yucatán,1,Abalá,1,Total de la localidad urbana,0000,0,1890
3,31,Yucatán,1,Abalá,1,Total AGEB urbana,0107,0,779
4,31,Yucatán,1,Abalá,1,Abalá,0107,1,44
...,...,...,...,...,...,...,...,...,...
33952,31,Yucatán,106,Yobaín,1,Yobaín,0058,35,18
33953,31,Yucatán,106,Yobaín,1,Yobaín,0058,36,22
33954,31,Yucatán,106,Yobaín,1,Yobaín,0058,37,9
33955,31,Yucatán,106,Yobaín,1,Total AGEB urbana,0077,0,7


## Merging data

In [37]:
#change data type from object to int so we can work with equalities when filtering data
yucatan_manzana['CVE_MZA'] = yucatan_manzana['CVE_MZA'].astype('int')
yucatan_manzana['CVE_MUN'] = yucatan_manzana['CVE_MUN'].astype('int')
yucatan_manzana['CVE_LOC'] = yucatan_manzana['CVE_LOC'].astype('int')

In [38]:
yucatan_manzana.head()

Unnamed: 0,CVEGEO,CVE_ENT,CVE_MUN,CVE_LOC,CVE_AGEB,CVE_MZA,AMBITO,TIPOMZA,geometry
0,3100100010126002,31,1,1,0126,2,Urbana,Típica,"POLYGON ((3776317.318 1014695.955, 3776290.436..."
1,310010003008A010,31,1,3,008A,10,Rural,Típica,"POLYGON ((3785255.760 1012979.803, 3785285.454..."
2,310010007008A021,31,1,7,008A,21,Rural,Típica,"POLYGON ((3785798.935 1021132.561, 3785818.269..."
3,310010007008A033,31,1,7,008A,33,Rural,Típica,"POLYGON ((3785159.752 1021511.817, 3785187.686..."
4,3100100010107029,31,1,1,0107,29,Urbana,Típica,"POLYGON ((3776873.117 1014643.985, 3776879.112..."


In [33]:
yucatan_censo.head()

Unnamed: 0,entidad,nom_ent,mun,nom_mun,loc,nom_loc,ageb,mza,pobtot
0,31,Yucatán,0,Total de la entidad Yucatán,0,Total de la entidad,0,0,1955577
1,31,Yucatán,1,Abalá,0,Total del municipio,0,0,6356
2,31,Yucatán,1,Abalá,1,Total de la localidad urbana,0,0,1890
3,31,Yucatán,1,Abalá,1,Total AGEB urbana,107,0,779
4,31,Yucatán,1,Abalá,1,Abalá,107,1,44


In [34]:
#rename columns that will be compared for equality condition when trying to match data
yucatan_censo = yucatan_censo.rename(columns={'mun':'CVE_MUN', 'loc':'CVE_LOC', 'ageb':'CVE_AGEB', 'mza':'CVE_MZA'})

In [40]:
yucatan_censo.head()

Unnamed: 0,entidad,nom_ent,CVE_MUN,nom_mun,CVE_LOC,nom_loc,CVE_AGEB,CVE_MZA,pobtot
0,31,Yucatán,0,Total de la entidad Yucatán,0,Total de la entidad,0,0,1955577
1,31,Yucatán,1,Abalá,0,Total del municipio,0,0,6356
2,31,Yucatán,1,Abalá,1,Total de la localidad urbana,0,0,1890
3,31,Yucatán,1,Abalá,1,Total AGEB urbana,107,0,779
4,31,Yucatán,1,Abalá,1,Abalá,107,1,44


In [42]:
#filter only the totals
yucatan_censo_totales = yucatan_censo[yucatan_censo['CVE_MZA'] == 0]

In [43]:
yucatan_censo_totales

Unnamed: 0,entidad,nom_ent,CVE_MUN,nom_mun,CVE_LOC,nom_loc,CVE_AGEB,CVE_MZA,pobtot
0,31,Yucatán,0,Total de la entidad Yucatán,0,Total de la entidad,0000,0,1955577
1,31,Yucatán,1,Abalá,0,Total del municipio,0000,0,6356
2,31,Yucatán,1,Abalá,1,Total de la localidad urbana,0000,0,1890
3,31,Yucatán,1,Abalá,1,Total AGEB urbana,0107,0,779
36,31,Yucatán,1,Abalá,1,Total AGEB urbana,0111,0,1066
...,...,...,...,...,...,...,...,...,...
33868,31,Yucatán,106,Yobaín,0,Total del municipio,0000,0,2137
33869,31,Yucatán,106,Yobaín,1,Total de la localidad urbana,0000,0,1820
33870,31,Yucatán,106,Yobaín,1,Total AGEB urbana,0043,0,998
33917,31,Yucatán,106,Yobaín,1,Total AGEB urbana,0058,0,815


In [41]:
condition = (
            (yucatan_manzana['CVE_MUN'].values == yucatan_censo['CVE_MUN'].values)
            & (yucatan_manzana['CVE_LOC'].values == yucatan_censo['CVE_LOC'].values)
            & (yucatan_manzana['CVE_AGEB'].values == yucatan_censo['CVE_AGEB'].values) 
             & (yucatan_manzana['CVE_MZA'].values == yucatan_censo['CVE_MZA'].values)
            )

  (yucatan_manzana['CVE_MUN'].values == yucatan_censo['CVE_MUN'].values)
  & (yucatan_manzana['CVE_LOC'].values == yucatan_censo['CVE_LOC'].values)
  & (yucatan_manzana['CVE_AGEB'].values == yucatan_censo['CVE_AGEB'].values)
  & (yucatan_manzana['CVE_MZA'].values == yucatan_censo['CVE_MZA'].values)


In [None]:
yucatan_manzana['POBTOT'] = np.where()