In [1]:
import json
import multiprocessing as mp
import os
from functools import partial
from itertools import chain
from timeit import default_timer as timer

import folium
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union

# Data Preparation

In [2]:
# Directory holding the census shapes and cluster files read by the cells below.
# Defaults to the original cluster location; set the CUEBIQ_DATA_PATH environment
# variable to run the notebook against another copy without editing this cell.
# An unset or empty variable falls back to the default.
# os.path.join(..., '') guarantees the trailing separator that the
# `path_to_data + filename` concatenations depend on.
path_to_data = os.path.join(
    os.environ.get('CUEBIQ_DATA_PATH') or '/scratch/spf248/cuebiq/data/', '')

In [3]:
print('Import Census Shapes')
start = timer()

# Source: INEGI
# https://www.inegi.org.mx/programas/ccpv/2010/?ps=Microdatos
# Data available here:
# https://www.dropbox.com/sh/dc3amdfgicqsjrj/AABDtR1TYpLzLFFXR4ngm1Jta?dl=0
# The three layers are read one after another, in the order listed, and bound
# to `blocks`, `urban` and `munic` respectively.
blocks, urban, munic = (
    gpd.read_file(path_to_data + layer_file)
    for layer_file in (
        'blocks-mexico.geojson',
        'urban-mexico.geojson',
        'munic-mexico.geojson',
    )
)

# Row counts of each layer as a quick sanity check on the load.
print('# Blocks:', len(blocks))
print('# Urban:', len(urban))
print('# Munic:', len(munic))

end = timer()
print('Computing Time:', round(end - start), 'sec')

Import Census Shapes
# Blocks: 1376969
# Urban: 4525
# Munic: 2456
Computing Time: 268 sec


In [4]:
print('Import Clusters')
start = timer()

# Data available here:
# https://www.dropbox.com/sh/dc3amdfgicqsjrj/AABDtR1TYpLzLFFXR4ngm1Jta?dl=0
# NOTE(review): pd.read_pickle unpickles the file, which can execute arbitrary
# code embedded in it; only load .pkl files obtained from a trusted source.
# The pickles are read in the order listed and bound to `clusters_urban`,
# `clusters_rural` and `clusters` respectively.
clusters_urban, clusters_rural, clusters = (
    pd.read_pickle(path_to_data + pickle_file)
    for pickle_file in (
        'clusters-urban-mexico.pkl',
        'clusters-rural-mexico.pkl',
        'clusters-mexico.pkl',
    )
)
# Cluster geometries, read with an explicitly requested GeoJSON driver.
clusters_geojson = gpd.read_file(
    path_to_data + 'clusters-mexico.geojson',
    driver='GeoJSON',
)

end = timer()
print('Computing Time:', round(end - start), 'sec')

Import Clusters
Computing Time: 197 sec


In [5]:
# Index every table by its identifier column so rows can be selected by code.
# Each call is guarded so the cell can be re-run: after the first run the key
# column already lives in the index and an unguarded set_index would raise
# KeyError.
if blocks.index.name != 'BLOCK':
    blocks.set_index('BLOCK', inplace=True)
if urban.index.name != 'URBAN':
    urban.set_index('URBAN', inplace=True)
if munic.index.name != 'MUNIC':
    munic.set_index('MUNIC', inplace=True)
if clusters.index.name != 'cluster':
    clusters.set_index('cluster', inplace=True)

# A cluster is flagged urban when its `cvegeo` list is non-empty and every code
# in it is 16 characters long (the code length this notebook treats as
# block-level; urban clusters are later looked up in `blocks`, the others in
# `munic`). Checking every code, rather than one arbitrary element of the set
# of lengths, makes the flag deterministic for a mixed-length list and avoids
# StopIteration on an empty one.
clusters['urban'] = clusters['cvegeo'].apply(
    lambda codes: len(codes) > 0 and all(len(code) == 16 for code in codes))

# Cluster ids split by the flag; the column is boolean, so it is used directly
# as a mask instead of being compared with True/False.
idx_urban = clusters.loc[clusters['urban']].index
idx_rural = clusters.loc[~clusters['urban']].index

# Placeholder column, initialised to False for every cluster.
clusters['check_pop'] = False

In [6]:
# Progress banner, then start the wall-clock timer that is reported at the end
# of this cell.
print('Check if Urban Population is Consistent')
start = timer()

def check_urban_pop(cluster):
    """Tell whether a cluster's stored population matches its blocks' census total.

    Reads the module-level `clusters` and `blocks` frames.

    Parameters
    ----------
    cluster : label
        Index label of the cluster row in `clusters`.

    Returns
    -------
    bool-like
        True when `clusters.population` for this cluster equals the sum of
        `POB1` over the `blocks` rows selected by the cluster's `cvegeo` codes.
    """
    member_codes = clusters.loc[cluster, 'cvegeo']
    census_total = blocks.loc[member_codes, 'POB1'].sum()
    recorded_total = clusters.loc[cluster, 'population']
    return recorded_total == census_total

# Evaluate check_urban_pop for every urban cluster id on a worker pool created
# with the default number of processes; map() blocks until all results are in.
# NOTE(review): the workers read the notebook's global `clusters`/`blocks`
# frames, which presumably relies on the fork start method - confirm before
# running on a platform that spawns workers instead.
with mp.Pool(processes=None) as pool:
    urban_pop_equal = pool.map(func=check_urban_pop, iterable=idx_urban)

# Elapsed wall-clock time for this cell, rounded to whole seconds.
end = timer()
print('Computing Time:', round(end - start), 'sec')

Check if Urban Population is Consistent
Computing Time: 18 sec


In [7]:
# Progress banner, then start the wall-clock timer that is reported at the end
# of this cell.
print('Check if Rural Population is Consistent')
start = timer()

def check_rural_pop(cluster):
    """Tell whether a cluster's stored population matches its rural census remainder.

    Reads the module-level `clusters`, `munic` and `urban` frames. The expected
    value is the summed `POB1` of the `munic` rows selected by the cluster's
    `cvegeo` codes, minus the summed `POB1` of the `urban` rows whose `MUNIC`
    is one of those rows' index labels and whose `POB1` is at least 100.

    Parameters
    ----------
    cluster : label
        Index label of the cluster row in `clusters`.

    Returns
    -------
    bool-like
        True when `clusters.population` for this cluster equals that remainder.
    """
    recorded_total = clusters.loc[cluster, 'population']
    munic_codes = clusters.loc[cluster, 'cvegeo']
    munic_total = munic.loc[munic_codes, 'POB1'].sum()

    # Rows of `urban` that sit in the cluster's municipalities and reach the
    # 100-inhabitant threshold.
    in_cluster = urban.MUNIC.isin(munic.loc[munic_codes].index)
    above_threshold = urban.POB1 >= 100
    urban_total = urban.loc[in_cluster & above_threshold, 'POB1'].sum()

    return recorded_total == munic_total - urban_total

# Evaluate check_rural_pop for every rural cluster id on a worker pool created
# with the default number of processes; map() blocks until all results are in.
# NOTE(review): the workers read the notebook's global `clusters`/`munic`/`urban`
# frames, which presumably relies on the fork start method - confirm before
# running on a platform that spawns workers instead.
with mp.Pool(processes=None) as pool:
    rural_pop_equal = pool.map(func=check_rural_pop, iterable=idx_rural)

# Elapsed wall-clock time for this cell, rounded to whole seconds.
end = timer()
print('Computing Time:', round(end - start), 'sec')

Check if Rural Population is Consistent
Computing Time: 3 sec


In [8]:
# Headline totals: the municipality-level census population next to the
# cluster count, the population covered by the clusters, and the smallest
# cluster population.
print('Population Mexico:', munic.POB1.sum())
print('# Clusters:', clusters.shape[0])
print('Population Clusters:', clusters.population.sum())
print('Min. Cluster Population:', clusters.population.min())

# all() states the intent ("every cluster passed its check") directly, stops at
# the first failure, and returns True for an empty result list where min()
# would raise ValueError.
print('Are Urban Clusters Population Consistent With Census Data?', all(urban_pop_equal))
print('Are Rural Clusters Population Consistent With Census Data?', all(rural_pop_equal))

Population Mexico: 112336538
# Clusters: 430743
Population Clusters: 112336538
Min. Cluster Population: 100
Are Urban Clusters Population Consistent With Census Data? True
Are Rural Clusters Population Consistent With Census Data? True
