In [1]:
from timeit import default_timer as timer
import geopandas as gpd
import numpy as np
from shapely.ops import unary_union, polygonize
from shapely.geometry import MultiPolygon, Polygon

# Import

In [2]:
path_to_shp = '../data/INEGI/'

In [3]:
print('Import Shapes from Census')
start = timer()

blocks = gpd.read_file(path_to_shp+'ALLManzanas_SCINCE.shp')

print('# Blocks:', blocks.shape[0])

end = timer()
print('Computing Time:', round(end - start), 'sec')

Import Shapes from Census
# Blocks: 1376969
Computing Time: 1048 sec


In [4]:
print('Construct Missing Indices')

blocks['AGEB']  = blocks['CVEGEO'].apply(lambda x:x[:13])
blocks['URBAN'] = blocks['AGEB'].apply(lambda x:x[:9])
blocks['MUNIC'] = blocks['URBAN'].apply(lambda x:x[:5])

print('# AGEB:', blocks['AGEB'].unique().shape[0])
print('# Urban:', blocks['URBAN'].unique().shape[0])
print('# Municipalities:', blocks['MUNIC'].unique().shape[0])

Construct Missing Indices
# AGEB: 56193
# Urban: 4525
# Municipalities: 2456


In [5]:
print('Select Fields')

blocks = blocks[['CVEGEO','AGEB','URBAN','MUNIC','POB1','geometry']
].rename(columns={'CVEGEO':'BLOCK'}).sort_values(by=['BLOCK']).reset_index(drop=True).copy()

Select Fields


In [6]:
print('Reproject')
start = timer()

blocks = blocks.to_crs({'init': 'epsg:4326'})
    
end = timer()
print('Computing Time:', round(end - start), 'sec')

Reproject
Computing Time: 319 sec


In [7]:
blocks.head()

Unnamed: 0,BLOCK,AGEB,URBAN,MUNIC,POB1,geometry
0,100100010229001,100100010229,10010001,1001,65,"POLYGON ((-102.29587 21.92999, -102.29582 21.9..."
1,100100010229002,100100010229,10010001,1001,0,"POLYGON ((-102.29192 21.92319, -102.29201 21.9..."
2,100100010229003,100100010229,10010001,1001,0,"POLYGON ((-102.29163 21.91891, -102.28678 21.9..."
3,100100010229004,100100010229,10010001,1001,0,"POLYGON ((-102.29486 21.91719, -102.29359 21.9..."
4,100100010229006,100100010229,10010001,1001,25,"POLYGON ((-102.29620 21.93126, -102.29614 21.9..."


# Fix Invalid Blocks

In [8]:
invalid_blocks = blocks[-blocks.geometry.is_valid].copy()
print('# Invalid Blocks:', invalid_blocks.shape[0])

# Invalid Blocks: 42


In [59]:
# Could Break
def make_valid(invalid_polygon,tol=0.000001):
    
    if np.abs(invalid_polygon.buffer(0.0).area/invalid_polygon.area-1)<tol:
        
        return invalid_polygon.buffer(0.0)
    
    else:
        
        # Take exterior
        pe = invalid_polygon.exterior

        # Intersect with itself
        mls = pe.intersection(pe)

        # Create a polygon from multiline string
        polygons = polygonize(mls)

        # Aggregate into multi-polyogns
        return MultiPolygon(polygons)

In [77]:
print('Fix Invalid Blocks')
start = timer()

valid_blocks = invalid_blocks.copy()

valid_blocks.geometry = valid_blocks.geometry.apply(make_valid)

blocks.loc[invalid_blocks.index,'geometry'] = valid_blocks['geometry'].copy()

print('# Invalid Blocks:', blocks[-blocks.geometry.is_valid].shape[0])

end = timer()
print('Computing Time:', round(end - start), 'sec')

Fix Invalid Blocks
Computing Time: 0 sec


In [76]:
print('Max. % Error Area', np.max(np.abs(valid_blocks.area/invalid_blocks.area-1)))

1.0895808477684454e-08

In [75]:
print('Max. % Error Centroid Longitude', np.max(np.abs(valid_blocks.centroid.x/invalid_blocks.centroid.x-1)))

1.2323475573339238e-14

In [74]:
print('Max. % Error Centroid Latitude', np.max(np.abs(valid_blocks.centroid.y/invalid_blocks.centroid.y-1)))

2.2293278334473143e-13

In [78]:
print('Create homogenous geometry for exporting')
start = timer()

blocks["geometry"]=[MultiPolygon([feature]) if type(feature)==Polygon else feature for feature in blocks["geometry"]]

end = timer()
print('Computing Time:', round(end - start), 'sec')

Create homogenous geometry for exporting
Computing Time: 84 sec


# Save Blocks

In [80]:
print('Save Blocks')
start = timer()

blocks.to_file("../data/blocks-mexico.geojson", driver='GeoJSON')

end = timer()
print('Computing Time:', round(end - start), 'sec')

Save Blocks


CRSError: Invalid input to create CRS: {'init': 'epsg:4326'}