In [1]:
from path import Path
import sys

# Path() is the empty relative path, so abspath() resolves it against the
# current working directory; the root used here is two levels above that.
cwd = Path().abspath()
root_path = cwd.parent.parent

# Put the root first on the import path.
sys.path.insert(0, root_path)

# Data folder under the root; the bare name makes the cell display it.
data_path = root_path.joinpath('data')
data_path

Path('/media/data/projects/data')

In [2]:
import json

# Create the working folder if needed; the value returned by mkdir_p() is
# kept and used as the folder path from here on.
ibge_path = data_path.joinpath('ibge-divisoes-geograficas').mkdir_p()

# The JSON document is a two-item sequence, unpacked into the two names below.
zips_estados_fname = data_path.joinpath('zips_estados.json')
with open(zips_estados_fname) as fp:
    conteudo = json.load(fp)
zips_estados, byte_size = conteudo

## Baixar e extrair os zips com as informações geográficas

In [None]:
import ftplib
import os
import zipfile

from tqdm import tqdm_notebook

host = 'geoftp.ibge.gov.br'


def _extrair_zip(zip_fname, dir_fname):
    """Extract every member of the archive ``zip_fname`` into ``dir_fname``."""
    with zipfile.ZipFile(zip_fname) as fzip:
        fzip.extractall(dir_fname)


def _baixar_zip(ftp, path, zip_fname, pbar):
    """Download the remote file ``path`` through ``ftp`` into ``zip_fname``.

    The bytes are written to ``zip_fname + '.part'`` and only moved onto
    ``zip_fname`` after the transfer has finished. An interrupted download
    therefore never leaves a truncated ``.zip`` behind that a later run would
    mistake for a complete file; a stale ``.part`` file is simply overwritten.

    ``pbar.update`` is called with the size of every received block.
    """
    tmp_fname = zip_fname + '.part'
    with open(tmp_fname, 'wb') as fp:

        def write_to_file(block):
            fp.write(block)
            pbar.update(len(block))

        ftp.retrbinary('RETR ' + path, write_to_file)
    os.replace(tmp_fname, zip_fname)


pbar = tqdm_notebook(total=byte_size, desc='Progresso')
try:
    # Used as a context manager so the connection is closed on any exit path.
    with ftplib.FTP(host) as ftp:
        ftp.login()

        for estado, info in zips_estados.items():
            for divisao, path in info.items():
                zip_fname = ibge_path.joinpath('{}_{}.zip'.format(estado.lower(), divisao))
                dir_fname = ibge_path.joinpath('{}_{}'.format(estado.lower(), divisao))

                if zip_fname.isfile():
                    # Already downloaded: count its size as progress instead
                    # of fetching it again.
                    pbar.update(zip_fname.getsize())
                else:
                    _baixar_zip(ftp, path, zip_fname, pbar)

                # Extraction runs in both cases, as before.
                _extrair_zip(zip_fname, dir_fname)
finally:
    # Close the bar even when a download or extraction fails.
    pbar.close()

## Extrair todas as shapes e guardá-las no formato JSON

In [3]:
# For each division name, the column that is read as the IBGE code
# (looked up as df[campos[divisao]] when the shapes are exported).
campos = dict(
    municipios='CD_GEOCODM',
    distritos='CD_GEOCODD',
    subdistritos='CD_GEOCODS',
    setores='CD_GEOCODI',
)

In [None]:
import shapely.geometry.polygon
import shapely.geometry.multipolygon

def orientar(s, sinal=+1.0):
    """Return ``s`` with its polygon rings consistently oriented.

    Parameters
    ----------
    s : object
        Usually a shapely geometry. ``Polygon`` and ``MultiPolygon``
        instances are re-oriented; anything else is returned unchanged.
    sinal : float, optional
        Forwarded to ``shapely.geometry.polygon.orient``. ``1.0`` (default)
        asks for counter-clockwise exterior rings, ``-1.0`` for clockwise.

    Returns
    -------
    object
        A new oriented ``Polygon``/``MultiPolygon``, or ``s`` itself for any
        other input.
    """
    if isinstance(s, shapely.geometry.polygon.Polygon):
        return shapely.geometry.polygon.orient(s, sinal)

    if isinstance(s, shapely.geometry.multipolygon.MultiPolygon):
        # Build the result from the oriented parts. Assigning the parts to an
        # ad-hoc attribute of an empty MultiPolygon() does not populate it, so
        # the returned geometry would stay empty. `.geoms` is the documented
        # way to iterate over the member polygons.
        return shapely.geometry.multipolygon.MultiPolygon(
            [shapely.geometry.polygon.orient(p, sinal) for p in s.geoms]
        )

    return s

In [None]:
import geopandas
import shapely.wkt

# Output folder 'ibge-shapes', a sibling of ibge_path, created if missing.
# mkdir_p() returns the path it was called on (ibge_path is built the same
# way), so creation and assignment fit in one expression.
shapes_path = ibge_path.parent.joinpath('ibge-shapes').mkdir_p()

def estado_to_json(estado, info, pbar=None, area_epsg=4087):
    """Export the shapes of every division of one state to a JSON file.

    For each division key in ``info`` the first ``*.shp`` file found in
    ``ibge_path/<estado.lower()>_<divisao>`` is read, reprojected to
    EPSG:4326 and passed through ``orientar``. Rows whose geometry is
    missing, empty or without bounds are dropped. The result is written to
    ``shapes_path/<estado>.json`` as ``{divisao: {field: [values, ...]}}``
    where all lists of one division are parallel:

    ``codigo`` (str of the ``campos[divisao]`` column), ``poligono`` (WKT),
    ``lat_min``/``lng_min``/``lat_max``/``lng_max`` (from ``bounds``),
    ``lat_centroide``/``lng_centroide`` and ``area``.

    Parameters
    ----------
    estado : str
        State identifier; lower-cased for the input folder name and used
        unchanged for the output file name.
    info : mapping
        Its keys are the division names to process (each must be a key of
        ``campos``). The values are not used here.
    pbar : optional
        Any object with ``update(n)``; advanced by 1 after each division.
    area_epsg : int, optional
        EPSG code of the CRS in which ``area`` is measured. The default,
        4087 (World Equidistant Cylindrical), is kept for compatibility with
        files already generated. NOTE(review): that projection is not
        equal-area, so the values are inflated away from the equator; pass an
        equal-area CRS (e.g. 6933) if true areas are needed.

    Raises
    ------
    IndexError
        If a division folder contains no ``*.shp`` file.
    """
    estado_info = {}

    for divisao, _path in info.items():
        dir_fname = ibge_path.joinpath('{}_{}'.format(estado.lower(), divisao))

        # glob('*.shp') already restricts the suffix, so no further filtering
        # is needed; fail with a message that says where we looked.
        shp_fnames = list(dir_fname.glob('*.shp'))
        if not shp_fnames:
            raise IndexError('no .shp file found in {}'.format(dir_fname))

        # EPSG:4326 is geographic WGS 84 (longitude/latitude in degrees).
        df = geopandas.read_file(shp_fnames[0]).to_crs(epsg=4326)

        # Assigning back to the 'geometry' column is all that is required;
        # a set_geometry() call whose return value is discarded changes
        # nothing, so it is not repeated here.
        df['geometry'] = df['geometry'].map(orientar)

        # Keep only rows with a usable geometry (missing values would
        # otherwise fail on the .is_empty attribute access).
        usable = df['geometry'].map(
            lambda p: bool(p is not None and not p.is_empty and p.bounds)
        )
        df = df[usable]

        geometrias = list(df['geometry'])
        centroides = [p.centroid for p in geometrias]  # computed once per shape
        bounds = [p.bounds for p in geometrias]  # (minx, miny, maxx, maxy)
        areas = list(map(float, df.to_crs(epsg=area_epsg)['geometry'].area))

        estado_info[divisao] = {
            'codigo': list(map(str, df[campos[divisao]])),
            'poligono': [shapely.wkt.dumps(p) for p in geometrias],
            'lat_min': [b[1] for b in bounds],
            'lng_min': [b[0] for b in bounds],
            'lat_max': [b[3] for b in bounds],
            'lng_max': [b[2] for b in bounds],
            'lat_centroide': [c.y for c in centroides],
            'lng_centroide': [c.x for c in centroides],
            'area': areas
        }

        if pbar is not None:
            pbar.update(1)

    with open(shapes_path.joinpath(estado+'.json'), 'w') as fp:
        json.dump(estado_info, fp)

In [None]:
from tqdm import tqdm_notebook

# estado_to_json advances the bar once per division of each state, so the
# total is the number of (state, division) pairs actually present in
# zips_estados rather than a hard-coded product.
total_divisoes = sum(len(info) for info in zips_estados.values())
pbar = tqdm_notebook(total=total_divisoes)

try:
    for estado, info in zips_estados.items():
        estado_to_json(estado, info, pbar)
finally:
    # Close the bar even if one of the states fails.
    pbar.close()

## Extrair as shapes dos JSONs e guardá-las no banco de dados

In [2]:
import os

from jfp_backend.db import schema as db_schema

# The connection URL carries a password, so it can be supplied through the
# JFP_DB_URL environment variable instead of being edited into the notebook.
# FIXME(review): the literal below is kept only so the cell behaves as before
# when the variable is unset; remove it once every environment defines
# JFP_DB_URL.
db_schema.default_engine = os.environ.get(
    'JFP_DB_URL',
    'mysql+mysqlconnector://diogenes:123456'
    '@localhost:3306/generic'
)
SessionMaker, engine = db_schema.get_db_session()
con = engine.connect()

In [3]:
import re

# Display which database is targeted, with the password replaced by '***' so
# it is not stored in the notebook's saved output.
re.sub(r'(://[^:/@]+:).*@', r'\1***@', str(db_schema.default_engine))

'mysql+mysqlconnector://comef:***@dbdevii.c0m0rtpl64jd.us-west-2.rds.amazonaws.com:3306/generic'

In [5]:
# Mapped classes whose __table__ objects are handed to create_all().
modelos = [
    db_schema.Regiao,
    db_schema.Estado,
    db_schema.Municipio,
    db_schema.Distrito,
    db_schema.Subdistrito,
    db_schema.Setor,
    db_schema.Geometria,
    db_schema.Parametro,
    db_schema.Categoria,
    db_schema.Fonte,
    db_schema.GeoDado,
    db_schema.GeoDadoSubdistrito,
    db_schema.GeoDadoDistrito,
    db_schema.GeoDadoMunicipio,
    db_schema.GeoDadoEstado,
    db_schema.Lugar,
    db_schema.ClasseLugar,
    db_schema.CategoriaLugar,
    db_schema.Usuario,
]

db_schema.Base.metadata.create_all(
    bind=engine,
    tables=[modelo.__table__ for modelo in modelos],
)

Calcular antes o número total de linhas (um código = uma linha, somando todas as divisões de todos os estados):

In [7]:
import json
from tqdm import tqdm_notebook

shapes_path = ibge_path.parent.joinpath('ibge-shapes')

# Map each JSON file's base name (without extension) to its path.
estados_infos = {}
for p in shapes_path.glob('*.json'):
    nome = p.basename().splitext()[0]
    estados_infos[str(nome)] = p

# One row per entry of every division's 'codigo' list.
numero_linhas = 0
for estado, estado_info_path in tqdm_notebook(estados_infos.items()):
    with open(estado_info_path) as fp:
        estado_info = json.load(fp)
    for divisao, divisao_info in estado_info.items():
        numero_linhas += len(divisao_info['codigo'])

print('numero_linhas =', numero_linhas)