# Distritos das quadras

Neste notebook identificamos os distritos a que pertencem as quadras fiscais do município.

In [1]:
import pandas as pd
import geopandas as gpd
import os
import matplotlib.pyplot as plt


from core.utils.file_path import solve_path, list_files_recursive
from core.config import SHP_FOLDER, GENERATED_DATA_FOLDER



In [2]:
def open_shp(tema):
    """Load the first shapefile of a theme folder as a GeoDataFrame in EPSG:31983.

    Parameters
    ----------
    tema : str
        Theme name, resolved against ``SHP_FOLDER`` with ``solve_path``.

    Returns
    -------
    geopandas.GeoDataFrame
        Contents of the first ``.shp`` file returned by ``list_files_recursive``,
        expressed in EPSG:31983.

    Raises
    ------
    FileNotFoundError
        If no ``.shp`` file is found under the resolved theme folder.
    """
    tema_path = solve_path(tema, SHP_FOLDER)
    shp_files = list_files_recursive(tema_path, '.shp')
    if len(shp_files) == 0:
        raise FileNotFoundError(f"No .shp file found for theme '{tema}' in {tema_path}")

    geodf = gpd.read_file(shp_files[0])

    if geodf.crs is None:
        # The file carries no CRS information: just label the coordinates.
        geodf = geodf.set_crs(epsg=31983)
    else:
        # The file already declares a CRS. set_crs would raise if it differs from
        # EPSG:31983, so reproject instead (no transformation when it already matches).
        geodf = geodf.to_crs(epsg=31983)

    return geodf

In [3]:
# Load the layer stored under the 'distritos' shapefile theme.
distritos = open_shp(tema='distritos')

In [4]:
# Preview the first five rows of the districts layer.
distritos.head(n=5)

Unnamed: 0,ds_nome,ds_codigo,ds_cd_sub,ds_subpref,ds_sigla,ds_areamt,ds_areakm,geometry
0,MANDAQUI,51,5,SANTANA-TUCURUVI,MAN,13247860.0,13.248,"POLYGON ((330950.373 7407837.176, 330952.177 7..."
1,MOOCA,53,25,MOOCA,MOO,7948972.0,7.949,"POLYGON ((338349.356 7394264.944, 338362.239 7..."
2,CIDADE ADEMAR,22,16,CIDADE ADEMAR,CAD,12342840.0,12.343,"POLYGON ((332332.417 7381170.543, 332348.402 7..."
3,TREMEMBE,81,6,JACANA-TREMEMBE,TRE,56714770.0,56.715,"POLYGON ((337037.395 7403901.769, 337033.929 7..."
4,SAO LUCAS,72,29,VILA PRUDENTE,SLU,9675986.0,9.676,"POLYGON ((340488.820 7391250.151, 340501.069 7..."


In [5]:
# Load the layer stored under the 'quadras_fiscais' shapefile theme.
quadras = open_shp(tema='quadras_fiscais')

In [6]:
# Preview the first five rows of the blocks layer.
quadras.head(n=5)

Unnamed: 0,qd_id_orig,qd_setor,qd_fiscal,qd_tipo,qd_subqua,qd_area,qd_situac,qd_id,qd_tx_tipo,geometry
0,10346990,153,29,F,1,4859.1528512042,1,137794,FISCAL,"POLYGON ((347209.277 7387175.531, 347202.470 7..."
1,10117715,139,181,F,1,5347.62152613216,1,137795,FISCAL,"POLYGON ((353322.778 7398472.568, 353302.766 7..."
2,10119816,192,150,F,1,6928.73697365148,1,137796,FISCAL,"POLYGON ((357992.155 7398204.925, 357990.766 7..."
3,8895416,8,30,F,1,10531.7999187608,1,137797,FISCAL,"POLYGON ((332589.689 7396772.635, 332583.548 7..."
4,10273851,138,217,F,1,5638.25762262573,1,137798,FISCAL,"POLYGON ((354512.065 7396517.983, 354508.503 7..."


In [7]:
# Build the block key by adding the sector code and the fiscal-block code.
# NOTE(review): the displayed outputs suggest both columns are text, so this is
# a string concatenation — confirm the dtypes.
quadras['quadra'] = quadras['qd_setor'].add(quadras['qd_fiscal'])

Há muitas quadras duplicadas, porque várias delas são representadas por mais de um polígono.

É preciso dissolver as quadras para obter uma única geometria por quadra.

In [8]:
# Share of rows whose 'quadra' key repeats an earlier row.
quadras.duplicated(subset='quadra').mean()

0.2593934472311584

In [9]:
# Merge all polygons that share the same 'quadra' key into a single geometry,
# then turn the key back into a regular column.
# NOTE(review): dissolve's default aggregation keeps the first row's value for the
# other columns, so fields such as qd_area presumably describe only the first
# polygon of each block — confirm whether they should be recomputed.
dissolved = quadras.dissolve(by='quadra').reset_index()

In [10]:
# From here on, `quadras` refers to the dissolved layer (same object as `dissolved`).
quadras = dissolved

In [11]:
# Share of repeated 'quadra' keys after dissolving.
quadras.duplicated(subset='quadra').mean()

0.0

Para evitar que as quadras intersectem mais de um distrito por imprecisões de desenho em suas bordas,
 vamos usar o centroide da quadra para identificar em que distrito ela se situa.

In [12]:
# Keep a handle on the current geometries so they can be put back later.
geometria_original = quadras['geometry']
# Replace each block geometry with its centroid for the point-in-polygon join.
quadras['geometry'] = quadras['geometry'].centroid

Agora podemos fazer a intersecção

In [13]:
# Number of blocks before the spatial join.
quadras.shape[0]

47424

In [14]:
# Positional identifier (0..n-1) used to match rows back after the join.
quadras['id_pol'] = range(len(quadras))

In [15]:
# Left spatial join: every block row is kept and receives the attributes of the
# district whose polygon contains the block geometry, when there is one.
joined = gpd.sjoin(
    left_df=quadras,
    right_df=distritos,
    how='left',
    predicate='within',
)

In [16]:
# Row count after the join, to compare against the number of blocks.
joined.shape[0]

47424

In [17]:
# Show three joined rows; the fixed seed keeps the displayed sample the same
# across kernel restarts.
joined.sample(3, random_state=42)

Unnamed: 0,quadra,geometry,qd_id_orig,qd_setor,qd_fiscal,qd_tipo,qd_subqua,qd_area,qd_situac,qd_id,qd_tx_tipo,id_pol,index_right,ds_nome,ds_codigo,ds_cd_sub,ds_subpref,ds_sigla,ds_areamt,ds_areakm
33694,155242,POINT (344652.191 7388384.290),10550392,155,242,F,1,4511.19352881079,1,168022,FISCAL,33694,5.0,SAPOPEMBA,76,32,SAPOPEMBA,SAP,13631250.0,13.631
31905,148299,POINT (344628.497 7391329.700),10551658,148,299,F,1,5114.16127611961,1,163474,FISCAL,31905,46.0,ARICANDUVA,4,26,ARICANDUVA-FORMOSA-CARRAO,ARI,6959765.0,6.96
18774,100087,POINT (339355.979 7391420.453),9507269,100,87,F,1,4667.07247858799,1,177423,FISCAL,18774,39.0,VILA PRUDENTE,93,29,VILA PRUDENTE,VPR,9584953.0,9.585


In [18]:
# Fraction of blocks that did not receive a district name.
joined['ds_nome'].isna().sum() / len(quadras)

0.0005060728744939271

In [19]:
# Number of joined rows without a district name.
joined['ds_nome'].isna().sum()

24

In [20]:
# Put back the geometries saved in `geometria_original` so areas can be measured.
quadras['geometry']=geometria_original

In [21]:
# Ids of the blocks that received a district in the join.
encontrados = joined.loc[joined['ds_nome'].notna(), 'id_pol']
# Total area of those blocks, measured on the `quadras` geometries.
matched_mask = quadras['id_pol'].isin(encontrados)
area_encontrados = quadras[matched_mask].geometry.area.sum()

In [22]:
# Share of the total block area that belongs to blocks with a district.
area_encontrados / quadras['geometry'].area.sum()

0.9987662389680297

In [23]:
def get_fname(fname):
    """Resolve ``fname`` against ``GENERATED_DATA_FOLDER`` using ``solve_path``.

    Parameters
    ----------
    fname : str
        File name to resolve.

    Returns
    -------
    The value returned by ``solve_path(fname, GENERATED_DATA_FOLDER)``.
    """
    return solve_path(fname, GENERATED_DATA_FOLDER)

In [24]:
# Destination of the exported table.
fname = get_fname(fname='quadras_com_distritos.csv')

In [25]:
# List the columns available after the join, to choose which ones to export.
joined.columns

Index(['quadra', 'geometry', 'qd_id_orig', 'qd_setor', 'qd_fiscal', 'qd_tipo',
       'qd_subqua', 'qd_area', 'qd_situac', 'qd_id', 'qd_tx_tipo', 'id_pol',
       'index_right', 'ds_nome', 'ds_codigo', 'ds_cd_sub', 'ds_subpref',
       'ds_sigla', 'ds_areamt', 'ds_areakm'],
      dtype='object')

In [26]:
# Columns kept in the exported table (geometry and join bookkeeping are left out).
# The order here is the column order of the output file.
cols_interesses = [
    # block attributes
    'quadra',
    'qd_id_orig',
    'qd_setor',
    'qd_fiscal',
    'qd_tipo',
    'qd_subqua',
    'qd_area',
    'qd_situac',
    'qd_id',
    'qd_tx_tipo',
    # district attributes
    'ds_areamt',
    'ds_subpref',
    'ds_sigla',
    'ds_nome',
    'ds_areakm',
    'ds_cd_sub',
    'ds_codigo',
]

In [27]:
# Plain (non-spatial) table holding only the selected columns.
quadras_dists = pd.DataFrame(data=joined[cols_interesses])

In [28]:
# Preview the first five rows of the table to be exported.
quadras_dists.head(n=5)

Unnamed: 0,quadra,qd_id_orig,qd_setor,qd_fiscal,qd_tipo,qd_subqua,qd_area,qd_situac,qd_id,qd_tx_tipo,ds_areamt,ds_subpref,ds_sigla,ds_nome,ds_areakm,ds_cd_sub,ds_codigo
0,1001,8893734,1,1,M,1,6463.70042403409,1,172817,AREA PUBLICA MUNICIPAL,2396993.441,SE,REP,REPUBLICA,2.397,9,66
1,1002,8893731,1,2,M,1,3848.08338581389,1,142629,AREA PUBLICA MUNICIPAL,2193819.874,SE,SEE,SE,2.194,9,78
2,1003,8893737,1,3,F,1,21744.9002690475,1,180769,FISCAL,4205896.639,SE,BRE,BOM RETIRO,4.206,9,9
3,1004,8893678,1,4,F,1,14803.4112659378,1,191772,FISCAL,4205896.639,SE,BRE,BOM RETIRO,4.206,9,9
4,1005,8893742,1,5,F,1,9177.1884098398,1,192049,FISCAL,4205896.639,SE,BRE,BOM RETIRO,4.206,9,9


In [29]:
# Count rows whose 'quadra' key repeats an earlier row.
quadras_dists.duplicated(subset='quadra').sum()

0

In [30]:
# Write the table to `fname` as a semicolon-separated UTF-8 file, without the index.
quadras_dists.to_csv(
    path_or_buf=fname,
    sep=';',
    index=False,
    encoding='utf-8',
    quotechar='"',
)