In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
# CRS that both layers are brought to before they are overlaid (see the
# `to_crs(DEFAULT_CRS)` calls further down).
DEFAULT_CRS = "EPSG:4326"

In [3]:
# Load the layer stored under data/SC2010_CEM_RMSAO_V4 as a GeoDataFrame.
# Later cells rely on its ID, SC_2010 and geometry columns.
censu_gpd = gpd.read_file("data/SC2010_CEM_RMSAO_V4")

In [None]:
# Preview the first rows of the loaded layer.
censu_gpd.head()

Read table values

In [5]:
# Read only the identifier columns and the four D1_* variables from the workbook
# that ships alongside the layer.
variables = pd.read_excel(
    "data/SC2010_CEM_RMSAO_V4/SC2010_CEM_RMSAO.xlsx",
    usecols=[
        "CODSETOR",
        "CODSETTX",
        "COD_RM",
        "NOM_RM",
        "D1_012",
        "D1_017",
        "D1_035",
        "D1_044",
    ],
)

In [None]:
# Preview the first rows of the spreadsheet columns that were read.
variables.head()

In [None]:
# Spot check: show the row(s) whose CODSETOR equals one specific code.
variables[variables['CODSETOR'] == 355030832000011]

In [None]:
# Inspect the column dtypes; CODSETOR is matched against SC_2010 in the join
# below, so the two key columns need comparable types.
variables.dtypes

In [9]:
# Attach the four D1_* variables to every geometry by matching the layer's
# SC_2010 column against CODSETOR from the spreadsheet (left join: geometries
# without a match keep NaN in the new columns).
census_value_cols = ["D1_012", "D1_017", "D1_035", "D1_044"]
census_values = variables[["CODSETOR", *census_value_cols]].set_index("CODSETOR")

# A repeated CODSETOR would duplicate geometries in the left join and inflate
# every per-hexagon sum computed later, so fail early instead of silently.
assert census_values.index.is_unique, "CODSETOR must be unique in `variables`"

censu_gpd = (
    censu_gpd
    # Dropping columns from a previous run keeps this cell re-runnable; a second
    # plain join would raise because the column names overlap.
    .drop(columns=census_value_cols, errors="ignore")
    .join(census_values, how="left", on="SC_2010")
)

Criar variáveis com as densidades das variáveis de acesso

In [10]:
# Measure each polygon's area after projecting to the planar CRS EPSG:5641;
# taking `.area` in a geographic (degree-based) CRS makes geopandas warn that
# the result may be invalid.
projected_geometry = censu_gpd.geometry.to_crs("EPSG:5641")
censu_gpd["area_poly"] = projected_geometry.area



In [11]:
def calculate_var_density(gpd, var, area):
    """Return column ``var`` divided element-wise by column ``area``.

    Args:
        gpd: Table (for example a GeoDataFrame) that holds both columns.
            Inside this function the name hides the module-level ``gpd``
            alias of geopandas.
        var: Name of the numerator column.
        area: Name of the denominator column.

    Returns:
        The element-wise quotient of the two columns.
    """
    numerator = gpd[var]
    denominator = gpd[area]
    return numerator / denominator

In [12]:
# Density column -> source column; each density is the source value divided by
# the polygon area stored in "area_poly".
density_sources = {
    "D1_012_d": "D1_012",
    "D1_017_d": "D1_017",
    "D1_035_d": "D1_035",
    "D1_044_d": "D1_044",
}
for density_col, source_col in density_sources.items():
    censu_gpd[density_col] = calculate_var_density(censu_gpd, source_col, "area_poly")


In [None]:
# Preview the layer now that the area and density columns have been added.
censu_gpd.head()

Colunas de interesse

In [14]:
# Keep only the identifiers, the geometry and the four density columns; the raw
# D1_* values and "area_poly" are not carried any further.
columns_of_interest = [
    "ID",
    "SC_2010",
    "geometry",
    "D1_012_d",
    "D1_017_d",
    "D1_035_d",
    "D1_044_d",
]
censu_gpd = censu_gpd[columns_of_interest]

# Lendo H3 

In [15]:
# Load the layer stored under data/shapeFiles/GRIDS_H3_SP_RES10; later cells use
# its id_hex and geometry columns.
hex_sp = gpd.read_file("data/shapeFiles/GRIDS_H3_SP_RES10") 

In [None]:
# Preview the first rows of the hexagon layer.
hex_sp.head()

# Intersect data

In [17]:
# Bring both layers to DEFAULT_CRS so the overlay operates on matching
# coordinates; a layer that is already in that CRS is left untouched.
censu_gpd = censu_gpd.to_crs(DEFAULT_CRS) if censu_gpd.crs != DEFAULT_CRS else censu_gpd
hex_sp = hex_sp.to_crs(DEFAULT_CRS) if hex_sp.crs != DEFAULT_CRS else hex_sp

In [18]:
# Cut the census polygons by the hexagons: every output row is the piece of one
# census polygon that lies inside one hexagon, tagged with that hexagon's id_hex.
hex_cells = hex_sp[["id_hex", "geometry"]]
censu_join_h3 = gpd.overlay(censu_gpd, hex_cells, how="intersection")


Gerar a quantidade de cada variável referente à área de recorte do censo nos hexágonos

In [19]:
# Area of every clipped piece, measured in the same planar CRS (EPSG:5641) that
# was used for the densities; `.area` in a degree-based CRS would trigger
# geopandas' invalid-area warning.
clipped_geometry = censu_join_h3.geometry.to_crs("EPSG:5641")
censu_join_h3["area_poly"] = clipped_geometry.area

# Multiply each density by the clipped area. From here on the "*_d" columns hold
# area-weighted amounts rather than densities.
# NOTE: the columns are overwritten in place, so running this cell a second time
# multiplies by the area again; re-run the overlay cell first.
for weighted_col in ("D1_012_d", "D1_017_d", "D1_035_d", "D1_044_d"):
    censu_join_h3[weighted_col] = censu_join_h3["area_poly"] * censu_join_h3[weighted_col]



In [None]:
# Spot check: show the clipped pieces whose SC_2010 equals one specific code.
censu_join_h3[censu_join_h3["SC_2010"] == 355030832000011]

In [None]:
# Preview the first rows of the overlay result.
censu_join_h3.head()

In [22]:
# Add up the area-weighted amounts of all clipped pieces that share a hexagon.
# The list-valued spec produces two-level column labels such as
# ("D1_012_d", "sum").
sum_spec = {
    "D1_012_d": ["sum"],
    "D1_017_d": ["sum"],
    "D1_035_d": ["sum"],
    "D1_044_d": ["sum"],
}
pieces_by_hex = censu_join_h3.groupby(["id_hex"])
agg_variables_by_hex = pieces_by_hex.agg(sum_spec).reset_index()

In [23]:
# Flatten the two-level column labels left by the aggregation: ("D1_012_d", "sum")
# becomes "D1_012_d_sum" and ("id_hex", "") becomes "id_hex_" (note the trailing
# underscore, which is the key name the later cells use).
# Changes compared with the previous version:
#   * the `if col != 'id_hex'` filter is gone; every label is a tuple, so the
#     comparison with a string was always true and never removed anything;
#   * labels that are already plain strings are kept as they are, so running the
#     cell again no longer joins their individual characters with "_".
agg_variables_by_hex.columns = [
    "_".join(label).strip() if isinstance(label, tuple) else label
    for label in agg_variables_by_hex.columns
]


In [None]:
# Preview the per-hexagon sums with their flattened column names.
agg_variables_by_hex.head()

In [25]:
# Replace any missing sum with 0 in the four aggregated columns.
summed_columns = ["D1_012_d_sum", "D1_017_d_sum", "D1_035_d_sum", "D1_044_d_sum"]
agg_variables_by_hex[summed_columns] = agg_variables_by_hex[summed_columns].fillna(0)

In [None]:
# Spot check: show the aggregated row for one specific hexagon id.
agg_variables_by_hex[agg_variables_by_hex["id_hex_"] == '8aa810000007fff']

In [None]:
# Count the missing values remaining in each column.
agg_variables_by_hex.isna().sum()

In [32]:
# Attach the per-hexagon sums to the full hexagon layer. It is a left join, so
# every hexagon is kept and those without a matching sum get NaN.
sums_indexed_by_hex = agg_variables_by_hex.set_index("id_hex_")
hex_censu_final = hex_sp.join(sums_indexed_by_hex, on="id_hex", how="left")

In [None]:
# Preview the hexagon layer with the joined sums.
hex_censu_final.head()

In [34]:
# Disabled workaround kept for reference (geopandas fid handling, see
# https://github.com/geopandas/geopandas/issues/1035): casting "id_hex" to int
# and dropping the "fid" column.

# Round every summed column to the nearest whole number. NaN values (hexagons
# that received no sum in the left join) are left as NaN by round().
for rounded_col in ("D1_012_d_sum", "D1_017_d_sum", "D1_035_d_sum", "D1_044_d_sum"):
    hex_censu_final[rounded_col] = hex_censu_final[rounded_col].round()

In [35]:
# Write the final hexagon layer to disk using the GPKG (GeoPackage) driver.
hex_censu_final.to_file("data/WGS84_hex_census_2010.gpkg", driver="GPKG")