In [1]:
import geopandas as gpd
import pandas as pd

In [2]:
# CRS that both layers are reprojected to before they are overlaid.
DEFAULT_CRS = "EPSG:4326"

In [3]:
# Load the polygon layer stored under data/SC2010_CEM_RMSAO_V4; this is the
# table the D1_* variables are joined onto further down.
censu_gpd = gpd.read_file("data/SC2010_CEM_RMSAO_V4")

In [4]:
censu_gpd.head()

Unnamed: 0,ID,AREA,DATA,SC_2010,SC_2010_CH,AP_2010,AP_2010_CH,SC_2000,SC_2000_CH,COD_GR,...,BA_007,BA_009,BA_011,AREA_KM2,DENS_KM2,SITUA1_201,SITUA_2_20,TIPO_2016,TIPO_2_201,geometry
0,2895191,14.88,602,354680100000000.0,354680105000064,3546801003001,3546801003001,354680100000000.0,354680105000064,3,...,1195.91,578.19,991.61,14.88144,26,8.0,RURAL,0,N,"POLYGON ((-46.17788 -23.35886, -46.17866 -23.3..."
1,2905077,13.3,1082,353060700000000.0,353060705000181,3530607005007,3530607005007,353060700000000.0,353060705000160,3,...,1042.77,574.84,907.13,13.298441,58,8.0,RURAL,0,N,"POLYGON ((-46.21148 -23.37766, -46.21124 -23.3..."
2,2894895,9.65,1081,353060700000000.0,353060705000180,3530607005007,3530607005007,353060700000000.0,353060705000159,3,...,865.09,413.37,704.5,9.645352,41,8.0,RURAL,0,N,"POLYGON ((-46.21148 -23.37766, -46.21119 -23.3..."
3,2905096,6.1,1083,353060700000000.0,353060705000182,3530607005007,3530607005007,353060700000000.0,353060705000161,3,...,1754.07,893.2,1335.42,6.101509,58,8.0,RURAL,0,N,"POLYGON ((-46.17788 -23.35886, -46.17665 -23.3..."
4,2905155,1.23,603,353060700000000.0,353060705000164,3530607005007,3530607005007,353060700000000.0,353060705000149,3,...,876.96,429.7,763.29,1.225181,515,3.0,URBANO,0,N,"POLYGON ((-46.15525 -23.36233, -46.15625 -23.3..."


Read the table values

In [5]:
# Only the sector identifiers, the region fields and the four D1_* variables
# are read from the workbook.
variables = pd.read_excel(
    "data/SC2010_CEM_RMSAO_V4/SC2010_CEM_RMSAO.xlsx",
    usecols=[
        "CODSETOR",
        "CODSETTX",
        "COD_RM",
        "NOM_RM",
        "D1_012",
        "D1_017",
        "D1_035",
        "D1_044",
    ],
)

In [6]:
variables.head()

Unnamed: 0,CODSETOR,CODSETTX,COD_RM,NOM_RM,D1_012,D1_017,D1_035,D1_044
0,350390105000001,350390105000001,20,RM SAO PAULO,109.0,107.0,109.0,109.0
1,350390105000002,350390105000002,20,RM SAO PAULO,151.0,151.0,151.0,151.0
2,350390105000003,350390105000003,20,RM SAO PAULO,175.0,169.0,175.0,175.0
3,350390105000004,350390105000004,20,RM SAO PAULO,234.0,216.0,238.0,238.0
4,350390105000005,350390105000005,20,RM SAO PAULO,106.0,93.0,106.0,106.0


In [7]:
variables[variables['CODSETOR'] == 355030832000011]

Unnamed: 0,CODSETOR,CODSETTX,COD_RM,NOM_RM,D1_012,D1_017,D1_035,D1_044
17221,355030832000011,355030832000011,20,RM SAO PAULO,94.0,94.0,94.0,94.0


In [8]:
variables.dtypes

CODSETOR      int64
CODSETTX      int64
COD_RM        int64
NOM_RM       object
D1_012      float64
D1_017      float64
D1_035      float64
D1_044      float64
dtype: object

In [9]:
# Attach the four D1_* variables to every polygon: the SC_2010 column on the
# left is matched against CODSETOR, which is used as the index of the
# right-hand table. A left join keeps polygons that have no match.
# NOTE(review): the cell rebinds censu_gpd, so running it a second time would
# try to join the same D1_* columns again.
censu_gpd = censu_gpd.join(
    variables.set_index("CODSETOR")[['D1_012', 'D1_017', 'D1_035', 'D1_044']],
    on='SC_2010',
    how='left',
)

Criar variáveis com as densidades das variáveis de acesso

In [10]:
# Areas are taken after reprojecting to EPSG:5641, because the stored
# coordinates appear to be lon/lat degrees and geopandas warns that .area is
# not meaningful in a geographic CRS.
# NOTE(review): for the first row this gives ~17.58e6 while the source table
# reports AREA_KM2 = 14.88, i.e. roughly 18% more — presumably the area
# distortion of this projection. The intersection areas computed later use the
# same CRS, so keep the two in sync if this is ever changed.
censu_gpd["area_poly"] = censu_gpd.geometry.to_crs("EPSG:5641").area


In [11]:
def calculate_var_density(gpd, var, area):
    """Return column ``var`` divided element-wise by column ``area``.

    Args:
        gpd: Table that supports ``[]`` column lookup (the notebook passes a
            GeoDataFrame). Inside this function the name hides the
            ``geopandas`` module alias imported at the top of the notebook.
        var: Name of the column used as the numerator.
        area: Name of the column used as the denominator.

    Returns:
        The result of ``gpd[var] / gpd[area]``.
    """
    numerator = gpd[var]
    denominator = gpd[area]
    return numerator / denominator

In [12]:
# One "<variable>_d" column per access variable: the variable divided by the
# projected polygon area.
for access_var in ('D1_012', 'D1_017', 'D1_035', 'D1_044'):
    censu_gpd[f"{access_var}_d"] = calculate_var_density(censu_gpd, access_var, 'area_poly')


In [13]:
censu_gpd.head()

Unnamed: 0,ID,AREA,DATA,SC_2010,SC_2010_CH,AP_2010,AP_2010_CH,SC_2000,SC_2000_CH,COD_GR,...,geometry,D1_012,D1_017,D1_035,D1_044,area_poly,D1_012_d,D1_017_d,D1_035_d,D1_044_d
0,2895191,14.88,602,354680100000000.0,354680105000064,3546801003001,3546801003001,354680100000000.0,354680105000064,3,...,"POLYGON ((-46.17788 -23.35886, -46.17866 -23.3...",3.0,0.0,97.0,125.0,17576410.0,1.706834e-07,0.0,6e-06,7e-06
1,2905077,13.3,1082,353060700000000.0,353060705000181,3530607005007,3530607005007,353060700000000.0,353060705000160,3,...,"POLYGON ((-46.21148 -23.37766, -46.21124 -23.3...",6.0,2.0,146.0,205.0,15711550.0,3.818847e-07,1.272949e-07,9e-06,1.3e-05
2,2894895,9.65,1081,353060700000000.0,353060705000180,3530607005007,3530607005007,353060700000000.0,353060705000159,3,...,"POLYGON ((-46.21148 -23.37766, -46.21119 -23.3...",1.0,1.0,39.0,97.0,11399320.0,8.772451e-08,8.772451e-08,3e-06,9e-06
3,2905096,6.1,1083,353060700000000.0,353060705000182,3530607005007,3530607005007,353060700000000.0,353060705000161,3,...,"POLYGON ((-46.17788 -23.35886, -46.17665 -23.3...",0.0,4.0,101.0,109.0,7207873.0,0.0,5.549487e-07,1.4e-05,1.5e-05
4,2905155,1.23,603,353060700000000.0,353060705000164,3530607005007,3530607005007,353060700000000.0,353060705000149,3,...,"POLYGON ((-46.15525 -23.36233, -46.15625 -23.3...",4.0,4.0,191.0,186.0,1446876.0,2.764577e-06,2.764577e-06,0.000132,0.000129


Colunas de interesse

In [14]:
# Keep only the identifiers, the geometry and the four density columns; the
# raw D1_* values and area_poly are dropped from here on.
censu_gpd = censu_gpd[
    [
        'ID',
        'SC_2010',
        'geometry',
        'D1_012_d',
        'D1_017_d',
        'D1_035_d',
        'D1_044_d',
    ]
]

# Lendo H3 

In [15]:
# Load the hexagon grid layer; each row carries a numeric `fid`, an `id_hex`
# string and the hexagon polygon.
hex_sp = gpd.read_file("data/shapeFiles/wgs84_hex_grid_sp_v2")

In [16]:
hex_sp.head()

Unnamed: 0,fid,id_hex,abbrev_mun,name_muni,code_muni,geometry
0,2563.0,89a81009a8bffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.43596 -23.58661, -46.43800 -23.5..."
1,2564.0,89a8108dca7ffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.77886 -23.89706, -46.78089 -23.8..."
2,2565.0,89a81015a8bffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.74000 -23.72290, -46.74204 -23.7..."
3,2566.0,89a810019d7ffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.62778 -23.65457, -46.62982 -23.6..."
4,2567.0,89a8100d9d7ffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.62335 -23.50435, -46.62539 -23.5..."


# Intersect data

In [17]:
# Both layers must share DEFAULT_CRS before they are overlaid; a layer is only
# reprojected when its CRS differs.
censu_gpd = censu_gpd.to_crs(DEFAULT_CRS) if censu_gpd.crs != DEFAULT_CRS else censu_gpd
hex_sp = hex_sp.to_crs(DEFAULT_CRS) if hex_sp.crs != DEFAULT_CRS else hex_sp

In [18]:
# Cut every census polygon by the hexagon grid. Each output row is one
# census/hexagon intersection piece and carries the census columns plus the
# hexagon's `fid`.
censu_join_h3 = gpd.overlay(
    censu_gpd,
    hex_sp[['fid', 'geometry']],
    how='intersection',
)


Gerar a quantidade de cada variável referente à área de recorte do censo nos hexágonos

In [19]:
# Area of every census/hexagon intersection piece, measured in the same
# projected CRS (EPSG:5641) that was used for the full-polygon areas.
censu_join_h3["area_poly"] = censu_join_h3.geometry.to_crs("EPSG:5641").area

# density * piece area = the part of the variable that falls inside the piece.
# The columns keep their "_d" suffix although they no longer hold densities.
for access_col in ("D1_012_d", "D1_017_d", "D1_035_d", "D1_044_d"):
    censu_join_h3[access_col] = censu_join_h3["area_poly"] * censu_join_h3[access_col]



In [20]:
censu_join_h3[censu_join_h3["SC_2010"] == 355030832000011]

Unnamed: 0,ID,SC_2010,D1_012_d,D1_017_d,D1_035_d,D1_044_d,fid,geometry,area_poly
6,1928880,355030800000000.0,0.373261,0.373261,0.373261,0.373261,3794.0,"POLYGON ((-46.66439 -23.59517, -46.66457 -23.5...",7523.883764
7,1928880,355030800000000.0,6.250081,6.250081,6.250081,6.250081,6882.0,"POLYGON ((-46.66168 -23.59317, -46.66265 -23.5...",125984.066365
8,1928880,355030800000000.0,4.712445,4.712445,4.712445,4.712445,7107.0,"POLYGON ((-46.66436 -23.58964, -46.66421 -23.5...",94989.636385
9,1928880,355030800000000.0,6.250185,6.250185,6.250185,6.250185,7318.0,"POLYGON ((-46.65858 -23.59178, -46.65955 -23.5...",125986.151216
10,1928880,355030800000000.0,6.250136,6.250136,6.250136,6.250136,7532.0,"POLYGON ((-46.66159 -23.59010, -46.66256 -23.5...",125985.173636
11,1928880,355030800000000.0,1.368813,1.368813,1.368813,1.368813,7745.0,"POLYGON ((-46.66271 -23.59461, -46.66168 -23.5...",27591.418897
12,1928880,355030800000000.0,5.220015,5.220015,5.220015,5.220015,7962.0,"POLYGON ((-46.66283 -23.59465, -46.66469 -23.5...",105220.822011
13,1928880,355030800000000.0,2.126296,2.126296,2.126296,2.126296,8182.0,"POLYGON ((-46.65836 -23.59317, -46.65947 -23.5...",42860.155228
14,1928880,355030800000000.0,0.495271,0.495271,0.495271,0.495271,8368.0,"POLYGON ((-46.65878 -23.57685, -46.65820 -23.5...",9983.270313
15,1928880,355030800000000.0,3.016311,3.016311,3.016311,3.016311,8816.0,"POLYGON ((-46.66331 -23.58135, -46.66351 -23.5...",60800.359472


In [21]:
censu_join_h3.head()

Unnamed: 0,ID,SC_2010,D1_012_d,D1_017_d,D1_035_d,D1_044_d,fid,geometry,area_poly
0,1037688,355030800000000.0,13.284932,13.284932,13.284932,13.284932,8816.0,"POLYGON ((-46.66367 -23.58103, -46.66351 -23.5...",27359.189882
1,1037688,355030800000000.0,4.058893,4.058893,4.058893,4.058893,15093.0,"MULTIPOLYGON (((-46.66532 -23.58491, -46.66610...",8358.94475
2,1037688,355030800000000.0,24.433651,24.433651,24.433651,24.433651,15319.0,"POLYGON ((-46.66813 -23.58417, -46.66838 -23.5...",50319.030414
3,1037688,355030800000000.0,19.64294,19.64294,19.64294,19.64294,15542.0,"POLYGON ((-46.66363 -23.58391, -46.66493 -23.5...",40452.968782
4,1037688,355030800000000.0,32.30751,32.30751,32.30751,32.30751,15768.0,"POLYGON ((-46.66650 -23.58317, -46.66735 -23.5...",66534.574566


In [22]:
# Sum the pieces that fall into each hexagon. Passing a list of aggregations
# per column produces two-level column labels, which the next cell flattens.
agg_variables_by_hex = (
    censu_join_h3
    .groupby(['fid'])
    .agg({col: ['sum'] for col in ("D1_012_d", "D1_017_d", "D1_035_d", "D1_044_d")})
    .reset_index()
)

In [23]:
# The aggregation leaves two-level column labels such as ("D1_012_d", "sum")
# and, after reset_index(), ("fid", ""). Joining the levels with "_" yields
# "D1_012_d_sum" and "fid_" — the names the following cells rely on.
# The former `if col != 'fid'` filter compared tuples with a string, so it
# never removed anything (and would have caused a length mismatch if it had);
# it is dropped. Labels that are already plain strings are kept untouched so
# that re-running this cell does not join their characters with "_".
agg_variables_by_hex.columns = [
    col if isinstance(col, str) else '_'.join(col).strip()
    for col in agg_variables_by_hex.columns.values
]


In [24]:
agg_variables_by_hex.head()

Unnamed: 0,fid_,D1_012_d_sum,D1_017_d_sum,D1_035_d_sum,D1_044_d_sum
0,2563.0,152.372083,117.495154,172.151288,175.365031
1,2564.0,0.014834,0.0,0.267007,0.178005
2,2565.0,10.161207,8.451977,10.924975,10.993266
3,2566.0,0.260353,0.023668,0.260353,0.260353
4,2567.0,216.861985,216.773123,216.861985,216.861985


In [25]:
# Replace missing sums with 0 in the aggregated table.
# NOTE(review): this runs before the table is left-joined onto the hexagon
# grid, so it does not cover NaN values that the join itself may introduce.
for sum_col in ("D1_012_d_sum", "D1_017_d_sum", "D1_035_d_sum", "D1_044_d_sum"):
    agg_variables_by_hex[sum_col] = agg_variables_by_hex[sum_col].fillna(0)

In [26]:
agg_variables_by_hex[agg_variables_by_hex["fid_"] == 3794.0]

Unnamed: 0,fid_,D1_012_d_sum,D1_017_d_sum,D1_035_d_sum,D1_044_d_sum
1229,3794.0,298.824212,298.824212,298.824212,298.824212


In [35]:
agg_variables_by_hex.isna().sum()

fid_            0
D1_012_d_sum    0
D1_017_d_sum    0
D1_035_d_sum    0
D1_044_d_sum    0
dtype: int64

In [28]:
# The left join keeps every hexagon of the grid, including those without any
# intersecting census piece.
hex_censu_final = hex_sp.join(agg_variables_by_hex.set_index('fid_'), how='left', on='fid')

# Hexagons without a match come out of the left join as NaN. The earlier
# fillna(0) ran on the aggregated table, before this join, so it cannot cover
# them; fill them here so that they are exported as 0 instead of missing.
for sum_col in ("D1_012_d_sum", "D1_017_d_sum", "D1_035_d_sum", "D1_044_d_sum"):
    hex_censu_final[sum_col] = hex_censu_final[sum_col].fillna(0)

In [29]:
hex_censu_final.head()

Unnamed: 0,fid,id_hex,abbrev_mun,name_muni,code_muni,geometry,D1_012_d_sum,D1_017_d_sum,D1_035_d_sum,D1_044_d_sum
0,2563.0,89a81009a8bffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.43596 -23.58661, -46.43800 -23.5...",152.372083,117.495154,172.151288,175.365031
1,2564.0,89a8108dca7ffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.77886 -23.89706, -46.78089 -23.8...",0.014834,0.0,0.267007,0.178005
2,2565.0,89a81015a8bffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.74000 -23.72290, -46.74204 -23.7...",10.161207,8.451977,10.924975,10.993266
3,2566.0,89a810019d7ffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.62778 -23.65457, -46.62982 -23.6...",0.260353,0.023668,0.260353,0.260353
4,2567.0,89a8100d9d7ffff,spo,Sao Paulo,3550308.0,"POLYGON ((-46.62335 -23.50435, -46.62539 -23.5...",216.861985,216.773123,216.861985,216.861985


In [33]:
# geopandas has a bug with a column named "fid"
# (https://github.com/geopandas/geopandas/issues/1035), so the id is stored as
# an integer column called "hex_fid" and the original column is removed.
# NOTE(review): because "fid" is dropped in place, this cell cannot be re-run
# without re-running the join cell first.
hex_censu_final["hex_fid"] = hex_censu_final["fid"].astype(int)
hex_censu_final.drop(columns="fid", inplace=True)

# Round every aggregated value to zero decimal places.
for sum_col in ("D1_012_d_sum", "D1_017_d_sum", "D1_035_d_sum", "D1_044_d_sum"):
    hex_censu_final[sum_col] = hex_censu_final[sum_col].round()

In [34]:
# Write the hexagon grid with the aggregated D1_* sums to a GeoPackage file.
hex_censu_final.to_file("data/WGS84_hex_census_2010.gpkg", driver="GPKG")