In [1]:
import geopandas as gpd 
import pandas as pd

from shapely import wkt

In [2]:
# Disable truncation when DataFrames are displayed: show every column and every row.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [3]:
# CRS assigned to the building layer and enforced on both layers before they are overlaid.
DEFAULT_CRS = "EPSG:4326"

# Read building data
O polígono utilizado para realizar o download dos dados de construção do Google Open Buildings foi: POLYGON((-46.92166552116743 -23.35298450738664,-46.19244799187055 -23.35298450738664,-46.19244799187055 -23.899033072097712,-46.92166552116743 -23.899033072097712,-46.92166552116743 -23.35298450738664))

In [4]:

# buildings_sp = gpd.read_parquet("spBuilding.geoparquet") # disabled alternative: reading the Overture file
# Load the Open Buildings polygons as a plain pandas table; geometries are still text at this point.
builds_csv = pd.read_csv("data/open_buildings_v3_polygons_sp.csv")


In [5]:
# Build the building layer from the CSV table.
# The WKT text is parsed into a *copy* (`assign`) rather than written back into
# `builds_csv`, so `builds_csv` keeps its strings and this cell can be re-run safely.
# `assign` replaces the column where it is, so the column order is unchanged.
# `GeoSeries.from_wkt` parses the whole column in one vectorised call.
buildings_sp = gpd.GeoDataFrame(
    builds_csv.assign(geometry=gpd.GeoSeries.from_wkt(builds_csv['geometry'])),
    geometry='geometry',
    crs=DEFAULT_CRS,
)


In [None]:
# Disabled: optional rename of the source area column.
#buildings_sp.rename(columns={'area_in_meters':'area'}, inplace=True)
# Preview the first rows of the building layer.
buildings_sp.head()

# Reading H3 hexagons

In [7]:
# Load the hexagon grid layer (the path name indicates an H3 grid at resolution 10).
hex_sp = gpd.read_file("data/shapeFiles/GRIDS_H3_SP_RES10") # res10

In [None]:
# Preview the first rows of the hexagon grid.
hex_sp.head()

# Intersect data

In [9]:
# Make sure both layers use DEFAULT_CRS before they are overlaid.
# A layer that already matches is kept as it is; otherwise it is reprojected.
buildings_sp = buildings_sp.to_crs(DEFAULT_CRS) if buildings_sp.crs != DEFAULT_CRS else buildings_sp
hex_sp = hex_sp.to_crs(DEFAULT_CRS) if hex_sp.crs != DEFAULT_CRS else hex_sp

In [10]:
# Intersect the building footprints with the hexagons. Only `id_hex` and the geometry
# are taken from the grid, so each resulting piece carries the building attributes
# plus the id of the hexagon it falls in.
building_join_h3 = gpd.overlay(
    buildings_sp,
    hex_sp[['id_hex', 'geometry']],
    how='intersection',
)


In [11]:

# Area of every clipped footprint. The geometries are reprojected first because
# `.area` on a geographic CRS (degrees) is not a meaningful surface measure.
# NOTE(review): EPSG:5641 appears to be SIRGAS 2000 / Brazil Mercator, which is not
# an equal-area projection; at São Paulo's latitude absolute areas would be
# overstated (very roughly 15-20%) - confirm. If the CRS is changed (e.g. to a
# UTM zone 23S CRS), change the hexagon-area computation to the same CRS so the
# building/hexagon ratio stays consistent.
projected_pieces = building_join_h3.geometry.to_crs("EPSG:5641")
building_join_h3["area"] = projected_pieces.area


In [None]:
# Preview the intersected pieces, including the new `area` column.
building_join_h3.head()

In [13]:
# Keep only the pieces that carry a hexagon id (rows with a missing `id_hex` are removed).
has_hex_id = building_join_h3['id_hex'].notna()
building_join_h3_na = building_join_h3[has_hex_id]

In [14]:
# Per-hexagon statistics of the clipped building areas.
# The input is the frame from the previous cell with missing `id_hex` rows removed,
# so that filtering step is actually used instead of being left as dead state
# (the numbers are unchanged: groupby ignores missing keys by default).
# The result has two-level columns (column name, statistic) plus `id_hex`.
area_sum_by_hex = (
    building_join_h3_na
    .groupby(['id_hex'])
    .agg({"area": ['sum', 'mean', 'median', 'std', 'var', 'count']})
    .reset_index()
)

In [15]:
# Flatten the two-level column labels into single names joined by "_",
# e.g. ('area', 'sum') -> 'area_sum'. The key column ('id_hex', '') becomes
# 'id_hex_' (trailing underscore), which is the name the later join relies on.
# Labels that are already plain strings are left untouched, so re-running this
# cell does not mangle the names.
area_sum_by_hex.columns = [
    '_'.join(col).strip() if isinstance(col, tuple) else col
    for col in area_sum_by_hex.columns
]


In [None]:
# Preview the flattened per-hexagon statistics.
area_sum_by_hex.head()

In [17]:
# Attach the per-hexagon statistics to the full grid. The left join keeps every
# hexagon; those without any building get missing values in the statistic columns.
hex_area = hex_sp.join(
    area_sum_by_hex.set_index('id_hex_'),
    on='id_hex',
    how='left',
    rsuffix='sum',
)

In [18]:
# Replace the missing statistics left by the join with 0, one statistic column at a time.
for stat in ('sum', 'mean', 'median', 'std', 'var', 'count'):
    column = f'area_{stat}'
    hex_area[column] = hex_area[column].fillna(0)




In [19]:
# Area of each hexagon, measured after reprojecting because `.area` on a geographic
# CRS (degrees) is not a meaningful surface measure.
# NOTE(review): EPSG:5641 appears to be SIRGAS 2000 / Brazil Mercator, which is not
# an equal-area projection, so absolute areas at São Paulo's latitude would be
# overstated - confirm. Keep this CRS identical to the one used for the building
# areas, otherwise the ratio below becomes inconsistent.
hex_planar_area = hex_area.geometry.to_crs("EPSG:5641").area
hex_area['area_hex'] = hex_planar_area
# Share of each hexagon covered by buildings: summed building area / hexagon area.
hex_area["area_const"] = hex_area['area_sum'].div(hex_area['area_hex'])

In [None]:
# Preview the grid with the statistics, hexagon area and built-up ratio.
hex_area.head()

In [None]:
# Count the missing values that remain in each column.
hex_area.isna().sum()

# Final transformations

In [22]:
# Disabled workaround: geopandas has a bug with the `fid` column, see https://github.com/geopandas/geopandas/issues/1035
#hex_area["hex_fid"] = hex_area["fid"].astype(int)
#hex_area.drop("fid", inplace=True, axis=1)

In [23]:
# Write the per-hexagon result to disk using the GPKG driver.
hex_area.to_file("data/WGS84_hex_construcao_open_buildings_2023.gpkg", driver="GPKG")