# Config

In [1]:
# Libraries
import geopandas as gpd
from config import INTERIM_DATA_DIR,PROCESSED_DATA_DIR

In [2]:
# Params
CITY = "barcelona"
RES = 10

datasets = {
    # "ndvi_data": "Points",
    "overture_places": "Points"
}

# Load data

In [3]:
gdf_h3 = gpd.read_parquet(INTERIM_DATA_DIR/f"{CITY}_h3_res{RES}.parquet")
gdf_h3.head()

Unnamed: 0,h3_id,geometry
0,8a394461b98ffff,"POLYGON ((430592.392 4576915.43, 430577.5 4576..."
1,8a394461b92ffff,"POLYGON ((430681.742 4577368.251, 430666.851 4..."
2,8a394461b91ffff,"POLYGON ((430637.067 4577141.842, 430622.176 4..."
3,8a394461b90ffff,"POLYGON ((430722.225 4577245.969, 430707.333 4..."
4,8a39446f4b1ffff,"POLYGON ((430607.72 4574700.8, 430592.828 4574..."


In [4]:
gdf = gpd.read_parquet(INTERIM_DATA_DIR / "overture_places.parquet")
gdf.to_crs(gdf_h3.crs, inplace=True)

gdf = gpd.sjoin(gdf_h3[[ "h3_id", "geometry"]],gdf, how="left", predicate="contains")
results = gdf.groupby(["h3_id","category","geometry"]).size().reset_index(name="count")
results = gpd.GeoDataFrame(results, geometry="geometry", crs=gdf_h3.crs)
results.head()

#

Unnamed: 0,h3_id,category,geometry,count
0,8a3944600007fff,architectural_design_service,"POLYGON ((427657.851 4583973.849, 427642.959 4...",2
1,8a3944600007fff,bakery,"POLYGON ((427657.851 4583973.849, 427642.959 4...",1
2,8a3944600007fff,beauty_salon,"POLYGON ((427657.851 4583973.849, 427642.959 4...",2
3,8a3944600007fff,christian_place_of_worship,"POLYGON ((427657.851 4583973.849, 427642.959 4...",3
4,8a3944600007fff,clothing_store,"POLYGON ((427657.851 4583973.849, 427642.959 4...",1


In [5]:
results_wide = results.pivot_table(
    index='h3_id', 
    columns='category', 
    values='count', 
    aggfunc='sum',
    fill_value=0
).reset_index()

results_wide = gpd.GeoDataFrame(
    results_wide.merge(results[['h3_id', 'geometry']].drop_duplicates(), on='h3_id'),
    geometry='geometry',
    crs=results.crs
)



['accommodation', 'accountant_or_bookkeeper', 'adoption_service', 'agricultural_area', 'agricultural_service', 'air_transport_facility_service', 'airport', 'airport_shuttle_service', 'airport_terminal', 'allergy_and_immunology', 'alternative_medicine', 'amateur_sport_team', 'ambulance_ems_service', 'amusement_park', 'animal_boarding', 'animal_hospital', 'animal_rescue', 'animal_service', 'animal_shelter', 'animal_training', 'antique_shop', 'apartment_building', 'applicance_repair_service', 'aquarium', 'arcade', 'architectural_design_service', 'art_craft_hobby_store', 'art_gallery', 'art_museum', 'astrological_advising', 'atm', 'attorney_or_law_firm', 'auto_body_shop', 'auto_detailing_service', 'auto_glass_service', 'auto_rental_service', 'auto_repair_service', 'b2b_service', 'b2b_supplier_distributor', 'bakery', 'bank', 'bar', 'bar_and_grill', 'barber_shop', 'beauty_salon', 'bed_and_breakfast', 'beverage_shop', 'bicycle_rental_service', 'bingo_hall', 'boat_dealer', 'bookstore', 'bowlin

## ABC

  from .autonotebook import tqdm as notebook_tqdm


{np.int32(14): 'restaurant',
 np.int32(1): 'government_office',
 np.int32(8): 'family_service',
 np.int32(11): 'dog_park',
 np.int32(2): 'technical_service',
 np.int32(7): 'vehicle_service',
 np.int32(5): 'entertainment_location',
 np.int32(3): 'doctors_office',
 np.int32(9): 'sport_recreation_club',
 np.int32(13): 'department_store',
 np.int32(4): 'museum',
 np.int32(10): 'massage_salon',
 np.int32(0): 'liquor_store',
 np.int32(6): 'place_of_worship',
 np.int32(12): 'electric_utility_service'}

# Data management (Code)

In [None]:
#Main body of code
for dataset,format in datasets.items():
    print(f"Aggregating {dataset} into H3 resolution {RES}...")
    # gdf = gpd.read_parquet(INTERIM_DATA_DIR/f"{CITY}_{dataset}.parquet")
    gdf = gpd.read_parquet(INTERIM_DATA_DIR/f"{dataset}.parquet")
    # Ensure same CRS
    if gdf.crs is None:
        raise ValueError(f"{dataset} has no CRS defined")
    if gdf.crs != gdf_h3.crs:
            gdf = gdf.to_crs(gdf_h3.crs)
    print(gdf.crs)
    print(gdf_h3.crs)
    if format == "Points":
        # Spatial join points -> H3 polygons to assign h3_id
        try:
            gdf_pts = gpd.sjoin(gdf, gdf_h3[['h3_id', 'geometry']], how='left', predicate='within')        
        except TypeError:
            gdf_pts = gpd.sjoin(gdf_h3[['h3_id', 'geometry']],gdf, how='left', op='within')

        # Drop points not matched to any H3 cell and count points per h3_id
        gdf_pts = gdf_pts.dropna(subset=['h3_id'])
        gdf_agg = gdf_pts.groupby('h3_id').size().reset_index(name='count')
    elif format == "Polygons":
        gdf_poly = gpd.overlay(gdf_h3, gdf, how='intersection')
        gdf_agg = gdf_poly.groupby('h3_id').agg({'some_field': 'sum'}).reset_index()
    
    gdf_h3 = gdf_h3.merge(gdf_agg, on="h3_id", how="left")
    # gdf_h3['count'] = gdf_h3['count'].fillna(0)
    
    print(f"Completed aggregation for {dataset}.")

Aggregating overture_places into H3 resolution 10...
{"$schema": "https://proj.org/schemas/v0.7/projjson.schema.json", "type": "ProjectedCRS", "name": "ETRS89 / UTM zone 31N", "base_crs": {"name": "ETRS89", "datum_ensemble": {"name": "European Terrestrial Reference System 1989 ensemble", "members": [{"name": "European Terrestrial Reference Frame 1989"}, {"name": "European Terrestrial Reference Frame 1990"}, {"name": "European Terrestrial Reference Frame 1991"}, {"name": "European Terrestrial Reference Frame 1992"}, {"name": "European Terrestrial Reference Frame 1993"}, {"name": "European Terrestrial Reference Frame 1994"}, {"name": "European Terrestrial Reference Frame 1996"}, {"name": "European Terrestrial Reference Frame 1997"}, {"name": "European Terrestrial Reference Frame 2000"}, {"name": "European Terrestrial Reference Frame 2005"}, {"name": "European Terrestrial Reference Frame 2014"}, {"name": "European Terrestrial Reference Frame 2020"}], "ellipsoid": {"name": "GRS 1980", "sem

MergeError: Passing 'suffixes' which cause duplicate columns {'count_x'} is not allowed.

# Plots

In [3]:
# Plots and visualizations

# Save results

In [26]:
#Save results and figures
results.to_parquet(PROCESSED_DATA_DIR/f"{CITY}_h3_res{RES}_aggregated.parquet", index=False)
results_wide.to_parquet(PROCESSED_DATA_DIR/f"{CITY}_h3_res{RES}_aggregated_wide.parquet", index=False)