# Config

In [15]:
# Libraries
import geopandas as gpd
from config import INTERIM_DATA_DIR,PROCESSED_DATA_DIR

In [31]:
# Params
# CITY is interpolated into the H3 grid file name; RES is the H3 resolution.
CITY, RES = "barcelona", 10

# Datasets to aggregate: interim parquet file stem -> geometry type
# ("Points" or "Polygons") that selects the aggregation branch.
datasets = dict(
    # ndvi_data="Points",  # currently disabled
    overture_places="Points",
)

# Load data

In [43]:
# Load the H3 grid (one polygon per h3_id) for the configured city and resolution
gdf_h3 = gpd.read_parquet(
    INTERIM_DATA_DIR / "{}_h3_res{}.parquet".format(CITY, RES)
)
# Preview the first rows
gdf_h3.head(n=5)

Unnamed: 0,h3_id,geometry
0,8a394461b98ffff,"POLYGON ((430592.392 4576915.43, 430577.5 4576..."
1,8a394461b92ffff,"POLYGON ((430681.742 4577368.251, 430666.851 4..."
2,8a394461b91ffff,"POLYGON ((430637.067 4577141.842, 430622.176 4..."
3,8a394461b90ffff,"POLYGON ((430722.225 4577245.969, 430707.333 4..."
4,8a39446f4b1ffff,"POLYGON ((430607.72 4574700.8, 430592.828 4574..."


In [54]:
# Read the Overture places layer and reproject it to the CRS of the H3 grid
gdf = gpd.read_parquet(INTERIM_DATA_DIR / "overture_places.parquet")
gdf = gdf.to_crs(gdf_h3.crs)

# Tag every place with the H3 cell that contains it (left join: rows that
# fall outside the grid are kept with a missing h3_id)
gdf = gpd.sjoin(
    gdf,
    gdf_h3[["h3_id", "geometry"]],
    how="left",
    predicate="within",
)

# Number of places per (H3 cell, category) pair
results = (
    gdf.groupby(["h3_id", "category"])
    .size()
    .reset_index(name="count")
)
results.head()

Unnamed: 0,h3_id,category,count
0,8a3944600007fff,architectural_design_service,2
1,8a3944600007fff,bakery,1
2,8a3944600007fff,beauty_salon,2
3,8a3944600007fff,christian_place_of_worship,3
4,8a3944600007fff,clothing_store,1


# Data management (Code)

In [44]:
# Main body of code
# For every configured dataset: read it from the interim folder, align its CRS
# with the H3 grid, aggregate it per H3 cell and left-merge the result into gdf_h3.
#
# NOTE(review): gdf_h3 is both the input and the output of this cell. Re-running
# it without reloading gdf_h3 (or enabling a second "Points" dataset) merges a
# second 'count' column, which pandas disambiguates with _x/_y suffixes.
for dataset, geom_kind in datasets.items():  # not named `format`: that shadows the builtin
    # Fail fast on a typo in the config instead of reusing a stale/undefined gdf_agg
    if geom_kind not in ("Points", "Polygons"):
        raise ValueError(
            f"Unsupported geometry type {geom_kind!r} for dataset {dataset!r}; "
            "expected 'Points' or 'Polygons'"
        )

    print(f"Aggregating {dataset} into H3 resolution {RES}...")
    # Interim files are currently read without the CITY prefix.
    gdf = gpd.read_parquet(INTERIM_DATA_DIR/f"{dataset}.parquet")

    # Ensure same CRS as the grid; both the spatial join and the overlay need it
    if gdf.crs is None:
        raise ValueError(f"{dataset} has no CRS defined")
    if gdf.crs != gdf_h3.crs:
        gdf = gdf.to_crs(gdf_h3.crs)

    if geom_kind == "Points":
        # Spatial join points -> H3 polygons to assign h3_id
        h3_cells = gdf_h3[['h3_id', 'geometry']]
        try:
            gdf_pts = gpd.sjoin(gdf, h3_cells, how='left', predicate='within')
        except TypeError:
            # Older geopandas releases only accept `op`. The points must stay on
            # the left: a polygon is never "within" a point.
            gdf_pts = gpd.sjoin(gdf, h3_cells, how='left', op='within')

        # Drop points not matched to any H3 cell and count points per h3_id
        gdf_pts = gdf_pts.dropna(subset=['h3_id'])
        gdf_agg = gdf_pts.groupby('h3_id').size().reset_index(name='count')
    else:  # "Polygons"
        # Intersect the polygons with the grid, then sum a value column per cell.
        # NOTE(review): 'some_field' looks like a placeholder column name; replace
        # it with the real attribute before enabling a "Polygons" dataset.
        gdf_poly = gpd.overlay(gdf_h3, gdf, how='intersection')
        gdf_agg = gdf_poly.groupby('h3_id').agg({'some_field': 'sum'}).reset_index()

    # Left merge keeps every H3 cell; cells without any match get NaN
    gdf_h3 = gdf_h3.merge(gdf_agg, on="h3_id", how="left")

    print(f"Completed aggregation for {dataset}.")

Aggregating overture_places into H3 resolution 10...
Completed aggregation for overture_places.


In [45]:
# Inspect the grid after aggregation; because the aggregates are left-merged,
# cells without a matching row show a missing value in the merged column.
gdf_h3

Unnamed: 0,h3_id,geometry,count
0,8a394461b98ffff,"POLYGON ((430592.392 4576915.43, 430577.5 4576...",
1,8a394461b92ffff,"POLYGON ((430681.742 4577368.251, 430666.851 4...",
2,8a394461b91ffff,"POLYGON ((430637.067 4577141.842, 430622.176 4...",
3,8a394461b90ffff,"POLYGON ((430722.225 4577245.969, 430707.333 4...",
4,8a39446f4b1ffff,"POLYGON ((430607.72 4574700.8, 430592.828 4574...",
...,...,...,...
6915,8a394463016ffff,"POLYGON ((421721.842 4585294.905, 421706.946 4...",
6916,8a394463542ffff,"POLYGON ((421904.893 4586548.056, 421889.997 4...",
6917,8a39446342affff,"POLYGON ((420896.783 4586344.359, 420881.886 4...",
6918,8a3944635497fff,"POLYGON ((421604.946 4586009.684, 421590.05 45...",


# Plots

In [3]:
# Plots and visualizations

# Save results

In [5]:
#Save results and figures