In [2]:
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# --- Input / output locations (relative paths) ---
RAW_DATA_DIR = "../raw_data/"
GEOCODED_DATA_DIR = "../geocoded_data/"
OUTPUT_DIR = "../aggregated_data/"

# Create the output folder up front; an already-existing folder is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Neighbourhood boundaries: read once at module level ---
neighbourhoods = gpd.read_file(
    os.path.join(RAW_DATA_DIR, "Buurtgrenzen_Zwolle.shp")
)

# --- Function to join with neighbourhoods ---
def add_neighbourhood(input_filename, output_filename, lat_col='latitude', lon_col='longitude',
                      *, name_col="OFFICIËLE"):
    """Tag each row of a geocoded CSV with its neighbourhood and save the result.

    The CSV is read from ``GEOCODED_DATA_DIR``, its coordinates are turned into
    points, and the points are spatially joined against the module-level
    ``neighbourhoods`` GeoDataFrame. The joined table (everything except the
    ``geometry`` column) is written to ``OUTPUT_DIR``.

    Args:
        input_filename: Name of the CSV file inside ``GEOCODED_DATA_DIR``.
        output_filename: Name of the CSV file to write inside ``OUTPUT_DIR``.
        lat_col: Column holding the latitude (interpreted as EPSG:4326).
        lon_col: Column holding the longitude (interpreted as EPSG:4326).
        name_col: Attribute column of ``neighbourhoods`` that is copied into
            the ``buurt_naam`` output column.

    Returns:
        None. The result is written to disk and a confirmation is printed.

    Raises:
        KeyError: If ``lat_col`` or ``lon_col`` is not a column of the CSV.

    Warns:
        UserWarning: If the joined table has no column named ``name_col``;
            ``buurt_naam`` is then written empty for every row.
    """
    import warnings

    input_path = os.path.join(GEOCODED_DATA_DIR, input_filename)
    output_path = os.path.join(OUTPUT_DIR, output_filename)

    # Load the CSV and build point geometries from the lon/lat columns.
    # points_from_xy is the vectorised equivalent of creating one Point per row.
    df = pd.read_csv(input_path)
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df[lon_col], df[lat_col]),
        crs="EPSG:4326",  # GPS coords
    )

    # Reproject to the shapefile's CRS (e.g. EPSG:28992) so both layers share
    # one coordinate system before the join.
    gdf = gdf.to_crs(neighbourhoods.crs)

    # Left join keeps every input row, including points that fall in no
    # neighbourhood (their neighbourhood attributes stay empty).
    gdf_joined = gpd.sjoin(gdf, neighbourhoods, how="left", predicate="within")

    # Copy the neighbourhood name into a stable 'buurt_naam' column. A missing
    # source column used to yield an all-empty column without any notice; keep
    # that best-effort output but make the problem visible.
    if name_col in gdf_joined.columns:
        gdf_joined["buurt_naam"] = gdf_joined[name_col]
    else:
        warnings.warn(
            f"Column {name_col!r} not found after spatial join for "
            f"{input_filename!r}; 'buurt_naam' will be empty.",
            stacklevel=2,
        )
        gdf_joined["buurt_naam"] = None

    # Save result without the geometry column.
    gdf_joined.drop(columns="geometry").to_csv(output_path, index=False)
    print(f"✅ Saved: {output_path}")

# --- Process every geocoded file: (input CSV, output CSV) pairs, in order ---
for geocoded_name, result_name in (
    ("coachgesprekken_geocoded.csv", "coachgesprekken_with_neighbourhood.csv"),
    ("sde_minimized_geocoded.csv", "sde_minimized_with_neighbourhood.csv"),
    ("isde_subsidies_geocoded.csv", "isde_minimized_with_neighbourhood.csv"),
    ("sce_minimized_geocoded.csv", "sce_minimized_with_neighbourhood.csv"),
):
    add_neighbourhood(geocoded_name, result_name)


✅ Saved: ../aggregated_data/coachgesprekken_with_neighbourhood.csv
✅ Saved: ../aggregated_data/sde_minimized_with_neighbourhood.csv
✅ Saved: ../aggregated_data/isde_minimized_with_neighbourhood.csv
✅ Saved: ../aggregated_data/sce_minimized_with_neighbourhood.csv
