In [1]:
import pandas as pd
import geopandas as gpd
from shapely import geometry

In [2]:
# Load the IRIS boundary polygons and reproject them to EPSG:2154
# (Lambert-93, a projected CRS in metres) so that distances computed
# downstream can be read as metres.
iris_data = (
    gpd.read_file("../data/boundaries/IRIS_with_RegDepCommArr.gpkg")
    .to_crs(epsg=2154)
)

# Load the rail stations (with travel-time attributes) in the same CRS,
# so both layers can be spatially joined without reprojection.
stations_data = (
    gpd.read_file("../data/transport/RATP/railstations_with_traveltime.gpkg")
    .to_crs(epsg=2154)
)

In [3]:
# Spatial join assigning each station to the IRIS polygon that contains it
# (disabled here; the same "within" join is run later against the dissolved IRIS layer)
#stations_with_iris = gpd.sjoin(stations_data, iris_data[['INSEE_IRIS_ID', 'geometry']], how='left', predicate='within')

In [4]:
# Nearest-neighbour accessibility between rail stations and IRIS centroids.
#
# Distances returned by sjoin_nearest are expressed in the units of the layers'
# CRS. The "_m" column suffixes and the walking-time conversion below assume
# those units are metres (both layers are reprojected to EPSG:2154 when loaded).

# Walking speed used to convert distance into time: 5 km/h
# (about 1.389 m/s, i.e. about 83.3 m per minute).
WALK_SPEED_KMH = 5.0
WALK_SPEED_M_PER_MIN = WALK_SPEED_KMH * 1000 / 60

# Dissolve multipart IRIS geometries so each INSEE_IRIS_ID is a single feature
iris_dissolved = iris_data.dissolve(by="INSEE_IRIS_ID", as_index=False)

# Centroid layer: same attributes, polygon geometry replaced by its centroid
iris_centroids = iris_dissolved.copy()
iris_centroids["geometry"] = iris_centroids.centroid

# Stations -> nearest IRIS centroid
stations_nearestIRIS = gpd.sjoin_nearest(
    stations_data,
    iris_centroids[["INSEE_IRIS_ID", "geometry"]],
    how="left",
    distance_col="dist_to_centroid_m",
)
# sjoin_nearest returns one row per equidistant neighbour, so a station exactly
# between two centroids would appear twice. Keep the first match per station
# row (the left index is preserved by the left join; assumes stations_data has
# a unique index, which is what read_file produces).
stations_nearestIRIS = (
    stations_nearestIRIS[~stations_nearestIRIS.index.duplicated(keep="first")]
    .drop(columns=["index_right"])
)

# IRIS centroid -> nearest station
iris_with_station = gpd.sjoin_nearest(
    iris_centroids,
    stations_data[["id_gares", "geometry"]],
    how="left",
    distance_col="dist_to_station_m",
)
# Keep a single (closest) station per IRIS. A stable sort makes the tie-break
# deterministic: among equidistant stations the one that comes first in the
# join output is kept.
iris_with_station = (
    iris_with_station
    .sort_values("dist_to_station_m", ascending=True, kind="mergesort")
    .drop_duplicates(subset="INSEE_IRIS_ID", keep="first")
    .drop(columns=["index_right"])
)

# Walking time in minutes = straight-line distance / walking speed
stations_nearestIRIS = stations_nearestIRIS.assign(
    walk_time_min=stations_nearestIRIS["dist_to_centroid_m"] / WALK_SPEED_M_PER_MIN
)
iris_with_station = iris_with_station.assign(
    walk_time_min=iris_with_station["dist_to_station_m"] / WALK_SPEED_M_PER_MIN
)

# Attach the centroid-based results to the dissolved IRIS polygons
iris_final = iris_dissolved.merge(
    iris_with_station[["INSEE_IRIS_ID", "id_gares", "dist_to_station_m", "walk_time_min"]],
    on="INSEE_IRIS_ID",
    how="left",
)

In [5]:
# Count the distinct stations located inside each IRIS polygon.

# Assign every station row to the IRIS polygon that contains it. Rows that fall
# outside all polygons keep a missing INSEE_IRIS_ID (left join) and are ignored
# by the groupby below.
stations_in_iris = gpd.sjoin(
    stations_data,
    iris_final[["INSEE_IRIS_ID", "geometry"]],
    how="left",
    predicate="within",
)

# Count unique station ids rather than rows: stations_data can hold several
# rows for the same id_gares, which would inflate a plain row count.
station_counts = (
    stations_in_iris
    .groupby("INSEE_IRIS_ID")["id_gares"]
    .nunique()
    .reset_index(name="station_count")
)

# Add the counts to the IRIS layer; IRIS without any station get 0 instead of NaN
iris_with_counts = (
    iris_final
    .merge(station_counts, on="INSEE_IRIS_ID", how="left")
    .assign(station_count=lambda df: df["station_count"].fillna(0).astype(int))
)

In [6]:
# Attach the travel time of each IRIS's nearest station.

# Reduce stations_data to one row per id_gares, keeping the row with the
# smallest time_min (ascending sort, first occurrence kept).
stations_unique = (
    stations_data
    .sort_values("time_min")
    .drop_duplicates(subset="id_gares")
)

# Left join on the station id so every IRIS row is kept, even without a match
iris_with_calcscountstime = iris_with_counts.merge(
    stations_unique[["id_gares", "time_min"]],
    on="id_gares",
    how="left",
)


In [7]:
# Export the stations layer (nearest IRIS centroid, distance and walking time)
stations_nearestIRIS.to_file(
    "../data/transport/railstations_IRIS_accessibility.gpkg",
    driver="GPKG",
)

# Export the IRIS layer used for the choropleths
# (nearest station, distance, walking time, station count and travel time)
iris_with_calcscountstime.to_file(
    "../data/boundaries/IRIS_transport_accessibility.gpkg",
    driver="GPKG",
)