# Spatial join shapes to roads

* Parallel routes

Instead of joining points (stops, stop arrivals) to roads, spatially join scheduled shapes to buffered road segments, then count how many shapes and scheduled trips fall on each segment.

In [None]:
import dask.dataframe as dd
import dask_geopandas as dg
import geopandas as gpd
import intake
import pandas as pd

from shared_utils import rt_dates, rt_utils
from segment_speed_utils import helpers
from segment_speed_utils.project_vars import (SCHED_GCS, 
                                              SHARED_GCS, 
                                              PROJECT_CRS
                                             )
                                             
# Open the shared data catalog; the path is relative, so it depends on the
# directory this notebook is run from.
catalog = intake.open_catalog(
    "../_shared_utils/shared_utils/shared_data_catalog.yml"
)

# Date value used to select the scheduled shapes and to name the output file.
analysis_date = rt_dates.DATES["sep2023"]

In [None]:
# Segmented primary/secondary roads, read with dask-geopandas.
primary_secondary = dg.read_parquet(
    f"{SHARED_GCS}segmented_roads_2020_primarysecondary.parquet"
)

# Segmented local roads, restricted to the same columns as the
# primary/secondary table so the two can be concatenated row-wise.
local = dg.read_parquet(
    f"{SHARED_GCS}segmented_roads_2020_local.parquet",
    columns=list(primary_secondary.columns),
)

In [None]:
# Stack both road tables into one dask collection.
# `dd.concat` is the documented public entry point; `dd.multi` is a submodule
# path that is not guaranteed to stay importable across dask releases.
road_segments = (
    dd.concat([primary_secondary, local], axis=0)
    # NOTE: in dask, reset_index restarts numbering in every partition, so
    # the resulting index is not globally unique.
    .reset_index(drop=True)
    .repartition(npartitions=10)
)

In [None]:
# Scheduled shapes for the analysis date, limited to the key, trip count and
# geometry columns and requested in the project CRS.
shapes = helpers.import_scheduled_shapes(
    analysis_date,
    columns=["shape_array_key", "n_trips", "geometry"],
    get_pandas=True,  # presumably returns an in-memory GeoDataFrame -- confirm
    crs=PROJECT_CRS,
)

In [None]:
def spatial_join_by_partition(road_segments, shapes, buffer_distance=35):
    """
    Spatially join road segments to shapes.

    Each road segment is buffered, then matched to every shape whose
    geometry intersects that buffer.

    Args:
        road_segments: GeoDataFrame of road segments (a single partition
            when this is called through `map_partitions`).
        shapes: GeoDataFrame of shapes to join against.
        buffer_distance: distance to buffer each road segment by, in the
            units of the road segments' CRS. Defaults to 35, the value
            that was previously hard-coded.

    Returns:
        One row per intersecting (road segment, shape) pair, carrying the
        non-geometry columns of both inputs.

    NOTE(review): assumes both inputs share the same projected CRS
    (presumably PROJECT_CRS) -- confirm for road_segments.
    """
    # Widen the road line so shapes that run alongside it still intersect.
    buffered_roads = road_segments.assign(
        geometry=road_segments.geometry.buffer(buffer_distance)
    )

    joined = gpd.sjoin(
        buffered_roads,
        shapes,
        how="inner",
        predicate="intersects",
    )

    # Only attributes are needed downstream; drop the join bookkeeping
    # column and the buffered geometry.
    return joined.drop(columns=["index_right", "geometry"])
    

In [None]:
# Road-segment attribute columns; these are also the groupby keys used when
# aggregating the join results.
road_cols = [
    "linearid",
    "mtfcc",
    "fullname",
    "segment_sequence",
    "primary_direction",
]

# Column -> dtype mappings used to describe the join output to dask.
road_dtypes = road_segments[road_cols].dtypes.to_dict()
shape_dtypes = shapes[["shape_array_key", "n_trips"]].dtypes.to_dict()

# Run the join on every road-segment partition against the full shapes table
# and keep the result in memory.
# NOTE(review): meta lists only road_cols plus the two shape columns; confirm
# that spatial_join_by_partition returns no other columns.
sjoin_results = road_segments.map_partitions(
    spatial_join_by_partition,
    shapes,
    meta={**road_dtypes, **shape_dtypes},
    align_dataframes=False,
).persist()

In [None]:
# For each road segment, count the joined shape rows and total their n_trips.
# The output columns keep the input names ("shape_array_key", "n_trips").
trips_per_segment = (
    sjoin_results
    .groupby(road_cols, observed=True, group_keys=False)
    .agg({"shape_array_key": "count", "n_trips": "sum"})
    .reset_index()
    .compute()
)

In [None]:
# Write the per-segment totals to the schedule bucket, keyed by analysis date.
trips_per_segment.to_parquet(f"{SCHED_GCS}trips_road_segments_{analysis_date}.parquet")