# Multiple stops in a route segment

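* How should we handle a route segment that contains more than one stop?
* Do we have any stop-level metrics that we'd want to aggregate up to the segment?
* If we had road segments, we would be aggregating stop-level metrics across multiple operators. Let's test that now by checking how many stops fall in a route segment that belongs to a different operator.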

In [1]:
import dask.dataframe as dd
import dask_geopandas as dg
import geopandas as gpd
import intake
import pandas as pd

from segment_speed_utils import (gtfs_schedule_wrangling, helpers, 
                                 sched_rt_utils)
from segment_speed_utils.project_vars import SEGMENT_GCS, analysis_date

# Open the intake catalog(s) defined by the YAML files matching this glob.
# The pattern is relative, so it presumably resolves against the notebook's
# working directory -- TODO confirm. Later cells read `catalog.route_segments`.
catalog = intake.open_catalog("*.yml")



In [2]:
# Look up the `route_segments` catalog entry for the analysis date,
# then read it into memory.
route_segments = (
    catalog
    .route_segments(analysis_date=analysis_date)
    .read()
)

In [3]:
# Preview the first rows to see which columns the segments table carries
# (e.g. `feed_key`, `route_id`, `segment_sequence`, and two geometry columns).
route_segments.head()

Unnamed: 0,geometry,feed_key,name,route_id,direction_id,longest_shape_id,route_dir_identifier,segment_sequence,gtfs_dataset_key,geometry_arrowized
0,"LINESTRING (-110114.012 74427.063, -110114.176...",008d5112a7e531d0562d26e34d77869d,Sacramento Schedule,1,0.0,45169,901211056,0,927430a256c4d23e37cf417362df9e7f,"POLYGON ((-110092.479 74347.387, -110092.539 7..."
1,"LINESTRING (-111032.269 74341.302, -111064.225...",008d5112a7e531d0562d26e34d77869d,Sacramento Schedule,1,0.0,45169,901211056,1,927430a256c4d23e37cf417362df9e7f,"POLYGON ((-112031.987 74325.091, -112033.947 7..."
2,"LINESTRING (-112031.911 74345.090, -112032.352...",008d5112a7e531d0562d26e34d77869d,Sacramento Schedule,1,0.0,45169,901211056,2,927430a256c4d23e37cf417362df9e7f,"POLYGON ((-112315.038 74335.610, -113031.548 7..."
3,"LINESTRING (-113031.553 74355.410, -113049.934...",008d5112a7e531d0562d26e34d77869d,Sacramento Schedule,1,0.0,45169,901211056,3,927430a256c4d23e37cf417362df9e7f,"POLYGON ((-114031.549 74339.090, -114033.509 7..."
4,"LINESTRING (-114031.475 74359.090, -114100.641...",008d5112a7e531d0562d26e34d77869d,Sacramento Schedule,1,0.0,45169,901211056,4,927430a256c4d23e37cf417362df9e7f,"POLYGON ((-114627.487 74347.454, -114629.315 7..."


In [4]:
# Scheduled stops for the analysis date, limited to the feed identifier,
# the stop identifier and the stop geometry. `.compute()` is called on the
# result, so the helper presumably returns a lazy dask frame -- TODO confirm.
stops = (
    helpers.import_scheduled_stops(
        analysis_date,
        columns=["feed_key", "stop_id", "geometry"],
    )
    .compute()
)

In [5]:
# Disabled code: the crosswalk call below is wrapped in a triple-quoted
# string, so it never runs and `scheduled_trips` is never defined.
# Because the string is the cell's last expression, the notebook echoes it
# back as the cell output.
# TODO(review): delete this cell, or restore the call if it is still needed.
'''
scheduled_trips = sched_rt_utils.crosswalk_scheduled_trip_grouping_with_rt_key(
    analysis_date = analysis_date, 
    keep_trip_cols = ["feed_key", "trip_id", "route_id", "direction_id"]
)
'''

'\nscheduled_trips = sched_rt_utils.crosswalk_scheduled_trip_grouping_with_rt_key(\n    analysis_date = analysis_date, \n    keep_trip_cols = ["feed_key", "trip_id", "route_id", "direction_id"]\n)\n'

In [6]:
# Spatially match every stop to each route segment it intersects.
# Both inputs carry a `feed_key` column, so the joined frame exposes them as
# `feed_key_left` (from `stops`) and `feed_key_right` (from `route_segments`).
# The positional index of the matched segment (`index_right`) is not needed.
stops_in_route_segments = (
    gpd.sjoin(
        left_df=stops,
        right_df=route_segments,
        how="inner",
        predicate="intersects",
    )
    .drop(columns=["index_right"])
)

In [7]:
# (rows, columns) of the joined frame. The inner spatial join emits one row
# per matching stop/segment pair, so a stop that intersects several segments
# is counted once per match.
stops_in_route_segments.shape

(29379, 12)

In [8]:
# Count the joined rows where the stop's feed (`feed_key_left`) is not the
# feed that owns the matched route segment (`feed_key_right`), i.e. a stop
# from one operator sitting inside another operator's segment.
stops_in_route_segments.loc[
    stops_in_route_segments["feed_key_left"]
    != stops_in_route_segments["feed_key_right"]
].shape

(212, 12)