# Find local bus routes with same origin / destination

In [None]:
import dask.dataframe as dd
import dask_geopandas as dg
import intake
import geopandas as gpd
import pandas as pd

from shapely.geometry import Point

import _utils
from _utils import GCS_FILE_PATH, SELECTED_DATE, COMPILED_CACHED_VIEWS
from shared_utils import geography_utils
from bus_service_utils import gtfs_build

# Open the intake catalog(s) described by the YAML files matching "*.yml"
# (presumably resolved relative to the notebook's working directory).
catalog = intake.open_catalog("*.yml")

In [None]:
# Load the Amtrak Thruway routes (with origin / destination attached) and
# keep only rows whose route_type is '3' (bus), renumbering the index.
amtrak_routes = (
    catalog.amtrak_thruway_routes_with_od.read()
    .loc[lambda df: df.route_type == "3"]
    .reset_index(drop=True)
)

In [None]:
# Cached views for the selected service date: route shapes as a
# dask-geopandas frame and the trips table as a dask dataframe.
routelines = dg.read_parquet(
    f"{COMPILED_CACHED_VIEWS}routelines_{SELECTED_DATE}.parquet"
)
trips = dd.read_parquet(f"{COMPILED_CACHED_VIEWS}trips_{SELECTED_DATE}.parquet")

In [None]:
def keep_longest_route(routelines: gpd.GeoDataFrame | dg.GeoDataFrame, 
                       trips: pd.DataFrame | dd.DataFrame
                      ):
    """Keep the longest bus shape per (calitp_itp_id, route_id).

    Route shapes are merged with trips, projected to CA State Plane,
    filtered to fully matched bus rows, and reduced to the single longest
    shape for every operator / route pair. Two point columns, ``origin``
    and ``destination``, hold the first and last coordinate of that shape.

    Parameters
    ----------
    routelines
        Route shapes; must carry a CRS that resolves to an EPSG code.
    trips
        Trip-level table to merge onto the shapes.

    Returns
    -------
    One row per (calitp_itp_id, route_id) with the columns
    calitp_itp_id, route_id, route_length, route_short_name,
    route_long_name, route_type, shape_id, geometry, origin, destination.
    """
    # Canonical authority string is "EPSG:<code>" with no space after the
    # colon; the previous "EPSG: <code>" form is not guaranteed to parse.
    source_crs = f"EPSG:{routelines.crs.to_epsg()}"

    merged = gtfs_build.merge_routes_trips(
        routelines,
        trips,
        crs=source_crs,
    ).to_crs(geography_utils.CA_StatePlane)

    # Keep only rows flagged "both" in _merge (presumably the pandas merge
    # indicator, i.e. shape and trip info both present) and only buses.
    is_full_bus_row = (merged._merge == "both") & (merged.route_type == "3")
    bus_routes = merged[is_full_bus_row]

    # Length is measured in the projected CRS units
    # (CA State Plane -- presumably feet; confirm against geography_utils).
    bus_routes = bus_routes.assign(route_length=bus_routes.geometry.length)

    keep_cols = [
        "calitp_itp_id", "route_id", "route_length",
        "route_short_name", "route_long_name",
        "route_type", "shape_id",
        "geometry",
    ]
    route_keys = ["calitp_itp_id", "route_id"]

    # Sorting length descending within each route means drop_duplicates
    # (which keeps the first occurrence) retains the longest shape.
    longest_route = (
        bus_routes
        .sort_values(route_keys + ["route_length"],
                     ascending=[True, True, False])
        .drop_duplicates(subset=route_keys)
        .reset_index(drop=True)
        [keep_cols]
    )

    # Add the route's origin and destination: first / last vertex of the line.
    # NOTE(review): .coords assumes every geometry is a single, non-empty
    # LineString -- confirm no MultiLineString shapes reach this point.
    longest_route = longest_route.assign(
        origin=longest_route.geometry.apply(lambda line: Point(line.coords[0])),
        destination=longest_route.geometry.apply(
            lambda line: Point(line.coords[-1])
        ),
    )

    return longest_route

In [None]:
# One row per operator / route: its longest shape plus origin / destination points.
longest_route = keep_longest_route(routelines=routelines, trips=trips)

In [None]:
# Draw a buffer around each route's origin and destination (the call below uses a 5-mile buffer)
def buffer_around_origin_destination(gdf: gpd.GeoDataFrame,
                                     buffer_feet: int = 0):
    """Add buffered origin / destination polygons to ``gdf``.

    The frame's active geometry and its ``origin`` and ``destination``
    columns are each projected to CA State Plane (feet); the two point
    columns are then buffered by ``buffer_feet``.

    Parameters
    ----------
    gdf
        GeoDataFrame with ``origin`` and ``destination`` geometry columns.
    buffer_feet
        Buffer distance, passed straight to ``GeoSeries.buffer``.

    Returns
    -------
    The projected frame with two extra columns, ``origin_buffer`` and
    ``destination_buffer``.
    """
    state_plane = geography_utils.CA_StatePlane

    # Frame-level to_crs is followed by explicit per-column reprojection of
    # the origin / destination columns before buffering.
    projected = gdf.to_crs(state_plane)

    origin_buffer = projected.origin.to_crs(state_plane).buffer(buffer_feet)
    destination_buffer = (
        projected.destination.to_crs(state_plane).buffer(buffer_feet)
    )

    return projected.assign(
        origin_buffer=origin_buffer,
        destination_buffer=destination_buffer,
    )

In [None]:
# Buffer every Amtrak origin / destination by 5 miles (expressed in feet).
amtrak_routes2 = buffer_around_origin_destination(
    gdf=amtrak_routes,
    buffer_feet=geography_utils.FEET_PER_MI * 5,
)

In [None]:
# Local bus routes whose origin point falls inside any Amtrak *origin*
# buffer. The origin points are matched against origin_buffer (not
# destination_buffer) so that origins are compared with origins.
intersect_origin = gpd.sjoin(
    longest_route.set_geometry("origin"),
    amtrak_routes2[["origin_buffer"]].set_geometry("origin_buffer"),
    how="inner",
    predicate="intersects",
)[["calitp_itp_id", "route_id", "geometry"]].drop_duplicates().set_geometry("geometry")

In [None]:
# Of the origin-matched routes, keep those whose destination point also
# falls inside any Amtrak destination buffer.
# intersect_origin only carries calitp_itp_id / route_id / geometry, so the
# destination points are pulled back in from longest_route before being
# made the active geometry.
# NOTE(review): the buffer frame carries no Amtrak route id, so the origin
# and destination matches are not required to come from the same Amtrak
# route -- confirm that is intended.
intersect_destination = gpd.sjoin(
    longest_route.merge(
        intersect_origin[["calitp_itp_id", "route_id"]].drop_duplicates(),
        on=["calitp_itp_id", "route_id"],
        how="inner",
    ).set_geometry("destination"),
    amtrak_routes2[["destination_buffer"]].set_geometry("destination_buffer"),
    how="inner",
    predicate="intersects",
)[["calitp_itp_id", "route_id", "geometry"]].drop_duplicates().set_geometry("geometry")

In [None]:
# Peek at the first rows of the destination-matched routes.
intersect_destination.head()

In [None]:
# Spot check: matched routes for operator 182 whose route_id contains "720".
test = intersect_destination.loc[
    lambda df: (df.calitp_itp_id == 182) & (df.route_id.str.contains("720"))
]

In [None]:
# Same spot check taken from longest_route, which still carries the
# origin / destination point columns used by the joins below.
test = longest_route.loc[
    lambda df: (df.calitp_itp_id == 182) & (df.route_id.str.contains("720"))
]

In [None]:
# Which Amtrak origin buffers contain the test route's origin point?
# route_id is kept on the Amtrak side so each match can be identified
# (it surfaces as route_id_right after the join).
amtrak_origin_buffers = (
    amtrak_routes2[["route_id", "origin_buffer"]].set_geometry("origin_buffer")
)

intersect_origin = test.set_geometry("origin").sjoin(
    amtrak_origin_buffers,
    how="inner",
    predicate="intersects",
)

intersect_origin

In [None]:
# Which Amtrak destination buffers contain the test route's destination point?
amtrak_destination_buffers = (
    amtrak_routes2[["route_id", "destination_buffer"]]
    .set_geometry("destination_buffer")
)

intersect_destination = test.set_geometry("destination").sjoin(
    amtrak_destination_buffers,
    how="inner",
    predicate="intersects",
)

intersect_destination

In [None]:
# Amtrak route_id to inspect in the plots / lookups that follow.
this_amtrak_route = "37329"

# Plot the origin matches that belong to that Amtrak route.
intersect_origin.loc[
    lambda df: df.route_id_right == this_amtrak_route
].plot()

In [None]:
# Plot the destination matches that belong to the selected Amtrak route.
intersect_destination.loc[
    lambda df: df.route_id_right == this_amtrak_route
].plot()

In [None]:
# Distinct origin / destination stop names for the selected Amtrak route.
(
    amtrak_routes2
    .loc[lambda df: df.route_id == this_amtrak_route]
    [["origin_stop_name", "destination_stop_name"]]
    .drop_duplicates()
)