# Grab transit routes near the State Highway Network (SHN)

* Find transit routes within 1 mile of the SHN.
* Visualize from operator or highway point of view to see % parallel or intersecting

Data Sources: 
* [SHN on Geoportal](https://opendata.arcgis.com/datasets/77f2d7ba94e040a78bfbe36feb6279da_0.geojson) > processed in `create_parallel_corridors.py` > exported to GCS > saved in catalog.
* Transit routes: all transit routes, both those in `gtfs_schedule.shapes` and those missing from it but found in `stops`. Uses the `routes_assembled.parquet` created by `traffic_ops/export_shapefiles.py` in GCS > saved in catalog.

In [1]:
import branca
import geopandas as gpd
import intake
import ipywidgets as widgets

import pandas as pd

from ipywidgets import *
from IPython.display import Markdown
from IPython.core.display import display

import create_parallel_corridors
import setup_corridors_stats
from shared_utils import map_utils, geography_utils
from shared_utils import calitp_color_palette as cp


# Open the intake catalog described by the YAML file(s) matching this pattern.
# NOTE(review): `catalog` is not referenced again in the visible cells.
catalog = intake.open_catalog("*.yml")

# Reuse the path constants defined in create_parallel_corridors so this
# notebook reads from the same locations that script uses.
IMG_PATH = create_parallel_corridors.IMG_PATH
DATA_PATH = create_parallel_corridors.DATA_PATH



## Read in processed data 

The overlay/intersection between transit routes and highways:
* % of transit route that intersection represents
* % of highway that intersection represents

If the overlap passes a certain threshold for the transit route (> 0.5?) **and** a certain threshold for the highway (> 0.1?), count that transit route as parallel. Otherwise, count it as intersecting.

Play with thresholds and see what makes sense, visualize with interactive maps.

Look at LA Metro, San Jose specifically to see if it is behaving as expected.

In [2]:
# Preprocessing calls kept inside a bare string literal so they do NOT run
# when the cell executes. Because the string is the cell's only expression,
# the notebook just echoes it back as the cell output. To regenerate the
# data, remove the surrounding triple quotes and re-run the cell.
'''
create_parallel_corridors.make_analysis_data(hwy_buffer_feet=
                       geography_utils.FEET_PER_MI, 
                       pct_route_threshold = 0.3,
                       pct_highway_threshold = 0.1,
                       DATA_PATH = create_parallel_corridors.DATA_PATH
                      )


# For map, need highway to be 250 ft buffer
#highways = create_parallel_corridors.process_highways(buffer_feet=250)
#highways.to_parquet(f"{DATA_PATH}highways.parquet")
'''

'\ncreate_parallel_corridors.make_analysis_data(hwy_buffer_feet=\n                       geography_utils.FEET_PER_MI, \n                       pct_route_threshold = 0.3,\n                       pct_highway_threshold = 0.1,\n                       DATA_PATH = create_parallel_corridors.DATA_PATH\n                      )\n\n\n# For map, need highway to be 250 ft buffer\n#highways = create_parallel_corridors.process_highways(buffer_feet=250)\n#highways.to_parquet(f"{DATA_PATH}highways.parquet")\n'

# Interactive Stats

Use a dropdown to check how many routes are parallel or intersecting, either by highway or by operator.

In [3]:
# Processed overlay of transit routes and highways; this is the frame handed
# to the interactive widgets below.
gdf = gpd.read_parquet(f"{DATA_PATH}parallel_or_intersecting.parquet")
# Highway geometries saved for mapping.
# NOTE(review): `highways` is not referenced again in the visible cells.
highways= gpd.read_parquet(f"{DATA_PATH}highways.parquet")

## Aggregate to highway or operator

Display these stats along with interactive map.

Ref: [aggregation-examples.ipynb](./aggregation-examples.ipynb) for how to get to right unit of analysis.

### Viability
* A route is only viable if its end-to-end transit trip takes no more than 2x the equivalent car trip time.
* Even among all the parallel routes, only a smaller subset will be viable.

In [4]:
def display_operator_stats(df, select_col = "itp_id", operator_name = 182):
    """
    Show summary tables and a map for one transit operator.

    Parameters
    ----------
    df : the full routes/highways frame to subset.
    select_col : column compared against `operator_name` to pick the
        operator's rows.
    operator_name : value to match in `select_col`.

    Displays two tables, each sorted by `pct_parallel` (descending), then
    hands both subsets to `make_transit_map`:
      1. operator-level stats for the selected rows
      2. highway-level stats for rows whose `Route` appears among the
         operator's routes and whose `County` appears among the operator's
         counties (the two membership checks are independent, not matched
         pair-by-pair).
    """
    def _rank_by_pct_parallel(stats):
        # Highest share of parallel routes first, with a clean 0..n-1 index
        return (stats
                .sort_values("pct_parallel", ascending=False)
                .reset_index(drop=True)
               )

    # Rows belonging to the selected operator
    is_selected = df[select_col] == operator_name
    operator_df = df[is_selected]
    operator_stats = _rank_by_pct_parallel(
        setup_corridors_stats.aggregate(operator_df, by="operator")
    )

    # Rows for the highways / counties this operator touches
    on_operator_route = df.Route.isin(operator_df.Route)
    in_operator_county = df.County.isin(operator_df.County)
    hwy_df = df[on_operator_route & in_operator_county]
    hwy_stats = _rank_by_pct_parallel(
        setup_corridors_stats.aggregate(hwy_df, by="highway")
    )

    sections = [
        (f"### Summary Stats for ITP ID: {operator_name}",
         operator_stats),
        (f"### Summary Stats for Highways that ITP ID {operator_name} Intersects",
         hwy_stats),
    ]
    for heading, table in sections:
        display(Markdown(heading))
        display(table)

    make_transit_map(operator_df, hwy_df)

In [5]:
def make_transit_map(operator_df, hwy_df):
    """
    Display a folium map of an operator's transit routes over highways.

    Parameters
    ----------
    operator_df : GeoDataFrame of the operator's rows. Must contain the
        columns listed in `keep_cols` below. Geometry is buffered by 200 and
        simplified with tolerance 100 (in the units of the frame's CRS)
        before plotting.
    hwy_df : frame plotted as the "Highways" layer, colored by `Route`.

    Returns
    -------
    None. The map is rendered with `display`. If `operator_df` has no rows,
    a short message is displayed instead of a map.
    """
    # Nothing to center the map on or to title it with; bail out early
    # instead of failing on `.iloc[0]` further down.
    if operator_df.empty:
        display(Markdown("No transit routes to map."))
        return

    def data_to_plot(df):
        keep_cols = ["itp_id", "route_id", 
                     "Route", "County", "District", "RouteType",
                     "pct_route", "pct_highway", "parallel",
                     "geometry"
                    ]
        df = df[keep_cols].reset_index(drop=True)

        # Use simplify to make gdf smaller
        # folium map is creating too large of an HTML file to check in
        df = df.assign(
            geometry = df.geometry.buffer(200).simplify(tolerance=100),
        )

        return df

    to_map = data_to_plot(operator_df)
    
    # Set various components for map
    hwys_popup_dict = {
        "Route": "Highway Route",
        "RouteType": "Route Type",
        "County": "County"   
    }

    transit_popup_dict = {
        "itp_id": "Operator ITP ID",
        "route_id": "Route ID",
        "pct_route": "% overlapping route",
        "pct_highway": "% overlapping highway",
    }

    hwys_color = branca.colormap.StepColormap(
        colors=["black", "gray"],
    )

    colorscale = branca.colormap.StepColormap(
        colors=[
            cp.CALITP_CATEGORY_BRIGHT_COLORS[0], #blue
            cp.CALITP_CATEGORY_BRIGHT_COLORS[1] # orange
        ],
    )
    
    # Center the map on the first transit route rather than a county
    # centroid; otherwise it's too zoomed out from where transit routes are.
    # The centroid is computed in the frame's own CRS and only the resulting
    # point is reprojected: taking a centroid after converting to a
    # geographic CRS triggers geopandas' "results are likely incorrect"
    # warning. Only the first row is used, so only that row is processed.
    transit_centroid = (to_map.geometry.iloc[[0]]
                        .centroid
                        .to_crs(geography_utils.WGS84)
                        .iloc[0]
                       )

    LAYERS_DICT = {
        "Highways": {"df": hwy_df,
            "plot_col": "Route",
            "popup_dict": hwys_popup_dict, 
            "tooltip_dict": hwys_popup_dict,
            "colorscale": hwys_color,
        },
        "Transit Routes": {"df": to_map,
            "plot_col": "parallel",
            "popup_dict": transit_popup_dict, 
            "tooltip_dict": transit_popup_dict,
            "colorscale": colorscale,
        },
    }
    
    LEGEND_URL = (
        "https://raw.githubusercontent.com/cal-itp/data-analyses/"
        "main/bus_service_increase/"
        "img/legend_intersecting_parallel.png"
    )
    
    LEGEND_DICT = {
        "legend_url": LEGEND_URL,
        "legend_bottom": 85,
        "legend_left": 5,
    }
     
    
    fig = map_utils.make_folium_multiple_layers_map(
        LAYERS_DICT,
        fig_width = 700, fig_height = 700, 
        zoom=11, 
        # folium expects [lat, lon], i.e. [y, x] of the WGS84 point
        centroid = [round(transit_centroid.y,2), 
                    round(transit_centroid.x, 2)], 
        title=f"Parallel vs Intersecting Lines for {to_map.itp_id.iloc[0]}",
        legend_dict = LEGEND_DICT
    )
    
    display(fig)
    #fig.save(f"{IMG_PATH}parallel_{operator_name}.html")
    #print(f"{operator_name} map saved")

In [6]:
def display_highway_stats(df, select_col = "Route", hwy_name = 5):
    """
    Show summary tables for one highway route.

    Parameters
    ----------
    df : the full routes/highways frame to subset.
    select_col : column compared against `hwy_name` to pick the highway's
        rows.
    hwy_name : value to match in `select_col`.

    Displays highway-level stats for the selected rows, followed by
    operator-level stats for every row whose `itp_id` appears among the
    selected rows. Both tables are sorted by `pct_parallel`, descending.
    """
    def _rank_by_pct_parallel(stats):
        # Highest share of parallel routes first, with a clean 0..n-1 index
        return (stats
                .sort_values("pct_parallel", ascending=False)
                .reset_index(drop=True)
               )

    # Rows for the selected highway
    on_highway = df[select_col] == hwy_name
    hwy_df = df[on_highway]
    hwy_stats = _rank_by_pct_parallel(
        setup_corridors_stats.aggregate(hwy_df, by="highway")
    )

    # All rows for operators that appear on the selected highway
    touches_highway = df.itp_id.isin(hwy_df.itp_id)
    operator_df = df[touches_highway]
    operator_stats = _rank_by_pct_parallel(
        setup_corridors_stats.aggregate(operator_df, by="operator")
    )

    sections = [
        (f"### Summary Stats for Highway Route: {hwy_name}",
         hwy_stats),
        (f"### Summary Stats for Operators Route {hwy_name} Intersects",
         operator_stats),
    ]
    for heading, table in sections:
        display(Markdown(heading))
        display(table)
    

In [7]:
## Think about how to wrap above interactive widget into a function
# Be able to select agency, county, district, etc
def interactive_widget(df, select_col):
    """
    Show a dropdown of the unique values in `df[select_col]` and, beneath
    it, the stats for whichever value is currently selected.

    Parameters
    ----------
    df : frame passed through to the stats display functions.
    select_col : "Route" shows highway stats, "itp_id" shows operator
        stats. Any other column still gets a dropdown, but nothing is
        rendered in the output area.
    """
    choices = df[select_col].sort_values().unique().tolist()
    picker = widgets.Dropdown(
        description=select_col.title(),
        options=choices,
    )
    out_area = widgets.Output()

    display(picker)
    display(out_area)

    def _refresh(*_):
        # Looked up on every call so redefined display functions are used
        renderers = {
            "Route": display_highway_stats,
            "itp_id": display_operator_stats,
        }
        render = renderers.get(select_col)

        # Wipe the previous selection's output before drawing the new one
        out_area.clear_output()
        with out_area:
            if render is not None:
                render(df, select_col, picker.value)

    # Re-render whenever the selection changes, and once up front so the
    # initial dropdown value is shown without any user interaction.
    picker.observe(_refresh, names="value")
    _refresh()

In [8]:
# Operator view: pick an ITP ID to see its stats, related highway stats, and map.
interactive_widget(gdf, "itp_id")

Dropdown(description='Itp_Id', options=(4, 6, 10, 11, 13, 14, 15, 16, 17, 18, 21, 23, 29, 30, 33, 34, 35, 36, …

Output()

In [9]:
# Highway view: pick a highway Route to see its stats and related operator stats.
interactive_widget(gdf, "Route")

Dropdown(description='Route', options=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, …

Output()