# Grab transit routes near the State Highway Network (SHN)

* Find transit routes within 1 mile of the SHN.
* Visualize from operator or highway point of view to see % parallel or intersecting

Data Sources: 
* [SHN on Geoportal](https://opendata.arcgis.com/datasets/77f2d7ba94e040a78bfbe36feb6279da_0.geojson) > processed in `highway_transit_utils.py` > exported to GCS > saved in catalog.
* Transit routes: all transit routes, both those in `gtfs_schedule.shapes` and those missing from it but found in `stops`. Uses the `routes_assembled.parquet` created by `traffic_ops/export_shapefiles.py` > exported to GCS > saved in catalog.

In [None]:
import branca
import geopandas as gpd
import intake
import pandas as pd

import highway_transit_utils
from shared_utils import geography_utils, map_utils

# Folder prefix for images; not referenced in the cells visible here.
IMG_PATH = "./img/"
# Folder prefix for the cached parquet files this notebook reads.
DATA_PATH = "./data/"

## Read in processed data 

The overlay/intersection between transit routes and highways:
* % of transit route that intersection represents
* % of highway that intersection represents

If the overlap meets a threshold for the transit route (>= 0.5?) **and** a threshold for the highway (>= 0.1?), count that transit route as parallel. Otherwise, count it as intersecting.

Play with thresholds and see what makes sense, visualize with interactive maps.

Look at LA Metro, San Jose specifically to see if it is behaving as expected.

In [None]:
# One-time preprocessing, kept inside a string literal so it does not run.
# It writes the three parquet files that are read right below.
'''
gdf = highway_transit_utils.overlay_transit_to_highways()
transit_routes = highway_transit_utils.process_transit_routes()
# For map, need highway to be 250 ft buffer
highways = highway_transit_utils.process_highways(buffer_feet=250)

gdf.to_parquet(f"{DATA_PATH}overlay.parquet")
transit_routes.to_parquet(f"{DATA_PATH}transit_routes.parquet")
highways.to_parquet(f"{DATA_PATH}highways.parquet")
'''

gdf = gpd.read_parquet(f"{DATA_PATH}overlay.parquet")
transit_routes = gpd.read_parquet(f"{DATA_PATH}transit_routes.parquet")
highways = gpd.read_parquet(f"{DATA_PATH}highways.parquet")

# Length of each overlay geometry, as a share of the transit route's length
# and of the highway's length. Computed once and reused for both columns.
overlay_length = gdf.geometry.length

gdf = gdf.assign(
    pct_route=(overlay_length / gdf.route_length).round(3),
    pct_highway=(overlay_length / gdf.highway_length).round(3),
)

# Set pct_highway to have max of 1.
# LA Metro Line 910 (Silver Line) runs on the 110 freeway in both directions,
# so its overlay can be longer than the highway length, which was calculated
# for only 1 direction (~centerline).
# The length was calculated, 1 mi buffer drawn, then the dissolve
# (computationally expensive).
# clip() caps the column in one vectorized step; missing values stay missing,
# and unlike a row-wise apply it also works when the frame has no rows.
gdf = gdf.assign(
    pct_highway=gdf.pct_highway.clip(upper=1),
)

gdf.head(2)

In [None]:
def parallel_or_intersecting(df, pct_route_threshold=0.5, 
                             pct_highway_threshold=0.1):
    """
    Flag each row as parallel (1) or intersecting (0).

    A row is parallel only when `pct_route` is at least
    `pct_route_threshold` **and** `pct_highway` is at least
    `pct_highway_threshold`. A missing value in either column compares
    False, so that row is flagged 0.

    Returns a new frame with an integer `parallel` column added;
    `df` itself is left unchanged.
    """
    # Vectorized comparison instead of a row-wise apply: same flags, and it
    # also works on an empty frame (a row-wise apply hands back a DataFrame
    # there, which cannot be assigned to the single `parallel` column).
    is_parallel = (
        (df.pct_route >= pct_route_threshold)
        & (df.pct_highway >= pct_highway_threshold)
    )

    return df.assign(parallel=is_parallel.astype(int))

## Aggregate to highway or operator

Display these stats along with interactive map

In [None]:
# For each highway or operator, calculate the share of rows flagged parallel
def aggregate(df, groupby="highway"):
    """
    Aggregate the `parallel` flag to the highway or operator level.

    groupby: "highway" groups by the highway identifier columns and uses
             the `route_id` count as the denominator;
             "operator" groups by `itp_id` and uses the `Route` count
             as the denominator.

    Returns the frame produced by geography_utils.aggregate_by_geography
    with `pct_parallel` added: its `parallel` column divided by the
    denominator column, rounded to 3 decimals.

    Raises ValueError for any other `groupby` value.
    """
    if groupby == "highway":
        group_cols = ["Route", "County", "District", "RouteType", 
                      "NB", "SB", "EB", "WB", "highway_length"]
        # To calculate % parallel routes along a highway
        # need to know how many route_ids there are for that highway
        count_cols = ["route_id"]
        nunique_cols = ["itp_id"]
        denominator = "route_id"
        
    elif groupby == "operator":
        group_cols = ["itp_id"]
        # To calculate % of transit routes that are parallel
        # count how many rows there are as denominator
        # nunique("Route") would be too small
        count_cols = ["Route"]
        nunique_cols = []
        denominator = "Route"

    else:
        # Fail early with a clear message rather than an UnboundLocalError
        # on group_cols further down.
        raise ValueError(
            f"groupby must be 'highway' or 'operator', got {groupby!r}"
        )
        
    df = geography_utils.aggregate_by_geography(
        df, 
        group_cols=group_cols,
        sum_cols=["parallel"],
        count_cols=count_cols,
        nunique_cols=nunique_cols,
    )

    df = df.assign(
        pct_parallel=df.parallel.divide(df[denominator]).round(3)
    )
        
    return df

In [None]:
# Disabled: the steps below sit inside a string literal, so they do not run.
# They flag each row, aggregate by highway and by operator, and write the
# three results to parquet under DATA_PATH.
'''
gdf = parallel_or_intersecting(gdf)

hwy_stats = aggregate(gdf, groupby="highway")
operator_stats = aggregate(gdf, groupby="operator")


gdf.to_parquet(f"{DATA_PATH}parallel_or_intersecting.parquet")
hwy_stats.to_parquet(f"{DATA_PATH}hwy_stats.parquet")
operator_stats.to_parquet(f"{DATA_PATH}operator_stats.parquet")
'''

## Map 

In [None]:
# Inputs for the map: the highways layer and the overlay rows that already
# carry the `parallel` flag.
highways = gpd.read_parquet(f"{DATA_PATH}highways.parquet")
gdf = gpd.read_parquet(f"{DATA_PATH}parallel_or_intersecting.parquet")
# Aggregated stats are left disabled here.
#hwy_stats = pd.read_parquet(f"{DATA_PATH}hwy_stats.parquet")
#operator_stats = pd.read_parquet(f"{DATA_PATH}operator_stats.parquet")

In [None]:
def data_to_plot(df):
    """
    Trim `df` to the columns used on the map and buffer its geometry.

    Keeps the operator / route identifiers, the highway identifiers, the
    overlap shares, the `parallel` flag and the geometry, resets the index,
    and replaces each geometry with its 100-unit buffer (units are those of
    the frame's CRS).
    """
    id_cols = ["itp_id", "route_id"]
    highway_cols = ["Route", "County", "District", "RouteType"]
    stat_cols = ["pct_route", "pct_highway", "parallel"]

    subset = df[id_cols + highway_cols + stat_cols + ["geometry"]]
    subset = subset.reset_index(drop=True)

    return subset.assign(geometry=subset.geometry.buffer(100))

In [None]:
# Column name -> display label, passed as both popup_dict and tooltip_dict
# for the corresponding layer below.
hwys_popup_dict = dict(
    Route="Highway Route",
    RouteType="Route Type",
    County="County",
)

transit_popup_dict = dict(
    itp_id="Operator ITP ID",
    route_id="Route ID",
    pct_route="% overlapping route",
    pct_highway="% overlapping highway",
)

# Two-color step colormaps: one for the highway layer, one for the transit layer.
hwys_color = branca.colormap.StepColormap(colors=["black", "gray"])
colorscale = branca.colormap.StepColormap(colors=["blue", "orange"])

# Data behind each layer: highways in LA County, and the overlay rows
# for ITP ID 182 prepared for plotting.
la_highways = highways[highways.County == "LA"].reset_index(drop=True)
itp_182_routes = data_to_plot(gdf[gdf.itp_id == 182])

LAYERS_DICT = {
    "Highways": dict(
        df=la_highways,
        plot_col="Route",
        popup_dict=hwys_popup_dict,
        tooltip_dict=hwys_popup_dict,
        colorscale=hwys_color,
    ),
    "Transit Routes": dict(
        df=itp_182_routes,
        plot_col="parallel",
        popup_dict=transit_popup_dict,
        tooltip_dict=transit_popup_dict,
        colorscale=colorscale,
    ),
}

In [None]:
# The "Los Angeles" entry is indexed positionally: [0] is passed as the map
# centroid and [1] as the zoom.
la_view = map_utils.REGION_CENTROIDS["Los Angeles"]

fig = map_utils.make_folium_multiple_layers_map(
    LAYERS_DICT,
    fig_width=700,
    fig_height=700,
    zoom=la_view[1],
    centroid=la_view[0],
    title="Parallel vs Intersecting Lines for ITP ID: 182",
)
fig

In [None]:
# Operators to spot-check: ITP ID -> display name used in plot titles.
MAP_ME = {
    182: "LA Metro", 
    294: "SJ Valley Transportation Authority", 
    279: "BART", 
    282: "SF Muni",
    278: "SD Metropolitan Transit System", 
}

# NOTE(review): `plt` is not imported in the cells visible here — confirm
# matplotlib.pyplot is available as `plt` before running this cell.
for i, name in MAP_ME.items():
    subset_df = gdf[gdf.itp_id==i]
    # Distinct route_ids for this operator in transit_routes vs. in gdf.
    print(f"# routes originally for {i}: {transit_routes[transit_routes.itp_id==i].route_id.nunique()}")
    print(f"# routes for {i}: {subset_df.route_id.nunique()}")
    
    
    # Draw this operator's rows, colored by route_id.
    fig, ax = plt.subplots(figsize  = (12, 8))
    subset_df.plot(column="route_id",  
                         ax = ax)
    ax.set_axis_off()
    plt.title(f"{name} (ITP ID: {i})")
    # display() is commented out, so the figure is closed without being shown.
    #display(fig)
    plt.close()

In [None]:
# NOTE(review): neither `catalog` nor `plt` is defined in the cells visible
# here — confirm both exist before running this cell.
orig_highways = (catalog.state_highway_network.read()
                .to_crs(geography_utils.CA_StatePlane))


# Sweep the highway threshold while holding the route threshold at 0.4.
# The outer loop variable has its own name so the inner operator loop
# (which binds `i`) does not shadow it.
for hwy_threshold in [0, 0.05, 0.1, 0.15, 0.2]:
    gdf2 = parallel_or_intersecting(gdf, pct_route_threshold=0.4, 
                                    pct_highway_threshold=hwy_threshold)
    print(f"highway threshold: {hwy_threshold}")
    print("------------------------------------")
    print(gdf2.parallel.value_counts())

    # Share of all rows flagged parallel; nan instead of a ZeroDivisionError
    # when there are no rows.
    n_rows = len(gdf2)
    pct_parallel = (len(gdf2[gdf2.parallel == 1]) / n_rows
                    if n_rows else float("nan"))
    print(f"%: {pct_parallel}")

    for i, name in MAP_ME.items():
        subset_df = gdf2[gdf2.itp_id == i]

        print(f"# routes for {i}: {subset_df.route_id.nunique()}")

        # An operator with no rows has no share to report and nothing to draw.
        if subset_df.empty:
            print(f"no rows for {i}, skipping")
            continue

        # This value is a share of rows, not a count, so it is labelled
        # with % like the overall figure above.
        operator_pct = len(subset_df[subset_df.parallel == 1]) / len(subset_df)
        print(f"% routes parallel: {operator_pct}")

        fig, ax = plt.subplots(figsize=(12, 8))

        # Gray background: highways whose Route and County both appear in
        # this operator's rows, one geometry per (Route, County).
        background = orig_highways[
            (orig_highways.Route.isin(subset_df.Route)) & 
            (orig_highways.County.isin(subset_df.County))
        ].drop_duplicates(subset=["Route", "County"])
        background.plot(ax=ax, color="gray")

        subset_df.plot(column="parallel", ax=ax, 
                       categorical=True, legend=True)

        ax.set_axis_off()

        plt.title(f"{name} (ITP ID: {i}, parallel vs intersecting)")
        # display() is commented out, so the figure is closed without being shown.
        #display(fig)
        plt.close()