# Grab transit routes near the State Highway Network (SHN)

* Find transit routes within 1 mile of the SHN.
* Visualize from operator or highway point of view to see % parallel or intersecting

Data Sources: 
* [SHN on Geoportal](https://opendata.arcgis.com/datasets/77f2d7ba94e040a78bfbe36feb6279da_0.geojson) > processed in `highway_transit_utils.py` > exported to GCS > saved in catalog.
* Transit routes: all transit routes, both those in `gtfs_schedule.shapes` and those missing from `shapes` but found in `stops`. Uses `routes_assembled.parquet`, created by `traffic_ops/export_shapefiles.py` in GCS > saved in catalog.

In [1]:
import branca
import geopandas as gpd
import intake
import pandas as pd

import highway_transit_utils
from shared_utils import geography_utils, map_utils, utils
from shared_utils import calitp_color_palette as cp

# Open the intake catalog(s) matching the `*.yml` pattern, relative to the
# working directory. Later cells read `catalog.state_highway_network` from it.
catalog = intake.open_catalog("*.yml")

# Relative output folders: saved HTML maps go to IMG_PATH,
# cached parquet files are written to / read from DATA_PATH.
IMG_PATH = "./img/"
DATA_PATH = "./data/"



## Read in processed data 

The overlay/intersection between transit routes and highways:
* % of transit route that intersection represents
* % of highway that intersection represents

If the intersection passes a certain threshold for the transit route (> 0.5?) **and** a certain threshold for the highway (> 0.1?), then count that transit route as parallel. Otherwise, count it as intersecting.

Play with thresholds and see what makes sense, visualize with interactive maps.

Look at LA Metro, San Jose specifically to see if it is behaving as expected.

## Aggregate to highway or operator

Display these stats along with interactive map

In [None]:
# For each highway or operator, calculate the share of routes that are parallel
def aggregate(df, groupby="highway"):
    """Aggregate route-level rows to highway or operator level.

    The frame is summarized with `geography_utils.aggregate_by_geography`
    (summing `parallel`, counting one column, and optionally counting unique
    values of another), then a `pct_parallel` column is added:
    sum of `parallel` divided by the counted column, rounded to 3 decimals.

    Parameters
    ----------
    df : DataFrame / GeoDataFrame
        Must contain `parallel` plus the grouping / counting columns for the
        chosen `groupby`.
        NOTE(review): assumes `parallel` is a 0/1 flag so that its sum is the
        number of parallel rows -- confirm against the upstream processing.
    groupby : {"highway", "operator"}, default "highway"
        Level to aggregate to.

    Returns
    -------
    The aggregated frame returned by `aggregate_by_geography`, with the extra
    `pct_parallel` column.

    Raises
    ------
    ValueError
        If `groupby` is neither "highway" nor "operator".
    """
    if groupby == "highway":
        group_cols = ["Route", "County", "District", "RouteType",
                      "NB", "SB", "EB", "WB", "highway_length"]
        # To calculate % parallel routes along a highway
        # need to know how many route_ids there are for that highway
        count_cols = ["route_id"]
        nunique_cols = ["itp_id"]
        denominator = "route_id"

    elif groupby == "operator":
        group_cols = ["itp_id"]
        # To calculate % of transit routes that are parallel
        # count how many rows there are as denominator
        # nunique("Routes") would be too small
        count_cols = ["Route"]
        nunique_cols = []
        denominator = "Route"

    else:
        # Fail early with a clear message instead of hitting an
        # UnboundLocalError on group_cols further down.
        raise ValueError(
            f"groupby must be 'highway' or 'operator', got {groupby!r}"
        )

    df = geography_utils.aggregate_by_geography(
        df,
        group_cols=group_cols,
        sum_cols=["parallel"],
        count_cols=count_cols,
        nunique_cols=nunique_cols,
    )

    df = df.assign(
        pct_parallel=df.parallel.divide(df[denominator]).round(3)
    )

    return df

In [None]:
# One-time processing step, kept disabled by wrapping it in a bare string
# literal. When enabled, it builds the processed overlay, flags each row as
# parallel or intersecting, aggregates to highway / operator level and caches
# everything as parquet under DATA_PATH, which the cells below read back.
# NOTE(review): `parallel_or_intersecting` is called unqualified here but is
# not defined in the visible part of this notebook -- confirm where it lives
# (presumably `highway_transit_utils`) before re-enabling.
'''
gdf = highway_transit_utils.make_processed_data()

gdf = parallel_or_intersecting(gdf, 
                               pct_route_threshold=0.5, 
                               pct_highway_threshold=0.1)

hwy_stats = aggregate(gdf, groupby="highway")
operator_stats = aggregate(gdf, groupby="operator")

gdf.to_parquet(f"{DATA_PATH}parallel_or_intersecting.parquet")
hwy_stats.to_parquet(f"{DATA_PATH}hwy_stats.parquet")
operator_stats.to_parquet(f"{DATA_PATH}operator_stats.parquet")


# For map, need highway to be 250 ft buffer
highways = highway_transit_utils.process_highways(buffer_feet=250)
highways.to_parquet(f"{DATA_PATH}highways.parquet")
'''

## Map 

In [3]:
# Load the cached outputs of the (disabled) processing cell:
# `gdf` holds the transit route / highway overlay rows with the `parallel` flag,
# `highways` holds the highway geometries written for mapping.
gdf = gpd.read_parquet(f"{DATA_PATH}parallel_or_intersecting.parquet")
highways= gpd.read_parquet(f"{DATA_PATH}highways.parquet")

In [35]:
def data_to_plot(df):
    """Trim a routes GeoDataFrame down to what the folium map needs.

    Keeps only the identifier, highway, overlap-share and `parallel` columns
    plus `geometry`, resets the index, and replaces each geometry with a
    buffered (200) and simplified (tolerance 100) version.

    Simplifying makes the gdf smaller; without it the folium map produces an
    HTML file that is too large to check in.
    NOTE(review): buffer / tolerance are in the units of the frame's CRS --
    presumably feet; confirm.
    """
    plot_cols = [
        "itp_id", "route_id",
        "Route", "County", "District", "RouteType",
        "pct_route", "pct_highway", "parallel",
        "geometry",
    ]
    subset = df[plot_cols].reset_index(drop=True)

    lighter_geometry = subset.geometry.buffer(200).simplify(tolerance=100)

    return subset.assign(geometry=lighter_geometry)


In [36]:
# Column name -> label shown in the highway layer's popup / tooltip
hwys_popup_dict = {"Route": "Highway Route", "RouteType": "Route Type", "County": "County"}

# Column name -> label shown in the transit route layer's popup / tooltip
transit_popup_dict = {
    "itp_id": "Operator ITP ID", "route_id": "Route ID",
    "pct_route": "% overlapping route", "pct_highway": "% overlapping highway",
}

# Two-step colormap for the highway layer
hwys_color = branca.colormap.StepColormap(colors=["black", "gray"])

# Two-step colormap for the transit layer, built from the first two
# Cal-ITP bright category colors (index 0 = blue, index 1 = orange)
colorscale = branca.colormap.StepColormap(
    colors=[cp.CALITP_CATEGORY_BRIGHT_COLORS[idx] for idx in (0, 1)],
)

In [37]:
%%html
<style>
@import url('https://fonts.googleapis.com/css?family=Raleway');
@import url('https://fonts.googleapis.com/css?family=Nunito+Sans');
@import url('https://fonts.googleapis.com/css?family=Bitter');
</style>

In [38]:
# Save this colorscale as png and use as FloatImage in folium map
# The printed line supplies the text labels for the two color steps; the
# padding inside the string is what positions each label over its color, so
# keep the spacing as-is when regenerating the legend image.
print("           Intersecting                        Parallel")
display(colorscale)

           Intersecting                        Parallel


In [43]:
# Build and save one interactive folium map per selected operator, with a
# highway layer and a transit-route layer colored by the `parallel` flag.
from folium.plugins import FloatImage

# Change this URL to GitHub URL, can't use relative path
# NOTE(review): the URL points at the `more-highways` branch; it will stop
# resolving if that branch is renamed or deleted -- confirm and update.

legend = (
    "https://raw.githubusercontent.com/cal-itp/data-analyses/"
    "more-highways/bus_service_increase/"
    "img/legend_intersecting_parallel.png"
)
    
# Legend image overlaid on each map (multi-layer maps do not show a legend
# on their own, so the pre-rendered colorscale png is used instead).
image = FloatImage(legend, bottom=100, left=5)

# Operators to map: ITP ID -> display name used in the map title
MAP_ME = {
    182: "LA Metro", 
    294: "SJ Valley Transportation Authority", 
    279: "BART", 
    282: "SF Muni",
    278: "SD Metropolitan Transit System", 
}


for itp_id, operator in MAP_ME.items(): 
    
    # Rows for this operator, trimmed and simplified for plotting
    transit_df = data_to_plot(gdf[gdf.itp_id==itp_id])
    # Only keep highways in counties that this operator's rows touch
    highway_df = (highways[highways.County.isin(transit_df.County)]
                  .reset_index(drop=True)
                 )
    
    # Instead of using county centroid, calculate centroid from transit_df
    # Otherwise, it's too zoomed out from where transit routes are
    # Note: this takes the centroid of the FIRST row's geometry only, and
    # raises IndexError if the operator has no rows in `gdf`.
    transit_centroid = transit_df.geometry.centroid.iloc[0]

    # Layer name -> settings passed to map_utils for that layer
    LAYERS_DICT = {
        "Highways": {"df": highway_df,
            "plot_col": "Route",
            "popup_dict": hwys_popup_dict, 
            "tooltip_dict": hwys_popup_dict,
            "colorscale": hwys_color,
        },
        "Transit Routes": {"df": transit_df,
            "plot_col": "parallel",
            "popup_dict": transit_popup_dict, 
            "tooltip_dict": transit_popup_dict,
            "colorscale": colorscale,
        },
    }
    
    
    fig = map_utils.make_folium_multiple_layers_map(
        LAYERS_DICT,
        fig_width = 700, fig_height = 700, 
        zoom=13, 
        centroid = [transit_centroid.y, transit_centroid.x], 
        title=f"Parallel vs Intersecting Lines for {operator}",
    )

    # Attach the legend image, then write the map out as a standalone HTML file
    fig.add_child(image)
    fig.save(f"{IMG_PATH}parallel_{itp_id}.html")
    print(f"{itp_id} map saved")

182 map saved
294 map saved
279 map saved
282 map saved
278 map saved


In [None]:
#https://stackoverflow.com/questions/61065945/how-can-a-plot-a-map-with-folium-and-place-a-bar-graph-next-to-the-map-in-python
# Can folium also include subplots for other stats?

In [None]:
# Legend doesn't show up with multiple layers
# One way around, create the colorscale(s) as one image and save it 
# Then, insert the image with fig.add_child(image)
# Scratch example: overlay a sample image on a throwaway folium map to try
# out FloatImage placement.
# NOTE(review): this rebinds the global `image`, replacing the legend image
# created in the map-building cell; re-run that cell before adding `image`
# to the operator maps again.
import folium
from folium.plugins import FloatImage

url = (
    "https://raw.githubusercontent.com/ocefpaf/secoora_assets_map/"
    "a250729bbcf2ddd12f46912d36c33f7539131bec/secoora_icons/rose.png"
)

m = folium.Map([-13, -38.15], zoom_start=10)
image = FloatImage(url, bottom=10, left=5)

image.add_to(m)
m
                   
#fig.add_child(image)
#fig

In [None]:
# Sanity check per operator: compare the number of unique route_ids in the
# original routes table vs. in `gdf`, and draw a quick static plot of the
# operator's rows.
# NOTE(review): `transit_routes` and `plt` are not defined or imported in the
# visible part of this notebook (this cell has no execution count); it will
# raise NameError unless they are set up elsewhere -- presumably
# `import matplotlib.pyplot as plt` and a read of the assembled routes.
# Operators to check: ITP ID -> display name used in the plot title
MAP_ME = {
    182: "LA Metro", 
    294: "SJ Valley Transportation Authority", 
    279: "BART", 
    282: "SF Muni",
    278: "SD Metropolitan Transit System", 
}

for i, name in MAP_ME.items():
    subset_df = gdf[gdf.itp_id==i]
    print(f"# routes originally for {i}: {transit_routes[transit_routes.itp_id==i].route_id.nunique()}")
    print(f"# routes for {i}: {subset_df.route_id.nunique()}")
    
    
    fig, ax = plt.subplots(figsize  = (12, 8))
    subset_df.plot(column="route_id",  
                         ax = ax)
    ax.set_axis_off()
    plt.title(f"{name} (ITP ID: {i})")
    # The figure is closed without being shown or saved; uncomment the
    # display call below to actually see it.
    #display(fig)
    plt.close()

In [None]:
# Sensitivity check: hold the transit-route threshold at 0.4, sweep the
# highway threshold, and for each value report how many rows are flagged
# parallel overall and per selected operator, with a quick static plot.
# NOTE(review): `parallel_or_intersecting` and `plt` are not defined or
# imported in the visible part of this notebook -- confirm they are set up
# elsewhere before running this cell.
orig_highways = (catalog.state_highway_network.read()
                .to_crs(geography_utils.CA_StatePlane))


# The loops use distinct names (`hwy_threshold`, `itp_id`); previously both
# used `i`, so the inner loop overwrote the threshold value.
for hwy_threshold in [0, 0.05, 0.1, 0.15, 0.2]:
    gdf2 = parallel_or_intersecting(gdf, pct_route_threshold=0.4,
                                    pct_highway_threshold=hwy_threshold)
    print(f"highway threshold: {hwy_threshold}")
    print("------------------------------------")
    print(gdf2.parallel.value_counts())
    print(f"%: {len(gdf2[gdf2.parallel==1]) / len(gdf2)}")

    for itp_id, name in MAP_ME.items():
        subset_df = gdf2[gdf2.itp_id==itp_id]

        print(f"# routes for {itp_id}: {subset_df.route_id.nunique()}")

        # An operator with no rows would make the share below divide by zero
        # and has nothing to plot, so skip it.
        if len(subset_df) == 0:
            continue

        # Despite the label, this prints the share of the operator's rows
        # that are flagged parallel.
        print(f"# routes parallel: {len(subset_df[subset_df.parallel==1]) / len(subset_df)}")

        fig, ax = plt.subplots(figsize  = (12, 8))
        # Gray backdrop: highways whose Route and County both appear in the
        # operator's rows, one geometry per Route / County pair.
        orig_highways[
            (orig_highways.Route.isin(subset_df.Route)) &
            (orig_highways.County.isin(subset_df.County))
        ].drop_duplicates(subset=["Route", "County"]).plot(ax=ax, color="gray")

        subset_df.plot(column="parallel", ax = ax,
                       categorical=True, legend=True)

        ax.set_axis_off()

        plt.title(f"{name} (ITP ID: {itp_id}, parallel vs intersecting)")
        # The figure is closed without being shown or saved; uncomment the
        # display call below to actually see it.
        #display(fig)
        plt.close()