# Plot routes on SHN

See which highway corridors show up.

A corridor can span multiple operators, so improving these corridors would benefit every operator that runs on them.

## Brainstorm
Filter to just routes that are on the SHN, keeping segments that meet either condition:
* In the top 75% of `trips_all_day_per_mi` (mostly to select where trips actually occur on highways), **OR**
* Average RT speed <= 25 mph, which is relatively slow for routes running on the SHN. This condition only applies where RT info is present (it is not always available for express buses).

In [1]:
import branca
import geopandas as gpd
import intake
import pandas as pd

from IPython.display import Markdown

from bus_service_utils import calenviroscreen_lehd_utils
from shared_utils import geography_utils 
from shared_utils import calitp_color_palette as cp

# Data catalog opened from the YAML file(s) matching "*.yml".
catalog = intake.open_catalog("*.yml")

# Speed colorscale: same idea as rt_utils.ZERO_THIRTY_COLORSCALE, but stretched
# so highway speeds are distinguishable. NOTE: despite the "SIXTY" in the name,
# the scale tops out at 65 mph.
ZERO_SIXTY_COLORSCALE = branca.colormap.step.RdYlGn_11.scale(vmin=0, vmax=65)
ZERO_SIXTY_COLORSCALE.caption = "Speed (miles per hour)"

# Basemap used by every map in this notebook
TILES = "CartoDB positron"

# Categorical palette, picked from the Cal-ITP bright colors in this order:
# yellow (2), orange (1), blue (0), purple (5), green (3)
CUSTOM_CATEGORICAL = [
    cp.CALITP_CATEGORY_BRIGHT_COLORS[idx] for idx in (2, 1, 0, 5, 3)
]



In [2]:
# Highway segments with transit stats attached
gdf = catalog.highway_segment_stats.read()

# Buffer the geometry in a projected CRS, then go back to WGS84 for mapping.
# The 300 is in CA_StatePlane units (presumably feet -- TODO confirm).
gdf = gdf.assign(
    geometry=(
        gdf.geometry
        .to_crs(geography_utils.CA_StatePlane)
        .buffer(300)
        .to_crs(geography_utils.WGS84)
    )
)

# Sorted list of the distinct district values, used to loop over maps later
districts = sorted(gdf["District"].unique().tolist())

In [3]:
# Segment-level metrics that get binned into per-district quartile groups
stats_cols = ["trips_all_day_per_mi", "mean_speed_mph_trip_weighted"]

def subset_by_district(gdf: gpd.GeoDataFrame, district: int) -> gpd.GeoDataFrame:
    """
    Keep the rows of `gdf` that belong to `district` and have at least
    one all-day trip (`trips_all_day > 0`).

    The index of the returned frame is reset to 0..n-1.
    """
    in_district = gdf.District == district
    has_trips = gdf.trips_all_day > 0

    return gdf.loc[in_district & has_trips].reset_index(drop=True)

def get_quartiles_by_district(gdf: gpd.GeoDataFrame, 
                              plot_col: list) -> gpd.GeoDataFrame:
    """
    Bin the columns in `plot_col` into 4 groups, separately for each district.

    For every district value in `gdf`, the rows with trips are selected
    (see `subset_by_district`) and handed to
    `calenviroscreen_lehd_utils.define_equity_groups` with `num_groups = 4`.
    That helper presumably adds a `<col>_group` column per entry of
    `plot_col` (the notebook later reads `trips_all_day_per_mi_group`) --
    confirm against the helper.

    Districts with no qualifying rows are skipped. The per-district results
    are stacked in sorted district order with a fresh 0..n-1 index. If no
    district has any qualifying rows, an empty GeoDataFrame is returned.
    """
    district_frames = []
    
    for i in sorted(gdf.District.unique()):
        district_df = subset_by_district(gdf, district = i)
        # Nothing to bin when the district has no segments with trips
        if len(district_df) > 0:
            district_frames.append(
                calenviroscreen_lehd_utils.define_equity_groups(
                    district_df, percentile_col = plot_col, num_groups = 4
                )
            )
    
    # pd.concat raises on an empty list, so keep the empty-result behavior explicit
    if not district_frames:
        return gpd.GeoDataFrame()
    
    # Concatenate once at the end instead of growing a frame inside the loop,
    # which re-copies all previously accumulated rows on every iteration.
    return pd.concat(district_frames, axis=0, ignore_index=True)


# Segments with trips, with per-district quartile groups for each stats column
gdf2 = get_quartiles_by_district(gdf, plot_col=stats_cols)

In [4]:
# Summary statistics for segments in trips-per-mile group 2 or higher
gdf2.loc[gdf2["trips_all_day_per_mi_group"] >= 2].describe()

Unnamed: 0,Route,District,hwy_segment_id,trips_peak,trips_all_day,stop_arrivals_peak,stop_arrivals_all_day,stops_peak,stops_all_day,route_length,trips_peak_per_mi,stop_arrivals_peak_per_mi,stop_arrivals_all_day_per_mi,stops_peak_per_mi,stops_all_day_per_mi,trips_all_day_per_mi,trips_all_day_per_mi_group,mean_speed_mph_trip_weighted,mean_speed_mph_trip_weighted_group
count,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0,45.0
mean,88.911111,7.311111,2288938000.0,66.955556,114.444444,83.422222,171.222222,5.311111,5.377778,14683.680165,65.723556,62.440667,126.990222,3.775556,3.788889,113.374222,2.911111,23.328667,2.177778
std,82.761932,2.44784,1224283000.0,69.625281,116.766581,93.50097,192.88455,5.857482,5.874788,10554.022068,106.654729,83.957042,170.710654,3.872039,3.863522,185.802707,0.949216,7.460918,1.072145
min,1.0,3.0,128519000.0,6.0,10.0,3.0,5.0,1.0,1.0,597.26293,1.2,0.6,1.0,0.2,0.2,2.0,2.0,6.98,1.0
25%,27.0,6.0,1515119000.0,17.0,34.0,13.0,28.0,2.0,2.0,3066.942722,10.74,16.11,28.19,1.0,1.4,18.79,2.0,17.0,1.0
50%,74.0,7.0,2302459000.0,48.0,80.0,55.0,114.0,3.0,3.0,15735.575795,26.05,35.36,79.56,2.01,2.01,49.28,3.0,22.64,2.0
75%,110.0,8.0,3130616000.0,59.0,100.0,95.0,188.0,6.0,6.0,26400.0,84.82,62.59,127.6,5.2,5.2,159.13,4.0,29.73,3.0
max,282.0,12.0,4265316000.0,238.0,397.0,354.0,716.0,26.0,26.0,26400.0,635.97,435.14,903.75,16.03,16.03,1121.32,4.0,38.02,4.0


In [None]:
def set_cutoffs(gdf: gpd.GeoDataFrame, 
                speed_threshold: int, 
                trip_min_group: int,
               ) -> None: 
    """
    Print how many highway segments pass both cutoffs, statewide and by district.

    A segment is counted when its `mean_speed_mph_trip_weighted` is at or
    below `speed_threshold` AND its trips-per-mile quartile group
    (`trips_all_day_per_mi_group`) is at least `trip_min_group`.
    Note that `plot_highway_corridor` combines its two conditions with OR,
    so these counts are a stricter selection than what gets mapped.

    If `gdf` has no `trips_all_day_per_mi_group` column, the cutoff is
    compared against the raw `trips_all_day_per_mi` value instead and a
    notice is printed, so frames without quartile columns still work.

    Nothing is returned; the counts are printed / displayed.
    """
    print(f"speed cutoff: {speed_threshold}, trip quartile cutoff: {trip_min_group}")
    
    # trip_min_group is a quartile-group cutoff, so compare it against the
    # group column rather than the raw trips-per-mile rate when possible.
    group_col = "trips_all_day_per_mi_group"
    if group_col in gdf.columns:
        trip_col = group_col
    else:
        trip_col = "trips_all_day_per_mi"
        print(f"{group_col} not found; comparing cutoff against {trip_col} instead")
    
    # Get statewide counts to see how many fall into each district
    subset = gdf[(gdf.mean_speed_mph_trip_weighted <= speed_threshold) &
                 (gdf[trip_col] >= trip_min_group)
                ]
    print(f"# obs statewide: {len(subset)}")
    display(subset.District.value_counts())

In [None]:
# Use gdf2 (segments with trips, quartile group columns attached) so the
# counts are drawn from the same frame that is mapped below.
set_cutoffs(gdf2, speed_threshold = 20, trip_min_group = 2)

In [None]:
def plot_highway_corridor(gdf: gpd.GeoDataFrame, 
                          district: int, speed: int = 20):
    """
    Display two interactive maps (built with geopandas.explore()) of the
    highway segments in `district` worth a closer look.

    A segment is kept if it is in trips-per-mile group 2 or higher, OR its
    trip-weighted mean speed is at or below `speed` (mph).

    The first map colors segments by mean speed, the second by
    trips-per-mile group. If no segment qualifies, a message is printed
    instead. Nothing is returned; the maps are shown via display().
    """
    in_district = gdf.District == district
    enough_trips = gdf.trips_all_day_per_mi_group >= 2
    slow = gdf.mean_speed_mph_trip_weighted <= speed
    
    corridors = gdf[in_district & (enough_trips | slow)]
    
    # Nothing to map for this district
    if len(corridors) == 0:
        print("No highway corridors meet this criteria.")
        return
    
    display(Markdown("#### Average Speed"))
    
    # Continuous speed map; uses the wider colorscale so highway speeds show up
    speed_map = corridors.explore(
        "mean_speed_mph_trip_weighted",
        cmap=ZERO_SIXTY_COLORSCALE,
        categorical=False,
        tiles=TILES,
    )
    display(speed_map)
    
    display(Markdown("#### Trips"))
    
    # Categorical map of the trips-per-mile groups
    trips_map = corridors.explore(
        "trips_all_day_per_mi_group",
        cmap=CUSTOM_CATEGORICAL,
        categorical=True,
        tiles=TILES,
    )
    display(trips_map)
        

In [None]:
# One section per district: heading, then the speed and trips maps
# (segments at or below 25 mph, or in trips-per-mile group 2 or higher)
for district in districts:
    display(Markdown(f"## District {district}"))
    plot_highway_corridor(gdf2, district, speed=25)

Examples: Highway 1 in District 4, near Golden Gate Park, and PCH (Highway 1) in District 7.
Both are technically part of the SHN, but they have much slower speeds, or function more like arterial segments.

Changed the colormap to go above 30 mph (up to 65 mph), because the 0-30 mph scale was more applicable to urban speeds.

Maybe use different scales by `RouteType`, because Interstate/US routes and State routes have different speeds. See how speed limits can be brought in.