## Plot Transit Routes with Low Speeds / High Trips 

Plot each transit route's line geometry (not highway segments) to see which routes show up as having low speeds and high trip counts.

In [1]:
import geopandas as gpd
import intake
import pandas as pd

from IPython.display import Markdown

from bus_service_utils import calenviroscreen_lehd_utils, create_parallel_corridors
# TODO: D1_pmac_routes cannot be imported...re-org logic of functions?
from D1_pmac_routes import ANALYSIS_DATE, merge_routelines_with_trips
from shared_utils import calitp_color_palette as cp
from shared_utils import geography_utils

# Open the intake catalog from the YAML file(s) matching ./*.yml;
# prep_data_for_viz() reads `bus_routes_aggregated_stats` from it.
catalog = intake.open_catalog("./*.yml")



In [2]:
def prep_data_for_viz():
    """
    Assemble the route-level GeoDataFrame that the district maps are drawn from.

    Reads the aggregated route stats from the catalog, attaches each route's
    line geometry, rounds / retypes the columns shown in the tables,
    reprojects to `geography_utils.WGS84`, and keeps only rows that have
    a `caltrans_district`.
    """
    route_stats = catalog.bus_routes_aggregated_stats.read()

    # Route line geometry: trips joined to their route lines, then processed
    trips_with_geom = merge_routelines_with_trips(ANALYSIS_DATE)
    processed_routes = create_parallel_corridors.process_transit_routes(
        alternate_df=trips_with_geom
    )
    bus_route_geom = processed_routes.rename(
        columns={"itp_id": "calitp_itp_id"}
    )

    merge_keys = ["calitp_itp_id", "route_id"]
    geom_cols = merge_keys + ["route_length", "geometry"]

    # Left merge on purpose: rows that exist only in the stats table have
    # no geometry, cannot be plotted, and are therefore not kept.
    merged = pd.merge(
        bus_route_geom[geom_cols],
        route_stats,
        on=merge_keys,
        how="left",
    )

    # Fix types, rounding
    rounded = {
        "route_length_mi": merged.route_length.div(
            geography_utils.FEET_PER_MI
        ).round(2),
        "mean_speed_mph": merged.mean_speed_mph.round(1),
        "pct_trips_competitive": merged.pct_trips_competitive.round(2),
    }
    unused_cols = [
        "route_length",
        "bus_difference_mean",
        "bus_multiplier_mean",
        "bus_difference_spread",
    ]

    gdf = merged.assign(**rounded)
    gdf = gdf.drop(columns=unused_cols)
    gdf = gdf.to_crs(geography_utils.WGS84)

    # Nullable integer dtype so counts can hold missing values
    count_cols = ["num_competitive", "trips_all_day", "trips_peak"]
    gdf[count_cols] = gdf[count_cols].astype("Int64")

    # Missing values present, drop now
    has_district = gdf.caltrans_district.notna()
    return gdf[has_district].reset_index(drop=True)

# Build the dataset once; the per-district loop further down reuses `gdf`.
gdf = prep_data_for_viz()

In [3]:
def make_map(gdf: gpd.GeoDataFrame, district: str,
             trips_col: str, speed_col: str) -> None:
    """
    Display a table and an interactive map for one district's routes.

    Nothing is returned; all output goes through `display()`.

    Parameters
    ----------
    gdf : routes to show (already filtered by the caller). Must contain
        `route_id` and `service_date` columns.
    district : district label, used only in the map heading.
    trips_col, speed_col : column names, used only to build the table
        heading (underscores are shown as spaces).
    """
    table_title = (f"Routes in top 50% of {trips_col.replace('_', ' ')} and "
                   f"bottom 50% of {speed_col.replace('_', ' ')}"
                  )

    display(Markdown(f"#### {table_title}"))
    display(gdf)

    # service_date is dropped before mapping
    # NOTE(review): presumably because date values cannot be serialized
    # for the interactive map -- confirm.
    m = gdf.drop(columns = "service_date").explore(
        "route_id", categorical=True,
        cmap = cp.CALITP_CATEGORY_BOLD_COLORS,
        tiles = "Carto DB Positron"
    )

    display(Markdown(f"### District {district} Routes with Low Speeds / High Trips"))
    display(m)


def identify_high_trip_low_speeds_by_district(df: gpd.GeoDataFrame,
                                              district: str, trips_col: str,
                                              speed_col: str) -> "gpd.GeoDataFrame | None":
    """
    Find and map one district's routes with many trips but low speeds.

    Routes in the district are filtered to those with non-missing
    `mean_speed_mph`, `trips_col` and geometry, then grouped into 4 groups
    per stats column. Routes whose `{trips_col}_group` is >= 3 and whose
    `{speed_col}_group` is <= 2 are passed to `make_map`.

    Parameters
    ----------
    df : routes for all districts; must have a `caltrans_district` column.
    district : value of `caltrans_district` to subset on.
    trips_col, speed_col : columns to rank on. Both need to be among the
        stats columns listed below, since only those get a `_group` column.

    Returns
    -------
    The district's grouped GeoDataFrame (all routes with full info, not just
    the mapped ones) when at least one route is mapped; otherwise None.
    """
    subset = df[df.caltrans_district == district].reset_index(drop=True)

    print(f"# routes: {len(subset)}")

    stats_cols = ["trips_peak", "trips_all_day",
                  "mean_speed_mph", "pct_trips_competitive"]

    subset2 = subset[(subset.mean_speed_mph.notna()) &
                     (subset[trips_col].notna()) &
                     (subset.geometry.notna())
                    ].reset_index(drop=True)

    if len(subset2) == 0:
        print("No observations after missing info dropped")
        return None

    print(f"# routes with full info:{len(subset2)}")
    print(f"# routes with missing info: {len(subset) - len(subset2)}")

    # Use the module this file actually imports. The previous name,
    # `create_calenviroscreen_lehd_data`, is never imported here and raises
    # NameError on a fresh run.
    # NOTE(review): assumes `calenviroscreen_lehd_utils` exposes
    # `define_equity_groups` with the same signature -- confirm.
    subset3 = calenviroscreen_lehd_utils.define_equity_groups(
        subset2, percentile_col = stats_cols, num_groups = 4)

    # Plot trips that are top 50%, speeds are bottom 50%
    can_be_plotted = subset3[(subset3[f"{trips_col}_group"] >= 3) &
                             (subset3[f"{speed_col}_group"] <= 2)
                            ]

    if len(can_be_plotted) == 0:
        print(f"No observations with combination: top 50% of {trips_col}, bottom 50% of {speed_col}")
        return None

    make_map(can_be_plotted, district, trips_col, speed_col)

    return subset3

In [4]:
# Every district that appears in the data, missing values excluded
districts = gdf.caltrans_district.dropna().unique().tolist()

# One section per district, in sorted label order
for d in sorted(districts):
    display(Markdown(f"## District {d}"))

    district = identify_high_trip_low_speeds_by_district(
        gdf,
        d,
        trips_col="trips_all_day",
        speed_col="mean_speed_mph",
    )

## District 01 - Eureka

# routes: 8
# routes with full info:2
# routes with missing info: 6


#### Routes in top 50% of trips all day and bottom 50% of mean speed mph

Unnamed: 0,calitp_itp_id,route_id,geometry,service_date,route_type,route_group,num_competitive,caltrans_district,route_length_mi,trips_peak,trips_peak_group,trips_all_day,trips_all_day_group,mean_speed_mph,mean_speed_mph_group,pct_trips_competitive,pct_trips_competitive_group
0,159,2017,"LINESTRING (-122.61575 38.92995, -122.61534 38...",2022-05-04,3,medium,21,01 - Eureka,37.14,14,4,21,4,26.3,1,1.0,2


### District 01 - Eureka Routes with Low Speeds / High Trips

## District 02 - Redding

# routes: 8
No observations after missing info dropped


## District 03 - Marysville

# routes: 21
No observations after missing info dropped


## District 04 - Oakland

# routes: 53
# routes with full info:11
# routes with missing info: 42


#### Routes in top 50% of trips all day and bottom 50% of mean speed mph

Unnamed: 0,calitp_itp_id,route_id,geometry,service_date,route_type,route_group,num_competitive,caltrans_district,route_length_mi,trips_peak,trips_peak_group,trips_all_day,trips_all_day_group,mean_speed_mph,mean_speed_mph_group,pct_trips_competitive,pct_trips_competitive_group
0,4,10,"LINESTRING (-122.08709 37.67010, -122.08693 37...",2022-05-04,3,short,129,04 - Oakland,7.68,62,3,129,3,10.6,1,1.0,4
4,4,40,"LINESTRING (-122.12512 37.69633, -122.12501 37...",2022-05-04,3,medium,123,04 - Oakland,13.15,111,4,196,4,10.1,1,0.63,3


### District 04 - Oakland Routes with Low Speeds / High Trips

## District 05 - San Luis Obispo

# routes: 21
No observations after missing info dropped


## District 06 - Fresno

# routes: 9
# routes with full info:4
# routes with missing info: 5


#### Routes in top 50% of trips all day and bottom 50% of mean speed mph

Unnamed: 0,calitp_itp_id,route_id,geometry,service_date,route_type,route_group,num_competitive,caltrans_district,route_length_mi,trips_peak,trips_peak_group,trips_all_day,trips_all_day_group,mean_speed_mph,mean_speed_mph_group,pct_trips_competitive,pct_trips_competitive_group
0,126,22,"LINESTRING (-119.10411 35.35144, -119.10418 35...",2022-05-04,3,medium,0,06 - Fresno,16.08,35,4,50,4,12.4,1,0.0,1
1,126,83,"LINESTRING (-119.07572 35.33015, -119.07448 35...",2022-05-04,3,short,0,06 - Fresno,9.51,21,2,33,3,13.2,1,0.0,1


### District 06 - Fresno Routes with Low Speeds / High Trips

## District 07 - Los Angeles

# routes: 45
# routes with full info:27
# routes with missing info: 18


#### Routes in top 50% of trips all day and bottom 50% of mean speed mph

Unnamed: 0,calitp_itp_id,route_id,geometry,service_date,route_type,route_group,num_competitive,caltrans_district,route_length_mi,trips_peak,trips_peak_group,trips_all_day,trips_all_day_group,mean_speed_mph,mean_speed_mph_group,pct_trips_competitive,pct_trips_competitive_group
9,182,232-13159,"LINESTRING (-118.18863 33.76930, -118.18830 33...",2022-05-04,3,,,07 - Los Angeles,24.93,70,4,96,3,11.7,1,,
10,182,501-13159,"LINESTRING (-118.14884 34.14133, -118.14883 34...",2022-05-04,3,,,07 - Los Angeles,17.79,52,3,82,3,15.3,2,,
21,183,566,"LINESTRING (-118.17056 34.11970, -118.17169 34...",2022-05-04,3,short,79.0,07 - Los Angeles,7.56,51,3,79,3,10.3,1,1.0,4.0


### District 07 - Los Angeles Routes with Low Speeds / High Trips

## District 08 - San Bernardino

# routes: 18
# routes with full info:12
# routes with missing info: 6


#### Routes in top 50% of trips all day and bottom 50% of mean speed mph

Unnamed: 0,calitp_itp_id,route_id,geometry,service_date,route_type,route_group,num_competitive,caltrans_district,route_length_mi,trips_peak,trips_peak_group,trips_all_day,trips_all_day_group,mean_speed_mph,mean_speed_mph_group,pct_trips_competitive,pct_trips_competitive_group
0,269,1,"LINESTRING (-117.59438 33.87950, -117.59427 33...",2022-05-04,3,,,08 - San Bernardino,22.09,75,4,118,4,13.8,1,,
2,269,19,"LINESTRING (-117.22945 33.78494, -117.22943 33...",2022-05-04,3,,,08 - San Bernardino,20.09,60,4,101,4,15.3,1,,


### District 08 - San Bernardino Routes with Low Speeds / High Trips

## District 09 - Bishop

# routes: 6
# routes with full info:3
# routes with missing info: 3
No observations with combination: top 50% of trips_all_day, bottom 50% of mean_speed_mph


## District 10 - Stockton

# routes: 23
# routes with full info:15
# routes with missing info: 8


#### Routes in top 50% of trips all day and bottom 50% of mean speed mph

Unnamed: 0,calitp_itp_id,route_id,geometry,service_date,route_type,route_group,num_competitive,caltrans_district,route_length_mi,trips_peak,trips_peak_group,trips_all_day,trips_all_day_group,mean_speed_mph,mean_speed_mph_group,pct_trips_competitive,pct_trips_competitive_group
1,284,40,"LINESTRING (-121.28642 37.95532, -121.28512 37...",2022-05-04,3,short,0.0,10 - Stockton,5.75,61,4,113,4,11.9,1,0.0,1.0
7,484,10,"LINESTRING (-121.00071 37.63926, -121.00245 37...",2022-05-04,3,,,10 - Stockton,6.95,36,3,61,3,10.5,1,,


### District 10 - Stockton Routes with Low Speeds / High Trips

## District 11 - San Diego

# routes: 20
No observations after missing info dropped


## District 12 - Irvine

# routes: 12
No observations after missing info dropped
