In [1]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import calitp.magics
import geopandas as gpd
import intake
import pandas as pd

from IPython.display import display, Markdown, HTML

import parallel_corridors_utils
#import deploy_portfolio_yaml
from shared_utils import styleguide, geography_utils

# Build the data catalog from every YAML file in the notebook's directory.
catalog = intake.open_catalog("./*.yml")
# Use altair's "html" renderer for chart output.
alt.renderers.enable("html")
# NOTE(review): max_rows=None presumably lifts altair's cap on the number of
# rows embedded in a chart -- confirm against the altair docs.
alt.data_transformers.enable('default', max_rows=None)

In [2]:
# parameters cell
itp_id = 182

In [3]:
# Load the route-level competitiveness table and keep this operator's rows.
df = catalog.competitive_route_variability.read()

df = (df[df.calitp_itp_id == itp_id]
      .rename(columns = {"route_name_used": "route_name"})
      .reset_index(drop=True)
     )

# For ITP ID 182, keep only the part of route_id before the '-131' suffix.
# `.str[0]` (instead of `expand=True)[0]`) also works when the frame is empty.
if itp_id == 182:
    df = df.assign(
        route_id = df.route_id.str.split('-131').str[0]
    )

# Use route_short_name instead
# But where route_short_name is missing, use route_id.
# fillna treats both None and NaN as missing; the previous `is not None`
# check let NaN through, leaving route_id2 empty for those rows.
df = df.assign(
    route_id2 = df.route_short_name.fillna(df.route_id),
)

In [4]:
%%capture_parameters
# Derive the operator name and the Caltrans district pieces from the first
# row of df. caltrans_district is split on '-': the part before the dash is
# the district number, the part after is lower-cased and its spaces replaced
# with '-' (recorded output for this run: "07" and "los-angeles").
operator_name = df.calitp_agency_name.iloc[0]
district_number = f"{df.caltrans_district.iloc[0].split('-')[0].strip()}"
district_name = f"{df.caltrans_district.iloc[0].split('-')[1].strip().lower().replace(' ', '-')}"

# NOTE(review): the recorded output is a JSON object keyed by exactly these
# four names, so the magic appears to read them from this final line. Keep it
# a bare comma-separated list of identifiers -- confirm against calitp.magics.
itp_id, operator_name, district_number, district_name

{"itp_id": 182, "operator_name": "Metro", "district_number": "07", "district_name": "los-angeles"}


# {operator_name}

In [5]:
# Top-level page heading, emitted as raw HTML.
page_heading = HTML("<h1>Competitive Routes near SHN</h1>")
display(page_heading)

In [6]:
# Values used by the narrative text; formatting is cleaned up here.
district = df.caltrans_district.iloc[0]
formatted_date = (
    pd.to_datetime(parallel_corridors_utils.ANALYSIS_DATE)
    .strftime('%m-%d-%Y')
)

PCT_COMPETITIVE_THRESHOLD = parallel_corridors_utils.PCT_COMPETITIVE_THRESHOLD

# Operator-level route counts, keyed by category name.
stats = parallel_corridors_utils.operator_parallel_competitive_stats(
    itp_id, PCT_COMPETITIVE_THRESHOLD)


def _pct_of_routes(route_count):
    """Express route_count as a percentage of stats['num_routes'], to 1 decimal."""
    return round(route_count / stats['num_routes'] * 100, 1)


pct_parallel = _pct_of_routes(stats['on_shn_or_intersecting_routes'])
pct_competitive = _pct_of_routes(stats['competitive_routes'])
pct_competitive_and_parallel = _pct_of_routes(
    stats['competitive_routes_on_shn_or_intersecting'])

## Quick Stats

In [7]:
# One entry per line of the summary; the entries are joined with <br> so the
# whole summary renders as a single Markdown block.
summary_lines = [
    f"**Bus routes in service: {stats['num_routes']}**",
    (
        "**Routes near** the State Highway Network (SHN): "
        f"**{stats['on_shn_or_intersecting_routes']} routes ({pct_parallel}%)**"
    ),
    (
        "**Competitive routes** against car travel "
        "(within 1.5x car travel time): "
        f"**{stats['competitive_routes']} routes ({pct_competitive}%)**"
    ),
    (
        "**Near SHN and competitive routes** against car travel "
        "(within 1.5x car travel time): "
        f"**{stats['competitive_routes_on_shn_or_intersecting']} routes ({pct_competitive_and_parallel}%)**"
    ),
]

display(Markdown("<br>".join(summary_lines)))

**Bus routes in service: 121**<br>**Routes near** the State Highway Network (SHN): **74 routes (61.2%)**<br>**Competitive routes** against car travel (within 1.5x car travel time): **69 routes (57.0%)**<br>**Near SHN and competitive routes** against car travel (within 1.5x car travel time): **45 routes (37.2%)**

In [8]:
def set_yaxis_range(df, y_col):
    """Return the (minimum, maximum) of column `y_col` in `df`.

    The pair is meant to be used as y-axis bounds for a chart.
    """
    values = df[y_col]
    return values.min(), values.max()

# Rather than set it according to the operator, which can vary,
# just use set thresholds for each group
def top15_routes(df, route_group):
    """Pick up to 15 route_ids to chart for one route group.

    Only rows in `route_group` whose category is "on_shn" or
    "intersects_shn" are considered. Routes are ranked by
    pct_trips_competitive (highest first, ties broken by route_id) and
    de-duplicated per (calitp_itp_id, route_id).

    Returns
    -------
    list
        The selected route_ids in ranked order, or an empty list when fewer
        than 2 routes qualify (a single route is not shown on its own).
        Previously this case fell off the end of the function and returned
        None, which made callers that take len() of the result raise
        TypeError.
    """
    in_group = df.route_group == route_group
    near_shn = df.category.isin(["on_shn", "intersects_shn"])

    ranked = (
        df[in_group & near_shn]
        .sort_values(
            ["calitp_itp_id", "pct_trips_competitive", "route_id"],
            ascending=[True, False, True],
        )
        .drop_duplicates(subset=["calitp_itp_id", "route_id"])
        .head(15)
    )

    # After de-duplication each row is one route.
    if len(ranked) < 2:
        return []

    return list(ranked.route_id)

In [9]:
def remove_datetime_and_geom_columns(df: "gpd.GeoDataFrame") -> pd.DataFrame:
    """Return `df` without its datetime and geometry columns.

    These columns will throw a "JSON not serializable" error in altair.
    df.select_dtypes(exclude="datetime") can't be used because some of them
    are stored as objects, so they are listed by name.

    Columns that are already absent are skipped rather than raising
    KeyError, so the function can be applied again to its own output (for
    example when the calling cell, which reassigns `df`, is re-run).
    The input frame is not modified.
    """
    remove_cols = ["service_date",
                   "trip_first_departure",
                   "trip_departure", "geometry"]

    return df.drop(columns = remove_cols, errors = "ignore")


# Keep only rows above the competitive threshold whose route is on or
# intersects the SHN, then strip the columns altair cannot serialize.
is_competitive = df.pct_trips_competitive > PCT_COMPETITIVE_THRESHOLD
is_near_shn = df.category.isin(["on_shn", "intersects_shn"])

df = remove_datetime_and_geom_columns(df[is_competitive & is_near_shn])

In [10]:
def setup_charts(df: pd.DataFrame, route_group: str) -> "tuple | None":
    """Select the routes to plot for `route_group` and build their stripplot.

    Parameters
    ----------
    df : route-level data containing at least `bus_multiplier` and
        `route_id`, plus the columns `top15_routes` filters on.
    route_group : the route group to chart.

    Returns
    -------
    (subset_routes, multiplier_chart) when at least 2 routes are selected,
    otherwise None.
    """
    # Set y-ranges to be the same across route_groups,
    # otherwise charts fluctuate too much, even within operator.
    # Only the lower bound comes from the data; the upper bound is fixed at 2.5.
    y_col = "bus_multiplier"
    y_min, _ = set_yaxis_range(df, y_col)

    # Grab the routes to plot
    subset_routes = top15_routes(df, route_group)

    # The selection may come back as None (nothing selected) as well as a
    # list; check for None first so len() is never called on it.
    if subset_routes is None or len(subset_routes) < 2:
        return None

    multiplier_chart = parallel_corridors_utils.make_stripplot(
        df[df.route_id.isin(subset_routes)],
        y_col, Y_MIN = y_min, Y_MAX = 2.5
    )

    return subset_routes, multiplier_chart


In [11]:
# Display a table of route-level stats for each route_group
# Displaying route_name makes chart too crowded    
def style_route_stats(df):
    """Render the route-level stats table as styled HTML and display it.

    Rows are ordered by pct_trips_competitive (descending) then route_id2,
    helper columns are dropped, the remaining columns get display names, and
    the result is shown under a "Route Stats" heading with the index hidden.
    Nothing is returned.
    """
    # Display names for the columns
    display_names = {
        "route_id2": "Route ID",
        "route_short_name": "Route Name",
        "route_group": "Route Group",
        "num_trips": "# trips",
        "daily_avg_freq": "Daily Avg Freq (trips per hr)",
        "pm_peak_freq": "PM Peak Avg Freq (trips per hr)",
        "percentiles": "25th, 50th, 75th ptile (hrs)",
    }

    # Columns needed for sorting/filtering but not shown in the table
    hidden_cols = [
        "calitp_itp_id", "route_id", "route_group",
        "pct_trips_competitive",
        "p25", "p50", "p75",
        "category"
    ]

    freq_cols = ['Daily Avg Freq (trips per hr)',
                 'PM Peak Avg Freq (trips per hr)']

    table = (
        df.sort_values(["pct_trips_competitive", "route_id2"],
                       ascending=[False, True])
        .drop(columns=hidden_cols)
        .rename(columns=display_names)
    )

    # Change alignment for some columns
    # https://stackoverflow.com/questions/59453091/left-align-the-first-column-and-center-align-the-other-columns-in-a-pandas-table
    styler = table.style.format(formatter='{:,.3}', subset=freq_cols)
    styler = styler.set_properties(
        subset=['Route ID', 'Route Name'], **{'text-align': 'left'})
    styler = styler.set_properties(
        subset=['# trips'] + freq_cols, **{'text-align': 'center'})
    styler = styler.set_table_styles(
        [{"selector": 'th', "props": [('text-align', 'center')]}])
    table_html = styler.hide(axis="index").to_html()

    display(HTML("<h4>Route Stats</h4>"))
    display(HTML(table_html))

In [12]:
# Allow for possibility that operator doesn't have routes that fall into certain route_groups
# But, jupyterbook will not like markdown with f-string
# separate out route_group and put in each cell, and use try/except + if to display...
# but all 3 route groups must be displayed. 
# just add sentence about why no routes were returned at the end
# Change to fewer than 2 routes, instead of no routes, because we don't want to show just 1 route

def display_charts_for_route_group(df, route_group):
    """Display the chart and route-stats table for one route group.

    When `setup_charts` returns None (no chart to show for this group), a
    short message is printed instead. Nothing is returned.
    """
    # Call setup_charts exactly once: it builds the chart, so calling it again
    # just to test for None repeats that work.
    chart_setup = setup_charts(df, route_group)

    if chart_setup is None:
        print("No routes meet this criteria.")
        return

    subset_routes, mult = chart_setup

    display(mult)
    # Display route stats for just plotted routes
    route_stats = parallel_corridors_utils.competitive_route_level_stats(
        df[df.route_id.isin(subset_routes)])
    style_route_stats(route_stats)

## Competitive Routes
### Short Routes (< 1 hr)

In [13]:
# Chart and route stats for the "short" route group.
route_group = "short"
display_charts_for_route_group(df=df, route_group=route_group)

Route ID,Route Name,# trips,Daily Avg Freq (trips per hr),"25th, 50th, 75th ptile (hrs)",PM Peak Avg Freq (trips per hr)
602,602.0,60,2.5,"0.63, 0.77, 0.82",5.67
665,665.0,32,1.33,"0.33, 0.35, 0.38",3.33
690,690.0,42,1.75,"0.8, 0.86, 0.9",4.0
802,,150,6.25,"0.48, 0.48, 0.48",
803,,193,8.04,"0.57, 0.57, 0.57",
805,,147,6.12,"0.22, 0.22, 0.22",
806,,205,8.54,"0.77, 0.77, 0.78",
901,,254,10.6,"0.7, 0.82, 0.88",
96,96.0,43,1.79,"0.9, 0.95, 0.98",4.33
230,230.0,62,2.58,"0.8, 0.89, 0.93",7.0


### Medium Routes (1-1.5 hrs)

In [14]:
# Chart and route stats for the "medium" route group.
route_group = "medium"
display_charts_for_route_group(df=df, route_group=route_group)

Route ID,Route Name,# trips,Daily Avg Freq (trips per hr),"25th, 50th, 75th ptile (hrs)",PM Peak Avg Freq (trips per hr)
161,161,35,1.46,"1.15, 1.18, 1.2",4.33
222,222,39,1.62,"1.02, 1.12, 1.18",3.33
487,487,58,2.42,"0.9, 0.96, 1.03",6.33
534,534,39,1.62,"0.88, 0.98, 1.08",4.0
66,66,174,7.25,"1.08, 1.2, 1.27",17.0
182,182,74,3.08,"0.96, 1.02, 1.13",8.0
35/38,35/38,130,5.42,"0.58, 0.64, 1.23",11.3
662,662,47,1.96,"0.84, 0.98, 1.04",4.33
78,78,163,6.79,"0.92, 1.02, 1.23",15.3
501,501,82,3.42,"0.83, 0.85, 0.9",9.0


### Long Routes (> 1.5 hrs)

In [15]:
# Chart and route stats for the "long" route group.
route_group = "long"
display_charts_for_route_group(df=df, route_group=route_group)

Route ID,Route Name,# trips,Daily Avg Freq (trips per hr),"25th, 50th, 75th ptile (hrs)",PM Peak Avg Freq (trips per hr)
120,120,40,1.67,"1.83, 2.02, 2.18",3.0
155,155,36,1.5,"1.44, 1.6, 1.65",3.33
260,260,126,5.25,"1.55, 1.73, 2.0",10.3
33,33,211,8.79,"1.35, 1.5, 1.73",18.3
4,4,232,9.67,"1.57, 1.8, 1.98",17.3
70,70,204,8.5,"1.3, 1.45, 1.53",18.3
94,94,122,5.08,"1.34, 1.47, 1.52",10.0
76,76,104,4.33,"1.14, 1.37, 1.43",8.0
207,207,264,11.0,"1.03, 1.27, 1.4",27.0
108,108,174,7.25,"1.32, 1.5, 1.68",17.7


### GTFS Real-Time Speedmaps

In [None]:
# The notebook-level import of deploy_portfolio_yaml is commented out, so this
# cell raised NameError on a fresh kernel. Import it here, where it is used.
import deploy_portfolio_yaml

RT_SITE_YAML = "../portfolio/sites/rt.yml"

# Grab list of ITP IDs with GTFS RT speedmaps
rt_itp_id = deploy_portfolio_yaml.check_if_rt_data_available(RT_SITE_YAML)

# Construct the URL
RT_URL = ("https://analysis.calitp.org/rt/"
          f"district_{district_number}-{district_name}/"
          f"speedmaps__itp_id_{itp_id}.html"
         )

# If this ITP ID is found in RT analysis, give URL to that operator in analysis.calitp.org
if itp_id in rt_itp_id:
    # Quote the href value so the link stays well-formed whatever the URL contains.
    display(HTML(f'<a href="{RT_URL}">GTFS RT speedmaps here.</a>'))
else:
    display(HTML("No GTFS RT data available."))